001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors;
020
021import java.util.Collections;
022import java.util.HashMap;
023import java.util.Locale;
024import java.util.Map;
025
/**
 * File name mapping code for the compression formats.
 * <p>
 * Instances are immutable after construction: the suffix map passed to the
 * constructor is copied, so later changes to the caller's map have no effect
 * on this object.
 * </p>
 * @ThreadSafe
 * @since 1.4
 */
public class FileNameUtil {

    /**
     * Map from common filename suffixes to the suffixes that identify compressed
     * versions of those file types. For example: from ".tar" to ".tgz".
     */
    private final Map<String, String> compressSuffix;

    /**
     * Map from common filename suffixes of compressed files to the
     * corresponding suffixes of uncompressed files. For example: from
     * ".tgz" to ".tar".
     * <p>
     * This map also contains format-specific suffixes like ".gz" and "-z".
     * These suffixes are mapped to the empty string, as they should simply
     * be removed from the filename when the file is uncompressed.
     */
    private final Map<String, String> uncompressSuffix;

    /**
     * Length of the longest compressed suffix.
     */
    private final int longestCompressedSuffix;

    /**
     * Length of the shortest compressed suffix.
     */
    private final int shortestCompressedSuffix;

    /**
     * Length of the longest uncompressed suffix.
     */
    private final int longestUncompressedSuffix;

    /**
     * Length of the shortest uncompressed suffix longer than the
     * empty string.
     */
    private final int shortestUncompressedSuffix;

    /**
     * The format's default extension.
     */
    private final String defaultExtension;

    /**
     * sets up the utility with a map of known compressed to
     * uncompressed suffix mappings and the default extension of the
     * format.
     * <p>
     * The map is copied; modifying it after this constructor returns
     * does not affect the created instance. If several compressed
     * suffixes map to the same uncompressed suffix, the one that comes
     * first in the iteration order of the given map is used by
     * {@link #getCompressedFilename}.
     * </p>
     *
     * @param uncompressSuffix Map from common filename suffixes of
     * compressed files to the corresponding suffixes of uncompressed
     * files. For example: from ".tgz" to ".tar".  This map also
     * contains format-specific suffixes like ".gz" and "-z".  These
     * suffixes are mapped to the empty string, as they should simply
     * be removed from the filename when the file is uncompressed.
     * Keys are matched against lower-cased filename endings, so they
     * are expected to be lower case.
     *
     * @param defaultExtension the format's default extension like ".gz"
     */
    public FileNameUtil(final Map<String, String> uncompressSuffix,
                        final String defaultExtension) {
        // Take a private snapshot: Collections.unmodifiableMap alone is only
        // a view and would still reflect later changes made by the caller,
        // invalidating the suffix length bounds computed below.
        this.uncompressSuffix =
            Collections.unmodifiableMap(new HashMap<>(uncompressSuffix));

        final Map<String, String> reverse = new HashMap<>();
        int lc = Integer.MIN_VALUE, sc = Integer.MAX_VALUE;
        int lu = Integer.MIN_VALUE, su = Integer.MAX_VALUE;
        // Iterate the caller's map (not the HashMap copy) so that the
        // caller-defined iteration order decides which compressed suffix
        // wins when several of them share one uncompressed suffix.
        for (final Map.Entry<String, String> ent : uncompressSuffix.entrySet()) {
            final String compressed = ent.getKey();
            final int cl = compressed.length();
            lc = Math.max(lc, cl);
            sc = Math.min(sc, cl);

            final String u = ent.getValue();
            final int ul = u.length();
            // Empty values mean "just strip the suffix" and have no
            // reverse mapping.
            if (ul > 0) {
                if (!reverse.containsKey(u)) {
                    reverse.put(u, compressed);
                }
                lu = Math.max(lu, ul);
                su = Math.min(su, ul);
            }
        }
        // For an empty map (or one without non-empty values) the bounds
        // stay at shortest = MAX_VALUE and longest = MIN_VALUE, which makes
        // the lookup loops below run zero times.
        this.compressSuffix = Collections.unmodifiableMap(reverse);
        longestCompressedSuffix = lc;
        longestUncompressedSuffix = lu;
        shortestCompressedSuffix = sc;
        shortestUncompressedSuffix = su;
        this.defaultExtension = defaultExtension;
    }

    /**
     * Detects common format suffixes in the given filename.
     * <p>
     * The comparison ignores case. A name that consists of nothing but
     * the suffix (for example ".gz") is not considered compressed.
     * </p>
     *
     * @param filename name of a file
     * @return {@code true} if the filename has a common format suffix,
     *         {@code false} otherwise
     */
    public boolean isCompressedFilename(final String filename) {
        final int n = filename.length();
        for (int i = shortestCompressedSuffix;
             i <= longestCompressedSuffix && i < n; i++) {
            if (uncompressSuffix.containsKey(lowerCaseTail(filename, i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Maps the given name of a compressed file to the name that the
     * file should have after uncompression. Commonly used file type specific
     * suffixes like ".tgz" or ".svgz" are automatically detected and
     * correctly mapped. For example the name "package.tgz" is mapped to
     * "package.tar". And any filenames with the generic ".gz" suffix
     * (or any other generic gzip suffix) is mapped to a name without that
     * suffix. If no format suffix is detected, then the filename is returned
     * unmapped.
     *
     * @param filename name of a file
     * @return name of the corresponding uncompressed file
     */
    public String getUncompressedFilename(final String filename) {
        final String mapped = replaceSuffix(filename, uncompressSuffix,
            shortestCompressedSuffix, longestCompressedSuffix);
        return mapped != null ? mapped : filename;
    }

    /**
     * Maps the given filename to the name that the file should have after
     * compression. Common file types with custom suffixes for
     * compressed versions are automatically detected and correctly mapped.
     * For example the name "package.tar" is mapped to "package.tgz". If no
     * custom mapping is applicable, then the format's default extension
     * (as given to the constructor) is appended to the filename.
     *
     * @param filename name of a file
     * @return name of the corresponding compressed file
     */
    public String getCompressedFilename(final String filename) {
        final String mapped = replaceSuffix(filename, compressSuffix,
            shortestUncompressedSuffix, longestUncompressedSuffix);
        // No custom suffix found, just append the default
        return mapped != null ? mapped : filename + defaultExtension;
    }

    /**
     * Replaces the shortest known suffix of the filename with its mapping.
     *
     * @param filename name of a file
     * @param mappings map from lower-case suffix to replacement suffix
     * @param shortest length of the shortest key to try
     * @param longest length of the longest key to try
     * @return the filename with its suffix replaced, or {@code null} if no
     *         key of {@code mappings} matches the end of the filename
     */
    private static String replaceSuffix(final String filename,
                                        final Map<String, String> mappings,
                                        final int shortest,
                                        final int longest) {
        // All offsets are taken from the original name. Lower-casing may
        // change the length of a string (e.g. U+0130 becomes two chars), so
        // offsets measured on a lower-cased copy would cut the original
        // name at the wrong position or even outside of its bounds.
        final int n = filename.length();
        // "i < n" requires at least one character in front of the suffix.
        for (int i = shortest; i <= longest && i < n; i++) {
            final String replacement = mappings.get(lowerCaseTail(filename, i));
            if (replacement != null) {
                return filename.substring(0, n - i) + replacement;
            }
        }
        return null;
    }

    /**
     * Returns the last {@code length} characters of the filename in lower case.
     *
     * @param filename name of a file, at least {@code length} characters long
     * @param length number of trailing characters to extract
     * @return the lower-cased tail of the filename
     */
    private static String lowerCaseTail(final String filename, final int length) {
        return filename.substring(filename.length() - length)
            .toLowerCase(Locale.ENGLISH);
    }

}