001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers;
020
021import java.io.ByteArrayInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.OutputStream;
025import java.security.AccessController;
026import java.security.PrivilegedAction;
027import java.util.ArrayList;
028import java.util.Collections;
029import java.util.Iterator;
030import java.util.Locale;
031import java.util.Set;
032import java.util.SortedMap;
033import java.util.TreeMap;
034
035import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
036import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
037import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
038import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
039import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
040import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
041import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
042import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
043import org.apache.commons.compress.archivers.sevenz.SevenZFile;
044import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
045import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
046import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
047import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
048import org.apache.commons.compress.utils.IOUtils;
049import org.apache.commons.compress.utils.Lists;
050import org.apache.commons.compress.utils.ServiceLoaderIterator;
051import org.apache.commons.compress.utils.Sets;
052
053/**
054 * Factory to create Archive[In|Out]putStreams from names or the first bytes of
055 * the InputStream. In order to add other implementations, you should extend
056 * ArchiveStreamFactory and override the appropriate methods (and call their
057 * implementation from super of course).
058 *
059 * Compressing a ZIP-File:
060 *
061 * <pre>
062 * final OutputStream out = Files.newOutputStream(output.toPath());
063 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
064 *
065 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
066 * IOUtils.copy(Files.newInputStream(file1.toPath()), os);
067 * os.closeArchiveEntry();
068 *
069 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
070 * IOUtils.copy(Files.newInputStream(file2.toPath()), os);
071 * os.closeArchiveEntry();
072 * os.close();
073 * </pre>
074 *
075 * Decompressing a ZIP-File:
076 *
077 * <pre>
078 * final InputStream is = Files.newInputStream(input.toPath());
079 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
080 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry();
081 * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName()));
082 * IOUtils.copy(in, out);
083 * out.close();
084 * in.close();
085 * </pre>
086 * @Immutable provided that the deprecated method setEntryEncoding is not used.
087 * @ThreadSafe even if the deprecated method setEntryEncoding is used
088 */
089public class ArchiveStreamFactory implements ArchiveStreamProvider {
090
091    private static final int TAR_HEADER_SIZE = 512;
092
093    private static final int DUMP_SIGNATURE_SIZE = 32;
094
095    private static final int SIGNATURE_SIZE = 12;
096
097    private static final ArchiveStreamFactory SINGLETON = new ArchiveStreamFactory();
098
099    /**
100     * Constant (value {@value}) used to identify the AR archive format.
101     * @since 1.1
102     */
103    public static final String AR = "ar";
104
105    /**
106     * Constant (value {@value}) used to identify the ARJ archive format.
107     * Not supported as an output stream type.
108     * @since 1.6
109     */
110    public static final String ARJ = "arj";
111
112    /**
113     * Constant (value {@value}) used to identify the CPIO archive format.
114     * @since 1.1
115     */
116    public static final String CPIO = "cpio";
117
118    /**
119     * Constant (value {@value}) used to identify the Unix DUMP archive format.
120     * Not supported as an output stream type.
121     * @since 1.3
122     */
123    public static final String DUMP = "dump";
124
125    /**
126     * Constant (value {@value}) used to identify the JAR archive format.
127     * @since 1.1
128     */
129    public static final String JAR = "jar";
130
131    /**
132     * Constant used to identify the TAR archive format.
133     * @since 1.1
134     */
135    public static final String TAR = "tar";
136
137    /**
138     * Constant (value {@value}) used to identify the ZIP archive format.
139     * @since 1.1
140     */
141    public static final String ZIP = "zip";
142
143    /**
144     * Constant (value {@value}) used to identify the 7z archive format.
145     * @since 1.8
146     */
147    public static final String SEVEN_Z = "7z";
148
149    /**
150     * Entry encoding, null for the platform default.
151     */
152    private final String encoding;
153
154    /**
155     * Entry encoding, null for the default.
156     */
157    private volatile String entryEncoding;
158
159    private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders;
160
161    private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders;
162
163    private static ArrayList<ArchiveStreamProvider> findArchiveStreamProviders() {
164        return Lists.newArrayList(serviceLoaderIterator());
165    }
166
167    static void putAll(Set<String> names, ArchiveStreamProvider provider,
168            TreeMap<String, ArchiveStreamProvider> map) {
169        for (String name : names) {
170            map.put(toKey(name), provider);
171        }
172    }
173
174    private static Iterator<ArchiveStreamProvider> serviceLoaderIterator() {
175        return new ServiceLoaderIterator<>(ArchiveStreamProvider.class);
176    }
177
178    private static String toKey(final String name) {
179        return name.toUpperCase(Locale.ROOT);
180    }
181
182    /**
183     * Constructs a new sorted map from input stream provider names to provider
184     * objects.
185     *
186     * <p>
187     * The map returned by this method will have one entry for each provider for
188     * which support is available in the current Java virtual machine. If two or
189     * more supported provider have the same name then the resulting map will
190     * contain just one of them; which one it will contain is not specified.
191     * </p>
192     *
193     * <p>
194     * The invocation of this method, and the subsequent use of the resulting
195     * map, may cause time-consuming disk or network I/O operations to occur.
196     * This method is provided for applications that need to enumerate all of
197     * the available providers, for example to allow user provider selection.
198     * </p>
199     *
200     * <p>
201     * This method may return different results at different times if new
202     * providers are dynamically made available to the current Java virtual
203     * machine.
204     * </p>
205     *
206     * @return An immutable, map from names to provider objects
207     * @since 1.13
208     */
209    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() {
210        return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>() {
211            @Override
212            public SortedMap<String, ArchiveStreamProvider> run() {
213                TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
214                putAll(SINGLETON.getInputStreamArchiveNames(), SINGLETON, map);
215                for (ArchiveStreamProvider provider : findArchiveStreamProviders()) {
216                    putAll(provider.getInputStreamArchiveNames(), provider, map);
217                }
218                return map;
219            }
220        });
221    }
222
223    /**
224     * Constructs a new sorted map from output stream provider names to provider
225     * objects.
226     *
227     * <p>
228     * The map returned by this method will have one entry for each provider for
229     * which support is available in the current Java virtual machine. If two or
230     * more supported provider have the same name then the resulting map will
231     * contain just one of them; which one it will contain is not specified.
232     * </p>
233     *
234     * <p>
235     * The invocation of this method, and the subsequent use of the resulting
236     * map, may cause time-consuming disk or network I/O operations to occur.
237     * This method is provided for applications that need to enumerate all of
238     * the available providers, for example to allow user provider selection.
239     * </p>
240     *
241     * <p>
242     * This method may return different results at different times if new
243     * providers are dynamically made available to the current Java virtual
244     * machine.
245     * </p>
246     *
247     * @return An immutable, map from names to provider objects
248     * @since 1.13
249     */
250    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() {
251        return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>() {
252            @Override
253            public SortedMap<String, ArchiveStreamProvider> run() {
254                TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
255                putAll(SINGLETON.getOutputStreamArchiveNames(), SINGLETON, map);
256                for (ArchiveStreamProvider provider : findArchiveStreamProviders()) {
257                    putAll(provider.getOutputStreamArchiveNames(), provider, map);
258                }
259                return map;
260            }
261        });
262    }
263
264    /**
265     * Create an instance using the platform default encoding.
266     */
267    public ArchiveStreamFactory() {
268        this(null);
269    }
270
271    /**
272     * Create an instance using the specified encoding.
273     *
274     * @param encoding the encoding to be used.
275     *
276     * @since 1.10
277     */
278    public ArchiveStreamFactory(final String encoding) {
279        super();
280        this.encoding = encoding;
281        // Also set the original field so can continue to use it.
282        this.entryEncoding = encoding;
283    }
284
285    /**
286     * Returns the encoding to use for arj, jar, zip, dump, cpio and tar
287     * files, or null for the archiver default.
288     *
289     * @return entry encoding, or null for the archiver default
290     * @since 1.5
291     */
292    public String getEntryEncoding() {
293        return entryEncoding;
294    }
295
296    /**
297     * Sets the encoding to use for arj, jar, zip, dump, cpio and tar files. Use null for the archiver default.
298     *
299     * @param entryEncoding the entry encoding, null uses the archiver default.
300     * @since 1.5
301     * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding
302     * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)}
303     * was used to specify the factory encoding.
304     */
305    @Deprecated
306    public void setEntryEncoding(final String entryEncoding) {
307        // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway
308        if (encoding != null) {
309            throw new IllegalStateException("Cannot overide encoding set by the constructor");
310        }
311        this.entryEncoding = entryEncoding;
312    }
313
314    /**
315     * Creates an archive input stream from an archiver name and an input stream.
316     *
317     * @param archiverName the archive name,
318     * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z}
319     * @param in the input stream
320     * @return the archive input stream
321     * @throws ArchiveException if the archiver name is not known
322     * @throws StreamingNotSupportedException if the format cannot be
323     * read from a stream
324     * @throws IllegalArgumentException if the archiver name or stream is null
325     */
326    public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in)
327            throws ArchiveException {
328        return createArchiveInputStream(archiverName, in, entryEncoding);
329    }
330
331    @Override
332    public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in,
333            final String actualEncoding) throws ArchiveException {
334
335        if (archiverName == null) {
336            throw new IllegalArgumentException("Archivername must not be null.");
337        }
338
339        if (in == null) {
340            throw new IllegalArgumentException("InputStream must not be null.");
341        }
342
343        if (AR.equalsIgnoreCase(archiverName)) {
344            return new ArArchiveInputStream(in);
345        }
346        if (ARJ.equalsIgnoreCase(archiverName)) {
347            if (actualEncoding != null) {
348                return new ArjArchiveInputStream(in, actualEncoding);
349            }
350            return new ArjArchiveInputStream(in);
351        }
352        if (ZIP.equalsIgnoreCase(archiverName)) {
353            if (actualEncoding != null) {
354                return new ZipArchiveInputStream(in, actualEncoding);
355            }
356            return new ZipArchiveInputStream(in);
357        }
358        if (TAR.equalsIgnoreCase(archiverName)) {
359            if (actualEncoding != null) {
360                return new TarArchiveInputStream(in, actualEncoding);
361            }
362            return new TarArchiveInputStream(in);
363        }
364        if (JAR.equalsIgnoreCase(archiverName)) {
365            if (actualEncoding != null) {
366                return new JarArchiveInputStream(in, actualEncoding);
367            }
368            return new JarArchiveInputStream(in);
369        }
370        if (CPIO.equalsIgnoreCase(archiverName)) {
371            if (actualEncoding != null) {
372                return new CpioArchiveInputStream(in, actualEncoding);
373            }
374            return new CpioArchiveInputStream(in);
375        }
376        if (DUMP.equalsIgnoreCase(archiverName)) {
377            if (actualEncoding != null) {
378                return new DumpArchiveInputStream(in, actualEncoding);
379            }
380            return new DumpArchiveInputStream(in);
381        }
382        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
383            throw new StreamingNotSupportedException(SEVEN_Z);
384        }
385
386        final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName));
387        if (archiveStreamProvider != null) {
388            return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding);
389        }
390
391        throw new ArchiveException("Archiver: " + archiverName + " not found.");
392    }
393
394    /**
395     * Creates an archive output stream from an archiver name and an output stream.
396     *
397     * @param archiverName the archive name,
398     * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO}
399     * @param out the output stream
400     * @return the archive output stream
401     * @throws ArchiveException if the archiver name is not known
402     * @throws StreamingNotSupportedException if the format cannot be
403     * written to a stream
404     * @throws IllegalArgumentException if the archiver name or stream is null
405     */
406    public ArchiveOutputStream createArchiveOutputStream(final String archiverName, final OutputStream out)
407            throws ArchiveException {
408        return createArchiveOutputStream(archiverName, out, entryEncoding);
409    }
410
411    @Override
412    public ArchiveOutputStream createArchiveOutputStream(
413            final String archiverName, final OutputStream out, final String actualEncoding)
414            throws ArchiveException {
415        if (archiverName == null) {
416            throw new IllegalArgumentException("Archivername must not be null.");
417        }
418        if (out == null) {
419            throw new IllegalArgumentException("OutputStream must not be null.");
420        }
421
422        if (AR.equalsIgnoreCase(archiverName)) {
423            return new ArArchiveOutputStream(out);
424        }
425        if (ZIP.equalsIgnoreCase(archiverName)) {
426            final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
427            if (actualEncoding != null) {
428                zip.setEncoding(actualEncoding);
429            }
430            return zip;
431        }
432        if (TAR.equalsIgnoreCase(archiverName)) {
433            if (actualEncoding != null) {
434                return new TarArchiveOutputStream(out, actualEncoding);
435            }
436            return new TarArchiveOutputStream(out);
437        }
438        if (JAR.equalsIgnoreCase(archiverName)) {
439            if (actualEncoding != null) {
440                return new JarArchiveOutputStream(out, actualEncoding);
441            }
442            return new JarArchiveOutputStream(out);
443        }
444        if (CPIO.equalsIgnoreCase(archiverName)) {
445            if (actualEncoding != null) {
446                return new CpioArchiveOutputStream(out, actualEncoding);
447            }
448            return new CpioArchiveOutputStream(out);
449        }
450        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
451            throw new StreamingNotSupportedException(SEVEN_Z);
452        }
453
454        final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName));
455        if (archiveStreamProvider != null) {
456            return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding);
457        }
458
459        throw new ArchiveException("Archiver: " + archiverName + " not found.");
460    }
461
462    /**
463     * Create an archive input stream from an input stream, autodetecting
464     * the archive type from the first few bytes of the stream. The InputStream
465     * must support marks, like BufferedInputStream.
466     *
467     * @param in the input stream
468     * @return the archive input stream
469     * @throws ArchiveException if the archiver name is not known
470     * @throws StreamingNotSupportedException if the format cannot be
471     * read from a stream
472     * @throws IllegalArgumentException if the stream is null or does not support mark
473     */
474    public ArchiveInputStream createArchiveInputStream(final InputStream in)
475            throws ArchiveException {
476        return createArchiveInputStream(detect(in), in);
477    }
478
479    /**
480     * Try to determine the type of Archiver
481     * @param in input stream
482     * @return type of archiver if found
483     * @throws ArchiveException if an archiver cannot be detected in the stream
484     * @since 1.14
485     */
486    public static String detect(InputStream in) throws ArchiveException {
487        if (in == null) {
488            throw new IllegalArgumentException("Stream must not be null.");
489        }
490
491        if (!in.markSupported()) {
492            throw new IllegalArgumentException("Mark is not supported.");
493        }
494
495        final byte[] signature = new byte[SIGNATURE_SIZE];
496        in.mark(signature.length);
497        int signatureLength = -1;
498        try {
499            signatureLength = IOUtils.readFully(in, signature);
500            in.reset();
501        } catch (IOException e) {
502            throw new ArchiveException("IOException while reading signature.", e);
503        }
504
505        if (ZipArchiveInputStream.matches(signature, signatureLength)) {
506            return ZIP;
507        } else if (JarArchiveInputStream.matches(signature, signatureLength)) {
508            return JAR;
509        } else if (ArArchiveInputStream.matches(signature, signatureLength)) {
510            return AR;
511        } else if (CpioArchiveInputStream.matches(signature, signatureLength)) {
512            return CPIO;
513        } else if (ArjArchiveInputStream.matches(signature, signatureLength)) {
514            return ARJ;
515        } else if (SevenZFile.matches(signature, signatureLength)) {
516            return SEVEN_Z;
517        }
518
519        // Dump needs a bigger buffer to check the signature;
520        final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE];
521        in.mark(dumpsig.length);
522        try {
523            signatureLength = IOUtils.readFully(in, dumpsig);
524            in.reset();
525        } catch (IOException e) {
526            throw new ArchiveException("IOException while reading dump signature", e);
527        }
528        if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
529            return DUMP;
530        }
531
532        // Tar needs an even bigger buffer to check the signature; read the first block
533        final byte[] tarHeader = new byte[TAR_HEADER_SIZE];
534        in.mark(tarHeader.length);
535        try {
536            signatureLength = IOUtils.readFully(in, tarHeader);
537            in.reset();
538        } catch (IOException e) {
539            throw new ArchiveException("IOException while reading tar signature", e);
540        }
541        if (TarArchiveInputStream.matches(tarHeader, signatureLength)) {
542            return TAR;
543        }
544
545        // COMPRESS-117 - improve auto-recognition
546        if (signatureLength >= TAR_HEADER_SIZE) {
547            TarArchiveInputStream tais = null;
548            try {
549                tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader));
550                // COMPRESS-191 - verify the header checksum
551                if (tais.getNextTarEntry().isCheckSumOK()) {
552                    return TAR;
553                }
554            } catch (final Exception e) { // NOPMD // NOSONAR
555                // can generate IllegalArgumentException as well
556                // as IOException
557                // autodetection, simply not a TAR
558                // ignored
559            } finally {
560                IOUtils.closeQuietly(tais);
561            }
562        }
563        throw new ArchiveException("No Archiver found for the stream signature");
564    }
565
566    public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() {
567        if (archiveInputStreamProviders == null) {
568            archiveInputStreamProviders = Collections
569                    .unmodifiableSortedMap(findAvailableArchiveInputStreamProviders());
570        }
571        return archiveInputStreamProviders;
572    }
573
574    public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() {
575        if (archiveOutputStreamProviders == null) {
576            archiveOutputStreamProviders = Collections
577                    .unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders());
578        }
579        return archiveOutputStreamProviders;
580    }
581
582    @Override
583    public Set<String> getInputStreamArchiveNames() {
584        return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z);
585    }
586
587    @Override
588    public Set<String> getOutputStreamArchiveNames() {
589        return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z);
590    }
591
592}