001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.zip;
019
020import java.io.BufferedInputStream;
021import java.io.ByteArrayInputStream;
022import java.io.Closeable;
023import java.io.EOFException;
024import java.io.File;
025import java.io.IOException;
026import java.io.InputStream;
027import java.io.SequenceInputStream;
028import java.nio.ByteBuffer;
029import java.nio.channels.FileChannel;
030import java.nio.channels.SeekableByteChannel;
031import java.nio.file.Files;
032import java.nio.file.StandardOpenOption;
033import java.util.Arrays;
034import java.util.Collections;
035import java.util.Comparator;
036import java.util.Enumeration;
037import java.util.EnumSet;
038import java.util.HashMap;
039import java.util.LinkedList;
040import java.util.List;
041import java.util.Map;
042import java.util.zip.Inflater;
043import java.util.zip.ZipException;
044
045import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
046import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
047import org.apache.commons.compress.utils.CountingInputStream;
048import org.apache.commons.compress.utils.IOUtils;
049import org.apache.commons.compress.utils.InputStreamStatistics;
050
051import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
052import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
053import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
054import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
055import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
056
057/**
 * Replacement for <code>java.util.zip.ZipFile</code>.
059 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in
 * self-extracting archives.  Furthermore it returns instances of
 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 * instead of <code>java.util.zip.ZipEntry</code>.</p>
066 *
 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
 * have to reimplement all methods anyway.  Like
 * <code>java.util.zip.ZipFile</code>, it uses SeekableByteChannel under the
 * covers and supports compressed and uncompressed entries.  As of
 * Apache Commons Compress 1.3 it also transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
074 *
075 * <p>The method signatures mimic the ones of
076 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
077 *
078 * <ul>
079 *   <li>There is no getName method.</li>
080 *   <li>entries has been renamed to getEntries.</li>
081 *   <li>getEntries and getEntry return
082 *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
083 *   instances.</li>
084 *   <li>close is allowed to throw IOException.</li>
085 * </ul>
086 *
087 */
088public class ZipFile implements Closeable {
089    private static final int HASH_SIZE = 509;
090    static final int NIBLET_MASK = 0x0f;
091    static final int BYTE_SHIFT = 8;
092    private static final int POS_0 = 0;
093    private static final int POS_1 = 1;
094    private static final int POS_2 = 2;
095    private static final int POS_3 = 3;
096    private static final byte[] ONE_ZERO_BYTE = new byte[1];
097
098    /**
099     * List of entries in the order they appear inside the central
100     * directory.
101     */
102    private final List<ZipArchiveEntry> entries =
103        new LinkedList<>();
104
105    /**
106     * Maps String to list of ZipArchiveEntrys, name -> actual entries.
107     */
108    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
109        new HashMap<>(HASH_SIZE);
110
111    /**
112     * The encoding to use for filenames and the file comment.
113     *
114     * <p>For a list of possible values see <a
115     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
116     * Defaults to UTF-8.</p>
117     */
118    private final String encoding;
119
120    /**
121     * The zip encoding to use for filenames and the file comment.
122     */
123    private final ZipEncoding zipEncoding;
124
125    /**
126     * File name of actual source.
127     */
128    private final String archiveName;
129
130    /**
131     * The actual data source.
132     */
133    private final SeekableByteChannel archive;
134
135    /**
136     * Whether to look for and use Unicode extra fields.
137     */
138    private final boolean useUnicodeExtraFields;
139
140    /**
141     * Whether the file is closed.
142     */
143    private volatile boolean closed = true;
144
145    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
146    private final byte[] dwordBuf = new byte[DWORD];
147    private final byte[] wordBuf = new byte[WORD];
148    private final byte[] cfhBuf = new byte[CFH_LEN];
149    private final byte[] shortBuf = new byte[SHORT];
150    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
151    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
152    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
153
154    /**
155     * Opens the given file for reading, assuming "UTF8" for file names.
156     *
157     * @param f the archive.
158     *
159     * @throws IOException if an error occurs while reading the file.
160     */
161    public ZipFile(final File f) throws IOException {
162        this(f, ZipEncodingHelper.UTF8);
163    }
164
165    /**
166     * Opens the given file for reading, assuming "UTF8".
167     *
168     * @param name name of the archive.
169     *
170     * @throws IOException if an error occurs while reading the file.
171     */
172    public ZipFile(final String name) throws IOException {
173        this(new File(name), ZipEncodingHelper.UTF8);
174    }
175
176    /**
177     * Opens the given file for reading, assuming the specified
178     * encoding for file names, scanning unicode extra fields.
179     *
180     * @param name name of the archive.
181     * @param encoding the encoding to use for file names, use null
182     * for the platform's default encoding
183     *
184     * @throws IOException if an error occurs while reading the file.
185     */
186    public ZipFile(final String name, final String encoding) throws IOException {
187        this(new File(name), encoding, true);
188    }
189
190    /**
191     * Opens the given file for reading, assuming the specified
192     * encoding for file names and scanning for unicode extra fields.
193     *
194     * @param f the archive.
195     * @param encoding the encoding to use for file names, use null
196     * for the platform's default encoding
197     *
198     * @throws IOException if an error occurs while reading the file.
199     */
200    public ZipFile(final File f, final String encoding) throws IOException {
201        this(f, encoding, true);
202    }
203
204    /**
205     * Opens the given file for reading, assuming the specified
206     * encoding for file names.
207     *
208     * @param f the archive.
209     * @param encoding the encoding to use for file names, use null
210     * for the platform's default encoding
211     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
212     * Extra Fields (if present) to set the file names.
213     *
214     * @throws IOException if an error occurs while reading the file.
215     */
216    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
217        throws IOException {
218        this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
219             f.getAbsolutePath(), encoding, useUnicodeExtraFields, true);
220    }
221
222    /**
223     * Opens the given channel for reading, assuming "UTF8" for file names.
224     *
225     * <p>{@link
226     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
227     * allows you to read from an in-memory archive.</p>
228     *
229     * @param channel the archive.
230     *
231     * @throws IOException if an error occurs while reading the file.
232     * @since 1.13
233     */
234    public ZipFile(final SeekableByteChannel channel)
235            throws IOException {
236        this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
237    }
238
239    /**
240     * Opens the given channel for reading, assuming the specified
241     * encoding for file names.
242     *
243     * <p>{@link
244     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
245     * allows you to read from an in-memory archive.</p>
246     *
247     * @param channel the archive.
248     * @param encoding the encoding to use for file names, use null
249     * for the platform's default encoding
250     *
251     * @throws IOException if an error occurs while reading the file.
252     * @since 1.13
253     */
254    public ZipFile(final SeekableByteChannel channel, final String encoding)
255        throws IOException {
256        this(channel, "unknown archive", encoding, true);
257    }
258
259    /**
260     * Opens the given channel for reading, assuming the specified
261     * encoding for file names.
262     *
263     * <p>{@link
264     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
265     * allows you to read from an in-memory archive.</p>
266     *
267     * @param channel the archive.
268     * @param archiveName name of the archive, used for error messages only.
269     * @param encoding the encoding to use for file names, use null
270     * for the platform's default encoding
271     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
272     * Extra Fields (if present) to set the file names.
273     *
274     * @throws IOException if an error occurs while reading the file.
275     * @since 1.13
276     */
277    public ZipFile(final SeekableByteChannel channel, final String archiveName,
278                   final String encoding, final boolean useUnicodeExtraFields)
279        throws IOException {
280        this(channel, archiveName, encoding, useUnicodeExtraFields, false);
281    }
282
283    private ZipFile(final SeekableByteChannel channel, final String archiveName,
284                    final String encoding, final boolean useUnicodeExtraFields,
285                    final boolean closeOnError)
286        throws IOException {
287        this.archiveName = archiveName;
288        this.encoding = encoding;
289        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
290        this.useUnicodeExtraFields = useUnicodeExtraFields;
291        archive = channel;
292        boolean success = false;
293        try {
294            final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
295                populateFromCentralDirectory();
296            resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
297            success = true;
298        } finally {
299            closed = !success;
300            if (!success && closeOnError) {
301                IOUtils.closeQuietly(archive);
302            }
303        }
304    }
305
306    /**
307     * The encoding to use for filenames and the file comment.
308     *
309     * @return null if using the platform's default character encoding.
310     */
311    public String getEncoding() {
312        return encoding;
313    }
314
315    /**
316     * Closes the archive.
317     * @throws IOException if an error occurs closing the archive.
318     */
319    @Override
320    public void close() throws IOException {
321        // this flag is only written here and read in finalize() which
322        // can never be run in parallel.
323        // no synchronization needed.
324        closed = true;
325
326        archive.close();
327    }
328
329    /**
330     * close a zipfile quietly; throw no io fault, do nothing
331     * on a null parameter
332     * @param zipfile file to close, can be null
333     */
334    public static void closeQuietly(final ZipFile zipfile) {
335        IOUtils.closeQuietly(zipfile);
336    }
337
338    /**
339     * Returns all entries.
340     *
341     * <p>Entries will be returned in the same order they appear
342     * within the archive's central directory.</p>
343     *
344     * @return all entries as {@link ZipArchiveEntry} instances
345     */
346    public Enumeration<ZipArchiveEntry> getEntries() {
347        return Collections.enumeration(entries);
348    }
349
350    /**
351     * Returns all entries in physical order.
352     *
353     * <p>Entries will be returned in the same order their contents
354     * appear within the archive.</p>
355     *
356     * @return all entries as {@link ZipArchiveEntry} instances
357     *
358     * @since 1.1
359     */
360    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
361        final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]);
362        Arrays.sort(allEntries, offsetComparator);
363        return Collections.enumeration(Arrays.asList(allEntries));
364    }
365
366    /**
367     * Returns a named entry - or {@code null} if no entry by
368     * that name exists.
369     *
370     * <p>If multiple entries with the same name exist the first entry
371     * in the archive's central directory by that name is
372     * returned.</p>
373     *
374     * @param name name of the entry.
375     * @return the ZipArchiveEntry corresponding to the given name - or
376     * {@code null} if not present.
377     */
378    public ZipArchiveEntry getEntry(final String name) {
379        final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
380        return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
381    }
382
383    /**
384     * Returns all named entries in the same order they appear within
385     * the archive's central directory.
386     *
387     * @param name name of the entry.
388     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
389     * given name
390     * @since 1.6
391     */
392    public Iterable<ZipArchiveEntry> getEntries(final String name) {
393        final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
394        return entriesOfThatName != null ? entriesOfThatName
395            : Collections.<ZipArchiveEntry>emptyList();
396    }
397
398    /**
399     * Returns all named entries in the same order their contents
400     * appear within the archive.
401     *
402     * @param name name of the entry.
403     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
404     * given name
405     * @since 1.6
406     */
407    public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
408        ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0];
409        if (nameMap.containsKey(name)) {
410            entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName);
411            Arrays.sort(entriesOfThatName, offsetComparator);
412        }
413        return Arrays.asList(entriesOfThatName);
414    }
415
416    /**
417     * Whether this class is able to read the given entry.
418     *
419     * <p>May return false if it is set up to use encryption or a
420     * compression method that hasn't been implemented yet.</p>
421     * @since 1.1
422     * @param ze the entry
423     * @return whether this class is able to read the given entry.
424     */
425    public boolean canReadEntryData(final ZipArchiveEntry ze) {
426        return ZipUtil.canHandleEntryData(ze);
427    }
428
429    /**
430     * Expose the raw stream of the archive entry (compressed form).
431     *
432     * <p>This method does not relate to how/if we understand the payload in the
433     * stream, since we really only intend to move it on to somewhere else.</p>
434     *
435     * @param ze The entry to get the stream for
436     * @return The raw input stream containing (possibly) compressed data.
437     * @since 1.11
438     */
439    public InputStream getRawInputStream(final ZipArchiveEntry ze) {
440        if (!(ze instanceof Entry)) {
441            return null;
442        }
443        final long start = ze.getDataOffset();
444        return createBoundedInputStream(start, ze.getCompressedSize());
445    }
446
447
448    /**
449     * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream.
450     * Compression and all other attributes will be as in this file.
451     * <p>This method transfers entries based on the central directory of the zip file.</p>
452     *
453     * @param target The zipArchiveOutputStream to write the entries to
454     * @param predicate A predicate that selects which entries to write
455     * @throws IOException on error
456     */
457    public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)
458            throws IOException {
459        final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
460        while (src.hasMoreElements()) {
461            final ZipArchiveEntry entry = src.nextElement();
462            if (predicate.test( entry)) {
463                target.addRawArchiveEntry(entry, getRawInputStream(entry));
464            }
465        }
466    }
467
468    /**
469     * Returns an InputStream for reading the contents of the given entry.
470     *
471     * @param ze the entry to get the stream for.
472     * @return a stream to read the entry from. The returned stream
473     * implements {@link InputStreamStatistics}.
474     * @throws IOException if unable to create an input stream from the zipentry
475     */
476    public InputStream getInputStream(final ZipArchiveEntry ze)
477        throws IOException {
478        if (!(ze instanceof Entry)) {
479            return null;
480        }
481        // cast validity is checked just above
482        ZipUtil.checkRequestedFeatures(ze);
483        final long start = ze.getDataOffset();
484
485        // doesn't get closed if the method is not supported - which
486        // should never happen because of the checkRequestedFeatures
487        // call above
488        final InputStream is =
489            new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR
490        switch (ZipMethod.getMethodByCode(ze.getMethod())) {
491            case STORED:
492                return new StoredStatisticsStream(is);
493            case UNSHRINKING:
494                return new UnshrinkingInputStream(is);
495            case IMPLODING:
496                return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(),
497                        ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is);
498            case DEFLATED:
499                final Inflater inflater = new Inflater(true);
500                // Inflater with nowrap=true has this odd contract for a zero padding
501                // byte following the data stream; this used to be zlib's requirement
502                // and has been fixed a long time ago, but the contract persists so
503                // we comply.
504                // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
505                return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)),
506                    inflater) {
507                    @Override
508                    public void close() throws IOException {
509                        try {
510                            super.close();
511                        } finally {
512                            inflater.end();
513                        }
514                    }
515                };
516            case BZIP2:
517                return new BZip2CompressorInputStream(is);
518            case ENHANCED_DEFLATED:
519                return new Deflate64CompressorInputStream(is);
520            case AES_ENCRYPTED:
521            case EXPANDING_LEVEL_1:
522            case EXPANDING_LEVEL_2:
523            case EXPANDING_LEVEL_3:
524            case EXPANDING_LEVEL_4:
525            case JPEG:
526            case LZMA:
527            case PKWARE_IMPLODING:
528            case PPMD:
529            case TOKENIZATION:
530            case UNKNOWN:
531            case WAVPACK:
532            case XZ:
533            default:
534                throw new ZipException("Found unsupported compression method "
535                                       + ze.getMethod());
536        }
537    }
538
539    /**
540     * <p>
541     * Convenience method to return the entry's content as a String if isUnixSymlink()
542     * returns true for it, otherwise returns null.
543     * </p>
544     *
545     * <p>This method assumes the symbolic link's file name uses the
546     * same encoding that as been specified for this ZipFile.</p>
547     *
548     * @param entry ZipArchiveEntry object that represents the symbolic link
549     * @return entry's content as a String
550     * @throws IOException problem with content's input stream
551     * @since 1.5
552     */
553    public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
554        if (entry != null && entry.isUnixSymlink()) {
555            try (InputStream in = getInputStream(entry)) {
556                return zipEncoding.decode(IOUtils.toByteArray(in));
557            }
558        }
559        return null;
560    }
561
562    /**
563     * Ensures that the close method of this zipfile is called when
564     * there are no more references to it.
565     * @see #close()
566     */
567    @Override
568    protected void finalize() throws Throwable {
569        try {
570            if (!closed) {
571                System.err.println("Cleaning up unclosed ZipFile for archive "
572                                   + archiveName);
573                close();
574            }
575        } finally {
576            super.finalize();
577        }
578    }
579
580    /**
581     * Length of a "central directory" entry structure without file
582     * name, extra fields or comment.
583     */
584    private static final int CFH_LEN =
585        /* version made by                 */ SHORT
586        /* version needed to extract       */ + SHORT
587        /* general purpose bit flag        */ + SHORT
588        /* compression method              */ + SHORT
589        /* last mod file time              */ + SHORT
590        /* last mod file date              */ + SHORT
591        /* crc-32                          */ + WORD
592        /* compressed size                 */ + WORD
593        /* uncompressed size               */ + WORD
594        /* filename length                 */ + SHORT
595        /* extra field length              */ + SHORT
596        /* file comment length             */ + SHORT
597        /* disk number start               */ + SHORT
598        /* internal file attributes        */ + SHORT
599        /* external file attributes        */ + WORD
600        /* relative offset of local header */ + WORD;
601
602    private static final long CFH_SIG =
603        ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
604
605    /**
606     * Reads the central directory of the given archive and populates
607     * the internal tables with ZipArchiveEntry instances.
608     *
609     * <p>The ZipArchiveEntrys will know all data that can be obtained from
610     * the central directory alone, but not the data that requires the
611     * local file header or additional data to be read.</p>
612     *
613     * @return a map of zipentries that didn't have the language
614     * encoding flag set when read.
615     */
616    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
617        throws IOException {
618        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
619            new HashMap<>();
620
621        positionAtCentralDirectory();
622
623        wordBbuf.rewind();
624        IOUtils.readFully(archive, wordBbuf);
625        long sig = ZipLong.getValue(wordBuf);
626
627        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
628            throw new IOException("central directory is empty, can't expand"
629                                  + " corrupt archive.");
630        }
631
632        while (sig == CFH_SIG) {
633            readCentralDirectoryEntry(noUTF8Flag);
634            wordBbuf.rewind();
635            IOUtils.readFully(archive, wordBbuf);
636            sig = ZipLong.getValue(wordBuf);
637        }
638        return noUTF8Flag;
639    }
640
641    /**
642     * Reads an individual entry of the central directory, creats an
643     * ZipArchiveEntry from it and adds it to the global maps.
644     *
645     * @param noUTF8Flag map used to collect entries that don't have
646     * their UTF-8 flag set and whose name will be set by data read
647     * from the local file header later.  The current entry may be
648     * added to this map.
649     */
650    private void
651        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
652        throws IOException {
653        cfhBbuf.rewind();
654        IOUtils.readFully(archive, cfhBbuf);
655        int off = 0;
656        final Entry ze = new Entry();
657
658        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
659        off += SHORT;
660        ze.setVersionMadeBy(versionMadeBy);
661        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
662
663        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
664        off += SHORT; // version required
665
666        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
667        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
668        final ZipEncoding entryEncoding =
669            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
670        if (hasUTF8Flag) {
671            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
672        }
673        ze.setGeneralPurposeBit(gpFlag);
674        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
675
676        off += SHORT;
677
678        //noinspection MagicConstant
679        ze.setMethod(ZipShort.getValue(cfhBuf, off));
680        off += SHORT;
681
682        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
683        ze.setTime(time);
684        off += WORD;
685
686        ze.setCrc(ZipLong.getValue(cfhBuf, off));
687        off += WORD;
688
689        ze.setCompressedSize(ZipLong.getValue(cfhBuf, off));
690        off += WORD;
691
692        ze.setSize(ZipLong.getValue(cfhBuf, off));
693        off += WORD;
694
695        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
696        off += SHORT;
697
698        final int extraLen = ZipShort.getValue(cfhBuf, off);
699        off += SHORT;
700
701        final int commentLen = ZipShort.getValue(cfhBuf, off);
702        off += SHORT;
703
704        final int diskStart = ZipShort.getValue(cfhBuf, off);
705        off += SHORT;
706
707        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
708        off += SHORT;
709
710        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
711        off += WORD;
712
713        final byte[] fileName = new byte[fileNameLen];
714        IOUtils.readFully(archive, ByteBuffer.wrap(fileName));
715        ze.setName(entryEncoding.decode(fileName), fileName);
716
717        // LFH offset,
718        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off));
719        // data offset will be filled later
720        entries.add(ze);
721
722        final byte[] cdExtraData = new byte[extraLen];
723        IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData));
724        ze.setCentralDirectoryExtra(cdExtraData);
725
726        setSizesAndOffsetFromZip64Extra(ze, diskStart);
727
728        final byte[] comment = new byte[commentLen];
729        IOUtils.readFully(archive, ByteBuffer.wrap(comment));
730        ze.setComment(entryEncoding.decode(comment));
731
732        if (!hasUTF8Flag && useUnicodeExtraFields) {
733            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
734        }
735    }
736
737    /**
738     * If the entry holds a Zip64 extended information extra field,
739     * read sizes from there if the entry's sizes are set to
740     * 0xFFFFFFFFF, do the same for the offset of the local file
741     * header.
742     *
743     * <p>Ensures the Zip64 extra either knows both compressed and
744     * uncompressed size or neither of both as the internal logic in
745     * ExtraFieldUtils forces the field to create local header data
746     * even if they are never used - and here a field with only one
747     * size would be invalid.</p>
748     */
749    private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze,
750                                                 final int diskStart)
751        throws IOException {
752        final Zip64ExtendedInformationExtraField z64 =
753            (Zip64ExtendedInformationExtraField)
754            ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
755        if (z64 != null) {
756            final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
757            final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
758            final boolean hasRelativeHeaderOffset =
759                ze.getLocalHeaderOffset() == ZIP64_MAGIC;
760            z64.reparseCentralDirectoryData(hasUncompressedSize,
761                                            hasCompressedSize,
762                                            hasRelativeHeaderOffset,
763                                            diskStart == ZIP64_MAGIC_SHORT);
764
765            if (hasUncompressedSize) {
766                ze.setSize(z64.getSize().getLongValue());
767            } else if (hasCompressedSize) {
768                z64.setSize(new ZipEightByteInteger(ze.getSize()));
769            }
770
771            if (hasCompressedSize) {
772                ze.setCompressedSize(z64.getCompressedSize().getLongValue());
773            } else if (hasUncompressedSize) {
774                z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
775            }
776
777            if (hasRelativeHeaderOffset) {
778                ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
779            }
780        }
781    }
782
783    /**
784     * Length of the "End of central directory record" - which is
785     * supposed to be the last structure of the archive - without file
786     * comment.
787     */
788    static final int MIN_EOCD_SIZE =
789        /* end of central dir signature    */ WORD
790        /* number of this disk             */ + SHORT
791        /* number of the disk with the     */
792        /* start of the central directory  */ + SHORT
793        /* total number of entries in      */
794        /* the central dir on this disk    */ + SHORT
795        /* total number of entries in      */
796        /* the central dir                 */ + SHORT
797        /* size of the central directory   */ + WORD
798        /* offset of start of central      */
799        /* directory with respect to       */
800        /* the starting disk number        */ + WORD
801        /* zipfile comment length          */ + SHORT;
802
803    /**
804     * Maximum length of the "End of central directory record" with a
805     * file comment.
806     */
807    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
808        /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;
809
810    /**
811     * Offset of the field that holds the location of the first
812     * central directory entry inside the "End of central directory
813     * record" relative to the start of the "End of central directory
814     * record".
815     */
816    private static final int CFD_LOCATOR_OFFSET =
817        /* end of central dir signature    */ WORD
818        /* number of this disk             */ + SHORT
819        /* number of the disk with the     */
820        /* start of the central directory  */ + SHORT
821        /* total number of entries in      */
822        /* the central dir on this disk    */ + SHORT
823        /* total number of entries in      */
824        /* the central dir                 */ + SHORT
825        /* size of the central directory   */ + WORD;
826
827    /**
828     * Length of the "Zip64 end of central directory locator" - which
829     * should be right in front of the "end of central directory
830     * record" if one is present at all.
831     */
832    private static final int ZIP64_EOCDL_LENGTH =
833        /* zip64 end of central dir locator sig */ WORD
834        /* number of the disk with the start    */
835        /* start of the zip64 end of            */
836        /* central directory                    */ + WORD
837        /* relative offset of the zip64         */
838        /* end of central directory record      */ + DWORD
839        /* total number of disks                */ + WORD;
840
841    /**
842     * Offset of the field that holds the location of the "Zip64 end
843     * of central directory record" inside the "Zip64 end of central
844     * directory locator" relative to the start of the "Zip64 end of
845     * central directory locator".
846     */
847    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
848        /* zip64 end of central dir locator sig */ WORD
849        /* number of the disk with the start    */
850        /* start of the zip64 end of            */
851        /* central directory                    */ + WORD;
852
853    /**
854     * Offset of the field that holds the location of the first
855     * central directory entry inside the "Zip64 end of central
856     * directory record" relative to the start of the "Zip64 end of
857     * central directory record".
858     */
859    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
860        /* zip64 end of central dir        */
861        /* signature                       */ WORD
862        /* size of zip64 end of central    */
863        /* directory record                */ + DWORD
864        /* version made by                 */ + SHORT
865        /* version needed to extract       */ + SHORT
866        /* number of this disk             */ + WORD
867        /* number of the disk with the     */
868        /* start of the central directory  */ + WORD
869        /* total number of entries in the  */
870        /* central directory on this disk  */ + DWORD
871        /* total number of entries in the  */
872        /* central directory               */ + DWORD
873        /* size of the central directory   */ + DWORD;
874
875    /**
876     * Searches for either the &quot;Zip64 end of central directory
877     * locator&quot; or the &quot;End of central dir record&quot;, parses
878     * it and positions the stream at the first central directory
879     * record.
880     */
881    private void positionAtCentralDirectory()
882        throws IOException {
883        positionAtEndOfCentralDirectoryRecord();
884        boolean found = false;
885        final boolean searchedForZip64EOCD =
886            archive.position() > ZIP64_EOCDL_LENGTH;
887        if (searchedForZip64EOCD) {
888            archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
889            wordBbuf.rewind();
890            IOUtils.readFully(archive, wordBbuf);
891            found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
892                                  wordBuf);
893        }
894        if (!found) {
895            // not a ZIP64 archive
896            if (searchedForZip64EOCD) {
897                skipBytes(ZIP64_EOCDL_LENGTH - WORD);
898            }
899            positionAtCentralDirectory32();
900        } else {
901            positionAtCentralDirectory64();
902        }
903    }
904
905    /**
906     * Parses the &quot;Zip64 end of central directory locator&quot;,
907     * finds the &quot;Zip64 end of central directory record&quot; using the
908     * parsed information, parses that and positions the stream at the
909     * first central directory record.
910     *
911     * Expects stream to be positioned right behind the &quot;Zip64
912     * end of central directory locator&quot;'s signature.
913     */
914    private void positionAtCentralDirectory64()
915        throws IOException {
916        skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
917                  - WORD /* signature has already been read */);
918        dwordBbuf.rewind();
919        IOUtils.readFully(archive, dwordBbuf);
920        archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
921        wordBbuf.rewind();
922        IOUtils.readFully(archive, wordBbuf);
923        if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
924            throw new ZipException("archive's ZIP64 end of central "
925                                   + "directory locator is corrupt.");
926        }
927        skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
928                  - WORD /* signature has already been read */);
929        dwordBbuf.rewind();
930        IOUtils.readFully(archive, dwordBbuf);
931        archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
932    }
933
934    /**
935     * Parses the &quot;End of central dir record&quot; and positions
936     * the stream at the first central directory record.
937     *
938     * Expects stream to be positioned at the beginning of the
939     * &quot;End of central dir record&quot;.
940     */
941    private void positionAtCentralDirectory32()
942        throws IOException {
943        skipBytes(CFD_LOCATOR_OFFSET);
944        wordBbuf.rewind();
945        IOUtils.readFully(archive, wordBbuf);
946        archive.position(ZipLong.getValue(wordBuf));
947    }
948
949    /**
950     * Searches for the and positions the stream at the start of the
951     * &quot;End of central dir record&quot;.
952     */
953    private void positionAtEndOfCentralDirectoryRecord()
954        throws IOException {
955        final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
956                                             ZipArchiveOutputStream.EOCD_SIG);
957        if (!found) {
958            throw new ZipException("archive is not a ZIP archive");
959        }
960    }
961
962    /**
963     * Searches the archive backwards from minDistance to maxDistance
964     * for the given signature, positions the RandomaccessFile right
965     * at the signature if it has been found.
966     */
967    private boolean tryToLocateSignature(final long minDistanceFromEnd,
968                                         final long maxDistanceFromEnd,
969                                         final byte[] sig) throws IOException {
970        boolean found = false;
971        long off = archive.size() - minDistanceFromEnd;
972        final long stopSearching =
973            Math.max(0L, archive.size() - maxDistanceFromEnd);
974        if (off >= 0) {
975            for (; off >= stopSearching; off--) {
976                archive.position(off);
977                try {
978                    wordBbuf.rewind();
979                    IOUtils.readFully(archive, wordBbuf);
980                    wordBbuf.flip();
981                } catch (EOFException ex) {
982                    break;
983                }
984                int curr = wordBbuf.get();
985                if (curr == sig[POS_0]) {
986                    curr = wordBbuf.get();
987                    if (curr == sig[POS_1]) {
988                        curr = wordBbuf.get();
989                        if (curr == sig[POS_2]) {
990                            curr = wordBbuf.get();
991                            if (curr == sig[POS_3]) {
992                                found = true;
993                                break;
994                            }
995                        }
996                    }
997                }
998            }
999        }
1000        if (found) {
1001            archive.position(off);
1002        }
1003        return found;
1004    }
1005
1006    /**
1007     * Skips the given number of bytes or throws an EOFException if
1008     * skipping failed.
1009     */
1010    private void skipBytes(final int count) throws IOException {
1011        long currentPosition = archive.position();
1012        long newPosition = currentPosition + count;
1013        if (newPosition > archive.size()) {
1014            throw new EOFException();
1015        }
1016        archive.position(newPosition);
1017    }
1018
1019    /**
1020     * Number of bytes in local file header up to the &quot;length of
1021     * filename&quot; entry.
1022     */
1023    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
1024        /* local file header signature     */ WORD
1025        /* version needed to extract       */ + SHORT
1026        /* general purpose bit flag        */ + SHORT
1027        /* compression method              */ + SHORT
1028        /* last mod file time              */ + SHORT
1029        /* last mod file date              */ + SHORT
1030        /* crc-32                          */ + WORD
1031        /* compressed size                 */ + WORD
1032        /* uncompressed size               */ + (long) WORD;
1033
1034    /**
1035     * Walks through all recorded entries and adds the data available
1036     * from the local file header.
1037     *
1038     * <p>Also records the offsets for the data to read from the
1039     * entries.</p>
1040     */
1041    private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment>
1042                                            entriesWithoutUTF8Flag)
1043        throws IOException {
1044        for (final ZipArchiveEntry zipArchiveEntry : entries) {
1045            // entries is filled in populateFromCentralDirectory and
1046            // never modified
1047            final Entry ze = (Entry) zipArchiveEntry;
1048            final long offset = ze.getLocalHeaderOffset();
1049            archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1050            wordBbuf.rewind();
1051            IOUtils.readFully(archive, wordBbuf);
1052            wordBbuf.flip();
1053            wordBbuf.get(shortBuf);
1054            final int fileNameLen = ZipShort.getValue(shortBuf);
1055            wordBbuf.get(shortBuf);
1056            final int extraFieldLen = ZipShort.getValue(shortBuf);
1057            skipBytes(fileNameLen);
1058            final byte[] localExtraData = new byte[extraFieldLen];
1059            IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData));
1060            ze.setExtra(localExtraData);
1061            ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
1062                + SHORT + SHORT + fileNameLen + extraFieldLen);
1063            ze.setStreamContiguous(true);
1064
1065            if (entriesWithoutUTF8Flag.containsKey(ze)) {
1066                final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1067                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
1068                                                         nc.comment);
1069            }
1070
1071            final String name = ze.getName();
1072            LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
1073            if (entriesOfThatName == null) {
1074                entriesOfThatName = new LinkedList<>();
1075                nameMap.put(name, entriesOfThatName);
1076            }
1077            entriesOfThatName.addLast(ze);
1078        }
1079    }
1080
1081    /**
1082     * Checks whether the archive starts with a LFH.  If it doesn't,
1083     * it may be an empty archive.
1084     */
1085    private boolean startsWithLocalFileHeader() throws IOException {
1086        archive.position(0);
1087        wordBbuf.rewind();
1088        IOUtils.readFully(archive, wordBbuf);
1089        return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1090    }
1091
1092    /**
1093     * Creates new BoundedInputStream, according to implementation of
1094     * underlying archive channel.
1095     */
1096    private BoundedInputStream createBoundedInputStream(long start, long remaining) {
1097        return archive instanceof FileChannel ?
1098            new BoundedFileChannelInputStream(start, remaining) :
1099            new BoundedInputStream(start, remaining);
1100    }
1101
1102    /**
1103     * InputStream that delegates requests to the underlying
1104     * SeekableByteChannel, making sure that only bytes from a certain
1105     * range can be read.
1106     */
1107    private class BoundedInputStream extends InputStream {
1108        private ByteBuffer singleByteBuffer;
1109        private final long end;
1110        private long loc;
1111
1112        BoundedInputStream(final long start, final long remaining) {
1113            this.end = start+remaining;
1114            if (this.end < start) {
1115                // check for potential vulnerability due to overflow
1116                throw new IllegalArgumentException("Invalid length of stream at offset="+start+", length="+remaining);
1117            }
1118            loc = start;
1119        }
1120
1121        @Override
1122        public synchronized int read() throws IOException {
1123            if (loc >= end) {
1124                return -1;
1125            }
1126            if (singleByteBuffer == null) {
1127                singleByteBuffer = ByteBuffer.allocate(1);
1128            }
1129            else {
1130                singleByteBuffer.rewind();
1131            }
1132            int read = read(loc, singleByteBuffer);
1133            if (read < 0) {
1134                return read;
1135            }
1136            loc++;
1137            return singleByteBuffer.get() & 0xff;
1138        }
1139
1140        @Override
1141        public synchronized int read(final byte[] b, final int off, int len) throws IOException {
1142            if (len <= 0) {
1143                return 0;
1144            }
1145
1146            if (len > end-loc) {
1147                if (loc >= end) {
1148                    return -1;
1149                }
1150                len = (int)(end-loc);
1151            }
1152
1153            ByteBuffer buf;
1154            buf = ByteBuffer.wrap(b, off, len);
1155            int ret = read(loc, buf);
1156            if (ret > 0) {
1157                loc += ret;
1158                return ret;
1159            }
1160            return ret;
1161        }
1162
1163        protected int read(long pos, ByteBuffer buf) throws IOException {
1164            int read;
1165            synchronized (archive) {
1166                archive.position(pos);
1167                read = archive.read(buf);
1168            }
1169            buf.flip();
1170            return read;
1171        }
1172    }
1173
1174    /**
1175     * Lock-free implementation of BoundedInputStream. The
1176     * implementation uses positioned reads on the underlying archive
1177     * file channel and therefore performs significantly faster in
1178     * concurrent environment.
1179     */
1180    private class BoundedFileChannelInputStream extends BoundedInputStream {
1181        private final FileChannel archive;
1182
1183        BoundedFileChannelInputStream(final long start, final long remaining) {
1184            super(start, remaining);
1185            archive = (FileChannel)ZipFile.this.archive;
1186        }
1187
1188        @Override
1189        protected int read(long pos, ByteBuffer buf) throws IOException {
1190            int read = archive.read(buf, pos);
1191            buf.flip();
1192            return read;
1193        }
1194    }
1195
1196    private static final class NameAndComment {
1197        private final byte[] name;
1198        private final byte[] comment;
1199        private NameAndComment(final byte[] name, final byte[] comment) {
1200            this.name = name;
1201            this.comment = comment;
1202        }
1203    }
1204
1205    /**
1206     * Compares two ZipArchiveEntries based on their offset within the archive.
1207     *
1208     * <p>Won't return any meaningful results if one of the entries
1209     * isn't part of the archive at all.</p>
1210     *
1211     * @since 1.1
1212     */
1213    private final Comparator<ZipArchiveEntry> offsetComparator =
1214        new Comparator<ZipArchiveEntry>() {
1215        @Override
1216        public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) {
1217            if (e1 == e2) {
1218                return 0;
1219            }
1220
1221            final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null;
1222            final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null;
1223            if (ent1 == null) {
1224                return 1;
1225            }
1226            if (ent2 == null) {
1227                return -1;
1228            }
1229            final long val = (ent1.getLocalHeaderOffset()
1230                        - ent2.getLocalHeaderOffset());
1231            return val == 0 ? 0 : val < 0 ? -1 : +1;
1232        }
1233    };
1234
1235    /**
1236     * Extends ZipArchiveEntry to store the offset within the archive.
1237     */
1238    private static class Entry extends ZipArchiveEntry {
1239
1240        Entry() {
1241        }
1242
1243        @Override
1244        public int hashCode() {
1245            return 3 * super.hashCode()
1246                + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32);
1247        }
1248
1249        @Override
1250        public boolean equals(final Object other) {
1251            if (super.equals(other)) {
1252                // super.equals would return false if other were not an Entry
1253                final Entry otherEntry = (Entry) other;
1254                return getLocalHeaderOffset()
1255                        == otherEntry.getLocalHeaderOffset()
1256                    && getDataOffset()
1257                        == otherEntry.getDataOffset();
1258            }
1259            return false;
1260        }
1261    }
1262
1263    private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics {
1264        StoredStatisticsStream(InputStream in) {
1265            super(in);
1266        }
1267
1268        @Override
1269        public long getCompressedCount() {
1270            return super.getBytesRead();
1271        }
1272
1273        @Override
1274        public long getUncompressedCount() {
1275            return getCompressedCount();
1276        }
1277    }
1278}