001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.math.BigInteger;
028import java.nio.ByteBuffer;
029import java.util.Arrays;
030import java.util.zip.CRC32;
031import java.util.zip.DataFormatException;
032import java.util.zip.Inflater;
033import java.util.zip.ZipEntry;
034import java.util.zip.ZipException;
035
036import org.apache.commons.compress.archivers.ArchiveEntry;
037import org.apache.commons.compress.archivers.ArchiveInputStream;
038import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
039import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
040import org.apache.commons.compress.utils.ArchiveUtils;
041import org.apache.commons.compress.utils.IOUtils;
042import org.apache.commons.compress.utils.InputStreamStatistics;
043
044import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
045import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
046import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
047import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
048
049/**
050 * Implements an input stream that can read Zip archives.
051 *
052 * <p>As of Apache Commons Compress it transparently supports Zip64
053 * extensions and thus individual entries and archives larger than 4
054 * GB or with more than 65536 entries.</p>
055 *
056 * <p>The {@link ZipFile} class is preferred when reading from files
057 * as {@link ZipArchiveInputStream} is limited by not being able to
058 * read the central directory header before returning entries.  In
059 * particular {@link ZipArchiveInputStream}</p>
060 *
061 * <ul>
062 *
063 *  <li>may return entries that are not part of the central directory
064 *  at all and shouldn't be considered part of the archive.</li>
065 *
066 *  <li>may return several entries with the same name.</li>
067 *
068 *  <li>will not return internal or external attributes.</li>
069 *
070 *  <li>may return incomplete extra field data.</li>
071 *
072 *  <li>may return unknown sizes and CRC values for entries until the
073 *  next entry has been reached if the archive uses the data
074 *  descriptor feature.</li>
075 *
076 * </ul>
077 *
078 * @see ZipFile
079 * @NotThreadSafe
080 */
081public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics {
082
083    /** The zip encoding to use for filenames and the file comment. */
084    private final ZipEncoding zipEncoding;
085
086    // the provided encoding (for unit tests)
087    final String encoding;
088
089    /** Whether to look for and use Unicode extra fields. */
090    private final boolean useUnicodeExtraFields;
091
092    /** Wrapped stream, will always be a PushbackInputStream. */
093    private final InputStream in;
094
095    /** Inflater used for all deflated entries. */
096    private final Inflater inf = new Inflater(true);
097
098    /** Buffer used to read from the wrapped stream. */
099    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);
100
101    /** The entry that is currently being read. */
102    private CurrentEntry current = null;
103
104    /** Whether the stream has been closed. */
105    private boolean closed = false;
106
107    /** Whether the stream has reached the central directory - and thus found all entries. */
108    private boolean hitCentralDirectory = false;
109
110    /**
111     * When reading a stored entry that uses the data descriptor this
112     * stream has to read the full entry and caches it.  This is the
113     * cache.
114     */
115    private ByteArrayInputStream lastStoredEntry = null;
116
117    /** Whether the stream will try to read STORED entries that use a data descriptor. */
118    private boolean allowStoredEntriesWithDataDescriptor = false;
119
120    /** Count decompressed bytes for current entry */
121    private long uncompressedCount = 0;
122
123    private static final int LFH_LEN = 30;
124    /*
125      local file header signature     WORD
126      version needed to extract       SHORT
127      general purpose bit flag        SHORT
128      compression method              SHORT
129      last mod file time              SHORT
130      last mod file date              SHORT
131      crc-32                          WORD
132      compressed size                 WORD
133      uncompressed size               WORD
134      file name length                SHORT
135      extra field length              SHORT
136    */
137
138    private static final int CFH_LEN = 46;
139    /*
140        central file header signature   WORD
141        version made by                 SHORT
142        version needed to extract       SHORT
143        general purpose bit flag        SHORT
144        compression method              SHORT
145        last mod file time              SHORT
146        last mod file date              SHORT
147        crc-32                          WORD
148        compressed size                 WORD
149        uncompressed size               WORD
150        file name length                SHORT
151        extra field length              SHORT
152        file comment length             SHORT
153        disk number start               SHORT
154        internal file attributes        SHORT
155        external file attributes        WORD
156        relative offset of local header WORD
157    */
158
159    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;
160
161    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
162    private final byte[] lfhBuf = new byte[LFH_LEN];
163    private final byte[] skipBuf = new byte[1024];
164    private final byte[] shortBuf = new byte[SHORT];
165    private final byte[] wordBuf = new byte[WORD];
166    private final byte[] twoDwordBuf = new byte[2 * DWORD];
167
168    private int entriesRead = 0;
169
170    /**
171     * Create an instance using UTF-8 encoding
172     * @param inputStream the stream to wrap
173     */
174    public ZipArchiveInputStream(final InputStream inputStream) {
175        this(inputStream, ZipEncodingHelper.UTF8);
176    }
177
178    /**
179     * Create an instance using the specified encoding
180     * @param inputStream the stream to wrap
181     * @param encoding the encoding to use for file names, use null
182     * for the platform's default encoding
183     * @since 1.5
184     */
185    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
186        this(inputStream, encoding, true);
187    }
188
189    /**
190     * Create an instance using the specified encoding
191     * @param inputStream the stream to wrap
192     * @param encoding the encoding to use for file names, use null
193     * for the platform's default encoding
194     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
195     * Extra Fields (if present) to set the file names.
196     */
197    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
198        this(inputStream, encoding, useUnicodeExtraFields, false);
199    }
200
201    /**
202     * Create an instance using the specified encoding
203     * @param inputStream the stream to wrap
204     * @param encoding the encoding to use for file names, use null
205     * for the platform's default encoding
206     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
207     * Extra Fields (if present) to set the file names.
208     * @param allowStoredEntriesWithDataDescriptor whether the stream
209     * will try to read STORED entries that use a data descriptor
210     * @since 1.1
211     */
212    public ZipArchiveInputStream(final InputStream inputStream,
213                                 final String encoding,
214                                 final boolean useUnicodeExtraFields,
215                                 final boolean allowStoredEntriesWithDataDescriptor) {
216        this.encoding = encoding;
217        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
218        this.useUnicodeExtraFields = useUnicodeExtraFields;
219        in = new PushbackInputStream(inputStream, buf.capacity());
220        this.allowStoredEntriesWithDataDescriptor =
221            allowStoredEntriesWithDataDescriptor;
222        // haven't read anything so far
223        buf.limit(0);
224    }
225
226    public ZipArchiveEntry getNextZipEntry() throws IOException {
227        uncompressedCount = 0;
228
229        boolean firstEntry = true;
230        if (closed || hitCentralDirectory) {
231            return null;
232        }
233        if (current != null) {
234            closeEntry();
235            firstEntry = false;
236        }
237
238        long currentHeaderOffset = getBytesRead();
239        try {
240            if (firstEntry) {
241                // split archives have a special signature before the
242                // first local file header - look for it and fail with
243                // the appropriate error message if this is a split
244                // archive.
245                readFirstLocalFileHeader(lfhBuf);
246            } else {
247                readFully(lfhBuf);
248            }
249        } catch (final EOFException e) {
250            return null;
251        }
252
253        final ZipLong sig = new ZipLong(lfhBuf);
254        if (!sig.equals(ZipLong.LFH_SIG)) {
255            if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG) || isApkSigningBlock(lfhBuf)) {
256                hitCentralDirectory = true;
257                skipRemainderOfArchive();
258                return null;
259            }
260            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
261        }
262
263        int off = WORD;
264        current = new CurrentEntry();
265
266        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
267        off += SHORT;
268        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);
269
270        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
271        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
272        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
273        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
274        current.entry.setGeneralPurposeBit(gpFlag);
275
276        off += SHORT;
277
278        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
279        off += SHORT;
280
281        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
282        current.entry.setTime(time);
283        off += WORD;
284
285        ZipLong size = null, cSize = null;
286        if (!current.hasDataDescriptor) {
287            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
288            off += WORD;
289
290            cSize = new ZipLong(lfhBuf, off);
291            off += WORD;
292
293            size = new ZipLong(lfhBuf, off);
294            off += WORD;
295        } else {
296            off += 3 * WORD;
297        }
298
299        final int fileNameLen = ZipShort.getValue(lfhBuf, off);
300
301        off += SHORT;
302
303        final int extraLen = ZipShort.getValue(lfhBuf, off);
304        off += SHORT; // NOSONAR - assignment as documentation
305
306        final byte[] fileName = new byte[fileNameLen];
307        readFully(fileName);
308        current.entry.setName(entryEncoding.decode(fileName), fileName);
309        if (hasUTF8Flag) {
310            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
311        }
312
313        final byte[] extraData = new byte[extraLen];
314        readFully(extraData);
315        current.entry.setExtra(extraData);
316
317        if (!hasUTF8Flag && useUnicodeExtraFields) {
318            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
319        }
320
321        processZip64Extra(size, cSize);
322
323        current.entry.setLocalHeaderOffset(currentHeaderOffset);
324        current.entry.setDataOffset(getBytesRead());
325        current.entry.setStreamContiguous(true);
326
327        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
328        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
329            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
330                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
331                switch (m) {
332                case UNSHRINKING:
333                    current.in = new UnshrinkingInputStream(bis);
334                    break;
335                case IMPLODING:
336                    current.in = new ExplodingInputStream(
337                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
338                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
339                        bis);
340                    break;
341                case BZIP2:
342                    current.in = new BZip2CompressorInputStream(bis);
343                    break;
344                case ENHANCED_DEFLATED:
345                    current.in = new Deflate64CompressorInputStream(bis);
346                    break;
347                default:
348                    // we should never get here as all supported methods have been covered
349                    // will cause an error when read is invoked, don't throw an exception here so people can
350                    // skip unsupported entries
351                    break;
352                }
353            }
354        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
355            current.in = new Deflate64CompressorInputStream(in);
356        }
357
358        entriesRead++;
359        return current.entry;
360    }
361
362    /**
363     * Fills the given array with the first local file header and
364     * deals with splitting/spanning markers that may prefix the first
365     * LFH.
366     */
367    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
368        readFully(lfh);
369        final ZipLong sig = new ZipLong(lfh);
370        if (sig.equals(ZipLong.DD_SIG)) {
371            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
372        }
373
374        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
375            // The archive is not really split as only one segment was
376            // needed in the end.  Just skip over the marker.
377            final byte[] missedLfhBytes = new byte[4];
378            readFully(missedLfhBytes);
379            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
380            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
381        }
382    }
383
384    /**
385     * Records whether a Zip64 extra is present and sets the size
386     * information from it if sizes are 0xFFFFFFFF and the entry
387     * doesn't use a data descriptor.
388     */
389    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
390        final Zip64ExtendedInformationExtraField z64 =
391            (Zip64ExtendedInformationExtraField)
392            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
393        current.usesZip64 = z64 != null;
394        if (!current.hasDataDescriptor) {
395            if (z64 != null // same as current.usesZip64 but avoids NPE warning
396                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
397                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
398                current.entry.setSize(z64.getSize().getLongValue());
399            } else {
400                current.entry.setCompressedSize(cSize.getValue());
401                current.entry.setSize(size.getValue());
402            }
403        }
404    }
405
406    @Override
407    public ArchiveEntry getNextEntry() throws IOException {
408        return getNextZipEntry();
409    }
410
411    /**
412     * Whether this class is able to read the given entry.
413     *
414     * <p>May return false if it is set up to use encryption or a
415     * compression method that hasn't been implemented yet.</p>
416     * @since 1.1
417     */
418    @Override
419    public boolean canReadEntryData(final ArchiveEntry ae) {
420        if (ae instanceof ZipArchiveEntry) {
421            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
422            return ZipUtil.canHandleEntryData(ze)
423                && supportsDataDescriptorFor(ze)
424                && supportsCompressedSizeFor(ze);
425        }
426        return false;
427    }
428
429    @Override
430    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
431        if (closed) {
432            throw new IOException("The stream is closed");
433        }
434
435        if (current == null) {
436            return -1;
437        }
438
439        // avoid int overflow, check null buffer
440        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
441            throw new ArrayIndexOutOfBoundsException();
442        }
443
444        ZipUtil.checkRequestedFeatures(current.entry);
445        if (!supportsDataDescriptorFor(current.entry)) {
446            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
447                    current.entry);
448        }
449        if (!supportsCompressedSizeFor(current.entry)) {
450            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
451                    current.entry);
452        }
453
454        int read;
455        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
456            read = readStored(buffer, offset, length);
457        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
458            read = readDeflated(buffer, offset, length);
459        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
460                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
461                || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
462                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
463            read = current.in.read(buffer, offset, length);
464        } else {
465            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
466                    current.entry);
467        }
468
469        if (read >= 0) {
470            current.crc.update(buffer, offset, read);
471            uncompressedCount += read;
472        }
473
474        return read;
475    }
476
477    /**
478     * @since 1.17
479     */
480    @Override
481    public long getCompressedCount() {
482        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
483            return current.bytesRead;
484        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
485            return getBytesInflated();
486        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
487            return ((UnshrinkingInputStream) current.in).getCompressedCount();
488        } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
489            return ((ExplodingInputStream) current.in).getCompressedCount();
490        } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) {
491            return ((Deflate64CompressorInputStream) current.in).getCompressedCount();
492        } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
493            return ((BZip2CompressorInputStream) current.in).getCompressedCount();
494        } else {
495            return -1;
496        }
497    }
498
499    /**
500     * @since 1.17
501     */
502    @Override
503    public long getUncompressedCount() {
504        return uncompressedCount;
505    }
506
507    /**
508     * Implementation of read for STORED entries.
509     */
510    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {
511
512        if (current.hasDataDescriptor) {
513            if (lastStoredEntry == null) {
514                readStoredEntry();
515            }
516            return lastStoredEntry.read(buffer, offset, length);
517        }
518
519        final long csize = current.entry.getSize();
520        if (current.bytesRead >= csize) {
521            return -1;
522        }
523
524        if (buf.position() >= buf.limit()) {
525            buf.position(0);
526            final int l = in.read(buf.array());
527            if (l == -1) {
528                buf.limit(0);
529                throw new IOException("Truncated ZIP file");
530            }
531            buf.limit(l);
532
533            count(l);
534            current.bytesReadFromStream += l;
535        }
536
537        int toRead = Math.min(buf.remaining(), length);
538        if ((csize - current.bytesRead) < toRead) {
539            // if it is smaller than toRead then it fits into an int
540            toRead = (int) (csize - current.bytesRead);
541        }
542        buf.get(buffer, offset, toRead);
543        current.bytesRead += toRead;
544        return toRead;
545    }
546
547    /**
548     * Implementation of read for DEFLATED entries.
549     */
550    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
551        final int read = readFromInflater(buffer, offset, length);
552        if (read <= 0) {
553            if (inf.finished()) {
554                return -1;
555            } else if (inf.needsDictionary()) {
556                throw new ZipException("This archive needs a preset dictionary"
557                                       + " which is not supported by Commons"
558                                       + " Compress.");
559            } else if (read == -1) {
560                throw new IOException("Truncated ZIP file");
561            }
562        }
563        return read;
564    }
565
566    /**
567     * Potentially reads more bytes to fill the inflater's buffer and
568     * reads from it.
569     */
570    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
571        int read = 0;
572        do {
573            if (inf.needsInput()) {
574                final int l = fill();
575                if (l > 0) {
576                    current.bytesReadFromStream += buf.limit();
577                } else if (l == -1) {
578                    return -1;
579                } else {
580                    break;
581                }
582            }
583            try {
584                read = inf.inflate(buffer, offset, length);
585            } catch (final DataFormatException e) {
586                throw (IOException) new ZipException(e.getMessage()).initCause(e);
587            }
588        } while (read == 0 && inf.needsInput());
589        return read;
590    }
591
592    @Override
593    public void close() throws IOException {
594        if (!closed) {
595            closed = true;
596            try {
597                in.close();
598            } finally {
599                inf.end();
600            }
601        }
602    }
603
604    /**
605     * Skips over and discards value bytes of data from this input
606     * stream.
607     *
608     * <p>This implementation may end up skipping over some smaller
609     * number of bytes, possibly 0, if and only if it reaches the end
610     * of the underlying stream.</p>
611     *
612     * <p>The actual number of bytes skipped is returned.</p>
613     *
614     * @param value the number of bytes to be skipped.
615     * @return the actual number of bytes skipped.
616     * @throws IOException - if an I/O error occurs.
617     * @throws IllegalArgumentException - if value is negative.
618     */
619    @Override
620    public long skip(final long value) throws IOException {
621        if (value >= 0) {
622            long skipped = 0;
623            while (skipped < value) {
624                final long rem = value - skipped;
625                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
626                if (x == -1) {
627                    return skipped;
628                }
629                skipped += x;
630            }
631            return skipped;
632        }
633        throw new IllegalArgumentException();
634    }
635
636    /**
637     * Checks if the signature matches what is expected for a zip file.
638     * Does not currently handle self-extracting zips which may have arbitrary
639     * leading content.
640     *
641     * @param signature the bytes to check
642     * @param length    the number of bytes to check
643     * @return true, if this stream is a zip archive stream, false otherwise
644     */
645    public static boolean matches(final byte[] signature, final int length) {
646        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
647            return false;
648        }
649
650        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
651            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
652            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
653            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
654    }
655
656    private static boolean checksig(final byte[] signature, final byte[] expected) {
657        for (int i = 0; i < expected.length; i++) {
658            if (signature[i] != expected[i]) {
659                return false;
660            }
661        }
662        return true;
663    }
664
665    /**
666     * Closes the current ZIP archive entry and positions the underlying
667     * stream to the beginning of the next entry. All per-entry variables
668     * and data structures are cleared.
669     * <p>
670     * If the compressed size of this entry is included in the entry header,
671     * then any outstanding bytes are simply skipped from the underlying
672     * stream without uncompressing them. This allows an entry to be safely
673     * closed even if the compression method is unsupported.
674     * <p>
675     * In case we don't know the compressed size of this entry or have
676     * already buffered too much data from the underlying stream to support
677     * uncompression, then the uncompression process is completed and the
678     * end position of the stream is adjusted based on the result of that
679     * process.
680     *
681     * @throws IOException if an error occurs
682     */
683    private void closeEntry() throws IOException {
684        if (closed) {
685            throw new IOException("The stream is closed");
686        }
687        if (current == null) {
688            return;
689        }
690
691        // Ensure all entry bytes are read
692        if (currentEntryHasOutstandingBytes()) {
693            drainCurrentEntryData();
694        } else {
695            // this is guaranteed to exhaust the stream
696            skip(Long.MAX_VALUE); //NOSONAR
697
698            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
699                       ? getBytesInflated() : current.bytesRead;
700
701            // this is at most a single read() operation and can't
702            // exceed the range of int
703            final int diff = (int) (current.bytesReadFromStream - inB);
704
705            // Pushback any required bytes
706            if (diff > 0) {
707                pushback(buf.array(), buf.limit() - diff, diff);
708                current.bytesReadFromStream -= diff;
709            }
710
711            // Drain remainder of entry if not all data bytes were required
712            if (currentEntryHasOutstandingBytes()) {
713                drainCurrentEntryData();
714            }
715        }
716
717        if (lastStoredEntry == null && current.hasDataDescriptor) {
718            readDataDescriptor();
719        }
720
721        inf.reset();
722        buf.clear().flip();
723        current = null;
724        lastStoredEntry = null;
725    }
726
727    /**
728     * If the compressed size of the current entry is included in the entry header
729     * and there are any outstanding bytes in the underlying stream, then
730     * this returns true.
731     *
732     * @return true, if current entry is determined to have outstanding bytes, false otherwise
733     */
734    private boolean currentEntryHasOutstandingBytes() {
735        return current.bytesReadFromStream <= current.entry.getCompressedSize()
736                && !current.hasDataDescriptor;
737    }
738
739    /**
740     * Read all data of the current entry from the underlying stream
741     * that hasn't been read, yet.
742     */
743    private void drainCurrentEntryData() throws IOException {
744        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
745        while (remaining > 0) {
746            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
747            if (n < 0) {
748                throw new EOFException("Truncated ZIP entry: "
749                                       + ArchiveUtils.sanitize(current.entry.getName()));
750            }
751            count(n);
752            remaining -= n;
753        }
754    }
755
756    /**
757     * Get the number of bytes Inflater has actually processed.
758     *
759     * <p>for Java &lt; Java7 the getBytes* methods in
760     * Inflater/Deflater seem to return unsigned ints rather than
761     * longs that start over with 0 at 2^32.</p>
762     *
763     * <p>The stream knows how many bytes it has read, but not how
764     * many the Inflater actually consumed - it should be between the
765     * total number of bytes read for the entry and the total number
766     * minus the last read operation.  Here we just try to make the
767     * value close enough to the bytes we've read by assuming the
768     * number of bytes consumed must be smaller than (or equal to) the
769     * number of bytes read but not smaller by more than 2^32.</p>
770     */
771    private long getBytesInflated() {
772        long inB = inf.getBytesRead();
773        if (current.bytesReadFromStream >= TWO_EXP_32) {
774            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
775                inB += TWO_EXP_32;
776            }
777        }
778        return inB;
779    }
780
781    private int fill() throws IOException {
782        if (closed) {
783            throw new IOException("The stream is closed");
784        }
785        final int length = in.read(buf.array());
786        if (length > 0) {
787            buf.limit(length);
788            count(buf.limit());
789            inf.setInput(buf.array(), 0, buf.limit());
790        }
791        return length;
792    }
793
    /**
     * Fills the whole array with bytes from the underlying stream.
     *
     * <p>Delegates to {@link #readFully(byte[], int)} with an offset
     * of zero.</p>
     *
     * @param b the array to fill
     * @throws IOException if the delegate throws, which includes an
     * EOFException when the array could not be filled completely
     */
    private void readFully(final byte[] b) throws IOException {
        readFully(b, 0);
    }
797
798    private void readFully(final byte[] b, final int off) throws IOException {
799        final int len = b.length - off;
800        final int count = IOUtils.readFully(in, b, off, len);
801        count(count);
802        if (count < len) {
803            throw new EOFException();
804        }
805    }
806
807    private void readDataDescriptor() throws IOException {
808        readFully(wordBuf);
809        ZipLong val = new ZipLong(wordBuf);
810        if (ZipLong.DD_SIG.equals(val)) {
811            // data descriptor with signature, skip sig
812            readFully(wordBuf);
813            val = new ZipLong(wordBuf);
814        }
815        current.entry.setCrc(val.getValue());
816
817        // if there is a ZIP64 extra field, sizes are eight bytes
818        // each, otherwise four bytes each.  Unfortunately some
819        // implementations - namely Java7 - use eight bytes without
820        // using a ZIP64 extra field -
821        // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
822
823        // just read 16 bytes and check whether bytes nine to twelve
824        // look like one of the signatures of what could follow a data
825        // descriptor (ignoring archive decryption headers for now).
826        // If so, push back eight bytes and assume sizes are four
827        // bytes, otherwise sizes are eight bytes each.
828        readFully(twoDwordBuf);
829        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
830        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
831            pushback(twoDwordBuf, DWORD, DWORD);
832            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
833            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
834        } else {
835            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
836            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
837        }
838    }
839
840    /**
841     * Whether this entry requires a data descriptor this library can work with.
842     *
843     * @return true if allowStoredEntriesWithDataDescriptor is true,
844     * the entry doesn't require any data descriptor or the method is
845     * DEFLATED or ENHANCED_DEFLATED.
846     */
847    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
848        return !entry.getGeneralPurposeBit().usesDataDescriptor()
849
850                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
851                || entry.getMethod() == ZipEntry.DEFLATED
852                || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
853    }
854
855    /**
856     * Whether the compressed size for the entry is either known or
857     * not required by the compression method being used.
858     */
859    private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
860        return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
861            || entry.getMethod() == ZipEntry.DEFLATED
862            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
863            || (entry.getGeneralPurposeBit().usesDataDescriptor()
864                && allowStoredEntriesWithDataDescriptor
865                && entry.getMethod() == ZipEntry.STORED);
866    }
867
868    /**
869     * Caches a stored entry that uses the data descriptor.
870     *
871     * <ul>
872     *   <li>Reads a stored entry until the signature of a local file
873     *     header, central directory header or data descriptor has been
874     *     found.</li>
875     *   <li>Stores all entry data in lastStoredEntry.</p>
876     *   <li>Rewinds the stream to position at the data
877     *     descriptor.</li>
878     *   <li>reads the data descriptor</li>
879     * </ul>
880     *
881     * <p>After calling this method the entry should know its size,
882     * the entry's data is cached and the stream is positioned at the
883     * next local file or central directory header.</p>
884     */
885    private void readStoredEntry() throws IOException {
886        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
887        int off = 0;
888        boolean done = false;
889
890        // length of DD without signature
891        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
892
893        while (!done) {
894            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
895            if (r <= 0) {
896                // read the whole archive without ever finding a
897                // central directory
898                throw new IOException("Truncated ZIP file");
899            }
900            if (r + off < 4) {
901                // buffer too small to check for a signature, loop
902                off += r;
903                continue;
904            }
905
906            done = bufferContainsSignature(bos, off, r, ddLen);
907            if (!done) {
908                off = cacheBytesRead(bos, off, r, ddLen);
909            }
910        }
911
912        final byte[] b = bos.toByteArray();
913        lastStoredEntry = new ByteArrayInputStream(b);
914    }
915
    // Byte representations of the signatures that are searched for
    // while scanning the data of a stored entry that uses a data
    // descriptor.

    /** Bytes of the local file header signature. */
    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
    /** Bytes of the central directory file header signature. */
    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
    /** Bytes of the data descriptor signature. */
    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
919
920    /**
921     * Checks whether the current buffer contains the signature of a
922     * &quot;data descriptor&quot;, &quot;local file header&quot; or
923     * &quot;central directory entry&quot;.
924     *
925     * <p>If it contains such a signature, reads the data descriptor
926     * and positions the stream right after the data descriptor.</p>
927     */
928    private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
929            throws IOException {
930
931        boolean done = false;
932        int readTooMuch = 0;
933        for (int i = 0; !done && i < offset + lastRead - 4; i++) {
934            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
935                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
936                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
937                    // found a LFH or CFH:
938                    readTooMuch = offset + lastRead - i - expectedDDLen;
939                    done = true;
940                }
941                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
942                    // found DD:
943                    readTooMuch = offset + lastRead - i;
944                    done = true;
945                }
946                if (done) {
947                    // * push back bytes read in excess as well as the data
948                    //   descriptor
949                    // * copy the remaining bytes to cache
950                    // * read data descriptor
951                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
952                    bos.write(buf.array(), 0, i);
953                    readDataDescriptor();
954                }
955            }
956        }
957        return done;
958    }
959
960    /**
961     * If the last read bytes could hold a data descriptor and an
962     * incomplete signature then save the last bytes to the front of
963     * the buffer and cache everything in front of the potential data
964     * descriptor into the given ByteArrayOutputStream.
965     *
966     * <p>Data descriptor plus incomplete signature (3 bytes in the
967     * worst case) can be 20 bytes max.</p>
968     */
969    private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) {
970        final int cacheable = offset + lastRead - expecteDDLen - 3;
971        if (cacheable > 0) {
972            bos.write(buf.array(), 0, cacheable);
973            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
974            offset = expecteDDLen + 3;
975        } else {
976            offset += lastRead;
977        }
978        return offset;
979    }
980
981    private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
982        ((PushbackInputStream) in).unread(buf, offset, length);
983        pushedBackBytes(length);
984    }
985
986    // End of Central Directory Record
987    //   end of central dir signature    WORD
988    //   number of this disk             SHORT
989    //   number of the disk with the
990    //   start of the central directory  SHORT
991    //   total number of entries in the
992    //   central directory on this disk  SHORT
993    //   total number of entries in
994    //   the central directory           SHORT
995    //   size of the central directory   WORD
996    //   offset of start of central
997    //   directory with respect to
998    //   the starting disk number        WORD
999    //   .ZIP file comment length        SHORT
1000    //   .ZIP file comment               up to 64KB
1001    //
1002
1003    /**
1004     * Reads the stream until it find the "End of central directory
1005     * record" and consumes it as well.
1006     */
1007    private void skipRemainderOfArchive() throws IOException {
1008        // skip over central directory. One LFH has been read too much
1009        // already.  The calculation discounts file names and extra
1010        // data so it will be too short.
1011        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
1012        findEocdRecord();
1013        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
1014        readFully(shortBuf);
1015        // file comment
1016        realSkip(ZipShort.getValue(shortBuf));
1017    }
1018
    /**
     * Reads forward until the signature of the &quot;End of central
     * directory&quot; record is found.
     *
     * <p>The signature is matched byte by byte.  The method returns
     * once all four signature bytes have been read in sequence or
     * the underlying stream is exhausted, the two cases are not
     * distinguished for the caller.</p>
     *
     * @throws IOException if reading from the underlying stream fails
     */
    private void findEocdRecord() throws IOException {
        int currentByte = -1;
        // set when the byte that broke a partial match is itself a
        // potential first signature byte: it must be examined again
        // rather than be replaced by a freshly read byte
        boolean skipReadCall = false;
        while (skipReadCall || (currentByte = readOneByte()) > -1) {
            skipReadCall = false;
            if (!isFirstByteOfEocdSig(currentByte)) {
                continue;
            }
            // first byte matched, check the second one
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
                if (currentByte == -1) {
                    // end of stream
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            // second byte matched, check the third one
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
                if (currentByte == -1) {
                    // end of stream
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            // third byte matched, check the last one
            currentByte = readOneByte();
            if (currentByte == -1
                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
                // either end of stream or the complete signature
                break;
            }
            skipReadCall = isFirstByteOfEocdSig(currentByte);
        }
    }
1055
1056    /**
1057     * Skips bytes by reading from the underlying stream rather than
1058     * the (potentially inflating) archive stream - which {@link
1059     * #skip} would do.
1060     *
1061     * Also updates bytes-read counter.
1062     */
1063    private void realSkip(final long value) throws IOException {
1064        if (value >= 0) {
1065            long skipped = 0;
1066            while (skipped < value) {
1067                final long rem = value - skipped;
1068                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
1069                if (x == -1) {
1070                    return;
1071                }
1072                count(x);
1073                skipped += x;
1074            }
1075            return;
1076        }
1077        throw new IllegalArgumentException();
1078    }
1079
1080    /**
1081     * Reads bytes by reading from the underlying stream rather than
1082     * the (potentially inflating) archive stream - which {@link #read} would do.
1083     *
1084     * Also updates bytes-read counter.
1085     */
1086    private int readOneByte() throws IOException {
1087        final int b = in.read();
1088        if (b != -1) {
1089            count(1);
1090        }
1091        return b;
1092    }
1093
1094    private boolean isFirstByteOfEocdSig(final int b) {
1095        return b == ZipArchiveOutputStream.EOCD_SIG[0];
1096    }
1097
    /**
     * The 16 byte magic string that isApkSigningBlock expects to find
     * at the end of an APK Signing Block.
     */
    private static final byte[] APK_SIGNING_BLOCK_MAGIC = new byte[] {
        'A', 'P', 'K', ' ', 'S', 'i', 'g', ' ', 'B', 'l', 'o', 'c', 'k', ' ', '4', '2',
    };
    /**
     * Long.MAX_VALUE as BigInteger, used by isApkSigningBlock to
     * split a skip that doesn't fit into a long.
     */
    private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE);
1102
1103    /**
1104     * Checks whether this might be an APK Signing Block.
1105     *
1106     * <p>Unfortunately the APK signing block does not start with some kind of signature, it rather ends with one. It
1107     * starts with a length, so what we do is parse the suspect length, skip ahead far enough, look for the signature
1108     * and if we've found it, return true.</p>
1109     *
1110     * @param suspectLocalFileHeader the bytes read from the underlying stream in the expectation that they would hold
1111     * the local file header of the next entry.
1112     *
1113     * @return true if this looks like a APK signing block
1114     *
1115     * @see <a href="https://source.android.com/security/apksigning/v2">https://source.android.com/security/apksigning/v2</a>
1116     */
1117    private boolean isApkSigningBlock(byte[] suspectLocalFileHeader) throws IOException {
1118        // length of block excluding the size field itself
1119        BigInteger len = ZipEightByteInteger.getValue(suspectLocalFileHeader);
1120        // LFH has already been read and all but the first eight bytes contain (part of) the APK signing block,
1121        // also subtract 16 bytes in order to position us at the magic string
1122        BigInteger toSkip = len.add(BigInteger.valueOf(DWORD - suspectLocalFileHeader.length
1123            - APK_SIGNING_BLOCK_MAGIC.length));
1124        byte[] magic = new byte[APK_SIGNING_BLOCK_MAGIC.length];
1125
1126        try {
1127            if (toSkip.signum() < 0) {
1128                // suspectLocalFileHeader contains the start of suspect magic string
1129                int off = suspectLocalFileHeader.length + toSkip.intValue();
1130                // length was shorter than magic length
1131                if (off < DWORD) {
1132                    return false;
1133                }
1134                int bytesInBuffer = Math.abs(toSkip.intValue());
1135                System.arraycopy(suspectLocalFileHeader, off, magic, 0, Math.min(bytesInBuffer, magic.length));
1136                if (bytesInBuffer < magic.length) {
1137                    readFully(magic, bytesInBuffer);
1138                }
1139            } else {
1140                while (toSkip.compareTo(LONG_MAX) > 0) {
1141                    realSkip(Long.MAX_VALUE);
1142                    toSkip = toSkip.add(LONG_MAX.negate());
1143                }
1144                realSkip(toSkip.longValue());
1145                readFully(magic);
1146            }
1147        } catch (EOFException ex) {
1148            // length was invalid
1149            return false;
1150        }
1151        return Arrays.equals(magic, APK_SIGNING_BLOCK_MAGIC);
1152    }
1153
    /**
     * Structure collecting information for the entry that is
     * currently being read.
     *
     * <p>A plain holder of state without any behavior of its own, the
     * fields are read and written directly by the enclosing
     * class.</p>
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field.
         *
         * <p>Used to pick the expected length of a data descriptor
         * when caching a stored entry.</p>
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         *
         * <p>Compared against the entry's compressed size in order to
         * determine how much of the entry's data is still left in
         * the underlying stream.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
         */
        private InputStream in;
    }
1200
1201    /**
1202     * Bounded input stream adapted from commons-io
1203     */
1204    private class BoundedInputStream extends InputStream {
1205
1206        /** the wrapped input stream */
1207        private final InputStream in;
1208
1209        /** the max length to provide */
1210        private final long max;
1211
1212        /** the number of bytes already returned */
1213        private long pos = 0;
1214
1215        /**
1216         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1217         * stream and limits it to a certain size.
1218         *
1219         * @param in The wrapped input stream
1220         * @param size The maximum number of bytes to return
1221         */
1222        public BoundedInputStream(final InputStream in, final long size) {
1223            this.max = size;
1224            this.in = in;
1225        }
1226
1227        @Override
1228        public int read() throws IOException {
1229            if (max >= 0 && pos >= max) {
1230                return -1;
1231            }
1232            final int result = in.read();
1233            pos++;
1234            count(1);
1235            current.bytesReadFromStream++;
1236            return result;
1237        }
1238
1239        @Override
1240        public int read(final byte[] b) throws IOException {
1241            return this.read(b, 0, b.length);
1242        }
1243
1244        @Override
1245        public int read(final byte[] b, final int off, final int len) throws IOException {
1246            if (max >= 0 && pos >= max) {
1247                return -1;
1248            }
1249            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1250            final int bytesRead = in.read(b, off, (int) maxRead);
1251
1252            if (bytesRead == -1) {
1253                return -1;
1254            }
1255
1256            pos += bytesRead;
1257            count(bytesRead);
1258            current.bytesReadFromStream += bytesRead;
1259            return bytesRead;
1260        }
1261
1262        @Override
1263        public long skip(final long n) throws IOException {
1264            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1265            final long skippedBytes = IOUtils.skip(in, toSkip);
1266            pos += skippedBytes;
1267            return skippedBytes;
1268        }
1269
1270        @Override
1271        public int available() throws IOException {
1272            if (max >= 0 && pos >= max) {
1273                return 0;
1274            }
1275            return in.available();
1276        }
1277    }
1278}