001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.zip; 019 020import java.io.BufferedInputStream; 021import java.io.ByteArrayInputStream; 022import java.io.Closeable; 023import java.io.EOFException; 024import java.io.File; 025import java.io.IOException; 026import java.io.InputStream; 027import java.io.SequenceInputStream; 028import java.nio.ByteBuffer; 029import java.nio.channels.FileChannel; 030import java.nio.channels.SeekableByteChannel; 031import java.nio.file.Files; 032import java.nio.file.StandardOpenOption; 033import java.util.Arrays; 034import java.util.Collections; 035import java.util.Comparator; 036import java.util.Enumeration; 037import java.util.EnumSet; 038import java.util.HashMap; 039import java.util.LinkedList; 040import java.util.List; 041import java.util.Map; 042import java.util.zip.Inflater; 043import java.util.zip.ZipException; 044 045import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 046import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 047import org.apache.commons.compress.utils.CountingInputStream; 048import 
org.apache.commons.compress.utils.IOUtils; 049import org.apache.commons.compress.utils.InputStreamStatistics; 050 051import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 052import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 053import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 054import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 055import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; 056 057/** 058 * Replacement for <code>java.util.zip.ZipFile</code>. 059 * 060 * <p>This class adds support for file name encodings other than UTF-8 061 * (which is required to work on ZIP files created by native zip tools) 062 * and is able to skip a preamble like the one found in self 063 * extracting archives. Furthermore it returns instances of 064 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 065 * instead of <code>java.util.zip.ZipEntry</code>.</p> 066 * 067 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would 068 * have to reimplement all methods anyway. Like 069 * <code>java.util.zip.ZipFile</code>, it uses SeekableByteChannel under the 070 * covers and supports compressed and uncompressed entries. 
As of 071 * Apache Commons Compress 1.3 it also transparently supports Zip64 072 * extensions and thus individual entries and archives larger than 4 073 * GB or with more than 65536 entries.</p> 074 * 075 * <p>The method signatures mimic the ones of 076 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions: 077 * 078 * <ul> 079 * <li>There is no getName method.</li> 080 * <li>entries has been renamed to getEntries.</li> 081 * <li>getEntries and getEntry return 082 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 083 * instances.</li> 084 * <li>close is allowed to throw IOException.</li> 085 * </ul> 086 * 087 */ 088public class ZipFile implements Closeable { 089 private static final int HASH_SIZE = 509; 090 static final int NIBLET_MASK = 0x0f; 091 static final int BYTE_SHIFT = 8; 092 private static final int POS_0 = 0; 093 private static final int POS_1 = 1; 094 private static final int POS_2 = 2; 095 private static final int POS_3 = 3; 096 private static final byte[] ONE_ZERO_BYTE = new byte[1]; 097 098 /** 099 * List of entries in the order they appear inside the central 100 * directory. 101 */ 102 private final List<ZipArchiveEntry> entries = 103 new LinkedList<>(); 104 105 /** 106 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 107 */ 108 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = 109 new HashMap<>(HASH_SIZE); 110 111 /** 112 * The encoding to use for filenames and the file comment. 113 * 114 * <p>For a list of possible values see <a 115 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 116 * Defaults to UTF-8.</p> 117 */ 118 private final String encoding; 119 120 /** 121 * The zip encoding to use for filenames and the file comment. 122 */ 123 private final ZipEncoding zipEncoding; 124 125 /** 126 * File name of actual source. 
127 */ 128 private final String archiveName; 129 130 /** 131 * The actual data source. 132 */ 133 private final SeekableByteChannel archive; 134 135 /** 136 * Whether to look for and use Unicode extra fields. 137 */ 138 private final boolean useUnicodeExtraFields; 139 140 /** 141 * Whether the file is closed. 142 */ 143 private volatile boolean closed = true; 144 145 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 146 private final byte[] dwordBuf = new byte[DWORD]; 147 private final byte[] wordBuf = new byte[WORD]; 148 private final byte[] cfhBuf = new byte[CFH_LEN]; 149 private final byte[] shortBuf = new byte[SHORT]; 150 private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf); 151 private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf); 152 private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf); 153 154 /** 155 * Opens the given file for reading, assuming "UTF8" for file names. 156 * 157 * @param f the archive. 158 * 159 * @throws IOException if an error occurs while reading the file. 160 */ 161 public ZipFile(final File f) throws IOException { 162 this(f, ZipEncodingHelper.UTF8); 163 } 164 165 /** 166 * Opens the given file for reading, assuming "UTF8". 167 * 168 * @param name name of the archive. 169 * 170 * @throws IOException if an error occurs while reading the file. 171 */ 172 public ZipFile(final String name) throws IOException { 173 this(new File(name), ZipEncodingHelper.UTF8); 174 } 175 176 /** 177 * Opens the given file for reading, assuming the specified 178 * encoding for file names, scanning unicode extra fields. 179 * 180 * @param name name of the archive. 181 * @param encoding the encoding to use for file names, use null 182 * for the platform's default encoding 183 * 184 * @throws IOException if an error occurs while reading the file. 
185 */ 186 public ZipFile(final String name, final String encoding) throws IOException { 187 this(new File(name), encoding, true); 188 } 189 190 /** 191 * Opens the given file for reading, assuming the specified 192 * encoding for file names and scanning for unicode extra fields. 193 * 194 * @param f the archive. 195 * @param encoding the encoding to use for file names, use null 196 * for the platform's default encoding 197 * 198 * @throws IOException if an error occurs while reading the file. 199 */ 200 public ZipFile(final File f, final String encoding) throws IOException { 201 this(f, encoding, true); 202 } 203 204 /** 205 * Opens the given file for reading, assuming the specified 206 * encoding for file names. 207 * 208 * @param f the archive. 209 * @param encoding the encoding to use for file names, use null 210 * for the platform's default encoding 211 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 212 * Extra Fields (if present) to set the file names. 213 * 214 * @throws IOException if an error occurs while reading the file. 215 */ 216 public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields) 217 throws IOException { 218 this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)), 219 f.getAbsolutePath(), encoding, useUnicodeExtraFields, true); 220 } 221 222 /** 223 * Opens the given channel for reading, assuming "UTF8" for file names. 224 * 225 * <p>{@link 226 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 227 * allows you to read from an in-memory archive.</p> 228 * 229 * @param channel the archive. 230 * 231 * @throws IOException if an error occurs while reading the file. 232 * @since 1.13 233 */ 234 public ZipFile(final SeekableByteChannel channel) 235 throws IOException { 236 this(channel, "unknown archive", ZipEncodingHelper.UTF8, true); 237 } 238 239 /** 240 * Opens the given channel for reading, assuming the specified 241 * encoding for file names. 
242 * 243 * <p>{@link 244 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 245 * allows you to read from an in-memory archive.</p> 246 * 247 * @param channel the archive. 248 * @param encoding the encoding to use for file names, use null 249 * for the platform's default encoding 250 * 251 * @throws IOException if an error occurs while reading the file. 252 * @since 1.13 253 */ 254 public ZipFile(final SeekableByteChannel channel, final String encoding) 255 throws IOException { 256 this(channel, "unknown archive", encoding, true); 257 } 258 259 /** 260 * Opens the given channel for reading, assuming the specified 261 * encoding for file names. 262 * 263 * <p>{@link 264 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 265 * allows you to read from an in-memory archive.</p> 266 * 267 * @param channel the archive. 268 * @param archiveName name of the archive, used for error messages only. 269 * @param encoding the encoding to use for file names, use null 270 * for the platform's default encoding 271 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 272 * Extra Fields (if present) to set the file names. 273 * 274 * @throws IOException if an error occurs while reading the file. 
275 * @since 1.13 276 */ 277 public ZipFile(final SeekableByteChannel channel, final String archiveName, 278 final String encoding, final boolean useUnicodeExtraFields) 279 throws IOException { 280 this(channel, archiveName, encoding, useUnicodeExtraFields, false); 281 } 282 283 private ZipFile(final SeekableByteChannel channel, final String archiveName, 284 final String encoding, final boolean useUnicodeExtraFields, 285 final boolean closeOnError) 286 throws IOException { 287 this.archiveName = archiveName; 288 this.encoding = encoding; 289 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 290 this.useUnicodeExtraFields = useUnicodeExtraFields; 291 archive = channel; 292 boolean success = false; 293 try { 294 final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 295 populateFromCentralDirectory(); 296 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 297 success = true; 298 } finally { 299 closed = !success; 300 if (!success && closeOnError) { 301 IOUtils.closeQuietly(archive); 302 } 303 } 304 } 305 306 /** 307 * The encoding to use for filenames and the file comment. 308 * 309 * @return null if using the platform's default character encoding. 310 */ 311 public String getEncoding() { 312 return encoding; 313 } 314 315 /** 316 * Closes the archive. 317 * @throws IOException if an error occurs closing the archive. 318 */ 319 @Override 320 public void close() throws IOException { 321 // this flag is only written here and read in finalize() which 322 // can never be run in parallel. 323 // no synchronization needed. 324 closed = true; 325 326 archive.close(); 327 } 328 329 /** 330 * close a zipfile quietly; throw no io fault, do nothing 331 * on a null parameter 332 * @param zipfile file to close, can be null 333 */ 334 public static void closeQuietly(final ZipFile zipfile) { 335 IOUtils.closeQuietly(zipfile); 336 } 337 338 /** 339 * Returns all entries. 
340 * 341 * <p>Entries will be returned in the same order they appear 342 * within the archive's central directory.</p> 343 * 344 * @return all entries as {@link ZipArchiveEntry} instances 345 */ 346 public Enumeration<ZipArchiveEntry> getEntries() { 347 return Collections.enumeration(entries); 348 } 349 350 /** 351 * Returns all entries in physical order. 352 * 353 * <p>Entries will be returned in the same order their contents 354 * appear within the archive.</p> 355 * 356 * @return all entries as {@link ZipArchiveEntry} instances 357 * 358 * @since 1.1 359 */ 360 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 361 final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]); 362 Arrays.sort(allEntries, offsetComparator); 363 return Collections.enumeration(Arrays.asList(allEntries)); 364 } 365 366 /** 367 * Returns a named entry - or {@code null} if no entry by 368 * that name exists. 369 * 370 * <p>If multiple entries with the same name exist the first entry 371 * in the archive's central directory by that name is 372 * returned.</p> 373 * 374 * @param name name of the entry. 375 * @return the ZipArchiveEntry corresponding to the given name - or 376 * {@code null} if not present. 377 */ 378 public ZipArchiveEntry getEntry(final String name) { 379 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 380 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 381 } 382 383 /** 384 * Returns all named entries in the same order they appear within 385 * the archive's central directory. 386 * 387 * @param name name of the entry. 388 * @return the Iterable<ZipArchiveEntry> corresponding to the 389 * given name 390 * @since 1.6 391 */ 392 public Iterable<ZipArchiveEntry> getEntries(final String name) { 393 final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 394 return entriesOfThatName != null ? 
entriesOfThatName 395 : Collections.<ZipArchiveEntry>emptyList(); 396 } 397 398 /** 399 * Returns all named entries in the same order their contents 400 * appear within the archive. 401 * 402 * @param name name of the entry. 403 * @return the Iterable<ZipArchiveEntry> corresponding to the 404 * given name 405 * @since 1.6 406 */ 407 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 408 ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0]; 409 if (nameMap.containsKey(name)) { 410 entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); 411 Arrays.sort(entriesOfThatName, offsetComparator); 412 } 413 return Arrays.asList(entriesOfThatName); 414 } 415 416 /** 417 * Whether this class is able to read the given entry. 418 * 419 * <p>May return false if it is set up to use encryption or a 420 * compression method that hasn't been implemented yet.</p> 421 * @since 1.1 422 * @param ze the entry 423 * @return whether this class is able to read the given entry. 424 */ 425 public boolean canReadEntryData(final ZipArchiveEntry ze) { 426 return ZipUtil.canHandleEntryData(ze); 427 } 428 429 /** 430 * Expose the raw stream of the archive entry (compressed form). 431 * 432 * <p>This method does not relate to how/if we understand the payload in the 433 * stream, since we really only intend to move it on to somewhere else.</p> 434 * 435 * @param ze The entry to get the stream for 436 * @return The raw input stream containing (possibly) compressed data. 437 * @since 1.11 438 */ 439 public InputStream getRawInputStream(final ZipArchiveEntry ze) { 440 if (!(ze instanceof Entry)) { 441 return null; 442 } 443 final long start = ze.getDataOffset(); 444 return createBoundedInputStream(start, ze.getCompressedSize()); 445 } 446 447 448 /** 449 * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream. 450 * Compression and all other attributes will be as in this file. 
451 * <p>This method transfers entries based on the central directory of the zip file.</p> 452 * 453 * @param target The zipArchiveOutputStream to write the entries to 454 * @param predicate A predicate that selects which entries to write 455 * @throws IOException on error 456 */ 457 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) 458 throws IOException { 459 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 460 while (src.hasMoreElements()) { 461 final ZipArchiveEntry entry = src.nextElement(); 462 if (predicate.test( entry)) { 463 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 464 } 465 } 466 } 467 468 /** 469 * Returns an InputStream for reading the contents of the given entry. 470 * 471 * @param ze the entry to get the stream for. 472 * @return a stream to read the entry from. The returned stream 473 * implements {@link InputStreamStatistics}. 474 * @throws IOException if unable to create an input stream from the zipentry 475 */ 476 public InputStream getInputStream(final ZipArchiveEntry ze) 477 throws IOException { 478 if (!(ze instanceof Entry)) { 479 return null; 480 } 481 // cast validity is checked just above 482 ZipUtil.checkRequestedFeatures(ze); 483 final long start = ze.getDataOffset(); 484 485 // doesn't get closed if the method is not supported - which 486 // should never happen because of the checkRequestedFeatures 487 // call above 488 final InputStream is = 489 new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR 490 switch (ZipMethod.getMethodByCode(ze.getMethod())) { 491 case STORED: 492 return new StoredStatisticsStream(is); 493 case UNSHRINKING: 494 return new UnshrinkingInputStream(is); 495 case IMPLODING: 496 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), 497 ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is); 498 case DEFLATED: 499 final Inflater inflater = new 
Inflater(true); 500 // Inflater with nowrap=true has this odd contract for a zero padding 501 // byte following the data stream; this used to be zlib's requirement 502 // and has been fixed a long time ago, but the contract persists so 503 // we comply. 504 // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean) 505 return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), 506 inflater) { 507 @Override 508 public void close() throws IOException { 509 try { 510 super.close(); 511 } finally { 512 inflater.end(); 513 } 514 } 515 }; 516 case BZIP2: 517 return new BZip2CompressorInputStream(is); 518 case ENHANCED_DEFLATED: 519 return new Deflate64CompressorInputStream(is); 520 case AES_ENCRYPTED: 521 case EXPANDING_LEVEL_1: 522 case EXPANDING_LEVEL_2: 523 case EXPANDING_LEVEL_3: 524 case EXPANDING_LEVEL_4: 525 case JPEG: 526 case LZMA: 527 case PKWARE_IMPLODING: 528 case PPMD: 529 case TOKENIZATION: 530 case UNKNOWN: 531 case WAVPACK: 532 case XZ: 533 default: 534 throw new ZipException("Found unsupported compression method " 535 + ze.getMethod()); 536 } 537 } 538 539 /** 540 * <p> 541 * Convenience method to return the entry's content as a String if isUnixSymlink() 542 * returns true for it, otherwise returns null. 
543 * </p> 544 * 545 * <p>This method assumes the symbolic link's file name uses the 546 * same encoding that as been specified for this ZipFile.</p> 547 * 548 * @param entry ZipArchiveEntry object that represents the symbolic link 549 * @return entry's content as a String 550 * @throws IOException problem with content's input stream 551 * @since 1.5 552 */ 553 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 554 if (entry != null && entry.isUnixSymlink()) { 555 try (InputStream in = getInputStream(entry)) { 556 return zipEncoding.decode(IOUtils.toByteArray(in)); 557 } 558 } 559 return null; 560 } 561 562 /** 563 * Ensures that the close method of this zipfile is called when 564 * there are no more references to it. 565 * @see #close() 566 */ 567 @Override 568 protected void finalize() throws Throwable { 569 try { 570 if (!closed) { 571 System.err.println("Cleaning up unclosed ZipFile for archive " 572 + archiveName); 573 close(); 574 } 575 } finally { 576 super.finalize(); 577 } 578 } 579 580 /** 581 * Length of a "central directory" entry structure without file 582 * name, extra fields or comment. 
583 */ 584 private static final int CFH_LEN = 585 /* version made by */ SHORT 586 /* version needed to extract */ + SHORT 587 /* general purpose bit flag */ + SHORT 588 /* compression method */ + SHORT 589 /* last mod file time */ + SHORT 590 /* last mod file date */ + SHORT 591 /* crc-32 */ + WORD 592 /* compressed size */ + WORD 593 /* uncompressed size */ + WORD 594 /* filename length */ + SHORT 595 /* extra field length */ + SHORT 596 /* file comment length */ + SHORT 597 /* disk number start */ + SHORT 598 /* internal file attributes */ + SHORT 599 /* external file attributes */ + WORD 600 /* relative offset of local header */ + WORD; 601 602 private static final long CFH_SIG = 603 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 604 605 /** 606 * Reads the central directory of the given archive and populates 607 * the internal tables with ZipArchiveEntry instances. 608 * 609 * <p>The ZipArchiveEntrys will know all data that can be obtained from 610 * the central directory alone, but not the data that requires the 611 * local file header or additional data to be read.</p> 612 * 613 * @return a map of zipentries that didn't have the language 614 * encoding flag set when read. 
*/
    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
        throws IOException {
        final HashMap<ZipArchiveEntry, NameAndComment> withoutUtf8Flag = new HashMap<>();

        positionAtCentralDirectory();

        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        long signature = ZipLong.getValue(wordBuf);

        if (signature != CFH_SIG && startsWithLocalFileHeader()) {
            throw new IOException("central directory is empty, can't expand corrupt archive.");
        }

        // consume entries for as long as each one is introduced by
        // the central file header signature
        while (signature == CFH_SIG) {
            readCentralDirectoryEntry(withoutUtf8Flag);
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            signature = ZipLong.getValue(wordBuf);
        }
        return withoutUtf8Flag;
    }

    /**
     * Reads an individual entry of the central directory, creates a
     * ZipArchiveEntry from it and adds it to the list of entries.
     *
     * <p>Expects the archive to be positioned right behind the
     * entry's signature.</p>
     *
     * @param noUTF8Flag map used to collect entries that don't have
     * their UTF-8 flag set and whose name will be set by data read
     * from the local file header later. The current entry may be
     * added to this map.
     */
    private void
        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
        throws IOException {
        cfhBbuf.rewind();
        IOUtils.readFully(archive, cfhBbuf);
        final Entry entry = new Entry();
        int pos = 0;

        final int madeBy = ZipShort.getValue(cfhBuf, pos);
        pos += SHORT;
        entry.setVersionMadeBy(madeBy);
        entry.setPlatform((madeBy >> BYTE_SHIFT) & NIBLET_MASK);

        entry.setVersionRequired(ZipShort.getValue(cfhBuf, pos));
        pos += SHORT; // version required

        final GeneralPurposeBit flags = GeneralPurposeBit.parse(cfhBuf, pos);
        final boolean utf8Names = flags.usesUTF8ForNames();
        final ZipEncoding entryEncoding;
        if (utf8Names) {
            entryEncoding = ZipEncodingHelper.UTF8_ZIP_ENCODING;
            entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        } else {
            entryEncoding = zipEncoding;
        }
        entry.setGeneralPurposeBit(flags);
        // the same two bytes are also stored unparsed
        entry.setRawFlag(ZipShort.getValue(cfhBuf, pos));
        pos += SHORT;

        //noinspection MagicConstant
        entry.setMethod(ZipShort.getValue(cfhBuf, pos));
        pos += SHORT;

        // time and date are converted together as a single WORD
        entry.setTime(ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, pos)));
        pos += WORD;

        entry.setCrc(ZipLong.getValue(cfhBuf, pos));
        pos += WORD;

        entry.setCompressedSize(ZipLong.getValue(cfhBuf, pos));
        pos += WORD;

        entry.setSize(ZipLong.getValue(cfhBuf, pos));
        pos += WORD;

        final int nameLength = ZipShort.getValue(cfhBuf, pos);
        pos += SHORT;

        final int extraLength = ZipShort.getValue(cfhBuf, pos);
        pos += SHORT;

        final int commentLength = ZipShort.getValue(cfhBuf, pos);
        pos += SHORT;

        final int diskStart = ZipShort.getValue(cfhBuf, pos);
        pos += SHORT;

        entry.setInternalAttributes(ZipShort.getValue(cfhBuf, pos));
        pos += SHORT;

        entry.setExternalAttributes(ZipLong.getValue(cfhBuf, pos));
        pos += WORD;

        // the variable length parts follow the fixed size structure
        // in the archive: name, extra data, comment
        final byte[] rawName = new byte[nameLength];
        IOUtils.readFully(archive, ByteBuffer.wrap(rawName));
        entry.setName(entryEncoding.decode(rawName), rawName);

        // LFH offset,
        entry.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, pos));
        // data offset will be filled later
        entries.add(entry);

        final byte[] rawExtra = new byte[extraLength];
        IOUtils.readFully(archive, ByteBuffer.wrap(rawExtra));
        entry.setCentralDirectoryExtra(rawExtra);

        setSizesAndOffsetFromZip64Extra(entry, diskStart);

        final byte[] rawComment = new byte[commentLength];
        IOUtils.readFully(archive, ByteBuffer.wrap(rawComment));
        entry.setComment(entryEncoding.decode(rawComment));

        if (!utf8Names && useUnicodeExtraFields) {
            noUTF8Flag.put(entry, new NameAndComment(rawName, rawComment));
        }
    }

    /**
     * If the entry holds a Zip64 extended information extra field,
     * read sizes from there if the entry's sizes are set to
     * 0xFFFFFFFF, do the same for the offset of the local file
     * header.
     *
     * <p>Ensures the Zip64 extra either knows both compressed and
     * uncompressed size or neither of both as the internal logic in
     * ExtraFieldUtils forces the field to create local header data
     * even if they are never used - and here a field with only one
     * size would be invalid.</p>
     *
     * @param ze the entry to update
     * @param diskStart the "disk number start" value read from the
     * entry's central directory record
     */
    private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze,
                                                 final int diskStart)
        throws IOException {
        final Zip64ExtendedInformationExtraField zip64Extra =
            (Zip64ExtendedInformationExtraField)
            ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        if (zip64Extra == null) {
            return;
        }

        // a value equal to the magic number means the real value
        // is stored inside the extra field
        final boolean sizeInExtra = ze.getSize() == ZIP64_MAGIC;
        final boolean compressedSizeInExtra = ze.getCompressedSize() == ZIP64_MAGIC;
        final boolean offsetInExtra = ze.getLocalHeaderOffset() == ZIP64_MAGIC;
        zip64Extra.reparseCentralDirectoryData(sizeInExtra,
                                               compressedSizeInExtra,
                                               offsetInExtra,
                                               diskStart == ZIP64_MAGIC_SHORT);

        if (sizeInExtra) {
            ze.setSize(zip64Extra.getSize().getLongValue());
        } else if (compressedSizeInExtra) {
            // keep the field complete: copy the size it lacks from the entry
            zip64Extra.setSize(new ZipEightByteInteger(ze.getSize()));
        }

        if (compressedSizeInExtra) {
            ze.setCompressedSize(zip64Extra.getCompressedSize().getLongValue());
        } else if (sizeInExtra) {
            zip64Extra.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
        }

        if (offsetInExtra) {
            ze.setLocalHeaderOffset(zip64Extra.getRelativeHeaderOffset().getLongValue());
        }
    }

    /**
     * Length of the "End of central directory record" - which is
     * supposed to be the last structure of the archive - without file
     * comment.
787 */ 788 static final int MIN_EOCD_SIZE = 789 /* end of central dir signature */ WORD 790 /* number of this disk */ + SHORT 791 /* number of the disk with the */ 792 /* start of the central directory */ + SHORT 793 /* total number of entries in */ 794 /* the central dir on this disk */ + SHORT 795 /* total number of entries in */ 796 /* the central dir */ + SHORT 797 /* size of the central directory */ + WORD 798 /* offset of start of central */ 799 /* directory with respect to */ 800 /* the starting disk number */ + WORD 801 /* zipfile comment length */ + SHORT; 802 803 /** 804 * Maximum length of the "End of central directory record" with a 805 * file comment. 806 */ 807 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 808 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; 809 810 /** 811 * Offset of the field that holds the location of the first 812 * central directory entry inside the "End of central directory 813 * record" relative to the start of the "End of central directory 814 * record". 815 */ 816 private static final int CFD_LOCATOR_OFFSET = 817 /* end of central dir signature */ WORD 818 /* number of this disk */ + SHORT 819 /* number of the disk with the */ 820 /* start of the central directory */ + SHORT 821 /* total number of entries in */ 822 /* the central dir on this disk */ + SHORT 823 /* total number of entries in */ 824 /* the central dir */ + SHORT 825 /* size of the central directory */ + WORD; 826 827 /** 828 * Length of the "Zip64 end of central directory locator" - which 829 * should be right in front of the "end of central directory 830 * record" if one is present at all. 
831 */ 832 private static final int ZIP64_EOCDL_LENGTH = 833 /* zip64 end of central dir locator sig */ WORD 834 /* number of the disk with the start */ 835 /* start of the zip64 end of */ 836 /* central directory */ + WORD 837 /* relative offset of the zip64 */ 838 /* end of central directory record */ + DWORD 839 /* total number of disks */ + WORD; 840 841 /** 842 * Offset of the field that holds the location of the "Zip64 end 843 * of central directory record" inside the "Zip64 end of central 844 * directory locator" relative to the start of the "Zip64 end of 845 * central directory locator". 846 */ 847 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 848 /* zip64 end of central dir locator sig */ WORD 849 /* number of the disk with the start */ 850 /* start of the zip64 end of */ 851 /* central directory */ + WORD; 852 853 /** 854 * Offset of the field that holds the location of the first 855 * central directory entry inside the "Zip64 end of central 856 * directory record" relative to the start of the "Zip64 end of 857 * central directory record". 858 */ 859 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 860 /* zip64 end of central dir */ 861 /* signature */ WORD 862 /* size of zip64 end of central */ 863 /* directory record */ + DWORD 864 /* version made by */ + SHORT 865 /* version needed to extract */ + SHORT 866 /* number of this disk */ + WORD 867 /* number of the disk with the */ 868 /* start of the central directory */ + WORD 869 /* total number of entries in the */ 870 /* central directory on this disk */ + DWORD 871 /* total number of entries in the */ 872 /* central directory */ + DWORD 873 /* size of the central directory */ + DWORD; 874 875 /** 876 * Searches for either the "Zip64 end of central directory 877 * locator" or the "End of central dir record", parses 878 * it and positions the stream at the first central directory 879 * record. 
880 */ 881 private void positionAtCentralDirectory() 882 throws IOException { 883 positionAtEndOfCentralDirectoryRecord(); 884 boolean found = false; 885 final boolean searchedForZip64EOCD = 886 archive.position() > ZIP64_EOCDL_LENGTH; 887 if (searchedForZip64EOCD) { 888 archive.position(archive.position() - ZIP64_EOCDL_LENGTH); 889 wordBbuf.rewind(); 890 IOUtils.readFully(archive, wordBbuf); 891 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 892 wordBuf); 893 } 894 if (!found) { 895 // not a ZIP64 archive 896 if (searchedForZip64EOCD) { 897 skipBytes(ZIP64_EOCDL_LENGTH - WORD); 898 } 899 positionAtCentralDirectory32(); 900 } else { 901 positionAtCentralDirectory64(); 902 } 903 } 904 905 /** 906 * Parses the "Zip64 end of central directory locator", 907 * finds the "Zip64 end of central directory record" using the 908 * parsed information, parses that and positions the stream at the 909 * first central directory record. 910 * 911 * Expects stream to be positioned right behind the "Zip64 912 * end of central directory locator"'s signature. 913 */ 914 private void positionAtCentralDirectory64() 915 throws IOException { 916 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET 917 - WORD /* signature has already been read */); 918 dwordBbuf.rewind(); 919 IOUtils.readFully(archive, dwordBbuf); 920 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 921 wordBbuf.rewind(); 922 IOUtils.readFully(archive, wordBbuf); 923 if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 924 throw new ZipException("archive's ZIP64 end of central " 925 + "directory locator is corrupt."); 926 } 927 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET 928 - WORD /* signature has already been read */); 929 dwordBbuf.rewind(); 930 IOUtils.readFully(archive, dwordBbuf); 931 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 932 } 933 934 /** 935 * Parses the "End of central dir record" and positions 936 * the stream at the first central directory record. 
937 * 938 * Expects stream to be positioned at the beginning of the 939 * "End of central dir record". 940 */ 941 private void positionAtCentralDirectory32() 942 throws IOException { 943 skipBytes(CFD_LOCATOR_OFFSET); 944 wordBbuf.rewind(); 945 IOUtils.readFully(archive, wordBbuf); 946 archive.position(ZipLong.getValue(wordBuf)); 947 } 948 949 /** 950 * Searches for the and positions the stream at the start of the 951 * "End of central dir record". 952 */ 953 private void positionAtEndOfCentralDirectoryRecord() 954 throws IOException { 955 final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, 956 ZipArchiveOutputStream.EOCD_SIG); 957 if (!found) { 958 throw new ZipException("archive is not a ZIP archive"); 959 } 960 } 961 962 /** 963 * Searches the archive backwards from minDistance to maxDistance 964 * for the given signature, positions the RandomaccessFile right 965 * at the signature if it has been found. 966 */ 967 private boolean tryToLocateSignature(final long minDistanceFromEnd, 968 final long maxDistanceFromEnd, 969 final byte[] sig) throws IOException { 970 boolean found = false; 971 long off = archive.size() - minDistanceFromEnd; 972 final long stopSearching = 973 Math.max(0L, archive.size() - maxDistanceFromEnd); 974 if (off >= 0) { 975 for (; off >= stopSearching; off--) { 976 archive.position(off); 977 try { 978 wordBbuf.rewind(); 979 IOUtils.readFully(archive, wordBbuf); 980 wordBbuf.flip(); 981 } catch (EOFException ex) { 982 break; 983 } 984 int curr = wordBbuf.get(); 985 if (curr == sig[POS_0]) { 986 curr = wordBbuf.get(); 987 if (curr == sig[POS_1]) { 988 curr = wordBbuf.get(); 989 if (curr == sig[POS_2]) { 990 curr = wordBbuf.get(); 991 if (curr == sig[POS_3]) { 992 found = true; 993 break; 994 } 995 } 996 } 997 } 998 } 999 } 1000 if (found) { 1001 archive.position(off); 1002 } 1003 return found; 1004 } 1005 1006 /** 1007 * Skips the given number of bytes or throws an EOFException if 1008 * skipping failed. 
1009 */ 1010 private void skipBytes(final int count) throws IOException { 1011 long currentPosition = archive.position(); 1012 long newPosition = currentPosition + count; 1013 if (newPosition > archive.size()) { 1014 throw new EOFException(); 1015 } 1016 archive.position(newPosition); 1017 } 1018 1019 /** 1020 * Number of bytes in local file header up to the "length of 1021 * filename" entry. 1022 */ 1023 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 1024 /* local file header signature */ WORD 1025 /* version needed to extract */ + SHORT 1026 /* general purpose bit flag */ + SHORT 1027 /* compression method */ + SHORT 1028 /* last mod file time */ + SHORT 1029 /* last mod file date */ + SHORT 1030 /* crc-32 */ + WORD 1031 /* compressed size */ + WORD 1032 /* uncompressed size */ + (long) WORD; 1033 1034 /** 1035 * Walks through all recorded entries and adds the data available 1036 * from the local file header. 1037 * 1038 * <p>Also records the offsets for the data to read from the 1039 * entries.</p> 1040 */ 1041 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> 1042 entriesWithoutUTF8Flag) 1043 throws IOException { 1044 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1045 // entries is filled in populateFromCentralDirectory and 1046 // never modified 1047 final Entry ze = (Entry) zipArchiveEntry; 1048 final long offset = ze.getLocalHeaderOffset(); 1049 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1050 wordBbuf.rewind(); 1051 IOUtils.readFully(archive, wordBbuf); 1052 wordBbuf.flip(); 1053 wordBbuf.get(shortBuf); 1054 final int fileNameLen = ZipShort.getValue(shortBuf); 1055 wordBbuf.get(shortBuf); 1056 final int extraFieldLen = ZipShort.getValue(shortBuf); 1057 skipBytes(fileNameLen); 1058 final byte[] localExtraData = new byte[extraFieldLen]; 1059 IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData)); 1060 ze.setExtra(localExtraData); 1061 ze.setDataOffset(offset + 
LFH_OFFSET_FOR_FILENAME_LENGTH 1062 + SHORT + SHORT + fileNameLen + extraFieldLen); 1063 ze.setStreamContiguous(true); 1064 1065 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1066 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1067 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 1068 nc.comment); 1069 } 1070 1071 final String name = ze.getName(); 1072 LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 1073 if (entriesOfThatName == null) { 1074 entriesOfThatName = new LinkedList<>(); 1075 nameMap.put(name, entriesOfThatName); 1076 } 1077 entriesOfThatName.addLast(ze); 1078 } 1079 } 1080 1081 /** 1082 * Checks whether the archive starts with a LFH. If it doesn't, 1083 * it may be an empty archive. 1084 */ 1085 private boolean startsWithLocalFileHeader() throws IOException { 1086 archive.position(0); 1087 wordBbuf.rewind(); 1088 IOUtils.readFully(archive, wordBbuf); 1089 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); 1090 } 1091 1092 /** 1093 * Creates new BoundedInputStream, according to implementation of 1094 * underlying archive channel. 1095 */ 1096 private BoundedInputStream createBoundedInputStream(long start, long remaining) { 1097 return archive instanceof FileChannel ? 1098 new BoundedFileChannelInputStream(start, remaining) : 1099 new BoundedInputStream(start, remaining); 1100 } 1101 1102 /** 1103 * InputStream that delegates requests to the underlying 1104 * SeekableByteChannel, making sure that only bytes from a certain 1105 * range can be read. 
1106 */ 1107 private class BoundedInputStream extends InputStream { 1108 private ByteBuffer singleByteBuffer; 1109 private final long end; 1110 private long loc; 1111 1112 BoundedInputStream(final long start, final long remaining) { 1113 this.end = start+remaining; 1114 if (this.end < start) { 1115 // check for potential vulnerability due to overflow 1116 throw new IllegalArgumentException("Invalid length of stream at offset="+start+", length="+remaining); 1117 } 1118 loc = start; 1119 } 1120 1121 @Override 1122 public synchronized int read() throws IOException { 1123 if (loc >= end) { 1124 return -1; 1125 } 1126 if (singleByteBuffer == null) { 1127 singleByteBuffer = ByteBuffer.allocate(1); 1128 } 1129 else { 1130 singleByteBuffer.rewind(); 1131 } 1132 int read = read(loc, singleByteBuffer); 1133 if (read < 0) { 1134 return read; 1135 } 1136 loc++; 1137 return singleByteBuffer.get() & 0xff; 1138 } 1139 1140 @Override 1141 public synchronized int read(final byte[] b, final int off, int len) throws IOException { 1142 if (len <= 0) { 1143 return 0; 1144 } 1145 1146 if (len > end-loc) { 1147 if (loc >= end) { 1148 return -1; 1149 } 1150 len = (int)(end-loc); 1151 } 1152 1153 ByteBuffer buf; 1154 buf = ByteBuffer.wrap(b, off, len); 1155 int ret = read(loc, buf); 1156 if (ret > 0) { 1157 loc += ret; 1158 return ret; 1159 } 1160 return ret; 1161 } 1162 1163 protected int read(long pos, ByteBuffer buf) throws IOException { 1164 int read; 1165 synchronized (archive) { 1166 archive.position(pos); 1167 read = archive.read(buf); 1168 } 1169 buf.flip(); 1170 return read; 1171 } 1172 } 1173 1174 /** 1175 * Lock-free implementation of BoundedInputStream. The 1176 * implementation uses positioned reads on the underlying archive 1177 * file channel and therefore performs significantly faster in 1178 * concurrent environment. 
1179 */ 1180 private class BoundedFileChannelInputStream extends BoundedInputStream { 1181 private final FileChannel archive; 1182 1183 BoundedFileChannelInputStream(final long start, final long remaining) { 1184 super(start, remaining); 1185 archive = (FileChannel)ZipFile.this.archive; 1186 } 1187 1188 @Override 1189 protected int read(long pos, ByteBuffer buf) throws IOException { 1190 int read = archive.read(buf, pos); 1191 buf.flip(); 1192 return read; 1193 } 1194 } 1195 1196 private static final class NameAndComment { 1197 private final byte[] name; 1198 private final byte[] comment; 1199 private NameAndComment(final byte[] name, final byte[] comment) { 1200 this.name = name; 1201 this.comment = comment; 1202 } 1203 } 1204 1205 /** 1206 * Compares two ZipArchiveEntries based on their offset within the archive. 1207 * 1208 * <p>Won't return any meaningful results if one of the entries 1209 * isn't part of the archive at all.</p> 1210 * 1211 * @since 1.1 1212 */ 1213 private final Comparator<ZipArchiveEntry> offsetComparator = 1214 new Comparator<ZipArchiveEntry>() { 1215 @Override 1216 public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) { 1217 if (e1 == e2) { 1218 return 0; 1219 } 1220 1221 final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null; 1222 final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null; 1223 if (ent1 == null) { 1224 return 1; 1225 } 1226 if (ent2 == null) { 1227 return -1; 1228 } 1229 final long val = (ent1.getLocalHeaderOffset() 1230 - ent2.getLocalHeaderOffset()); 1231 return val == 0 ? 0 : val < 0 ? -1 : +1; 1232 } 1233 }; 1234 1235 /** 1236 * Extends ZipArchiveEntry to store the offset within the archive. 
*/
    private static class Entry extends ZipArchiveEntry {

        Entry() {
        }

        /**
         * Combines the inherited hash code with both halves of the
         * local header offset.
         */
        @Override
        public int hashCode() {
            final long headerOffset = getLocalHeaderOffset();
            final int lowerHalf = (int) headerOffset;
            final int upperHalf = (int) (headerOffset >> 32);
            return 3 * super.hashCode() + lowerHalf + upperHalf;
        }

        /**
         * Two entries are equal if the inherited comparison deems
         * them equal and both their local header offsets and their
         * data offsets match.
         */
        @Override
        public boolean equals(final Object other) {
            if (!super.equals(other)) {
                return false;
            }
            // super.equals would return false if other were not an Entry
            final Entry that = (Entry) other;
            final boolean sameHeaderOffset =
                getLocalHeaderOffset() == that.getLocalHeaderOffset();
            return sameHeaderOffset
                && getDataOffset() == that.getDataOffset();
        }
    }

    /**
     * Counting stream that reports the number of bytes read as both
     * its compressed and its uncompressed count.
     */
    private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics {
        StoredStatisticsStream(InputStream in) {
            super(in);
        }

        /**
         * @return the number of bytes read so far
         */
        @Override
        public long getCompressedCount() {
            return super.getBytesRead();
        }

        /**
         * @return the same value as {@link #getCompressedCount()}
         */
        @Override
        public long getUncompressedCount() {
            return getCompressedCount();
        }
    }
}