001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.sevenz; 019 020import java.io.BufferedInputStream; 021import java.io.ByteArrayInputStream; 022import java.io.Closeable; 023import java.io.DataInputStream; 024import java.io.File; 025import java.io.FilterInputStream; 026import java.io.IOException; 027import java.io.InputStream; 028import java.nio.ByteBuffer; 029import java.nio.ByteOrder; 030import java.nio.CharBuffer; 031import java.nio.channels.SeekableByteChannel; 032import java.nio.charset.StandardCharsets; 033import java.nio.charset.CharsetEncoder; 034import java.nio.file.Files; 035import java.nio.file.StandardOpenOption; 036import java.util.ArrayList; 037import java.util.Arrays; 038import java.util.BitSet; 039import java.util.EnumSet; 040import java.util.LinkedList; 041import java.util.zip.CRC32; 042 043import org.apache.commons.compress.utils.BoundedInputStream; 044import org.apache.commons.compress.utils.CRC32VerifyingInputStream; 045import org.apache.commons.compress.utils.CharsetNames; 046import org.apache.commons.compress.utils.IOUtils; 047import org.apache.commons.compress.utils.InputStreamStatistics; 048 049/** 050 * Reads a 7z file, using SeekableByteChannel under 051 * the covers. 052 * <p> 053 * The 7z file format is a flexible container 054 * that can contain many compression and 055 * encryption types, but at the moment only 056 * only Copy, LZMA, LZMA2, BZIP2, Deflate and AES-256 + SHA-256 057 * are supported. 058 * <p> 059 * The format is very Windows/Intel specific, 060 * so it uses little-endian byte order, 061 * doesn't store user/group or permission bits, 062 * and represents times using NTFS timestamps 063 * (100 nanosecond units since 1 January 1601). 064 * Hence the official tools recommend against 065 * using it for backup purposes on *nix, and 066 * recommend .tar.7z or .tar.lzma or .tar.xz 067 * instead. 068 * <p> 069 * Both the header and file contents may be 070 * compressed and/or encrypted. With both 071 * encrypted, neither file names nor file 072 * contents can be read, but the use of 073 * encryption isn't plausibly deniable. 074 * 075 * @NotThreadSafe 076 * @since 1.6 077 */ 078public class SevenZFile implements Closeable { 079 static final int SIGNATURE_HEADER_SIZE = 32; 080 081 private final String fileName; 082 private SeekableByteChannel channel; 083 private final Archive archive; 084 private int currentEntryIndex = -1; 085 private int currentFolderIndex = -1; 086 private InputStream currentFolderInputStream = null; 087 private byte[] password; 088 089 private long compressedBytesReadFromCurrentEntry; 090 private long uncompressedBytesReadFromCurrentEntry; 091 092 private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>(); 093 094 // shared with SevenZOutputFile and tests, neither mutates it 095 static final byte[] sevenZSignature = { //NOSONAR 096 (byte)'7', (byte)'z', (byte)0xBC, (byte)0xAF, (byte)0x27, (byte)0x1C 097 }; 098 099 /** 100 * Reads a file as 7z archive 101 * 102 * @param filename the file to read 103 * @param password optional password if the archive is encrypted 104 * @throws IOException if reading the archive fails 105 * @since 1.17 106 */ 107 public SevenZFile(final File filename, final char[] password) throws IOException { 108 this(Files.newByteChannel(filename.toPath(), EnumSet.of(StandardOpenOption.READ)), 109 filename.getAbsolutePath(), utf16Decode(password), true); 110 } 111 112 /** 113 * Reads a file as 7z archive 114 * 115 * @param filename the file to read 116 * @param password optional password if the archive is encrypted - 117 * the byte array is supposed to be the UTF16-LE encoded 118 * representation of the password. 119 * @throws IOException if reading the archive fails 120 * @deprecated use the char[]-arg version for the password instead 121 */ 122 public SevenZFile(final File filename, final byte[] password) throws IOException { 123 this(Files.newByteChannel(filename.toPath(), EnumSet.of(StandardOpenOption.READ)), 124 filename.getAbsolutePath(), password, true); 125 } 126 127 /** 128 * Reads a SeekableByteChannel as 7z archive 129 * 130 * <p>{@link 131 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 132 * allows you to read from an in-memory archive.</p> 133 * 134 * @param channel the channel to read 135 * @throws IOException if reading the archive fails 136 * @since 1.13 137 */ 138 public SevenZFile(final SeekableByteChannel channel) throws IOException { 139 this(channel, "unknown archive", (char[]) null); 140 } 141 142 /** 143 * Reads a SeekableByteChannel as 7z archive 144 * 145 * <p>{@link 146 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 147 * allows you to read from an in-memory archive.</p> 148 * 149 * @param channel the channel to read 150 * @param password optional password if the archive is encrypted 151 * @throws IOException if reading the archive fails 152 * @since 1.17 153 */ 154 public SevenZFile(final SeekableByteChannel channel, 155 final char[] password) throws IOException { 156 this(channel, "unknown archive", utf16Decode(password)); 157 } 158 159 /** 160 * Reads a SeekableByteChannel as 7z archive 161 * 162 * <p>{@link 163 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 164 * allows you to read from an in-memory archive.</p> 165 * 166 * @param channel the channel to read 167 * @param filename name of the archive - only used for error reporting 168 * @param password optional password if the archive is encrypted 169 * @throws IOException if reading the archive fails 170 * @since 1.17 171 */ 172 public SevenZFile(final SeekableByteChannel channel, String filename, 173 final char[] password) throws IOException { 174 this(channel, filename, utf16Decode(password), false); 175 } 176 177 /** 178 * Reads a SeekableByteChannel as 7z archive 179 * 180 * <p>{@link 181 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 182 * allows you to read from an in-memory archive.</p> 183 * 184 * @param channel the channel to read 185 * @param filename name of the archive - only used for error reporting 186 * @throws IOException if reading the archive fails 187 * @since 1.17 188 */ 189 public SevenZFile(final SeekableByteChannel channel, String filename) 190 throws IOException { 191 this(channel, filename, null, false); 192 } 193 194 /** 195 * Reads a SeekableByteChannel as 7z archive 196 * 197 * <p>{@link 198 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 199 * allows you to read from an in-memory archive.</p> 200 * 201 * @param channel the channel to read 202 * @param password optional password if the archive is encrypted - 203 * the byte array is supposed to be the UTF16-LE encoded 204 * representation of the password. 205 * @throws IOException if reading the archive fails 206 * @since 1.13 207 * @deprecated use the char[]-arg version for the password instead 208 */ 209 public SevenZFile(final SeekableByteChannel channel, 210 final byte[] password) throws IOException { 211 this(channel, "unknown archive", password); 212 } 213 214 /** 215 * Reads a SeekableByteChannel as 7z archive 216 * 217 * <p>{@link 218 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 219 * allows you to read from an in-memory archive.</p> 220 * 221 * @param channel the channel to read 222 * @param filename name of the archive - only used for error reporting 223 * @param password optional password if the archive is encrypted - 224 * the byte array is supposed to be the UTF16-LE encoded 225 * representation of the password. 226 * @throws IOException if reading the archive fails 227 * @since 1.13 228 * @deprecated use the char[]-arg version for the password instead 229 */ 230 public SevenZFile(final SeekableByteChannel channel, String filename, 231 final byte[] password) throws IOException { 232 this(channel, filename, password, false); 233 } 234 235 private SevenZFile(final SeekableByteChannel channel, String filename, 236 final byte[] password, boolean closeOnError) throws IOException { 237 boolean succeeded = false; 238 this.channel = channel; 239 this.fileName = filename; 240 try { 241 archive = readHeaders(password); 242 if (password != null) { 243 this.password = Arrays.copyOf(password, password.length); 244 } else { 245 this.password = null; 246 } 247 succeeded = true; 248 } finally { 249 if (!succeeded && closeOnError) { 250 this.channel.close(); 251 } 252 } 253 } 254 255 /** 256 * Reads a file as unencrypted 7z archive 257 * 258 * @param filename the file to read 259 * @throws IOException if reading the archive fails 260 */ 261 public SevenZFile(final File filename) throws IOException { 262 this(filename, (char[]) null); 263 } 264 265 /** 266 * Closes the archive. 267 * @throws IOException if closing the file fails 268 */ 269 @Override 270 public void close() throws IOException { 271 if (channel != null) { 272 try { 273 channel.close(); 274 } finally { 275 channel = null; 276 if (password != null) { 277 Arrays.fill(password, (byte) 0); 278 } 279 password = null; 280 } 281 } 282 } 283 284 /** 285 * Returns the next Archive Entry in this archive. 286 * 287 * @return the next entry, 288 * or {@code null} if there are no more entries 289 * @throws IOException if the next entry could not be read 290 */ 291 public SevenZArchiveEntry getNextEntry() throws IOException { 292 if (currentEntryIndex >= archive.files.length - 1) { 293 return null; 294 } 295 ++currentEntryIndex; 296 final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; 297 buildDecodingStream(); 298 uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; 299 return entry; 300 } 301 302 /** 303 * Returns meta-data of all archive entries. 304 * 305 * <p>This method only provides meta-data, the entries can not be 306 * used to read the contents, you still need to process all 307 * entries in order using {@link #getNextEntry} for that.</p> 308 * 309 * <p>The content methods are only available for entries that have 310 * already been reached via {@link #getNextEntry}.</p> 311 * 312 * @return meta-data of all archive entries. 313 * @since 1.11 314 */ 315 public Iterable<SevenZArchiveEntry> getEntries() { 316 return Arrays.asList(archive.files); 317 } 318 319 private Archive readHeaders(final byte[] password) throws IOException { 320 ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */) 321 .order(ByteOrder.LITTLE_ENDIAN); 322 readFully(buf); 323 final byte[] signature = new byte[6]; 324 buf.get(signature); 325 if (!Arrays.equals(signature, sevenZSignature)) { 326 throw new IOException("Bad 7z signature"); 327 } 328 // 7zFormat.txt has it wrong - it's first major then minor 329 final byte archiveVersionMajor = buf.get(); 330 final byte archiveVersionMinor = buf.get(); 331 if (archiveVersionMajor != 0) { 332 throw new IOException(String.format("Unsupported 7z version (%d,%d)", 333 archiveVersionMajor, archiveVersionMinor)); 334 } 335 336 final long startHeaderCrc = 0xffffFFFFL & buf.getInt(); 337 final StartHeader startHeader = readStartHeader(startHeaderCrc); 338 339 final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize; 340 if (nextHeaderSizeInt != startHeader.nextHeaderSize) { 341 throw new IOException("cannot handle nextHeaderSize " + startHeader.nextHeaderSize); 342 } 343 channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset); 344 buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN); 345 readFully(buf); 346 final CRC32 crc = new CRC32(); 347 crc.update(buf.array()); 348 if (startHeader.nextHeaderCrc != crc.getValue()) { 349 throw new IOException("NextHeader CRC mismatch"); 350 } 351 352 Archive archive = new Archive(); 353 int nid = getUnsignedByte(buf); 354 if (nid == NID.kEncodedHeader) { 355 buf = readEncodedHeader(buf, archive, password); 356 // Archive gets rebuilt with the new header 357 archive = new Archive(); 358 nid = getUnsignedByte(buf); 359 } 360 if (nid == NID.kHeader) { 361 readHeader(buf, archive); 362 } else { 363 throw new IOException("Broken or unsupported archive: no Header"); 364 } 365 return archive; 366 } 367 368 private StartHeader readStartHeader(final long startHeaderCrc) throws IOException { 369 final StartHeader startHeader = new StartHeader(); 370 // using Stream rather than ByteBuffer for the benefit of the 371 // built-in CRC check 372 try (DataInputStream dataInputStream = new DataInputStream(new CRC32VerifyingInputStream( 373 new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) { 374 startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong()); 375 startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong()); 376 startHeader.nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt()); 377 return startHeader; 378 } 379 } 380 381 private void readHeader(final ByteBuffer header, final Archive archive) throws IOException { 382 int nid = getUnsignedByte(header); 383 384 if (nid == NID.kArchiveProperties) { 385 readArchiveProperties(header); 386 nid = getUnsignedByte(header); 387 } 388 389 if (nid == NID.kAdditionalStreamsInfo) { 390 throw new IOException("Additional streams unsupported"); 391 //nid = header.readUnsignedByte(); 392 } 393 394 if (nid == NID.kMainStreamsInfo) { 395 readStreamsInfo(header, archive); 396 nid = getUnsignedByte(header); 397 } 398 399 if (nid == NID.kFilesInfo) { 400 readFilesInfo(header, archive); 401 nid = getUnsignedByte(header); 402 } 403 404 if (nid != NID.kEnd) { 405 throw new IOException("Badly terminated header, found " + nid); 406 } 407 } 408 409 private void readArchiveProperties(final ByteBuffer input) throws IOException { 410 // FIXME: the reference implementation just throws them away? 411 int nid = getUnsignedByte(input); 412 while (nid != NID.kEnd) { 413 final long propertySize = readUint64(input); 414 final byte[] property = new byte[(int)propertySize]; 415 input.get(property); 416 nid = getUnsignedByte(input); 417 } 418 } 419 420 private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive, 421 final byte[] password) throws IOException { 422 readStreamsInfo(header, archive); 423 424 // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage? 425 final Folder folder = archive.folders[0]; 426 final int firstPackStreamIndex = 0; 427 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 428 0; 429 430 channel.position(folderOffset); 431 InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel, 432 archive.packSizes[firstPackStreamIndex]); 433 for (final Coder coder : folder.getOrderedCoders()) { 434 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 435 throw new IOException("Multi input/output stream coders are not yet supported"); 436 } 437 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, //NOSONAR 438 folder.getUnpackSizeForCoder(coder), coder, password); 439 } 440 if (folder.hasCrc) { 441 inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack, 442 folder.getUnpackSize(), folder.crc); 443 } 444 final byte[] nextHeader = new byte[(int)folder.getUnpackSize()]; 445 try (DataInputStream nextHeaderInputStream = new DataInputStream(inputStreamStack)) { 446 nextHeaderInputStream.readFully(nextHeader); 447 } 448 return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN); 449 } 450 451 private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 452 int nid = getUnsignedByte(header); 453 454 if (nid == NID.kPackInfo) { 455 readPackInfo(header, archive); 456 nid = getUnsignedByte(header); 457 } 458 459 if (nid == NID.kUnpackInfo) { 460 readUnpackInfo(header, archive); 461 nid = getUnsignedByte(header); 462 } else { 463 // archive without unpack/coders info 464 archive.folders = new Folder[0]; 465 } 466 467 if (nid == NID.kSubStreamsInfo) { 468 readSubStreamsInfo(header, archive); 469 nid = getUnsignedByte(header); 470 } 471 472 if (nid != NID.kEnd) { 473 throw new IOException("Badly terminated StreamsInfo"); 474 } 475 } 476 477 private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException { 478 archive.packPos = readUint64(header); 479 final long numPackStreams = readUint64(header); 480 int nid = getUnsignedByte(header); 481 if (nid == NID.kSize) { 482 archive.packSizes = new long[(int)numPackStreams]; 483 for (int i = 0; i < archive.packSizes.length; i++) { 484 archive.packSizes[i] = readUint64(header); 485 } 486 nid = getUnsignedByte(header); 487 } 488 489 if (nid == NID.kCRC) { 490 archive.packCrcsDefined = readAllOrBits(header, (int)numPackStreams); 491 archive.packCrcs = new long[(int)numPackStreams]; 492 for (int i = 0; i < (int)numPackStreams; i++) { 493 if (archive.packCrcsDefined.get(i)) { 494 archive.packCrcs[i] = 0xffffFFFFL & header.getInt(); 495 } 496 } 497 498 nid = getUnsignedByte(header); 499 } 500 501 if (nid != NID.kEnd) { 502 throw new IOException("Badly terminated PackInfo (" + nid + ")"); 503 } 504 } 505 506 private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException { 507 int nid = getUnsignedByte(header); 508 if (nid != NID.kFolder) { 509 throw new IOException("Expected kFolder, got " + nid); 510 } 511 final long numFolders = readUint64(header); 512 final Folder[] folders = new Folder[(int)numFolders]; 513 archive.folders = folders; 514 final int external = getUnsignedByte(header); 515 if (external != 0) { 516 throw new IOException("External unsupported"); 517 } 518 for (int i = 0; i < (int)numFolders; i++) { 519 folders[i] = readFolder(header); 520 } 521 522 nid = getUnsignedByte(header); 523 if (nid != NID.kCodersUnpackSize) { 524 throw new IOException("Expected kCodersUnpackSize, got " + nid); 525 } 526 for (final Folder folder : folders) { 527 folder.unpackSizes = new long[(int)folder.totalOutputStreams]; 528 for (int i = 0; i < folder.totalOutputStreams; i++) { 529 folder.unpackSizes[i] = readUint64(header); 530 } 531 } 532 533 nid = getUnsignedByte(header); 534 if (nid == NID.kCRC) { 535 final BitSet crcsDefined = readAllOrBits(header, (int)numFolders); 536 for (int i = 0; i < (int)numFolders; i++) { 537 if (crcsDefined.get(i)) { 538 folders[i].hasCrc = true; 539 folders[i].crc = 0xffffFFFFL & header.getInt(); 540 } else { 541 folders[i].hasCrc = false; 542 } 543 } 544 545 nid = getUnsignedByte(header); 546 } 547 548 if (nid != NID.kEnd) { 549 throw new IOException("Badly terminated UnpackInfo"); 550 } 551 } 552 553 private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 554 for (final Folder folder : archive.folders) { 555 folder.numUnpackSubStreams = 1; 556 } 557 int totalUnpackStreams = archive.folders.length; 558 559 int nid = getUnsignedByte(header); 560 if (nid == NID.kNumUnpackStream) { 561 totalUnpackStreams = 0; 562 for (final Folder folder : archive.folders) { 563 final long numStreams = readUint64(header); 564 folder.numUnpackSubStreams = (int)numStreams; 565 totalUnpackStreams += numStreams; 566 } 567 nid = getUnsignedByte(header); 568 } 569 570 final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(); 571 subStreamsInfo.unpackSizes = new long[totalUnpackStreams]; 572 subStreamsInfo.hasCrc = new BitSet(totalUnpackStreams); 573 subStreamsInfo.crcs = new long[totalUnpackStreams]; 574 575 int nextUnpackStream = 0; 576 for (final Folder folder : archive.folders) { 577 if (folder.numUnpackSubStreams == 0) { 578 continue; 579 } 580 long sum = 0; 581 if (nid == NID.kSize) { 582 for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) { 583 final long size = readUint64(header); 584 subStreamsInfo.unpackSizes[nextUnpackStream++] = size; 585 sum += size; 586 } 587 } 588 subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum; 589 } 590 if (nid == NID.kSize) { 591 nid = getUnsignedByte(header); 592 } 593 594 int numDigests = 0; 595 for (final Folder folder : archive.folders) { 596 if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) { 597 numDigests += folder.numUnpackSubStreams; 598 } 599 } 600 601 if (nid == NID.kCRC) { 602 final BitSet hasMissingCrc = readAllOrBits(header, numDigests); 603 final long[] missingCrcs = new long[numDigests]; 604 for (int i = 0; i < numDigests; i++) { 605 if (hasMissingCrc.get(i)) { 606 missingCrcs[i] = 0xffffFFFFL & header.getInt(); 607 } 608 } 609 int nextCrc = 0; 610 int nextMissingCrc = 0; 611 for (final Folder folder: archive.folders) { 612 if (folder.numUnpackSubStreams == 1 && folder.hasCrc) { 613 subStreamsInfo.hasCrc.set(nextCrc, true); 614 subStreamsInfo.crcs[nextCrc] = folder.crc; 615 ++nextCrc; 616 } else { 617 for (int i = 0; i < folder.numUnpackSubStreams; i++) { 618 subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc)); 619 subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc]; 620 ++nextCrc; 621 ++nextMissingCrc; 622 } 623 } 624 } 625 626 nid = getUnsignedByte(header); 627 } 628 629 if (nid != NID.kEnd) { 630 throw new IOException("Badly terminated SubStreamsInfo"); 631 } 632 633 archive.subStreamsInfo = subStreamsInfo; 634 } 635 636 private Folder readFolder(final ByteBuffer header) throws IOException { 637 final Folder folder = new Folder(); 638 639 final long numCoders = readUint64(header); 640 final Coder[] coders = new Coder[(int)numCoders]; 641 long totalInStreams = 0; 642 long totalOutStreams = 0; 643 for (int i = 0; i < coders.length; i++) { 644 coders[i] = new Coder(); 645 final int bits = getUnsignedByte(header); 646 final int idSize = bits & 0xf; 647 final boolean isSimple = (bits & 0x10) == 0; 648 final boolean hasAttributes = (bits & 0x20) != 0; 649 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 650 651 coders[i].decompressionMethodId = new byte[idSize]; 652 header.get(coders[i].decompressionMethodId); 653 if (isSimple) { 654 coders[i].numInStreams = 1; 655 coders[i].numOutStreams = 1; 656 } else { 657 coders[i].numInStreams = readUint64(header); 658 coders[i].numOutStreams = readUint64(header); 659 } 660 totalInStreams += coders[i].numInStreams; 661 totalOutStreams += coders[i].numOutStreams; 662 if (hasAttributes) { 663 final long propertiesSize = readUint64(header); 664 coders[i].properties = new byte[(int)propertiesSize]; 665 header.get(coders[i].properties); 666 } 667 // would need to keep looping as above: 668 while (moreAlternativeMethods) { 669 throw new IOException("Alternative methods are unsupported, please report. " + 670 "The reference implementation doesn't support them either."); 671 } 672 } 673 folder.coders = coders; 674 folder.totalInputStreams = totalInStreams; 675 folder.totalOutputStreams = totalOutStreams; 676 677 if (totalOutStreams == 0) { 678 throw new IOException("Total output streams can't be 0"); 679 } 680 final long numBindPairs = totalOutStreams - 1; 681 final BindPair[] bindPairs = new BindPair[(int)numBindPairs]; 682 for (int i = 0; i < bindPairs.length; i++) { 683 bindPairs[i] = new BindPair(); 684 bindPairs[i].inIndex = readUint64(header); 685 bindPairs[i].outIndex = readUint64(header); 686 } 687 folder.bindPairs = bindPairs; 688 689 if (totalInStreams < numBindPairs) { 690 throw new IOException("Total input streams can't be less than the number of bind pairs"); 691 } 692 final long numPackedStreams = totalInStreams - numBindPairs; 693 final long packedStreams[] = new long[(int)numPackedStreams]; 694 if (numPackedStreams == 1) { 695 int i; 696 for (i = 0; i < (int)totalInStreams; i++) { 697 if (folder.findBindPairForInStream(i) < 0) { 698 break; 699 } 700 } 701 if (i == (int)totalInStreams) { 702 throw new IOException("Couldn't find stream's bind pair index"); 703 } 704 packedStreams[0] = i; 705 } else { 706 for (int i = 0; i < (int)numPackedStreams; i++) { 707 packedStreams[i] = readUint64(header); 708 } 709 } 710 folder.packedStreams = packedStreams; 711 712 return folder; 713 } 714 715 private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { 716 final int areAllDefined = getUnsignedByte(header); 717 final BitSet bits; 718 if (areAllDefined != 0) { 719 bits = new BitSet(size); 720 for (int i = 0; i < size; i++) { 721 bits.set(i, true); 722 } 723 } else { 724 bits = readBits(header, size); 725 } 726 return bits; 727 } 728 729 private BitSet readBits(final ByteBuffer header, final int size) throws IOException { 730 final BitSet bits = new BitSet(size); 731 int mask = 0; 732 int cache = 0; 733 for (int i = 0; i < size; i++) { 734 if (mask == 0) { 735 mask = 0x80; 736 cache = getUnsignedByte(header); 737 } 738 bits.set(i, (cache & mask) != 0); 739 mask >>>= 1; 740 } 741 return bits; 742 } 743 744 private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { 745 final long numFiles = readUint64(header); 746 final SevenZArchiveEntry[] files = new SevenZArchiveEntry[(int)numFiles]; 747 for (int i = 0; i < files.length; i++) { 748 files[i] = new SevenZArchiveEntry(); 749 } 750 BitSet isEmptyStream = null; 751 BitSet isEmptyFile = null; 752 BitSet isAnti = null; 753 while (true) { 754 final int propertyType = getUnsignedByte(header); 755 if (propertyType == 0) { 756 break; 757 } 758 final long size = readUint64(header); 759 switch (propertyType) { 760 case NID.kEmptyStream: { 761 isEmptyStream = readBits(header, files.length); 762 break; 763 } 764 case NID.kEmptyFile: { 765 if (isEmptyStream == null) { // protect against NPE 766 throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile"); 767 } 768 isEmptyFile = readBits(header, isEmptyStream.cardinality()); 769 break; 770 } 771 case NID.kAnti: { 772 if (isEmptyStream == null) { // protect against NPE 773 throw new IOException("Header format error: kEmptyStream must appear before kAnti"); 774 } 775 isAnti = readBits(header, isEmptyStream.cardinality()); 776 break; 777 } 778 case NID.kName: { 779 final int external = getUnsignedByte(header); 780 if (external != 0) { 781 throw new IOException("Not implemented"); 782 } 783 if (((size - 1) & 1) != 0) { 784 throw new IOException("File names length invalid"); 785 } 786 final byte[] names = new byte[(int)(size - 1)]; 787 header.get(names); 788 int nextFile = 0; 789 int nextName = 0; 790 for (int i = 0; i < names.length; i += 2) { 791 if (names[i] == 0 && names[i+1] == 0) { 792 files[nextFile++].setName(new String(names, nextName, i-nextName, CharsetNames.UTF_16LE)); 793 nextName = i + 2; 794 } 795 } 796 if (nextName != names.length || nextFile != files.length) { 797 throw new IOException("Error parsing file names"); 798 } 799 break; 800 } 801 case NID.kCTime: { 802 final BitSet timesDefined = readAllOrBits(header, files.length); 803 final int external = getUnsignedByte(header); 804 if (external != 0) { 805 throw new IOException("Unimplemented"); 806 } 807 for (int i = 0; i < files.length; i++) { 808 files[i].setHasCreationDate(timesDefined.get(i)); 809 if (files[i].getHasCreationDate()) { 810 files[i].setCreationDate(header.getLong()); 811 } 812 } 813 break; 814 } 815 case NID.kATime: { 816 final BitSet timesDefined = readAllOrBits(header, files.length); 817 final int external = getUnsignedByte(header); 818 if (external != 0) { 819 throw new IOException("Unimplemented"); 820 } 821 for (int i = 0; i < files.length; i++) { 822 files[i].setHasAccessDate(timesDefined.get(i)); 823 if (files[i].getHasAccessDate()) { 824 files[i].setAccessDate(header.getLong()); 825 } 826 } 827 break; 828 } 829 case NID.kMTime: { 830 final BitSet timesDefined = readAllOrBits(header, files.length); 831 final int external = getUnsignedByte(header); 832 if (external != 0) { 833 throw new IOException("Unimplemented"); 834 } 835 for (int i = 0; i < files.length; i++) { 836 files[i].setHasLastModifiedDate(timesDefined.get(i)); 837 if (files[i].getHasLastModifiedDate()) { 838 files[i].setLastModifiedDate(header.getLong()); 839 } 840 } 841 break; 842 } 843 case NID.kWinAttributes: { 844 final BitSet attributesDefined = readAllOrBits(header, files.length); 845 final int external = getUnsignedByte(header); 846 if (external != 0) { 847 throw new IOException("Unimplemented"); 848 } 849 for (int i = 0; i < files.length; i++) { 850 files[i].setHasWindowsAttributes(attributesDefined.get(i)); 851 if (files[i].getHasWindowsAttributes()) { 852 files[i].setWindowsAttributes(header.getInt()); 853 } 854 } 855 break; 856 } 857 case NID.kStartPos: { 858 throw new IOException("kStartPos is unsupported, please report"); 859 } 860 case NID.kDummy: { 861 // 7z 9.20 asserts the content is all zeros and ignores the property 862 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 863 864 if (skipBytesFully(header, size) < size) { 865 throw new IOException("Incomplete kDummy property"); 866 } 867 break; 868 } 869 870 default: { 871 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 872 if (skipBytesFully(header, size) < size) { 873 throw new IOException("Incomplete property of type " + propertyType); 874 } 875 break; 876 } 877 } 878 } 879 int nonEmptyFileCounter = 0; 880 int emptyFileCounter = 0; 881 for (int i = 0; i < files.length; i++) { 882 files[i].setHasStream(isEmptyStream == null || !isEmptyStream.get(i)); 883 if (files[i].hasStream()) { 884 files[i].setDirectory(false); 885 files[i].setAntiItem(false); 886 files[i].setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter)); 887 files[i].setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]); 888 files[i].setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]); 889 ++nonEmptyFileCounter; 890 } else { 891 files[i].setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter)); 892 files[i].setAntiItem(isAnti != null && isAnti.get(emptyFileCounter)); 893 files[i].setHasCrc(false); 894 files[i].setSize(0); 895 ++emptyFileCounter; 896 } 897 } 898 archive.files = files; 899 calculateStreamMap(archive); 900 } 901 902 private void calculateStreamMap(final Archive archive) throws IOException { 903 final StreamMap streamMap = new StreamMap(); 904 905 int nextFolderPackStreamIndex = 0; 906 final int numFolders = archive.folders != null ? archive.folders.length : 0; 907 streamMap.folderFirstPackStreamIndex = new int[numFolders]; 908 for (int i = 0; i < numFolders; i++) { 909 streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex; 910 nextFolderPackStreamIndex += archive.folders[i].packedStreams.length; 911 } 912 913 long nextPackStreamOffset = 0; 914 final int numPackSizes = archive.packSizes != null ? archive.packSizes.length : 0; 915 streamMap.packStreamOffsets = new long[numPackSizes]; 916 for (int i = 0; i < numPackSizes; i++) { 917 streamMap.packStreamOffsets[i] = nextPackStreamOffset; 918 nextPackStreamOffset += archive.packSizes[i]; 919 } 920 921 streamMap.folderFirstFileIndex = new int[numFolders]; 922 streamMap.fileFolderIndex = new int[archive.files.length]; 923 int nextFolderIndex = 0; 924 int nextFolderUnpackStreamIndex = 0; 925 for (int i = 0; i < archive.files.length; i++) { 926 if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) { 927 streamMap.fileFolderIndex[i] = -1; 928 continue; 929 } 930 if (nextFolderUnpackStreamIndex == 0) { 931 for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) { 932 streamMap.folderFirstFileIndex[nextFolderIndex] = i; 933 if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) { 934 break; 935 } 936 } 937 if (nextFolderIndex >= archive.folders.length) { 938 throw new IOException("Too few folders in archive"); 939 } 940 } 941 streamMap.fileFolderIndex[i] = nextFolderIndex; 942 if (!archive.files[i].hasStream()) { 943 continue; 944 } 945 ++nextFolderUnpackStreamIndex; 946 if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) { 947 ++nextFolderIndex; 948 nextFolderUnpackStreamIndex = 0; 949 } 950 } 951 952 archive.streamMap = streamMap; 953 } 954 955 private void buildDecodingStream() throws IOException { 956 final int folderIndex = archive.streamMap.fileFolderIndex[currentEntryIndex]; 957 if (folderIndex < 0) { 958 deferredBlockStreams.clear(); 959 // TODO: previously it'd return an empty stream? 960 // new BoundedInputStream(new ByteArrayInputStream(new byte[0]), 0); 961 return; 962 } 963 final SevenZArchiveEntry file = archive.files[currentEntryIndex]; 964 if (currentFolderIndex == folderIndex) { 965 // (COMPRESS-320). 966 // The current entry is within the same (potentially opened) folder. The 967 // previous stream has to be fully decoded before we can start reading 968 // but don't do it eagerly -- if the user skips over the entire folder nothing 969 // is effectively decompressed. 970 971 file.setContentMethods(archive.files[currentEntryIndex - 1].getContentMethods()); 972 } else { 973 // We're opening a new folder. Discard any queued streams/ folder stream. 974 currentFolderIndex = folderIndex; 975 deferredBlockStreams.clear(); 976 if (currentFolderInputStream != null) { 977 currentFolderInputStream.close(); 978 currentFolderInputStream = null; 979 } 980 981 final Folder folder = archive.folders[folderIndex]; 982 final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; 983 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 984 archive.streamMap.packStreamOffsets[firstPackStreamIndex]; 985 currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); 986 } 987 988 InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize()); 989 if (file.getHasCrc()) { 990 fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue()); 991 } 992 993 deferredBlockStreams.add(fileStream); 994 } 995 996 private InputStream buildDecoderStack(final Folder folder, final long folderOffset, 997 final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException { 998 channel.position(folderOffset); 999 InputStream inputStreamStack = new FilterInputStream(new BufferedInputStream( 1000 new BoundedSeekableByteChannelInputStream(channel, 1001 archive.packSizes[firstPackStreamIndex]))) { 1002 @Override 1003 public int read() throws IOException { 1004 final int r = in.read(); 1005 if (r >= 0) { 1006 count(1); 1007 } 1008 return r; 1009 } 1010 @Override 1011 public int read(final byte[] b) throws IOException { 1012 return read(b, 0, b.length); 1013 } 1014 @Override 1015 public int read(final byte[] b, final int off, final int len) throws IOException { 1016 final int r = in.read(b, off, len); 1017 if (r >= 0) { 1018 count(r); 1019 } 1020 return r; 1021 } 1022 private void count(int c) { 1023 compressedBytesReadFromCurrentEntry += c; 1024 } 1025 }; 1026 final LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>(); 1027 for (final Coder coder : folder.getOrderedCoders()) { 1028 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 1029 throw new IOException("Multi input/output stream coders are not yet supported"); 1030 } 1031 final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId); 1032 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, 1033 folder.getUnpackSizeForCoder(coder), coder, password); 1034 methods.addFirst(new SevenZMethodConfiguration(method, 1035 Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); 1036 } 1037 entry.setContentMethods(methods); 1038 if (folder.hasCrc) { 1039 return new CRC32VerifyingInputStream(inputStreamStack, 1040 folder.getUnpackSize(), folder.crc); 1041 } 1042 return inputStreamStack; 1043 } 1044 1045 /** 1046 * Reads a byte of data. 1047 * 1048 * @return the byte read, or -1 if end of input is reached 1049 * @throws IOException 1050 * if an I/O error has occurred 1051 */ 1052 public int read() throws IOException { 1053 int b = getCurrentStream().read(); 1054 if (b >= 0) { 1055 uncompressedBytesReadFromCurrentEntry++; 1056 } 1057 return b; 1058 } 1059 1060 private InputStream getCurrentStream() throws IOException { 1061 if (archive.files[currentEntryIndex].getSize() == 0) { 1062 return new ByteArrayInputStream(new byte[0]); 1063 } 1064 if (deferredBlockStreams.isEmpty()) { 1065 throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); 1066 } 1067 1068 while (deferredBlockStreams.size() > 1) { 1069 // In solid compression mode we need to decompress all leading folder' 1070 // streams to get access to an entry. We defer this until really needed 1071 // so that entire blocks can be skipped without wasting time for decompression. 1072 try (final InputStream stream = deferredBlockStreams.remove(0)) { 1073 IOUtils.skip(stream, Long.MAX_VALUE); 1074 } 1075 compressedBytesReadFromCurrentEntry = 0; 1076 } 1077 1078 return deferredBlockStreams.get(0); 1079 } 1080 1081 /** 1082 * Reads data into an array of bytes. 1083 * 1084 * @param b the array to write data to 1085 * @return the number of bytes read, or -1 if end of input is reached 1086 * @throws IOException 1087 * if an I/O error has occurred 1088 */ 1089 public int read(final byte[] b) throws IOException { 1090 return read(b, 0, b.length); 1091 } 1092 1093 /** 1094 * Reads data into an array of bytes. 1095 * 1096 * @param b the array to write data to 1097 * @param off offset into the buffer to start filling at 1098 * @param len of bytes to read 1099 * @return the number of bytes read, or -1 if end of input is reached 1100 * @throws IOException 1101 * if an I/O error has occurred 1102 */ 1103 public int read(final byte[] b, final int off, final int len) throws IOException { 1104 int cnt = getCurrentStream().read(b, off, len); 1105 if (cnt > 0) { 1106 uncompressedBytesReadFromCurrentEntry += cnt; 1107 } 1108 return cnt; 1109 } 1110 1111 /** 1112 * Provides statistics for bytes read from the current entry. 1113 * 1114 * @return statistics for bytes read from the current entry 1115 * @since 1.17 1116 */ 1117 public InputStreamStatistics getStatisticsForCurrentEntry() { 1118 return new InputStreamStatistics() { 1119 @Override 1120 public long getCompressedCount() { 1121 return compressedBytesReadFromCurrentEntry; 1122 } 1123 @Override 1124 public long getUncompressedCount() { 1125 return uncompressedBytesReadFromCurrentEntry; 1126 } 1127 }; 1128 } 1129 1130 private static long readUint64(final ByteBuffer in) throws IOException { 1131 // long rather than int as it might get shifted beyond the range of an int 1132 final long firstByte = getUnsignedByte(in); 1133 int mask = 0x80; 1134 long value = 0; 1135 for (int i = 0; i < 8; i++) { 1136 if ((firstByte & mask) == 0) { 1137 return value | ((firstByte & (mask - 1)) << (8 * i)); 1138 } 1139 final long nextByte = getUnsignedByte(in); 1140 value |= nextByte << (8 * i); 1141 mask >>>= 1; 1142 } 1143 return value; 1144 } 1145 1146 private static int getUnsignedByte(ByteBuffer buf) { 1147 return buf.get() & 0xff; 1148 } 1149 1150 /** 1151 * Checks if the signature matches what is expected for a 7z file. 1152 * 1153 * @param signature 1154 * the bytes to check 1155 * @param length 1156 * the number of bytes to check 1157 * @return true, if this is the signature of a 7z archive. 1158 * @since 1.8 1159 */ 1160 public static boolean matches(final byte[] signature, final int length) { 1161 if (length < sevenZSignature.length) { 1162 return false; 1163 } 1164 1165 for (int i = 0; i < sevenZSignature.length; i++) { 1166 if (signature[i] != sevenZSignature[i]) { 1167 return false; 1168 } 1169 } 1170 return true; 1171 } 1172 1173 private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) throws IOException { 1174 if (bytesToSkip < 1) { 1175 return 0; 1176 } 1177 int current = input.position(); 1178 int maxSkip = input.remaining(); 1179 if (maxSkip < bytesToSkip) { 1180 bytesToSkip = maxSkip; 1181 } 1182 input.position(current + (int) bytesToSkip); 1183 return bytesToSkip; 1184 } 1185 1186 private void readFully(ByteBuffer buf) throws IOException { 1187 buf.rewind(); 1188 IOUtils.readFully(channel, buf); 1189 buf.flip(); 1190 } 1191 1192 @Override 1193 public String toString() { 1194 return archive.toString(); 1195 } 1196 1197 private static final CharsetEncoder PASSWORD_ENCODER = StandardCharsets.UTF_16LE.newEncoder(); 1198 1199 private static byte[] utf16Decode(char[] chars) throws IOException { 1200 if (chars == null) { 1201 return null; 1202 } 1203 ByteBuffer encoded = PASSWORD_ENCODER.encode(CharBuffer.wrap(chars)); 1204 if (encoded.hasArray()) { 1205 return encoded.array(); 1206 } 1207 byte[] e = new byte[encoded.remaining()]; 1208 encoded.get(e); 1209 return e; 1210 } 1211}