001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.ar; 020 021import java.io.EOFException; 022import java.io.IOException; 023import java.io.InputStream; 024 025import org.apache.commons.compress.archivers.ArchiveEntry; 026import org.apache.commons.compress.archivers.ArchiveInputStream; 027import org.apache.commons.compress.utils.ArchiveUtils; 028import org.apache.commons.compress.utils.IOUtils; 029 030/** 031 * Implements the "ar" archive format as an input stream. 032 * 033 * @NotThreadSafe 034 * 035 */ 036public class ArArchiveInputStream extends ArchiveInputStream { 037 038 private final InputStream input; 039 private long offset = 0; 040 private boolean closed; 041 042 /* 043 * If getNextEnxtry has been called, the entry metadata is stored in 044 * currentEntry. 045 */ 046 private ArArchiveEntry currentEntry = null; 047 048 // Storage area for extra long names (GNU ar) 049 private byte[] namebuffer = null; 050 051 /* 052 * The offset where the current entry started. -1 if no entry has been 053 * called 054 */ 055 private long entryOffset = -1; 056 057 // offsets and length of meta data parts 058 private static final int NAME_OFFSET = 0; 059 private static final int NAME_LEN = 16; 060 private static final int LAST_MODIFIED_OFFSET = NAME_LEN; 061 private static final int LAST_MODIFIED_LEN = 12; 062 private static final int USER_ID_OFFSET = LAST_MODIFIED_OFFSET + LAST_MODIFIED_LEN; 063 private static final int USER_ID_LEN = 6; 064 private static final int GROUP_ID_OFFSET = USER_ID_OFFSET + USER_ID_LEN; 065 private static final int GROUP_ID_LEN = 6; 066 private static final int FILE_MODE_OFFSET = GROUP_ID_OFFSET + GROUP_ID_LEN; 067 private static final int FILE_MODE_LEN = 8; 068 private static final int LENGTH_OFFSET = FILE_MODE_OFFSET + FILE_MODE_LEN; 069 private static final int LENGTH_LEN = 10; 070 071 // cached buffer for meta data - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 072 private final byte[] metaData = 073 new byte[NAME_LEN + LAST_MODIFIED_LEN + USER_ID_LEN + GROUP_ID_LEN + FILE_MODE_LEN + LENGTH_LEN]; 074 075 /** 076 * Constructs an Ar input stream with the referenced stream 077 * 078 * @param pInput 079 * the ar input stream 080 */ 081 public ArArchiveInputStream(final InputStream pInput) { 082 input = pInput; 083 closed = false; 084 } 085 086 /** 087 * Returns the next AR entry in this stream. 088 * 089 * @return the next AR entry. 090 * @throws IOException 091 * if the entry could not be read 092 */ 093 public ArArchiveEntry getNextArEntry() throws IOException { 094 if (currentEntry != null) { 095 final long entryEnd = entryOffset + currentEntry.getLength(); 096 long skipped = IOUtils.skip(input, entryEnd - offset); 097 trackReadBytes(skipped); 098 currentEntry = null; 099 } 100 101 if (offset == 0) { 102 final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER); 103 final byte[] realized = new byte[expected.length]; 104 final int read = IOUtils.readFully(input, realized); 105 trackReadBytes(read); 106 if (read != expected.length) { 107 throw new IOException("failed to read header. Occured at byte: " + getBytesRead()); 108 } 109 for (int i = 0; i < expected.length; i++) { 110 if (expected[i] != realized[i]) { 111 throw new IOException("invalid header " + ArchiveUtils.toAsciiString(realized)); 112 } 113 } 114 } 115 116 if (offset % 2 != 0) { 117 if (input.read() < 0) { 118 // hit eof 119 return null; 120 } 121 trackReadBytes(1); 122 } 123 124 if (input.available() == 0) { 125 return null; 126 } 127 128 { 129 final int read = IOUtils.readFully(input, metaData); 130 trackReadBytes(read); 131 if (read < metaData.length) { 132 throw new IOException("truncated ar archive"); 133 } 134 } 135 136 { 137 final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER); 138 final byte[] realized = new byte[expected.length]; 139 final int read = IOUtils.readFully(input, realized); 140 trackReadBytes(read); 141 if (read != expected.length) { 142 throw new IOException("failed to read entry trailer. Occured at byte: " + getBytesRead()); 143 } 144 for (int i = 0; i < expected.length; i++) { 145 if (expected[i] != realized[i]) { 146 throw new IOException("invalid entry trailer. not read the content? Occured at byte: " + getBytesRead()); 147 } 148 } 149 } 150 151 entryOffset = offset; 152 153// GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename. 154 155 // entry name is stored as ASCII string 156 String temp = ArchiveUtils.toAsciiString(metaData, NAME_OFFSET, NAME_LEN).trim(); 157 if (isGNUStringTable(temp)) { // GNU extended filenames entry 158 currentEntry = readGNUStringTable(metaData, LENGTH_OFFSET, LENGTH_LEN); 159 return getNextArEntry(); 160 } 161 162 long len = asLong(metaData, LENGTH_OFFSET, LENGTH_LEN); 163 if (temp.endsWith("/")) { // GNU terminator 164 temp = temp.substring(0, temp.length() - 1); 165 } else if (isGNULongName(temp)) { 166 final int off = Integer.parseInt(temp.substring(1));// get the offset 167 temp = getExtendedName(off); // convert to the long name 168 } else if (isBSDLongName(temp)) { 169 temp = getBSDLongName(temp); 170 // entry length contained the length of the file name in 171 // addition to the real length of the entry. 172 // assume file name was ASCII, there is no "standard" otherwise 173 final int nameLen = temp.length(); 174 len -= nameLen; 175 entryOffset += nameLen; 176 } 177 178 currentEntry = new ArArchiveEntry(temp, len, 179 asInt(metaData, USER_ID_OFFSET, USER_ID_LEN, true), 180 asInt(metaData, GROUP_ID_OFFSET, GROUP_ID_LEN, true), 181 asInt(metaData, FILE_MODE_OFFSET, FILE_MODE_LEN, 8), 182 asLong(metaData, LAST_MODIFIED_OFFSET, LAST_MODIFIED_LEN)); 183 return currentEntry; 184 } 185 186 /** 187 * Get an extended name from the GNU extended name buffer. 188 * 189 * @param offset pointer to entry within the buffer 190 * @return the extended file name; without trailing "/" if present. 191 * @throws IOException if name not found or buffer not set up 192 */ 193 private String getExtendedName(final int offset) throws IOException { 194 if (namebuffer == null) { 195 throw new IOException("Cannot process GNU long filename as no // record was found"); 196 } 197 for (int i = offset; i < namebuffer.length; i++) { 198 if (namebuffer[i] == '\012' || namebuffer[i] == 0) { 199 if (namebuffer[i - 1] == '/') { 200 i--; // drop trailing / 201 } 202 return ArchiveUtils.toAsciiString(namebuffer, offset, i - offset); 203 } 204 } 205 throw new IOException("Failed to read entry: " + offset); 206 } 207 208 private long asLong(final byte[] byteArray, int offset, int len) { 209 return Long.parseLong(ArchiveUtils.toAsciiString(byteArray, offset, len).trim()); 210 } 211 212 private int asInt(final byte[] byteArray, int offset, int len) { 213 return asInt(byteArray, offset, len, 10, false); 214 } 215 216 private int asInt(final byte[] byteArray, int offset, int len, final boolean treatBlankAsZero) { 217 return asInt(byteArray, offset, len, 10, treatBlankAsZero); 218 } 219 220 private int asInt(final byte[] byteArray, int offset, int len, final int base) { 221 return asInt(byteArray, offset, len, base, false); 222 } 223 224 private int asInt(final byte[] byteArray, int offset, int len, final int base, final boolean treatBlankAsZero) { 225 final String string = ArchiveUtils.toAsciiString(byteArray, offset, len).trim(); 226 if (string.length() == 0 && treatBlankAsZero) { 227 return 0; 228 } 229 return Integer.parseInt(string, base); 230 } 231 232 /* 233 * (non-Javadoc) 234 * 235 * @see 236 * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry() 237 */ 238 @Override 239 public ArchiveEntry getNextEntry() throws IOException { 240 return getNextArEntry(); 241 } 242 243 /* 244 * (non-Javadoc) 245 * 246 * @see java.io.InputStream#close() 247 */ 248 @Override 249 public void close() throws IOException { 250 if (!closed) { 251 closed = true; 252 input.close(); 253 } 254 currentEntry = null; 255 } 256 257 /* 258 * (non-Javadoc) 259 * 260 * @see java.io.InputStream#read(byte[], int, int) 261 */ 262 @Override 263 public int read(final byte[] b, final int off, final int len) throws IOException { 264 if (currentEntry == null) { 265 throw new IllegalStateException("No current ar entry"); 266 } 267 int toRead = len; 268 final long entryEnd = entryOffset + currentEntry.getLength(); 269 if (len > 0 && entryEnd > offset) { 270 toRead = (int) Math.min(len, entryEnd - offset); 271 } else { 272 return -1; 273 } 274 final int ret = this.input.read(b, off, toRead); 275 trackReadBytes(ret); 276 return ret; 277 } 278 279 /** 280 * Checks if the signature matches ASCII "!<arch>" followed by a single LF 281 * control character 282 * 283 * @param signature 284 * the bytes to check 285 * @param length 286 * the number of bytes to check 287 * @return true, if this stream is an Ar archive stream, false otherwise 288 */ 289 public static boolean matches(final byte[] signature, final int length) { 290 // 3c21 7261 6863 0a3e 291 292 return length >= 8 && signature[0] == 0x21 && 293 signature[1] == 0x3c && signature[2] == 0x61 && 294 signature[3] == 0x72 && signature[4] == 0x63 && 295 signature[5] == 0x68 && signature[6] == 0x3e && 296 signature[7] == 0x0a; 297 } 298 299 static final String BSD_LONGNAME_PREFIX = "#1/"; 300 private static final int BSD_LONGNAME_PREFIX_LEN = 301 BSD_LONGNAME_PREFIX.length(); 302 private static final String BSD_LONGNAME_PATTERN = 303 "^" + BSD_LONGNAME_PREFIX + "\\d+"; 304 305 /** 306 * Does the name look like it is a long name (or a name containing 307 * spaces) as encoded by BSD ar? 308 * 309 * <p>From the FreeBSD ar(5) man page:</p> 310 * <pre> 311 * BSD In the BSD variant, names that are shorter than 16 312 * characters and without embedded spaces are stored 313 * directly in this field. If a name has an embedded 314 * space, or if it is longer than 16 characters, then 315 * the string "#1/" followed by the decimal represen- 316 * tation of the length of the file name is placed in 317 * this field. The actual file name is stored immedi- 318 * ately after the archive header. The content of the 319 * archive member follows the file name. The ar_size 320 * field of the header (see below) will then hold the 321 * sum of the size of the file name and the size of 322 * the member. 323 * </pre> 324 * 325 * @since 1.3 326 */ 327 private static boolean isBSDLongName(final String name) { 328 return name != null && name.matches(BSD_LONGNAME_PATTERN); 329 } 330 331 /** 332 * Reads the real name from the current stream assuming the very 333 * first bytes to be read are the real file name. 334 * 335 * @see #isBSDLongName 336 * 337 * @since 1.3 338 */ 339 private String getBSDLongName(final String bsdLongName) throws IOException { 340 final int nameLen = 341 Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN)); 342 final byte[] name = new byte[nameLen]; 343 final int read = IOUtils.readFully(input, name); 344 trackReadBytes(read); 345 if (read != nameLen) { 346 throw new EOFException(); 347 } 348 return ArchiveUtils.toAsciiString(name); 349 } 350 351 private static final String GNU_STRING_TABLE_NAME = "//"; 352 353 /** 354 * Is this the name of the "Archive String Table" as used by 355 * SVR4/GNU to store long file names? 356 * 357 * <p>GNU ar stores multiple extended filenames in the data section 358 * of a file with the name "//", this record is referred to by 359 * future headers.</p> 360 * 361 * <p>A header references an extended filename by storing a "/" 362 * followed by a decimal offset to the start of the filename in 363 * the extended filename data section.</p> 364 * 365 * <p>The format of the "//" file itself is simply a list of the 366 * long filenames, each separated by one or more LF 367 * characters. Note that the decimal offsets are number of 368 * characters, not line or string number within the "//" file.</p> 369 */ 370 private static boolean isGNUStringTable(final String name) { 371 return GNU_STRING_TABLE_NAME.equals(name); 372 } 373 374 private void trackReadBytes(final long read) { 375 count(read); 376 if (read > 0) { 377 offset += read; 378 } 379 } 380 381 /** 382 * Reads the GNU archive String Table. 383 * 384 * @see #isGNUStringTable 385 */ 386 private ArArchiveEntry readGNUStringTable(final byte[] length, final int offset, final int len) throws IOException { 387 final int bufflen = asInt(length, offset, len); // Assume length will fit in an int 388 namebuffer = new byte[bufflen]; 389 final int read = IOUtils.readFully(input, namebuffer, 0, bufflen); 390 trackReadBytes(read); 391 if (read != bufflen){ 392 throw new IOException("Failed to read complete // record: expected=" 393 + bufflen + " read=" + read); 394 } 395 return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen); 396 } 397 398 private static final String GNU_LONGNAME_PATTERN = "^/\\d+"; 399 400 /** 401 * Does the name look like it is a long name (or a name containing 402 * spaces) as encoded by SVR4/GNU ar? 403 * 404 * @see #isGNUStringTable 405 */ 406 private boolean isGNULongName(final String name) { 407 return name != null && name.matches(GNU_LONGNAME_PATTERN); 408 } 409}