001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.ar;
020
021import java.io.EOFException;
022import java.io.IOException;
023import java.io.InputStream;
024
025import org.apache.commons.compress.archivers.ArchiveEntry;
026import org.apache.commons.compress.archivers.ArchiveInputStream;
027import org.apache.commons.compress.utils.ArchiveUtils;
028import org.apache.commons.compress.utils.IOUtils;
029
030/**
031 * Implements the "ar" archive format as an input stream.
032 *
033 * @NotThreadSafe
034 *
035 */
036public class ArArchiveInputStream extends ArchiveInputStream {
037
038    private final InputStream input;
039    private long offset = 0;
040    private boolean closed;
041
042    /*
043     * If getNextEnxtry has been called, the entry metadata is stored in
044     * currentEntry.
045     */
046    private ArArchiveEntry currentEntry = null;
047
048    // Storage area for extra long names (GNU ar)
049    private byte[] namebuffer = null;
050
051    /*
052     * The offset where the current entry started. -1 if no entry has been
053     * called
054     */
055    private long entryOffset = -1;
056
057    // offsets and length of meta data parts
058    private static final int NAME_OFFSET = 0;
059    private static final int NAME_LEN = 16;
060    private static final int LAST_MODIFIED_OFFSET = NAME_LEN;
061    private static final int LAST_MODIFIED_LEN = 12;
062    private static final int USER_ID_OFFSET = LAST_MODIFIED_OFFSET + LAST_MODIFIED_LEN;
063    private static final int USER_ID_LEN = 6;
064    private static final int GROUP_ID_OFFSET = USER_ID_OFFSET + USER_ID_LEN;
065    private static final int GROUP_ID_LEN = 6;
066    private static final int FILE_MODE_OFFSET = GROUP_ID_OFFSET + GROUP_ID_LEN;
067    private static final int FILE_MODE_LEN = 8;
068    private static final int LENGTH_OFFSET = FILE_MODE_OFFSET + FILE_MODE_LEN;
069    private static final int LENGTH_LEN = 10;
070
071    // cached buffer for meta data - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
072    private final byte[] metaData =
073        new byte[NAME_LEN + LAST_MODIFIED_LEN + USER_ID_LEN + GROUP_ID_LEN + FILE_MODE_LEN + LENGTH_LEN];
074
075    /**
076     * Constructs an Ar input stream with the referenced stream
077     *
078     * @param pInput
079     *            the ar input stream
080     */
081    public ArArchiveInputStream(final InputStream pInput) {
082        input = pInput;
083        closed = false;
084    }
085
086    /**
087     * Returns the next AR entry in this stream.
088     *
089     * @return the next AR entry.
090     * @throws IOException
091     *             if the entry could not be read
092     */
093    public ArArchiveEntry getNextArEntry() throws IOException {
094        if (currentEntry != null) {
095            final long entryEnd = entryOffset + currentEntry.getLength();
096            long skipped = IOUtils.skip(input, entryEnd - offset);
097            trackReadBytes(skipped);
098            currentEntry = null;
099        }
100
101        if (offset == 0) {
102            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
103            final byte[] realized = new byte[expected.length];
104            final int read = IOUtils.readFully(input, realized);
105            trackReadBytes(read);
106            if (read != expected.length) {
107                throw new IOException("failed to read header. Occured at byte: " + getBytesRead());
108            }
109            for (int i = 0; i < expected.length; i++) {
110                if (expected[i] != realized[i]) {
111                    throw new IOException("invalid header " + ArchiveUtils.toAsciiString(realized));
112                }
113            }
114        }
115
116        if (offset % 2 != 0) {
117            if (input.read() < 0) {
118                // hit eof
119                return null;
120            }
121            trackReadBytes(1);
122        }
123
124        if (input.available() == 0) {
125            return null;
126        }
127
128        {
129            final int read = IOUtils.readFully(input, metaData);
130            trackReadBytes(read);
131            if (read < metaData.length) {
132                throw new IOException("truncated ar archive");
133            }
134        }
135
136        {
137            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
138            final byte[] realized = new byte[expected.length];
139            final int read = IOUtils.readFully(input, realized);
140            trackReadBytes(read);
141            if (read != expected.length) {
142                throw new IOException("failed to read entry trailer. Occured at byte: " + getBytesRead());
143            }
144            for (int i = 0; i < expected.length; i++) {
145                if (expected[i] != realized[i]) {
146                    throw new IOException("invalid entry trailer. not read the content? Occured at byte: " + getBytesRead());
147                }
148            }
149        }
150
151        entryOffset = offset;
152
153//        GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename.
154
155        // entry name is stored as ASCII string
156        String temp = ArchiveUtils.toAsciiString(metaData, NAME_OFFSET, NAME_LEN).trim();
157        if (isGNUStringTable(temp)) { // GNU extended filenames entry
158            currentEntry = readGNUStringTable(metaData, LENGTH_OFFSET, LENGTH_LEN);
159            return getNextArEntry();
160        }
161
162        long len = asLong(metaData, LENGTH_OFFSET, LENGTH_LEN);
163        if (temp.endsWith("/")) { // GNU terminator
164            temp = temp.substring(0, temp.length() - 1);
165        } else if (isGNULongName(temp)) {
166            final int off = Integer.parseInt(temp.substring(1));// get the offset
167            temp = getExtendedName(off); // convert to the long name
168        } else if (isBSDLongName(temp)) {
169            temp = getBSDLongName(temp);
170            // entry length contained the length of the file name in
171            // addition to the real length of the entry.
172            // assume file name was ASCII, there is no "standard" otherwise
173            final int nameLen = temp.length();
174            len -= nameLen;
175            entryOffset += nameLen;
176        }
177
178        currentEntry = new ArArchiveEntry(temp, len,
179                                          asInt(metaData, USER_ID_OFFSET, USER_ID_LEN, true),
180                                          asInt(metaData, GROUP_ID_OFFSET, GROUP_ID_LEN, true),
181                                          asInt(metaData, FILE_MODE_OFFSET, FILE_MODE_LEN, 8),
182                                          asLong(metaData, LAST_MODIFIED_OFFSET, LAST_MODIFIED_LEN));
183        return currentEntry;
184    }
185
186    /**
187     * Get an extended name from the GNU extended name buffer.
188     *
189     * @param offset pointer to entry within the buffer
190     * @return the extended file name; without trailing "/" if present.
191     * @throws IOException if name not found or buffer not set up
192     */
193    private String getExtendedName(final int offset) throws IOException {
194        if (namebuffer == null) {
195            throw new IOException("Cannot process GNU long filename as no // record was found");
196        }
197        for (int i = offset; i < namebuffer.length; i++) {
198            if (namebuffer[i] == '\012' || namebuffer[i] == 0) {
199                if (namebuffer[i - 1] == '/') {
200                    i--; // drop trailing /
201                }
202                return ArchiveUtils.toAsciiString(namebuffer, offset, i - offset);
203            }
204        }
205        throw new IOException("Failed to read entry: " + offset);
206    }
207
208    private long asLong(final byte[] byteArray, int offset, int len) {
209        return Long.parseLong(ArchiveUtils.toAsciiString(byteArray, offset, len).trim());
210    }
211
212    private int asInt(final byte[] byteArray, int offset, int len) {
213        return asInt(byteArray, offset, len, 10, false);
214    }
215
216    private int asInt(final byte[] byteArray, int offset, int len, final boolean treatBlankAsZero) {
217        return asInt(byteArray, offset, len, 10, treatBlankAsZero);
218    }
219
220    private int asInt(final byte[] byteArray, int offset, int len, final int base) {
221        return asInt(byteArray, offset, len, base, false);
222    }
223
224    private int asInt(final byte[] byteArray, int offset, int len, final int base, final boolean treatBlankAsZero) {
225        final String string = ArchiveUtils.toAsciiString(byteArray, offset, len).trim();
226        if (string.length() == 0 && treatBlankAsZero) {
227            return 0;
228        }
229        return Integer.parseInt(string, base);
230    }
231
232    /*
233     * (non-Javadoc)
234     *
235     * @see
236     * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
237     */
238    @Override
239    public ArchiveEntry getNextEntry() throws IOException {
240        return getNextArEntry();
241    }
242
243    /*
244     * (non-Javadoc)
245     *
246     * @see java.io.InputStream#close()
247     */
248    @Override
249    public void close() throws IOException {
250        if (!closed) {
251            closed = true;
252            input.close();
253        }
254        currentEntry = null;
255    }
256
257    /*
258     * (non-Javadoc)
259     *
260     * @see java.io.InputStream#read(byte[], int, int)
261     */
262    @Override
263    public int read(final byte[] b, final int off, final int len) throws IOException {
264        if (currentEntry == null) {
265            throw new IllegalStateException("No current ar entry");
266        }
267        int toRead = len;
268        final long entryEnd = entryOffset + currentEntry.getLength();
269        if (len > 0 && entryEnd > offset) {
270            toRead = (int) Math.min(len, entryEnd - offset);
271        } else {
272            return -1;
273        }
274        final int ret = this.input.read(b, off, toRead);
275        trackReadBytes(ret);
276        return ret;
277    }
278
279    /**
280     * Checks if the signature matches ASCII "!&lt;arch&gt;" followed by a single LF
281     * control character
282     *
283     * @param signature
284     *            the bytes to check
285     * @param length
286     *            the number of bytes to check
287     * @return true, if this stream is an Ar archive stream, false otherwise
288     */
289    public static boolean matches(final byte[] signature, final int length) {
290        // 3c21 7261 6863 0a3e
291
292        return length >= 8 && signature[0] == 0x21 &&
293                signature[1] == 0x3c && signature[2] == 0x61 &&
294                signature[3] == 0x72 && signature[4] == 0x63 &&
295                signature[5] == 0x68 && signature[6] == 0x3e &&
296                signature[7] == 0x0a;
297    }
298
299    static final String BSD_LONGNAME_PREFIX = "#1/";
300    private static final int BSD_LONGNAME_PREFIX_LEN =
301        BSD_LONGNAME_PREFIX.length();
302    private static final String BSD_LONGNAME_PATTERN =
303        "^" + BSD_LONGNAME_PREFIX + "\\d+";
304
305    /**
306     * Does the name look like it is a long name (or a name containing
307     * spaces) as encoded by BSD ar?
308     *
309     * <p>From the FreeBSD ar(5) man page:</p>
310     * <pre>
311     * BSD   In the BSD variant, names that are shorter than 16
312     *       characters and without embedded spaces are stored
313     *       directly in this field.  If a name has an embedded
314     *       space, or if it is longer than 16 characters, then
315     *       the string "#1/" followed by the decimal represen-
316     *       tation of the length of the file name is placed in
317     *       this field. The actual file name is stored immedi-
318     *       ately after the archive header.  The content of the
319     *       archive member follows the file name.  The ar_size
320     *       field of the header (see below) will then hold the
321     *       sum of the size of the file name and the size of
322     *       the member.
323     * </pre>
324     *
325     * @since 1.3
326     */
327    private static boolean isBSDLongName(final String name) {
328        return name != null && name.matches(BSD_LONGNAME_PATTERN);
329    }
330
331    /**
332     * Reads the real name from the current stream assuming the very
333     * first bytes to be read are the real file name.
334     *
335     * @see #isBSDLongName
336     *
337     * @since 1.3
338     */
339    private String getBSDLongName(final String bsdLongName) throws IOException {
340        final int nameLen =
341            Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN));
342        final byte[] name = new byte[nameLen];
343        final int read = IOUtils.readFully(input, name);
344        trackReadBytes(read);
345        if (read != nameLen) {
346            throw new EOFException();
347        }
348        return ArchiveUtils.toAsciiString(name);
349    }
350
351    private static final String GNU_STRING_TABLE_NAME = "//";
352
353    /**
354     * Is this the name of the "Archive String Table" as used by
355     * SVR4/GNU to store long file names?
356     *
357     * <p>GNU ar stores multiple extended filenames in the data section
358     * of a file with the name "//", this record is referred to by
359     * future headers.</p>
360     *
361     * <p>A header references an extended filename by storing a "/"
362     * followed by a decimal offset to the start of the filename in
363     * the extended filename data section.</p>
364     *
365     * <p>The format of the "//" file itself is simply a list of the
366     * long filenames, each separated by one or more LF
367     * characters. Note that the decimal offsets are number of
368     * characters, not line or string number within the "//" file.</p>
369     */
370    private static boolean isGNUStringTable(final String name) {
371        return GNU_STRING_TABLE_NAME.equals(name);
372    }
373
374    private void trackReadBytes(final long read) {
375        count(read);
376        if (read > 0) {
377            offset += read;
378        }
379    }
380
381    /**
382     * Reads the GNU archive String Table.
383     *
384     * @see #isGNUStringTable
385     */
386    private ArArchiveEntry readGNUStringTable(final byte[] length, final int offset, final int len) throws IOException {
387        final int bufflen = asInt(length, offset, len); // Assume length will fit in an int
388        namebuffer = new byte[bufflen];
389        final int read = IOUtils.readFully(input, namebuffer, 0, bufflen);
390        trackReadBytes(read);
391        if (read != bufflen){
392            throw new IOException("Failed to read complete // record: expected="
393                                  + bufflen + " read=" + read);
394        }
395        return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen);
396    }
397
398    private static final String GNU_LONGNAME_PATTERN = "^/\\d+";
399
400    /**
401     * Does the name look like it is a long name (or a name containing
402     * spaces) as encoded by SVR4/GNU ar?
403     *
404     * @see #isGNUStringTable
405     */
406    private boolean isGNULongName(final String name) {
407        return name != null && name.matches(GNU_LONGNAME_PATTERN);
408    }
409}