/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.dump;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Stack;

/**
 * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 *
 * There doesn't seem to be a hint on the encoding of string values
 * in any piece of documentation.  Given that the main purpose of
 * dump/restore is backing up a system, it seems very likely the format
 * uses the current default encoding of the system.
 *
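 * <p>A minimal usage sketch; the file name and the processing loop are
 * illustrative only, not part of this class:</p>
 *
 * <pre>{@code
 * try (InputStream in = Files.newInputStream(Paths.get("backup.dump"))) {
 *     DumpArchiveInputStream dump = new DumpArchiveInputStream(in);
 *     DumpArchiveEntry entry;
 *     while ((entry = dump.getNextEntry()) != null) {
 *         // inspect entry metadata, then read() the contents if needed
 *     }
 * }
 * }</pre>
 *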
 * @NotThreadSafe
 */
public class DumpArchiveInputStream extends ArchiveInputStream {
    private DumpArchiveSummary summary;
    private DumpArchiveEntry active;
    private boolean isClosed;
    private boolean hasHitEOF;
    private long entrySize;
    private long entryOffset;
    private int readIdx;
    private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
    private byte[] blockBuffer;
    private int recordOffset;
    private long filepos;
    protected TapeInputStream raw;

    // map of ino -> dirent entry. We can use this to reconstruct full paths.
    private final Map<Integer, Dirent> names = new HashMap<>();

    // map of ino -> (directory) entry when we're missing one or more elements in the path.
    private final Map<Integer, DumpArchiveEntry> pending = new HashMap<>();

    // queue of (directory) entries where we now have the full path.
    private Queue<DumpArchiveEntry> queue;

    /**
     * The encoding to use for filenames and labels.
     */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /**
     * Constructor using the platform's default encoding for file
     * names.
     *
     * @param is stream to read from
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(final InputStream is) throws ArchiveException {
        this(is, null);
    }

    /**
     * Constructor.
     *
     * @param is stream to read from
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.6
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(final InputStream is, final String encoding)
        throws ArchiveException {
        this.raw = new TapeInputStream(is);
        this.hasHitEOF = false;
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);

        try {
            // read header, verify it's a dump archive.
            final byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new UnrecognizedFormatException();
            }

            // get summary information
            summary = new DumpArchiveSummary(headerBytes, this.zipEncoding);

            // reset buffer with actual block size.
            raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());

            // allocate our read buffer.
            blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];

            // skip past CLRI and BITS segments since we don't handle them yet.
            readCLRI();
            readBITS();
        } catch (final IOException ex) {
            throw new ArchiveException(ex.getMessage(), ex);
        }

        // put in a dummy record for the root node.
        final Dirent root = new Dirent(2, 2, 4, ".");
        names.put(2, root);

        // use a priority queue to ensure parent directories are
        // released before their children.
        queue = new PriorityQueue<>(10,
                new Comparator<DumpArchiveEntry>() {
                    @Override
                    public int compare(final DumpArchiveEntry p, final DumpArchiveEntry q) {
                        if (p.getOriginalName() == null || q.getOriginalName() == null) {
                            return Integer.MAX_VALUE;
                        }

                        return p.getOriginalName().compareTo(q.getOriginalName());
                    }
                });
    }

    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesRead();
    }

    @Override
    public long getBytesRead() {
        return raw.getBytesRead();
    }

    /**
     * Return the archive summary information.
     * @return the summary
     */
    public DumpArchiveSummary getSummary() {
        return summary;
    }

    /**
     * Read CLRI (deleted inode) segment.
     */
    private void readCLRI() throws IOException {
        final byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Read BITS segment.
     */
    private void readBITS() throws IOException {
        final byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Read the next entry.
     * @return the next entry
     * @throws IOException on error
     */
    public DumpArchiveEntry getNextDumpEntry() throws IOException {
        return getNextEntry();
    }

    @Override
    public DumpArchiveEntry getNextEntry() throws IOException {
        DumpArchiveEntry entry = null;
        String path = null;

        // is there anything in the queue?
        if (!queue.isEmpty()) {
            return queue.remove();
        }

        while (entry == null) {
            if (hasHitEOF) {
                return null;
            }

            // skip any remaining records in this segment for the prior file.
            // we might still have holes... easiest to do it
            // block by block. We may want to revisit this if
            // the unnecessary decompression time adds up.
            while (readIdx < active.getHeaderCount()) {
                if (!active.isSparseRecord(readIdx++)
                    && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
                    throw new EOFException();
                }
            }

            readIdx = 0;
            filepos = raw.getBytesRead();

            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new InvalidFormatException();
            }

            active = DumpArchiveEntry.parse(headerBytes);

            // skip any remaining segments for the prior file.
            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
                if (raw.skip((long) DumpArchiveConstants.TP_SIZE
                             * (active.getHeaderCount()
                                - active.getHeaderHoles())) == -1) {
                    throw new EOFException();
                }

                filepos = raw.getBytesRead();
                headerBytes = raw.readRecord();

                if (!DumpArchiveUtil.verify(headerBytes)) {
                    throw new InvalidFormatException();
                }

                active = DumpArchiveEntry.parse(headerBytes);
            }

            // check if this is an end-of-volume marker.
            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
                hasHitEOF = true;

                return null;
            }

            entry = active;

            if (entry.isDirectory()) {
                readDirectoryEntry(active);

                // the directory data was consumed by readDirectoryEntry(),
                // so present this entry as a zero-length stream.
                entryOffset = 0;
                entrySize = 0;
                readIdx = active.getHeaderCount();
            } else {
                entryOffset = 0;
                entrySize = active.getEntrySize();
                readIdx = 0;
            }

            recordOffset = readBuf.length;

            path = getPath(entry);

            if (path == null) {
                entry = null;
            }
        }

        entry.setName(path);
        entry.setSimpleName(names.get(entry.getIno()).getName());
        entry.setOffset(filepos);

        return entry;
    }

    /**
     * Read directory entry.
     */
    private void readDirectoryEntry(DumpArchiveEntry entry)
        throws IOException {
        long size = entry.getEntrySize();
        boolean first = true;

        while (first ||
                DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) {
            // read the header that we just peeked at.
            if (!first) {
                raw.readRecord();
            }

            if (!names.containsKey(entry.getIno()) &&
                    DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) {
                pending.put(entry.getIno(), entry);
            }

            final int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();

            if (blockBuffer.length < datalen) {
                blockBuffer = new byte[datalen];
            }

            if (raw.read(blockBuffer, 0, datalen) != datalen) {
                throw new EOFException();
            }

            int reclen = 0;

            for (int i = 0; i < datalen - 8 && i < size - 8;
                    i += reclen) {
                final int ino = DumpArchiveUtil.convert32(blockBuffer, i);
                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);

                // a zero-length record would never advance i; treat it as
                // a corrupt archive rather than looping forever.
                if (reclen == 0) {
                    throw new DumpArchiveException("zero-length directory record");
                }

                final byte type = blockBuffer[i + 6];

                final String name = DumpArchiveUtil.decode(zipEncoding, blockBuffer, i + 8, blockBuffer[i + 7]);

                if (".".equals(name) || "..".equals(name)) {
                    // do nothing...
                    continue;
                }

                final Dirent d = new Dirent(ino, entry.getIno(), type, name);

                names.put(ino, d);

                // check whether this allows us to fill anything in the pending list.
                for (final Map.Entry<Integer, DumpArchiveEntry> e : pending.entrySet()) {
                    final String path = getPath(e.getValue());

                    if (path != null) {
                        e.getValue().setName(path);
                        e.getValue()
                         .setSimpleName(names.get(e.getKey()).getName());
                        queue.add(e.getValue());
                    }
                }

                // remove anything that we found. (We can't do it earlier
                // because of concurrent modification exceptions.)
                for (final DumpArchiveEntry e : queue) {
                    pending.remove(e.getIno());
                }
            }

            final byte[] peekBytes = raw.peek();

            if (!DumpArchiveUtil.verify(peekBytes)) {
                throw new InvalidFormatException();
            }

            entry = DumpArchiveEntry.parse(peekBytes);
            first = false;
            size -= DumpArchiveConstants.TP_SIZE;
        }
    }

    /**
     * Get full path for specified archive entry, or null if there's a gap.
     *
     * @param entry the entry to resolve
     * @return full path for specified archive entry, or null if there's a gap.
     */
    private String getPath(final DumpArchiveEntry entry) {
        // build the stack of elements. It's possible that we're
        // still missing an intermediate value, in which case we
        // defer this entry until the gap is filled.
        final Stack<String> elements = new Stack<>();
        Dirent dirent = null;

        for (int i = entry.getIno();; i = dirent.getParentIno()) {
            if (!names.containsKey(i)) {
                elements.clear();
                break;
            }

            dirent = names.get(i);
            elements.push(dirent.getName());

            if (dirent.getIno() == dirent.getParentIno()) {
                break;
            }
        }

        // if an element is missing, defer the work and read the next entry.
        if (elements.isEmpty()) {
            pending.put(entry.getIno(), entry);

            return null;
        }

        // generate the full path from the stack of elements.
        final StringBuilder sb = new StringBuilder(elements.pop());

        while (!elements.isEmpty()) {
            sb.append('/');
            sb.append(elements.pop());
        }

        return sb.toString();
    }

    /**
     * Reads bytes from the current dump archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
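     * <p>A minimal sketch of draining the current entry, assuming
     * {@code dump} is this stream and {@code out} is some destination
     * stream:</p>
     *
     * <pre>{@code
     * byte[] buffer = new byte[1024];   // size is arbitrary
     * int n;
     * while ((n = dump.read(buffer, 0, buffer.length)) != -1) {
     *     out.write(buffer, 0, n);      // out is an assumed OutputStream
     * }
     * }</pre>
     *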
     * @param buf The buffer into which to place bytes read.
     * @param off The offset at which to place bytes read.
     * @param len The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(final byte[] buf, int off, int len) throws IOException {
        int totalRead = 0;

        if (hasHitEOF || isClosed || entryOffset >= entrySize) {
            return -1;
        }

        if (active == null) {
            throw new IllegalStateException("No current dump entry");
        }

        if (len + entryOffset > entrySize) {
            len = (int) (entrySize - entryOffset);
        }

        while (len > 0) {
            final int sz = Math.min(len, readBuf.length - recordOffset);

            // copy any data we have
            if (recordOffset + sz <= readBuf.length) {
                System.arraycopy(readBuf, recordOffset, buf, off, sz);
                totalRead += sz;
                recordOffset += sz;
                len -= sz;
                off += sz;
            }

            // load the next block if necessary.
            if (len > 0) {
                // a segment header describes at most 512 records; once
                // they are consumed, read the next segment header.
                if (readIdx >= 512) {
                    final byte[] headerBytes = raw.readRecord();

                    if (!DumpArchiveUtil.verify(headerBytes)) {
                        throw new InvalidFormatException();
                    }

                    active = DumpArchiveEntry.parse(headerBytes);
                    readIdx = 0;
                }

                if (!active.isSparseRecord(readIdx++)) {
                    final int r = raw.read(readBuf, 0, readBuf.length);
                    if (r != readBuf.length) {
                        throw new EOFException();
                    }
                } else {
                    Arrays.fill(readBuf, (byte) 0);
                }

                recordOffset = 0;
            }
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Closes this stream, including the underlying raw tape stream.
     */
    @Override
    public void close() throws IOException {
        if (!isClosed) {
            isClosed = true;
            raw.close();
        }
    }

    /**
     * Look at the first few bytes of the file to decide if it's a dump
     * archive. With 32 bytes we can look at the magic value, with a full
     * 1k we can verify the checksum.
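     *
     * <p>A hypothetical probe, assuming {@code in} is an InputStream
     * positioned at the start of the data:</p>
     *
     * <pre>{@code
     * byte[] signature = new byte[32];  // enough for the magic value
     * int n = in.read(signature);       // in is an assumed InputStream
     * if (n == 32 && DumpArchiveInputStream.matches(signature, n)) {
     *     // looks like a dump archive
     * }
     * }</pre>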
     * @param buffer data to match
     * @param length length of data
     * @return whether the buffer seems to contain dump data
     */
    public static boolean matches(final byte[] buffer, final int length) {
        // do we have enough of the header?
        if (length < 32) {
            return false;
        }

        // this is the best test: verify the full record checksum.
        if (length >= DumpArchiveConstants.TP_SIZE) {
            return DumpArchiveUtil.verify(buffer);
        }

        // this will work in a pinch: check the magic value alone.
        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer, 24);
    }

}