001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.z;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.nio.ByteOrder;
024
025import org.apache.commons.compress.compressors.lzw.LZWInputStream;
026
027/**
028 * Input stream that decompresses .Z files.
029 * @NotThreadSafe
030 * @since 1.7
031 */
032public class ZCompressorInputStream extends LZWInputStream {
033    private static final int MAGIC_1 = 0x1f;
034    private static final int MAGIC_2 = 0x9d;
035    private static final int BLOCK_MODE_MASK = 0x80;
036    private static final int MAX_CODE_SIZE_MASK = 0x1f;
037    private final boolean blockMode;
038    private final int maxCodeSize;
039    private long totalCodesRead = 0;
040
041    public ZCompressorInputStream(final InputStream inputStream, final int memoryLimitInKb)
042            throws IOException {
043        super(inputStream, ByteOrder.LITTLE_ENDIAN);
044        final int firstByte = (int) in.readBits(8);
045        final int secondByte = (int) in.readBits(8);
046        final int thirdByte = (int) in.readBits(8);
047        if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) {
048            throw new IOException("Input is not in .Z format");
049        }
050        blockMode = (thirdByte & BLOCK_MODE_MASK) != 0;
051        maxCodeSize = thirdByte & MAX_CODE_SIZE_MASK;
052        if (blockMode) {
053            setClearCode(DEFAULT_CODE_SIZE);
054        }
055        initializeTables(maxCodeSize, memoryLimitInKb);
056        clearEntries();
057    }
058
059    public ZCompressorInputStream(final InputStream inputStream) throws IOException {
060        this(inputStream, -1);
061    }
062
063    private void clearEntries() {
064        setTableSize((1 << 8) + (blockMode ? 1 : 0));
065    }
066
067    /**
068     * {@inheritDoc}
069     * <p><strong>This method is only protected for technical reasons
070     * and is not part of Commons Compress' published API.  It may
071     * change or disappear without warning.</strong></p>
072     */
073    @Override
074    protected int readNextCode() throws IOException {
075        final int code = super.readNextCode();
076        if (code >= 0) {
077            ++totalCodesRead;
078        }
079        return code;
080    }
081
082    private void reAlignReading() throws IOException {
083        // "compress" works in multiples of 8 symbols, each codeBits bits long.
084        // When codeBits changes, the remaining unused symbols in the current
085        // group of 8 are still written out, in the old codeSize,
086        // as garbage values (usually zeroes) that need to be skipped.
087        long codeReadsToThrowAway = 8 - (totalCodesRead % 8);
088        if (codeReadsToThrowAway == 8) {
089            codeReadsToThrowAway = 0;
090        }
091        for (long i = 0; i < codeReadsToThrowAway; i++) {
092            readNextCode();
093        }
094        in.clearBitCache();
095    }
096
097    /**
098     * {@inheritDoc}
099     * <p><strong>This method is only protected for technical reasons
100     * and is not part of Commons Compress' published API.  It may
101     * change or disappear without warning.</strong></p>
102     */
103    @Override
104    protected int addEntry(final int previousCode, final byte character) throws IOException {
105        final int maxTableSize = 1 << getCodeSize();
106        final int r = addEntry(previousCode, character, maxTableSize);
107        if (getTableSize() == maxTableSize && getCodeSize() < maxCodeSize) {
108            reAlignReading();
109            incrementCodeSize();
110        }
111        return r;
112    }
113
114    /**
115     * {@inheritDoc}
116     * <p><strong>This method is only protected for technical reasons
117     * and is not part of Commons Compress' published API.  It may
118     * change or disappear without warning.</strong></p>
119     */
120    @Override
121    protected int decompressNextSymbol() throws IOException {
122        //
123        //                   table entry    table entry
124        //                  _____________   _____
125        //    table entry  /             \ /     \
126        //    ____________/               \       \
127        //   /           / \             / \       \
128        //  +---+---+---+---+---+---+---+---+---+---+
129        //  | . | . | . | . | . | . | . | . | . | . |
130        //  +---+---+---+---+---+---+---+---+---+---+
131        //  |<--------->|<------------->|<----->|<->|
132        //     symbol        symbol      symbol  symbol
133        //
134        final int code = readNextCode();
135        if (code < 0) {
136            return -1;
137        } else if (blockMode && code == getClearCode()) {
138            clearEntries();
139            reAlignReading();
140            resetCodeSize();
141            resetPreviousCode();
142            return 0;
143        } else {
144            boolean addedUnfinishedEntry = false;
145            if (code == getTableSize()) {
146                addRepeatOfPreviousCode();
147                addedUnfinishedEntry = true;
148            } else if (code > getTableSize()) {
149                throw new IOException(String.format("Invalid %d bit code 0x%x", getCodeSize(), code));
150            }
151            return expandCodeToOutputStack(code, addedUnfinishedEntry);
152        }
153    }
154
155    /**
156     * Checks if the signature matches what is expected for a Unix compress file.
157     *
158     * @param signature
159     *            the bytes to check
160     * @param length
161     *            the number of bytes to check
162     * @return true, if this stream is a Unix compress compressed
163     * stream, false otherwise
164     *
165     * @since 1.9
166     */
167    public static boolean matches(final byte[] signature, final int length) {
168        return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2;
169    }
170
171}