001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lz4;
020
021import java.io.IOException;
022import java.io.InputStream;
023
024import org.apache.commons.compress.compressors.lz77support.AbstractLZ77CompressorInputStream;
025import org.apache.commons.compress.utils.ByteUtils;
026
027/**
028 * CompressorInputStream for the LZ4 block format.
029 *
030 * @see <a href="http://lz4.github.io/lz4/lz4_Block_format.html">LZ4 Block Format Description</a>
031 * @since 1.14
032 * @NotThreadSafe
033 */
034public class BlockLZ4CompressorInputStream extends AbstractLZ77CompressorInputStream {
035
036    static final int WINDOW_SIZE = 1 << 16;
037    static final int SIZE_BITS = 4;
038    static final int BACK_REFERENCE_SIZE_MASK = (1 << SIZE_BITS) - 1;
039    static final int LITERAL_SIZE_MASK = BACK_REFERENCE_SIZE_MASK << SIZE_BITS;
040
041    /** Back-Reference-size part of the block starting byte. */
042    private int nextBackReferenceSize;
043
044    /** Current state of the stream */
045    private State state = State.NO_BLOCK;
046
047    /**
048     * Creates a new LZ4 input stream.
049     *
050     * @param is
051     *            An InputStream to read compressed data from
052     *
053     * @throws IOException if reading fails
054     */
055    public BlockLZ4CompressorInputStream(final InputStream is) throws IOException {
056        super(is, WINDOW_SIZE);
057    }
058
059    /**
060     * {@inheritDoc}
061     */
062    @Override
063    public int read(final byte[] b, final int off, final int len) throws IOException {
064        switch (state) {
065        case EOF:
066            return -1;
067        case NO_BLOCK: // NOSONAR - fallthrough intended
068            readSizes();
069            /*FALLTHROUGH*/
070        case IN_LITERAL:
071            int litLen = readLiteral(b, off, len);
072            if (!hasMoreDataInBlock()) {
073                state = State.LOOKING_FOR_BACK_REFERENCE;
074            }
075            return litLen > 0 ? litLen : read(b, off, len);
076        case LOOKING_FOR_BACK_REFERENCE: // NOSONAR - fallthrough intended
077            if (!initializeBackReference()) {
078                state = State.EOF;
079                return -1;
080            }
081            /*FALLTHROUGH*/
082        case IN_BACK_REFERENCE:
083            int backReferenceLen = readBackReference(b, off, len);
084            if (!hasMoreDataInBlock()) {
085                state = State.NO_BLOCK;
086            }
087            return backReferenceLen > 0 ? backReferenceLen : read(b, off, len);
088        default:
089            throw new IOException("Unknown stream state " + state);
090        }
091    }
092
093    private void readSizes() throws IOException {
094        int nextBlock = readOneByte();
095        if (nextBlock == -1) {
096            throw new IOException("Premature end of stream while looking for next block");
097        }
098        nextBackReferenceSize = nextBlock & BACK_REFERENCE_SIZE_MASK;
099        long literalSizePart = (nextBlock & LITERAL_SIZE_MASK) >> SIZE_BITS;
100        if (literalSizePart == BACK_REFERENCE_SIZE_MASK) {
101            literalSizePart += readSizeBytes();
102        }
103        startLiteral(literalSizePart);
104        state = State.IN_LITERAL;
105    }
106
107    private long readSizeBytes() throws IOException {
108        long accum = 0;
109        int nextByte;
110        do {
111            nextByte = readOneByte();
112            if (nextByte == -1) {
113                throw new IOException("Premature end of stream while parsing length");
114            }
115            accum += nextByte;
116        } while (nextByte == 255);
117        return accum;
118    }
119
120    /**
121     * @return false if there is no more back-reference - this means this is the
122     * last block of the stream.
123     */
124    private boolean initializeBackReference() throws IOException {
125        int backReferenceOffset = 0;
126        try {
127            backReferenceOffset = (int) ByteUtils.fromLittleEndian(supplier, 2);
128        } catch (IOException ex) {
129            if (nextBackReferenceSize == 0) { // the last block has no back-reference
130                return false;
131            }
132            throw ex;
133        }
134        long backReferenceSize = nextBackReferenceSize;
135        if (nextBackReferenceSize == BACK_REFERENCE_SIZE_MASK) {
136            backReferenceSize += readSizeBytes();
137        }
138        // minimal match length 4 is encoded as 0
139        startBackReference(backReferenceOffset, backReferenceSize + 4);
140        state = State.IN_BACK_REFERENCE;
141        return true;
142    }
143
144    private enum State {
145        NO_BLOCK, IN_LITERAL, LOOKING_FOR_BACK_REFERENCE, IN_BACK_REFERENCE, EOF
146    }
147}