001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lz4;
020
021import java.io.ByteArrayOutputStream;
022import java.io.IOException;
023import java.io.OutputStream;
024
025import org.apache.commons.compress.compressors.CompressorOutputStream;
026import org.apache.commons.compress.utils.ByteUtils;
027
028/**
029 * CompressorOutputStream for the LZ4 frame format.
030 *
031 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p>
032 *
033 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
034 * @since 1.14
035 * @NotThreadSafe
036 */
037public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
038
039    private static final byte[] END_MARK = new byte[4];
040
041    // used in one-arg write method
042    private final byte[] oneByte = new byte[1];
043
044    private final byte[] blockData;
045    private final OutputStream out;
046    private final Parameters params;
047    private boolean finished = false;
048    private int currentIndex = 0;
049
050    // used for frame header checksum and content checksum, if requested
051    private final XXHash32 contentHash = new XXHash32();
052    // used for block checksum, if requested
053    private final XXHash32 blockHash;
054
055    // only created if the config requires block dependency
056    private byte[] blockDependencyBuffer;
057    private int collectedBlockDependencyBytes;
058
059    /**
060     * The block sizes supported by the format.
061     */
062    public enum BlockSize {
063        /** Block size of 64K */
064        K64(64 * 1024, 4),
065        /** Block size of 256K */
066        K256(256 * 1024, 5),
067        /** Block size of 1M */
068        M1(1024 * 1024, 6),
069        /** Block size of 4M */
070        M4(4096 * 1024, 7);
071
072        private final int size, index;
073        BlockSize(int size, int index) {
074            this.size = size;
075            this.index = index;
076        }
077        int getSize() {
078            return size;
079        }
080        int getIndex() {
081            return index;
082        }
083    }
084
085    /**
086     * Parameters of the LZ4 frame format.
087     */
088    public static class Parameters {
089        private final BlockSize blockSize;
090        private final boolean withContentChecksum, withBlockChecksum, withBlockDependency;
091        private final org.apache.commons.compress.compressors.lz77support.Parameters lz77params;
092
093        /**
094         * The default parameters of 4M block size, enabled content
095         * checksum, disabled block checksums and independent blocks.
096         *
097         * <p>This matches the defaults of the lz4 command line utility.</p>
098         */
099        public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false);
100
101        /**
102         * Sets up custom a custom block size for the LZ4 stream but
103         * otherwise uses the defaults of enabled content checksum,
104         * disabled block checksums and independent blocks.
105         * @param blockSize the size of a single block.
106         */
107        public Parameters(BlockSize blockSize) {
108            this(blockSize, true, false, false);
109        }
110        /**
111         * Sets up custom a custom block size for the LZ4 stream but
112         * otherwise uses the defaults of enabled content checksum,
113         * disabled block checksums and independent blocks.
114         * @param blockSize the size of a single block.
115         * @param lz77params parameters used to fine-tune compression,
116         * in particular to balance compression ratio vs compression
117         * speed.
118         */
119        public Parameters(BlockSize blockSize,
120            org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
121            this(blockSize, true, false, false, lz77params);
122        }
123        /**
124         * Sets up custom parameters for the LZ4 stream.
125         * @param blockSize the size of a single block.
126         * @param withContentChecksum whether to write a content checksum
127         * @param withBlockChecksum whether to write a block checksum.
128         * Note that block checksums are not supported by the lz4
129         * command line utility
130         * @param withBlockDependency whether a block may depend on
131         * the content of a previous block. Enabling this may improve
132         * compression ratio but makes it impossible to decompress the
133         * output in parallel.
134         */
135        public Parameters(BlockSize blockSize, boolean withContentChecksum, boolean withBlockChecksum,
136            boolean withBlockDependency) {
137            this(blockSize, withContentChecksum, withBlockChecksum, withBlockDependency,
138                 BlockLZ4CompressorOutputStream.createParameterBuilder().build());
139        }
140
141        /**
142         * Sets up custom parameters for the LZ4 stream.
143         * @param blockSize the size of a single block.
144         * @param withContentChecksum whether to write a content checksum
145         * @param withBlockChecksum whether to write a block checksum.
146         * Note that block checksums are not supported by the lz4
147         * command line utility
148         * @param withBlockDependency whether a block may depend on
149         * the content of a previous block. Enabling this may improve
150         * compression ratio but makes it impossible to decompress the
151         * output in parallel.
152         * @param lz77params parameters used to fine-tune compression,
153         * in particular to balance compression ratio vs compression
154         * speed.
155         */
156        public Parameters(BlockSize blockSize, boolean withContentChecksum, boolean withBlockChecksum,
157                boolean withBlockDependency,
158                org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
159            this.blockSize = blockSize;
160            this.withContentChecksum = withContentChecksum;
161            this.withBlockChecksum = withBlockChecksum;
162            this.withBlockDependency = withBlockDependency;
163            this.lz77params = lz77params;
164        }
165
166        @Override
167        public String toString() {
168            return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum
169                + ", withBlockChecksum " + withBlockChecksum + ", withBlockDependency " + withBlockDependency;
170        }
171    }
172
173    /**
174     * Constructs a new output stream that compresses data using the
175     * LZ4 frame format using the default block size of 4MB.
176     * @param out the OutputStream to which to write the compressed data
177     * @throws IOException if writing the signature fails
178     */
179    public FramedLZ4CompressorOutputStream(OutputStream out) throws IOException {
180        this(out, Parameters.DEFAULT);
181    }
182
183    /**
184     * Constructs a new output stream that compresses data using the
185     * LZ4 frame format using the given block size.
186     * @param out the OutputStream to which to write the compressed data
187     * @param params the parameters to use
188     * @throws IOException if writing the signature fails
189     */
190    public FramedLZ4CompressorOutputStream(OutputStream out, Parameters params) throws IOException {
191        this.params = params;
192        blockData = new byte[params.blockSize.getSize()];
193        this.out = out;
194        blockHash = params.withBlockChecksum ? new XXHash32() : null;
195        out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE);
196        writeFrameDescriptor();
197        blockDependencyBuffer = params.withBlockDependency
198            ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]
199            : null;
200    }
201
202    @Override
203    public void write(int b) throws IOException {
204        oneByte[0] = (byte) (b & 0xff);
205        write(oneByte);
206    }
207
208    @Override
209    public void write(byte[] data, int off, int len) throws IOException {
210        if (params.withContentChecksum) {
211            contentHash.update(data, off, len);
212        }
213        if (currentIndex + len > blockData.length) {
214            flushBlock();
215            while (len > blockData.length) {
216                System.arraycopy(data, off, blockData, 0, blockData.length);
217                off += blockData.length;
218                len -= blockData.length;
219                currentIndex = blockData.length;
220                flushBlock();
221            }
222        }
223        System.arraycopy(data, off, blockData, currentIndex, len);
224        currentIndex += len;
225    }
226
227    @Override
228    public void close() throws IOException {
229        try {
230            finish();
231        } finally {
232            out.close();
233        }
234    }
235
236    /**
237     * Compresses all remaining data and writes it to the stream,
238     * doesn't close the underlying stream.
239     * @throws IOException if an error occurs
240     */
241    public void finish() throws IOException {
242        if (!finished) {
243            if (currentIndex > 0) {
244                flushBlock();
245            }
246            writeTrailer();
247            finished = true;
248        }
249    }
250
251    private void writeFrameDescriptor() throws IOException {
252        int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION;
253        if (!params.withBlockDependency) {
254            flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
255        }
256        if (params.withContentChecksum) {
257            flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK;
258        }
259        if (params.withBlockChecksum) {
260            flags |= FramedLZ4CompressorInputStream.BLOCK_CHECKSUM_MASK;
261        }
262        out.write(flags);
263        contentHash.update(flags);
264        int bd = (params.blockSize.getIndex() << 4) & FramedLZ4CompressorInputStream.BLOCK_MAX_SIZE_MASK;
265        out.write(bd);
266        contentHash.update(bd);
267        out.write((int) ((contentHash.getValue() >> 8) & 0xff));
268        contentHash.reset();
269    }
270
271    private void flushBlock() throws IOException {
272        final boolean withBlockDependency = params.withBlockDependency;
273        ByteArrayOutputStream baos = new ByteArrayOutputStream();
274        try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos, params.lz77params)) {
275            if (withBlockDependency) {
276                o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes,
277                    collectedBlockDependencyBytes);
278            }
279            o.write(blockData, 0, currentIndex);
280        }
281        if (withBlockDependency) {
282            appendToBlockDependencyBuffer(blockData, 0, currentIndex);
283        }
284        byte[] b = baos.toByteArray();
285        if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize
286            ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK,
287                4);
288            out.write(blockData, 0, currentIndex);
289            if (params.withBlockChecksum) {
290                blockHash.update(blockData, 0, currentIndex);
291            }
292        } else {
293            ByteUtils.toLittleEndian(out, b.length, 4);
294            out.write(b);
295            if (params.withBlockChecksum) {
296                blockHash.update(b, 0, b.length);
297            }
298        }
299        if (params.withBlockChecksum) {
300            ByteUtils.toLittleEndian(out, blockHash.getValue(), 4);
301            blockHash.reset();
302        }
303        currentIndex = 0;
304    }
305
306    private void writeTrailer() throws IOException {
307        out.write(END_MARK);
308        if (params.withContentChecksum) {
309            ByteUtils.toLittleEndian(out, contentHash.getValue(), 4);
310        }
311    }
312
313    private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
314        len = Math.min(len, blockDependencyBuffer.length);
315        if (len > 0) {
316            int keep = blockDependencyBuffer.length - len;
317            if (keep > 0) {
318                // move last keep bytes towards the start of the buffer
319                System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
320            }
321            // append new data
322            System.arraycopy(b, off, blockDependencyBuffer, keep, len);
323            collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len,
324                blockDependencyBuffer.length);
325        }
326    }
327
328}
329