001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020package org.apache.commons.compress.archivers.zip;
021
022import java.io.IOException;
023import java.nio.ByteBuffer;
024
/**
 * An interface for encoders that do a pretty encoding of ZIP
 * filenames.
 *
 * <p>There are mostly two implementations: one that uses the java.nio
 * {@link java.nio.charset.Charset Charset}, and one that copes with
 * simple 8 bit charsets, because java-1.4 did not support Cp437 in
 * java.nio.</p>
 *
 * <p>The main reason for defining a separate encoding layer is the
 * behavior of {@link java.lang.String#getBytes(String)
 * String.getBytes}, which encodes unknown characters as ASCII
 * question marks ('?'). A question mark is by definition not
 * allowed in filenames on some operating systems like Windows, which
 * leads to ignored ZIP entries.</p>
 *
 * <p>All implementations should implement this interface in a
 * reentrant way.</p>
 */
public interface ZipEncoding {

    /**
     * Tells whether this encoding can represent the given string
     * without losing any information.
     *
     * @param name the filename or ZIP comment to test.
     * @return {@code true} if the name can be encoded without any
     *         losses, {@code false} otherwise.
     */
    boolean canEncode(String name);

    /**
     * Turns a filename or a comment into the bytes that are stored
     * in a serialized zip entry.
     *
     * <p>Examples for CP 437 (pseudo-notation; the right hand side
     * is written in C-style notation):</p>
     * <pre>
     *  encode("\u20AC_for_Dollar.txt") = "%U20AC_for_Dollar.txt"
     *  encode("\u00D6lf\u00E4sser.txt") = "\231lf\204sser.txt"
     * </pre>
     *
     * <p>Characters that cannot be mapped, as well as malformed
     * character sequences, are written as a sequence of utf-16
     * words in the format <code>%Uxxxx</code>.</p>
     *
     * @param name the filename or ZIP comment to encode.
     * @return a byte buffer holding the encoded name.  The buffer is
     *         assumed to have a backing array, to be positioned at
     *         the start of the encoded result and to have its limit
     *         at the end of the encoded result.
     * @throws IOException on error
     */
    ByteBuffer encode(String name) throws IOException;

    /**
     * Converts encoded bytes back into a string.
     *
     * @param data the byte values to decode.
     * @return the decoded string.
     * @throws IOException on error
     */
    String decode(byte[] data) throws IOException;
}