001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lzw;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.nio.ByteOrder;
024
025import org.apache.commons.compress.MemoryLimitException;
026import org.apache.commons.compress.compressors.CompressorInputStream;
027import org.apache.commons.compress.utils.BitInputStream;
028
029/**
030 * <p>Generic LZW implementation. It is used internally for
031 * the Z decompressor and the Unshrinking Zip file compression method,
032 * but may be useful for third-party projects in implementing their own LZW variations.</p>
033 *
034 * @NotThreadSafe
035 * @since 1.10
036 */
037public abstract class LZWInputStream extends CompressorInputStream {
038    protected static final int DEFAULT_CODE_SIZE = 9;
039    protected static final int UNUSED_PREFIX = -1;
040
041    private final byte[] oneByte = new byte[1];
042
043    protected final BitInputStream in;
044    private int clearCode = -1;
045    private int codeSize = DEFAULT_CODE_SIZE;
046    private byte previousCodeFirstChar;
047    private int previousCode = UNUSED_PREFIX;
048    private int tableSize;
049    private int[] prefixes;
050    private byte[] characters;
051    private byte[] outputStack;
052    private int outputStackLocation;
053
054    protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
055        this.in = new BitInputStream(inputStream, byteOrder);
056    }
057
058    @Override
059    public void close() throws IOException {
060        in.close();
061    }
062    
063    @Override
064    public int read() throws IOException {
065        final int ret = read(oneByte);
066        if (ret < 0) {
067            return ret;
068        }
069        return 0xff & oneByte[0];
070    }
071    
072    @Override
073    public int read(final byte[] b, final int off, final int len) throws IOException {
074        int bytesRead = readFromStack(b, off, len);
075        while (len - bytesRead > 0) {
076            final int result = decompressNextSymbol();
077            if (result < 0) {
078                if (bytesRead > 0) {
079                    count(bytesRead);
080                    return bytesRead;
081                }
082                return result;
083            }
084            bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
085        }
086        count(bytesRead);
087        return bytesRead;
088    }
089
090    /**
091     * Read the next code and expand it.
092     * @return the expanded next code
093     * @throws IOException on error
094     */
095    protected abstract int decompressNextSymbol() throws IOException;
096
097    /**
098     * Add a new entry to the dictionary.
099     * @param previousCode the previous code
100     * @param character the next character to append
101     * @return the new code
102     * @throws IOException on error
103     */
104    protected abstract int addEntry(int previousCode, byte character)
105        throws IOException;
106
107    /**
108     * Sets the clear code based on the code size.
109     * @param codeSize code size
110     */
111    protected void setClearCode(final int codeSize) {
112        clearCode = (1 << (codeSize - 1));
113    }
114
115    /**
116     * Initializes the arrays based on the maximum code size.
117     * First checks that the estimated memory usage is below memoryLimitInKb
118     *
119     * @param maxCodeSize maximum code size
120     * @param memoryLimitInKb maximum allowed estimated memory usage in Kb
121     * @throws MemoryLimitException if estimated memory usage is greater than memoryLimitInKb
122     */
123    protected void initializeTables(final int maxCodeSize, final int memoryLimitInKb)
124            throws MemoryLimitException {
125
126        if (memoryLimitInKb > -1) {
127            final int maxTableSize = 1 << maxCodeSize;
128            //account for potential overflow
129            long memoryUsageInBytes = (long) maxTableSize * 6;//(4 (prefixes) + 1 (characters) +1 (outputStack))
130            long memoryUsageInKb = memoryUsageInBytes >> 10;
131
132            if (memoryUsageInKb > memoryLimitInKb) {
133                throw new MemoryLimitException(memoryUsageInKb, memoryLimitInKb);
134            }
135        }
136        initializeTables(maxCodeSize);
137    }
138
139    /**
140     * Initializes the arrays based on the maximum code size.
141     * @param maxCodeSize maximum code size
142     */
143    protected void initializeTables(final int maxCodeSize) {
144        final int maxTableSize = 1 << maxCodeSize;
145        prefixes = new int[maxTableSize];
146        characters = new byte[maxTableSize];
147        outputStack = new byte[maxTableSize];
148        outputStackLocation = maxTableSize;
149        final int max = 1 << 8;
150        for (int i = 0; i < max; i++) {
151            prefixes[i] = -1;
152            characters[i] = (byte) i;
153        }
154    }
155
156    /**
157     * Reads the next code from the stream.
158     * @return the next code
159     * @throws IOException on error
160     */
161    protected int readNextCode() throws IOException {
162        if (codeSize > 31) {
163            throw new IllegalArgumentException("code size must not be bigger than 31");
164        }
165        return (int) in.readBits(codeSize);
166    }
167    
168    /**
169     * Adds a new entry if the maximum table size hasn't been exceeded
170     * and returns the new index.
171     * @param previousCode the previous code
172     * @param character the character to append
173     * @param maxTableSize the maximum table size
174     * @return the new code
175     */
176    protected int addEntry(final int previousCode, final byte character, final int maxTableSize) {
177        if (tableSize < maxTableSize) {
178            prefixes[tableSize] = previousCode;
179            characters[tableSize] = character;
180            return tableSize++;
181        }
182        return -1;
183    }
184
185    /**
186     * Add entry for repeat of previousCode we haven't added, yet.
187     * @return new code for a repeat of the previous code
188     * @throws IOException on error
189     */
190    protected int addRepeatOfPreviousCode() throws IOException {
191        if (previousCode == -1) {
192            // can't have a repeat for the very first code
193            throw new IOException("The first code can't be a reference to its preceding code");
194        }
195        return addEntry(previousCode, previousCodeFirstChar);
196    }
197
198    /**
199     * Expands the entry with index code to the output stack and may
200     * create a new entry
201     * @param code the code
202     * @param addedUnfinishedEntry whether unfinished entries have been added
203     * @return the new location of the output stack
204     * @throws IOException on error
205     */
206    protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry)
207        throws IOException {
208        for (int entry = code; entry >= 0; entry = prefixes[entry]) {
209            outputStack[--outputStackLocation] = characters[entry];
210        }
211        if (previousCode != -1 && !addedUnfinishedEntry) {
212            addEntry(previousCode, outputStack[outputStackLocation]);
213        }
214        previousCode = code;
215        previousCodeFirstChar = outputStack[outputStackLocation];
216        return outputStackLocation;
217    }
218
219    private int readFromStack(final byte[] b, final int off, final int len) {
220        final int remainingInStack = outputStack.length - outputStackLocation;
221        if (remainingInStack > 0) {
222            final int maxLength = Math.min(remainingInStack, len);
223            System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
224            outputStackLocation += maxLength;
225            return maxLength;
226        }
227        return 0;
228    }
229
230    protected int getCodeSize() {
231        return codeSize;
232    }
233
234    protected void resetCodeSize() {
235        setCodeSize(DEFAULT_CODE_SIZE);
236    }
237
238    protected void setCodeSize(final int cs) {
239        this.codeSize = cs;
240    }
241
242    protected void incrementCodeSize() {
243        codeSize++;
244    }
245
246    protected void resetPreviousCode() {
247        this.previousCode = -1;
248    }
249
250    protected int getPrefix(final int offset) {
251        return prefixes[offset];
252    }
253
254    protected void setPrefix(final int offset, final int value) {
255        prefixes[offset] = value;
256    }
257
258    protected int getPrefixesLength() {
259        return prefixes.length;
260    }
261
262    protected int getClearCode() {
263        return clearCode;
264    }
265
266    protected int getTableSize() {
267        return tableSize;
268    }
269
270    protected void setTableSize(final int newSize) {
271        tableSize = newSize;
272    }
273
274}