001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.lzw; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.nio.ByteOrder; 024 025import org.apache.commons.compress.MemoryLimitException; 026import org.apache.commons.compress.compressors.CompressorInputStream; 027import org.apache.commons.compress.utils.BitInputStream; 028 029/** 030 * <p>Generic LZW implementation. It is used internally for 031 * the Z decompressor and the Unshrinking Zip file compression method, 032 * but may be useful for third-party projects in implementing their own LZW variations.</p> 033 * 034 * @NotThreadSafe 035 * @since 1.10 036 */ 037public abstract class LZWInputStream extends CompressorInputStream { 038 protected static final int DEFAULT_CODE_SIZE = 9; 039 protected static final int UNUSED_PREFIX = -1; 040 041 private final byte[] oneByte = new byte[1]; 042 043 protected final BitInputStream in; 044 private int clearCode = -1; 045 private int codeSize = DEFAULT_CODE_SIZE; 046 private byte previousCodeFirstChar; 047 private int previousCode = UNUSED_PREFIX; 048 private int tableSize; 049 private int[] prefixes; 050 private byte[] characters; 051 private byte[] outputStack; 052 private int outputStackLocation; 053 054 protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) { 055 this.in = new BitInputStream(inputStream, byteOrder); 056 } 057 058 @Override 059 public void close() throws IOException { 060 in.close(); 061 } 062 063 @Override 064 public int read() throws IOException { 065 final int ret = read(oneByte); 066 if (ret < 0) { 067 return ret; 068 } 069 return 0xff & oneByte[0]; 070 } 071 072 @Override 073 public int read(final byte[] b, final int off, final int len) throws IOException { 074 int bytesRead = readFromStack(b, off, len); 075 while (len - bytesRead > 0) { 076 final int result = decompressNextSymbol(); 077 if (result < 0) { 078 if (bytesRead > 0) { 079 count(bytesRead); 080 return bytesRead; 081 } 082 return result; 083 } 084 bytesRead += readFromStack(b, off + bytesRead, len - bytesRead); 085 } 086 count(bytesRead); 087 return bytesRead; 088 } 089 090 /** 091 * Read the next code and expand it. 092 * @return the expanded next code 093 * @throws IOException on error 094 */ 095 protected abstract int decompressNextSymbol() throws IOException; 096 097 /** 098 * Add a new entry to the dictionary. 099 * @param previousCode the previous code 100 * @param character the next character to append 101 * @return the new code 102 * @throws IOException on error 103 */ 104 protected abstract int addEntry(int previousCode, byte character) 105 throws IOException; 106 107 /** 108 * Sets the clear code based on the code size. 109 * @param codeSize code size 110 */ 111 protected void setClearCode(final int codeSize) { 112 clearCode = (1 << (codeSize - 1)); 113 } 114 115 /** 116 * Initializes the arrays based on the maximum code size. 117 * First checks that the estimated memory usage is below memoryLimitInKb 118 * 119 * @param maxCodeSize maximum code size 120 * @param memoryLimitInKb maximum allowed estimated memory usage in Kb 121 * @throws MemoryLimitException if estimated memory usage is greater than memoryLimitInKb 122 */ 123 protected void initializeTables(final int maxCodeSize, final int memoryLimitInKb) 124 throws MemoryLimitException { 125 126 if (memoryLimitInKb > -1) { 127 final int maxTableSize = 1 << maxCodeSize; 128 //account for potential overflow 129 long memoryUsageInBytes = (long) maxTableSize * 6;//(4 (prefixes) + 1 (characters) +1 (outputStack)) 130 long memoryUsageInKb = memoryUsageInBytes >> 10; 131 132 if (memoryUsageInKb > memoryLimitInKb) { 133 throw new MemoryLimitException(memoryUsageInKb, memoryLimitInKb); 134 } 135 } 136 initializeTables(maxCodeSize); 137 } 138 139 /** 140 * Initializes the arrays based on the maximum code size. 141 * @param maxCodeSize maximum code size 142 */ 143 protected void initializeTables(final int maxCodeSize) { 144 final int maxTableSize = 1 << maxCodeSize; 145 prefixes = new int[maxTableSize]; 146 characters = new byte[maxTableSize]; 147 outputStack = new byte[maxTableSize]; 148 outputStackLocation = maxTableSize; 149 final int max = 1 << 8; 150 for (int i = 0; i < max; i++) { 151 prefixes[i] = -1; 152 characters[i] = (byte) i; 153 } 154 } 155 156 /** 157 * Reads the next code from the stream. 158 * @return the next code 159 * @throws IOException on error 160 */ 161 protected int readNextCode() throws IOException { 162 if (codeSize > 31) { 163 throw new IllegalArgumentException("code size must not be bigger than 31"); 164 } 165 return (int) in.readBits(codeSize); 166 } 167 168 /** 169 * Adds a new entry if the maximum table size hasn't been exceeded 170 * and returns the new index. 171 * @param previousCode the previous code 172 * @param character the character to append 173 * @param maxTableSize the maximum table size 174 * @return the new code 175 */ 176 protected int addEntry(final int previousCode, final byte character, final int maxTableSize) { 177 if (tableSize < maxTableSize) { 178 prefixes[tableSize] = previousCode; 179 characters[tableSize] = character; 180 return tableSize++; 181 } 182 return -1; 183 } 184 185 /** 186 * Add entry for repeat of previousCode we haven't added, yet. 187 * @return new code for a repeat of the previous code 188 * @throws IOException on error 189 */ 190 protected int addRepeatOfPreviousCode() throws IOException { 191 if (previousCode == -1) { 192 // can't have a repeat for the very first code 193 throw new IOException("The first code can't be a reference to its preceding code"); 194 } 195 return addEntry(previousCode, previousCodeFirstChar); 196 } 197 198 /** 199 * Expands the entry with index code to the output stack and may 200 * create a new entry 201 * @param code the code 202 * @param addedUnfinishedEntry whether unfinished entries have been added 203 * @return the new location of the output stack 204 * @throws IOException on error 205 */ 206 protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry) 207 throws IOException { 208 for (int entry = code; entry >= 0; entry = prefixes[entry]) { 209 outputStack[--outputStackLocation] = characters[entry]; 210 } 211 if (previousCode != -1 && !addedUnfinishedEntry) { 212 addEntry(previousCode, outputStack[outputStackLocation]); 213 } 214 previousCode = code; 215 previousCodeFirstChar = outputStack[outputStackLocation]; 216 return outputStackLocation; 217 } 218 219 private int readFromStack(final byte[] b, final int off, final int len) { 220 final int remainingInStack = outputStack.length - outputStackLocation; 221 if (remainingInStack > 0) { 222 final int maxLength = Math.min(remainingInStack, len); 223 System.arraycopy(outputStack, outputStackLocation, b, off, maxLength); 224 outputStackLocation += maxLength; 225 return maxLength; 226 } 227 return 0; 228 } 229 230 protected int getCodeSize() { 231 return codeSize; 232 } 233 234 protected void resetCodeSize() { 235 setCodeSize(DEFAULT_CODE_SIZE); 236 } 237 238 protected void setCodeSize(final int cs) { 239 this.codeSize = cs; 240 } 241 242 protected void incrementCodeSize() { 243 codeSize++; 244 } 245 246 protected void resetPreviousCode() { 247 this.previousCode = -1; 248 } 249 250 protected int getPrefix(final int offset) { 251 return prefixes[offset]; 252 } 253 254 protected void setPrefix(final int offset, final int value) { 255 prefixes[offset] = value; 256 } 257 258 protected int getPrefixesLength() { 259 return prefixes.length; 260 } 261 262 protected int getClearCode() { 263 return clearCode; 264 } 265 266 protected int getTableSize() { 267 return tableSize; 268 } 269 270 protected void setTableSize(final int newSize) { 271 tableSize = newSize; 272 } 273 274}