001 /* CharsetEncoder.java -- 002 Copyright (C) 2002 Free Software Foundation, Inc. 003 004 This file is part of GNU Classpath. 005 006 GNU Classpath is free software; you can redistribute it and/or modify 007 it under the terms of the GNU General Public License as published by 008 the Free Software Foundation; either version 2, or (at your option) 009 any later version. 010 011 GNU Classpath is distributed in the hope that it will be useful, but 012 WITHOUT ANY WARRANTY; without even the implied warranty of 013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 General Public License for more details. 015 016 You should have received a copy of the GNU General Public License 017 along with GNU Classpath; see the file COPYING. If not, write to the 018 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 019 02110-1301 USA. 020 021 Linking this library statically or dynamically with other modules is 022 making a combined work based on this library. Thus, the terms and 023 conditions of the GNU General Public License cover the whole 024 combination. 025 026 As a special exception, the copyright holders of this library give you 027 permission to link this library with independent modules to produce an 028 executable, regardless of the license terms of these independent 029 modules, and to copy and distribute the resulting executable under 030 terms of your choice, provided that you also meet, for each linked 031 independent module, the terms and conditions of the license of that 032 module. An independent module is a module which is not derived from 033 or based on this library. If you modify this library, you may extend 034 this exception to your version of the library, but you are not 035 obligated to do so. If you do not wish to do so, delete this 036 exception statement from your version. */ 037 038 package java.nio.charset; 039 040 import java.nio.ByteBuffer; 041 import java.nio.CharBuffer; 042 043 /** 044 * @author Jesse Rosenstock 045 * @since 1.4 046 */ 047 public abstract class CharsetEncoder 048 { 049 private static final int STATE_RESET = 0; 050 private static final int STATE_CODING = 1; 051 private static final int STATE_END = 2; 052 private static final int STATE_FLUSHED = 3; 053 054 private static final byte[] DEFAULT_REPLACEMENT = {(byte)'?'}; 055 056 private final Charset charset; 057 private final float averageBytesPerChar; 058 private final float maxBytesPerChar; 059 private byte[] replacement; 060 061 private int state = STATE_RESET; 062 063 private CodingErrorAction malformedInputAction 064 = CodingErrorAction.REPORT; 065 private CodingErrorAction unmappableCharacterAction 066 = CodingErrorAction.REPORT; 067 068 protected CharsetEncoder (Charset cs, float averageBytesPerChar, 069 float maxBytesPerChar) 070 { 071 this (cs, averageBytesPerChar, maxBytesPerChar, DEFAULT_REPLACEMENT); 072 } 073 074 protected CharsetEncoder (Charset cs, float averageBytesPerChar, 075 float maxBytesPerChar, byte[] replacement) 076 { 077 if (averageBytesPerChar <= 0.0f) 078 throw new IllegalArgumentException ("Non-positive averageBytesPerChar"); 079 if (maxBytesPerChar <= 0.0f) 080 throw new IllegalArgumentException ("Non-positive maxBytesPerChar"); 081 082 this.charset = cs; 083 this.averageBytesPerChar 084 = averageBytesPerChar; 085 this.maxBytesPerChar 086 = maxBytesPerChar; 087 this.replacement = replacement; 088 implReplaceWith (replacement); 089 } 090 091 public final float averageBytesPerChar () 092 { 093 return averageBytesPerChar; 094 } 095 096 public boolean canEncode (char c) 097 { 098 CharBuffer cb = CharBuffer.allocate (1).put (c); 099 cb.flip (); 100 return canEncode (cb); 101 } 102 103 public boolean canEncode (CharSequence cs) 104 { 105 CharBuffer cb; 106 if (cs instanceof CharBuffer) 107 cb = ((CharBuffer) cs).duplicate (); 108 else 109 cb = CharBuffer.wrap (cs); 110 return canEncode (cb); 111 } 112 113 private boolean canEncode (CharBuffer cb) 114 { 115 // It is an error if a coding operation is "in progress" 116 // I take that to mean the state is not reset or flushed. 117 // XXX: check "in progress" everywhere 118 if (state == STATE_FLUSHED) 119 reset (); 120 else if (state != STATE_RESET) 121 throw new IllegalStateException (); 122 123 CodingErrorAction oldMalformedInputAction = malformedInputAction; 124 CodingErrorAction oldUnmappableCharacterAction 125 = unmappableCharacterAction; 126 127 try 128 { 129 if (oldMalformedInputAction != CodingErrorAction.REPORT) 130 onMalformedInput (CodingErrorAction.REPORT); 131 if (oldUnmappableCharacterAction != CodingErrorAction.REPORT) 132 onUnmappableCharacter (CodingErrorAction.REPORT); 133 } 134 catch (Exception e) 135 { 136 return false; 137 } 138 finally 139 { 140 if (oldMalformedInputAction != CodingErrorAction.REPORT) 141 onMalformedInput (oldMalformedInputAction); 142 if (oldUnmappableCharacterAction != CodingErrorAction.REPORT) 143 onUnmappableCharacter (oldUnmappableCharacterAction); 144 } 145 146 return true; 147 } 148 149 public final Charset charset () 150 { 151 return charset; 152 } 153 154 public final ByteBuffer encode (CharBuffer in) 155 throws CharacterCodingException 156 { 157 // XXX: Sun's Javadoc seems to contradict itself saying an 158 // IllegalStateException is thrown "if a decoding operation is already 159 // in progress" and also that "it resets this Encoder". 160 // Should we check to see that the state is reset, or should we 161 // call reset()? 162 if (state != STATE_RESET) 163 throw new IllegalStateException (); 164 165 // REVIEW: Using max instead of average may allocate a very large 166 // buffer. Maybe we should do something more efficient? 167 int remaining = in.remaining (); 168 int n = (int) (remaining * maxBytesPerChar ()); 169 ByteBuffer out = ByteBuffer.allocate (n); 170 171 if (remaining == 0) 172 { 173 state = STATE_FLUSHED; 174 return out; 175 } 176 177 CoderResult cr = encode (in, out, true); 178 if (cr.isError ()) 179 cr.throwException (); 180 181 cr = flush (out); 182 if (cr.isError ()) 183 cr.throwException (); 184 185 out.flip (); 186 187 // Unfortunately, resizing the actual bytebuffer array is required. 188 byte[] resized = new byte[out.remaining()]; 189 out.get(resized); 190 return ByteBuffer.wrap(resized); 191 } 192 193 public final CoderResult encode (CharBuffer in, ByteBuffer out, 194 boolean endOfInput) 195 { 196 int newState = endOfInput ? STATE_END : STATE_CODING; 197 // XXX: Need to check for "previous step was an invocation [not] of 198 // this method with a value of true for the endOfInput parameter but 199 // a return value indicating an incomplete decoding operation" 200 // XXX: We will not check the previous return value, just 201 // that the previous call passed true for endOfInput 202 if (state != STATE_RESET && state != STATE_CODING 203 && !(endOfInput && state == STATE_END)) 204 throw new IllegalStateException (); 205 state = newState; 206 207 for (;;) 208 { 209 CoderResult cr; 210 try 211 { 212 cr = encodeLoop (in, out); 213 } 214 catch (RuntimeException e) 215 { 216 throw new CoderMalfunctionError (e); 217 } 218 219 if (cr.isOverflow ()) 220 return cr; 221 222 if (cr.isUnderflow ()) 223 { 224 if (endOfInput && in.hasRemaining ()) 225 cr = CoderResult.malformedForLength (in.remaining ()); 226 else 227 return cr; 228 } 229 230 CodingErrorAction action = cr.isMalformed () 231 ? malformedInputAction 232 : unmappableCharacterAction; 233 234 if (action == CodingErrorAction.REPORT) 235 return cr; 236 237 if (action == CodingErrorAction.REPLACE) 238 { 239 if (out.remaining () < replacement.length) 240 return CoderResult.OVERFLOW; 241 out.put (replacement); 242 } 243 244 in.position (in.position () + cr.length ()); 245 } 246 } 247 248 protected abstract CoderResult encodeLoop (CharBuffer in, ByteBuffer out); 249 250 public final CoderResult flush (ByteBuffer out) 251 { 252 // It seems weird that you can flush after reset, but Sun's javadoc 253 // says an IllegalStateException is thrown "If the previous step of the 254 // current decoding operation was an invocation neither of the reset 255 // method nor ... of the three-argument encode method with a value of 256 // true for the endOfInput parameter." 257 // Further note that flush() only requires that there not be 258 // an IllegalStateException if the previous step was a call to 259 // encode with true as the last argument. It does not require 260 // that the call succeeded. encode() does require that it succeeded. 261 // XXX: test this to see if reality matches javadoc 262 if (state != STATE_RESET && state != STATE_END) 263 throw new IllegalStateException (); 264 265 state = STATE_FLUSHED; 266 return implFlush (out); 267 } 268 269 protected CoderResult implFlush (ByteBuffer out) 270 { 271 return CoderResult.UNDERFLOW; 272 } 273 274 protected void implOnMalformedInput (CodingErrorAction newAction) 275 { 276 // default implementation does nothing 277 } 278 279 protected void implOnUnmappableCharacter (CodingErrorAction newAction) 280 { 281 // default implementation does nothing 282 } 283 284 protected void implReplaceWith (byte[] newReplacement) 285 { 286 // default implementation does nothing 287 } 288 289 protected void implReset () 290 { 291 // default implementation does nothing 292 } 293 294 public boolean isLegalReplacement (byte[] replacement) 295 { 296 // TODO: cache the decoder 297 // error actions will be REPORT after construction 298 CharsetDecoder decoder = charset.newDecoder (); 299 ByteBuffer bb = ByteBuffer.wrap (replacement); 300 CharBuffer cb 301 = CharBuffer.allocate ((int) (replacement.length 302 * decoder.maxCharsPerByte ())); 303 return !decoder.decode (bb, cb, true).isError (); 304 } 305 306 public CodingErrorAction malformedInputAction () 307 { 308 return malformedInputAction; 309 } 310 311 public final float maxBytesPerChar () 312 { 313 return maxBytesPerChar; 314 } 315 316 public final CharsetEncoder onMalformedInput (CodingErrorAction newAction) 317 { 318 if (newAction == null) 319 throw new IllegalArgumentException ("Null action"); 320 321 malformedInputAction = newAction; 322 implOnMalformedInput (newAction); 323 return this; 324 } 325 326 public CodingErrorAction unmappableCharacterAction () 327 { 328 return unmappableCharacterAction; 329 } 330 331 public final CharsetEncoder onUnmappableCharacter 332 (CodingErrorAction newAction) 333 { 334 if (newAction == null) 335 throw new IllegalArgumentException ("Null action"); 336 337 unmappableCharacterAction = newAction; 338 implOnUnmappableCharacter (newAction); 339 return this; 340 } 341 342 public final byte[] replacement () 343 { 344 return replacement; 345 } 346 347 public final CharsetEncoder replaceWith (byte[] newReplacement) 348 { 349 if (newReplacement == null) 350 throw new IllegalArgumentException ("Null replacement"); 351 if (newReplacement.length == 0) 352 throw new IllegalArgumentException ("Empty replacement"); 353 // XXX: what about maxBytesPerChar? 354 355 if (!isLegalReplacement (newReplacement)) 356 throw new IllegalArgumentException ("Illegal replacement"); 357 358 this.replacement = newReplacement; 359 implReplaceWith (newReplacement); 360 return this; 361 } 362 363 public final CharsetEncoder reset () 364 { 365 state = STATE_RESET; 366 implReset (); 367 return this; 368 } 369 }