/* NumericShaper.java
   Copyright (C) 2003 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */


package java.awt.font;

import java.io.Serializable;
import java.lang.Character.UnicodeBlock;

/**
 * This class handles numeric shaping.  A shaper can either be contextual
 * or not.  A non-contextual shaper will always translate ASCII digits
 * in its input into the target Unicode range.  A contextual shaper will
 * change the target Unicode range depending on the characters it has
 * previously processed.
 *
 * @author Michael Koch
 * @author Tom Tromey
 *
 * @since 1.4
 * @specnote This class does not handle LIMBU or OSMANYA.
 * @specnote The JDK does not seem to properly handle ranges without a
 * digit zero, such as TAMIL.  This implementation does.
 */
public final class NumericShaper implements Serializable
{
  private static final long serialVersionUID = -8022764705923730308L;

  /** Convenience constant representing all the valid Unicode ranges.  */
  public static final int ALL_RANGES = 524287;

  /**
   * Constant representing the Unicode ARABIC range.  Shaping done
   * using this range will translate to the arabic decimal characters.
   * Use EASTERN_ARABIC if you want to shape to the eastern arabic
   * (also known as the extended arabic) decimal characters.
   */
  public static final int ARABIC = 2;

  /** Constant representing the Unicode BENGALI range.  */
  public static final int BENGALI = 16;

  /** Constant representing the Unicode DEVANAGARI range.  */
  public static final int DEVANAGARI = 8;

  /**
   * Constant representing the Unicode extended arabic range.
   * In Unicode there are two different sets of arabic digits;
   * this selects the extended or eastern set.
   */
  public static final int EASTERN_ARABIC = 4;

  /**
   * Constant representing the Unicode ETHIOPIC range.  Note that
   * there is no digit zero in this range; an ASCII digit zero
   * is left unchanged when shaping to this range.
   */
  public static final int ETHIOPIC = 65536;

  /**
   * Constant representing the Unicode EUROPEAN range.  For
   * contextual shaping purposes, characters in the various
   * extended Latin character blocks are recognized as EUROPEAN.
   */
  public static final int EUROPEAN = 1;

  /** Constant representing the Unicode GUJARATI range.  */
  public static final int GUJARATI = 64;

  /** Constant representing the Unicode GURMUKHI range.  */
  public static final int GURMUKHI = 32;

  /** Constant representing the Unicode KANNADA range.  */
  public static final int KANNADA = 1024;

  /** Constant representing the Unicode KHMER range.  */
  public static final int KHMER = 131072;

  /** Constant representing the Unicode LAO range.  */
  public static final int LAO = 8192;

  /** Constant representing the Unicode MALAYALAM range.  */
  public static final int MALAYALAM = 2048;

  /** Constant representing the Unicode MONGOLIAN range.  */
  public static final int MONGOLIAN = 262144;

  /** Constant representing the Unicode MYANMAR range.  */
  public static final int MYANMAR = 32768;

  /** Constant representing the Unicode ORIYA range.  */
  public static final int ORIYA = 128;

  /**
   * Constant representing the Unicode TAMIL range.  Note that
   * there is no digit zero in this range; an ASCII digit zero
   * is left unchanged when shaping to this range.
   */
  public static final int TAMIL = 256;

  /** Constant representing the Unicode TELUGU range.  */
  public static final int TELUGU = 512;

  /** Constant representing the Unicode THAI range.  */
  public static final int THAI = 4096;

  /** Constant representing the Unicode TIBETAN range.  */
  public static final int TIBETAN = 16384;

  /**
   * This table holds the zero digits for each language.  This is hard-coded
   * because the values will not change and the table layout is tied to the
   * other constants in this class in any case.  In the two places where a
   * language does not have a zero digit, the character immediately preceding
   * the one digit is used instead.  These languages are special-cased in
   * the shaping code.  The table is indexed by the bit position of the
   * corresponding range constant.
   */
  private static final char[] zeroDigits =
  {
    '0',      // EUROPEAN
    '\u0660', // ARABIC
    '\u06f0', // EASTERN_ARABIC
    '\u0966', // DEVANAGARI
    '\u09e6', // BENGALI
    '\u0a66', // GURMUKHI
    '\u0ae6', // GUJARATI
    '\u0b66', // ORIYA
    '\u0be6', // TAMIL - special case as there is no digit zero
    '\u0c66', // TELUGU
    '\u0ce6', // KANNADA
    '\u0d66', // MALAYALAM
    '\u0e50', // THAI
    '\u0ed0', // LAO
    '\u0f20', // TIBETAN
    '\u1040', // MYANMAR
    '\u1368', // ETHIOPIC - special case as there is no digit zero
    '\u17e0', // KHMER
    '\u1810'  // MONGOLIAN
  };

  /**
   * The default initial context for this shaper, specified as
   * an integer from 0 to 18.
   */
  private int key;

  /**
   * The target ranges handled by this shaper.  If the shaper
   * is not contextual, the high bit of this field will be set.
   * @specnote This was discovered by reading the serialization spec
   */
  private int mask;

  /**
   * Create a new numeric shaper.  The key given is a constant from
   * this class, the constructor turns it into its internal form.
   * @param key the key to use, as one of the manifest constants
   * @param mask a mask of languages to shape for
   */
  private NumericShaper (int key, int mask)
  {
    // This internal form is a bit goofy, but it is specified by
    // the serialization spec.
    this.key = Integer.numberOfTrailingZeros(key);
    this.mask = mask;
  }

  /**
   * Return an integer representing all the languages for which this
   * shaper will shape.  The result is taken by "or"ing together
   * the constants representing the various languages.
   * @return the range bits of this shaper, without the internal
   * non-contextual marker bit
   */
  public int getRanges ()
  {
    return mask & ALL_RANGES;
  }

  /**
   * Return true if this shaper is contextual, false if it is not.
   * @return true if this shaper was created as a contextual shaper
   */
  public boolean isContextual ()
  {
    // Non-contextual shapers are created with the sign bit set in the
    // mask (see getShaper), so every non-negative mask is contextual.
    // This includes a mask of zero, i.e. a contextual shaper that was
    // created with no ranges selected.
    return mask >= 0;
  }

  /**
   * Shape the text in the given array.  The starting context will
   * be the context passed to the shaper at creation time.
   * @param text the text to shape
   * @param start the index of the starting character of the array
   * @param count the number of characters in the array
   */
  public void shape (char[] text, int start, int count)
  {
    shape (text, start, count, 1 << key);
  }

  /**
   * Given a unicode block object, return corresponding language constant.
   * If the block is not recognized, returns zero.  Note that as there
   * is no separate ARABIC block in Character, this case must
   * be specially handled by the caller; EASTERN_ARABIC is preferred when
   * both are specified.
   * @param b the unicode block to classify
   * @return the language constant, or zero if not recognized
   */
  private static int classify(UnicodeBlock b)
  {
    if (b == null)
      return 0;
    // ARABIC is handled by the caller; from testing we know
    // that EASTERN_ARABIC takes precedence.
    if (b == UnicodeBlock.ARABIC)
      return EASTERN_ARABIC;
    if (b == UnicodeBlock.BENGALI)
      return BENGALI;
    if (b == UnicodeBlock.DEVANAGARI)
      return DEVANAGARI;
    if (b == UnicodeBlock.ETHIOPIC)
      return ETHIOPIC;
    if (b == UnicodeBlock.BASIC_LATIN
        || b == UnicodeBlock.LATIN_1_SUPPLEMENT
        || b == UnicodeBlock.LATIN_EXTENDED_A
        || b == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
        || b == UnicodeBlock.LATIN_EXTENDED_B)
      return EUROPEAN;
    if (b == UnicodeBlock.GUJARATI)
      return GUJARATI;
    if (b == UnicodeBlock.GURMUKHI)
      return GURMUKHI;
    if (b == UnicodeBlock.KANNADA)
      return KANNADA;
    if (b == UnicodeBlock.KHMER)
      return KHMER;
    if (b == UnicodeBlock.LAO)
      return LAO;
    if (b == UnicodeBlock.MALAYALAM)
      return MALAYALAM;
    if (b == UnicodeBlock.MONGOLIAN)
      return MONGOLIAN;
    if (b == UnicodeBlock.MYANMAR)
      return MYANMAR;
    if (b == UnicodeBlock.ORIYA)
      return ORIYA;
    if (b == UnicodeBlock.TAMIL)
      return TAMIL;
    if (b == UnicodeBlock.TELUGU)
      return TELUGU;
    if (b == UnicodeBlock.THAI)
      return THAI;
    if (b == UnicodeBlock.TIBETAN)
      return TIBETAN;
    return 0;
  }

  /**
   * Shape the given text, using the indicated initial context.
   * If this shaper is not a contextual shaper, then the given context
   * will be ignored.
   * @param text the text to shape
   * @param start the index of the first character of the text to shape
   * @param count the number of characters to shape in the text
   * @param context the initial context
   * @throws IllegalArgumentException if this shaper is contextual and
   * the initial context is not a single valid range
   */
  public void shape (char[] text, int start, int count, int context)
  {
    // Index into zeroDigits of the range digits are currently shaped to,
    // or -1 when digits are to be left alone.
    int currentContext;
    if (isContextual())
      {
        if (Integer.bitCount(context) != 1 || (context & ~ALL_RANGES) != 0)
          throw new IllegalArgumentException("invalid context argument");
        // If the indicated context is not one we are handling, reset it.
        if ((context & mask) == 0)
          currentContext = -1;
        else
          currentContext = Integer.numberOfTrailingZeros(context);
      }
    else
      currentContext = key;

    for (int i = 0; i < count; ++i)
      {
        char c = text[start + i];
        if (c >= '0' && c <= '9')
          {
            if (currentContext >= 0)
              {
                // Shape into the current context.
                if (c == '0'
                    && ((1 << currentContext) == TAMIL
                        || (1 << currentContext) == ETHIOPIC))
                  {
                    // No digit 0 in this context; do nothing.
                  }
                else
                  text[start + i]
                    = (char) (zeroDigits[currentContext] + c - '0');
              }
          }
        else if (isContextual())
          {
            // If c is in a group selected by this shaper, switch to it;
            // otherwise the current context is kept as it is.
            int group = classify(UnicodeBlock.of(c));
            // Specially handle ARABIC.
            if (group == EASTERN_ARABIC && (mask & EASTERN_ARABIC) == 0
                && (mask & ARABIC) != 0)
              group = ARABIC;
            if ((mask & group) != 0)
              {
                // The character was classified as being in a group
                // we recognize, and it was selected by the shaper.
                // So, change the context.
                currentContext = Integer.numberOfTrailingZeros(group);
              }
          }
      }
  }

  /**
   * Compare this shaper with another object.
   * @param obj the object to compare against
   * @return true if obj is a NumericShaper with the same key and mask
   */
  @Override
  public boolean equals (Object obj)
  {
    if (! (obj instanceof NumericShaper))
      return false;
    NumericShaper tmp = (NumericShaper) obj;
    return key == tmp.key && mask == tmp.mask;
  }

  /**
   * Return a hash code derived from the key and the mask.
   * @return the hash code of this shaper
   */
  @Override
  public int hashCode ()
  {
    return key ^ mask;
  }

  /**
   * Return a string showing the internal key and mask of this shaper.
   * @return a textual form of this shaper
   */
  @Override
  public String toString ()
  {
    // For debugging only.
    return "key=" + key + "; mask=" + mask;
  }

  /**
   * Return a non-contextual shaper which can shape to a single range.
   * All ASCII digits in the input text are translated to this language.
   * @param singleRange the target language
   * @return a non-contextual shaper for this language
   * @throws IllegalArgumentException if the argument does not name a
   * single language, as specified by the constants declared in this class
   */
  public static NumericShaper getShaper (int singleRange)
  {
    if (Integer.bitCount(singleRange) != 1)
      throw new IllegalArgumentException("more than one bit set in argument");
    if ((singleRange & ~ALL_RANGES) != 0)
      throw new IllegalArgumentException("argument out of range");
    // The sign bit marks the shaper as non-contextual.
    return new NumericShaper(singleRange, Integer.MIN_VALUE | singleRange);
  }

  /**
   * Return a contextual shaper which can shape to any of the indicated
   * languages.  The default initial context for this shaper is EUROPEAN.
   * @param ranges the ranges to shape to
   * @return a contextual shaper which will target any of these ranges
   * @throws IllegalArgumentException if the argument specifies an
   * unrecognized range
   */
  public static NumericShaper getContextualShaper (int ranges)
  {
    if ((ranges & ~ALL_RANGES) != 0)
      throw new IllegalArgumentException("argument out of range");
    return new NumericShaper(EUROPEAN, ranges);
  }

  /**
   * Return a contextual shaper which can shape to any of the indicated
   * languages.  The default initial context for this shaper is given as
   * an argument.
   * @param ranges the ranges to shape to
   * @param defaultContext the default initial context
   * @return a contextual shaper which will target any of these ranges
   * @throws IllegalArgumentException if the ranges argument specifies an
   * unrecognized range, or if the defaultContext argument does not specify
   * a single valid range
   */
  public static NumericShaper getContextualShaper (int ranges,
                                                   int defaultContext)
  {
    if (Integer.bitCount(defaultContext) != 1)
      throw new IllegalArgumentException("more than one bit set in context");
    if ((ranges & ~ALL_RANGES) != 0 || (defaultContext & ~ALL_RANGES) != 0)
      throw new IllegalArgumentException("argument out of range");
    return new NumericShaper(defaultContext, ranges);
  }
}