ICU 50.1.2
normalizer2.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2009-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: normalizer2.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2009nov22
14 * created by: Markus W. Scherer
15 */
16 
17 #ifndef __NORMALIZER2_H__
18 #define __NORMALIZER2_H__
19 
25 #include "unicode/utypes.h"
26 
27 #if !UCONFIG_NO_NORMALIZATION
28 
29 #include "unicode/uniset.h"
30 #include "unicode/unistr.h"
31 #include "unicode/unorm2.h"
32 
34 
/**
 * Normalization interface. Most members declared below are pure virtual, so
 * concrete normalizers are subclasses; const instances are handed out by the
 * static get*Instance()/getInstance() functions declared in this class.
 */
78 class U_COMMON_API Normalizer2 : public UObject {
79 public:
/** Destructor; declared here without a body, so it is defined elsewhere. */
84  ~Normalizer2();
85 
86 #ifndef U_HIDE_DRAFT_API
87 
/**
 * Static accessor returning a pointer to a const Normalizer2; takes a
 * UErrorCode by reference. Only declared when U_HIDE_DRAFT_API is not defined.
 * NOTE(review): by its name this presumably yields the NFC normalizer; confirm
 * semantics and ownership of the returned pointer against the ICU API docs.
 */
98  static const Normalizer2 *
99  getNFCInstance(UErrorCode &errorCode);
100 
/**
 * Static accessor returning a pointer to a const Normalizer2; takes a
 * UErrorCode by reference. Only declared when U_HIDE_DRAFT_API is not defined.
 * NOTE(review): by its name this presumably yields the NFD normalizer; confirm
 * semantics and ownership of the returned pointer against the ICU API docs.
 */
112  static const Normalizer2 *
113  getNFDInstance(UErrorCode &errorCode);
114 
/**
 * Static accessor returning a pointer to a const Normalizer2; takes a
 * UErrorCode by reference. Only declared when U_HIDE_DRAFT_API is not defined.
 * NOTE(review): by its name this presumably yields the NFKC normalizer; confirm
 * semantics and ownership of the returned pointer against the ICU API docs.
 */
126  static const Normalizer2 *
127  getNFKCInstance(UErrorCode &errorCode);
128 
/**
 * Static accessor returning a pointer to a const Normalizer2; takes a
 * UErrorCode by reference. Only declared when U_HIDE_DRAFT_API is not defined.
 * NOTE(review): by its name this presumably yields the NFKD normalizer; confirm
 * semantics and ownership of the returned pointer against the ICU API docs.
 */
140  static const Normalizer2 *
141  getNFKDInstance(UErrorCode &errorCode);
142 
/**
 * Static accessor returning a pointer to a const Normalizer2; takes a
 * UErrorCode by reference. Last declaration inside the U_HIDE_DRAFT_API guard.
 * NOTE(review): by its name this presumably yields the NFKC_Casefold
 * normalizer; confirm semantics and ownership against the ICU API docs.
 */
154  static const Normalizer2 *
155  getNFKCCasefoldInstance(UErrorCode &errorCode);
156 #endif /* U_HIDE_DRAFT_API */
157 
/**
 * Static accessor selected by a package name, a data name and a
 * UNormalization2Mode; returns a pointer to a const Normalizer2 and takes a
 * UErrorCode by reference. Unlike the get*Instance() functions above it is not
 * inside the U_HIDE_DRAFT_API guard.
 * NOTE(review): how packageName/name are resolved, whether packageName may be
 * NULL, and who owns the result are not visible here; confirm in the ICU docs.
 */
179  static const Normalizer2 *
180  getInstance(const char *packageName,
181  const char *name,
182  UNormalization2Mode mode,
183  UErrorCode &errorCode);
184 
/**
 * Convenience overload: builds a fresh UnicodeString, fills it through the
 * three-argument virtual normalize(src, dest, errorCode), and returns it by
 * value. The reference returned by the inner call is ignored, and errorCode is
 * passed through untouched, so the result is returned whether or not the inner
 * call reported an error.
 */
195  UnicodeString
196  normalize(const UnicodeString &src, UErrorCode &errorCode) const {
197  UnicodeString result;
198  normalize(src, result, errorCode);
199  return result;
200  }
/**
 * Pure virtual: subclasses implement normalization from src into dest.
 * This is the overload the inline two-argument normalize() delegates to.
 * NOTE(review): the returned reference is presumably dest; confirm.
 */
214  virtual UnicodeString &
215  normalize(const UnicodeString &src,
216  UnicodeString &dest,
217  UErrorCode &errorCode) const = 0;
/**
 * Pure virtual. Takes a modifiable `first`, a read-only `second` and an error
 * code; returns a UnicodeString reference.
 * NOTE(review): presumably normalizes `second`, appends it to `first` and
 * returns `first`; confirm against the ICU API docs.
 */
232  virtual UnicodeString &
233  normalizeSecondAndAppend(UnicodeString &first,
234  const UnicodeString &second,
235  UErrorCode &errorCode) const = 0;
/**
 * Pure virtual with the same parameter list and return type as
 * normalizeSecondAndAppend().
 * NOTE(review): presumably appends `second` to `first` assuming both are
 * already normalized, and returns `first`; confirm against the ICU API docs.
 */
250  virtual UnicodeString &
251  append(UnicodeString &first,
252  const UnicodeString &second,
253  UErrorCode &errorCode) const = 0;
254 
/**
 * Pure virtual. Takes a code point and an output UnicodeString; returns UBool.
 * NOTE(review): presumably returns TRUE and fills `decomposition` when c has a
 * decomposition mapping; confirm against the ICU API docs.
 */
268  virtual UBool
269  getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
270 
/**
 * Virtual but, unlike getDecomposition(), not pure: Normalizer2 itself must
 * supply a definition elsewhere, and subclasses may override it.
 * NOTE(review): what the default implementation returns and how "raw" differs
 * from the full decomposition are not visible here; confirm in the ICU docs.
 */
295  virtual UBool
296  getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
297 
/**
 * Virtual, not pure: a base definition exists outside this header section.
 * Takes two code points and returns a code point.
 * NOTE(review): presumably the composite of a+b, with some sentinel when the
 * pair does not compose; confirm the sentinel value in the ICU docs.
 */
313  virtual UChar32
314  composePair(UChar32 a, UChar32 b) const;
315 
/**
 * Virtual, not pure: a base definition exists outside this header section.
 * Returns an 8-bit value for code point c.
 * NOTE(review): presumably the canonical combining class (ccc); confirm.
 */
324  virtual uint8_t
325  getCombiningClass(UChar32 c) const;
326 
/**
 * Pure virtual predicate over a whole string; takes a UErrorCode by reference.
 * NOTE(review): presumably TRUE when s is already in this normalizer's form;
 * confirm the return value on error against the ICU API docs.
 */
341  virtual UBool
342  isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
343 
/**
 * Pure virtual quick check of string s; takes a UErrorCode by reference and
 * returns a UNormalizationCheckResult (declared in unicode/unorm2.h, which
 * this header includes).
 * NOTE(review): the meaning of each result value is defined with that enum,
 * not here; confirm against the ICU API docs.
 */
359  virtual UNormalizationCheckResult
360  quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
361 
/**
 * Pure virtual; returns a 32-bit signed integer for string s.
 * NOTE(review): presumably the end index of the leading part of s that passes
 * the quick check; confirm against the ICU API docs.
 */
384  virtual int32_t
385  spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
386 
/**
 * Pure virtual per-code-point predicate; takes no error code.
 * NOTE(review): presumably TRUE when c always starts a normalization boundary;
 * confirm the exact definition against the ICU API docs.
 */
400  virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
401 
/**
 * Pure virtual per-code-point predicate; takes no error code.
 * NOTE(review): presumably TRUE when c always ends a normalization boundary;
 * confirm the exact definition against the ICU API docs.
 */
416  virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
417 
/**
 * Pure virtual per-code-point predicate; takes no error code.
 * NOTE(review): presumably TRUE when c neither changes under normalization nor
 * interacts with neighbouring characters; confirm against the ICU API docs.
 */
431  virtual UBool isInert(UChar32 c) const = 0;
432 
433 private:
434  // No ICU "poor man's RTTI" for this class nor its subclasses.
// Declared in the private section, so code outside the class cannot call it
// through a Normalizer2. Declared without a body; defined elsewhere.
435  virtual UClassID getDynamicClassID() const;
436 };
437 
/**
 * Normalizer2 implementation that pairs another Normalizer2 with a UnicodeSet
 * filter. It declares non-pure versions of every virtual that Normalizer2
 * declares above.
 */
449 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
450 public:
/**
 * Binds this object to n2 and filterSet. Both are stored as references (see
 * the norm2 and set members), not copied, so the caller must keep both objects
 * alive for as long as this FilteredNormalizer2 is in use.
 */
461  FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
462  norm2(n2), set(filterSet) {}
463 
/** Destructor; declared here, defined out of line. */
468  ~FilteredNormalizer2();
469 
/**
 * Same signature as the pure virtual Normalizer2::normalize(src, dest,
 * errorCode) declared earlier in this file, here without `= 0`; the body lives
 * outside this header.
 * NOTE(review): presumably applies norm2 only to the parts of src selected by
 * the filter set; confirm in the implementation.
 */
483  virtual UnicodeString &
484  normalize(const UnicodeString &src,
485  UnicodeString &dest,
486  UErrorCode &errorCode) const;
/**
 * Same signature as the pure virtual Normalizer2::normalizeSecondAndAppend()
 * declared earlier in this file, here without `= 0`; the body lives outside
 * this header.
 * NOTE(review): presumably forwards to the private helper of the same name
 * with doNormalize set; confirm in the implementation.
 */
501  virtual UnicodeString &
502  normalizeSecondAndAppend(UnicodeString &first,
503  const UnicodeString &second,
504  UErrorCode &errorCode) const;
/**
 * Same signature as the pure virtual Normalizer2::append() declared earlier in
 * this file, here without `= 0`; the body lives outside this header.
 * NOTE(review): filtered behaviour is not visible here; confirm in the
 * implementation.
 */
519  virtual UnicodeString &
520  append(UnicodeString &first,
521  const UnicodeString &second,
522  UErrorCode &errorCode) const;
523 
/**
 * Same signature as Normalizer2::getDecomposition(), here without `= 0`.
 * NOTE(review): presumably consults the filter set before asking norm2;
 * confirm in the implementation.
 */
535  virtual UBool
536  getDecomposition(UChar32 c, UnicodeString &decomposition) const;
537 
/**
 * Same signature as the non-pure virtual Normalizer2::getRawDecomposition()
 * declared earlier in this file.
 * NOTE(review): presumably consults the filter set before asking norm2;
 * confirm in the implementation.
 */
549  virtual UBool
550  getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
551 
/**
 * Same signature as the non-pure virtual Normalizer2::composePair() declared
 * earlier in this file.
 * NOTE(review): how the filter set applies to the pair (a, b) is not visible
 * here; confirm in the implementation.
 */
562  virtual UChar32
563  composePair(UChar32 a, UChar32 b) const;
564 
/**
 * Same signature as the non-pure virtual Normalizer2::getCombiningClass()
 * declared earlier in this file.
 * NOTE(review): the value returned for code points outside the filter set is
 * not visible here; confirm in the implementation.
 */
573  virtual uint8_t
574  getCombiningClass(UChar32 c) const;
575 
/**
 * Same signature as the pure virtual Normalizer2::isNormalized(), here without
 * `= 0`; the body lives outside this header.
 * NOTE(review): presumably checks only the parts of s selected by the filter
 * set; confirm in the implementation.
 */
587  virtual UBool
588  isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
/**
 * Same parameter list as Normalizer2::quickCheck(), here without `= 0`;
 * returns a UNormalizationCheckResult (declared in unicode/unorm2.h, which
 * this header includes). The body lives outside this header.
 * NOTE(review): how per-span results are combined is not visible here; confirm
 * in the implementation.
 */
600  virtual UNormalizationCheckResult
601  quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
/**
 * Same signature as the pure virtual Normalizer2::spanQuickCheckYes(), here
 * without `= 0`; the body lives outside this header.
 * NOTE(review): presumably an index into s; confirm in the implementation.
 */
613  virtual int32_t
614  spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
615 
/**
 * Same signature as the pure virtual Normalizer2::hasBoundaryBefore(), here
 * without `= 0`.
 * NOTE(review): result for code points outside the filter set is not visible
 * here; confirm in the implementation.
 */
624  virtual UBool hasBoundaryBefore(UChar32 c) const;
625 
/**
 * Same signature as the pure virtual Normalizer2::hasBoundaryAfter(), here
 * without `= 0`.
 * NOTE(review): result for code points outside the filter set is not visible
 * here; confirm in the implementation.
 */
634  virtual UBool hasBoundaryAfter(UChar32 c) const;
635 
/**
 * Same signature as the pure virtual Normalizer2::isInert(), here without
 * `= 0`.
 * NOTE(review): result for code points outside the filter set is not visible
 * here; confirm in the implementation.
 */
643  virtual UBool isInert(UChar32 c) const;
644 private:
/**
 * Private, non-virtual overload of normalize() with an extra
 * USetSpanCondition argument; declared only, defined elsewhere.
 * NOTE(review): spanCondition presumably says whether the first span of src is
 * expected inside or outside the filter set; confirm in the implementation.
 */
645  UnicodeString &
646  normalize(const UnicodeString &src,
647  UnicodeString &dest,
648  USetSpanCondition spanCondition,
649  UErrorCode &errorCode) const;
650 
/**
 * Private, non-virtual overload that adds a doNormalize flag to the public
 * (first, second, errorCode) parameter list; declared only, defined elsewhere.
 * NOTE(review): presumably the shared implementation behind the public
 * normalizeSecondAndAppend() and append(), with doNormalize choosing between
 * them; confirm in the implementation.
 */
651  UnicodeString &
652  normalizeSecondAndAppend(UnicodeString &first,
653  const UnicodeString &second,
654  UBool doNormalize,
655  UErrorCode &errorCode) const;
656 
// Both members are references bound in the constructor's initializer list;
// nothing is copied, so the referenced normalizer and set must outlive this
// object. Reference members also make the class non-assignable by default.
657  const Normalizer2 &norm2;
658  const UnicodeSet &set;
659 };
660 
662 
663 #endif // !UCONFIG_NO_NORMALIZATION
664 #endif // __NORMALIZER2_H__