ICU 50.1.2  50.1.2
rbbi.h
Go to the documentation of this file.
1 /*
2 ***************************************************************************
3 * Copyright (C) 1999-2012 International Business Machines Corporation *
4 * and others. All rights reserved. *
5 ***************************************************************************
6 
7 **********************************************************************
8 * Date Name Description
9 * 10/22/99 alan Creation.
10 * 11/11/99 rgillam Complete port from Java.
11 **********************************************************************
12 */
13 
14 #ifndef RBBI_H
15 #define RBBI_H
16 
17 #include "unicode/utypes.h"
18 
24 #if !UCONFIG_NO_BREAK_ITERATION
25 
26 #include "unicode/brkiter.h"
27 #include "unicode/udata.h"
28 #include "unicode/parseerr.h"
29 #include "unicode/schriter.h"
30 #include "unicode/uchriter.h"
31 
32 
33 struct UTrie;
34 
36 
// Forward declarations. In the declarations visible in this header,
// RBBIDataHeader, RBBIDataWrapper, LanguageBreakEngine, UnhandledEngine and
// RBBIStateTable appear only as pointer targets, and BreakIterator only as a
// pointer/reference target or in a friend declaration.
// RuleBasedBreakIteratorTables and UStack are not referenced by any
// declaration visible here.
38 struct RBBIDataHeader;
39 class RuleBasedBreakIteratorTables;
40 class BreakIterator;
41 class RBBIDataWrapper;
42 class UStack;
43 class LanguageBreakEngine;
44 class UnhandledEngine;
45 struct RBBIStateTable;
46 
47 
48 
49 
66 
67 protected:
73 
80 
87 
94 
99  RBBIDataWrapper *fData;
100 
105 
113 
120 
129 
135 
142 
151 
159  UnhandledEngine *fUnhandledBreakEngine;
160 
166  int32_t fBreakType;
167 
168 protected:
169  //=======================================================================
170  // constructors
171  //=======================================================================
172 
173 #ifndef U_HIDE_INTERNAL_API
174 
// Single-valued tag enum. Its only use among the visible declarations is as
// the type of the `dontAdopt` parameter of the constructor that takes a
// `const RBBIDataHeader*`, which keeps that overload distinct from the one
// taking a non-const `RBBIDataHeader*`.
// NOTE(review): the name suggests the tagged overload does not take ownership
// of `data` -- confirm against the implementation.
182  enum EDontAdopt {
183  kDontAdopt
184  };
185 
// Protected constructor, declared only when U_HIDE_INTERNAL_API is not
// defined. Builds an iterator from a non-const RBBIDataHeader.
// @param data   Pointer to the rule data header.
//               NOTE(review): presumably adopted (owned) by the new object,
//               in contrast to the kDontAdopt overload -- confirm.
// @param status ICU error code reference; presumably set on failure.
196  RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
197 
// Protected constructor, declared only when U_HIDE_INTERNAL_API is not
// defined. Builds an iterator from a const RBBIDataHeader.
// @param data      Pointer to the rule data header; not modifiable through
//                  this pointer.
// @param dontAdopt Tag argument (kDontAdopt) selecting this overload.
//                  NOTE(review): presumably means `data` stays owned by the
//                  caller and must outlive the iterator -- confirm.
// @param status    ICU error code reference; presumably set on failure.
206  RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status);
207 #endif /* U_HIDE_INTERNAL_API */
208 
209 
210  friend class RBBIRuleBuilder;
212  friend class BreakIterator;
213 
214 
215 
216 public:
217 
223 
231 
241  UParseError &parseError,
242  UErrorCode &status);
243 
// Public constructor taking rules in compiled (binary) form.
// @param compiledRules Pointer to the compiled rule data; not modified
//                      through this pointer.
// @param ruleLength    Length of the compiled rule data.
//                      NOTE(review): presumably a byte count -- confirm.
// @param status        ICU error code reference; presumably set on failure.
// NOTE(review): whether the buffer is copied or must outlive the iterator is
// not visible in this header -- confirm before relying on either.
267  RuleBasedBreakIterator(const uint8_t *compiledRules,
268  uint32_t ruleLength,
269  UErrorCode &status);
270 
284 
289  virtual ~RuleBasedBreakIterator();
290 
298  RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
299 
308  virtual UBool operator==(const BreakIterator& that) const;
309 
// Inequality against any BreakIterator. Unlike operator==, this is declared
// non-virtual. The inline body visible near the end of this header
// (`return !operator==(that);`) appears to be this operator's definition,
// i.e. the result is the negation of operator==.
317  UBool operator!=(const BreakIterator& that) const;
318 
329  virtual BreakIterator* clone() const;
330 
336  virtual int32_t hashCode(void) const;
337 
343  virtual const UnicodeString& getRules(void) const;
344 
345  //=======================================================================
346  // BreakIterator overrides
347  //=======================================================================
348 
374  virtual CharacterIterator& getText(void) const;
375 
376 
391  virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
392 
400  virtual void adoptText(CharacterIterator* newText);
401 
408  virtual void setText(const UnicodeString& newText);
409 
423  virtual void setText(UText *text, UErrorCode &status);
424 
430  virtual int32_t first(void);
431 
437  virtual int32_t last(void);
438 
449  virtual int32_t next(int32_t n);
450 
456  virtual int32_t next(void);
457 
463  virtual int32_t previous(void);
464 
472  virtual int32_t following(int32_t offset);
473 
481  virtual int32_t preceding(int32_t offset);
482 
491  virtual UBool isBoundary(int32_t offset);
492 
498  virtual int32_t current(void) const;
499 
500 
533  virtual int32_t getRuleStatus() const;
534 
// Vector form of getRuleStatus().
// @param fillInVec Caller-supplied int32_t array that receives the values.
// @param capacity  Size of fillInVec.
//                  NOTE(review): presumably counted in elements -- confirm.
// @param status    ICU error code reference.
//                  NOTE(review): behaviour when capacity is too small is not
//                  visible here -- confirm.
// @return int32_t; NOTE(review): presumably the number of status values
//         available -- confirm.
558  virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
559 
571  virtual UClassID getDynamicClassID(void) const;
572 
584  static UClassID U_EXPORT2 getStaticClassID(void);
585 
586  /*
587  * Create a clone (copy) of this break iterator in memory provided
588  * by the caller. The idea is to increase performance by avoiding
589  * a storage allocation. Use of this function is NOT RECOMMENDED.
590  * Performance gains are minimal, and correct buffer management is
591  * tricky. Use clone() instead.
592  *
593  * @param stackBuffer The pointer to the memory into which the cloned object
594  * should be placed. If NULL, allocate heap memory
595  * for the cloned object.
596  * @param BufferSize The size of the buffer. If zero, return the required
597  * buffer size, but do not clone the object. If the
598  * size was too small (but not zero), allocate heap
599  * storage for the cloned object.
600  *
601  * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be
602  * returned if the provided buffer was too small, and
603  * the clone was therefore put on the heap.
604  *
605  * @return Pointer to the clone object. This may differ from the stackBuffer
606  * address if the byte alignment of the stack buffer was not suitable
607  * or if the stackBuffer was too small to hold the clone.
608  * @stable ICU 2.0
609  */
610  virtual BreakIterator * createBufferClone(void *stackBuffer,
611  int32_t &BufferSize,
612  UErrorCode &status);
613 
614 
// Returns a pointer to the rules in binary form.
// @param length Output parameter (non-const reference).
//               NOTE(review): presumably receives the size in bytes of the
//               returned data -- confirm.
// @return Pointer to const bytes. NOTE(review): ownership and lifetime of the
//         returned buffer are not visible here; presumably owned by this
//         iterator -- confirm.
// Note that this accessor is declared non-const.
632  virtual const uint8_t *getBinaryRules(uint32_t &length);
633 
// Replaces the UText this iterator works on with `input`.
// @param input  The UText to switch to.
//               NOTE(review): constraints on `input` (for example, that it
//               must carry the same content as the current text) are not
//               visible here -- confirm.
// @param status ICU error code reference; presumably set on failure.
// @return Reference to a RuleBasedBreakIterator; presumably *this -- confirm.
659  virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status);
660 
661 
662 protected:
663  //=======================================================================
664  // implementation
665  //=======================================================================
671  virtual void reset(void);
672 
// Compiled out: everything between `#if 0` and the matching `#endif` is
// removed by the preprocessor, so isDictionaryChar() and getBreakType() are
// NOT declared as members by this region.
673 #if 0
674 
682  virtual UBool isDictionaryChar(UChar32);
683 
688  virtual int32_t getBreakType() const;
689 #endif
690 
695  virtual void setBreakType(int32_t type);
696 
697 #ifndef U_HIDE_INTERNAL_API
698 
703  void init();
704 #endif /* U_HIDE_INTERNAL_API */
705 
706 private:
707 
// Private, non-virtual helpers. Each takes a pointer to a const
// RBBIStateTable and returns an int32_t.
// NOTE(review): presumably each runs the given state table over the text
// (backward for handlePrevious, forward for handleNext) and returns the
// boundary position found -- confirm in the implementation.
717  int32_t handlePrevious(const RBBIStateTable *statetable);
718 
728  int32_t handleNext(const RBBIStateTable *statetable);
729 
730 protected:
731 
732 #ifndef U_HIDE_INTERNAL_API
733 
// Protected, non-virtual; declared only when U_HIDE_INTERNAL_API is not
// defined.
// @param startPos Start of the range to examine.
// @param endPos   End of the range to examine.
// @param reverse  Direction flag.
//                 NOTE(review): presumably TRUE when iterating backward --
//                 confirm.
// @return int32_t; NOTE(review): presumably the break position chosen after
//         dictionary-based analysis of the range -- confirm.
747  int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
748 #endif /* U_HIDE_INTERNAL_API */
749 
750 private:
751 
758  const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
759 
763  void makeRuleStatusValid();
764 
765 };
766 
767 //------------------------------------------------------------------------------
768 //
769 // Inline Functions Definitions ...
770 //
771 //------------------------------------------------------------------------------
772 
774  return !operator==(that);
775 }
776 
778 
779 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
780 
781 #endif