UCommon
unicode.h
Go to the documentation of this file.
1 // Copyright (C) 2009-2010 David Sugar, Tycho Softworks.
2 //
3 // This file is part of GNU uCommon C++.
4 //
5 // GNU uCommon C++ is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU Lesser General Public License as published
7 // by the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // GNU uCommon C++ is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with GNU uCommon C++. If not, see <http://www.gnu.org/licenses/>.
17 
32 #ifndef _UCOMMON_UNICODE_H_
33 #define _UCOMMON_UNICODE_H_
34 
35 #ifndef _UCOMMON_STRING_H_
36 #include <ucommon/string.h>
37 #endif
38 
39 NAMESPACE_UCOMMON
40 
// Signed 32-bit integer alias. The name suggests it holds a single UCS-4
// (UTF-32) code point; the utf8 helpers in this header return and accept it
// per character. Presumably negative values are used for error/end
// conditions -- TODO confirm against the implementation.
45 typedef int32_t ucs4_t;
46 
// Signed 16-bit integer alias. The name suggests one UCS-2 code unit.
// NOTE(review): because the type is signed, values above 0x7FFF read back as
// negative numbers -- confirm that callers expect that.
50 typedef int16_t ucs2_t;
51 
// Untyped pointer alias used by the utf8 / UString interfaces in this header
// wherever a "unicode" string is passed. The element type behind the pointer
// is not visible here (presumably a platform wide-character buffer -- TODO
// confirm).
55 typedef void *unicode_t;
56 
// Exported class that groups UTF-8 helper routines; every member visible
// here is static, so the class acts as a namespace. Only declarations appear
// in this header: the behavioural notes below are inferred from names and
// signatures and must be confirmed against the out-of-line definitions.
62 class __EXPORT utf8
63 {
64 public:
    // Constant defined out of line. Presumably the size in bytes of one
    // element of a unicode_t string -- TODO confirm.
68  static const unsigned ucsize;
69 
    // Constant C-string pointer defined out of line. Presumably a shared
    // empty / placeholder string -- TODO confirm.
73  static const char *nil;
74 
    // Takes a pointer to encoded text and returns an unsigned value;
    // presumably the byte length of the UTF-8 sequence that starts there.
80  static unsigned size(const char *codepoint);
81 
    // Takes a C string and returns a size_t; presumably the number of code
    // points (not bytes) it contains.
87  static size_t count(const char *string);
88 
    // Returns a pointer into `string` for a signed code-point position.
    // The ssize_t parameter suggests negative positions are meaningful
    // (presumably counted from the end) -- TODO confirm.
95  static char *offset(char *string, ssize_t position);
96 
    // Presumably decodes the single code point that starts at `encoded`.
102  static ucs4_t codepoint(const char *encoded);
103 
    // Overload taking a whole unicode_t string; presumably the number of
    // UTF-8 bytes needed to encode it -- TODO confirm.
109  static size_t chars(const unicode_t string);
110 
    // Overload taking one character; presumably the number of UTF-8 bytes
    // needed to encode that character -- TODO confirm.
116  static size_t chars(ucs4_t character);
117 
    // Presumably writes `string` to `buffer` as UTF-8 and returns a count;
    // what the count measures is not visible here.
124  static size_t unpack(const unicode_t string, CharacterProtocol& buffer);
125 
    // Presumably the inverse of unpack(): reads UTF-8 from `buffer` into the
    // storage at `unicode`, bounded by `size` -- TODO confirm units of `size`.
133  static size_t pack(unicode_t unicode, CharacterProtocol& buffer, size_t size);
134 
    // Returns a ucs4_t buffer built from a C string. The free function
    // strudup() later in this header forwards here, and dupfree<ucs4_t*>
    // releases such pointers with ::free, which suggests malloc-family
    // allocation -- confirm in the implementation.
138  static ucs4_t *udup(const char *string);
139 
    // Returns a ucs2_t buffer built from a C string. strwdup() forwards here
    // and dupfree<ucs2_t*> releases such pointers with ::free.
143  static ucs2_t *wdup(const char *string);
144 
    // Searches `string` for `character`; `start` defaults to 0. Presumably a
    // forward search from code-point index `start`, returning a pointer to the
    // match (the not-found result is not visible here).
152  static const char *find(const char *string, ucs4_t character, size_t start = 0);
153 
    // Searches `string` for `character`; `end` defaults to (size_t)-1, i.e. the
    // largest size_t value. Presumably a reverse search -- TODO confirm.
161  static const char *rfind(const char *string, ucs4_t character, size_t end = (size_t)-1l);
162 
    // Presumably counts how many times `character` occurs in `string`.
169  static unsigned ccount(const char *string, ucs4_t character);
170 
    // Presumably reads one code point from `buffer`.
176  static ucs4_t get(CharacterProtocol& buffer);
177 
    // Presumably writes `character` to `buffer` as UTF-8; the meaning of the
    // ucs4_t return value is not visible here.
184  static ucs4_t put(ucs4_t character, CharacterProtocol& buffer);
185 };
186 
193 class __EXPORT UString : public String, public utf8
194 {
195 protected:
199  UString();
200 
205  UString(strsize_t size);
206 
211  UString(const unicode_t text);
212 
219  UString(const char *text, strsize_t size);
220 
227  UString(const unicode_t *text, const unicode_t *end);
228 
234  UString(const UString& existing);
235 
240  virtual ~UString();
241 
248  UString get(strsize_t codepoint, strsize_t size = 0) const;
249 
256  size_t get(unicode_t unicode, size_t size) const;
257 
262  void set(const unicode_t unicode);
263 
268  void add(const unicode_t unicode);
269 
275  ucs4_t at(int position) const;
276 
283  inline size_t operator()(unicode_t unicode, size_t size) const
284  {return get(unicode, size);};
285 
292  UString operator()(int codepoint, strsize_t size) const;
293 
301  const char *operator()(int offset) const;
302 
308  inline ucs4_t operator[](int position) const
309  {return UString::at(position);};
310 
315  inline strsize_t count(void) const
316  {return utf8::count(str->text);}
317 
323  unsigned ccount(ucs4_t character) const;
324 
331  const char *find(ucs4_t character, strsize_t start = 0) const;
332 
339  const char *rfind(ucs4_t character, strsize_t end = npos) const;
340 };
341 
347 class __EXPORT utf8_pointer
348 {
349 protected:
350  uint8_t *text;
351 
352 public:
356  utf8_pointer();
357 
362  utf8_pointer(const char *string);
363 
369 
374  utf8_pointer& operator ++();
375 
380  utf8_pointer& operator --();
381 
387  utf8_pointer& operator +=(long offset);
388 
394  utf8_pointer& operator -=(long offset);
395 
401  utf8_pointer operator+(long offset) const;
402 
408  utf8_pointer operator-(long offset) const;
409 
414  inline operator bool() const
415  {return text != NULL;};
416 
421  inline bool operator!() const
422  {return text == NULL;};
423 
429  ucs4_t operator[](long codepoint) const;
430 
436  utf8_pointer& operator=(const char *string);
437 
441  void inc(void);
442 
446  void dec(void);
447 
453  inline bool operator==(const char *string) const
454  {return (const char *)text == string;};
455 
461  inline bool operator!=(const char *string) const
462  {return (const char *)text != string;};
463 
468  inline ucs4_t operator*() const
469  {return utf8::codepoint((const char *)text);};
470 
475  inline char *c_str(void) const
476  {return (char *)text;};
477 
482  inline operator char*() const
483  {return (char *)text;};
484 
489  inline size_t len(void) const
490  {return utf8::count((const char *)text);};
491 };
492 
493 inline ucs4_t *strudup(const char *string)
494  {return utf8::udup(string);}
495 
496 inline ucs2_t *strwdup(const char *string)
497  {return utf8::wdup(string);}
498 
// Exported function defined out of line: takes a C string and returns a
// unicode_t. Presumably a newly allocated, converted copy of the text; the
// dupfree<unicode_t> specialization later in this header releases such
// pointers with ::free -- TODO confirm the allocator in the implementation.
499 __EXPORT unicode_t unidup(const char *string);
500 
501 template<>
502 inline void dupfree<ucs2_t*>(ucs2_t *string)
503  {::free(string);}
504 
505 template<>
506 inline void dupfree<ucs4_t*>(ucs4_t *string)
507  {::free(string);}
508 
509 template<>
510 inline void dupfree<unicode_t>(unicode_t string)
511  {::free(string);}
512 
517 
522 
523 END_NAMESPACE
524 
525 #endif