00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #ifndef __MYGUI_U_STRING_H__
00028 #define __MYGUI_U_STRING_H__
00029
00030
00031 #include "MyGUI_Prerequest.h"
00032 #include "MyGUI_Diagnostic.h"
00033
00034
00035 #include <iterator>
00036 #include <string>
00037 #include <stdexcept>
00038 #include <assert.h>
00039
00040
00041
00042
00043
00044
00045
00046
00047 #if MYGUI_COMPILER == MYGUI_COMPILER_MSVC && (1300 <= MYGUI_COMP_VER && MYGUI_COMP_VER <= 1310)
00048
00049 # if defined(_DLL_CPPLIB)
00050
00051 namespace std
00052 {
00053 template class _CRTIMP2 basic_string<unsigned short, char_traits<unsigned short>,
00054 allocator<unsigned short> >;
00055
00056 template class _CRTIMP2 basic_string<__wchar_t, char_traits<__wchar_t>,
00057 allocator<__wchar_t> >;
00058 }
00059
00060 # endif // defined(_DLL_CPPLIB)
00061
00062 #endif // MYGUI_COMPILER == MYGUI_COMPILER_MSVC && (1300 <= MYGUI_COMP_VER && MYGUI_COMP_VER <= 1310)
00063
00064
00065 namespace MyGUI
00066 {
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101 #ifdef __STDC_ISO_10646__
00102
00103
00104 #else // #ifdef __STDC_ISO_10646__
00105 #if defined( __WIN32__ ) || defined( _WIN32 )
00106 #define WCHAR_UTF16 // All currently known Windows platforms utilize UTF-16 encoding in wchar_t
00107 #else // #if defined( __WIN32__ ) || defined( _WIN32 )
00108 #if WCHAR_MAX <= 0xFFFF // this is a last resort fall back test; WCHAR_MAX is defined in <wchar.h>
00109 #define WCHAR_UTF16 // best we can tell, wchar_t is not larger than 16-bit
00110 #endif // #if WCHAR_MAX <= 0xFFFF
00111 #endif // #if defined( __WIN32__ ) || defined( _WIN32 )
00112 #endif // #ifdef __STDC_ISO_10646__
00113
00114
00115
00116
00117 #if MYGUI_COMPILER == MYGUI_COMPILER_MSVC
00118
00119
00120
00121
00122
00123 # if defined(_NATIVE_WCHAR_T_DEFINED)
00124 # define MYGUI_IS_NATIVE_WCHAR_T 1
00125 # else
00126 # define MYGUI_IS_NATIVE_WCHAR_T 0
00127 # endif
00128
00129 #else // MYGUI_COMPILER != MYGUI_COMPILER_MSVC
00130
00131
00132 # define MYGUI_IS_NATIVE_WCHAR_T 1
00133
00134 #endif // MYGUI_COMPILER == MYGUI_COMPILER_MSVC
00135
00137
00162 class UString
00163 {
00164
00165 static const unsigned char _lead1 = 0xC0;
00166 static const unsigned char _lead1_mask = 0x1F;
00167 static const unsigned char _lead2 = 0xE0;
00168 static const unsigned char _lead2_mask = 0x0F;
00169 static const unsigned char _lead3 = 0xF0;
00170 static const unsigned char _lead3_mask = 0x07;
00171 static const unsigned char _lead4 = 0xF8;
00172 static const unsigned char _lead4_mask = 0x03;
00173 static const unsigned char _lead5 = 0xFC;
00174 static const unsigned char _lead5_mask = 0x01;
00175 static const unsigned char _cont = 0x80;
00176 static const unsigned char _cont_mask = 0x3F;
00177
00178 public:
00180 typedef size_t size_type;
00182 static const size_type npos = ~0;
00183
00185 typedef uint32 unicode_char;
00186
00188 typedef uint16 code_point;
00189
00191 typedef code_point value_type;
00192
00193 typedef std::basic_string<code_point> dstring;
00194
00196 typedef std::basic_string<unicode_char> utf32string;
00197
//! Exception type for invalid string data; a thin wrapper over std::runtime_error.
class invalid_data : public std::runtime_error
{
public:
    //! Constructs the exception; \a _Message is handed unchanged to std::runtime_error.
    explicit invalid_data( const std::string& _Message )
        : std::runtime_error( _Message )
    {
    }
};
00208
00209
00211 class _base_iterator: public std::iterator<std::random_access_iterator_tag, value_type>
00212 {
00213 friend class UString;
00214 protected:
00215 _base_iterator()
00216 {
00217 mString = 0;
00218 }
00219
00220 void _seekFwd( size_type c )
00221 {
00222 mIter += c;
00223 }
00224 void _seekRev( size_type c )
00225 {
00226 mIter -= c;
00227 }
00228 void _become( const _base_iterator& i )
00229 {
00230 mIter = i.mIter;
00231 mString = i.mString;
00232 }
00233 bool _test_begin() const
00234 {
00235 return mIter == mString->mData.begin();
00236 }
00237 bool _test_end() const
00238 {
00239 return mIter == mString->mData.end();
00240 }
00241 size_type _get_index() const
00242 {
00243 return mIter - mString->mData.begin();
00244 }
00245 void _jump_to( size_type index )
00246 {
00247 mIter = mString->mData.begin() + index;
00248 }
00249
00250 unicode_char _getCharacter() const
00251 {
00252 size_type current_index = _get_index();
00253 return mString->getChar( current_index );
00254 }
00255 int _setCharacter( unicode_char uc )
00256 {
00257 size_type current_index = _get_index();
00258 int change = mString->setChar( current_index, uc );
00259 _jump_to( current_index );
00260 return change;
00261 }
00262
00263 void _moveNext()
00264 {
00265 _seekFwd( 1 );
00266 if ( _test_end() ) return;
00267 if ( _utf16_surrogate_follow( mIter[0] ) )
00268 {
00269
00270
00271 code_point lead_half = 0;
00272
00273 lead_half = mIter[-1];
00274 if ( _utf16_surrogate_lead( lead_half ) )
00275 {
00276 _seekFwd( 1 );
00277 }
00278 }
00279 }
00280 void _movePrev()
00281 {
00282 _seekRev( 1 );
00283 if ( _test_begin() ) return;
00284 if ( _utf16_surrogate_follow( mIter[0] ) )
00285 {
00286
00287
00288 code_point lead_half = 0;
00289 lead_half = mIter[-1];
00290 if ( _utf16_surrogate_lead( lead_half ) )
00291 {
00292 _seekRev( 1 );
00293 }
00294 }
00295 }
00296
00297 dstring::iterator mIter;
00298 UString* mString;
00299 };
00300
00301
00302
00303
00304 class _const_fwd_iterator;
00305
00307 class _fwd_iterator: public _base_iterator
00308 {
00309 friend class _const_fwd_iterator;
00310 public:
00311 _fwd_iterator() { }
00312 _fwd_iterator( const _fwd_iterator& i )
00313 {
00314 _become( i );
00315 }
00316
00318 _fwd_iterator& operator++()
00319 {
00320 _seekFwd( 1 );
00321 return *this;
00322 }
00324 _fwd_iterator operator++( int )
00325 {
00326 _fwd_iterator tmp( *this );
00327 _seekFwd( 1 );
00328 return tmp;
00329 }
00330
00332 _fwd_iterator& operator--()
00333 {
00334 _seekRev( 1 );
00335 return *this;
00336 }
00338 _fwd_iterator operator--( int )
00339 {
00340 _fwd_iterator tmp( *this );
00341 _seekRev( 1 );
00342 return tmp;
00343 }
00344
00346 _fwd_iterator operator+( size_type n )
00347 {
00348 _fwd_iterator tmp( *this );
00349 tmp._seekFwd( n );
00350 return tmp;
00351 }
00353 _fwd_iterator operator+( difference_type n )
00354 {
00355 _fwd_iterator tmp( *this );
00356 if ( n < 0 )
00357 tmp._seekRev( -n );
00358 else
00359 tmp._seekFwd( n );
00360 return tmp;
00361 }
00363 _fwd_iterator operator-( size_type n )
00364 {
00365 _fwd_iterator tmp( *this );
00366 tmp._seekRev( n );
00367 return tmp;
00368 }
00370 _fwd_iterator operator-( difference_type n )
00371 {
00372 _fwd_iterator tmp( *this );
00373 if ( n < 0 )
00374 tmp._seekFwd( -n );
00375 else
00376 tmp._seekRev( n );
00377 return tmp;
00378 }
00379
00381 _fwd_iterator& operator+=( size_type n )
00382 {
00383 _seekFwd( n );
00384 return *this;
00385 }
00387 _fwd_iterator& operator+=( difference_type n )
00388 {
00389 if ( n < 0 )
00390 _seekRev( -n );
00391 else
00392 _seekFwd( n );
00393 return *this;
00394 }
00396 _fwd_iterator& operator-=( size_type n )
00397 {
00398 _seekRev( n );
00399 return *this;
00400 }
00402 _fwd_iterator& operator-=( difference_type n )
00403 {
00404 if ( n < 0 )
00405 _seekFwd( -n );
00406 else
00407 _seekRev( n );
00408 return *this;
00409 }
00410
00412 value_type& operator*() const
00413 {
00414 return *mIter;
00415 }
00416
00418 value_type& operator[]( size_type n ) const
00419 {
00420 _fwd_iterator tmp( *this );
00421 tmp += n;
00422 return *tmp;
00423 }
00425 value_type& operator[]( difference_type n ) const
00426 {
00427 _fwd_iterator tmp( *this );
00428 tmp += n;
00429 return *tmp;
00430 }
00431
00433 _fwd_iterator& moveNext()
00434 {
00435 _moveNext();
00436 return *this;
00437 }
00439 _fwd_iterator& movePrev()
00440 {
00441 _movePrev();
00442 return *this;
00443 }
00445 unicode_char getCharacter() const
00446 {
00447 return _getCharacter();
00448 }
00450 int setCharacter( unicode_char uc )
00451 {
00452 return _setCharacter( uc );
00453 }
00454 };
00455
00456
00457
00458
00460 class _const_fwd_iterator: public _base_iterator
00461 {
00462 public:
00463 _const_fwd_iterator() { }
00464 _const_fwd_iterator( const _const_fwd_iterator& i )
00465 {
00466 _become( i );
00467 }
00468 _const_fwd_iterator( const _fwd_iterator& i )
00469 {
00470 _become( i );
00471 }
00472
00474 _const_fwd_iterator& operator++()
00475 {
00476 _seekFwd( 1 );
00477 return *this;
00478 }
00480 _const_fwd_iterator operator++( int )
00481 {
00482 _const_fwd_iterator tmp( *this );
00483 _seekFwd( 1 );
00484 return tmp;
00485 }
00486
00488 _const_fwd_iterator& operator--()
00489 {
00490 _seekRev( 1 );
00491 return *this;
00492 }
00494 _const_fwd_iterator operator--( int )
00495 {
00496 _const_fwd_iterator tmp( *this );
00497 _seekRev( 1 );
00498 return tmp;
00499 }
00500
00502 _const_fwd_iterator operator+( size_type n )
00503 {
00504 _const_fwd_iterator tmp( *this );
00505 tmp._seekFwd( n );
00506 return tmp;
00507 }
00509 _const_fwd_iterator operator+( difference_type n )
00510 {
00511 _const_fwd_iterator tmp( *this );
00512 if ( n < 0 )
00513 tmp._seekRev( -n );
00514 else
00515 tmp._seekFwd( n );
00516 return tmp;
00517 }
00519 _const_fwd_iterator operator-( size_type n )
00520 {
00521 _const_fwd_iterator tmp( *this );
00522 tmp._seekRev( n );
00523 return tmp;
00524 }
00526 _const_fwd_iterator operator-( difference_type n )
00527 {
00528 _const_fwd_iterator tmp( *this );
00529 if ( n < 0 )
00530 tmp._seekFwd( -n );
00531 else
00532 tmp._seekRev( n );
00533 return tmp;
00534 }
00535
00537 _const_fwd_iterator& operator+=( size_type n )
00538 {
00539 _seekFwd( n );
00540 return *this;
00541 }
00543 _const_fwd_iterator& operator+=( difference_type n )
00544 {
00545 if ( n < 0 )
00546 _seekRev( -n );
00547 else
00548 _seekFwd( n );
00549 return *this;
00550 }
00552 _const_fwd_iterator& operator-=( size_type n )
00553 {
00554 _seekRev( n );
00555 return *this;
00556 }
00558 _const_fwd_iterator& operator-=( difference_type n )
00559 {
00560 if ( n < 0 )
00561 _seekFwd( -n );
00562 else
00563 _seekRev( n );
00564 return *this;
00565 }
00566
00568 const value_type& operator*() const
00569 {
00570 return *mIter;
00571 }
00572
00574 const value_type& operator[]( size_type n ) const
00575 {
00576 _const_fwd_iterator tmp( *this );
00577 tmp += n;
00578 return *tmp;
00579 }
00581 const value_type& operator[]( difference_type n ) const
00582 {
00583 _const_fwd_iterator tmp( *this );
00584 tmp += n;
00585 return *tmp;
00586 }
00587
00589 _const_fwd_iterator& moveNext()
00590 {
00591 _moveNext();
00592 return *this;
00593 }
00595 _const_fwd_iterator& movePrev()
00596 {
00597 _movePrev();
00598 return *this;
00599 }
00601 unicode_char getCharacter() const
00602 {
00603 return _getCharacter();
00604 }
00605
00607 friend size_type operator-( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00609 friend bool operator==( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00611 friend bool operator!=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00613 friend bool operator<( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00615 friend bool operator<=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00617 friend bool operator>( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00619 friend bool operator>=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00620
00621 };
00622
00623
00624
00625
00626 class _const_rev_iterator;
00628 class _rev_iterator: public _base_iterator
00629 {
00630 friend class _const_rev_iterator;
00631 public:
00632 _rev_iterator() { }
00633 _rev_iterator( const _rev_iterator& i )
00634 {
00635 _become( i );
00636 }
00637
00639 _rev_iterator& operator++()
00640 {
00641 _seekRev( 1 );
00642 return *this;
00643 }
00645 _rev_iterator operator++( int )
00646 {
00647 _rev_iterator tmp( *this );
00648 _seekRev( 1 );
00649 return tmp;
00650 }
00651
00653 _rev_iterator& operator--()
00654 {
00655 _seekFwd( 1 );
00656 return *this;
00657 }
00659 _rev_iterator operator--( int )
00660 {
00661 _rev_iterator tmp( *this );
00662 _seekFwd( 1 );
00663 return tmp;
00664 }
00665
00667 _rev_iterator operator+( size_type n )
00668 {
00669 _rev_iterator tmp( *this );
00670 tmp._seekRev( n );
00671 return tmp;
00672 }
00674 _rev_iterator operator+( difference_type n )
00675 {
00676 _rev_iterator tmp( *this );
00677 if ( n < 0 )
00678 tmp._seekFwd( -n );
00679 else
00680 tmp._seekRev( n );
00681 return tmp;
00682 }
00684 _rev_iterator operator-( size_type n )
00685 {
00686 _rev_iterator tmp( *this );
00687 tmp._seekFwd( n );
00688 return tmp;
00689 }
00691 _rev_iterator operator-( difference_type n )
00692 {
00693 _rev_iterator tmp( *this );
00694 if ( n < 0 )
00695 tmp._seekRev( -n );
00696 else
00697 tmp._seekFwd( n );
00698 return tmp;
00699 }
00700
00702 _rev_iterator& operator+=( size_type n )
00703 {
00704 _seekRev( n );
00705 return *this;
00706 }
00708 _rev_iterator& operator+=( difference_type n )
00709 {
00710 if ( n < 0 )
00711 _seekFwd( -n );
00712 else
00713 _seekRev( n );
00714 return *this;
00715 }
00717 _rev_iterator& operator-=( size_type n )
00718 {
00719 _seekFwd( n );
00720 return *this;
00721 }
00723 _rev_iterator& operator-=( difference_type n )
00724 {
00725 if ( n < 0 )
00726 _seekRev( -n );
00727 else
00728 _seekFwd( n );
00729 return *this;
00730 }
00731
00733 value_type& operator*() const
00734 {
00735 return mIter[-1];
00736 }
00737
00739 value_type& operator[]( size_type n ) const
00740 {
00741 _rev_iterator tmp( *this );
00742 tmp -= n;
00743 return *tmp;
00744 }
00746 value_type& operator[]( difference_type n ) const
00747 {
00748 _rev_iterator tmp( *this );
00749 tmp -= n;
00750 return *tmp;
00751 }
00752 };
00753
00755 class _const_rev_iterator: public _base_iterator
00756 {
00757 public:
00758 _const_rev_iterator() { }
00759 _const_rev_iterator( const _const_rev_iterator& i )
00760 {
00761 _become( i );
00762 }
00763 _const_rev_iterator( const _rev_iterator& i )
00764 {
00765 _become( i );
00766 }
00768 _const_rev_iterator& operator++()
00769 {
00770 _seekRev( 1 );
00771 return *this;
00772 }
00774 _const_rev_iterator operator++( int )
00775 {
00776 _const_rev_iterator tmp( *this );
00777 _seekRev( 1 );
00778 return tmp;
00779 }
00780
00782 _const_rev_iterator& operator--()
00783 {
00784 _seekFwd( 1 );
00785 return *this;
00786 }
00788 _const_rev_iterator operator--( int )
00789 {
00790 _const_rev_iterator tmp( *this );
00791 _seekFwd( 1 );
00792 return tmp;
00793 }
00794
00796 _const_rev_iterator operator+( size_type n )
00797 {
00798 _const_rev_iterator tmp( *this );
00799 tmp._seekRev( n );
00800 return tmp;
00801 }
00803 _const_rev_iterator operator+( difference_type n )
00804 {
00805 _const_rev_iterator tmp( *this );
00806 if ( n < 0 )
00807 tmp._seekFwd( -n );
00808 else
00809 tmp._seekRev( n );
00810 return tmp;
00811 }
00813 _const_rev_iterator operator-( size_type n )
00814 {
00815 _const_rev_iterator tmp( *this );
00816 tmp._seekFwd( n );
00817 return tmp;
00818 }
00820 _const_rev_iterator operator-( difference_type n )
00821 {
00822 _const_rev_iterator tmp( *this );
00823 if ( n < 0 )
00824 tmp._seekRev( -n );
00825 else
00826 tmp._seekFwd( n );
00827 return tmp;
00828 }
00829
00831 _const_rev_iterator& operator+=( size_type n )
00832 {
00833 _seekRev( n );
00834 return *this;
00835 }
00837 _const_rev_iterator& operator+=( difference_type n )
00838 {
00839 if ( n < 0 )
00840 _seekFwd( -n );
00841 else
00842 _seekRev( n );
00843 return *this;
00844 }
00846 _const_rev_iterator& operator-=( size_type n )
00847 {
00848 _seekFwd( n );
00849 return *this;
00850 }
00852 _const_rev_iterator& operator-=( difference_type n )
00853 {
00854 if ( n < 0 )
00855 _seekRev( -n );
00856 else
00857 _seekFwd( n );
00858 return *this;
00859 }
00860
00862 const value_type& operator*() const
00863 {
00864 return mIter[-1];
00865 }
00866
00868 const value_type& operator[]( size_type n ) const
00869 {
00870 _const_rev_iterator tmp( *this );
00871 tmp -= n;
00872 return *tmp;
00873 }
00875 const value_type& operator[]( difference_type n ) const
00876 {
00877 _const_rev_iterator tmp( *this );
00878 tmp -= n;
00879 return *tmp;
00880 }
00881
00883 friend size_type operator-( const _const_rev_iterator& left, const _const_rev_iterator& right );
00885 friend bool operator==( const _const_rev_iterator& left, const _const_rev_iterator& right );
00887 friend bool operator!=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00889 friend bool operator<( const _const_rev_iterator& left, const _const_rev_iterator& right );
00891 friend bool operator<=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00893 friend bool operator>( const _const_rev_iterator& left, const _const_rev_iterator& right );
00895 friend bool operator>=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00896 };
00897
00898
00899 typedef _fwd_iterator iterator;
00900 typedef _rev_iterator reverse_iterator;
00901 typedef _const_fwd_iterator const_iterator;
00902 typedef _const_rev_iterator const_reverse_iterator;
00903
00904
00906
00907
00908 UString()
00909 {
00910 _init();
00911 }
00913 UString( const UString& copy )
00914 {
00915 _init();
00916 mData = copy.mData;
00917 }
00919 UString( size_type length, const code_point& ch )
00920 {
00921 _init();
00922 assign( length, ch );
00923 }
00925 UString( const code_point* str )
00926 {
00927 _init();
00928 assign( str );
00929 }
00931 UString( const code_point* str, size_type length )
00932 {
00933 _init();
00934 assign( str, length );
00935 }
00937 UString( const UString& str, size_type index, size_type length )
00938 {
00939 _init();
00940 assign( str, index, length );
00941 }
00942 #if MYGUI_IS_NATIVE_WCHAR_T
00944 UString( const wchar_t* w_str )
00945 {
00946 _init();
00947 assign( w_str );
00948 }
00950 UString( const wchar_t* w_str, size_type length )
00951 {
00952 _init();
00953 assign( w_str, length );
00954 }
00955 #endif
00957 UString( const std::wstring& wstr )
00958 {
00959 _init();
00960 assign( wstr );
00961 }
00963 UString( const char* c_str )
00964 {
00965 _init();
00966 assign( c_str );
00967 }
00969 UString( const char* c_str, size_type length )
00970 {
00971 _init();
00972 assign( c_str, length );
00973 }
00975 UString( const std::string& str )
00976 {
00977 _init();
00978 assign( str );
00979 }
00981 ~UString()
00982 {
00983 _cleanBuffer();
00984 }
00986
00988
00990
00991
00992 size_type size() const
00993 {
00994 return mData.size();
00995 }
00997 size_type length() const
00998 {
00999 return size();
01000 }
01002
01003 size_type length_Characters() const
01004 {
01005 const_iterator i = begin(), ie = end();
01006 size_type c = 0;
01007 while ( i != ie )
01008 {
01009 i.moveNext();
01010 ++c;
01011 }
01012 return c;
01013 }
01015 size_type max_size() const
01016 {
01017 return mData.max_size();
01018 }
01020 void reserve( size_type size )
01021 {
01022 mData.reserve( size );
01023 }
01025 void resize( size_type num, const code_point& val = 0 )
01026 {
01027 mData.resize( num, val );
01028 }
01030 void swap( UString& from )
01031 {
01032 mData.swap( from.mData );
01033 }
01035 bool empty() const
01036 {
01037 return mData.empty();
01038 }
01040 const code_point* c_str() const
01041 {
01042 return mData.c_str();
01043 }
01045 const code_point* data() const
01046 {
01047 return c_str();
01048 }
01050 size_type capacity() const
01051 {
01052 return mData.capacity();
01053 }
01055 void clear()
01056 {
01057 mData.clear();
01058 }
01060
01061 UString substr( size_type index, size_type num = npos ) const
01062 {
01063
01064 dstring data = mData.substr( index, num );
01065 UString tmp;
01066 tmp.mData.swap( data );
01067 return tmp;
01068 }
01070 void push_back( unicode_char val )
01071 {
01072 code_point cp[2];
01073 size_t c = _utf32_to_utf16( val, cp );
01074 if ( c > 0 ) push_back( cp[0] );
01075 if ( c > 1 ) push_back( cp[1] );
01076 }
01077 #if MYGUI_IS_NATIVE_WCHAR_T
01079 void push_back( wchar_t val )
01080 {
01081
01082 mData.push_back( static_cast<unicode_char>( val ) );
01083 }
01084 #endif
01086
01088 void push_back( code_point val )
01089 {
01090 mData.push_back( val );
01091 }
01093
01094 void push_back( char val )
01095 {
01096 mData.push_back( static_cast<code_point>( val ) );
01097 }
01099 bool inString( unicode_char ch ) const
01100 {
01101 const_iterator i, ie = end();
01102 for ( i = begin(); i != ie; i.moveNext() )
01103 {
01104 if ( i.getCharacter() == ch )
01105 return true;
01106 }
01107 return false;
01108 }
01110
01112
01114
01115
01116 const std::string& asUTF8() const
01117 {
01118 _load_buffer_UTF8();
01119 return *m_buffer.mStrBuffer;
01120 }
01122 const char* asUTF8_c_str() const
01123 {
01124 _load_buffer_UTF8();
01125 return m_buffer.mStrBuffer->c_str();
01126 }
01128 const utf32string& asUTF32() const
01129 {
01130 _load_buffer_UTF32();
01131 return *m_buffer.mUTF32StrBuffer;
01132 }
01134 const unicode_char* asUTF32_c_str() const
01135 {
01136 _load_buffer_UTF32();
01137 return m_buffer.mUTF32StrBuffer->c_str();
01138 }
01140 const std::wstring& asWStr() const
01141 {
01142 _load_buffer_WStr();
01143 return *m_buffer.mWStrBuffer;
01144 }
01146 const wchar_t* asWStr_c_str() const
01147 {
01148 _load_buffer_WStr();
01149 return m_buffer.mWStrBuffer->c_str();
01150 }
01152
01154
01156
01157
01158 code_point& at( size_type loc )
01159 {
01160 return mData.at( loc );
01161 }
01163 const code_point& at( size_type loc ) const
01164 {
01165 return mData.at( loc );
01166 }
01168
01172 unicode_char getChar( size_type loc ) const
01173 {
01174 const code_point* ptr = c_str();
01175 unicode_char uc;
01176 size_t l = _utf16_char_length( ptr[loc] );
01177 code_point cp[2] = {
01178 0, 0
01179 };
01180 cp[0] = ptr[loc];
01181
01182 if ( l == 2 && ( loc + 1 ) < mData.length() )
01183 {
01184 cp[1] = ptr[loc+1];
01185 }
01186 _utf16_to_utf32( cp, uc );
01187 return uc;
01188 }
01190
01198 int setChar( size_type loc, unicode_char ch )
01199 {
01200 code_point cp[2] = {
01201 0, 0
01202 };
01203 size_t l = _utf32_to_utf16( ch, cp );
01204 unicode_char existingChar = getChar( loc );
01205 size_t existingSize = _utf16_char_length( existingChar );
01206 size_t newSize = _utf16_char_length( ch );
01207
01208 if ( newSize > existingSize )
01209 {
01210 at( loc ) = cp[0];
01211 insert( loc + 1, 1, cp[1] );
01212 return 1;
01213 }
01214 if ( newSize < existingSize )
01215 {
01216 erase( loc, 1 );
01217 at( loc ) = cp[0];
01218 return -1;
01219 }
01220
01221
01222 at( loc ) = cp[0];
01223 if ( l == 2 ) at( loc + 1 ) = cp[1];
01224 return 0;
01225 }
01227
01229
01231
01232
01233 iterator begin()
01234 {
01235 iterator i;
01236 i.mIter = mData.begin();
01237 i.mString = this;
01238 return i;
01239 }
01241 const_iterator begin() const
01242 {
01243 const_iterator i;
01244 i.mIter = const_cast<UString*>( this )->mData.begin();
01245 i.mString = const_cast<UString*>( this );
01246 return i;
01247 }
01249 iterator end()
01250 {
01251 iterator i;
01252 i.mIter = mData.end();
01253 i.mString = this;
01254 return i;
01255 }
01257 const_iterator end() const
01258 {
01259 const_iterator i;
01260 i.mIter = const_cast<UString*>( this )->mData.end();
01261 i.mString = const_cast<UString*>( this );
01262 return i;
01263 }
01265 reverse_iterator rbegin()
01266 {
01267 reverse_iterator i;
01268 i.mIter = mData.end();
01269 i.mString = this;
01270 return i;
01271 }
01273 const_reverse_iterator rbegin() const
01274 {
01275 const_reverse_iterator i;
01276 i.mIter = const_cast<UString*>( this )->mData.end();
01277 i.mString = const_cast<UString*>( this );
01278 return i;
01279 }
01281 reverse_iterator rend()
01282 {
01283 reverse_iterator i;
01284 i.mIter = mData.begin();
01285 i.mString = this;
01286 return i;
01287 }
01289 const_reverse_iterator rend() const
01290 {
01291 const_reverse_iterator i;
01292 i.mIter = const_cast<UString*>( this )->mData.begin();
01293 i.mString = const_cast<UString*>( this );
01294 return i;
01295 }
01297
01299
01301
01302
01303 UString& assign( iterator start, iterator end )
01304 {
01305 mData.assign( start.mIter, end.mIter );
01306 return *this;
01307 }
01309 UString& assign( const UString& str )
01310 {
01311 mData.assign( str.mData );
01312 return *this;
01313 }
01315 UString& assign( const code_point* str )
01316 {
01317 mData.assign( str );
01318 return *this;
01319 }
01321 UString& assign( const code_point* str, size_type num )
01322 {
01323 mData.assign( str, num );
01324 return *this;
01325 }
01327 UString& assign( const UString& str, size_type index, size_type len )
01328 {
01329 mData.assign( str.mData, index, len );
01330 return *this;
01331 }
01333 UString& assign( size_type num, const code_point& ch )
01334 {
01335 mData.assign( num, ch );
01336 return *this;
01337 }
01339 UString& assign( const std::wstring& wstr )
01340 {
01341 mData.clear();
01342 mData.reserve( wstr.length() );
01343 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
01344 code_point tmp;
01345 std::wstring::const_iterator i, ie = wstr.end();
01346 for ( i = wstr.begin(); i != ie; i++ )
01347 {
01348 tmp = static_cast<code_point>( *i );
01349 mData.push_back( tmp );
01350 }
01351 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
01352 code_point cp[3] = { 0, 0, 0 };
01353 unicode_char tmp;
01354 std::wstring::const_iterator i, ie = wstr.end();
01355 for ( i = wstr.begin(); i != ie; i++ )
01356 {
01357 tmp = static_cast<unicode_char>( *i );
01358 size_t l = _utf32_to_utf16( tmp, cp );
01359 if ( l > 0 ) mData.push_back( cp[0] );
01360 if ( l > 1 ) mData.push_back( cp[1] );
01361 }
01362 #endif
01363 return *this;
01364 }
01365 #if MYGUI_IS_NATIVE_WCHAR_T
01367 UString& assign( const wchar_t* w_str )
01368 {
01369 std::wstring tmp;
01370 tmp.assign( w_str );
01371 return assign( tmp );
01372 }
01374 UString& assign( const wchar_t* w_str, size_type num )
01375 {
01376 std::wstring tmp;
01377 tmp.assign( w_str, num );
01378 return assign( tmp );
01379 }
01380 #endif
01382 UString& assign( const std::string& str )
01383 {
01384 size_type len = _verifyUTF8( str );
01385 clear();
01386 reserve( len );
01387
01388
01389
01390
01391 unicode_char uc;
01392 unsigned char utf8buf[7];
01393 utf8buf[6] = 0;
01394 size_t utf8len;
01395 code_point utf16buff[3];
01396 utf16buff[2] = 0;
01397 size_t utf16len;
01398
01399 std::string::const_iterator i, ie = str.end();
01400 for ( i = str.begin(); i != ie; i++ )
01401 {
01402 utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) );
01403 for ( size_t j = 0; j < utf8len; j++ )
01404 {
01405 utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) );
01406 }
01407 utf8buf[utf8len] = 0;
01408 utf8len = _utf8_to_utf32( utf8buf, uc );
01409 i += utf8len - 1;
01410
01411 utf16len = _utf32_to_utf16( uc, utf16buff );
01412 append( utf16buff, utf16len );
01413 }
01414 return *this;
01415 }
01417 UString& assign( const char* c_str )
01418 {
01419 std::string tmp( c_str );
01420 return assign( tmp );
01421 }
01423 UString& assign( const char* c_str, size_type num )
01424 {
01425 std::string tmp;
01426 tmp.assign( c_str, num );
01427 return assign( tmp );
01428 }
01430
01432
01434
01435
01436 UString& append( const UString& str )
01437 {
01438 mData.append( str.mData );
01439 return *this;
01440 }
01442 UString& append( const code_point* str )
01443 {
01444 mData.append( str );
01445 return *this;
01446 }
01448 UString& append( const UString& str, size_type index, size_type len )
01449 {
01450 mData.append( str.mData, index, len );
01451 return *this;
01452 }
01454 UString& append( const code_point* str, size_type num )
01455 {
01456 mData.append( str, num );
01457 return *this;
01458 }
01460 UString& append( size_type num, code_point ch )
01461 {
01462 mData.append( num, ch );
01463 return *this;
01464 }
01466 UString& append( iterator start, iterator end )
01467 {
01468 mData.append( start.mIter, end.mIter );
01469 return *this;
01470 }
01471 #if MYGUI_IS_NATIVE_WCHAR_T
01473 UString& append( const wchar_t* w_str, size_type num )
01474 {
01475 std::wstring tmp( w_str, num );
01476 return append( tmp );
01477 }
01479 UString& append( size_type num, wchar_t ch )
01480 {
01481 return append( num, static_cast<unicode_char>( ch ) );
01482 }
01483 #endif
01485 UString& append( const char* c_str, size_type num )
01486 {
01487 UString tmp( c_str, num );
01488 append( tmp );
01489 return *this;
01490 }
01492 UString& append( size_type num, char ch )
01493 {
01494 append( num, static_cast<code_point>( ch ) );
01495 return *this;
01496 }
01498 UString& append( size_type num, unicode_char ch )
01499 {
01500 code_point cp[2] = { 0, 0 };
01501 if ( _utf32_to_utf16( ch, cp ) == 2 )
01502 {
01503 for ( size_type i = 0; i < num; i++ )
01504 {
01505 append( 1, cp[0] );
01506 append( 1, cp[1] );
01507 }
01508 }
01509 else
01510 {
01511 for ( size_type i = 0; i < num; i++ )
01512 {
01513 append( 1, cp[0] );
01514 }
01515 }
01516 return *this;
01517 }
01519
01521
01523
01524
01525 iterator insert( iterator i, const code_point& ch )
01526 {
01527 iterator ret;
01528 ret.mIter = mData.insert( i.mIter, ch );
01529 ret.mString = this;
01530 return ret;
01531 }
01533 UString& insert( size_type index, const UString& str )
01534 {
01535 mData.insert( index, str.mData );
01536 return *this;
01537 }
01539 UString& insert( size_type index, const code_point* str )
01540 {
01541 mData.insert( index, str );
01542 return *this;
01543 }
01545 UString& insert( size_type index1, const UString& str, size_type index2, size_type num )
01546 {
01547 mData.insert( index1, str.mData, index2, num );
01548 return *this;
01549 }
01551 void insert( iterator i, iterator start, iterator end )
01552 {
01553 mData.insert( i.mIter, start.mIter, end.mIter );
01554 }
01556 UString& insert( size_type index, const code_point* str, size_type num )
01557 {
01558 mData.insert( index, str, num );
01559 return *this;
01560 }
01561 #if MYGUI_IS_NATIVE_WCHAR_T
01563 UString& insert( size_type index, const wchar_t* w_str, size_type num )
01564 {
01565 UString tmp( w_str, num );
01566 insert( index, tmp );
01567 return *this;
01568 }
01569 #endif
01571 UString& insert( size_type index, const char* c_str, size_type num )
01572 {
01573 UString tmp( c_str, num );
01574 insert( index, tmp );
01575 return *this;
01576 }
01578 UString& insert( size_type index, size_type num, code_point ch )
01579 {
01580 mData.insert( index, num, ch );
01581 return *this;
01582 }
01583 #if MYGUI_IS_NATIVE_WCHAR_T
01585 UString& insert( size_type index, size_type num, wchar_t ch )
01586 {
01587 insert( index, num, static_cast<unicode_char>( ch ) );
01588 return *this;
01589 }
01590 #endif
01592 UString& insert( size_type index, size_type num, char ch )
01593 {
01594 insert( index, num, static_cast<code_point>( ch ) );
01595 return *this;
01596 }
01598 UString& insert( size_type index, size_type num, unicode_char ch )
01599 {
01600 code_point cp[3] = { 0, 0, 0 };
01601 size_t l = _utf32_to_utf16( ch, cp );
01602 if ( l == 1 )
01603 {
01604 return insert( index, num, cp[0] );
01605 }
01606 for ( size_type c = 0; c < num; c++ )
01607 {
01608
01609 insert( index, 1, cp[1] );
01610 insert( index, 1, cp[0] );
01611 }
01612 return *this;
01613 }
01615 void insert( iterator i, size_type num, const code_point& ch )
01616 {
01617 mData.insert( i.mIter, num, ch );
01618 }
01619 #if MYGUI_IS_NATIVE_WCHAR_T
01621 void insert( iterator i, size_type num, const wchar_t& ch )
01622 {
01623 insert( i, num, static_cast<unicode_char>( ch ) );
01624 }
01625 #endif
01627 void insert( iterator i, size_type num, const char& ch )
01628 {
01629 insert( i, num, static_cast<code_point>( ch ) );
01630 }
01632 void insert( iterator i, size_type num, const unicode_char& ch )
01633 {
01634 code_point cp[3] = { 0, 0, 0 };
01635 size_t l = _utf32_to_utf16( ch, cp );
01636 if ( l == 1 )
01637 {
01638 insert( i, num, cp[0] );
01639 }
01640 else
01641 {
01642 for ( size_type c = 0; c < num; c++ )
01643 {
01644
01645 insert( i, 1, cp[1] );
01646 insert( i, 1, cp[0] );
01647 }
01648 }
01649 }
01651
01653
01655
01656
01657 iterator erase( iterator loc )
01658 {
01659 iterator ret;
01660 ret.mIter = mData.erase( loc.mIter );
01661 ret.mString = this;
01662 return ret;
01663 }
01665 iterator erase( iterator start, iterator end )
01666 {
01667 iterator ret;
01668 ret.mIter = mData.erase( start.mIter, end.mIter );
01669 ret.mString = this;
01670 return ret;
01671 }
01673 UString& erase( size_type index = 0, size_type num = npos )
01674 {
01675 if ( num == npos )
01676 mData.erase( index );
01677 else
01678 mData.erase( index, num );
01679 return *this;
01680 }
01682
01684
01686
01687
01688 UString& replace( size_type index1, size_type num1, const UString& str )
01689 {
01690 mData.replace( index1, num1, str.mData, 0, npos );
01691 return *this;
01692 }
01694 UString& replace( size_type index1, size_type num1, const UString& str, size_type num2 )
01695 {
01696 mData.replace( index1, num1, str.mData, 0, num2 );
01697 return *this;
01698 }
01700 UString& replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 )
01701 {
01702 mData.replace( index1, num1, str.mData, index2, num2 );
01703 return *this;
01704 }
01706 UString& replace( iterator start, iterator end, const UString& str, size_type num = npos )
01707 {
01708 _const_fwd_iterator st(start);
01709
01710 size_type index1 = begin() - st;
01711 size_type num1 = end - st;
01712 return replace( index1, num1, str, 0, num );
01713 }
01715 UString& replace( size_type index, size_type num1, size_type num2, code_point ch )
01716 {
01717 mData.replace( index, num1, num2, ch );
01718 return *this;
01719 }
01721 UString& replace( iterator start, iterator end, size_type num, code_point ch )
01722 {
01723 _const_fwd_iterator st(start);
01724
01725 size_type index1 = begin() - st;
01726 size_type num1 = end - st;
01727 return replace( index1, num1, num, ch );
01728 }
01730
01732
01734
01735
01736 int compare( const UString& str ) const
01737 {
01738 return mData.compare( str.mData );
01739 }
01741 int compare( const code_point* str ) const
01742 {
01743 return mData.compare( str );
01744 }
01746 int compare( size_type index, size_type length, const UString& str ) const
01747 {
01748 return mData.compare( index, length, str.mData );
01749 }
01751 int compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const
01752 {
01753 return mData.compare( index, length, str.mData, index2, length2 );
01754 }
01756 int compare( size_type index, size_type length, const code_point* str, size_type length2 ) const
01757 {
01758 return mData.compare( index, length, str, length2 );
01759 }
#if MYGUI_IS_NATIVE_WCHAR_T
//! Compares \a length code points from \a index with a temporary UString built from \a length2 characters of \a w_str.
int compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const
{
    return compare( index, length, UString( w_str, length2 ) );
}
#endif
01769 int compare( size_type index, size_type length, const char* c_str, size_type length2 ) const
01770 {
01771 UString tmp( c_str, length2 );
01772 return compare( index, length, tmp );
01773 }
01775
01777
01779
01780
01781
01782 size_type find( const UString& str, size_type index = 0 ) const
01783 {
01784 return mData.find( str.c_str(), index );
01785 }
01787
01788 size_type find( const code_point* cp_str, size_type index, size_type length ) const
01789 {
01790 UString tmp( cp_str );
01791 return mData.find( tmp.c_str(), index, length );
01792 }
01794
01795 size_type find( const char* c_str, size_type index, size_type length ) const
01796 {
01797 UString tmp( c_str );
01798 return mData.find( tmp.c_str(), index, length );
01799 }
#if MYGUI_IS_NATIVE_WCHAR_T
//! Builds a temporary UString from \a w_str and searches for the first \a length code points of it, starting at \a index.
size_type find( const wchar_t* w_str, size_type index, size_type length ) const
{
    const UString pattern( w_str );
    const size_type position = mData.find( pattern.c_str(), index, length );
    return position;
}
#endif
01810
01811 size_type find( char ch, size_type index = 0 ) const
01812 {
01813 return find( static_cast<code_point>( ch ), index );
01814 }
01816
01817 size_type find( code_point ch, size_type index = 0 ) const
01818 {
01819 return mData.find( ch, index );
01820 }
#if MYGUI_IS_NATIVE_WCHAR_T
//! Searches for \a ch from \a index on by forwarding to the unicode_char overload.
size_type find( wchar_t ch, size_type index = 0 ) const
{
    const unicode_char wide = static_cast<unicode_char>( ch );
    return find( wide, index );
}
#endif
01830
01831 size_type find( unicode_char ch, size_type index = 0 ) const
01832 {
01833 code_point cp[3] = { 0, 0, 0 };
01834 size_t l = _utf32_to_utf16( ch, cp );
01835 return find( UString( cp, l ), index );
01836 }
01837
01839 size_type rfind( const UString& str, size_type index = 0 ) const
01840 {
01841 return mData.rfind( str.c_str(), index );
01842 }
01844 size_type rfind( const code_point* cp_str, size_type index, size_type num ) const
01845 {
01846 UString tmp( cp_str );
01847 return mData.rfind( tmp.c_str(), index, num );
01848 }
01850 size_type rfind( const char* c_str, size_type index, size_type num ) const
01851 {
01852 UString tmp( c_str );
01853 return mData.rfind( tmp.c_str(), index, num );
01854 }
#if MYGUI_IS_NATIVE_WCHAR_T
//! Builds a temporary UString from \a w_str and reverse-searches for the first \a num code points of it from \a index.
size_type rfind( const wchar_t* w_str, size_type index, size_type num ) const
{
    const UString pattern( w_str );
    const size_type position = mData.rfind( pattern.c_str(), index, num );
    return position;
}
#endif
01864 size_type rfind( char ch, size_type index = 0 ) const
01865 {
01866 return rfind( static_cast<code_point>( ch ), index );
01867 }
01869 size_type rfind( code_point ch, size_type index ) const
01870 {
01871 return mData.rfind( ch, index );
01872 }
#if MYGUI_IS_NATIVE_WCHAR_T
//! Reverse-searches for \a ch by forwarding to the unicode_char overload.
// NOTE(review): the default index of 0 restricts the search to position 0 - confirm intent.
size_type rfind( wchar_t ch, size_type index = 0 ) const
{
    const unicode_char wide = static_cast<unicode_char>( ch );
    return rfind( wide, index );
}
#endif
01881 size_type rfind( unicode_char ch, size_type index = 0 ) const
01882 {
01883 code_point cp[3] = { 0, 0, 0 };
01884 size_t l = _utf32_to_utf16( ch, cp );
01885 return rfind( UString( cp, l ), index );
01886 }
01888
01890
01892
01893
01894 size_type find_first_of( const UString &str, size_type index = 0, size_type num = npos ) const
01895 {
01896 size_type i = 0;
01897 const size_type len = length();
01898 while ( i < num && ( index + i ) < len )
01899 {
01900 unicode_char ch = getChar( index + i );
01901 if ( str.inString( ch ) )
01902 return index + i;
01903 i += _utf16_char_length( ch );
01904 }
01905 return npos;
01906 }
01908 size_type find_first_of( code_point ch, size_type index = 0 ) const
01909 {
01910 UString tmp;
01911 tmp.assign( 1, ch );
01912 return find_first_of( tmp, index );
01913 }
01915 size_type find_first_of( char ch, size_type index = 0 ) const
01916 {
01917 return find_first_of( static_cast<code_point>( ch ), index );
01918 }
#if MYGUI_IS_NATIVE_WCHAR_T
//! Forwards to the unicode_char overload after casting \a ch.
size_type find_first_of( wchar_t ch, size_type index = 0 ) const
{
    const unicode_char wide = static_cast<unicode_char>( ch );
    return find_first_of( wide, index );
}
#endif
01927 size_type find_first_of( unicode_char ch, size_type index = 0 ) const
01928 {
01929 code_point cp[3] = { 0, 0, 0 };
01930 size_t l = _utf32_to_utf16( ch, cp );
01931 return find_first_of( UString( cp, l ), index );
01932 }
01933
01935 size_type find_first_not_of( const UString& str, size_type index = 0, size_type num = npos ) const
01936 {
01937 size_type i = 0;
01938 const size_type len = length();
01939 while ( i < num && ( index + i ) < len )
01940 {
01941 unicode_char ch = getChar( index + i );
01942 if ( !str.inString( ch ) )
01943 return index + i;
01944 i += _utf16_char_length( ch );
01945 }
01946 return npos;
01947 }
01949 size_type find_first_not_of( code_point ch, size_type index = 0 ) const
01950 {
01951 UString tmp;
01952 tmp.assign( 1, ch );
01953 return find_first_not_of( tmp, index );
01954 }
01956 size_type find_first_not_of( char ch, size_type index = 0 ) const
01957 {
01958 return find_first_not_of( static_cast<code_point>( ch ), index );
01959 }
#if MYGUI_IS_NATIVE_WCHAR_T
//! Forwards to the unicode_char overload after casting \a ch.
size_type find_first_not_of( wchar_t ch, size_type index = 0 ) const
{
    const unicode_char wide = static_cast<unicode_char>( ch );
    return find_first_not_of( wide, index );
}
#endif
01968 size_type find_first_not_of( unicode_char ch, size_type index = 0 ) const
01969 {
01970 code_point cp[3] = { 0, 0, 0 };
01971 size_t l = _utf32_to_utf16( ch, cp );
01972 return find_first_not_of( UString( cp, l ), index );
01973 }
01974
01976 size_type find_last_of( const UString& str, size_type index = npos, size_type num = npos ) const
01977 {
01978 size_type i = 0;
01979 const size_type len = length();
01980 if ( index > len ) index = len - 1;
01981
01982 while ( i < num && ( index - i ) != npos )
01983 {
01984 size_type j = index - i;
01985
01986 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) )
01987 {
01988 j = index - ++i;
01989 }
01990
01991 unicode_char ch = getChar( j );
01992 if ( str.inString( ch ) )
01993 return j;
01994 i++;
01995 }
01996 return npos;
01997 }
01999 size_type find_last_of( code_point ch, size_type index = npos ) const
02000 {
02001 UString tmp;
02002 tmp.assign( 1, ch );
02003 return find_last_of( tmp, index );
02004 }
02006 size_type find_last_of( char ch, size_type index = npos ) const
02007 {
02008 return find_last_of( static_cast<code_point>( ch ), index );
02009 }
#if MYGUI_IS_NATIVE_WCHAR_T
//! Forwards to the unicode_char overload after casting \a ch.
size_type find_last_of( wchar_t ch, size_type index = npos ) const
{
    const unicode_char wide = static_cast<unicode_char>( ch );
    return find_last_of( wide, index );
}
#endif
02018 size_type find_last_of( unicode_char ch, size_type index = npos ) const
02019 {
02020 code_point cp[3] = { 0, 0, 0 };
02021 size_t l = _utf32_to_utf16( ch, cp );
02022 return find_last_of( UString( cp, l ), index );
02023 }
02024
02026 size_type find_last_not_of( const UString& str, size_type index = npos, size_type num = npos ) const
02027 {
02028 size_type i = 0;
02029 const size_type len = length();
02030 if ( index > len ) index = len - 1;
02031
02032 while ( i < num && ( index - i ) != npos )
02033 {
02034 size_type j = index - i;
02035
02036 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) )
02037 {
02038 j = index - ++i;
02039 }
02040
02041 unicode_char ch = getChar( j );
02042 if ( !str.inString( ch ) )
02043 return j;
02044 i++;
02045 }
02046 return npos;
02047 }
02049 size_type find_last_not_of( code_point ch, size_type index = npos ) const
02050 {
02051 UString tmp;
02052 tmp.assign( 1, ch );
02053 return find_last_not_of( tmp, index );
02054 }
02056 size_type find_last_not_of( char ch, size_type index = npos ) const
02057 {
02058 return find_last_not_of( static_cast<code_point>( ch ), index );
02059 }
#if MYGUI_IS_NATIVE_WCHAR_T
//! Forwards to the unicode_char overload after casting \a ch.
size_type find_last_not_of( wchar_t ch, size_type index = npos ) const
{
    const unicode_char wide = static_cast<unicode_char>( ch );
    return find_last_not_of( wide, index );
}
#endif
02068 size_type find_last_not_of( unicode_char ch, size_type index = npos ) const
02069 {
02070 code_point cp[3] = { 0, 0, 0 };
02071 size_t l = _utf32_to_utf16( ch, cp );
02072 return find_last_not_of( UString( cp, l ), index );
02073 }
02075
02077
02079
02080
02081 bool operator<( const UString& right ) const
02082 {
02083 return compare( right ) < 0;
02084 }
02086 bool operator<=( const UString& right ) const
02087 {
02088 return compare( right ) <= 0;
02089 }
02091 bool operator>( const UString& right ) const
02092 {
02093 return compare( right ) > 0;
02094 }
02096 bool operator>=( const UString& right ) const
02097 {
02098 return compare( right ) >= 0;
02099 }
02101 bool operator==( const UString& right ) const
02102 {
02103 return compare( right ) == 0;
02104 }
02106 bool operator!=( const UString& right ) const
02107 {
02108 return !operator==( right );
02109 }
02111 UString& operator=( const UString& s )
02112 {
02113 return assign( s );
02114 }
02116 UString& operator=( code_point ch )
02117 {
02118 clear();
02119 return append( 1, ch );
02120 }
02122 UString& operator=( char ch )
02123 {
02124 clear();
02125 return append( 1, ch );
02126 }
#if MYGUI_IS_NATIVE_WCHAR_T
//! Replaces the contents with the single wide character \a ch (clear, then append one copy).
UString& operator=( wchar_t ch )
{
    clear();
    UString& self = append( 1, ch );
    return self;
}
#endif
02136 UString& operator=( unicode_char ch )
02137 {
02138 clear();
02139 return append( 1, ch );
02140 }
02142 code_point& operator[]( size_type index )
02143 {
02144 return at( index );
02145 }
02147 const code_point& operator[]( size_type index ) const
02148 {
02149 return at( index );
02150 }
02152
02154
02156
02157
02158 operator std::string() const
02159 {
02160 return std::string( asUTF8() );
02161 }
02163 operator std::wstring() const
02164 {
02165 return std::wstring( asWStr() );
02166 }
02168
02170
02172
02173
02174 static bool _utf16_independent_char( code_point cp )
02175 {
02176 if ( 0xD800 <= cp && cp <= 0xDFFF )
02177 return false;
02178 return true;
02179 }
02181 static bool _utf16_surrogate_lead( code_point cp )
02182 {
02183 if ( 0xD800 <= cp && cp <= 0xDBFF )
02184 return true;
02185 return false;
02186 }
02188 static bool _utf16_surrogate_follow( code_point cp )
02189 {
02190 if ( 0xDC00 <= cp && cp <= 0xDFFF )
02191 return true;
02192 return false;
02193 }
02195 static size_t _utf16_char_length( code_point cp )
02196 {
02197 if ( 0xD800 <= cp && cp <= 0xDBFF )
02198 return 2;
02199 return 1;
02200 }
02202 static size_t _utf16_char_length( unicode_char uc )
02203 {
02204 if ( uc > 0xFFFF )
02205 return 2;
02206 return 1;
02207 }
02209
02213 static size_t _utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc )
02214 {
02215 const code_point& cp1 = in_cp[0];
02216 const code_point& cp2 = in_cp[1];
02217 bool wordPair = false;
02218
02219
02220 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF )
02221 {
02222
02223 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
02224 wordPair = true;
02225 }
02226
02227 if ( !wordPair )
02228 {
02229 out_uc = cp1;
02230 return 1;
02231 }
02232
02233 unsigned short cU = cp1, cL = cp2;
02234 cU -= 0xD800;
02235 cL -= 0xDC00;
02236
02237 out_uc = ( cU & 0x03FF ) << 10;
02238 out_uc |= ( cL & 0x03FF );
02239 out_uc += 0x10000;
02240
02241 return 2;
02242 }
02244
02249 static size_t _utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] )
02250 {
02251 if ( in_uc <= 0xFFFF )
02252 {
02253 out_cp[0] = in_uc;
02254 return 1;
02255 }
02256 unicode_char uc = in_uc;
02257 unsigned short tmp;
02258 uc -= 0x10000;
02259
02260
02261 tmp = ( uc >> 10 ) & 0x03FF;
02262 tmp += 0xD800;
02263 out_cp[0] = tmp;
02264
02265
02266 tmp = uc & 0x03FF;
02267 tmp += 0xDC00;
02268 out_cp[1] = tmp;
02269
02270 return 2;
02271 }
02273
02275
02277
02278
02279 static bool _utf8_start_char( unsigned char cp )
02280 {
02281 return ( cp & ~_cont_mask ) != _cont;
02282 }
02284 static size_t _utf8_char_length( unsigned char cp )
02285 {
02286 if ( !( cp & 0x80 ) ) return 1;
02287 if (( cp & ~_lead1_mask ) == _lead1 ) return 2;
02288 if (( cp & ~_lead2_mask ) == _lead2 ) return 3;
02289 if (( cp & ~_lead3_mask ) == _lead3 ) return 4;
02290 if (( cp & ~_lead4_mask ) == _lead4 ) return 5;
02291 if (( cp & ~_lead5_mask ) == _lead5 ) return 6;
02292 throw invalid_data( "invalid UTF-8 sequence header value" );
02293 }
02295 static size_t _utf8_char_length( unicode_char uc )
02296 {
02297
02298
02299
02300
02301
02302
02303
02304
02305 if ( !( uc & ~0x0000007F ) ) return 1;
02306 if ( !( uc & ~0x000007FF ) ) return 2;
02307 if ( !( uc & ~0x0000FFFF ) ) return 3;
02308 if ( !( uc & ~0x001FFFFF ) ) return 4;
02309 if ( !( uc & ~0x03FFFFFF ) ) return 5;
02310 if ( !( uc & ~0x7FFFFFFF ) ) return 6;
02311 throw invalid_data( "invalid UTF-32 value" );
02312 }
02313
02315 static size_t _utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc )
02316 {
02317 size_t len = _utf8_char_length( in_cp[0] );
02318 if ( len == 1 )
02319 {
02320 out_uc = in_cp[0];
02321 return 1;
02322 }
02323
02324 unicode_char c = 0;
02325 size_t i = 0;
02326 switch ( len )
02327 {
02328 case 6:
02329 c = in_cp[i] & _lead5_mask;
02330 break;
02331 case 5:
02332 c = in_cp[i] & _lead4_mask;
02333 break;
02334 case 4:
02335 c = in_cp[i] & _lead3_mask;
02336 break;
02337 case 3:
02338 c = in_cp[i] & _lead2_mask;
02339 break;
02340 case 2:
02341 c = in_cp[i] & _lead1_mask;
02342 break;
02343 }
02344
02345 for ( ++i; i < len; i++ )
02346 {
02347 if (( in_cp[i] & ~_cont_mask ) != _cont )
02348 throw invalid_data( "bad UTF-8 continuation byte" );
02349 c <<= 6;
02350 c |= ( in_cp[i] & _cont_mask );
02351 }
02352
02353 out_uc = c;
02354 return len;
02355 }
02357 static size_t _utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] )
02358 {
02359 size_t len = _utf8_char_length( in_uc );
02360 unicode_char c = in_uc;
02361
02362
02363 for ( size_t i = len - 1; i > 0; i-- )
02364 {
02365 out_cp[i] = (( c ) & _cont_mask ) | _cont;
02366 c >>= 6;
02367 }
02368
02369
02370 switch ( len )
02371 {
02372 case 6:
02373 out_cp[0] = (( c ) & _lead5_mask ) | _lead5;
02374 break;
02375 case 5:
02376 out_cp[0] = (( c ) & _lead4_mask ) | _lead4;
02377 break;
02378 case 4:
02379 out_cp[0] = (( c ) & _lead3_mask ) | _lead3;
02380 break;
02381 case 3:
02382 out_cp[0] = (( c ) & _lead2_mask ) | _lead2;
02383 break;
02384 case 2:
02385 out_cp[0] = (( c ) & _lead1_mask ) | _lead1;
02386 break;
02387 case 1:
02388 default:
02389 out_cp[0] = ( c ) & 0x7F;
02390 break;
02391 }
02392
02393
02394 return len;
02395 }
02396
02398 static size_type _verifyUTF8( const unsigned char* c_str )
02399 {
02400 std::string tmp( reinterpret_cast<const char*>( c_str ) );
02401 return _verifyUTF8( tmp );
02402 }
02404 static size_type _verifyUTF8( const std::string& str )
02405 {
02406 std::string::const_iterator i, ie = str.end();
02407 i = str.begin();
02408 size_type length = 0;
02409
02410 while ( i != ie )
02411 {
02412
02413 if (( *i ) & 0x80 )
02414 {
02415 unsigned char c = ( *i );
02416 size_t contBytes = 0;
02417
02418
02419 if (( c & ~_lead1_mask ) == _lead1 )
02420 {
02421 if ( c == _lead1 ) throw invalid_data( "overlong UTF-8 sequence" );
02422 contBytes = 1;
02423
02424 }
02425 else if (( c & ~_lead2_mask ) == _lead2 )
02426 {
02427 contBytes = 2;
02428 if ( c == _lead2 )
02429 {
02430 c = ( *( i + 1 ) );
02431 if (( c & _lead2 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02432 }
02433
02434 }
02435 else if (( c & ~_lead3_mask ) == _lead3 )
02436 {
02437 contBytes = 3;
02438 if ( c == _lead3 )
02439 {
02440 c = ( *( i + 1 ) );
02441 if (( c & _lead3 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02442 }
02443
02444 }
02445 else if (( c & ~_lead4_mask ) == _lead4 )
02446 {
02447 contBytes = 4;
02448 if ( c == _lead4 )
02449 {
02450 c = ( *( i + 1 ) );
02451 if (( c & _lead4 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02452 }
02453
02454 }
02455 else if (( c & ~_lead5_mask ) == _lead5 )
02456 {
02457 contBytes = 5;
02458 if ( c == _lead5 )
02459 {
02460 c = ( *( i + 1 ) );
02461 if (( c & _lead5 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02462 }
02463 }
02464
02465
02466 while ( contBytes-- )
02467 {
02468 c = ( *( ++i ) );
02469 if (( c & ~_cont_mask ) != _cont )
02470 throw invalid_data( "bad UTF-8 continuation byte" );
02471 }
02472 }
02473 length++;
02474 i++;
02475 }
02476 return length;
02477 }
02479
02480 private:
02481
//! Underlying code point storage; the insert/erase/replace/find members operate on it directly.
02482 dstring mData;
02483
//! Tag recording which member of the m_buffer union currently holds the conversion buffer.
02485 enum BufferType
02486 {
// no buffer allocated (state set by _init())
02487 bt_none,
// m_buffer.mStrBuffer is a std::string
02488 bt_string,
// m_buffer.mWStrBuffer is a std::wstring
02489 bt_wstring,
// m_buffer.mUTF32StrBuffer is a utf32string
02490 bt_utf32string
02491 };
02492
02494 void _init()
02495 {
02496 m_buffer.mVoidBuffer = 0;
02497 m_bufferType = bt_none;
02498 m_bufferSize = 0;
02499 }
02500
02502
02504 void _cleanBuffer() const
02505 {
02506 if ( m_buffer.mVoidBuffer != 0 )
02507 {
02508 switch ( m_bufferType )
02509 {
02510 case bt_string:
02511 delete m_buffer.mStrBuffer;
02512 break;
02513 case bt_wstring:
02514 delete m_buffer.mWStrBuffer;
02515 break;
02516 case bt_utf32string:
02517 delete m_buffer.mUTF32StrBuffer;
02518 break;
02519 case bt_none:
02520 default:
02521
02522
02523 MYGUI_ASSERT(false, "This should never happen - mVoidBuffer should never contain something if we "
02524 "don't know the type");
02525 break;
02526 }
02527 m_buffer.mVoidBuffer = 0;
02528 m_bufferSize = 0;
02529 }
02530 }
02531
02533 void _getBufferStr() const
02534 {
02535 if ( m_bufferType != bt_string )
02536 {
02537 _cleanBuffer();
02538 m_buffer.mStrBuffer = new std::string();
02539 m_bufferType = bt_string;
02540 }
02541 m_buffer.mStrBuffer->clear();
02542 }
02544 void _getBufferWStr() const
02545 {
02546 if ( m_bufferType != bt_wstring )
02547 {
02548 _cleanBuffer();
02549 m_buffer.mWStrBuffer = new std::wstring();
02550 m_bufferType = bt_wstring;
02551 }
02552 m_buffer.mWStrBuffer->clear();
02553 }
02555 void _getBufferUTF32Str() const
02556 {
02557 if ( m_bufferType != bt_utf32string )
02558 {
02559 _cleanBuffer();
02560 m_buffer.mUTF32StrBuffer = new utf32string();
02561 m_bufferType = bt_utf32string;
02562 }
02563 m_buffer.mUTF32StrBuffer->clear();
02564 }
02565
02566 void _load_buffer_UTF8() const
02567 {
02568 _getBufferStr();
02569 std::string& buffer = ( *m_buffer.mStrBuffer );
02570 buffer.reserve( length() );
02571
02572 unsigned char utf8buf[6];
02573 char* charbuf = ( char* )utf8buf;
02574 unicode_char c;
02575 size_t len;
02576
02577 const_iterator i, ie = end();
02578 for ( i = begin(); i != ie; i.moveNext() )
02579 {
02580 c = i.getCharacter();
02581 len = _utf32_to_utf8( c, utf8buf );
02582 size_t j = 0;
02583 while ( j < len )
02584 buffer.push_back( charbuf[j++] );
02585 }
02586 }
02587 void _load_buffer_WStr() const
02588 {
02589 _getBufferWStr();
02590 std::wstring& buffer = ( *m_buffer.mWStrBuffer );
02591 buffer.reserve( length() );
02592 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
02593 const_iterator i, ie = end();
02594 for ( i = begin(); i != ie; ++i )
02595 {
02596 buffer.push_back(( wchar_t )( *i ) );
02597 }
02598 #else // wchar_t fits UTF-32
02599 unicode_char c;
02600 const_iterator i, ie = end();
02601 for ( i = begin(); i != ie; i.moveNext() )
02602 {
02603 c = i.getCharacter();
02604 buffer.push_back(( wchar_t )c );
02605 }
02606 #endif
02607 }
02608 void _load_buffer_UTF32() const
02609 {
02610 _getBufferUTF32Str();
02611 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
02612 buffer.reserve( length() );
02613
02614 unicode_char c;
02615
02616 const_iterator i, ie = end();
02617 for ( i = begin(); i != ie; i.moveNext() )
02618 {
02619 c = i.getCharacter();
02620 buffer.push_back( c );
02621 }
02622 }
02623
//! Which union member of m_buffer is currently valid; mutable so const conversion helpers can update it.
02624 mutable BufferType m_bufferType;
//! Size bookkeeping for the conversion buffer; set to 0 by _init() and _cleanBuffer().
02625 mutable size_t m_bufferSize;
02626
02627
//! Conversion buffer pointer; all members alias the same storage and m_bufferType selects the live one.
02628 union
02629 {
// untyped view, used for null checks and resetting
02630 mutable void* mVoidBuffer;
// valid when m_bufferType == bt_string
02631 mutable std::string* mStrBuffer;
// valid when m_bufferType == bt_wstring
02632 mutable std::wstring* mWStrBuffer;
// valid when m_bufferType == bt_utf32string
02633 mutable utf32string* mUTF32StrBuffer;
02634 }
02635 m_buffer;
02636 };
02637
02639 inline UString operator+( const UString& s1, const UString& s2 )
02640 {
02641 return UString( s1 ).append( s2 );
02642 }
02644 inline UString operator+( const UString& s1, UString::code_point c )
02645 {
02646 return UString( s1 ).append( 1, c );
02647 }
02649 inline UString operator+( const UString& s1, UString::unicode_char c )
02650 {
02651 return UString( s1 ).append( 1, c );
02652 }
02654 inline UString operator+( const UString& s1, char c )
02655 {
02656 return UString( s1 ).append( 1, c );
02657 }
#if MYGUI_IS_NATIVE_WCHAR_T
//! Concatenation: a copy of \a s1 with one wide character \a c appended.
inline UString operator+( const UString& s1, wchar_t c )
{
    UString joined( s1 );
    joined.append( 1, c );
    return joined;
}
#endif
02666 inline UString operator+( UString::code_point c, const UString& s2 )
02667 {
02668 return UString().append( 1, c ).append( s2 );
02669 }
02671 inline UString operator+( UString::unicode_char c, const UString& s2 )
02672 {
02673 return UString().append( 1, c ).append( s2 );
02674 }
02676 inline UString operator+( char c, const UString& s2 )
02677 {
02678 return UString().append( 1, c ).append( s2 );
02679 }
#if MYGUI_IS_NATIVE_WCHAR_T
//! Concatenation: one wide character \a c followed by \a s2.
inline UString operator+( wchar_t c, const UString& s2 )
{
    UString joined;
    joined.append( 1, c );
    joined.append( s2 );
    return joined;
}
#endif
02687
02688
02689 inline UString::size_type operator-( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02690 {
02691 return ( left.mIter - right.mIter );
02692 }
02693 inline bool operator==( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02694 {
02695 return left.mIter == right.mIter;
02696 }
02697 inline bool operator!=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02698 {
02699 return left.mIter != right.mIter;
02700 }
02701 inline bool operator<( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02702 {
02703 return left.mIter < right.mIter;
02704 }
02705 inline bool operator<=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02706 {
02707 return left.mIter <= right.mIter;
02708 }
02709 inline bool operator>( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02710 {
02711 return left.mIter > right.mIter;
02712 }
02713 inline bool operator>=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02714 {
02715 return left.mIter >= right.mIter;
02716 }
02717
02718
02719
02720 inline UString::size_type operator-( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02721 {
02722 return ( right.mIter - left.mIter );
02723 }
02724 inline bool operator==( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02725 {
02726 return left.mIter == right.mIter;
02727 }
02728 inline bool operator!=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02729 {
02730 return left.mIter != right.mIter;
02731 }
02732 inline bool operator<( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02733 {
02734 return right.mIter < left.mIter;
02735 }
02736 inline bool operator<=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02737 {
02738 return right.mIter <= left.mIter;
02739 }
02740 inline bool operator>( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02741 {
02742 return right.mIter > left.mIter;
02743 }
02744 inline bool operator>=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02745 {
02746 return right.mIter >= left.mIter;
02747 }
02748
02750 inline std::ostream& operator << ( std::ostream& os, const UString& s )
02751 {
02752 return os << s.asUTF8();
02753 }
02754
02756
02757
02758
02759
02760
02761
02762
02763 }
02764
02765 #endif // __MYGUI_U_STRING_H__