00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00041 #ifndef UTF8_H
00042
00043 #define UTF8_H
00044
00045 #ifndef SU_TYPES_H
00046 #include <sofia-sip/su_types.h>
00047 #endif
00048
00049 SOFIA_BEGIN_DECLS
00050
/*
 * Character-unit types used by the declarations and macros of this header.
 * All of them are plain unsigned integer types; the C standard only
 * guarantees minimum widths (8 bits for char, 16 for short and int).
 */

/** Element type of the UTF-8 buffers handled below. */
typedef unsigned char  utf8;
/** NOTE(review): presumably a UTF-16 code unit; not used by any declaration visible in this header. */
typedef unsigned short utf16;
/** Element type of the UCS1 strings passed to ucs1encode() and ucs1enclen(). */
typedef unsigned char  ucs1;
/** Element type of the UCS2 strings handled by the ucs2*() functions. */
typedef unsigned short ucs2;
/** Element type of the UCS4 strings; also the result type of the IS_UCS4_*() casts. */
typedef unsigned int   ucs4;
00056
00057 SOFIAPUBFUN size_t utf8_width(const utf8 *);
00058
00059
00060 SOFIAPUBFUN size_t ucs18decode(char *dst, size_t dst_size, const utf8 *s);
00061 SOFIAPUBFUN size_t ucs1encode(utf8 *dst, const ucs1 *s, size_t n,
00062 const char quote[128]);
00063 SOFIAPUBFUN size_t ucs1declen(const utf8 *s);
00064 SOFIAPUBFUN size_t ucs1enclen(const ucs1 *s, size_t n, const char quote[128]);
00065
00066
00067 size_t ucs2decode(ucs2 *dst, size_t dst_size, const utf8 *s);
00068 size_t ucs2encode(utf8 *dst, const ucs2 *s, size_t n, const char quote[128]);
00069 size_t ucs2declen(const utf8 *s);
00070 size_t ucs2enclen(const ucs2 *s, size_t n, const char quote[128]);
00071
00072 size_t ucs4decode(ucs4 *dst, size_t dst_size, const utf8 *s);
00073 size_t ucs4encode(utf8 *dst, const ucs4 *s, size_t n, const char quote[128]);
00074 size_t ucs4declen(const utf8 *s);
00075 size_t ucs4enclen(const ucs4 *s, size_t n, const char quote[128]);
00076
00077 size_t ucs2len(ucs2 const *s);
00078 int ucs2cmp(ucs2 const *s1, ucs2 const *s2);
00079 int ucs2ncmp(ucs2 const *s1, ucs2 const *s2, size_t n);
00080
00081 size_t ucs4len(ucs4 const *s);
00082 int ucs4cmp(ucs4 const *s1, ucs4 const *s2);
00083 int ucs4ncmp(ucs4 const *s1, ucs4 const *s2, size_t n);
00084
00085
00086
00087
00088
/*
 * Range tests on a character value.  The argument is converted to ucs4
 * before comparing, and IS_UCS4_2() onwards may evaluate it twice, so do
 * not pass expressions with side effects.
 *
 *   IS_UCS4_1:        0x0 .. 0x7f
 *   IS_UCS4_2:       0x80 .. 0x7ff
 *   IS_UCS4_3:      0x800 .. 0xffff
 *   IS_UCS4_4:    0x10000 .. 0x1fffff
 *   IS_UCS4_5:   0x200000 .. 0x3ffffff
 *   IS_UCS4_6:  0x4000000 .. 0x7fffffff
 */
#define IS_UCS4_1(x) ((ucs4)(x) <= 0x7fu)
#define IS_UCS4_2(x) ((ucs4)(x) >= 0x80u && (ucs4)(x) <= 0x7ffu)
#define IS_UCS4_3(x) ((ucs4)(x) >= 0x800u && (ucs4)(x) <= 0xffffu)
#define IS_UCS4_4(x) ((ucs4)(x) >= 0x10000u && (ucs4)(x) <= 0x1fffffu)
#define IS_UCS4_5(x) ((ucs4)(x) >= 0x200000u && (ucs4)(x) <= 0x3ffffffu)
#define IS_UCS4_6(x) ((ucs4)(x) >= 0x4000000u && (ucs4)(x) <= 0x7fffffffu)

/** True when x lies in 0x80 .. 0xff, i.e. the upper half of the 8-bit range. */
#define IS_UCS4_I(x) ((ucs4)(x) >= 0x80u && (ucs4)(x) <= 0xffu)
00098
00099
/**
 * Number of bytes needed to encode the UCS4 value @a x in UTF-8.
 *
 * Evaluates to an int:
 *   1 for 0x0 .. 0x7f,            2 for 0x80 .. 0x7ff,
 *   3 for 0x800 .. 0xffff,        4 for 0x10000 .. 0x1fffff,
 *   5 for 0x200000 .. 0x3ffffff,  6 for 0x4000000 .. 0x7fffffff,
 *   0 when x is above 0x7fffffff (not encodable).
 *
 * This is a conditional chain on purpose: in C, `a && 2 || b && 3` yields
 * only 0 or 1, never the length itself.
 *
 * @a x is converted to ucs4 and may be evaluated up to six times; do not
 * pass an expression with side effects.
 */
#define UTF8_LEN4(x) \
  ((ucs4)(x) <= 0x7fu       ? 1 : \
   (ucs4)(x) <= 0x7ffu      ? 2 : \
   (ucs4)(x) <= 0xffffu     ? 3 : \
   (ucs4)(x) <= 0x1fffffu   ? 4 : \
   (ucs4)(x) <= 0x3ffffffu  ? 5 : \
   (ucs4)(x) <= 0x7fffffffu ? 6 : 0)
00103
00104
/**
 * Number of bytes needed to encode the 16-bit character value @a x in UTF-8.
 *
 * Evaluates to an int: 1 for 0x0 .. 0x7f, 2 for 0x80 .. 0x7ff,
 * 3 for 0x800 .. 0xffff, and 0 for anything above 0xffff.
 *
 * This is a conditional chain on purpose: in C, `a && 2 || b && 3` yields
 * only 0 or 1, never the length itself.
 *
 * @a x is converted to ucs4 and may be evaluated up to three times; do not
 * pass an expression with side effects.
 */
#define UTF8_LEN2(x) \
  ((ucs4)(x) <= 0x7fu   ? 1 : \
   (ucs4)(x) <= 0x7ffu  ? 2 : \
   (ucs4)(x) <= 0xffffu ? 3 : 0)
00106
00107
00108
00109
/*
 * Lead-byte classifiers.  IS_UTF8_n(c) is true when byte c carries the
 * prefix bits that open an n-byte sequence:
 *
 *   IS_UTF8_1: 0xxxxxxx    IS_UTF8_4: 11110xxx
 *   IS_UTF8_2: 110xxxxx    IS_UTF8_5: 111110xx
 *   IS_UTF8_3: 1110xxxx    IS_UTF8_6: 1111110x
 *
 * Each classifier evaluates its argument exactly once.
 */
#define IS_UTF8_1(c) (((c) & 0x80) == 0x00)
#define IS_UTF8_2(c) (((c) & 0xe0) == 0xc0)
#define IS_UTF8_3(c) (((c) & 0xf0) == 0xe0)
#define IS_UTF8_4(c) (((c) & 0xf8) == 0xf0)
#define IS_UTF8_5(c) (((c) & 0xfc) == 0xf8)
#define IS_UTF8_6(c) (((c) & 0xfe) == 0xfc)

/** True when c is a continuation byte (10xxxxxx). */
#define IS_UTF8_X(c) (((c) & 0xc0) == 0x80)

/**
 * True when c is 0xc0 .. 0xc3, i.e. the lead byte of a 2-byte sequence
 * whose decoded value fits in 8 bits.
 */
#define IS_UTF8_I(c) (((c) & 0xfc) == 0xc0)

/*
 * Sequence validators.  IS_UTF8_Sn(s) is true when s[0] is an n-byte lead
 * byte and s[1] .. s[n-1] are all continuation bytes; IS_UTF8_SI(s) is the
 * same check with IS_UTF8_I() as the lead-byte test.
 *
 * Bytes are tested left to right and testing stops at the first mismatch,
 * so a terminating NUL inside a truncated sequence stops the scan before
 * any byte past it is read.  The argument s is fully parenthesised, so
 * pointer expressions such as `p + 1` are accepted; it may be evaluated
 * several times and must not have side effects.
 */
#define IS_UTF8_S1(s) \
  (IS_UTF8_1((s)[0]))
#define IS_UTF8_S2(s) \
  (IS_UTF8_2((s)[0]) && IS_UTF8_X((s)[1]))
#define IS_UTF8_SI(s) \
  (IS_UTF8_I((s)[0]) && IS_UTF8_X((s)[1]))
#define IS_UTF8_S3(s) \
  (IS_UTF8_3((s)[0]) && IS_UTF8_X((s)[1]) && IS_UTF8_X((s)[2]))
#define IS_UTF8_S4(s) \
  (IS_UTF8_4((s)[0]) && IS_UTF8_X((s)[1]) && IS_UTF8_X((s)[2]) && \
   IS_UTF8_X((s)[3]))
#define IS_UTF8_S5(s) \
  (IS_UTF8_5((s)[0]) && IS_UTF8_X((s)[1]) && IS_UTF8_X((s)[2]) && \
   IS_UTF8_X((s)[3]) && IS_UTF8_X((s)[4]))
#define IS_UTF8_S6(s) \
  (IS_UTF8_6((s)[0]) && IS_UTF8_X((s)[1]) && IS_UTF8_X((s)[2]) && \
   IS_UTF8_X((s)[3]) && IS_UTF8_X((s)[4]) && IS_UTF8_X((s)[5]))
00138
/*
 * Decoders.  UCS4_Sn(s) assembles the ucs4 value carried by the n-byte
 * sequence starting at s[0]: the payload bits of the lead byte followed by
 * the low six bits of each of the next n-1 bytes.
 *
 * No validation is done here: the prefix bits are simply masked off.
 * Every byte is converted to ucs4 before shifting, so all arithmetic is
 * unsigned.  The argument s is fully parenthesised, so pointer expressions
 * such as `p + 1` are accepted; it is evaluated once per byte and must not
 * have side effects.
 */
#define UCS4_S1(s) ((ucs4)((s)[0]))
#define UCS4_S2(s) \
  ((((ucs4)((s)[0]) & 0x1fu) << 6) | \
    ((ucs4)((s)[1]) & 0x3fu))
#define UCS4_S3(s) \
  ((((ucs4)((s)[0]) & 0x0fu) << 12) | \
   (((ucs4)((s)[1]) & 0x3fu) << 6) | \
    ((ucs4)((s)[2]) & 0x3fu))
#define UCS4_S4(s) \
  ((((ucs4)((s)[0]) & 0x07u) << 18) | \
   (((ucs4)((s)[1]) & 0x3fu) << 12) | \
   (((ucs4)((s)[2]) & 0x3fu) << 6) | \
    ((ucs4)((s)[3]) & 0x3fu))
#define UCS4_S5(s) \
  ((((ucs4)((s)[0]) & 0x03u) << 24) | \
   (((ucs4)((s)[1]) & 0x3fu) << 18) | \
   (((ucs4)((s)[2]) & 0x3fu) << 12) | \
   (((ucs4)((s)[3]) & 0x3fu) << 6) | \
    ((ucs4)((s)[4]) & 0x3fu))
#define UCS4_S6(s) \
  ((((ucs4)((s)[0]) & 0x01u) << 30) | \
   (((ucs4)((s)[1]) & 0x3fu) << 24) | \
   (((ucs4)((s)[2]) & 0x3fu) << 18) | \
   (((ucs4)((s)[3]) & 0x3fu) << 12) | \
   (((ucs4)((s)[4]) & 0x3fu) << 6) | \
    ((ucs4)((s)[5]) & 0x3fu))
00153
/*
 * Encoders.  UTF8_Sn(s, c) stores value c as an n-byte sequence in
 * s[0] .. s[n-1]: a lead byte holding the prefix bits plus the top payload
 * bits, followed by n-1 bytes of the form 10xxxxxx carrying six bits each.
 *
 * The caller chooses n and must provide room for n bytes; nothing checks
 * that c actually fits in n bytes (excess high bits are masked away).
 * Each macro is a single comma expression.  Both arguments are fully
 * parenthesised, so c may be a compound expression such as `hi | lo`; both
 * are evaluated once per stored byte and must not have side effects.
 */
#define UTF8_S1(s,c) ((s)[0] = (c))
#define UTF8_S2(s,c) \
  ((s)[0] = (((c) >> 6) & 0x1f) | 0xc0, \
   (s)[1] = ((c) & 0x3f) | 0x80)
#define UTF8_S3(s,c) \
  ((s)[0] = (((c) >> 12) & 0x0f) | 0xe0, \
   (s)[1] = (((c) >> 6) & 0x3f) | 0x80, \
   (s)[2] = ((c) & 0x3f) | 0x80)
#define UTF8_S4(s,c) \
  ((s)[0] = (((c) >> 18) & 0x07) | 0xf0, \
   (s)[1] = (((c) >> 12) & 0x3f) | 0x80, \
   (s)[2] = (((c) >> 6) & 0x3f) | 0x80, \
   (s)[3] = ((c) & 0x3f) | 0x80)
#define UTF8_S5(s,c) \
  ((s)[0] = (((c) >> 24) & 0x03) | 0xf8, \
   (s)[1] = (((c) >> 18) & 0x3f) | 0x80, \
   (s)[2] = (((c) >> 12) & 0x3f) | 0x80, \
   (s)[3] = (((c) >> 6) & 0x3f) | 0x80, \
   (s)[4] = ((c) & 0x3f) | 0x80)
#define UTF8_S6(s,c) \
  ((s)[0] = (((c) >> 30) & 0x01) | 0xfc, \
   (s)[1] = (((c) >> 24) & 0x3f) | 0x80, \
   (s)[2] = (((c) >> 18) & 0x3f) | 0x80, \
   (s)[3] = (((c) >> 12) & 0x3f) | 0x80, \
   (s)[4] = (((c) >> 6) & 0x3f) | 0x80, \
   (s)[5] = ((c) & 0x3f) | 0x80)
00175
00176 SOFIA_END_DECLS
00177
00178 #endif