26 { 0,
"Windows-1250",
SLEN(
"Windows-1250"),
w1250 },
27 { 0,
"Windows-1251",
SLEN(
"Windows-1251"),
w1251 },
28 { 0,
"Windows-1252",
SLEN(
"Windows-1252"),
w1252 },
29 { 0,
"Windows-1253",
SLEN(
"Windows-1253"),
w1253 },
30 { 0,
"Windows-1254",
SLEN(
"Windows-1254"),
w1254 },
31 { 0,
"Windows-1255",
SLEN(
"Windows-1255"),
w1255 },
32 { 0,
"Windows-1256",
SLEN(
"Windows-1256"),
w1256 },
33 { 0,
"Windows-1257",
SLEN(
"Windows-1257"),
w1257 },
34 { 0,
"Windows-1258",
SLEN(
"Windows-1258"),
w1258 },
45 #define READ_BUFSIZE (8)
51 #define WRITE_BUFSIZE (8)
66 const uint8_t **source,
size_t *sourcelen,
67 uint8_t **dest,
size_t *destlen);
70 const uint8_t **source,
size_t *sourcelen,
71 uint8_t **dest,
size_t *destlen);
76 const uint8_t **source,
size_t *sourcelen,
77 uint8_t **dest,
size_t *destlen);
80 uint32_t ucs4, uint8_t **dest,
size_t *destlen);
82 uint32_t ucs4, uint8_t **s,
size_t *
len);
84 const uint8_t *s,
size_t len, uint32_t *ucs4);
130 charset, strlen(charset));
131 uint32_t *
table = NULL;
140 assert(table != NULL);
206 const uint8_t **source,
size_t *sourcelen,
207 uint8_t **dest,
size_t *destlen)
226 for (len = 0; len < c->
write_len; len++) {
239 while (*sourcelen > 0) {
245 while (towritelen > 0) {
261 for (len = 0; len < towritelen; len++)
325 const uint8_t **source,
size_t *sourcelen,
326 uint8_t **dest,
size_t *destlen)
336 *((uint32_t *) (
void *) *dest) =
346 if (*destlen < c->read_len * 4) {
359 while (*sourcelen > 0) {
361 source, sourcelen, dest, destlen);
419 const uint8_t **source,
size_t *sourcelen,
420 uint8_t **dest,
size_t *destlen)
430 ucs4, dest, destlen);
452 0xFFFD, dest, destlen);
476 uint32_t ucs4, uint8_t **dest,
size_t *destlen)
510 uint32_t ucs4, uint8_t **s,
size_t *
len)
523 for (i = 0; i < 128; i++) {
524 if (ucs4 == c->
table[i])
558 const uint8_t *s,
size_t len, uint32_t *ucs4)
568 if (c->
table[*s - 0x80] == 0xFFFF)
571 out = c->
table[*s - 0x80];
parserutils_charset_codec base
Base class.
Codec factory component definition.
uint32_t read_buf[READ_BUFSIZE]
Buffer for partial output sequences (decode) (host-endian)
static uint32_t w1256[128]
static uint32_t w1250[128]
static uint32_t w1255[128]
parserutils_error(* destroy)(parserutils_charset_codec *codec)
static uint32_t endian_host_to_big(uint32_t host)
static parserutils_error charset_ext8_codec_encode(parserutils_charset_codec *codec, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen)
Encode a chunk of UCS-4 (big endian) data into extended 8bit.
parserutils_error(* encode)(parserutils_charset_codec *codec, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen)
static bool charset_ext8_codec_handles_charset(const char *charset)
Determine whether this codec handles a specific charset.
static uint32_t w1253[128]
static parserutils_error charset_ext8_codec_decode(parserutils_charset_codec *codec, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen)
Decode a chunk of extended 8bit data into UCS-4 (big endian)
static parserutils_error charset_ext8_codec_read_char(charset_ext8_codec *c, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen)
Read a character from the extended 8bit to UCS-4 (big endian)
uint32_t * table
Mapping table for 0x80-0xFF.
static parserutils_error charset_ext8_codec_reset(parserutils_charset_codec *codec)
Clear an extended 8bit codec's encoding state.
static uint32_t w1257[128]
size_t write_len
Character length of write_buf.
const parserutils_charset_handler charset_ext8_codec_handler
parserutils_charset_codec_errormode errormode
error mode
static parserutils_error charset_ext8_codec_destroy(parserutils_charset_codec *codec)
Destroy an extended 8bit codec.
static uint32_t endian_big_to_host(uint32_t big)
static uint32_t w1252[128]
static uint32_t w1251[128]
struct charset_ext8_codec charset_ext8_codec
Windows charset codec.
static uint32_t w1254[128]
static parserutils_error charset_ext8_from_ucs4(charset_ext8_codec *c, uint32_t ucs4, uint8_t **s, size_t *len)
Convert a UCS4 (host endian) character to extended 8bit.
static uint32_t w1258[128]
struct parserutils_charset_codec::@3 handler
Vtable for handler code.
size_t read_len
Character length of read_buf.
Abort processing if unrepresentable character encountered.
Core charset codec definition; implementations extend this.
parserutils_error(* reset)(parserutils_charset_codec *codec)
static parserutils_error charset_ext8_codec_create(const char *charset, parserutils_charset_codec **codec)
Create an extended 8bit codec.
uint16_t parserutils_charset_mibenum_from_name(const char *alias, size_t len)
Retrieve the MIB enum value assigned to an encoding name.
static parserutils_error charset_ext8_to_ucs4(charset_ext8_codec *c, const uint8_t *s, size_t len, uint32_t *ucs4)
Convert an extended 8bit character to UCS4 (host endian)
parserutils_error(* decode)(parserutils_charset_codec *codec, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen)
static parserutils_error charset_ext8_codec_output_decoded_char(charset_ext8_codec *c, uint32_t ucs4, uint8_t **dest, size_t *destlen)
Output a UCS-4 character (big endian)
static struct @2 known_charsets[]
uint32_t write_buf[WRITE_BUFSIZE]
Buffer for partial output sequences (encode) (host-endian)