46 #include <sphinxbase/hash_table.h> 51 #include "pocketsphinx_export.h" 53 #define S3DICT_INC_SZ 4096 151 #define dict_size(d) ((d)->n_word) 152 #define dict_num_fillers(d) (dict_filler_end(d) - dict_filler_start(d)) 158 #define dict_num_real_words(d) \ 159 (dict_size(d) - (dict_filler_end(d) - dict_filler_start(d)) - 2) 160 #define dict_basewid(d,w) ((d)->word[w].basewid) 161 #define dict_wordstr(d,w) ((w) < 0 ? NULL : (d)->word[w].word) 162 #define dict_basestr(d,w) ((d)->word[dict_basewid(d,w)].word) 163 #define dict_nextalt(d,w) ((d)->word[w].alt) 164 #define dict_pronlen(d,w) ((d)->word[w].pronlen) 165 #define dict_pron(d,w,p) ((d)->word[w].ciphone[p]) 166 #define dict_filler_start(d) ((d)->filler_start) 167 #define dict_filler_end(d) ((d)->filler_end) 168 #define dict_startwid(d) ((d)->startwid) 169 #define dict_finishwid(d) ((d)->finishwid) 170 #define dict_silwid(d) ((d)->silwid) 171 #define dict_is_single_phone(d,w) ((d)->word[w].pronlen == 1) 172 #define dict_first_phone(d,w) ((d)->word[w].ciphone[0]) 173 #define dict_second_phone(d,w) ((d)->word[w].ciphone[1]) 174 #define dict_second_last_phone(d,w) ((d)->word[w].ciphone[(d)->word[w].pronlen - 2]) 175 #define dict_last_phone(d,w) ((d)->word[w].ciphone[(d)->word[w].pronlen - 1]) 178 #define S3_START_WORD "<s>" 179 #define S3_FINISH_WORD "</s>" 180 #define S3_SILENCE_WORD "<sil>" 181 #define S3_UNKNOWN_WORD "<UNK>" dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
char * word
ASCII word string.
int dict_free(dict_t *d)
Release a pointer to a dictionary.
Binary format model definition files, with support for heterogeneous topologies and variable-size N-p...
int32 n_word
Number of occupied entries in dict, i.e., excluding empty slots
const char * dict_ciphone_str(dict_t *d, s3wid_t wid, int32 pos)
Return value: CI phone string for the given word, phone position.
dict_t * dict_retain(dict_t *d)
Retain a pointer to a dict_t.
int32 filler_end
Last filler word id (read from filler dict)
int dict_write(dict_t *dict, char const *filename, char const *format)
Write dictionary to a file.
s3wid_t startwid
FOR INTERNAL-USE ONLY.
s3wid_t silwid
FOR INTERNAL-USE ONLY.
s3wid_t alt
Next alternative pronunciation id, NOT_S3WID if none.
int16 s3cipid_t
Size definitions for more semantically meaningful units.
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Size definitions of semantically meaningful units.
a structure for one dictionary word.
s3wid_t basewid
Base pronunciation id.
s3wid_t finishwid
FOR INTERNAL-USE ONLY.
a structure for a dictionary.
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e.
int32 filler_start
First filler word id (read from filler dict)
int32 max_words
Number of entries allocated in dict, including empty slots
s3cipid_t * ciphone
Pronunciation.
dictword_t * word
Array of entries in dictionary.
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
int32 pronlen
Pronunciation length.
bin_mdef_t * mdef
Model definition used for phone IDs; NULL if none used.
hash_table_t * ht
Hash table for mapping word strings to word ids.
void dict_report(dict_t *d)
Report a dictionary structure.
int32 dict_word2basestr(char *word)
If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative pronunciation spe...