42 #include <sphinxbase/pio.h>
43 #include <sphinxbase/strfuncs.h>
50 #define DEFAULT_NUM_PHONE (MAX_S3CIPID+1)
53 #define snprintf sprintf_s
56 extern const char *
const cmu6_lts_phone_table[];
59 dict_ciphone_id(
dict_t * d,
const char *str)
62 return bin_mdef_ciphone_id_nocase(d->
mdef, str);
64 return bin_mdef_ciphone_id(d->
mdef, str);
69 dict_ciphone_str(
dict_t * d, s3wid_t wid, int32 pos)
72 assert((wid >= 0) && (wid < d->n_word));
73 assert((pos >= 0) && (pos < d->word[wid].pronlen));
88 E_INFO(
"Reallocating to %d KiB for word entries\n",
98 wordp->
word = (
char *) ckd_salloc(word);
102 ckd_free(wordp->
word);
121 wword = ckd_salloc(word);
122 if ((len = dict_word2basestr(wword)) > 0) {
126 if (hash_table_lookup_int32(d->
ht, wword, &w) < 0) {
127 E_ERROR(
"Missing base word for: %s\n", word);
129 ckd_free(wordp->
word);
148 dict_read(FILE * fp,
dict_t * d)
156 size_t stralloc, phnalloc;
159 p = (
s3cipid_t *) ckd_calloc(maxwd + 4,
sizeof(*p));
160 wptr = (
char **) ckd_calloc(maxwd,
sizeof(
char *));
163 stralloc = phnalloc = 0;
164 for (li = lineiter_start(fp); li; li = lineiter_next(li)) {
166 if (0 == strncmp(li->buf,
"##", 2)
167 || 0 == strncmp(li->buf,
";;", 2))
170 if ((nwd = str2words(li->buf, wptr, maxwd)) < 0) {
172 nwd = str2words(li->buf, NULL, 0);
175 p = (
s3cipid_t *) ckd_realloc(p, (maxwd + 4) *
sizeof(*p));
176 wptr = (
char **) ckd_realloc(wptr, maxwd *
sizeof(*wptr));
183 E_ERROR(
"Line %d: No pronunciation for word '%s'; ignored\n",
190 for (i = 1; i < nwd; i++) {
191 p[i - 1] = dict_ciphone_id(d, wptr[i]);
192 if (NOT_S3CIPID(p[i - 1])) {
193 E_ERROR(
"Line %d: Phone '%s' is mising in the acoustic model; word '%s' ignored\n",
194 lineno, wptr[i], wptr[0]);
200 w = dict_add_word(d, wptr[0], p, nwd - 1);
203 (
"Line %d: Failed to add the word '%s' (duplicate?); ignored\n",
206 stralloc += strlen(d->
word[w].
word);
211 E_INFO(
"Allocated %d KiB for strings, %d KiB for phones\n",
212 (
int)stralloc / 1024, (
int)phnalloc / 1024);
220 dict_write(
dict_t *dict,
char const *filename,
char const *format)
225 if ((fh = fopen(filename,
"w")) == NULL) {
226 E_ERROR_SYSTEM(
"Failed to open '%s'", filename);
229 for (i = 0; i < dict->
n_word; ++i) {
232 if (!dict_real_word(dict, i))
234 for (phlen = j = 0; j < dict_pronlen(dict, i); ++j)
235 phlen += strlen(dict_ciphone_str(dict, i, j)) + 1;
236 phones = ckd_calloc(1, phlen);
237 for (j = 0; j < dict_pronlen(dict, i); ++j) {
238 strcat(phones, dict_ciphone_str(dict, i, j));
239 if (j != dict_pronlen(dict, i) - 1)
242 fprintf(fh,
"%-30s %s\n", dict_wordstr(dict, i), phones);
258 char const *dictfile = NULL, *fillerfile = NULL;
261 dictfile = cmd_ln_str_r(config,
"-dict");
262 fillerfile = cmd_ln_str_r(config,
"-fdict");
273 if ((fp = fopen(dictfile,
"r")) == NULL) {
274 E_ERROR_SYSTEM(
"Failed to open dictionary file '%s' for reading", dictfile);
277 for (li = lineiter_start(fp); li; li = lineiter_next(li)) {
278 if (0 != strncmp(li->buf,
"##", 2)
279 && 0 != strncmp(li->buf,
";;", 2))
287 if ((fp2 = fopen(fillerfile,
"r")) == NULL) {
288 E_ERROR_SYSTEM(
"Failed to open filler dictionary file '%s' for reading", fillerfile);
292 for (li = lineiter_start(fp2); li; li = lineiter_next(li)) {
293 if (0 != strncmp(li->buf,
"##", 2)
294 && 0 != strncmp(li->buf,
";;", 2))
307 (n + S3DICT_INC_SZ < MAX_S3WID) ? n + S3DICT_INC_SZ : MAX_S3WID;
308 if (n >= MAX_S3WID) {
309 E_ERROR(
"Number of words in dictionaries (%d) exceeds limit (%d)\n", n,
317 E_INFO(
"Allocating %d * %d bytes (%d KiB) for word entries\n",
323 d->
mdef = bin_mdef_retain(mdef);
326 if (config && cmd_ln_exists_r(config,
"-dictcase"))
327 d->nocase = cmd_ln_boolean_r(config,
"-dictcase");
332 E_INFO(
"Reading main dictionary: %s\n", dictfile);
335 E_INFO(
"%d words read\n", d->
n_word);
341 E_INFO(
"Reading filler dictionary: %s\n", fillerfile);
347 sil = bin_mdef_silphone(mdef);
350 if (dict_wordid(d, S3_START_WORD) ==
BAD_S3WID) {
351 dict_add_word(d, S3_START_WORD, &sil, 1);
353 if (dict_wordid(d, S3_FINISH_WORD) ==
BAD_S3WID) {
354 dict_add_word(d, S3_FINISH_WORD, &sil, 1);
356 if (dict_wordid(d, S3_SILENCE_WORD) ==
BAD_S3WID) {
357 dict_add_word(d, S3_SILENCE_WORD, &sil, 1);
363 d->
startwid = dict_wordid(d, S3_START_WORD);
364 d->
finishwid = dict_wordid(d, S3_FINISH_WORD);
365 d->
silwid = dict_wordid(d, S3_SILENCE_WORD);
368 || (!dict_filler_word(d, d->
silwid))) {
369 E_ERROR(
"Word '%s' must occur (only) in filler dictionary\n",
389 if (hash_table_lookup_int32(d->
ht, word, &w) < 0)
399 assert((w >= 0) && (w < d->n_word));
401 w = dict_basewid(d, w);
413 assert((w >= 0) && (w < d->n_word));
415 w = dict_basewid(d, w);
425 dict_word2basestr(
char *word)
430 if (word[len - 1] ==
')') {
431 for (i = len - 2; (i > 0) && (word[i] !=
'('); --i);
462 for (i = 0; i < d->
n_word; i++) {
465 ckd_free((
void *) word->
word);
467 ckd_free((
void *) word->
ciphone);
471 ckd_free((
void *) d->
word);
473 hash_table_free(d->
ht);
475 bin_mdef_free(d->
mdef);
476 ckd_free((
void *) d);
484 E_INFO_NOFN(
"Initialization of dict_t, report:\n");
485 E_INFO_NOFN(
"Max word: %d\n", d->
max_words);
486 E_INFO_NOFN(
"No of word: %d\n", d->
n_word);