PocketSphinx
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 00038 /* System headers. */ 00039 #include <string.h> 00040 00041 /* SphinxBase headers. */ 00042 #include <sphinxbase/pio.h> 00043 #include <sphinxbase/strfuncs.h> 00044 00045 /* Local headers. */ 00046 #include "dict.h" 00047 00048 00049 #define DELIM " \t\n" /* Set of field separator characters */ 00050 #define DEFAULT_NUM_PHONE (MAX_S3CIPID+1) 00051 00052 #if WIN32 00053 #define snprintf sprintf_s 00054 #endif 00055 00056 extern const char *const cmu6_lts_phone_table[]; 00057 00058 static s3cipid_t 00059 dict_ciphone_id(dict_t * d, const char *str) 00060 { 00061 if (d->nocase) 00062 return bin_mdef_ciphone_id_nocase(d->mdef, str); 00063 else 00064 return bin_mdef_ciphone_id(d->mdef, str); 00065 } 00066 00067 00068 const char * 00069 dict_ciphone_str(dict_t * d, s3wid_t wid, int32 pos) 00070 { 00071 assert(d != NULL); 00072 assert((wid >= 0) && (wid < d->n_word)); 00073 assert((pos >= 0) && (pos < d->word[wid].pronlen)); 00074 00075 return bin_mdef_ciphone_str(d->mdef, d->word[wid].ciphone[pos]); 00076 } 00077 00078 00079 s3wid_t 00080 dict_add_word(dict_t * d, char const *word, s3cipid_t const * p, int32 np) 00081 { 00082 int32 len; 00083 dictword_t *wordp; 00084 s3wid_t newwid; 00085 char *wword; 00086 00087 if (d->n_word >= d->max_words) { 00088 E_INFO("Reallocating to %d KiB for word entries\n", 00089 (d->max_words + S3DICT_INC_SZ) * sizeof(dictword_t) / 1024); 00090 d->word = 00091 (dictword_t *) ckd_realloc(d->word, 00092 (d->max_words + 00093 S3DICT_INC_SZ) * sizeof(dictword_t)); 00094 d->max_words = d->max_words + S3DICT_INC_SZ; 00095 return BAD_S3WID; 00096 } 00097 00098 wordp = d->word + d->n_word; 00099 wordp->word = (char *) ckd_salloc(word); /* Freed in dict_free */ 00100 00101 /* Associate word string with d->n_word in hash table */ 00102 if (hash_table_enter_int32(d->ht, wordp->word, d->n_word) != d->n_word) { 00103 ckd_free(wordp->word); 00104 wordp->word = NULL; 00105 return BAD_S3WID; 00106 } 00107 00108 /* Fill in word entry, and set defaults */ 00109 if (p && (np > 0)) { 00110 wordp->ciphone = (s3cipid_t *) ckd_malloc(np * sizeof(s3cipid_t)); /* Freed in dict_free */ 00111 memcpy(wordp->ciphone, p, np * sizeof(s3cipid_t)); 00112 wordp->pronlen = np; 00113 } 00114 else { 00115 wordp->ciphone = NULL; 00116 wordp->pronlen = 0; 00117 } 00118 wordp->alt = BAD_S3WID; 00119 wordp->basewid = d->n_word; 00120 00121 /* Determine base/alt wids */ 00122 wword = ckd_salloc(word); 00123 if ((len = dict_word2basestr(wword)) > 0) { 00124 int32 w; 00125 00126 /* Truncated to a baseword string; find its ID */ 00127 if (hash_table_lookup_int32(d->ht, wword, &w) < 0) { 00128 E_ERROR("Missing base word for: %s\n", word); 00129 ckd_free(wword); 00130 ckd_free(wordp->word); 00131 wordp->word = NULL; 00132 return BAD_S3WID; 00133 } 00134 00135 /* Link into alt list */ 00136 wordp->basewid = w; 00137 wordp->alt = d->word[w].alt; 00138 d->word[w].alt = d->n_word; 00139 } 00140 ckd_free(wword); 00141 00142 newwid = d->n_word++; 00143 00144 return newwid; 00145 } 00146 00147 00148 static int32 00149 dict_read(FILE * fp, dict_t * d) 00150 { 00151 lineiter_t *li; 00152 char **wptr; 00153 s3cipid_t *p; 00154 int32 lineno, nwd; 00155 s3wid_t w; 00156 int32 i, maxwd; 00157 size_t stralloc, phnalloc; 00158 00159 maxwd = 512; 00160 p = (s3cipid_t *) ckd_calloc(maxwd + 4, sizeof(*p)); 00161 wptr = (char **) ckd_calloc(maxwd, sizeof(char *)); /* Freed below */ 00162 00163 lineno = 0; 00164 stralloc = phnalloc = 0; 00165 for (li = lineiter_start(fp); li; li = lineiter_next(li)) { 00166 lineno++; 00167 if (0 == strncmp(li->buf, "##", 2) 00168 || 0 == strncmp(li->buf, ";;", 2)) 00169 continue; 00170 00171 if ((nwd = str2words(li->buf, wptr, maxwd)) < 0) { 00172 /* Increase size of p, wptr. */ 00173 nwd = str2words(li->buf, NULL, 0); 00174 assert(nwd > maxwd); /* why else would it fail? */ 00175 maxwd = nwd; 00176 p = (s3cipid_t *) ckd_realloc(p, (maxwd + 4) * sizeof(*p)); 00177 wptr = (char **) ckd_realloc(wptr, maxwd * sizeof(*wptr)); 00178 } 00179 00180 if (nwd == 0) /* Empty line */ 00181 continue; 00182 /* wptr[0] is the word-string and wptr[1..nwd-1] the pronunciation sequence */ 00183 if (nwd == 1) { 00184 E_ERROR("Line %d: No pronunciation for word '%s'; ignored\n", 00185 lineno, wptr[0]); 00186 continue; 00187 } 00188 00189 00190 /* Convert pronunciation string to CI-phone-ids */ 00191 for (i = 1; i < nwd; i++) { 00192 p[i - 1] = dict_ciphone_id(d, wptr[i]); 00193 if (NOT_S3CIPID(p[i - 1])) { 00194 E_ERROR("Line %d: Phone '%s' is mising in the acoustic model; word '%s' ignored\n", 00195 lineno, wptr[i], wptr[0]); 00196 break; 00197 } 00198 } 00199 00200 if (i == nwd) { /* All CI-phones successfully converted to IDs */ 00201 w = dict_add_word(d, wptr[0], p, nwd - 1); 00202 if (NOT_S3WID(w)) 00203 E_ERROR 00204 ("Line %d: Failed to add the word '%s' (duplicate?); ignored\n", 00205 lineno, wptr[0]); 00206 else { 00207 stralloc += strlen(d->word[w].word); 00208 phnalloc += d->word[w].pronlen * sizeof(s3cipid_t); 00209 } 00210 } 00211 } 00212 E_INFO("Allocated %d KiB for strings, %d KiB for phones\n", 00213 (int)stralloc / 1024, (int)phnalloc / 1024); 00214 ckd_free(p); 00215 ckd_free(wptr); 00216 00217 return 0; 00218 } 00219 00220 int 00221 dict_write(dict_t *dict, char const *filename, char const *format) 00222 { 00223 FILE *fh; 00224 int i; 00225 00226 if ((fh = fopen(filename, "w")) == NULL) { 00227 E_ERROR_SYSTEM("Failed to open '%s'", filename); 00228 return -1; 00229 } 00230 for (i = 0; i < dict->n_word; ++i) { 00231 char *phones; 00232 int j, phlen; 00233 if (!dict_real_word(dict, i)) 00234 continue; 00235 for (phlen = j = 0; j < dict_pronlen(dict, i); ++j) 00236 phlen += strlen(dict_ciphone_str(dict, i, j)) + 1; 00237 phones = ckd_calloc(1, phlen); 00238 for (j = 0; j < dict_pronlen(dict, i); ++j) { 00239 strcat(phones, dict_ciphone_str(dict, i, j)); 00240 if (j != dict_pronlen(dict, i) - 1) 00241 strcat(phones, " "); 00242 } 00243 fprintf(fh, "%-30s %s\n", dict_wordstr(dict, i), phones); 00244 ckd_free(phones); 00245 } 00246 fclose(fh); 00247 return 0; 00248 } 00249 00250 00251 dict_t * 00252 dict_init(cmd_ln_t *config, bin_mdef_t * mdef) 00253 { 00254 FILE *fp, *fp2; 00255 int32 n; 00256 lineiter_t *li; 00257 dict_t *d; 00258 s3cipid_t sil; 00259 char const *dictfile = NULL, *fillerfile = NULL; 00260 00261 if (config) { 00262 dictfile = cmd_ln_str_r(config, "-dict"); 00263 fillerfile = cmd_ln_str_r(config, "-fdict"); 00264 } 00265 00266 /* 00267 * First obtain #words in dictionary (for hash table allocation). 00268 * Reason: The PC NT system doesn't like to grow memory gradually. Better to allocate 00269 * all the required memory in one go. 00270 */ 00271 fp = NULL; 00272 n = 0; 00273 if (dictfile) { 00274 if ((fp = fopen(dictfile, "r")) == NULL) 00275 E_FATAL_SYSTEM("Failed to open dictionary file '%s' for reading", dictfile); 00276 for (li = lineiter_start(fp); li; li = lineiter_next(li)) { 00277 if (li->buf[0] != '#') 00278 n++; 00279 } 00280 rewind(fp); 00281 } 00282 00283 fp2 = NULL; 00284 if (fillerfile) { 00285 if ((fp2 = fopen(fillerfile, "r")) == NULL) 00286 E_FATAL_SYSTEM("Failed to open filler dictionary file '%s' for reading", fillerfile); 00287 for (li = lineiter_start(fp2); li; li = lineiter_next(li)) { 00288 if (li->buf[0] != '#') 00289 n++; 00290 } 00291 rewind(fp2); 00292 } 00293 00294 /* 00295 * Allocate dict entries. HACK!! Allow some extra entries for words not in file. 00296 * Also check for type size restrictions. 00297 */ 00298 d = (dict_t *) ckd_calloc(1, sizeof(dict_t)); /* freed in dict_free() */ 00299 d->refcnt = 1; 00300 d->max_words = 00301 (n + S3DICT_INC_SZ < MAX_S3WID) ? n + S3DICT_INC_SZ : MAX_S3WID; 00302 if (n >= MAX_S3WID) 00303 E_FATAL("Number of words in dictionaries (%d) exceeds limit (%d)\n", n, 00304 MAX_S3WID); 00305 00306 E_INFO("Allocating %d * %d bytes (%d KiB) for word entries\n", 00307 d->max_words, sizeof(dictword_t), 00308 d->max_words * sizeof(dictword_t) / 1024); 00309 d->word = (dictword_t *) ckd_calloc(d->max_words, sizeof(dictword_t)); /* freed in dict_free() */ 00310 d->n_word = 0; 00311 if (mdef) 00312 d->mdef = bin_mdef_retain(mdef); 00313 00314 /* Create new hash table for word strings; case-insensitive word strings */ 00315 if (config && cmd_ln_exists_r(config, "-dictcase")) 00316 d->nocase = cmd_ln_boolean_r(config, "-dictcase"); 00317 d->ht = hash_table_new(d->max_words, d->nocase); 00318 00319 /* Digest main dictionary file */ 00320 if (fp) { 00321 E_INFO("Reading main dictionary: %s\n", dictfile); 00322 dict_read(fp, d); 00323 fclose(fp); 00324 E_INFO("%d words read\n", d->n_word); 00325 } 00326 00327 /* Now the filler dictionary file, if it exists */ 00328 d->filler_start = d->n_word; 00329 if (fillerfile) { 00330 E_INFO("Reading filler dictionary: %s\n", fillerfile); 00331 dict_read(fp2, d); 00332 fclose(fp2); 00333 E_INFO("%d words read\n", d->n_word - d->filler_start); 00334 } 00335 if (mdef) 00336 sil = bin_mdef_silphone(mdef); 00337 else 00338 sil = 0; 00339 if (dict_wordid(d, S3_START_WORD) == BAD_S3WID) { 00340 dict_add_word(d, S3_START_WORD, &sil, 1); 00341 } 00342 if (dict_wordid(d, S3_FINISH_WORD) == BAD_S3WID) { 00343 dict_add_word(d, S3_FINISH_WORD, &sil, 1); 00344 } 00345 if (dict_wordid(d, S3_SILENCE_WORD) == BAD_S3WID) { 00346 dict_add_word(d, S3_SILENCE_WORD, &sil, 1); 00347 } 00348 00349 d->filler_end = d->n_word - 1; 00350 00351 /* Initialize distinguished word-ids */ 00352 d->startwid = dict_wordid(d, S3_START_WORD); 00353 d->finishwid = dict_wordid(d, S3_FINISH_WORD); 00354 d->silwid = dict_wordid(d, S3_SILENCE_WORD); 00355 00356 if ((d->filler_start > d->filler_end) 00357 || (!dict_filler_word(d, d->silwid))) 00358 E_FATAL("Word '%s' must occur (only) in filler dictionary\n", 00359 S3_SILENCE_WORD); 00360 00361 /* No check that alternative pronunciations for filler words are in filler range!! */ 00362 00363 return d; 00364 } 00365 00366 00367 s3wid_t 00368 dict_wordid(dict_t *d, const char *word) 00369 { 00370 int32 w; 00371 00372 assert(d); 00373 assert(word); 00374 00375 if (hash_table_lookup_int32(d->ht, word, &w) < 0) 00376 return (BAD_S3WID); 00377 return w; 00378 } 00379 00380 00381 int 00382 dict_filler_word(dict_t *d, s3wid_t w) 00383 { 00384 assert(d); 00385 assert((w >= 0) && (w < d->n_word)); 00386 00387 w = dict_basewid(d, w); 00388 if ((w == d->startwid) || (w == d->finishwid)) 00389 return 0; 00390 if ((w >= d->filler_start) && (w <= d->filler_end)) 00391 return 1; 00392 return 0; 00393 } 00394 00395 int 00396 dict_real_word(dict_t *d, s3wid_t w) 00397 { 00398 assert(d); 00399 assert((w >= 0) && (w < d->n_word)); 00400 00401 w = dict_basewid(d, w); 00402 if ((w == d->startwid) || (w == d->finishwid)) 00403 return 0; 00404 if ((w >= d->filler_start) && (w <= d->filler_end)) 00405 return 0; 00406 return 1; 00407 } 00408 00409 00410 int32 00411 dict_word2basestr(char *word) 00412 { 00413 int32 i, len; 00414 00415 len = strlen(word); 00416 if (word[len - 1] == ')') { 00417 for (i = len - 2; (i > 0) && (word[i] != '('); --i); 00418 00419 if (i > 0) { 00420 /* The word is of the form <baseword>(...); strip from left-paren */ 00421 word[i] = '\0'; 00422 return i; 00423 } 00424 } 00425 00426 return -1; 00427 } 00428 00429 dict_t * 00430 dict_retain(dict_t *d) 00431 { 00432 ++d->refcnt; 00433 return d; 00434 } 00435 00436 int 00437 dict_free(dict_t * d) 00438 { 00439 int i; 00440 dictword_t *word; 00441 00442 if (d == NULL) 00443 return 0; 00444 if (--d->refcnt > 0) 00445 return d->refcnt; 00446 00447 /* First Step, free all memory allocated for each word */ 00448 for (i = 0; i < d->n_word; i++) { 00449 word = (dictword_t *) & (d->word[i]); 00450 if (word->word) 00451 ckd_free((void *) word->word); 00452 if (word->ciphone) 00453 ckd_free((void *) word->ciphone); 00454 } 00455 00456 if (d->word) 00457 ckd_free((void *) d->word); 00458 if (d->ht) 00459 hash_table_free(d->ht); 00460 if (d->mdef) 00461 bin_mdef_free(d->mdef); 00462 ckd_free((void *) d); 00463 00464 return 0; 00465 } 00466 00467 void 00468 dict_report(dict_t * d) 00469 { 00470 E_INFO_NOFN("Initialization of dict_t, report:\n"); 00471 E_INFO_NOFN("Max word: %d\n", d->max_words); 00472 E_INFO_NOFN("No of word: %d\n", d->n_word); 00473 E_INFO_NOFN("\n"); 00474 }