PocketSphinx
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * 00019 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00020 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00022 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00023 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00024 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00025 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00026 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00027 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00028 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00029 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 * 00031 * ==================================================================== 00032 * 00033 */ 00034 00035 /* 00036 * fsg_search.c -- Search structures for FSM decoding. 00037 * 00038 * ********************************************** 00039 * CMU ARPA Speech Project 00040 * 00041 * Copyright (c) 2004 Carnegie Mellon University. 00042 * ALL RIGHTS RESERVED. 00043 * ********************************************** 00044 * 00045 * HISTORY 00046 * 00047 * 18-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon 00048 * Started. 00049 */ 00050 00051 /* System headers. */ 00052 #include <stdio.h> 00053 #include <string.h> 00054 #include <assert.h> 00055 00056 /* SphinxBase headers. */ 00057 #include <sphinxbase/err.h> 00058 #include <sphinxbase/ckd_alloc.h> 00059 #include <sphinxbase/strfuncs.h> 00060 #include <sphinxbase/cmd_ln.h> 00061 00062 /* Local headers. */ 00063 #include "pocketsphinx_internal.h" 00064 #include "ps_lattice_internal.h" 00065 #include "fsg_search_internal.h" 00066 #include "fsg_history.h" 00067 #include "fsg_lextree.h" 00068 00069 /* Turn this on for detailed debugging dump */ 00070 #define __FSG_DBG__ 0 00071 #define __FSG_DBG_CHAN__ 0 00072 00073 static ps_seg_t *fsg_search_seg_iter(ps_search_t *search, int32 *out_score); 00074 static ps_lattice_t *fsg_search_lattice(ps_search_t *search); 00075 static int fsg_search_prob(ps_search_t *search); 00076 00077 static ps_searchfuncs_t fsg_funcs = { 00078 /* name: */ "fsg", 00079 /* start: */ fsg_search_start, 00080 /* step: */ fsg_search_step, 00081 /* finish: */ fsg_search_finish, 00082 /* reinit: */ fsg_search_reinit, 00083 /* free: */ fsg_search_free, 00084 /* lattice: */ fsg_search_lattice, 00085 /* hyp: */ fsg_search_hyp, 00086 /* prob: */ fsg_search_prob, 00087 /* seg_iter: */ fsg_search_seg_iter, 00088 }; 00089 00090 ps_search_t * 00091 fsg_search_init(cmd_ln_t *config, 00092 acmod_t *acmod, 00093 dict_t *dict, 00094 dict2pid_t *d2p) 00095 { 00096 fsg_search_t *fsgs; 00097 char const *path; 00098 00099 fsgs = ckd_calloc(1, sizeof(*fsgs)); 00100 ps_search_init(ps_search_base(fsgs), &fsg_funcs, config, acmod, dict, d2p); 00101 00102 /* Initialize HMM context. */ 00103 fsgs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef), 00104 acmod->tmat->tp, NULL, acmod->mdef->sseq); 00105 if (fsgs->hmmctx == NULL) { 00106 ps_search_free(ps_search_base(fsgs)); 00107 return NULL; 00108 } 00109 00110 /* Intialize the search history object */ 00111 fsgs->history = fsg_history_init(NULL, dict); 00112 fsgs->frame = -1; 00113 00114 /* Initialize FSG table. */ 00115 fsgs->fsgs = hash_table_new(5, HASH_CASE_YES); 00116 00117 /* Get search pruning parameters */ 00118 fsgs->beam_factor = 1.0f; 00119 fsgs->beam = fsgs->beam_orig 00120 = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam")) 00121 >> SENSCR_SHIFT; 00122 fsgs->pbeam = fsgs->pbeam_orig 00123 = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam")) 00124 >> SENSCR_SHIFT; 00125 fsgs->wbeam = fsgs->wbeam_orig 00126 = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam")) 00127 >> SENSCR_SHIFT; 00128 00129 /* LM related weights/penalties */ 00130 fsgs->lw = cmd_ln_float32_r(config, "-lw"); 00131 fsgs->pip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip")) 00132 * fsgs->lw) 00133 >> SENSCR_SHIFT; 00134 fsgs->wip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip")) 00135 * fsgs->lw) 00136 >> SENSCR_SHIFT; 00137 00138 /* Best path search (and confidence annotation)? */ 00139 if (cmd_ln_boolean_r(config, "-bestpath")) 00140 fsgs->bestpath = TRUE; 00141 00142 /* Acoustic score scale for posterior probabilities. */ 00143 fsgs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale"); 00144 00145 E_INFO("FSG(beam: %d, pbeam: %d, wbeam: %d; wip: %d, pip: %d)\n", 00146 fsgs->beam_orig, fsgs->pbeam_orig, fsgs->wbeam_orig, 00147 fsgs->wip, fsgs->pip); 00148 00149 /* Load an FSG if one was specified in config */ 00150 if ((path = cmd_ln_str_r(config, "-fsg"))) { 00151 fsg_model_t *fsg; 00152 00153 if ((fsg = fsg_model_readfile(path, acmod->lmath, fsgs->lw)) == NULL) 00154 goto error_out; 00155 if (fsg_set_add(fsgs, fsg_model_name(fsg), fsg) != fsg) { 00156 fsg_model_free(fsg); 00157 goto error_out; 00158 } 00159 if (fsg_set_select(fsgs, fsg_model_name(fsg)) == NULL) 00160 goto error_out; 00161 if (fsg_search_reinit(ps_search_base(fsgs), 00162 ps_search_dict(fsgs), 00163 ps_search_dict2pid(fsgs)) < 0) 00164 goto error_out; 00165 } 00166 /* Or load a JSGF grammar */ 00167 else if ((path = cmd_ln_str_r(config, "-jsgf"))) { 00168 fsg_model_t *fsg; 00169 jsgf_rule_t *rule; 00170 char const *toprule; 00171 00172 if ((fsgs->jsgf = jsgf_parse_file(path, NULL)) == NULL) 00173 goto error_out; 00174 00175 rule = NULL; 00176 /* Take the -toprule if specified. */ 00177 if ((toprule = cmd_ln_str_r(config, "-toprule"))) { 00178 char *anglerule; 00179 anglerule = string_join("<", toprule, ">", NULL); 00180 rule = jsgf_get_rule(fsgs->jsgf, anglerule); 00181 ckd_free(anglerule); 00182 if (rule == NULL) { 00183 E_ERROR("Start rule %s not found\n", toprule); 00184 goto error_out; 00185 } 00186 } 00187 /* Otherwise, take the first public rule. */ 00188 else { 00189 jsgf_rule_iter_t *itor; 00190 00191 for (itor = jsgf_rule_iter(fsgs->jsgf); itor; 00192 itor = jsgf_rule_iter_next(itor)) { 00193 rule = jsgf_rule_iter_rule(itor); 00194 if (jsgf_rule_public(rule)) 00195 break; 00196 } 00197 if (rule == NULL) { 00198 E_ERROR("No public rules found in %s\n", path); 00199 goto error_out; 00200 } 00201 } 00202 fsg = jsgf_build_fsg(fsgs->jsgf, rule, acmod->lmath, fsgs->lw); 00203 if (fsg_set_add(fsgs, fsg_model_name(fsg), fsg) != fsg) { 00204 fsg_model_free(fsg); 00205 goto error_out; 00206 } 00207 if (fsg_set_select(fsgs, fsg_model_name(fsg)) == NULL) 00208 goto error_out; 00209 if (fsg_search_reinit(ps_search_base(fsgs), 00210 ps_search_dict(fsgs), 00211 ps_search_dict2pid(fsgs)) < 0) 00212 goto error_out; 00213 } 00214 00215 return ps_search_base(fsgs); 00216 00217 error_out: 00218 fsg_search_free(ps_search_base(fsgs)); 00219 return NULL; 00220 } 00221 00222 void 00223 fsg_search_free(ps_search_t *search) 00224 { 00225 fsg_search_t *fsgs = (fsg_search_t *)search; 00226 hash_iter_t *itor; 00227 00228 ps_search_deinit(search); 00229 if (fsgs->jsgf) 00230 jsgf_grammar_free(fsgs->jsgf); 00231 fsg_lextree_free(fsgs->lextree); 00232 if (fsgs->history) { 00233 fsg_history_reset(fsgs->history); 00234 fsg_history_set_fsg(fsgs->history, NULL, NULL); 00235 fsg_history_free(fsgs->history); 00236 } 00237 if (fsgs->fsgs) { 00238 for (itor = hash_table_iter(fsgs->fsgs); 00239 itor; itor = hash_table_iter_next(itor)) { 00240 fsg_model_t *fsg = (fsg_model_t *) hash_entry_val(itor->ent); 00241 fsg_model_free(fsg); 00242 } 00243 hash_table_free(fsgs->fsgs); 00244 } 00245 hmm_context_free(fsgs->hmmctx); 00246 ckd_free(fsgs); 00247 } 00248 00249 int 00250 fsg_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p) 00251 { 00252 fsg_search_t *fsgs = (fsg_search_t *)search; 00253 00254 /* Free the old lextree */ 00255 if (fsgs->lextree) 00256 fsg_lextree_free(fsgs->lextree); 00257 00258 /* Free old dict2pid, dict */ 00259 ps_search_base_reinit(search, dict, d2p); 00260 00261 /* Nothing to update */ 00262 if (fsgs->fsg == NULL) 00263 return; 00264 00265 /* Update the number of words (not used by this module though). */ 00266 search->n_words = dict_size(dict); 00267 00268 /* Allocate new lextree for the given FSG */ 00269 fsgs->lextree = fsg_lextree_init(fsgs->fsg, dict, d2p, 00270 ps_search_acmod(fsgs)->mdef, 00271 fsgs->hmmctx, fsgs->wip, fsgs->pip); 00272 00273 /* Inform the history module of the new fsg */ 00274 fsg_history_set_fsg(fsgs->history, fsgs->fsg, dict); 00275 00276 return 0; 00277 } 00278 00279 00280 static int 00281 fsg_search_add_silences(fsg_search_t *fsgs, fsg_model_t *fsg) 00282 { 00283 dict_t *dict; 00284 int32 wid; 00285 int n_sil; 00286 00287 dict = ps_search_dict(fsgs); 00288 /* 00289 * NOTE: Unlike N-Gram search, we do not use explicit start and 00290 * end symbols. This is because the start and end nodes are 00291 * defined in the grammar. We do add silence/filler self-loops to 00292 * all states in order to allow for silence between words and at 00293 * the beginning and end of utterances. 00294 * 00295 * This has some implications for word graph generation, namely, 00296 * that there can (and usually will) be multiple start and end 00297 * states in the word graph. We therefore do add explicit start 00298 * and end nodes to the graph. 00299 */ 00300 /* Add silence self-loops to all states. */ 00301 fsg_model_add_silence(fsg, "<sil>", -1, 00302 cmd_ln_float32_r(ps_search_config(fsgs), "-silprob")); 00303 n_sil = 0; 00304 /* Add self-loops for all other fillers. */ 00305 for (wid = dict_filler_start(dict); wid < dict_filler_end(dict); ++wid) { 00306 char const *word = dict_wordstr(dict, wid); 00307 if (wid == dict_startwid(dict) || wid == dict_finishwid(dict)) 00308 continue; 00309 fsg_model_add_silence(fsg, word, -1, 00310 cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob")); 00311 ++n_sil; 00312 } 00313 00314 return n_sil; 00315 } 00316 00317 /* Scans the dictionary and check if all words are present. */ 00318 static int 00319 fsg_search_check_dict(fsg_search_t *fsgs, fsg_model_t *fsg) 00320 { 00321 dict_t *dict; 00322 int i; 00323 00324 dict = ps_search_dict(fsgs); 00325 for (i = 0; i < fsg_model_n_word(fsg); ++i) { 00326 char const *word; 00327 int32 wid; 00328 00329 word = fsg_model_word_str(fsg, i); 00330 wid = dict_wordid(dict, word); 00331 if (wid == BAD_S3WID) { 00332 E_ERROR("The word '%s' is missing in the dictionary\n", word); 00333 return FALSE; 00334 } 00335 } 00336 00337 return TRUE; 00338 } 00339 00340 static int 00341 fsg_search_add_altpron(fsg_search_t *fsgs, fsg_model_t *fsg) 00342 { 00343 dict_t *dict; 00344 int n_alt, n_word; 00345 int i; 00346 00347 dict = ps_search_dict(fsgs); 00348 /* Scan FSG's vocabulary for words that have alternate pronunciations. */ 00349 n_alt = 0; 00350 n_word = fsg_model_n_word(fsg); 00351 for (i = 0; i < n_word; ++i) { 00352 char const *word; 00353 int32 wid; 00354 00355 word = fsg_model_word_str(fsg, i); 00356 wid = dict_wordid(dict, word); 00357 if (wid != BAD_S3WID) { 00358 while ((wid = dict_nextalt(dict, wid)) != BAD_S3WID) { 00359 n_alt += fsg_model_add_alt(fsg, word, dict_wordstr(dict, wid)); 00360 } 00361 } 00362 } 00363 00364 E_INFO("Added %d alternate word transitions\n", n_alt); 00365 return n_alt; 00366 } 00367 00368 fsg_model_t * 00369 fsg_set_get_fsg(fsg_search_t *fsgs, const char *name) 00370 { 00371 void *val; 00372 00373 if (hash_table_lookup(fsgs->fsgs, name, &val) < 0) 00374 return NULL; 00375 return (fsg_model_t *)val; 00376 } 00377 00378 fsg_model_t * 00379 fsg_set_add(fsg_search_t *fsgs, char const *name, fsg_model_t *fsg) 00380 { 00381 if (name == NULL) 00382 name = fsg_model_name(fsg); 00383 00384 if (!fsg_search_check_dict(fsgs, fsg)) 00385 return NULL; 00386 00387 /* Add silence transitions and alternate words. */ 00388 if (cmd_ln_boolean_r(ps_search_config(fsgs), "-fsgusefiller") 00389 && !fsg_model_has_sil(fsg)) 00390 fsg_search_add_silences(fsgs, fsg); 00391 if (cmd_ln_boolean_r(ps_search_config(fsgs), "-fsgusealtpron") 00392 && !fsg_model_has_alt(fsg)) 00393 fsg_search_add_altpron(fsgs, fsg); 00394 00395 return (fsg_model_t *)hash_table_enter(fsgs->fsgs, name, fsg); 00396 } 00397 00398 00399 fsg_model_t * 00400 fsg_set_remove_byname(fsg_search_t *fsgs, char const *key) 00401 { 00402 fsg_model_t *oldfsg; 00403 void *val; 00404 00405 /* Look for the matching FSG. */ 00406 if (hash_table_lookup(fsgs->fsgs, key, &val) < 0) { 00407 E_ERROR("FSG `%s' to be deleted not found\n", key); 00408 return NULL; 00409 } 00410 oldfsg = val; 00411 00412 /* Remove it from the FSG table. */ 00413 hash_table_delete(fsgs->fsgs, key); 00414 /* If this was the currently active FSG, also delete other stuff */ 00415 if (fsgs->fsg == oldfsg) { 00416 fsg_lextree_free(fsgs->lextree); 00417 fsgs->lextree = NULL; 00418 fsg_history_set_fsg(fsgs->history, NULL, NULL); 00419 fsgs->fsg = NULL; 00420 } 00421 return oldfsg; 00422 } 00423 00424 00425 fsg_model_t * 00426 fsg_set_remove(fsg_search_t *fsgs, fsg_model_t *fsg) 00427 { 00428 char const *key; 00429 hash_iter_t *itor; 00430 00431 key = NULL; 00432 for (itor = hash_table_iter(fsgs->fsgs); 00433 itor; itor = hash_table_iter_next(itor)) { 00434 fsg_model_t *oldfsg; 00435 00436 oldfsg = (fsg_model_t *) hash_entry_val(itor->ent); 00437 if (oldfsg == fsg) { 00438 key = hash_entry_key(itor->ent); 00439 hash_table_iter_free(itor); 00440 break; 00441 } 00442 } 00443 if (key == NULL) { 00444 E_WARN("FSG '%s' to be deleted not found\n", fsg_model_name(fsg)); 00445 return NULL; 00446 } 00447 else 00448 return fsg_set_remove_byname(fsgs, key); 00449 } 00450 00451 00452 fsg_model_t * 00453 fsg_set_select(fsg_search_t *fsgs, const char *name) 00454 { 00455 fsg_model_t *fsg; 00456 00457 fsg = fsg_set_get_fsg(fsgs, name); 00458 if (fsg == NULL) { 00459 E_ERROR("FSG '%s' not known; cannot make it current\n", name); 00460 return NULL; 00461 } 00462 fsgs->fsg = fsg; 00463 return fsg; 00464 } 00465 00466 fsg_set_iter_t * 00467 fsg_set_iter(fsg_set_t *fsgs) 00468 { 00469 return hash_table_iter(fsgs->fsgs); 00470 } 00471 00472 fsg_set_iter_t * 00473 fsg_set_iter_next(fsg_set_iter_t *itor) 00474 { 00475 return hash_table_iter_next(itor); 00476 } 00477 00478 fsg_model_t * 00479 fsg_set_iter_fsg(fsg_set_iter_t *itor) 00480 { 00481 return ((fsg_model_t *)itor->ent->val); 00482 } 00483 00484 void 00485 fsg_set_iter_free(fsg_set_iter_t *itor) 00486 { 00487 hash_table_iter_free(itor); 00488 } 00489 00490 static void 00491 fsg_search_sen_active(fsg_search_t *fsgs) 00492 { 00493 gnode_t *gn; 00494 fsg_pnode_t *pnode; 00495 hmm_t *hmm; 00496 00497 acmod_clear_active(ps_search_acmod(fsgs)); 00498 00499 for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) { 00500 pnode = (fsg_pnode_t *) gnode_ptr(gn); 00501 hmm = fsg_pnode_hmmptr(pnode); 00502 assert(hmm_frame(hmm) == fsgs->frame); 00503 acmod_activate_hmm(ps_search_acmod(fsgs), hmm); 00504 } 00505 } 00506 00507 00508 /* 00509 * Evaluate all the active HMMs. 00510 * (Executed once per frame.) 00511 */ 00512 static void 00513 fsg_search_hmm_eval(fsg_search_t *fsgs) 00514 { 00515 gnode_t *gn; 00516 fsg_pnode_t *pnode; 00517 hmm_t *hmm; 00518 int32 bestscore; 00519 int32 n, maxhmmpf; 00520 00521 bestscore = WORST_SCORE; 00522 00523 if (!fsgs->pnode_active) { 00524 E_ERROR("Frame %d: No active HMM!!\n", fsgs->frame); 00525 return; 00526 } 00527 00528 for (n = 0, gn = fsgs->pnode_active; gn; gn = gnode_next(gn), n++) { 00529 int32 score; 00530 00531 pnode = (fsg_pnode_t *) gnode_ptr(gn); 00532 hmm = fsg_pnode_hmmptr(pnode); 00533 assert(hmm_frame(hmm) == fsgs->frame); 00534 00535 #if __FSG_DBG__ 00536 E_INFO("pnode(%08x) active @frm %5d\n", (int32) pnode, 00537 fsgs->frame); 00538 hmm_dump(hmm, stdout); 00539 #endif 00540 score = hmm_vit_eval(hmm); 00541 #if __FSG_DBG_CHAN__ 00542 E_INFO("pnode(%08x) after eval @frm %5d\n", 00543 (int32) pnode, fsgs->frame); 00544 hmm_dump(hmm, stdout); 00545 #endif 00546 00547 if (score BETTER_THAN bestscore) 00548 bestscore = score; 00549 } 00550 00551 #if __FSG_DBG__ 00552 E_INFO("[%5d] %6d HMM; bestscr: %11d\n", fsgs->frame, n, bestscore); 00553 #endif 00554 fsgs->n_hmm_eval += n; 00555 00556 /* Adjust beams if #active HMMs larger than absolute threshold */ 00557 maxhmmpf = cmd_ln_int32_r(ps_search_config(fsgs), "-maxhmmpf"); 00558 if (maxhmmpf != -1 && n > maxhmmpf) { 00559 /* 00560 * Too many HMMs active; reduce the beam factor applied to the default 00561 * beams, but not if the factor is already at a floor (0.1). 00562 */ 00563 if (fsgs->beam_factor > 0.1) { /* Hack!! Hardwired constant 0.1 */ 00564 fsgs->beam_factor *= 0.9f; /* Hack!! Hardwired constant 0.9 */ 00565 fsgs->beam = 00566 (int32) (fsgs->beam_orig * fsgs->beam_factor); 00567 fsgs->pbeam = 00568 (int32) (fsgs->pbeam_orig * fsgs->beam_factor); 00569 fsgs->wbeam = 00570 (int32) (fsgs->wbeam_orig * fsgs->beam_factor); 00571 } 00572 } 00573 else { 00574 fsgs->beam_factor = 1.0f; 00575 fsgs->beam = fsgs->beam_orig; 00576 fsgs->pbeam = fsgs->pbeam_orig; 00577 fsgs->wbeam = fsgs->wbeam_orig; 00578 } 00579 00580 if (n > fsg_lextree_n_pnode(fsgs->lextree)) 00581 E_FATAL("PANIC! Frame %d: #HMM evaluated(%d) > #PNodes(%d)\n", 00582 fsgs->frame, n, fsg_lextree_n_pnode(fsgs->lextree)); 00583 00584 fsgs->bestscore = bestscore; 00585 } 00586 00587 00588 static void 00589 fsg_search_pnode_trans(fsg_search_t *fsgs, fsg_pnode_t * pnode) 00590 { 00591 fsg_pnode_t *child; 00592 hmm_t *hmm; 00593 int32 newscore, thresh, nf; 00594 00595 assert(pnode); 00596 assert(!fsg_pnode_leaf(pnode)); 00597 00598 nf = fsgs->frame + 1; 00599 thresh = fsgs->bestscore + fsgs->beam; 00600 00601 hmm = fsg_pnode_hmmptr(pnode); 00602 00603 for (child = fsg_pnode_succ(pnode); 00604 child; child = fsg_pnode_sibling(child)) { 00605 newscore = hmm_out_score(hmm) + child->logs2prob; 00606 00607 if ((newscore BETTER_THAN thresh) 00608 && (newscore BETTER_THAN hmm_in_score(&child->hmm))) { 00609 /* Incoming score > pruning threshold and > target's existing score */ 00610 if (hmm_frame(&child->hmm) < nf) { 00611 /* Child node not yet activated; do so */ 00612 fsgs->pnode_active_next = 00613 glist_add_ptr(fsgs->pnode_active_next, 00614 (void *) child); 00615 } 00616 00617 hmm_enter(&child->hmm, newscore, hmm_out_history(hmm), nf); 00618 } 00619 } 00620 } 00621 00622 00623 static void 00624 fsg_search_pnode_exit(fsg_search_t *fsgs, fsg_pnode_t * pnode) 00625 { 00626 hmm_t *hmm; 00627 fsg_link_t *fl; 00628 int32 wid; 00629 fsg_pnode_ctxt_t ctxt; 00630 00631 assert(pnode); 00632 assert(fsg_pnode_leaf(pnode)); 00633 00634 hmm = fsg_pnode_hmmptr(pnode); 00635 fl = fsg_pnode_fsglink(pnode); 00636 assert(fl); 00637 00638 wid = fsg_link_wid(fl); 00639 assert(wid >= 0); 00640 00641 #if __FSG_DBG__ 00642 E_INFO("[%5d] Exit(%08x) %10d(score) %5d(pred)\n", 00643 fsgs->frame, (int32) pnode, 00644 hmm_out_score(hmm), hmm_out_history(hmm)); 00645 #endif 00646 00647 /* 00648 * Check if this is filler or single phone word; these do not model right 00649 * context (i.e., the exit score applies to all right contexts). 00650 */ 00651 if (fsg_model_is_filler(fsgs->fsg, wid) 00652 /* FIXME: This might be slow due to repeated calls to dict_to_id(). */ 00653 || (dict_is_single_phone(ps_search_dict(fsgs), 00654 dict_wordid(ps_search_dict(fsgs), 00655 fsg_model_word_str(fsgs->fsg, wid))))) { 00656 /* Create a dummy context structure that applies to all right contexts */ 00657 fsg_pnode_add_all_ctxt(&ctxt); 00658 00659 /* Create history table entry for this word exit */ 00660 fsg_history_entry_add(fsgs->history, 00661 fl, 00662 fsgs->frame, 00663 hmm_out_score(hmm), 00664 hmm_out_history(hmm), 00665 pnode->ci_ext, ctxt); 00666 00667 } 00668 else { 00669 /* Create history table entry for this word exit */ 00670 fsg_history_entry_add(fsgs->history, 00671 fl, 00672 fsgs->frame, 00673 hmm_out_score(hmm), 00674 hmm_out_history(hmm), 00675 pnode->ci_ext, pnode->ctxt); 00676 } 00677 } 00678 00679 00680 /* 00681 * (Beam) prune the just evaluated HMMs, determine which ones remain 00682 * active, which ones transition to successors, which ones exit and 00683 * terminate in their respective destination FSM states. 00684 * (Executed once per frame.) 00685 */ 00686 static void 00687 fsg_search_hmm_prune_prop(fsg_search_t *fsgs) 00688 { 00689 gnode_t *gn; 00690 fsg_pnode_t *pnode; 00691 hmm_t *hmm; 00692 int32 thresh, word_thresh, phone_thresh; 00693 00694 assert(fsgs->pnode_active_next == NULL); 00695 00696 thresh = fsgs->bestscore + fsgs->beam; 00697 phone_thresh = fsgs->bestscore + fsgs->pbeam; 00698 word_thresh = fsgs->bestscore + fsgs->wbeam; 00699 00700 for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) { 00701 pnode = (fsg_pnode_t *) gnode_ptr(gn); 00702 hmm = fsg_pnode_hmmptr(pnode); 00703 00704 if (hmm_bestscore(hmm) >= thresh) { 00705 /* Keep this HMM active in the next frame */ 00706 if (hmm_frame(hmm) == fsgs->frame) { 00707 hmm_frame(hmm) = fsgs->frame + 1; 00708 fsgs->pnode_active_next = 00709 glist_add_ptr(fsgs->pnode_active_next, 00710 (void *) pnode); 00711 } 00712 else { 00713 assert(hmm_frame(hmm) == fsgs->frame + 1); 00714 } 00715 00716 if (!fsg_pnode_leaf(pnode)) { 00717 if (hmm_out_score(hmm) >= phone_thresh) { 00718 /* Transition out of this phone into its children */ 00719 fsg_search_pnode_trans(fsgs, pnode); 00720 } 00721 } 00722 else { 00723 if (hmm_out_score(hmm) >= word_thresh) { 00724 /* Transition out of leaf node into destination FSG state */ 00725 fsg_search_pnode_exit(fsgs, pnode); 00726 } 00727 } 00728 } 00729 } 00730 } 00731 00732 00733 /* 00734 * Propagate newly created history entries through null transitions. 00735 */ 00736 static void 00737 fsg_search_null_prop(fsg_search_t *fsgs) 00738 { 00739 int32 bpidx, n_entries, thresh, newscore; 00740 fsg_hist_entry_t *hist_entry; 00741 fsg_link_t *l; 00742 int32 s; 00743 fsg_model_t *fsg; 00744 00745 fsg = fsgs->fsg; 00746 thresh = fsgs->bestscore + fsgs->wbeam; /* Which beam really?? */ 00747 00748 n_entries = fsg_history_n_entries(fsgs->history); 00749 00750 for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) { 00751 fsg_arciter_t *itor; 00752 hist_entry = fsg_history_entry_get(fsgs->history, bpidx); 00753 00754 l = fsg_hist_entry_fsglink(hist_entry); 00755 00756 /* Destination FSG state for history entry */ 00757 s = l ? fsg_link_to_state(l) : fsg_model_start_state(fsg); 00758 00759 /* 00760 * Check null transitions from d to all other states. (Only need to 00761 * propagate one step, since FSG contains transitive closure of null 00762 * transitions.) 00763 */ 00764 /* Add all links from from_state to dst */ 00765 for (itor = fsg_model_arcs(fsg, s); itor; 00766 itor = fsg_arciter_next(itor)) { 00767 fsg_link_t *l = fsg_arciter_get(itor); 00768 00769 /* FIXME: Need to deal with tag transitions somehow. */ 00770 if (fsg_link_wid(l) != -1) 00771 continue; 00772 newscore = 00773 fsg_hist_entry_score(hist_entry) + 00774 (fsg_link_logs2prob(l) >> SENSCR_SHIFT); 00775 00776 if (newscore >= thresh) { 00777 fsg_history_entry_add(fsgs->history, l, 00778 fsg_hist_entry_frame(hist_entry), 00779 newscore, 00780 bpidx, 00781 fsg_hist_entry_lc(hist_entry), 00782 fsg_hist_entry_rc(hist_entry)); 00783 } 00784 } 00785 } 00786 } 00787 00788 00789 /* 00790 * Perform cross-word transitions; propagate each history entry created in this 00791 * frame to lextree roots attached to the target FSG state for that entry. 00792 */ 00793 static void 00794 fsg_search_word_trans(fsg_search_t *fsgs) 00795 { 00796 int32 bpidx, n_entries; 00797 fsg_hist_entry_t *hist_entry; 00798 fsg_link_t *l; 00799 int32 score, newscore, thresh, nf, d; 00800 fsg_pnode_t *root; 00801 int32 lc, rc; 00802 00803 n_entries = fsg_history_n_entries(fsgs->history); 00804 00805 thresh = fsgs->bestscore + fsgs->beam; 00806 nf = fsgs->frame + 1; 00807 00808 for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) { 00809 hist_entry = fsg_history_entry_get(fsgs->history, bpidx); 00810 assert(hist_entry); 00811 score = fsg_hist_entry_score(hist_entry); 00812 assert(fsgs->frame == fsg_hist_entry_frame(hist_entry)); 00813 00814 l = fsg_hist_entry_fsglink(hist_entry); 00815 00816 /* Destination state for hist_entry */ 00817 d = l ? fsg_link_to_state(l) : fsg_model_start_state(fsgs-> 00818 fsg); 00819 00820 lc = fsg_hist_entry_lc(hist_entry); 00821 00822 /* Transition to all root nodes attached to state d */ 00823 for (root = fsg_lextree_root(fsgs->lextree, d); 00824 root; root = root->sibling) { 00825 rc = root->ci_ext; 00826 00827 if ((root->ctxt.bv[lc >> 5] & (1 << (lc & 0x001f))) && 00828 (hist_entry->rc.bv[rc >> 5] & (1 << (rc & 0x001f)))) { 00829 /* 00830 * Last CIphone of history entry is in left-context list supported by 00831 * target root node, and 00832 * first CIphone of target root node is in right context list supported 00833 * by history entry; 00834 * So the transition can go ahead (if new score is good enough). 00835 */ 00836 newscore = score + root->logs2prob; 00837 00838 if ((newscore BETTER_THAN thresh) 00839 && (newscore BETTER_THAN hmm_in_score(&root->hmm))) { 00840 if (hmm_frame(&root->hmm) < nf) { 00841 /* Newly activated node; add to active list */ 00842 fsgs->pnode_active_next = 00843 glist_add_ptr(fsgs->pnode_active_next, 00844 (void *) root); 00845 #if __FSG_DBG__ 00846 E_INFO 00847 ("[%5d] WordTrans bpidx[%d] -> pnode[%08x] (activated)\n", 00848 fsgs->frame, bpidx, (int32) root); 00849 #endif 00850 } 00851 else { 00852 #if __FSG_DBG__ 00853 E_INFO 00854 ("[%5d] WordTrans bpidx[%d] -> pnode[%08x]\n", 00855 fsgs->frame, bpidx, (int32) root); 00856 #endif 00857 } 00858 00859 hmm_enter(&root->hmm, newscore, bpidx, nf); 00860 } 00861 } 00862 } 00863 } 00864 } 00865 00866 00867 int 00868 fsg_search_step(ps_search_t *search, int frame_idx) 00869 { 00870 fsg_search_t *fsgs = (fsg_search_t *)search; 00871 int16 const *senscr; 00872 acmod_t *acmod = search->acmod; 00873 gnode_t *gn; 00874 fsg_pnode_t *pnode; 00875 hmm_t *hmm; 00876 00877 /* Activate our HMMs for the current frame if need be. */ 00878 if (!acmod->compallsen) 00879 fsg_search_sen_active(fsgs); 00880 /* Compute GMM scores for the current frame. */ 00881 senscr = acmod_score(acmod, &frame_idx); 00882 fsgs->n_sen_eval += acmod->n_senone_active; 00883 hmm_context_set_senscore(fsgs->hmmctx, senscr); 00884 00885 /* Mark backpointer table for current frame. */ 00886 fsgs->bpidx_start = fsg_history_n_entries(fsgs->history); 00887 00888 /* Evaluate all active pnodes (HMMs) */ 00889 fsg_search_hmm_eval(fsgs); 00890 00891 /* 00892 * Prune and propagate the HMMs evaluated; create history entries for 00893 * word exits. The words exits are tentative, and may be pruned; make 00894 * the survivors permanent via fsg_history_end_frame(). 00895 */ 00896 fsg_search_hmm_prune_prop(fsgs); 00897 fsg_history_end_frame(fsgs->history); 00898 00899 /* 00900 * Propagate new history entries through any null transitions, creating 00901 * new history entries, and then make the survivors permanent. 00902 */ 00903 fsg_search_null_prop(fsgs); 00904 fsg_history_end_frame(fsgs->history); 00905 00906 /* 00907 * Perform cross-word transitions; propagate each history entry across its 00908 * terminating state to the root nodes of the lextree attached to the state. 00909 */ 00910 fsg_search_word_trans(fsgs); 00911 00912 /* 00913 * We've now come full circle, HMM and FSG states have been updated for 00914 * the next frame. 00915 * Update the active lists, deactivate any currently active HMMs that 00916 * did not survive into the next frame 00917 */ 00918 for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) { 00919 pnode = (fsg_pnode_t *) gnode_ptr(gn); 00920 hmm = fsg_pnode_hmmptr(pnode); 00921 00922 if (hmm_frame(hmm) == fsgs->frame) { 00923 /* This HMM NOT activated for the next frame; reset it */ 00924 fsg_psubtree_pnode_deactivate(pnode); 00925 } 00926 else { 00927 assert(hmm_frame(hmm) == (fsgs->frame + 1)); 00928 } 00929 } 00930 00931 /* Free the currently active list */ 00932 glist_free(fsgs->pnode_active); 00933 00934 /* Make the next-frame active list the current one */ 00935 fsgs->pnode_active = fsgs->pnode_active_next; 00936 fsgs->pnode_active_next = NULL; 00937 00938 /* End of this frame; ready for the next */ 00939 ++fsgs->frame; 00940 00941 return 1; 00942 } 00943 00944 00945 /* 00946 * Set all HMMs to inactive, clear active lists, initialize FSM start 00947 * state to be the only active node. 00948 * (Executed at the start of each utterance.) 00949 */ 00950 int 00951 fsg_search_start(ps_search_t *search) 00952 { 00953 fsg_search_t *fsgs = (fsg_search_t *)search; 00954 int32 silcipid; 00955 fsg_pnode_ctxt_t ctxt; 00956 00957 /* Reset dynamic adjustment factor for beams */ 00958 fsgs->beam_factor = 1.0f; 00959 fsgs->beam = fsgs->beam_orig; 00960 fsgs->pbeam = fsgs->pbeam_orig; 00961 fsgs->wbeam = fsgs->wbeam_orig; 00962 00963 silcipid = bin_mdef_ciphone_id(ps_search_acmod(fsgs)->mdef, "SIL"); 00964 00965 /* Initialize EVERYTHING to be inactive */ 00966 assert(fsgs->pnode_active == NULL); 00967 assert(fsgs->pnode_active_next == NULL); 00968 00969 fsg_history_reset(fsgs->history); 00970 fsg_history_utt_start(fsgs->history); 00971 fsgs->final = FALSE; 00972 00973 /* Dummy context structure that allows all right contexts to use this entry */ 00974 fsg_pnode_add_all_ctxt(&ctxt); 00975 00976 /* Create dummy history entry leading to start state */ 00977 fsgs->frame = -1; 00978 fsgs->bestscore = 0; 00979 fsg_history_entry_add(fsgs->history, 00980 NULL, -1, 0, -1, silcipid, ctxt); 00981 fsgs->bpidx_start = 0; 00982 00983 /* Propagate dummy history entry through NULL transitions from start state */ 00984 fsg_search_null_prop(fsgs); 00985 00986 /* Perform word transitions from this dummy history entry */ 00987 fsg_search_word_trans(fsgs); 00988 00989 /* Make the next-frame active list the current one */ 00990 fsgs->pnode_active = fsgs->pnode_active_next; 00991 fsgs->pnode_active_next = NULL; 00992 00993 ++fsgs->frame; 00994 00995 fsgs->n_hmm_eval = 0; 00996 fsgs->n_sen_eval = 0; 00997 00998 return 0; 00999 } 01000 01001 /* 01002 * Cleanup at the end of each utterance. 01003 */ 01004 int 01005 fsg_search_finish(ps_search_t *search) 01006 { 01007 fsg_search_t *fsgs = (fsg_search_t *)search; 01008 gnode_t *gn; 01009 fsg_pnode_t *pnode; 01010 int32 n_hist; 01011 01012 /* Deactivate all nodes in the current and next-frame active lists */ 01013 for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) { 01014 pnode = (fsg_pnode_t *) gnode_ptr(gn); 01015 fsg_psubtree_pnode_deactivate(pnode); 01016 } 01017 for (gn = fsgs->pnode_active_next; gn; gn = gnode_next(gn)) { 01018 pnode = (fsg_pnode_t *) gnode_ptr(gn); 01019 fsg_psubtree_pnode_deactivate(pnode); 01020 } 01021 01022 glist_free(fsgs->pnode_active); 01023 fsgs->pnode_active = NULL; 01024 glist_free(fsgs->pnode_active_next); 01025 fsgs->pnode_active_next = NULL; 01026 01027 fsgs->final = TRUE; 01028 01029 n_hist = fsg_history_n_entries(fsgs->history); 01030 E_INFO 01031 ("%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n\n", 01032 fsgs->frame, fsgs->n_hmm_eval, 01033 (fsgs->frame > 0) ? fsgs->n_hmm_eval / fsgs->frame : 0, 01034 fsgs->n_sen_eval, 01035 (fsgs->frame > 0) ? fsgs->n_sen_eval / fsgs->frame : 0, 01036 n_hist, (fsgs->frame > 0) ? n_hist / fsgs->frame : 0); 01037 01038 return 0; 01039 } 01040 01041 static int 01042 fsg_search_find_exit(fsg_search_t *fsgs, int frame_idx, int final, int32 *out_score) 01043 { 01044 fsg_hist_entry_t *hist_entry; 01045 fsg_model_t *fsg; 01046 int bpidx, frm, last_frm, besthist; 01047 int32 bestscore; 01048 01049 if (frame_idx == -1) 01050 frame_idx = fsgs->frame - 1; 01051 last_frm = frm = frame_idx; 01052 01053 /* Scan backwards to find a word exit in frame_idx. */ 01054 bpidx = fsg_history_n_entries(fsgs->history) - 1; 01055 while (bpidx > 0) { 01056 hist_entry = fsg_history_entry_get(fsgs->history, bpidx); 01057 if (fsg_hist_entry_frame(hist_entry) <= frame_idx) { 01058 frm = last_frm = fsg_hist_entry_frame(hist_entry); 01059 break; 01060 } 01061 } 01062 01063 /* No hypothesis (yet). */ 01064 if (bpidx <= 0) 01065 return bpidx; 01066 01067 /* Now find best word exit in this frame. */ 01068 bestscore = INT_MIN; 01069 besthist = -1; 01070 fsg = fsgs->fsg; 01071 while (frm == last_frm) { 01072 fsg_link_t *fl; 01073 int32 score; 01074 01075 fl = fsg_hist_entry_fsglink(hist_entry); 01076 score = fsg_hist_entry_score(hist_entry); 01077 01078 if (fl == NULL) 01079 break; 01080 01081 if (score BETTER_THAN bestscore) { 01082 /* Only enforce the final state constraint if this is a final hypothesis. */ 01083 if ((!final) 01084 || fsg_link_to_state(fl) == fsg_model_final_state(fsg)) { 01085 bestscore = score; 01086 besthist = bpidx; 01087 } 01088 } 01089 01090 --bpidx; 01091 if (bpidx < 0) 01092 break; 01093 hist_entry = fsg_history_entry_get(fsgs->history, bpidx); 01094 frm = fsg_hist_entry_frame(hist_entry); 01095 } 01096 01097 /* Final state not reached. */ 01098 if (besthist == -1) { 01099 E_ERROR("Final state not reached in frame %d\n", frame_idx); 01100 return -1; 01101 } 01102 01103 /* This here's the one we want. */ 01104 if (out_score) 01105 *out_score = bestscore; 01106 return besthist; 01107 } 01108 01109 /* FIXME: Mostly duplicated with ngram_search_bestpath(). */ 01110 static ps_latlink_t * 01111 fsg_search_bestpath(ps_search_t *search, int32 *out_score, int backward) 01112 { 01113 fsg_search_t *fsgs = (fsg_search_t *)search; 01114 01115 if (search->last_link == NULL) { 01116 search->last_link = ps_lattice_bestpath(search->dag, NULL, 01117 1.0, fsgs->ascale); 01118 if (search->last_link == NULL) 01119 return NULL; 01120 /* Also calculate betas so we can fill in the posterior 01121 * probability field in the segmentation. */ 01122 if (search->post == 0) 01123 search->post = ps_lattice_posterior(search->dag, NULL, fsgs->ascale); 01124 } 01125 if (out_score) 01126 *out_score = search->last_link->path_scr + search->dag->final_node_ascr; 01127 return search->last_link; 01128 } 01129 01130 char const * 01131 fsg_search_hyp(ps_search_t *search, int32 *out_score) 01132 { 01133 fsg_search_t *fsgs = (fsg_search_t *)search; 01134 dict_t *dict = ps_search_dict(search); 01135 char *c; 01136 size_t len; 01137 int bp, bpidx; 01138 01139 /* Get last backpointer table index. */ 01140 bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, out_score); 01141 /* No hypothesis (yet). */ 01142 if (bpidx <= 0) 01143 return NULL; 01144 01145 /* If bestpath is enabled and the utterance is complete, then run it. */ 01146 if (fsgs->bestpath && fsgs->final) { 01147 ps_lattice_t *dag; 01148 ps_latlink_t *link; 01149 01150 if ((dag = fsg_search_lattice(search)) == NULL) { 01151 E_WARN("Failed to obtain the lattice while bestpath enabled\n"); 01152 return NULL; 01153 } 01154 if ((link = fsg_search_bestpath(search, out_score, FALSE)) == NULL) { 01155 E_WARN("Failed to bestpath in a lattice\n"); 01156 return NULL; 01157 } 01158 return ps_lattice_hyp(dag, link); 01159 } 01160 01161 bp = bpidx; 01162 len = 0; 01163 while (bp > 0) { 01164 fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp); 01165 fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry); 01166 char const *baseword; 01167 int32 wid; 01168 01169 bp = fsg_hist_entry_pred(hist_entry); 01170 wid = fsg_link_wid(fl); 01171 if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid)) 01172 continue; 01173 baseword = dict_basestr(dict, 01174 dict_wordid(dict, 01175 fsg_model_word_str(fsgs->fsg, wid))); 01176 len += strlen(baseword) + 1; 01177 } 01178 01179 ckd_free(search->hyp_str); 01180 if (len == 0) { 01181 search->hyp_str = NULL; 01182 return search->hyp_str; 01183 } 01184 search->hyp_str = ckd_calloc(1, len); 01185 01186 bp = bpidx; 01187 c = search->hyp_str + len - 1; 01188 while (bp > 0) { 01189 fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp); 01190 fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry); 01191 char const *baseword; 01192 int32 wid; 01193 01194 bp = fsg_hist_entry_pred(hist_entry); 01195 wid = fsg_link_wid(fl); 01196 if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid)) 01197 continue; 01198 baseword = dict_basestr(dict, 01199 dict_wordid(dict, 01200 fsg_model_word_str(fsgs->fsg, wid))); 01201 len = strlen(baseword); 01202 c -= len; 01203 memcpy(c, baseword, len); 01204 if (c > search->hyp_str) { 01205 --c; 01206 *c = ' '; 01207 } 01208 } 01209 01210 return search->hyp_str; 01211 } 01212 01213 static void 01214 fsg_seg_bp2itor(ps_seg_t *seg, fsg_hist_entry_t *hist_entry) 01215 { 01216 fsg_search_t *fsgs = (fsg_search_t *)seg->search; 01217 fsg_hist_entry_t *ph = NULL; 01218 int32 bp; 01219 01220 if ((bp = fsg_hist_entry_pred(hist_entry)) >= 0) 01221 ph = fsg_history_entry_get(fsgs->history, bp); 01222 seg->word = fsg_model_word_str(fsgs->fsg, hist_entry->fsglink->wid); 01223 seg->ef = fsg_hist_entry_frame(hist_entry); 01224 seg->sf = ph ? fsg_hist_entry_frame(ph) + 1 : 0; 01225 /* This is kind of silly but it happens for null transitions. */ 01226 if (seg->sf > seg->ef) seg->sf = seg->ef; 01227 seg->prob = 0; /* Bogus value... */ 01228 /* "Language model" score = transition probability. */ 01229 seg->lback = 1; 01230 seg->lscr = hist_entry->fsglink->logs2prob; 01231 if (ph) { 01232 /* FIXME: Not sure exactly how cross-word triphones are handled. */ 01233 seg->ascr = hist_entry->score - ph->score - seg->lscr; 01234 } 01235 else 01236 seg->ascr = hist_entry->score - seg->lscr; 01237 } 01238 01239 01240 static void 01241 fsg_seg_free(ps_seg_t *seg) 01242 { 01243 fsg_seg_t *itor = (fsg_seg_t *)seg; 01244 ckd_free(itor->hist); 01245 ckd_free(itor); 01246 } 01247 01248 static ps_seg_t * 01249 fsg_seg_next(ps_seg_t *seg) 01250 { 01251 fsg_seg_t *itor = (fsg_seg_t *)seg; 01252 01253 if (++itor->cur == itor->n_hist) { 01254 fsg_seg_free(seg); 01255 return NULL; 01256 } 01257 01258 fsg_seg_bp2itor(seg, itor->hist[itor->cur]); 01259 return seg; 01260 } 01261 01262 static ps_segfuncs_t fsg_segfuncs = { 01263 /* seg_next */ fsg_seg_next, 01264 /* seg_free */ fsg_seg_free 01265 }; 01266 01267 static ps_seg_t * 01268 fsg_search_seg_iter(ps_search_t *search, int32 *out_score) 01269 { 01270 fsg_search_t *fsgs = (fsg_search_t *)search; 01271 fsg_seg_t *itor; 01272 int bp, bpidx, cur; 01273 01274 bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, out_score); 01275 /* No hypothesis (yet). */ 01276 if (bpidx <= 0) 01277 return NULL; 01278 01279 /* If bestpath is enabled and the utterance is complete, then run it. */ 01280 if (fsgs->bestpath && fsgs->final) { 01281 ps_lattice_t *dag; 01282 ps_latlink_t *link; 01283 01284 if ((dag = fsg_search_lattice(search)) == NULL) 01285 return NULL; 01286 if ((link = fsg_search_bestpath(search, out_score, TRUE)) == NULL) 01287 return NULL; 01288 return ps_lattice_seg_iter(dag, link, 1.0); 01289 } 01290 01291 /* Calling this an "iterator" is a bit of a misnomer since we have 01292 * to get the entire backtrace in order to produce it. On the 01293 * other hand, all we actually need is the bptbl IDs, and we can 01294 * allocate a fixed-size array of them. */ 01295 itor = ckd_calloc(1, sizeof(*itor)); 01296 itor->base.vt = &fsg_segfuncs; 01297 itor->base.search = search; 01298 itor->base.lwf = 1.0; 01299 itor->n_hist = 0; 01300 bp = bpidx; 01301 while (bp > 0) { 01302 fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp); 01303 bp = fsg_hist_entry_pred(hist_entry); 01304 ++itor->n_hist; 01305 } 01306 if (itor->n_hist == 0) { 01307 ckd_free(itor); 01308 return NULL; 01309 } 01310 itor->hist = ckd_calloc(itor->n_hist, sizeof(*itor->hist)); 01311 cur = itor->n_hist - 1; 01312 bp = bpidx; 01313 while (bp > 0) { 01314 fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp); 01315 itor->hist[cur] = hist_entry; 01316 bp = fsg_hist_entry_pred(hist_entry); 01317 --cur; 01318 } 01319 01320 /* Fill in relevant fields for first element. */ 01321 fsg_seg_bp2itor((ps_seg_t *)itor, itor->hist[0]); 01322 01323 return (ps_seg_t *)itor; 01324 } 01325 01326 static int 01327 fsg_search_prob(ps_search_t *search) 01328 { 01329 fsg_search_t *fsgs = (fsg_search_t *)search; 01330 01331 /* If bestpath is enabled and the utterance is complete, then run it. */ 01332 if (fsgs->bestpath && fsgs->final) { 01333 ps_lattice_t *dag; 01334 ps_latlink_t *link; 01335 01336 if ((dag = fsg_search_lattice(search)) == NULL) 01337 return 0; 01338 if ((link = fsg_search_bestpath(search, NULL, TRUE)) == NULL) 01339 return 0; 01340 return search->post; 01341 } 01342 else { 01343 /* FIXME: Give some kind of good estimate here, eventually. */ 01344 return 0; 01345 } 01346 } 01347 01348 static ps_latnode_t * 01349 new_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int ef, int32 wid, int32 ascr) 01350 { 01351 ps_latnode_t *node; 01352 01353 for (node = dag->nodes; node; node = node->next) 01354 if (node->sf == sf && node->wid == wid) 01355 break; 01356 01357 if (node) { 01358 /* Update end frames. */ 01359 if (node->lef == -1 || node->lef < ef) 01360 node->lef = ef; 01361 if (node->fef == -1 || node->fef > ef) 01362 node->fef = ef; 01363 /* Update best link score. */ 01364 if (ascr BETTER_THAN node->info.best_exit) 01365 node->info.best_exit = ascr; 01366 } 01367 else { 01368 /* New node; link to head of list */ 01369 node = listelem_malloc(dag->latnode_alloc); 01370 node->wid = wid; 01371 node->sf = sf; 01372 node->fef = node->lef = ef; 01373 node->reachable = FALSE; 01374 node->entries = NULL; 01375 node->exits = NULL; 01376 node->info.best_exit = ascr; 01377 01378 node->next = dag->nodes; 01379 dag->nodes = node; 01380 ++dag->n_nodes; 01381 } 01382 01383 return node; 01384 } 01385 01386 static ps_latnode_t * 01387 find_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int32 wid) 01388 { 01389 ps_latnode_t *node; 01390 01391 for (node = dag->nodes; node; node = node->next) 01392 if (node->sf == sf && node->wid == wid) 01393 break; 01394 return node; 01395 } 01396 01397 static ps_latnode_t * 01398 find_start_node(fsg_search_t *fsgs, ps_lattice_t *dag) 01399 { 01400 ps_latnode_t *node; 01401 glist_t start = NULL; 01402 int nstart = 0; 01403 01404 /* Look for all nodes starting in frame zero with some exits. */ 01405 for (node = dag->nodes; node; node = node->next) { 01406 if (node->sf == 0 && node->exits) { 01407 E_INFO("Start node %s.%d:%d:%d\n", 01408 fsg_model_word_str(fsgs->fsg, node->wid), 01409 node->sf, node->fef, node->lef); 01410 start = glist_add_ptr(start, node); 01411 ++nstart; 01412 } 01413 } 01414 01415 /* If there was more than one start node candidate, then we need 01416 * to create an artificial start node with epsilon transitions to 01417 * all of them. */ 01418 if (nstart == 1) { 01419 node = gnode_ptr(start); 01420 } 01421 else { 01422 gnode_t *st; 01423 int wid; 01424 01425 wid = fsg_model_word_add(fsgs->fsg, "<s>"); 01426 if (fsgs->fsg->silwords) 01427 bitvec_set(fsgs->fsg->silwords, wid); 01428 node = new_node(dag, fsgs->fsg, 0, 0, wid, 0); 01429 for (st = start; st; st = gnode_next(st)) 01430 ps_lattice_link(dag, node, gnode_ptr(st), 0, 0); 01431 } 01432 glist_free(start); 01433 return node; 01434 } 01435 01436 static ps_latnode_t * 01437 find_end_node(fsg_search_t *fsgs, ps_lattice_t *dag) 01438 { 01439 ps_latnode_t *node; 01440 glist_t end = NULL; 01441 int nend = 0; 01442 01443 /* Look for all nodes ending in last frame with some entries. */ 01444 for (node = dag->nodes; node; node = node->next) { 01445 if (node->lef == dag->n_frames - 1 && node->entries) { 01446 E_INFO("End node %s.%d:%d:%d (%d)\n", 01447 fsg_model_word_str(fsgs->fsg, node->wid), 01448 node->sf, node->fef, node->lef, node->info.best_exit); 01449 end = glist_add_ptr(end, node); 01450 ++nend; 01451 } 01452 } 01453 01454 if (nend == 1) { 01455 node = gnode_ptr(end); 01456 } 01457 else if (nend == 0) { 01458 ps_latnode_t *last = NULL; 01459 int ef = 0; 01460 01461 /* If there were no end node candidates, then just use the 01462 * node with the last exit frame. */ 01463 for (node = dag->nodes; node; node = node->next) { 01464 if (node->lef > ef && node->entries) { 01465 last = node; 01466 ef = node->lef; 01467 } 01468 } 01469 node = last; 01470 if (node) 01471 E_INFO("End node %s.%d:%d:%d (%d)\n", 01472 fsg_model_word_str(fsgs->fsg, node->wid), 01473 node->sf, node->fef, node->lef, node->info.best_exit); 01474 } 01475 else { 01476 /* If there was more than one end node candidate, then we need 01477 * to create an artificial end node with epsilon transitions 01478 * out of all of them. */ 01479 gnode_t *st; 01480 int wid; 01481 01482 wid = fsg_model_word_add(fsgs->fsg, "</s>"); 01483 if (fsgs->fsg->silwords) 01484 bitvec_set(fsgs->fsg->silwords, wid); 01485 node = new_node(dag, fsgs->fsg, fsgs->frame, fsgs->frame, wid, 0); 01486 /* Use the "best" (in reality it will be the only) exit link 01487 * score from this final node as the link score. */ 01488 for (st = end; st; st = gnode_next(st)) { 01489 ps_latnode_t *src = gnode_ptr(st); 01490 ps_lattice_link(dag, src, node, src->info.best_exit, fsgs->frame); 01491 } 01492 } 01493 glist_free(end); 01494 return node; 01495 } 01496 01497 static void 01498 mark_reachable(ps_lattice_t *dag, ps_latnode_t *end) 01499 { 01500 glist_t q; 01501 01502 /* It doesn't matter which order we do this in. */ 01503 end->reachable = TRUE; 01504 q = glist_add_ptr(NULL, end); 01505 while (q) { 01506 ps_latnode_t *node = gnode_ptr(q); 01507 latlink_list_t *x; 01508 01509 /* Pop the front of the list. */ 01510 q = gnode_free(q, NULL); 01511 /* Expand all its predecessors that haven't been seen yet. */ 01512 for (x = node->entries; x; x = x->next) { 01513 ps_latnode_t *next = x->link->from; 01514 if (!next->reachable) { 01515 next->reachable = TRUE; 01516 q = glist_add_ptr(q, next); 01517 } 01518 } 01519 } 01520 } 01521 01530 static ps_lattice_t * 01531 fsg_search_lattice(ps_search_t *search) 01532 { 01533 fsg_search_t *fsgs; 01534 fsg_model_t *fsg; 01535 ps_latnode_t *node; 01536 ps_lattice_t *dag; 01537 int32 i, n; 01538 01539 fsgs = (fsg_search_t *)search; 01540 01541 /* Check to see if a lattice has previously been created over the 01542 * same number of frames, and reuse it if so. */ 01543 if (search->dag && search->dag->n_frames == fsgs->frame) 01544 return search->dag; 01545 01546 /* Nope, create a new one. */ 01547 ps_lattice_free(search->dag); 01548 search->dag = NULL; 01549 dag = ps_lattice_init_search(search, fsgs->frame); 01550 fsg = fsgs->fsg; 01551 01552 /* 01553 * Each history table entry represents a link in the word graph. 01554 * The set of nodes is determined by the number of unique 01555 * (word,start-frame) pairs in the history table. So we will 01556 * first find all those nodes. 01557 */ 01558 n = fsg_history_n_entries(fsgs->history); 01559 for (i = 0; i < n; ++i) { 01560 fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i); 01561 ps_latnode_t *node; 01562 int32 ascr; 01563 int sf; 01564 01565 /* Skip null transitions. */ 01566 if (fh->fsglink == NULL || fh->fsglink->wid == -1) 01567 continue; 01568 01569 /* Find the start node of this link. */ 01570 if (fh->pred) { 01571 fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred); 01572 /* FIXME: We include the transition score in the lattice 01573 * link score. This is because of the practical 01574 * difficulty of obtaining it separately in bestpath or 01575 * forward-backward search, and because it is essentially 01576 * a unigram probability, so there is no need to treat it 01577 * separately from the acoustic score. However, it's not 01578 * clear that this will actually yield correct results.*/ 01579 ascr = fh->score - pfh->score; 01580 sf = pfh->frame + 1; 01581 } 01582 else { 01583 ascr = fh->score; 01584 sf = 0; 01585 } 01586 01587 /* 01588 * Note that although scores are tied to links rather than 01589 * nodes, it's possible that there are no links out of the 01590 * destination node, and thus we need to preserve its score in 01591 * case it turns out to be utterance-final. 01592 */ 01593 node = new_node(dag, fsg, sf, fh->frame, fh->fsglink->wid, ascr); 01594 } 01595 01596 /* 01597 * Now, we will create links only to nodes that actually exist. 01598 */ 01599 n = fsg_history_n_entries(fsgs->history); 01600 for (i = 0; i < n; ++i) { 01601 fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i); 01602 fsg_arciter_t *itor; 01603 ps_latnode_t *src, *dest; 01604 int32 ascr; 01605 int sf; 01606 01607 /* Skip null transitions. */ 01608 if (fh->fsglink == NULL || fh->fsglink->wid == -1) 01609 continue; 01610 01611 /* Find the start node of this link and calculate its link score. */ 01612 if (fh->pred) { 01613 fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred); 01614 sf = pfh->frame + 1; 01615 ascr = fh->score - pfh->score; 01616 } 01617 else { 01618 ascr = fh->score; 01619 sf = 0; 01620 } 01621 src = find_node(dag, fsg, sf, fh->fsglink->wid); 01622 01623 sf = fh->frame + 1; 01624 for (itor = fsg_model_arcs(fsg, fsg_link_to_state(fh->fsglink)); 01625 itor; itor = fsg_arciter_next(itor)) { 01626 fsg_link_t *link = fsg_arciter_get(itor); 01627 01628 /* FIXME: Need to figure out what to do about tag transitions. */ 01629 if (link->wid >= 0) { 01630 /* 01631 * For each non-epsilon link following this one, look for a 01632 * matching node in the lattice and link to it. 01633 */ 01634 if ((dest = find_node(dag, fsg, sf, link->wid)) != NULL) 01635 ps_lattice_link(dag, src, dest, ascr, fh->frame); 01636 } 01637 else { 01638 /* 01639 * Transitive closure on nulls has already been done, so we 01640 * just need to look one link forward from them. 01641 */ 01642 fsg_arciter_t *itor2; 01643 01644 /* Add all non-null links out of j. */ 01645 for (itor2 = fsg_model_arcs(fsg, fsg_link_to_state(link)); 01646 itor2; itor2 = fsg_arciter_next(itor2)) { 01647 fsg_link_t *link = fsg_arciter_get(itor2); 01648 if (link->wid == -1) 01649 continue; 01650 if ((dest = find_node(dag, fsg, sf, link->wid)) != NULL) 01651 ps_lattice_link(dag, src, dest, ascr, fh->frame); 01652 } 01653 } 01654 } 01655 } 01656 01657 /* Figure out which nodes are the start and end nodes. */ 01658 if ((dag->start = find_start_node(fsgs, dag)) == NULL) 01659 goto error_out; 01660 if ((dag->end = find_end_node(fsgs, dag)) == NULL) 01661 goto error_out; 01662 E_INFO("lattice start node %s.%d end node %s.%d\n", 01663 fsg_model_word_str(fsg, dag->start->wid), dag->start->sf, 01664 fsg_model_word_str(fsg, dag->end->wid), dag->end->sf); 01665 /* FIXME: Need to calculate final_node_ascr here. */ 01666 01667 /* 01668 * Convert word IDs from FSG to dictionary. 01669 */ 01670 for (node = dag->nodes; node; node = node->next) { 01671 node->wid = dict_wordid(dag->search->dict, 01672 fsg_model_word_str(fsg, node->wid)); 01673 node->basewid = dict_basewid(dag->search->dict, node->wid); 01674 } 01675 01676 /* 01677 * Now we are done, because the links in the graph are uniquely 01678 * defined by the history table. However we should remove any 01679 * nodes which are not reachable from the end node of the FSG. 01680 * Everything is reachable from the start node by definition. 01681 */ 01682 mark_reachable(dag, dag->end); 01683 01684 ps_lattice_delete_unreachable(dag); 01685 { 01686 int32 silpen, fillpen; 01687 01688 silpen = (int32)(logmath_log(fsg->lmath, 01689 cmd_ln_float32_r(ps_search_config(fsgs), "-silprob")) 01690 * fsg->lw) 01691 >> SENSCR_SHIFT; 01692 fillpen = (int32)(logmath_log(fsg->lmath, 01693 cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob")) 01694 * fsg->lw) 01695 >> SENSCR_SHIFT; 01696 ps_lattice_bypass_fillers(dag, silpen, fillpen); 01697 } 01698 search->dag = dag; 01699 return dag; 01700 01701 error_out: 01702 ps_lattice_free(dag); 01703 return NULL; 01704 01705 }