PocketSphinx  0.6
src/libpocketsphinx/ngram_search_fwdflat.c
Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00042 /* System headers. */
00043 #include <string.h>
00044 #include <assert.h>
00045 
00046 /* SphinxBase headers. */
00047 #include <sphinxbase/ckd_alloc.h>
00048 #include <sphinxbase/listelem_alloc.h>
00049 #include <sphinxbase/err.h>
00050 
00051 /* Local headers. */
00052 #include "ngram_search.h"
00053 #include "ps_lattice_internal.h"
00054 
00055 /* Turn this on to dump channels for debugging */
00056 #define __CHAN_DUMP__           0
00057 #if __CHAN_DUMP__
00058 #define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
00059 #else
00060 #define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
00061 #endif
00062 
00063 static void
00064 ngram_fwdflat_expand_all(ngram_search_t *ngs)
00065 {
00066     int n_words, i;
00067 
00068     /* For all "real words" (not fillers or <s>/</s>) in the dictionary,
00069      *
00070      * 1) Add the ones which are in the LM to the fwdflat wordlist
00071      * 2) And to the expansion list (since we are expanding all)
00072      */
00073     ngs->n_expand_words = 0;
00074     n_words = ps_search_n_words(ngs);
00075     bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs));
00076     for (i = 0; i < n_words; ++i) {
00077         if (!ngram_model_set_known_wid(ngs->lmset,
00078                                        dict_basewid(ps_search_dict(ngs),i)))
00079             continue;
00080         ngs->fwdflat_wordlist[ngs->n_expand_words] = i;
00081         ngs->expand_word_list[ngs->n_expand_words] = i;
00082         bitvec_set(ngs->expand_word_flag, i);
00083         ngs->n_expand_words++;
00084     }
00085     E_INFO("Utterance vocabulary contains %d words\n", ngs->n_expand_words);
00086     ngs->expand_word_list[ngs->n_expand_words] = -1;
00087     ngs->fwdflat_wordlist[ngs->n_expand_words] = -1;
00088 }
00089 
00090 static void
00091 ngram_fwdflat_allocate_1ph(ngram_search_t *ngs)
00092 {
00093     dict_t *dict = ps_search_dict(ngs);
00094     int n_words = ps_search_n_words(ngs);
00095     int i, w;
00096 
00097     /* Allocate single-phone words, since they won't have
00098      * been allocated for us by fwdtree initialization. */
00099     ngs->n_1ph_words = 0;
00100     for (w = 0; w < n_words; w++) {
00101         if (dict_is_single_phone(dict, w))
00102             ++ngs->n_1ph_words;
00103     }
00104     ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words,
00105                                        sizeof(*ngs->single_phone_wid));
00106     ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
00107     i = 0;
00108     for (w = 0; w < n_words; w++) {
00109         if (!dict_is_single_phone(dict, w))
00110             continue;
00111 
00112         /* DICT2PID location */
00113         ngs->rhmm_1ph[i].ciphone = dict_first_phone(dict, w);
00114         ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef);
00115         hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE,
00116                  /* ssid */ bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef,
00117                                               ngs->rhmm_1ph[i].ciphone),
00118                  /* tmatid */ bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef,
00119                                                   ngs->rhmm_1ph[i].ciphone));
00120         ngs->rhmm_1ph[i].next = NULL;
00121         ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
00122         ngs->single_phone_wid[i] = w;
00123         i++;
00124     }
00125 }
00126 
00127 static void
00128 ngram_fwdflat_free_1ph(ngram_search_t *ngs)
00129 {
00130     int i, w;
00131     int n_words = ps_search_n_words(ngs);
00132 
00133     for (i = w = 0; w < n_words; ++w) {
00134         if (!dict_is_single_phone(ps_search_dict(ngs), w))
00135             continue;
00136         hmm_deinit(&ngs->rhmm_1ph[i].hmm);
00137         ++i;
00138     }
00139     ckd_free(ngs->rhmm_1ph);
00140     ngs->rhmm_1ph = NULL;
00141     ckd_free(ngs->single_phone_wid);
00142 }
00143 
00144 void
00145 ngram_fwdflat_init(ngram_search_t *ngs)
00146 {
00147     int n_words;
00148 
00149     n_words = ps_search_n_words(ngs);
00150     ngs->fwdflat_wordlist = ckd_calloc(n_words + 1, sizeof(*ngs->fwdflat_wordlist));
00151     ngs->expand_word_flag = bitvec_alloc(n_words);
00152     ngs->expand_word_list = ckd_calloc(n_words + 1, sizeof(*ngs->expand_word_list));
00153     ngs->frm_wordlist = ckd_calloc(ngs->n_frame_alloc, sizeof(*ngs->frm_wordlist));
00154     ngs->min_ef_width = cmd_ln_int32_r(ps_search_config(ngs), "-fwdflatefwid");
00155     ngs->max_sf_win = cmd_ln_int32_r(ps_search_config(ngs), "-fwdflatsfwin");
00156     E_INFO("fwdflat: min_ef_width = %d, max_sf_win = %d\n",
00157            ngs->min_ef_width, ngs->max_sf_win);
00158 
00159     /* No tree-search; pre-build the expansion list, including all LM words. */
00160     if (!ngs->fwdtree) {
00161         /* Build full expansion list from LM words. */
00162         ngram_fwdflat_expand_all(ngs);
00163         /* Allocate single phone words. */
00164         ngram_fwdflat_allocate_1ph(ngs);
00165     }
00166 }
00167 
00168 void
00169 ngram_fwdflat_deinit(ngram_search_t *ngs)
00170 {
00171     double n_speech = (double)ngs->n_tot_frame
00172             / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
00173 
00174     E_INFO("TOTAL fwdflat %.2f CPU %.3f xRT\n",
00175            ngs->fwdflat_perf.t_tot_cpu,
00176            ngs->fwdflat_perf.t_tot_cpu / n_speech);
00177     E_INFO("TOTAL fwdflat %.2f wall %.3f xRT\n",
00178            ngs->fwdflat_perf.t_tot_elapsed,
00179            ngs->fwdflat_perf.t_tot_elapsed / n_speech);
00180 
00181     /* Free single-phone words if we allocated them. */
00182     if (!ngs->fwdtree) {
00183         ngram_fwdflat_free_1ph(ngs);
00184     }
00185     ckd_free(ngs->fwdflat_wordlist);
00186     bitvec_free(ngs->expand_word_flag);
00187     ckd_free(ngs->expand_word_list);
00188     ckd_free(ngs->frm_wordlist);
00189 }
00190 
00191 int
00192 ngram_fwdflat_reinit(ngram_search_t *ngs)
00193 {
00194     /* Reallocate things that depend on the number of words. */
00195     int n_words;
00196 
00197     ckd_free(ngs->fwdflat_wordlist);
00198     ckd_free(ngs->expand_word_list);
00199     bitvec_free(ngs->expand_word_flag);
00200     n_words = ps_search_n_words(ngs);
00201     ngs->fwdflat_wordlist = ckd_calloc(n_words + 1, sizeof(*ngs->fwdflat_wordlist));
00202     ngs->expand_word_flag = bitvec_alloc(n_words);
00203     ngs->expand_word_list = ckd_calloc(n_words + 1, sizeof(*ngs->expand_word_list));
00204     
00205     /* No tree-search; take care of the expansion list and single phone words. */
00206     if (!ngs->fwdtree) {
00207         /* Free single-phone words. */
00208         ngram_fwdflat_free_1ph(ngs);
00209         /* Reallocate word_chan. */
00210         ckd_free(ngs->word_chan);
00211         ngs->word_chan = ckd_calloc(dict_size(ps_search_dict(ngs)),
00212                                     sizeof(*ngs->word_chan));
00213         /* Rebuild full expansion list from LM words. */
00214         ngram_fwdflat_expand_all(ngs);
00215         /* Allocate single phone words. */
00216         ngram_fwdflat_allocate_1ph(ngs);
00217     }
00218     /* Otherwise there is nothing to do since the wordlist is
00219      * generated anew every utterance. */
00220     return 0;
00221 }
00222 
00226 static void
00227 build_fwdflat_wordlist(ngram_search_t *ngs)
00228 {
00229     int32 i, f, sf, ef, wid, nwd;
00230     dict_t *dict;
00231     bptbl_t *bp;
00232     ps_latnode_t *node, *prevnode, *nextnode;
00233 
00234     /* No tree-search, use statically allocated wordlist. */
00235     if (!ngs->fwdtree)
00236         return;
00237 
00238     dict = ps_search_dict(ngs);
00239 
00240     memset(ngs->frm_wordlist, 0, ngs->n_frame_alloc * sizeof(*ngs->frm_wordlist));
00241 
00242     /* Scan the backpointer table for all active words and record
00243      * their exit frames. */
00244     for (i = 0, bp = ngs->bp_table; i < ngs->bpidx; i++, bp++) {
00245         sf = (bp->bp < 0) ? 0 : ngs->bp_table[bp->bp].frame + 1;
00246         ef = bp->frame;
00247         wid = bp->wid;
00248 
00249         /* Anything that can be transitioned to in the LM can go in
00250          * the word list. */
00251         if (!ngram_model_set_known_wid(ngs->lmset,
00252                                        dict_basewid(ps_search_dict(ngs), wid)))
00253             continue;
00254 
00255         /* Look for it in the wordlist. */
00256         for (node = ngs->frm_wordlist[sf]; node && (node->wid != wid);
00257              node = node->next);
00258 
00259         /* Update last end frame. */
00260         if (node)
00261             node->lef = ef;
00262         else {
00263             /* New node; link to head of list */
00264             node = listelem_malloc(ngs->latnode_alloc);
00265             node->wid = wid;
00266             node->fef = node->lef = ef;
00267 
00268             node->next = ngs->frm_wordlist[sf];
00269             ngs->frm_wordlist[sf] = node;
00270         }
00271     }
00272 
00273     /* Eliminate "unlikely" words, for which there are too few end points */
00274     for (f = 0; f < ngs->n_frame; f++) {
00275         prevnode = NULL;
00276         for (node = ngs->frm_wordlist[f]; node; node = nextnode) {
00277             nextnode = node->next;
00278             /* Word has too few endpoints */
00279             if ((node->lef - node->fef < ngs->min_ef_width) ||
00280                 /* Word is </s> and doesn't actually end in last frame */
00281                 ((node->wid == ps_search_finish_wid(ngs)) && (node->lef < ngs->n_frame - 1))) {
00282                 if (!prevnode)
00283                     ngs->frm_wordlist[f] = nextnode;
00284                 else
00285                     prevnode->next = nextnode;
00286                 listelem_free(ngs->latnode_alloc, node);
00287             }
00288             else
00289                 prevnode = node;
00290         }
00291     }
00292 
00293     /* Form overall wordlist for 2nd pass */
00294     nwd = 0;
00295     bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
00296     for (f = 0; f < ngs->n_frame; f++) {
00297         for (node = ngs->frm_wordlist[f]; node; node = node->next) {
00298             if (!bitvec_is_set(ngs->word_active, node->wid)) {
00299                 bitvec_set(ngs->word_active, node->wid);
00300                 ngs->fwdflat_wordlist[nwd++] = node->wid;
00301             }
00302         }
00303     }
00304     ngs->fwdflat_wordlist[nwd] = -1;
00305     E_INFO("Utterance vocabulary contains %d words\n", nwd);
00306 }
00307 
00311 static void
00312 build_fwdflat_chan(ngram_search_t *ngs)
00313 {
00314     int32 i, wid, p;
00315     root_chan_t *rhmm;
00316     chan_t *hmm, *prevhmm;
00317     dict_t *dict;
00318     dict2pid_t *d2p;
00319 
00320     dict = ps_search_dict(ngs);
00321     d2p = ps_search_dict2pid(ngs);
00322 
00323     /* Build word HMMs for each word in the lattice. */
00324     for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
00325         wid = ngs->fwdflat_wordlist[i];
00326 
00327         /* Single-phone words are permanently allocated */
00328         if (dict_is_single_phone(dict, wid))
00329             continue;
00330 
00331         assert(ngs->word_chan[wid] == NULL);
00332 
00333         /* Multiplex root HMM for first phone (one root per word, flat
00334          * lexicon).  diphone is irrelevant here, for the time being,
00335          * at least. */
00336         rhmm = listelem_malloc(ngs->root_chan_alloc);
00337         rhmm->ci2phone = dict_second_phone(dict, wid);
00338         rhmm->ciphone = dict_first_phone(dict, wid);
00339         rhmm->next = NULL;
00340         hmm_init(ngs->hmmctx, &rhmm->hmm, TRUE,
00341                  bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, rhmm->ciphone),
00342                  bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, rhmm->ciphone));
00343 
00344         /* HMMs for word-internal phones */
00345         prevhmm = NULL;
00346         for (p = 1; p < dict_pronlen(dict, wid) - 1; p++) {
00347             hmm = listelem_malloc(ngs->chan_alloc);
00348             hmm->ciphone = dict_pron(dict, wid, p);
00349             hmm->info.rc_id = (p == dict_pronlen(dict, wid) - 1) ? 0 : -1;
00350             hmm->next = NULL;
00351             hmm_init(ngs->hmmctx, &hmm->hmm, FALSE,
00352                      dict2pid_internal(d2p,wid,p), 
00353                      bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, hmm->ciphone));
00354 
00355             if (prevhmm)
00356                 prevhmm->next = hmm;
00357             else
00358                 rhmm->next = hmm;
00359 
00360             prevhmm = hmm;
00361         }
00362 
00363         /* Right-context phones */
00364         ngram_search_alloc_all_rc(ngs, wid);
00365 
00366         /* Link in just allocated right-context phones */
00367         if (prevhmm)
00368             prevhmm->next = ngs->word_chan[wid];
00369         else
00370             rhmm->next = ngs->word_chan[wid];
00371         ngs->word_chan[wid] = (chan_t *) rhmm;
00372     }
00373 
00374 }
00375 
00376 void
00377 ngram_fwdflat_start(ngram_search_t *ngs)
00378 {
00379     root_chan_t *rhmm;
00380     int i;
00381 
00382     ptmr_reset(&ngs->fwdflat_perf);
00383     ptmr_start(&ngs->fwdflat_perf);
00384     build_fwdflat_wordlist(ngs);
00385     build_fwdflat_chan(ngs);
00386 
00387     ngs->bpidx = 0;
00388     ngs->bss_head = 0;
00389 
00390     for (i = 0; i < ps_search_n_words(ngs); i++)
00391         ngs->word_lat_idx[i] = NO_BP;
00392 
00393     /* Reset the permanently allocated single-phone words, since they
00394      * may have junk left over in them from previous searches. */
00395     for (i = 0; i < ngs->n_1ph_words; i++) {
00396         int32 w = ngs->single_phone_wid[i];
00397         rhmm = (root_chan_t *) ngs->word_chan[w];
00398         hmm_clear(&rhmm->hmm);
00399     }
00400 
00401     /* Start search with <s>; word_chan[<s>] is permanently allocated */
00402     rhmm = (root_chan_t *) ngs->word_chan[ps_search_start_wid(ngs)];
00403     hmm_enter(&rhmm->hmm, 0, NO_BP, 0);
00404     ngs->active_word_list[0][0] = ps_search_start_wid(ngs);
00405     ngs->n_active_word[0] = 1;
00406 
00407     ngs->best_score = 0;
00408     ngs->renormalized = FALSE;
00409 
00410     for (i = 0; i < ps_search_n_words(ngs); i++)
00411         ngs->last_ltrans[i].sf = -1;
00412 
00413     if (!ngs->fwdtree)
00414         ngs->n_frame = 0;
00415 
00416     ngs->st.n_fwdflat_chan = 0;
00417     ngs->st.n_fwdflat_words = 0;
00418     ngs->st.n_fwdflat_word_transition = 0;
00419     ngs->st.n_senone_active_utt = 0;
00420 }
00421 
00422 static void
00423 compute_fwdflat_sen_active(ngram_search_t *ngs, int frame_idx)
00424 {
00425     int32 i, w;
00426     int32 *awl;
00427     root_chan_t *rhmm;
00428     chan_t *hmm;
00429 
00430     acmod_clear_active(ps_search_acmod(ngs));
00431 
00432     i = ngs->n_active_word[frame_idx & 0x1];
00433     awl = ngs->active_word_list[frame_idx & 0x1];
00434 
00435     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00436         rhmm = (root_chan_t *)ngs->word_chan[w];
00437         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00438             acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
00439         }
00440 
00441         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00442             if (hmm_frame(&hmm->hmm) == frame_idx) {
00443                 acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
00444             }
00445         }
00446     }
00447 }
00448 
00449 static void
00450 fwdflat_eval_chan(ngram_search_t *ngs, int frame_idx)
00451 {
00452     int32 i, w, bestscore;
00453     int32 *awl;
00454     root_chan_t *rhmm;
00455     chan_t *hmm;
00456 
00457     i = ngs->n_active_word[frame_idx & 0x1];
00458     awl = ngs->active_word_list[frame_idx & 0x1];
00459     bestscore = WORST_SCORE;
00460 
00461     ngs->st.n_fwdflat_words += i;
00462 
00463     /* Scan all active words. */
00464     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00465         rhmm = (root_chan_t *) ngs->word_chan[w];
00466         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00467             int32 score = chan_v_eval(rhmm);
00468             if ((score BETTER_THAN bestscore) && (w != ps_search_finish_wid(ngs)))
00469                 bestscore = score;
00470             ngs->st.n_fwdflat_chan++;
00471         }
00472 
00473         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00474             if (hmm_frame(&hmm->hmm) == frame_idx) {
00475                 int32 score = chan_v_eval(hmm);
00476                 if (score BETTER_THAN bestscore)
00477                     bestscore = score;
00478                 ngs->st.n_fwdflat_chan++;
00479             }
00480         }
00481     }
00482 
00483     ngs->best_score = bestscore;
00484 }
00485 
00486 static void
00487 fwdflat_prune_chan(ngram_search_t *ngs, int frame_idx)
00488 {
00489     int32 i, cf, nf, w, pip, newscore, thresh, wordthresh;
00490     int32 *awl;
00491     root_chan_t *rhmm;
00492     chan_t *hmm, *nexthmm;
00493 
00494     cf = frame_idx;
00495     nf = cf + 1;
00496     i = ngs->n_active_word[cf & 0x1];
00497     awl = ngs->active_word_list[cf & 0x1];
00498     bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
00499 
00500     thresh = ngs->best_score + ngs->fwdflatbeam;
00501     wordthresh = ngs->best_score + ngs->fwdflatwbeam;
00502     pip = ngs->pip;
00503     E_DEBUG(3,("frame %d thresh %d wordthresh %d\n", frame_idx, thresh, wordthresh));
00504 
00505     /* Scan all active words. */
00506     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00507         rhmm = (root_chan_t *) ngs->word_chan[w];
00508         /* Propagate active root channels */
00509         if (hmm_frame(&rhmm->hmm) == cf
00510             && hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) {
00511             hmm_frame(&rhmm->hmm) = nf;
00512             bitvec_set(ngs->word_active, w);
00513 
00514             /* Transitions out of root channel */
00515             newscore = hmm_out_score(&rhmm->hmm);
00516             if (rhmm->next) {
00517                 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
00518 
00519                 newscore += pip;
00520                 if (newscore BETTER_THAN thresh) {
00521                     hmm = rhmm->next;
00522                     /* Enter all right context phones */
00523                     if (hmm->info.rc_id >= 0) {
00524                         for (; hmm; hmm = hmm->next) {
00525                             if ((hmm_frame(&hmm->hmm) < cf)
00526                                 || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) {
00527                                 hmm_enter(&hmm->hmm, newscore,
00528                                           hmm_out_history(&rhmm->hmm), nf);
00529                             }
00530                         }
00531                     }
00532                     /* Just a normal word internal phone */
00533                     else {
00534                         if ((hmm_frame(&hmm->hmm) < cf)
00535                             || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) {
00536                                 hmm_enter(&hmm->hmm, newscore,
00537                                           hmm_out_history(&rhmm->hmm), nf);
00538                         }
00539                     }
00540                 }
00541             }
00542             else {
00543                 assert(dict_is_single_phone(ps_search_dict(ngs), w));
00544 
00545                 /* Word exit for single-phone words (where did their
00546                  * whmms come from?) (either from
00547                  * ngram_search_fwdtree, or from
00548                  * ngram_fwdflat_allocate_1ph(), that's where) */
00549                 if (newscore BETTER_THAN wordthresh) {
00550                     ngram_search_save_bp(ngs, cf, w, newscore,
00551                                          hmm_out_history(&rhmm->hmm), 0);
00552                 }
00553             }
00554         }
00555 
00556         /* Transitions out of non-root channels. */
00557         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00558             if (hmm_frame(&hmm->hmm) >= cf) {
00559                 /* Propagate forward HMMs inside the beam. */
00560                 if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
00561                     hmm_frame(&hmm->hmm) = nf;
00562                     bitvec_set(ngs->word_active, w);
00563 
00564                     newscore = hmm_out_score(&hmm->hmm);
00565                     /* Word-internal phones */
00566                     if (hmm->info.rc_id < 0) {
00567                         newscore += pip;
00568                         if (newscore BETTER_THAN thresh) {
00569                             nexthmm = hmm->next;
00570                             /* Enter all right-context phones. */
00571                             if (nexthmm->info.rc_id >= 0) {
00572                                  for (; nexthmm; nexthmm = nexthmm->next) {
00573                                     if ((hmm_frame(&nexthmm->hmm) < cf)
00574                                         || (newscore BETTER_THAN
00575                                             hmm_in_score(&nexthmm->hmm))) {
00576                                         hmm_enter(&nexthmm->hmm,
00577                                                   newscore,
00578                                                   hmm_out_history(&hmm->hmm),
00579                                                   nf);
00580                                     }
00581                                 }
00582                             }
00583                             /* Enter single word-internal phone. */
00584                             else {
00585                                 if ((hmm_frame(&nexthmm->hmm) < cf)
00586                                     || (newscore BETTER_THAN
00587                                         hmm_in_score(&nexthmm->hmm))) {
00588                                     hmm_enter(&nexthmm->hmm, newscore,
00589                                               hmm_out_history(&hmm->hmm), nf);
00590                                 }
00591                             }
00592                         }
00593                     }
00594                     /* Right-context phones - apply word beam and exit. */
00595                     else {
00596                         if (newscore BETTER_THAN wordthresh) {
00597                             ngram_search_save_bp(ngs, cf, w, newscore,
00598                                                  hmm_out_history(&hmm->hmm),
00599                                                  hmm->info.rc_id);
00600                         }
00601                     }
00602                 }
00603                 /* Zero out inactive HMMs. */
00604                 else if (hmm_frame(&hmm->hmm) != nf) {
00605                     hmm_clear_scores(&hmm->hmm);
00606                 }
00607             }
00608         }
00609     }
00610 }
00611 
00612 static void
00613 get_expand_wordlist(ngram_search_t *ngs, int32 frm, int32 win)
00614 {
00615     int32 f, sf, ef;
00616     ps_latnode_t *node;
00617 
00618     if (!ngs->fwdtree) {
00619         ngs->st.n_fwdflat_word_transition += ngs->n_expand_words;
00620         return;
00621     }
00622 
00623     sf = frm - win;
00624     if (sf < 0)
00625         sf = 0;
00626     ef = frm + win;
00627     if (ef > ngs->n_frame)
00628         ef = ngs->n_frame;
00629 
00630     bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs));
00631     ngs->n_expand_words = 0;
00632 
00633     for (f = sf; f < ef; f++) {
00634         for (node = ngs->frm_wordlist[f]; node; node = node->next) {
00635             if (!bitvec_is_set(ngs->expand_word_flag, node->wid)) {
00636                 ngs->expand_word_list[ngs->n_expand_words++] = node->wid;
00637                 bitvec_set(ngs->expand_word_flag, node->wid);
00638             }
00639         }
00640     }
00641     ngs->expand_word_list[ngs->n_expand_words] = -1;
00642     ngs->st.n_fwdflat_word_transition += ngs->n_expand_words;
00643 }
00644 
00645 static void
00646 fwdflat_word_transition(ngram_search_t *ngs, int frame_idx)
00647 {
00648     int32 cf, nf, b, thresh, pip, i, w, newscore;
00649     int32 best_silrc_score = 0, best_silrc_bp = 0;      /* FIXME: good defaults? */
00650     bptbl_t *bp;
00651     int32 *rcss;
00652     root_chan_t *rhmm;
00653     int32 *awl;
00654     float32 lwf;
00655     dict_t *dict = ps_search_dict(ngs);
00656     dict2pid_t *d2p = ps_search_dict2pid(ngs);
00657 
00658     cf = frame_idx;
00659     nf = cf + 1;
00660     thresh = ngs->best_score + ngs->fwdflatbeam;
00661     pip = ngs->pip;
00662     best_silrc_score = WORST_SCORE;
00663     lwf = ngs->fwdflat_fwdtree_lw_ratio;
00664 
00665     /* Search for all words starting within a window of this frame.
00666      * These are the successors for words exiting now. */
00667     get_expand_wordlist(ngs, cf, ngs->max_sf_win);
00668 
00669     /* Scan words exited in current frame */
00670     for (b = ngs->bp_table_idx[cf]; b < ngs->bpidx; b++) {
00671         xwdssid_t *rssid;
00672         int32 silscore;
00673 
00674         bp = ngs->bp_table + b;
00675         ngs->word_lat_idx[bp->wid] = NO_BP;
00676 
00677         if (bp->wid == ps_search_finish_wid(ngs))
00678             continue;
00679 
00680         /* DICT2PID location */
00681         /* Get the mapping from right context phone ID to index in the
00682          * right context table and the bscore_stack. */
00683         rcss = ngs->bscore_stack + bp->s_idx;
00684         if (bp->last2_phone == -1)
00685             rssid = NULL;
00686         else
00687             rssid = dict2pid_rssid(d2p, bp->last_phone, bp->last2_phone);
00688 
00689         /* Transition to all successor words. */
00690         for (i = 0; ngs->expand_word_list[i] >= 0; i++) {
00691             int32 n_used;
00692 
00693             w = ngs->expand_word_list[i];
00694 
00695             /* Get the exit score we recorded in save_bwd_ptr(), or
00696              * something approximating it. */
00697             if (rssid)
00698                 newscore = rcss[rssid->cimap[dict_first_phone(dict, w)]];
00699             else
00700                 newscore = bp->score;
00701             if (newscore == WORST_SCORE)
00702                 continue;
00703             /* FIXME: Floating point... */
00704             newscore += lwf
00705                 * (ngram_tg_score(ngs->lmset,
00706                                   dict_basewid(dict, w),
00707                                   bp->real_wid,
00708                                   bp->prev_real_wid,
00709                                   &n_used) >> SENSCR_SHIFT);
00710             newscore += pip;
00711 
00712             /* Enter the next word */
00713             if (newscore BETTER_THAN thresh) {
00714                 rhmm = (root_chan_t *) ngs->word_chan[w];
00715                 if ((hmm_frame(&rhmm->hmm) < cf)
00716                     || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
00717                     hmm_enter(&rhmm->hmm, newscore, b, nf);
00718                     /* DICT2PID: This is where mpx ssids get introduced. */
00719                     /* Look up the ssid to use when entering this mpx triphone. */
00720                     hmm_mpx_ssid(&rhmm->hmm, 0) =
00721                         dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone,
00722                                           dict_last_phone(dict, bp->wid));
00723                     assert(IS_S3SSID(hmm_mpx_ssid(&rhmm->hmm, 0)));
00724                     E_DEBUG(6,("ssid %d(%d,%d) = %d\n",
00725                                rhmm->ciphone, dict_last_phone(dict, bp->wid), rhmm->ci2phone,
00726                                hmm_mpx_ssid(&rhmm->hmm, 0)));
00727                     bitvec_set(ngs->word_active, w);
00728                 }
00729             }
00730         }
00731 
00732         /* Get the best exit into silence. */
00733         if (rssid)
00734             silscore = rcss[rssid->cimap[ps_search_acmod(ngs)->mdef->sil]];
00735         else
00736             silscore = bp->score;
00737         if (silscore BETTER_THAN best_silrc_score) {
00738             best_silrc_score = silscore;
00739             best_silrc_bp = b;
00740         }
00741     }
00742 
00743     /* Transition to <sil> */
00744     newscore = best_silrc_score + ngs->silpen + pip;
00745     if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
00746         w = ps_search_silence_wid(ngs);
00747         rhmm = (root_chan_t *) ngs->word_chan[w];
00748         if ((hmm_frame(&rhmm->hmm) < cf)
00749             || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
00750             hmm_enter(&rhmm->hmm, newscore,
00751                       best_silrc_bp, nf);
00752             bitvec_set(ngs->word_active, w);
00753         }
00754     }
00755     /* Transition to noise words */
00756     newscore = best_silrc_score + ngs->fillpen + pip;
00757     if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
00758         for (w = ps_search_silence_wid(ngs) + 1; w < ps_search_n_words(ngs); w++) {
00759             rhmm = (root_chan_t *) ngs->word_chan[w];
00760             /* Noise words that aren't a single phone will have NULL here. */
00761             if (rhmm == NULL)
00762                 continue;
00763             if ((hmm_frame(&rhmm->hmm) < cf)
00764                 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
00765                 hmm_enter(&rhmm->hmm, newscore,
00766                           best_silrc_bp, nf);
00767                 bitvec_set(ngs->word_active, w);
00768             }
00769         }
00770     }
00771 
00772     /* Reset initial channels of words that have become inactive even after word trans. */
00773     i = ngs->n_active_word[cf & 0x1];
00774     awl = ngs->active_word_list[cf & 0x1];
00775     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00776         rhmm = (root_chan_t *) ngs->word_chan[w];
00777         if (hmm_frame(&rhmm->hmm) == cf) {
00778             hmm_clear_scores(&rhmm->hmm);
00779         }
00780     }
00781 }
00782 
00783 static void
00784 fwdflat_renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
00785 {
00786     root_chan_t *rhmm;
00787     chan_t *hmm;
00788     int32 i, cf, w, *awl;
00789 
00790     cf = frame_idx;
00791 
00792     /* Renormalize individual word channels */
00793     i = ngs->n_active_word[cf & 0x1];
00794     awl = ngs->active_word_list[cf & 0x1];
00795     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00796         rhmm = (root_chan_t *) ngs->word_chan[w];
00797         if (hmm_frame(&rhmm->hmm) == cf) {
00798             hmm_normalize(&rhmm->hmm, norm);
00799         }
00800         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00801             if (hmm_frame(&hmm->hmm) == cf) {
00802                 hmm_normalize(&hmm->hmm, norm);
00803             }
00804         }
00805     }
00806 
00807     ngs->renormalized = TRUE;
00808 }
00809 
00810 int
00811 ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx)
00812 {
00813     int16 const *senscr;
00814     int32 nf, i, j;
00815     int32 *nawl;
00816 
00817     /* Activate our HMMs for the current frame if need be. */
00818     if (!ps_search_acmod(ngs)->compallsen)
00819         compute_fwdflat_sen_active(ngs, frame_idx);
00820 
00821     /* Compute GMM scores for the current frame. */
00822     senscr = acmod_score(ps_search_acmod(ngs), &frame_idx);
00823     ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
00824 
00825     /* Mark backpointer table for current frame. */
00826     ngram_search_mark_bptable(ngs, frame_idx);
00827 
00828     /* If the best score is equal to or worse than WORST_SCORE,
00829      * recognition has failed, don't bother to keep trying. */
00830     if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE)
00831         return 0;
00832     /* Renormalize if necessary */
00833     if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) {
00834         E_INFO("Renormalizing Scores at frame %d, best score %d\n",
00835                frame_idx, ngs->best_score);
00836         fwdflat_renormalize_scores(ngs, frame_idx, ngs->best_score);
00837     }
00838 
00839     ngs->best_score = WORST_SCORE;
00840     hmm_context_set_senscore(ngs->hmmctx, senscr);
00841 
00842     /* Evaluate HMMs */
00843     fwdflat_eval_chan(ngs, frame_idx);
00844     /* Prune HMMs and do phone transitions. */
00845     fwdflat_prune_chan(ngs, frame_idx);
00846     /* Do word transitions. */
00847     fwdflat_word_transition(ngs, frame_idx);
00848 
00849     /* Create next active word list */
00850     nf = frame_idx + 1;
00851     nawl = ngs->active_word_list[nf & 0x1];
00852     for (i = 0, j = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
00853         if (bitvec_is_set(ngs->word_active, ngs->fwdflat_wordlist[i])) {
00854             *(nawl++) = ngs->fwdflat_wordlist[i];
00855             j++;
00856         }
00857     }
00858     for (i = ps_search_start_wid(ngs); i < ps_search_n_words(ngs); i++) {
00859         if (bitvec_is_set(ngs->word_active, i)) {
00860             *(nawl++) = i;
00861             j++;
00862         }
00863     }
00864     if (!ngs->fwdtree)
00865         ++ngs->n_frame;
00866     ngs->n_active_word[nf & 0x1] = j;
00867 
00868     /* Return the number of frames processed. */
00869     return 1;
00870 }
00871 
00875 static void
00876 destroy_fwdflat_wordlist(ngram_search_t *ngs)
00877 {
00878     ps_latnode_t *node, *tnode;
00879     int32 f;
00880 
00881     if (!ngs->fwdtree)
00882         return;
00883 
00884     for (f = 0; f < ngs->n_frame; f++) {
00885         for (node = ngs->frm_wordlist[f]; node; node = tnode) {
00886             tnode = node->next;
00887             listelem_free(ngs->latnode_alloc, node);
00888         }
00889     }
00890 }
00891 
00895 static void
00896 destroy_fwdflat_chan(ngram_search_t *ngs)
00897 {
00898     int32 i, wid;
00899 
00900     for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
00901         root_chan_t *rhmm;
00902         chan_t *thmm;
00903         wid = ngs->fwdflat_wordlist[i];
00904         if (dict_is_single_phone(ps_search_dict(ngs),wid))
00905             continue;
00906         assert(ngs->word_chan[wid] != NULL);
00907 
00908         /* The first HMM in ngs->word_chan[wid] was allocated with
00909          * ngs->root_chan_alloc, but this will attempt to free it
00910          * using ngs->chan_alloc, which will not work.  Therefore we
00911          * free it manually and move the list forward before handing
00912          * it off. */
00913         rhmm = (root_chan_t *)ngs->word_chan[wid];
00914         thmm = rhmm->next;
00915         listelem_free(ngs->root_chan_alloc, rhmm);
00916         ngs->word_chan[wid] = thmm;
00917         ngram_search_free_all_rc(ngs, wid);
00918     }
00919 }
00920 
00921 void
00922 ngram_fwdflat_finish(ngram_search_t *ngs)
00923 {
00924     int32 cf;
00925 
00926     destroy_fwdflat_chan(ngs);
00927     destroy_fwdflat_wordlist(ngs);
00928     bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
00929 
00930     /* This is the number of frames processed. */
00931     cf = ps_search_acmod(ngs)->output_frame;
00932     /* Add a mark in the backpointer table for one past the final frame. */
00933     ngram_search_mark_bptable(ngs, cf);
00934 
00935     ptmr_stop(&ngs->fwdflat_perf);
00936     /* Print out some statistics. */
00937     if (cf > 0) {
00938         double n_speech = (double)(cf + 1)
00939             / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
00940         E_INFO("%8d words recognized (%d/fr)\n",
00941                ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
00942         E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt,
00943                (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
00944         E_INFO("%8d channels searched (%d/fr)\n",
00945                ngs->st.n_fwdflat_chan, ngs->st.n_fwdflat_chan / (cf + 1));
00946         E_INFO("%8d words searched (%d/fr)\n",
00947                ngs->st.n_fwdflat_words, ngs->st.n_fwdflat_words / (cf + 1));
00948         E_INFO("%8d word transitions (%d/fr)\n",
00949                ngs->st.n_fwdflat_word_transition,
00950                ngs->st.n_fwdflat_word_transition / (cf + 1));
00951         E_INFO("fwdflat %.2f CPU %.3f xRT\n",
00952                ngs->fwdflat_perf.t_cpu,
00953                ngs->fwdflat_perf.t_cpu / n_speech);
00954         E_INFO("fwdflat %.2f wall %.3f xRT\n",
00955                ngs->fwdflat_perf.t_elapsed,
00956                ngs->fwdflat_perf.t_elapsed / n_speech);
00957     }
00958 }