• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libpocketsphinx/ngram_search_fwdflat.c

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00042 /* System headers. */
00043 #include <string.h>
00044 #include <assert.h>
00045 
00046 /* SphinxBase headers. */
00047 #include <ckd_alloc.h>
00048 #include <listelem_alloc.h>
00049 
00050 /* Local headers. */
00051 #include "ngram_search.h"
00052 #include "ps_lattice_internal.h"
00053 
/* Debug switch: set to 1 to evaluate channels through the dumping
 * variant of the Viterbi step, which is handed stderr. */
#define __CHAN_DUMP__           0
#if !__CHAN_DUMP__
#define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
#else
#define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
#endif
00061 
00062 static void
00063 ngram_fwdflat_expand_all(ngram_search_t *ngs)
00064 {
00065     int n_words, i;
00066 
00067     ngs->n_expand_words = 0;
00068     n_words = ps_search_n_words(ngs);
00069     bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs));
00070     for (i = 0; i < ps_search_start_wid(ngs); i++) {
00071         if (ngram_model_set_known_wid(ngs->lmset,
00072                                       ps_search_dict(ngs)->dict_list[i]->wid)) {
00073             ngs->fwdflat_wordlist[ngs->n_expand_words] = i;
00074             ngs->expand_word_list[ngs->n_expand_words] = i;
00075             bitvec_set(ngs->expand_word_flag, i);
00076             ngs->n_expand_words++;
00077         }
00078     }
00079     ngs->expand_word_list[ngs->n_expand_words] = -1;
00080     ngs->fwdflat_wordlist[ngs->n_expand_words] = -1;
00081 }
00082 
00083 void
00084 ngram_fwdflat_init(ngram_search_t *ngs)
00085 {
00086     int n_words, i;
00087 
00088     n_words = ps_search_n_words(ngs);
00089     ngs->fwdflat_wordlist = ckd_calloc(n_words + 1, sizeof(*ngs->fwdflat_wordlist));
00090     ngs->expand_word_flag = bitvec_alloc(n_words);
00091     ngs->expand_word_list = ckd_calloc(n_words + 1, sizeof(*ngs->expand_word_list));
00092     ngs->frm_wordlist = ckd_calloc(ngs->n_frame_alloc, sizeof(*ngs->frm_wordlist));
00093     ngs->min_ef_width = cmd_ln_int32_r(ps_search_config(ngs), "-fwdflatefwid");
00094     ngs->max_sf_win = cmd_ln_int32_r(ps_search_config(ngs), "-fwdflatsfwin");
00095     E_INFO("fwdflat: min_ef_width = %d, max_sf_win = %d\n",
00096            ngs->min_ef_width, ngs->max_sf_win);
00097 
00098     /* No tree-search; pre-build the expansion list, including all LM words. */
00099     if (!ngs->fwdtree) {
00100         int w;
00101 
00102         /* Build full expansion list from LM words. */
00103         ngram_fwdflat_expand_all(ngs);
00104 
00105         /* Allocate single-phone words, since they won't have
00106          * been allocated for us by fwdtree initialization. */
00107         ngs->n_1ph_words = 0;
00108         for (w = 0; w < n_words; w++) {
00109             dict_entry_t *de = ps_search_dict(ngs)->dict_list[w];
00110             if (de->len == 1)
00111                 ++ngs->n_1ph_words;
00112         }
00113         ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
00114         i = 0;
00115         for (w = 0; w < n_words; w++) {
00116             dict_entry_t *de = ps_search_dict(ngs)->dict_list[w];
00117             if (de->len != 1)
00118                 continue;
00119 
00120             ngs->rhmm_1ph[i].diphone = de->phone_ids[0];
00121             ngs->rhmm_1ph[i].ciphone = de->ci_phone_ids[0];
00122             hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, de->mpx,
00123                      de->phone_ids[0], de->ci_phone_ids[0]);
00124             ngs->rhmm_1ph[i].next = NULL;
00125             ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
00126             i++;
00127         }
00128     }
00129 }
00130 
00131 void
00132 ngram_fwdflat_deinit(ngram_search_t *ngs)
00133 {
00134     /* Free single-phone words if we allocated them. */
00135     if (!ngs->fwdtree) {
00136         ckd_free(ngs->rhmm_1ph);
00137     }
00138     ckd_free(ngs->fwdflat_wordlist);
00139     bitvec_free(ngs->expand_word_flag);
00140     ckd_free(ngs->expand_word_list);
00141     ckd_free(ngs->frm_wordlist);
00142 }
00143 
00144 int
00145 ngram_fwdflat_reinit(ngram_search_t *ngs)
00146 {
00147     /* No tree-search; re-build the expansion list from all LM words. */
00148     if (!ngs->fwdtree) {
00149         /* Rebuild full expansion list from LM words. */
00150         ngram_fwdflat_expand_all(ngs);
00151     }
00152     /* Otherwise there is nothing to do since the wordlist is
00153      * generated anew every utterance. */
00154     return 0;
00155 }
00156 
00160 static void
00161 build_fwdflat_wordlist(ngram_search_t *ngs)
00162 {
00163     int32 i, f, sf, ef, wid, nwd;
00164     bptbl_t *bp;
00165     ps_latnode_t *node, *prevnode, *nextnode;
00166     dict_entry_t *de;
00167 
00168     /* No tree-search, use statically allocated wordlist. */
00169     if (!ngs->fwdtree)
00170         return;
00171 
00172     memset(ngs->frm_wordlist, 0, ngs->n_frame_alloc * sizeof(*ngs->frm_wordlist));
00173 
00174     /* Scan the backpointer table for all active words and record
00175      * their exit frames. */
00176     for (i = 0, bp = ngs->bp_table; i < ngs->bpidx; i++, bp++) {
00177         sf = (bp->bp < 0) ? 0 : ngs->bp_table[bp->bp].frame + 1;
00178         ef = bp->frame;
00179         wid = bp->wid;
00180 
00181         /*
00182          * NOTE: fwdflat_wordlist excludes <s>, <sil> and noise words;
00183          * it includes </s>.  That is, it includes anything to which a
00184          * transition can be made in the LM.
00185          */
00186         /* Ignore silence and <s> */
00187         if (ISA_FILLER_WORD(ngs, wid) || (wid == ps_search_start_wid(ngs)))
00188             continue;
00189 
00190         /* Look for it in the wordlist. */
00191         de = ps_search_dict(ngs)->dict_list[wid];
00192         for (node = ngs->frm_wordlist[sf]; node && (node->wid != wid);
00193              node = node->next);
00194 
00195         /* Update last end frame. */
00196         if (node)
00197             node->lef = ef;
00198         else {
00199             /* New node; link to head of list */
00200             node = listelem_malloc(ngs->latnode_alloc);
00201             node->wid = wid;
00202             node->fef = node->lef = ef;
00203 
00204             node->next = ngs->frm_wordlist[sf];
00205             ngs->frm_wordlist[sf] = node;
00206         }
00207     }
00208 
00209     /* Eliminate "unlikely" words, for which there are too few end points */
00210     for (f = 0; f < ngs->n_frame; f++) {
00211         prevnode = NULL;
00212         for (node = ngs->frm_wordlist[f]; node; node = nextnode) {
00213             nextnode = node->next;
00214             /* Word has too few endpoints */
00215             if ((node->lef - node->fef < ngs->min_ef_width) ||
00216                 /* Word is </s> and doesn't actually end in last frame */
00217                 ((node->wid == ps_search_finish_wid(ngs)) && (node->lef < ngs->n_frame - 1))) {
00218                 if (!prevnode)
00219                     ngs->frm_wordlist[f] = nextnode;
00220                 else
00221                     prevnode->next = nextnode;
00222                 listelem_free(ngs->latnode_alloc, node);
00223             }
00224             else
00225                 prevnode = node;
00226         }
00227     }
00228 
00229     /* Form overall wordlist for 2nd pass */
00230     nwd = 0;
00231     bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
00232     for (f = 0; f < ngs->n_frame; f++) {
00233         for (node = ngs->frm_wordlist[f]; node; node = node->next) {
00234             if (!bitvec_is_set(ngs->word_active, node->wid)) {
00235                 bitvec_set(ngs->word_active, node->wid);
00236                 ngs->fwdflat_wordlist[nwd++] = node->wid;
00237             }
00238         }
00239     }
00240     ngs->fwdflat_wordlist[nwd] = -1;
00241 }
00242 
00246 static void
00247 build_fwdflat_chan(ngram_search_t *ngs)
00248 {
00249     int32 i, wid, p;
00250     dict_entry_t *de;
00251     root_chan_t *rhmm;
00252     chan_t *hmm, *prevhmm;
00253 
00254     /* Build word HMMs for each word in the lattice. */
00255     for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
00256         wid = ngs->fwdflat_wordlist[i];
00257         de = ps_search_dict(ngs)->dict_list[wid];
00258 
00259         /* Omit single-phone words as they are permanently allocated */
00260         if (de->len == 1)
00261             continue;
00262 
00263         assert(de->mpx);
00264         assert(ngs->word_chan[wid] == NULL);
00265 
00266         /* Multiplex root HMM for first phone (one root per word, flat
00267          * lexicon) */
00268         rhmm = listelem_malloc(ngs->root_chan_alloc);
00269         rhmm->diphone = de->phone_ids[0];
00270         rhmm->ciphone = de->ci_phone_ids[0];
00271         rhmm->next = NULL;
00272         hmm_init(ngs->hmmctx, &rhmm->hmm, TRUE, rhmm->diphone, rhmm->ciphone);
00273 
00274         /* HMMs for word-internal phones */
00275         prevhmm = NULL;
00276         for (p = 1; p < de->len - 1; p++) {
00277             hmm = listelem_malloc(ngs->chan_alloc);
00278             hmm->ciphone = de->ci_phone_ids[p];
00279             hmm->info.rc_id = p + 1 - de->len;
00280             hmm->next = NULL;
00281             hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, de->phone_ids[p], hmm->ciphone);
00282 
00283             if (prevhmm)
00284                 prevhmm->next = hmm;
00285             else
00286                 rhmm->next = hmm;
00287 
00288             prevhmm = hmm;
00289         }
00290 
00291         /* Right-context phones */
00292         ngram_search_alloc_all_rc(ngs, wid);
00293 
00294         /* Link in just allocated right-context phones */
00295         if (prevhmm)
00296             prevhmm->next = ngs->word_chan[wid];
00297         else
00298             rhmm->next = ngs->word_chan[wid];
00299         ngs->word_chan[wid] = (chan_t *) rhmm;
00300     }
00301 }
00302 
00303 void
00304 ngram_fwdflat_start(ngram_search_t *ngs)
00305 {
00306     root_chan_t *rhmm;
00307     int i;
00308 
00309     build_fwdflat_wordlist(ngs);
00310     build_fwdflat_chan(ngs);
00311 
00312     ngs->bpidx = 0;
00313     ngs->bss_head = 0;
00314 
00315     for (i = 0; i < ps_search_n_words(ngs); i++)
00316         ngs->word_lat_idx[i] = NO_BP;
00317 
00318     /* Start search with <s>; word_chan[<s>] is permanently allocated */
00319     rhmm = (root_chan_t *) ngs->word_chan[ps_search_start_wid(ngs)];
00320     hmm_enter(&rhmm->hmm, 0, NO_BP, 0);
00321     ngs->active_word_list[0][0] = ps_search_start_wid(ngs);
00322     ngs->n_active_word[0] = 1;
00323 
00324     ngs->best_score = 0;
00325     ngs->renormalized = FALSE;
00326 
00327     for (i = 0; i < ps_search_n_words(ngs); i++)
00328         ngs->last_ltrans[i].sf = -1;
00329 
00330     if (!ngs->fwdtree)
00331         ngs->n_frame = 0;
00332 
00333     ngs->st.n_fwdflat_chan = 0;
00334     ngs->st.n_fwdflat_words = 0;
00335     ngs->st.n_fwdflat_word_transition = 0;
00336     ngs->st.n_senone_active_utt = 0;
00337 }
00338 
00339 static void
00340 compute_fwdflat_sen_active(ngram_search_t *ngs, int frame_idx)
00341 {
00342     int32 i, w;
00343     int32 *awl;
00344     root_chan_t *rhmm;
00345     chan_t *hmm;
00346 
00347     acmod_clear_active(ps_search_acmod(ngs));
00348 
00349     i = ngs->n_active_word[frame_idx & 0x1];
00350     awl = ngs->active_word_list[frame_idx & 0x1];
00351 
00352     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00353         rhmm = (root_chan_t *)ngs->word_chan[w];
00354         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00355             acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
00356         }
00357 
00358         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00359             if (hmm_frame(&hmm->hmm) == frame_idx) {
00360                 acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
00361             }
00362         }
00363     }
00364 }
00365 
00366 static void
00367 fwdflat_eval_chan(ngram_search_t *ngs, int frame_idx)
00368 {
00369     int32 i, w, bestscore;
00370     int32 *awl;
00371     root_chan_t *rhmm;
00372     chan_t *hmm;
00373 
00374     i = ngs->n_active_word[frame_idx & 0x1];
00375     awl = ngs->active_word_list[frame_idx & 0x1];
00376     bestscore = WORST_SCORE;
00377 
00378     ngs->st.n_fwdflat_words += i;
00379 
00380     /* Scan all active words. */
00381     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00382         rhmm = (root_chan_t *) ngs->word_chan[w];
00383         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00384             int32 score = chan_v_eval(rhmm);
00385             if ((bestscore < score) && (w != ps_search_finish_wid(ngs)))
00386                 bestscore = score;
00387             ngs->st.n_fwdflat_chan++;
00388         }
00389 
00390         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00391             if (hmm_frame(&hmm->hmm) == frame_idx) {
00392                 int32 score = chan_v_eval(hmm);
00393                 if (bestscore < score)
00394                     bestscore = score;
00395                 ngs->st.n_fwdflat_chan++;
00396             }
00397         }
00398     }
00399 
00400     ngs->best_score = bestscore;
00401 }
00402 
00403 static void
00404 fwdflat_prune_chan(ngram_search_t *ngs, int frame_idx)
00405 {
00406     int32 i, cf, nf, w, pip, newscore, thresh, wordthresh;
00407     int32 *awl;
00408     root_chan_t *rhmm;
00409     chan_t *hmm, *nexthmm;
00410     dict_entry_t *de;
00411 
00412     cf = frame_idx;
00413     nf = cf + 1;
00414     i = ngs->n_active_word[cf & 0x1];
00415     awl = ngs->active_word_list[cf & 0x1];
00416     bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
00417 
00418     thresh = ngs->best_score + ngs->fwdflatbeam;
00419     wordthresh = ngs->best_score + ngs->fwdflatwbeam;
00420     pip = ngs->pip;
00421 
00422     /* Scan all active words. */
00423     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00424         de = ps_search_dict(ngs)->dict_list[w];
00425 
00426         rhmm = (root_chan_t *) ngs->word_chan[w];
00427         /* Propagate active root channels */
00428         if (hmm_frame(&rhmm->hmm) == cf
00429             && hmm_bestscore(&rhmm->hmm) > thresh) {
00430             hmm_frame(&rhmm->hmm) = nf;
00431             bitvec_set(ngs->word_active, w);
00432 
00433             /* Transitions out of root channel */
00434             newscore = hmm_out_score(&rhmm->hmm);
00435             if (rhmm->next) {
00436                 assert(de->len > 1);
00437 
00438                 newscore += pip;
00439                 if (newscore > thresh) {
00440                     hmm = rhmm->next;
00441                     /* Enter all right context phones */
00442                     if (hmm->info.rc_id >= 0) {
00443                         for (; hmm; hmm = hmm->next) {
00444                             if ((hmm_frame(&hmm->hmm) < cf)
00445                                 || (hmm_in_score(&hmm->hmm) < newscore)) {
00446                                 hmm_enter(&hmm->hmm, newscore,
00447                                           hmm_out_history(&rhmm->hmm), nf);
00448                             }
00449                         }
00450                     }
00451                     /* Just a normal word internal phone */
00452                     else {
00453                         if ((hmm_frame(&hmm->hmm) < cf)
00454                             || (hmm_in_score(&hmm->hmm) < newscore)) {
00455                                 hmm_enter(&hmm->hmm, newscore,
00456                                           hmm_out_history(&rhmm->hmm), nf);
00457                         }
00458                     }
00459                 }
00460             }
00461             else {
00462                 assert(de->len == 1);
00463 
00464                 /* Word exit for single-phone words (where did their
00465                  * whmms come from?) */
00466                 if (newscore > wordthresh) {
00467                     ngram_search_save_bp(ngs, cf, w, newscore,
00468                                          hmm_out_history(&rhmm->hmm), 0);
00469                 }
00470             }
00471         }
00472 
00473         /* Transitions out of non-root channels. */
00474         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00475             if (hmm_frame(&hmm->hmm) >= cf) {
00476                 /* Propagate forward HMMs inside the beam. */
00477                 if (hmm_bestscore(&hmm->hmm) > thresh) {
00478                     hmm_frame(&hmm->hmm) = nf;
00479                     bitvec_set(ngs->word_active, w);
00480 
00481                     newscore = hmm_out_score(&hmm->hmm);
00482                     /* Word-internal phones */
00483                     if (hmm->info.rc_id < 0) {
00484                         newscore += pip;
00485                         if (newscore > thresh) {
00486                             nexthmm = hmm->next;
00487                             /* Enter all right-context phones. */
00488                             if (nexthmm->info.rc_id >= 0) {
00489                                 for (; nexthmm; nexthmm = nexthmm->next) {
00490                                     if ((hmm_frame(&nexthmm->hmm) < cf)
00491                                         || (hmm_in_score(&nexthmm->hmm)
00492                                             < newscore)) {
00493                                         hmm_enter(&nexthmm->hmm,
00494                                                   newscore,
00495                                                   hmm_out_history(&hmm->hmm),
00496                                                   nf);
00497                                     }
00498                                 }
00499                             }
00500                             /* Enter single word-internal phone. */
00501                             else {
00502                                 if ((hmm_frame(&nexthmm->hmm) < cf)
00503                                     || (hmm_in_score(&nexthmm->hmm)
00504                                         < newscore)) {
00505                                     hmm_enter(&nexthmm->hmm, newscore,
00506                                               hmm_out_history(&hmm->hmm), nf);
00507                                 }
00508                             }
00509                         }
00510                     }
00511                     /* Right-context phones - apply word beam and exit. */
00512                     else {
00513                         if (newscore > wordthresh) {
00514                             ngram_search_save_bp(ngs, cf, w, newscore,
00515                                                  hmm_out_history(&hmm->hmm),
00516                                                  hmm->info.rc_id);
00517                         }
00518                     }
00519                 }
00520                 /* Zero out inactive HMMs. */
00521                 else if (hmm_frame(&hmm->hmm) != nf) {
00522                     hmm_clear_scores(&hmm->hmm);
00523                 }
00524             }
00525         }
00526     }
00527 }
00528 
00529 static void
00530 get_expand_wordlist(ngram_search_t *ngs, int32 frm, int32 win)
00531 {
00532     int32 f, sf, ef;
00533     ps_latnode_t *node;
00534 
00535     if (!ngs->fwdtree) {
00536         ngs->st.n_fwdflat_word_transition += ngs->n_expand_words;
00537         return;
00538     }
00539 
00540     sf = frm - win;
00541     if (sf < 0)
00542         sf = 0;
00543     ef = frm + win;
00544     if (ef > ngs->n_frame)
00545         ef = ngs->n_frame;
00546 
00547     bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs));
00548     ngs->n_expand_words = 0;
00549 
00550     for (f = sf; f < ef; f++) {
00551         for (node = ngs->frm_wordlist[f]; node; node = node->next) {
00552             if (!bitvec_is_set(ngs->expand_word_flag, node->wid)) {
00553                 ngs->expand_word_list[ngs->n_expand_words++] = node->wid;
00554                 bitvec_set(ngs->expand_word_flag, node->wid);
00555             }
00556         }
00557     }
00558     ngs->expand_word_list[ngs->n_expand_words] = -1;
00559     ngs->st.n_fwdflat_word_transition += ngs->n_expand_words;
00560 }
00561 
00562 static void
00563 fwdflat_word_transition(ngram_search_t *ngs, int frame_idx)
00564 {
00565     int32 cf, nf, b, thresh, pip, i, w, newscore;
00566     int32 best_silrc_score = 0, best_silrc_bp = 0;      /* FIXME: good defaults? */
00567     bptbl_t *bp;
00568     dict_entry_t *de, *newde;
00569     uint16 *rcpermtab;
00570     int32 *rcss;
00571     root_chan_t *rhmm;
00572     int32 *awl;
00573     float32 lwf;
00574 
00575     cf = frame_idx;
00576     nf = cf + 1;
00577     thresh = ngs->best_score + ngs->fwdflatbeam;
00578     pip = ngs->pip;
00579     best_silrc_score = WORST_SCORE;
00580     lwf = ngs->fwdflat_fwdtree_lw_ratio;
00581 
00582     /* Search for all words starting within a window of this frame.
00583      * These are the successors for words exiting now. */
00584     get_expand_wordlist(ngs, cf, ngs->max_sf_win);
00585 
00586     /* Scan words exited in current frame */
00587     for (b = ngs->bp_table_idx[cf]; b < ngs->bpidx; b++) {
00588         bp = ngs->bp_table + b;
00589         ngs->word_lat_idx[bp->wid] = NO_BP;
00590 
00591         if (bp->wid == ps_search_finish_wid(ngs))
00592             continue;
00593 
00594         de = ps_search_dict(ngs)->dict_list[bp->wid];
00595         rcpermtab =
00596             (bp->r_diph >=
00597              0) ? ps_search_dict(ngs)->rcFwdPermTable[bp->r_diph] : ngs->zeroPermTab;
00598         rcss = ngs->bscore_stack + bp->s_idx;
00599 
00600         /* Transition to all successor words. */
00601         for (i = 0; ngs->expand_word_list[i] >= 0; i++) {
00602             int32 n_used;
00603             w = ngs->expand_word_list[i];
00604             newde = ps_search_dict(ngs)->dict_list[w];
00605             /* Get the exit score we recorded in save_bwd_ptr(), or
00606              * something approximating it. */
00607             newscore = rcss[rcpermtab[newde->ci_phone_ids[0]]];
00608             /* FIXME: Floating point... */
00609             newscore += lwf
00610                 * ngram_tg_score(ngs->lmset, newde->wid, bp->real_wid,
00611                                  bp->prev_real_wid, &n_used);
00612             newscore += pip;
00613 
00614             /* Enter the next word */
00615             if (newscore > thresh) {
00616                 rhmm = (root_chan_t *) ngs->word_chan[w];
00617                 if ((hmm_frame(&rhmm->hmm) < cf)
00618                     || (hmm_in_score(&rhmm->hmm) < newscore)) {
00619                     hmm_enter(&rhmm->hmm, newscore, b, nf);
00620                     if (hmm_is_mpx(&rhmm->hmm)) {
00621                         rhmm->hmm.s.mpx_ssid[0] =
00622                             ps_search_dict(ngs)->lcFwdTable[rhmm->diphone]
00623                             [de->ci_phone_ids[de->len-1]];
00624                     }
00625 
00626                     bitvec_set(ngs->word_active, w);
00627                 }
00628             }
00629         }
00630 
00631         /* Get the best exit into silence. */
00632         if (best_silrc_score < rcss[rcpermtab[ps_search_acmod(ngs)->mdef->sil]]) {
00633             best_silrc_score = rcss[rcpermtab[ps_search_acmod(ngs)->mdef->sil]];
00634             best_silrc_bp = b;
00635         }
00636     }
00637 
00638     /* Transition to <sil> */
00639     newscore = best_silrc_score + ngs->silpen + pip;
00640     if ((newscore > thresh) && (newscore > WORST_SCORE)) {
00641         w = ps_search_silence_wid(ngs);
00642         rhmm = (root_chan_t *) ngs->word_chan[w];
00643         if ((hmm_frame(&rhmm->hmm) < cf)
00644             || (hmm_in_score(&rhmm->hmm) < newscore)) {
00645             hmm_enter(&rhmm->hmm, newscore,
00646                       best_silrc_bp, nf);
00647             bitvec_set(ngs->word_active, w);
00648         }
00649     }
00650     /* Transition to noise words */
00651     newscore = best_silrc_score + ngs->fillpen + pip;
00652     if ((newscore > thresh) && (newscore > WORST_SCORE)) {
00653         for (w = ps_search_silence_wid(ngs) + 1; w < ps_search_n_words(ngs); w++) {
00654             rhmm = (root_chan_t *) ngs->word_chan[w];
00655             /* Noise words that aren't a single phone will have NULL here. */
00656             if (rhmm == NULL)
00657                 continue;
00658             if ((hmm_frame(&rhmm->hmm) < cf)
00659                 || (hmm_in_score(&rhmm->hmm) < newscore)) {
00660                 hmm_enter(&rhmm->hmm, newscore,
00661                           best_silrc_bp, nf);
00662                 bitvec_set(ngs->word_active, w);
00663             }
00664         }
00665     }
00666 
00667     /* Reset initial channels of words that have become inactive even after word trans. */
00668     i = ngs->n_active_word[cf & 0x1];
00669     awl = ngs->active_word_list[cf & 0x1];
00670     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00671         rhmm = (root_chan_t *) ngs->word_chan[w];
00672         if (hmm_frame(&rhmm->hmm) == cf) {
00673             hmm_clear_scores(&rhmm->hmm);
00674         }
00675     }
00676 }
00677 
00678 static void
00679 fwdflat_renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
00680 {
00681     root_chan_t *rhmm;
00682     chan_t *hmm;
00683     int32 i, cf, w, *awl;
00684 
00685     cf = frame_idx;
00686 
00687     /* Renormalize individual word channels */
00688     i = ngs->n_active_word[cf & 0x1];
00689     awl = ngs->active_word_list[cf & 0x1];
00690     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00691         rhmm = (root_chan_t *) ngs->word_chan[w];
00692         if (hmm_frame(&rhmm->hmm) == cf) {
00693             hmm_normalize(&rhmm->hmm, norm);
00694         }
00695         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00696             if (hmm_frame(&hmm->hmm) == cf) {
00697                 hmm_normalize(&hmm->hmm, norm);
00698             }
00699         }
00700     }
00701 
00702     ngs->renormalized = TRUE;
00703 }
00704 
00705 int
00706 ngram_fwdflat_search(ngram_search_t *ngs)
00707 {
00708     int16 const *senscr;
00709     int frame_idx, best_senid;
00710     int16 best_senscr;
00711     int32 nf, i, j;
00712     int32 *nawl;
00713 
00714     /* Determine if we actually have a frame to process. */
00715     if (ps_search_acmod(ngs)->n_feat_frame == 0)
00716         return 0;
00717 
00718     /* Activate our HMMs for the current frame if need be. */
00719     if (!ps_search_acmod(ngs)->compallsen)
00720         compute_fwdflat_sen_active(ngs, acmod_frame_idx(ps_search_acmod(ngs)));
00721 
00722     /* Compute GMM scores for the current frame. */
00723     senscr = acmod_score(ps_search_acmod(ngs), &frame_idx,
00724                          &best_senscr, &best_senid);
00725     ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
00726 
00727     /* Mark backpointer table for current frame. */
00728     ngram_search_mark_bptable(ngs, frame_idx);
00729 
00730     /* Renormalize if necessary (FIXME: Make sure to test this) */
00731     if (ngs->best_score + (2 * ngs->beam) < WORST_SCORE) {
00732         E_INFO("Renormalizing Scores at frame %d, best score %d\n",
00733                frame_idx, ngs->best_score);
00734         fwdflat_renormalize_scores(ngs, frame_idx, ngs->best_score);
00735     }
00736 
00737     ngs->best_score = WORST_SCORE;
00738     hmm_context_set_senscore(ngs->hmmctx, senscr);
00739 
00740     /* Evaluate HMMs */
00741     fwdflat_eval_chan(ngs, frame_idx);
00742     /* Prune HMMs and do phone transitions. */
00743     fwdflat_prune_chan(ngs, frame_idx);
00744     /* Do word transitions. */
00745     fwdflat_word_transition(ngs, frame_idx);
00746 
00747     /* Create next active word list */
00748     nf = frame_idx + 1;
00749     nawl = ngs->active_word_list[nf & 0x1];
00750     for (i = 0, j = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
00751         if (bitvec_is_set(ngs->word_active, ngs->fwdflat_wordlist[i])) {
00752             *(nawl++) = ngs->fwdflat_wordlist[i];
00753             j++;
00754         }
00755     }
00756     for (i = ps_search_start_wid(ngs); i < ps_search_n_words(ngs); i++) {
00757         if (bitvec_is_set(ngs->word_active, i)) {
00758             *(nawl++) = i;
00759             j++;
00760         }
00761     }
00762     if (!ngs->fwdtree)
00763         ++ngs->n_frame;
00764     ngs->n_active_word[nf & 0x1] = j;
00765 
00766     /* Return the number of frames processed. */
00767     return 1;
00768 }
00769 
00773 static void
00774 destroy_fwdflat_wordlist(ngram_search_t *ngs)
00775 {
00776     ps_latnode_t *node, *tnode;
00777     int32 f;
00778 
00779     if (!ngs->fwdtree)
00780         return;
00781 
00782     for (f = 0; f < ngs->n_frame; f++) {
00783         for (node = ngs->frm_wordlist[f]; node; node = tnode) {
00784             tnode = node->next;
00785             listelem_free(ngs->latnode_alloc, node);
00786         }
00787     }
00788 }
00789 
00793 static void
00794 destroy_fwdflat_chan(ngram_search_t *ngs)
00795 {
00796     int32 i, wid;
00797     dict_entry_t *de;
00798 
00799     for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
00800         root_chan_t *rhmm;
00801         chan_t *thmm;
00802         wid = ngs->fwdflat_wordlist[i];
00803         de = ps_search_dict(ngs)->dict_list[wid];
00804 
00805         if (de->len == 1)
00806             continue;
00807 
00808         assert(de->mpx);
00809         assert(ngs->word_chan[wid] != NULL);
00810 
00811         /* The first HMM in ngs->word_chan[wid] was allocated with
00812          * ngs->root_chan_alloc, but this will attempt to free it
00813          * using ngs->chan_alloc, which will not work.  Therefore we
00814          * free it manually and move the list forward before handing
00815          * it off. */
00816         rhmm = (root_chan_t *)ngs->word_chan[wid];
00817         thmm = rhmm->next;
00818         listelem_free(ngs->root_chan_alloc, rhmm);
00819         ngs->word_chan[wid] = thmm;
00820         ngram_search_free_all_rc(ngs, wid);
00821     }
00822 }
00823 
00824 void
00825 ngram_fwdflat_finish(ngram_search_t *ngs)
00826 {
00827     int32 cf;
00828 
00829     destroy_fwdflat_chan(ngs);
00830     destroy_fwdflat_wordlist(ngs);
00831     bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
00832 
00833     /* This is the number of frames processed. */
00834     cf = acmod_frame_idx(ps_search_acmod(ngs));
00835     /* Add a mark in the backpointer table for one past the final frame. */
00836     ngram_search_mark_bptable(ngs, cf);
00837 
00838     /* Print out some statistics. */
00839     if (cf > 0) {
00840         E_INFO("%8d words recognized (%d/fr)\n",
00841                ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
00842         E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt,
00843                (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
00844         E_INFO("%8d channels searched (%d/fr)\n",
00845                ngs->st.n_fwdflat_chan, ngs->st.n_fwdflat_chan / (cf + 1));
00846         E_INFO("%8d words searched (%d/fr)\n",
00847                ngs->st.n_fwdflat_words, ngs->st.n_fwdflat_words / (cf + 1));
00848         E_INFO("%8d word transitions (%d/fr)\n",
00849                ngs->st.n_fwdflat_word_transition,
00850                ngs->st.n_fwdflat_word_transition / (cf + 1));
00851     }
00852 }

Generated on Thu Jan 27 2011 for PocketSphinx by  doxygen 1.7.1