• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libpocketsphinx/ngram_search_fwdtree.c

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00042 /* System headers. */
00043 #include <string.h>
00044 #include <assert.h>
00045 
00046 /* SphinxBase headers. */
00047 #include <ckd_alloc.h>
00048 #include <listelem_alloc.h>
00049 
00050 /* Local headers. */
00051 #include "ngram_search_fwdtree.h"
00052 
00053 /*
00054  * NOTE: this module assumes that the dictionary is organized as follows:
00055  *     Main, real dictionary words
00056  *     </s>
00057  *     <s>... (possibly more than one of these)
00058  *     <sil>
00059  *     noise-words...
00060  * In particular, note that </s> comes before <s> since </s> occurs in the LM, but
00061  * not <s> (well, there's no transition to <s> in the LM).
00062  *
00063  * This should probably be fixed at some point.
00064  */
00065 
00066 /* Turn this on to dump channels for debugging */
00067 #define __CHAN_DUMP__           0
00068 #if __CHAN_DUMP__
00069 #define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
00070 #else
00071 #define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
00072 #endif
00073 
00074 /*
00075  * Allocate that part of the search channel tree structure that is independent of the
00076  * LM in use.
00077  */
00078 static void
00079 init_search_tree(ngram_search_t *ngs)
00080 {
00081     int32 w, mpx, max_ph0, i, n_words, n_main_words;
00082     dict_entry_t *de;
00083 
00084     n_words = ps_search_n_words(ngs);
00085     n_main_words = dict_get_num_main_words(ps_search_dict(ngs));
00086     ngs->homophone_set = ckd_calloc(n_main_words, sizeof(*ngs->homophone_set));
00087 
00088     /* Find #single phone words, and #unique first diphones (#root channels) in dict. */
00089     max_ph0 = -1;
00090     ngs->n_1ph_words = 0;
00091     mpx = ps_search_dict(ngs)->dict_list[0]->mpx;
00092     for (w = 0; w < n_main_words; w++) {
00093         de = ps_search_dict(ngs)->dict_list[w];
00094 
00095         /* Paranoia Central; this check can probably be removed (RKM) */
00096         if (de->mpx != mpx)
00097             E_FATAL("HMM tree words not all mpx or all non-mpx\n");
00098 
00099         if (de->len == 1)
00100             ngs->n_1ph_words++;
00101         else {
00102             if (max_ph0 < de->phone_ids[0])
00103                 max_ph0 = de->phone_ids[0];
00104         }
00105     }
00106 
00107     /* Add remaining dict words (</s>, <s>, <sil>, noise words) to single-phone words */
00108     ngs->n_1ph_words += (n_words - n_main_words);
00109     ngs->n_root_chan_alloc = max_ph0 + 1;
00110     /* Verify that these are *actually* single-phone words, otherwise
00111      * really bad things will happen to us. */
00112     for (w = n_main_words; w < n_words; ++w) {
00113         de = ps_search_dict(ngs)->dict_list[w];
00114         if (de->len != 1) {
00115             E_WARN("Filler word %d = %s has more than one phone, ignoring it.\n",
00116                    w, de->word);
00117             --ngs->n_1ph_words;
00118         }
00119     }
00120 
00121     /* Allocate and initialize root channels */
00122     ngs->root_chan =
00123         ckd_calloc(ngs->n_root_chan_alloc, sizeof(*ngs->root_chan));
00124     for (i = 0; i < ngs->n_root_chan_alloc; i++) {
00125         hmm_init(ngs->hmmctx, &ngs->root_chan[i].hmm, mpx, -1, -1);
00126         ngs->root_chan[i].penult_phn_wid = -1;
00127         ngs->root_chan[i].next = NULL;
00128     }
00129 
00130     /* Allocate space for left-diphone -> root-chan map */
00131     ngs->first_phone_rchan_map =
00132         ckd_calloc(ngs->n_root_chan_alloc,
00133                    sizeof(*ngs->first_phone_rchan_map));
00134 
00135     /* Permanently allocate channels for single-phone words (1/word) */
00136     ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
00137     i = 0;
00138     /* Don't allocate channels for placeholders!! */
00139     for (w = ps_search_dict(ngs)->last_dummy + 1; w < n_words; w++) {
00140         de = ps_search_dict(ngs)->dict_list[w];
00141         if (de->len != 1)
00142             continue;
00143 
00144         ngs->rhmm_1ph[i].diphone = de->phone_ids[0];
00145         ngs->rhmm_1ph[i].ciphone = de->ci_phone_ids[0];
00146         hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, de->mpx,
00147                  de->phone_ids[0], de->ci_phone_ids[0]);
00148         ngs->rhmm_1ph[i].next = NULL;
00149 
00150         ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
00151         i++;
00152     }
00153 
00154     ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words,
00155                                        sizeof(*ngs->single_phone_wid));
00156     E_INFO("%d root, %d non-root channels, %d single-phone words\n",
00157            ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00158 }
00159 
00160 /*
00161  * One-time initialization of internal channels in HMM tree.
00162  */
00163 static void
00164 init_nonroot_chan(ngram_search_t *ngs, chan_t * hmm, int32 ph, int32 ci)
00165 {
00166     hmm->next = NULL;
00167     hmm->alt = NULL;
00168     hmm->info.penult_phn_wid = -1;
00169     hmm->ciphone = ci;
00170     hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, ph, ci);
00171 }
00172 
00173 /*
00174  * Allocate and initialize search channel-tree structure.
00175  * At this point, all the root-channels have been allocated and partly initialized
00176  * (as per init_search_tree()), and channels for all the single-phone words have been
00177  * allocated and initialized.  None of the interior channels of search-trees have
00178  * been allocated.
00179  * This routine may be called on every utterance, after reinit_search_tree() clears
00180  * the search tree created for the previous utterance.  Meant for reconfiguring the
00181  * search tree to suit the currently active LM.
00182  */
00183 static void
00184 create_search_tree(ngram_search_t *ngs)
00185 {
00186     dict_entry_t *de;
00187     chan_t *hmm;
00188     root_chan_t *rhmm;
00189     int32 w, i, j, p, ph;
00190     int32 n_words, n_main_words;
00191 
00192     n_words = ps_search_n_words(ngs);
00193     n_main_words = dict_get_num_main_words(ps_search_dict(ngs));
00194 
00195     E_INFO("Creating search tree\n");
00196 
00197     for (w = 0; w < n_main_words; w++)
00198         ngs->homophone_set[w] = -1;
00199 
00200     for (i = 0; i < ngs->n_root_chan_alloc; i++)
00201         ngs->first_phone_rchan_map[i] = -1;
00202 
00203     E_INFO("%d root, %d non-root channels, %d single-phone words\n",
00204            ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00205 
00206     ngs->n_1ph_LMwords = 0;
00207     ngs->n_root_chan = 0;
00208     ngs->n_nonroot_chan = 0;
00209 
00210     for (w = 0; w < n_main_words; w++) {
00211         de = ps_search_dict(ngs)->dict_list[w];
00212 
00213         /* Ignore dictionary words not in LM */
00214         if (!ngram_model_set_known_wid(ngs->lmset, de->wid))
00215             continue;
00216 
00217         /* Handle single-phone words individually; not in channel tree */
00218         if (de->len == 1) {
00219             ngs->single_phone_wid[ngs->n_1ph_LMwords++] = w;
00220             continue;
00221         }
00222 
00223         /* Insert w into channel tree; first find or allocate root channel */
00224         if (ngs->first_phone_rchan_map[de->phone_ids[0]] < 0) {
00225             ngs->first_phone_rchan_map[de->phone_ids[0]] = ngs->n_root_chan;
00226             rhmm = &(ngs->root_chan[ngs->n_root_chan]);
00227             if (hmm_is_mpx(&rhmm->hmm))
00228                 rhmm->hmm.s.mpx_ssid[0] = de->phone_ids[0];
00229             else
00230                 rhmm->hmm.s.ssid = de->phone_ids[0];
00231             rhmm->hmm.tmatid = de->ci_phone_ids[0];
00232             rhmm->diphone = de->phone_ids[0];
00233             rhmm->ciphone = de->ci_phone_ids[0];
00234 
00235             ngs->n_root_chan++;
00236         }
00237         else
00238             rhmm = &(ngs->root_chan[ngs->first_phone_rchan_map[de->phone_ids[0]]]);
00239 
00240         /* Now, rhmm = root channel for w.  Go on to remaining phones */
00241         if (de->len == 2) {
00242             /* Next phone is the last; not kept in tree; add w to penult_phn_wid set */
00243             if ((j = rhmm->penult_phn_wid) < 0)
00244                 rhmm->penult_phn_wid = w;
00245             else {
00246                 for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
00247                 ngs->homophone_set[j] = w;
00248             }
00249         }
00250         else {
00251             /* Add remaining phones, except the last, to tree */
00252             ph = de->phone_ids[1];
00253             hmm = rhmm->next;
00254             if (hmm == NULL) {
00255                 rhmm->next = hmm = listelem_malloc(ngs->chan_alloc);
00256                 init_nonroot_chan(ngs, hmm, ph, de->ci_phone_ids[1]);
00257                 ngs->n_nonroot_chan++;
00258             }
00259             else {
00260                 chan_t *prev_hmm = NULL;
00261 
00262                 for (; hmm && (hmm->hmm.s.ssid != ph); hmm = hmm->alt)
00263                     prev_hmm = hmm;
00264                 if (!hmm) {     /* thanks, rkm! */
00265                     prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
00266                     init_nonroot_chan(ngs, hmm, ph, de->ci_phone_ids[1]);
00267                     ngs->n_nonroot_chan++;
00268                 }
00269             }
00270             /* de->phone_ids[1] now in tree; pointed to by hmm */
00271 
00272             for (p = 2; p < de->len - 1; p++) {
00273                 ph = de->phone_ids[p];
00274                 if (!hmm->next) {
00275                     hmm->next = listelem_malloc(ngs->chan_alloc);
00276                     hmm = hmm->next;
00277                     init_nonroot_chan(ngs, hmm, ph, de->ci_phone_ids[p]);
00278                     ngs->n_nonroot_chan++;
00279                 }
00280                 else {
00281                     chan_t *prev_hmm = NULL;
00282 
00283                     for (hmm = hmm->next; hmm && (hmm->hmm.s.ssid != ph);
00284                          hmm = hmm->alt)
00285                         prev_hmm = hmm;
00286                     if (!hmm) { /* thanks, rkm! */
00287                         prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
00288                         init_nonroot_chan(ngs, hmm, ph, de->ci_phone_ids[p]);
00289                         ngs->n_nonroot_chan++;
00290                     }
00291                 }
00292             }
00293 
00294             /* All but last phone of w in tree; add w to hmm->info.penult_phn_wid set */
00295             if ((j = hmm->info.penult_phn_wid) < 0)
00296                 hmm->info.penult_phn_wid = w;
00297             else {
00298                 for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
00299                 ngs->homophone_set[j] = w;
00300             }
00301         }
00302     }
00303 
00304     /* FIXME: This depends on the ordering of the dictionary and is
00305      * thus rather fragile... */
00306     ngs->n_1ph_words = ngs->n_1ph_LMwords;
00307     ngs->n_1ph_LMwords++;            /* including </s> */
00308 
00309     for (w = n_main_words; w < n_words; ++w) {
00310         de = ps_search_dict(ngs)->dict_list[w];
00311         /* Skip anything that doesn't actually have a single phone. */
00312         if (de->len != 1)
00313             continue;
00314         /* If something isn't a filler word (which is very unlikely),
00315          * then skip it if it isn't in the language model. */
00316         if ((!ISA_FILLER_WORD(ngs, w))
00317             && (!ngram_model_set_known_wid(ngs->lmset, de->wid)))
00318             continue;
00319         ngs->single_phone_wid[ngs->n_1ph_words++] = w;
00320     }
00321 
00322     if (ngs->n_nonroot_chan >= ngs->max_nonroot_chan) {
00323         /* Give some room for channels for new words added dynamically at run time */
00324         ngs->max_nonroot_chan = ngs->n_nonroot_chan + 128;
00325         E_INFO("max nonroot chan increased to %d\n", ngs->max_nonroot_chan);
00326 
00327         /* Free old active channel list array if any and allocate new one */
00328         if (ngs->active_chan_list)
00329             ckd_free_2d(ngs->active_chan_list);
00330         ngs->active_chan_list = ckd_calloc_2d(2, ngs->max_nonroot_chan,
00331                                               sizeof(**ngs->active_chan_list));
00332     }
00333 
00334     E_INFO("%d root, %d non-root channels, %d single-phone words\n",
00335            ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00336 }
00337 
00338 static void
00339 reinit_search_subtree(ngram_search_t *ngs, chan_t * hmm)
00340 {
00341     chan_t *child, *sibling;
00342 
00343     /* First free all children under hmm */
00344     for (child = hmm->next; child; child = sibling) {
00345         sibling = child->alt;
00346         reinit_search_subtree(ngs, child);
00347     }
00348 
00349     /* Now free hmm */
00350     hmm_deinit(&hmm->hmm);
00351     listelem_free(ngs->chan_alloc, hmm);
00352 }
00353 
00354 /*
00355  * Delete search tree by freeing all interior channels within search tree and
00356  * restoring root channel state to the init state (i.e., just after init_search_tree()).
00357  */
00358 static void
00359 reinit_search_tree(ngram_search_t *ngs)
00360 {
00361     int32 i;
00362     chan_t *hmm, *sibling;
00363 
00364     for (i = 0; i < ngs->n_root_chan; i++) {
00365         hmm = ngs->root_chan[i].next;
00366 
00367         while (hmm) {
00368             sibling = hmm->alt;
00369             reinit_search_subtree(ngs, hmm);
00370             hmm = sibling;
00371         }
00372 
00373         ngs->root_chan[i].penult_phn_wid = -1;
00374         ngs->root_chan[i].next = NULL;
00375     }
00376     ngs->n_nonroot_chan = 0;
00377 }
00378 
00379 void
00380 ngram_fwdtree_init(ngram_search_t *ngs)
00381 {
00382     /* Allocate bestbp_rc, lastphn_cand, last_ltrans */
00383     ngs->bestbp_rc = ckd_calloc(bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef),
00384                                 sizeof(*ngs->bestbp_rc));
00385     ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
00386                                    sizeof(*ngs->lastphn_cand));
00387     init_search_tree(ngs);
00388     create_search_tree(ngs);
00389 }
00390 
00391 void
00392 ngram_fwdtree_deinit(ngram_search_t *ngs)
00393 {
00394     int i, w, n_words;
00395 
00396     n_words = ps_search_n_words(ngs);
00397     /* Reset non-root channels. */
00398     reinit_search_tree(ngs);
00399 
00400     /* Now deallocate all the root channels too. */
00401     for (i = 0; i < ngs->n_root_chan_alloc; i++) {
00402         hmm_deinit(&ngs->root_chan[i].hmm);
00403     }
00404     if (ngs->rhmm_1ph) {
00405         for (i = w = 0; w < n_words; ++w) {
00406             if (ps_search_dict(ngs)->dict_list[w]->len != 1)
00407                 continue;
00408             hmm_deinit(&ngs->rhmm_1ph[i].hmm);
00409             ++i;
00410         }
00411         ckd_free(ngs->rhmm_1ph);
00412     }
00413     ngs->n_nonroot_chan = 0;
00414     ckd_free(ngs->first_phone_rchan_map);
00415     ckd_free(ngs->root_chan);
00416     ckd_free(ngs->homophone_set);
00417     ckd_free(ngs->single_phone_wid);
00418     ngs->max_nonroot_chan = 0;
00419     ckd_free_2d(ngs->active_chan_list);
00420     ckd_free(ngs->cand_sf);
00421     ckd_free(ngs->bestbp_rc);
00422     ckd_free(ngs->lastphn_cand);
00423 }
00424 
00425 int
00426 ngram_fwdtree_reinit(ngram_search_t *ngs)
00427 {
00428     reinit_search_tree(ngs);
00429     create_search_tree(ngs);
00430     return 0;
00431 }
00432 
00433 void
00434 ngram_fwdtree_start(ngram_search_t *ngs)
00435 {
00436     ps_search_t *base = (ps_search_t *)ngs;
00437     int32 i, w, n_words;
00438     root_chan_t *rhmm;
00439 
00440     n_words = ps_search_n_words(ngs);
00441 
00442     /* Reset utterance statistics. */
00443     memset(&ngs->st, 0, sizeof(ngs->st));
00444 
00445     /* Reset backpointer table. */
00446     ngs->bpidx = 0;
00447     ngs->bss_head = 0;
00448 
00449     /* Reset word lattice. */
00450     for (i = 0; i < n_words; ++i)
00451         ngs->word_lat_idx[i] = NO_BP;
00452 
00453     /* Reset active HMM and word lists. */
00454     ngs->n_active_chan[0] = ngs->n_active_chan[1] = 0;
00455     ngs->n_active_word[0] = ngs->n_active_word[1] = 0;
00456 
00457     /* Reset scores. */
00458     ngs->best_score = 0;
00459     ngs->renormalized = 0;
00460 
00461     /* Reset other stuff. */
00462     for (i = 0; i < n_words; i++)
00463         ngs->last_ltrans[i].sf = -1;
00464     ngs->n_frame = 0;
00465 
00466     /* Clear the hypothesis string. */
00467     ckd_free(base->hyp_str);
00468     base->hyp_str = NULL;
00469 
00470     /* Reset the permanently allocated single-phone words, since they
00471      * may have junk left over in them from FWDFLAT. */
00472     for (i = 0; i < ngs->n_1ph_words; i++) {
00473         w = ngs->single_phone_wid[i];
00474         rhmm = (root_chan_t *) ngs->word_chan[w];
00475         hmm_clear(&rhmm->hmm);
00476     }
00477 
00478     /* Start search with <s>; word_chan[<s>] is permanently allocated */
00479     rhmm = (root_chan_t *) ngs->word_chan[dict_to_id(ps_search_dict(ngs), "<s>")];
00480     hmm_clear(&rhmm->hmm);
00481     hmm_enter(&rhmm->hmm, 0, NO_BP, 0);
00482 }
00483 
00484 /*
00485  * Mark the active senones for all senones belonging to channels that are active in the
00486  * current frame.
00487  */
00488 static void
00489 compute_sen_active(ngram_search_t *ngs, int frame_idx)
00490 {
00491     root_chan_t *rhmm;
00492     chan_t *hmm, **acl;
00493     int32 i, w, *awl;
00494 
00495     acmod_clear_active(ps_search_acmod(ngs));
00496 
00497     /* Flag active senones for root channels */
00498     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00499         if (hmm_frame(&rhmm->hmm) == frame_idx)
00500             acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
00501     }
00502 
00503     /* Flag active senones for nonroot channels in HMM tree */
00504     i = ngs->n_active_chan[frame_idx & 0x1];
00505     acl = ngs->active_chan_list[frame_idx & 0x1];
00506     for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00507         acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
00508     }
00509 
00510     /* Flag active senones for individual word channels */
00511     i = ngs->n_active_word[frame_idx & 0x1];
00512     awl = ngs->active_word_list[frame_idx & 0x1];
00513     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00514         for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00515             acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
00516         }
00517     }
00518     for (i = 0; i < ngs->n_1ph_words; i++) {
00519         w = ngs->single_phone_wid[i];
00520         rhmm = (root_chan_t *) ngs->word_chan[w];
00521 
00522         if (hmm_frame(&rhmm->hmm) == frame_idx)
00523             acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
00524     }
00525 }
00526 
00527 static void
00528 renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
00529 {
00530     root_chan_t *rhmm;
00531     chan_t *hmm, **acl;
00532     int32 i, w, *awl;
00533 
00534     /* Renormalize root channels */
00535     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00536         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00537             hmm_normalize(&rhmm->hmm, norm);
00538         }
00539     }
00540 
00541     /* Renormalize nonroot channels in HMM tree */
00542     i = ngs->n_active_chan[frame_idx & 0x1];
00543     acl = ngs->active_chan_list[frame_idx & 0x1];
00544     for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00545         hmm_normalize(&hmm->hmm, norm);
00546     }
00547 
00548     /* Renormalize individual word channels */
00549     i = ngs->n_active_word[frame_idx & 0x1];
00550     awl = ngs->active_word_list[frame_idx & 0x1];
00551     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00552         for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00553             hmm_normalize(&hmm->hmm, norm);
00554         }
00555     }
00556     for (i = 0; i < ngs->n_1ph_words; i++) {
00557         w = ngs->single_phone_wid[i];
00558         rhmm = (root_chan_t *) ngs->word_chan[w];
00559         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00560             hmm_normalize(&rhmm->hmm, norm);
00561         }
00562     }
00563 
00564     ngs->renormalized = TRUE;
00565 }
00566 
00567 static int32
00568 eval_root_chan(ngram_search_t *ngs, int frame_idx)
00569 {
00570     root_chan_t *rhmm;
00571     int32 i, bestscore;
00572 
00573     bestscore = WORST_SCORE;
00574     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00575         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00576             int32 score = chan_v_eval(rhmm);
00577             if (bestscore < score)
00578                 bestscore = score;
00579             ++ngs->st.n_root_chan_eval;
00580         }
00581     }
00582     return (bestscore);
00583 }
00584 
00585 static int32
00586 eval_nonroot_chan(ngram_search_t *ngs, int frame_idx)
00587 {
00588     chan_t *hmm, **acl;
00589     int32 i, bestscore;
00590 
00591     i = ngs->n_active_chan[frame_idx & 0x1];
00592     acl = ngs->active_chan_list[frame_idx & 0x1];
00593     bestscore = WORST_SCORE;
00594     ngs->st.n_nonroot_chan_eval += i;
00595 
00596     for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00597         int32 score = chan_v_eval(hmm);
00598         assert(hmm_frame(&hmm->hmm) == frame_idx);
00599         if (bestscore < score)
00600             bestscore = score;
00601     }
00602 
00603     return bestscore;
00604 }
00605 
00606 static int32
00607 eval_word_chan(ngram_search_t *ngs, int frame_idx)
00608 {
00609     root_chan_t *rhmm;
00610     chan_t *hmm;
00611     int32 i, w, bestscore, *awl, j, k;
00612 
00613     k = 0;
00614     bestscore = WORST_SCORE;
00615     awl = ngs->active_word_list[frame_idx & 0x1];
00616 
00617     i = ngs->n_active_word[frame_idx & 0x1];
00618     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00619         assert(bitvec_is_set(ngs->word_active, w));
00620         bitvec_clear(ngs->word_active, w);
00621         assert(ngs->word_chan[w] != NULL);
00622 
00623         for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00624             int32 score;
00625 
00626             assert(hmm_frame(&hmm->hmm) == frame_idx);
00627             score = chan_v_eval(hmm);
00628             /*printf("eval word chan %d score %d\n", w, score); */
00629 
00630             if (bestscore < score)
00631                 bestscore = score;
00632 
00633             k++;
00634         }
00635     }
00636 
00637     /* Similarly for statically allocated single-phone words */
00638     j = 0;
00639     for (i = 0; i < ngs->n_1ph_words; i++) {
00640         int32 score;
00641 
00642         w = ngs->single_phone_wid[i];
00643         rhmm = (root_chan_t *) ngs->word_chan[w];
00644         if (hmm_frame(&rhmm->hmm) < frame_idx)
00645             continue;
00646 
00647         score = chan_v_eval(rhmm);
00648         /* printf("eval 1ph word chan %d score %d\n", w, score); */
00649         if (bestscore < score && w != ps_search_finish_wid(ngs))
00650             bestscore = score;
00651 
00652         j++;
00653     }
00654 
00655     ngs->st.n_last_chan_eval += k + j;
00656     ngs->st.n_nonroot_chan_eval += k + j;
00657     ngs->st.n_word_lastchan_eval +=
00658         ngs->n_active_word[frame_idx & 0x1] + j;
00659 
00660     return bestscore;
00661 }
00662 
00663 static int32
00664 evaluate_channels(ngram_search_t *ngs, int16 const *senone_scores, int frame_idx)
00665 {
00666     int32 bs;
00667 
00668     hmm_context_set_senscore(ngs->hmmctx, senone_scores);
00669     ngs->best_score = eval_root_chan(ngs, frame_idx);
00670     if ((bs = eval_nonroot_chan(ngs, frame_idx)) > ngs->best_score)
00671         ngs->best_score = bs;
00672     if ((bs = eval_word_chan(ngs, frame_idx)) > ngs->best_score)
00673         ngs->best_score = bs;
00674     ngs->last_phone_best_score = bs;
00675 
00676     return ngs->best_score;
00677 }
00678 
00679 /*
00680  * Prune currently active root channels for next frame.  Also, perform exit
00681  * transitions out of them and activate successors.
00682  * score[] of pruned root chan set to WORST_SCORE elsewhere.
00683  */
00684 static void
00685 prune_root_chan(ngram_search_t *ngs, int frame_idx)
00686 {
00687     root_chan_t *rhmm;
00688     chan_t *hmm;
00689     int32 i, nf, w;
00690     int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
00691     chan_t **nacl;              /* next active list */
00692     lastphn_cand_t *candp;
00693 
00694     nf = frame_idx + 1;
00695     thresh = ngs->best_score + ngs->dynamic_beam;
00696     newphone_thresh = ngs->best_score + ngs->pbeam;
00697     lastphn_thresh = ngs->best_score + ngs->lpbeam;
00698     nacl = ngs->active_chan_list[nf & 0x1];
00699 
00700     for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
00701         /* First check if this channel was active in current frame */
00702         if (hmm_frame(&rhmm->hmm) < frame_idx)
00703             continue;
00704 
00705         if (hmm_bestscore(&rhmm->hmm) > thresh) {
00706             hmm_frame(&rhmm->hmm) = nf;  /* rhmm will be active in next frame */
00707 
00708             /* transitions out of this root channel */
00709             newphone_score = hmm_out_score(&rhmm->hmm) + ngs->pip;
00710             if (newphone_score > newphone_thresh) {
00711                 /* transition to all next-level channels in the HMM tree */
00712                 for (hmm = rhmm->next; hmm; hmm = hmm->alt) {
00713                     if ((hmm_frame(&hmm->hmm) < frame_idx)
00714                         || (hmm_in_score(&hmm->hmm)
00715                             < newphone_score)) {
00716                         hmm_enter(&hmm->hmm, newphone_score,
00717                                   hmm_out_history(&rhmm->hmm), nf);
00718                         *(nacl++) = hmm;
00719                     }
00720                 }
00721 
00722                 /*
00723                  * Transition to last phone of all words for which this is the
00724                  * penultimate phone (the last phones may need multiple right contexts).
00725                  * Remember to remove the temporary newword_penalty.
00726                  */
00727                 if (newphone_score > lastphn_thresh) {
00728                     for (w = rhmm->penult_phn_wid; w >= 0;
00729                          w = ngs->homophone_set[w]) {
00730                         candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
00731                         ngs->n_lastphn_cand++;
00732                         candp->wid = w;
00733                         candp->score =
00734                             newphone_score - ngs->nwpen;
00735                         candp->bp = hmm_out_history(&rhmm->hmm);
00736                     }
00737                 }
00738             }
00739         }
00740     }
00741     ngs->n_active_chan[nf & 0x1] = nacl - ngs->active_chan_list[nf & 0x1];
00742 }
00743 
00744 /*
00745  * Prune currently active nonroot channels in HMM tree for next frame.  Also, perform
00746  * exit transitions out of such channels and activate successors.
00747  */
00748 static void
00749 prune_nonroot_chan(ngram_search_t *ngs, int frame_idx)
00750 {
00751     chan_t *hmm, *nexthmm;
00752     int32 nf, w, i;
00753     int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
00754     chan_t **acl, **nacl;       /* active list, next active list */
00755     lastphn_cand_t *candp;
00756     dict_entry_t *de;
00757 
00758     nf = frame_idx + 1;
00759 
00760     thresh = ngs->best_score + ngs->dynamic_beam;
00761     newphone_thresh = ngs->best_score + ngs->pbeam;
00762     lastphn_thresh = ngs->best_score + ngs->lpbeam;
00763 
00764     acl = ngs->active_chan_list[frame_idx & 0x1];   /* currently active HMMs in tree */
00765     nacl = ngs->active_chan_list[nf & 0x1] + ngs->n_active_chan[nf & 0x1];
00766 
00767     for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++); i > 0;
00768          --i, hmm = *(acl++)) {
00769         assert(hmm_frame(&hmm->hmm) >= frame_idx);
00770 
00771         if (hmm_bestscore(&hmm->hmm) > thresh) {
00772             /* retain this channel in next frame */
00773             if (hmm_frame(&hmm->hmm) != nf) {
00774                 hmm_frame(&hmm->hmm) = nf;
00775                 *(nacl++) = hmm;
00776             }
00777 
00778             /* transitions out of this channel */
00779             newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip;
00780             if (newphone_score > newphone_thresh) {
00781                 /* transition to all next-level channel in the HMM tree */
00782                 for (nexthmm = hmm->next; nexthmm; nexthmm = nexthmm->alt) {
00783                     if ((hmm_frame(&nexthmm->hmm) < frame_idx)
00784                         || (hmm_in_score(&nexthmm->hmm) < newphone_score)) {
00785                         if (hmm_frame(&nexthmm->hmm) != nf) {
00786                             /* Keep this HMM on the active list */
00787                             *(nacl++) = nexthmm;
00788                         }
00789                         hmm_enter(&nexthmm->hmm, newphone_score,
00790                                   hmm_out_history(&hmm->hmm), nf);
00791                     }
00792                 }
00793 
00794                 /*
00795                  * Transition to last phone of all words for which this is the
00796                  * penultimate phone (the last phones may need multiple right contexts).
00797                  * Remember to remove the temporary newword_penalty.
00798                  */
00799                 if (newphone_score > lastphn_thresh) {
00800                     for (w = hmm->info.penult_phn_wid; w >= 0;
00801                          w = ngs->homophone_set[w]) {
00802                         de = ps_search_dict(ngs)->dict_list[w];
00803                         candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
00804                         ngs->n_lastphn_cand++;
00805                         candp->wid = w;
00806                         candp->score =
00807                             newphone_score - ngs->nwpen;
00808                         candp->bp = hmm_out_history(&hmm->hmm);
00809                     }
00810                 }
00811             }
00812         }
00813         else if (hmm_frame(&hmm->hmm) != nf) {
00814             hmm_clear_scores(&hmm->hmm);
00815         }
00816     }
00817     ngs->n_active_chan[nf & 0x1] = nacl - ngs->active_chan_list[nf & 0x1];
00818 }
00819 
00820 /*
00821  * Execute the transition into the last phone for all candidates words emerging from
00822  * the HMM tree.  Attach LM scores to such transitions.
00823  * (Executed after pruning root and non-root, but before pruning word-chan.)
00824  */
00825 static void
00826 last_phone_transition(ngram_search_t *ngs, int frame_idx)
00827 {
00828     int32 i, j, k, nf, bp, bplast, w;
00829     lastphn_cand_t *candp;
00830     int32 *nawl;
00831     int32 thresh;
00832     uint16 *rcpermtab, ciph0;
00833     int32 bestscore, dscr;
00834     dict_entry_t *de;
00835     chan_t *hmm;
00836     bptbl_t *bpe;
00837     int32 n_cand_sf = 0;
00838 
00839     nf = frame_idx + 1;
00840     nawl = ngs->active_word_list[nf & 0x1];
00841     ngs->st.n_lastphn_cand_utt += ngs->n_lastphn_cand;
00842 
00843     /* For each candidate word (entering its last phone) */
00844     /* If best LM score and bp for candidate known use it, else sort cands by startfrm */
00845     for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
00846         /* Backpointer entry for it. */
00847         bpe = &(ngs->bp_table[candp->bp]);
00848         /* Right context phone table. */
00849         rcpermtab =
00850             (bpe->r_diph >= 0)
00851             ? ps_search_dict(ngs)->rcFwdPermTable[bpe->r_diph]
00852             : ngs->zeroPermTab;
00853 
00854         /* Subtract starting score for candidate, leave it with only word score */
00855         de = ps_search_dict(ngs)->dict_list[candp->wid];
00856         ciph0 = de->ci_phone_ids[0];
00857         candp->score -= ngs->bscore_stack[bpe->s_idx + rcpermtab[ciph0]];
00858 
00859         /*
00860          * If this candidate not occurred in an earlier frame, prepare for finding
00861          * best transition score into last phone; sort by start frame.
00862          */
00863         /* i.e. if we don't have an entry in last_ltrans for this
00864          * <word,sf>, then create one */
00865         if (ngs->last_ltrans[candp->wid].sf != bpe->frame + 1) {
00866             /* Look for an entry in cand_sf matching the backpointer
00867              * for this candidate. */
00868             for (j = 0; j < n_cand_sf; j++) {
00869                 if (ngs->cand_sf[j].bp_ef == bpe->frame)
00870                     break;
00871             }
00872             /* Oh, we found one, so chain onto it. */
00873             if (j < n_cand_sf)
00874                 candp->next = ngs->cand_sf[j].cand;
00875             else {
00876                 /* Nope, let's make a new one, allocating cand_sf if necessary. */
00877                 if (n_cand_sf >= ngs->cand_sf_alloc) {
00878                     if (ngs->cand_sf_alloc == 0) {
00879                         ngs->cand_sf =
00880                             ckd_calloc(CAND_SF_ALLOCSIZE,
00881                                        sizeof(*ngs->cand_sf));
00882                         ngs->cand_sf_alloc = CAND_SF_ALLOCSIZE;
00883                     }
00884                     else {
00885                         ngs->cand_sf_alloc += CAND_SF_ALLOCSIZE;
00886                         ngs->cand_sf = ckd_realloc(ngs->cand_sf,
00887                                                    ngs->cand_sf_alloc
00888                                                    * sizeof(*ngs->cand_sf));
00889                         E_INFO("cand_sf[] increased to %d entries\n",
00890                                ngs->cand_sf_alloc);
00891                     }
00892                 }
00893 
00894                 /* Use the newly created cand_sf. */
00895                 j = n_cand_sf++;
00896                 candp->next = -1; /* End of the chain. */
00897                 ngs->cand_sf[j].bp_ef = bpe->frame;
00898             }
00899             /* Update it to point to this candidate. */
00900             ngs->cand_sf[j].cand = i;
00901 
00902             ngs->last_ltrans[candp->wid].dscr = WORST_SCORE;
00903             ngs->last_ltrans[candp->wid].sf = bpe->frame + 1;
00904         }
00905     }
00906 
00907     /* Compute best LM score and bp for new cands entered in the sorted lists above */
00908     for (i = 0; i < n_cand_sf; i++) {
00909         /* For the i-th unique end frame... */
00910         bp = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef];
00911         bplast = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef + 1] - 1;
00912 
00913         for (bpe = &(ngs->bp_table[bp]); bp <= bplast; bp++, bpe++) {
00914             if (!bpe->valid)
00915                 continue;
00916 
00917             /* For each bp entry in the i-th end frame... */
00918             rcpermtab = (bpe->r_diph >= 0) ?
00919                 ps_search_dict(ngs)->rcFwdPermTable[bpe->r_diph] : ngs->zeroPermTab;
00920 
00921             /* For each candidate at the start frame find bp->cand transition-score */
00922             for (j = ngs->cand_sf[i].cand; j >= 0; j = candp->next) {
00923                 int32 n_used;
00924                 candp = &(ngs->lastphn_cand[j]);
00925                 de = ps_search_dict(ngs)->dict_list[candp->wid];
00926                 ciph0 = de->ci_phone_ids[0];
00927 
00928                 dscr = ngs->bscore_stack[bpe->s_idx + rcpermtab[ciph0]];
00929                 dscr += ngram_tg_score(ngs->lmset, de->wid, bpe->real_wid,
00930                                        bpe->prev_real_wid, &n_used);
00931 
00932                 if (ngs->last_ltrans[candp->wid].dscr < dscr) {
00933                     ngs->last_ltrans[candp->wid].dscr = dscr;
00934                     ngs->last_ltrans[candp->wid].bp = bp;
00935                 }
00936             }
00937         }
00938     }
00939 
00940     /* Update best transitions for all candidates; also update best lastphone score */
00941     bestscore = ngs->last_phone_best_score;
00942     for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
00943         candp->score += ngs->last_ltrans[candp->wid].dscr;
00944         candp->bp = ngs->last_ltrans[candp->wid].bp;
00945 
00946         if (bestscore < candp->score)
00947             bestscore = candp->score;
00948     }
00949     ngs->last_phone_best_score = bestscore;
00950 
00951     /* At this pt, we know the best entry score (with LM component) for all candidates */
00952     thresh = bestscore + ngs->lponlybeam;
00953     for (i = ngs->n_lastphn_cand, candp = ngs->lastphn_cand; i > 0; --i, candp++) {
00954         if (candp->score > thresh) {
00955             w = candp->wid;
00956 
00957             ngram_search_alloc_all_rc(ngs, w);
00958 
00959             k = 0;
00960             for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00961                 if ((hmm_frame(&hmm->hmm) < frame_idx)
00962                     || (hmm_in_score(&hmm->hmm) < candp->score)) {
00963                     assert(hmm_frame(&hmm->hmm) != nf);
00964                     hmm_enter(&hmm->hmm,
00965                               candp->score, candp->bp, nf);
00966                     k++;
00967                 }
00968             }
00969             if (k > 0) {
00970                 assert(bitvec_is_clear(ngs->word_active, w));
00971                 assert(ps_search_dict(ngs)->dict_list[w]->len > 1);
00972                 *(nawl++) = w;
00973                 bitvec_set(ngs->word_active, w);
00974             }
00975         }
00976     }
00977     ngs->n_active_word[nf & 0x1] = nawl - ngs->active_word_list[nf & 0x1];
00978 }
00979 
00980 /*
00981  * Prune currently active word channels for next frame.  Also, perform exit
00982  * transitions out of such channels and active successors.
00983  */
00984 static void
00985 prune_word_chan(ngram_search_t *ngs, int frame_idx)
00986 {
00987     root_chan_t *rhmm;
00988     chan_t *hmm, *thmm;
00989     chan_t **phmmp;             /* previous HMM-pointer */
00990     int32 nf, w, i, k;
00991     int32 newword_thresh, lastphn_thresh;
00992     int32 *awl, *nawl;
00993 
00994     nf = frame_idx + 1;
00995     newword_thresh = ngs->last_phone_best_score + ngs->wbeam;
00996     lastphn_thresh = ngs->last_phone_best_score + ngs->lponlybeam;
00997 
00998     awl = ngs->active_word_list[frame_idx & 0x1];
00999     nawl = ngs->active_word_list[nf & 0x1] + ngs->n_active_word[nf & 0x1];
01000 
01001     /* Dynamically allocated last channels of multi-phone words */
01002     for (i = ngs->n_active_word[frame_idx & 0x1], w = *(awl++); i > 0;
01003          --i, w = *(awl++)) {
01004         k = 0;
01005         phmmp = &(ngs->word_chan[w]);
01006         for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
01007             assert(hmm_frame(&hmm->hmm) >= frame_idx);
01008 
01009             thmm = hmm->next;
01010             if (hmm_bestscore(&hmm->hmm) > lastphn_thresh) {
01011                 /* retain this channel in next frame */
01012                 hmm_frame(&hmm->hmm) = nf;
01013                 k++;
01014                 phmmp = &(hmm->next);
01015 
01016                 /* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */
01017                 if (hmm_out_score(&hmm->hmm) > newword_thresh) {
01018                     /* can exit channel and recognize word */
01019                     ngram_search_save_bp(ngs, frame_idx, w,
01020                                  hmm_out_score(&hmm->hmm),
01021                                  hmm_out_history(&hmm->hmm),
01022                                  hmm->info.rc_id);
01023                 }
01024             }
01025             else if (hmm_frame(&hmm->hmm) == nf) {
01026                 phmmp = &(hmm->next);
01027             }
01028             else {
01029                 hmm_deinit(&hmm->hmm);
01030                 listelem_free(ngs->chan_alloc, hmm);
01031                 *phmmp = thmm;
01032             }
01033         }
01034         if ((k > 0) && (bitvec_is_clear(ngs->word_active, w))) {
01035             assert(ps_search_dict(ngs)->dict_list[w]->len > 1);
01036             *(nawl++) = w;
01037             bitvec_set(ngs->word_active, w);
01038         }
01039     }
01040     ngs->n_active_word[nf & 0x1] = nawl - ngs->active_word_list[nf & 0x1];
01041 
01042     /*
01043      * Prune permanently allocated single-phone channels.
01044      * NOTES: score[] of pruned channels set to WORST_SCORE elsewhere.
01045      */
01046     for (i = 0; i < ngs->n_1ph_words; i++) {
01047         w = ngs->single_phone_wid[i];
01048         rhmm = (root_chan_t *) ngs->word_chan[w];
01049         if (hmm_frame(&rhmm->hmm) < frame_idx)
01050             continue;
01051         if (hmm_bestscore(&rhmm->hmm) > lastphn_thresh) {
01052             hmm_frame(&rhmm->hmm) = nf;
01053 
01054             /* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */
01055             if (hmm_out_score(&rhmm->hmm) > newword_thresh) {
01056                 ngram_search_save_bp(ngs, frame_idx, w,
01057                              hmm_out_score(&rhmm->hmm),
01058                              hmm_out_history(&rhmm->hmm), 0);
01059             }
01060         }
01061     }
01062 }
01063 
01064 static void
01065 prune_channels(ngram_search_t *ngs, int frame_idx)
01066 {
01067     /* Clear last phone candidate list. */
01068     ngs->n_lastphn_cand = 0;
01069     /* Set the dynamic beam based on maxhmmpf here. */
01070     ngs->dynamic_beam = ngs->beam;
01071     if (ngs->maxhmmpf != -1
01072         && ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval > ngs->maxhmmpf) {
01073         /* Build a histogram to approximately prune them. */
01074         int32 bins[256], bw, nhmms, i;
01075         root_chan_t *rhmm;
01076         chan_t **acl, *hmm;
01077 
01078         /* Bins go from zero (best score) to edge of beam. */
01079         bw = -ngs->beam / 256;
01080         memset(bins, 0, sizeof(bins));
01081         /* For each active root channel. */
01082         for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
01083             int32 b;
01084 
01085             /* Put it in a bin according to its bestscore. */
01086             b = (ngs->best_score - hmm_bestscore(&rhmm->hmm)) / bw;
01087             if (b >= 256)
01088                 b = 255;
01089             ++bins[b];
01090         }
01091         /* For each active non-root channel. */
01092         acl = ngs->active_chan_list[frame_idx & 0x1];       /* currently active HMMs in tree */
01093         for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++);
01094              i > 0; --i, hmm = *(acl++)) {
01095             int32 b;
01096 
01097             /* Put it in a bin according to its bestscore. */
01098             b = (ngs->best_score - hmm_bestscore(&hmm->hmm)) / bw;
01099             if (b >= 256)
01100                 b = 255;
01101             ++bins[b];
01102         }
01103         /* Walk down the bins to find the new beam. */
01104         for (i = nhmms = 0; i < 256; ++i) {
01105             nhmms += bins[i];
01106             if (nhmms > ngs->maxhmmpf)
01107                 break;
01108         }
01109         ngs->dynamic_beam = -(i * bw);
01110     }
01111 
01112     prune_root_chan(ngs, frame_idx);
01113     prune_nonroot_chan(ngs, frame_idx);
01114     last_phone_transition(ngs, frame_idx);
01115     prune_word_chan(ngs, frame_idx);
01116 }
01117 
01118 /*
01119  * Limit the number of word exits in each frame to maxwpf.  And also limit the number of filler
01120  * words to 1.
01121  */
01122 static void
01123 bptable_maxwpf(ngram_search_t *ngs, int frame_idx)
01124 {
01125     int32 bp, n;
01126     int32 bestscr, worstscr;
01127     bptbl_t *bpe, *bestbpe, *worstbpe;
01128 
01129     /* Don't prune if no pruing. */
01130     if (ngs->maxwpf == -1 || ngs->maxwpf == ps_search_n_words(ngs))
01131         return;
01132 
01133     /* Allow only one filler word exit (the best) per frame */
01134     bestscr = (int32) 0x80000000;
01135     bestbpe = NULL;
01136     n = 0;
01137     for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01138         bpe = &(ngs->bp_table[bp]);
01139         /* FIXME: Not the ideal way to tell if this is a filler word. */
01140         if (ISA_FILLER_WORD(ngs, bpe->wid)) {
01141             if (bpe->score > bestscr) {
01142                 bestscr = bpe->score;
01143                 bestbpe = bpe;
01144             }
01145             bpe->valid = FALSE; /* Flag to indicate invalidation */
01146             n++;                /* No. of filler words */
01147         }
01148     }
01149     /* Restore bestbpe to valid state */
01150     if (bestbpe != NULL) {
01151         bestbpe->valid = TRUE;
01152         --n;
01153     }
01154 
01155     /* Allow up to maxwpf best entries to survive; mark the remaining with valid = 0 */
01156     n = (ngs->bpidx
01157          - ngs->bp_table_idx[frame_idx]) - n;  /* No. of entries after limiting fillers */
01158     for (; n > ngs->maxwpf; --n) {
01159         /* Find worst BPTable entry */
01160         worstscr = (int32) 0x7fffffff;
01161         worstbpe = NULL;
01162         for (bp = ngs->bp_table_idx[frame_idx]; (bp < ngs->bpidx); bp++) {
01163             bpe = &(ngs->bp_table[bp]);
01164             if (bpe->valid && (bpe->score < worstscr)) {
01165                 worstscr = bpe->score;
01166                 worstbpe = bpe;
01167             }
01168         }
01169         /* FIXME: Don't panic! */
01170         if (worstbpe == NULL)
01171             E_FATAL("PANIC: No worst BPtable entry remaining\n");
01172         worstbpe->valid = 0;
01173     }
01174 }
01175 
01176 static void
01177 word_transition(ngram_search_t *ngs, int frame_idx)
01178 {
01179     int32 i, k, bp, w, nf;
01180     int32 rc;
01181     int32 *rcss;                /* right context score stack */
01182     uint16 *rcpermtab;
01183     int32 thresh, newscore;
01184     bptbl_t *bpe;
01185     dict_entry_t *pde, *de;     /* previous dict entry, dict entry */
01186     root_chan_t *rhmm;
01187     struct bestbp_rc_s *bestbp_rc_ptr;
01188     int32 last_ciph;
01189     int32 ssid;
01190 
01191     /*
01192      * Transition to start of new word instances (HMM tree roots); but only if words
01193      * other than </s> finished here.
01194      * But, first, find the best starting score for each possible right context phone.
01195      */
01196     for (i = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef) - 1; i >= 0; --i)
01197         ngs->bestbp_rc[i].score = WORST_SCORE;
01198     k = 0;
01199     for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01200         bpe = &(ngs->bp_table[bp]);
01201         ngs->word_lat_idx[bpe->wid] = NO_BP;
01202 
01203         if (bpe->wid == ps_search_finish_wid(ngs))
01204             continue;
01205         k++;
01206 
01207         de = ps_search_dict(ngs)->dict_list[bpe->wid];
01208         rcpermtab =
01209             (bpe->r_diph >= 0)
01210             ? ps_search_dict(ngs)->rcFwdPermTable[bpe->r_diph]
01211             : ngs->zeroPermTab;
01212         last_ciph = de->ci_phone_ids[de->len - 1];
01213 
01214         rcss = &(ngs->bscore_stack[bpe->s_idx]);
01215         for (rc = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef) - 1; rc >= 0; --rc) {
01216             if (rcss[rcpermtab[rc]] > ngs->bestbp_rc[rc].score) {
01217                 ngs->bestbp_rc[rc].score = rcss[rcpermtab[rc]];
01218                 ngs->bestbp_rc[rc].path = bp;
01219                 ngs->bestbp_rc[rc].lc = last_ciph;
01220             }
01221         }
01222     }
01223     if (k == 0)
01224         return;
01225 
01226     nf = frame_idx + 1;
01227     thresh = ngs->best_score + ngs->dynamic_beam;
01228     /*
01229      * Hypothesize successors to words finished in this frame.
01230      * Main dictionary, multi-phone words transition to HMM-trees roots.
01231      */
01232     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01233         bestbp_rc_ptr = &(ngs->bestbp_rc[rhmm->ciphone]);
01234 
01235         newscore = bestbp_rc_ptr->score + ngs->nwpen + ngs->pip;
01236         if (newscore > thresh) {
01237             if ((hmm_frame(&rhmm->hmm) < frame_idx)
01238                 || (hmm_in_score(&rhmm->hmm) < newscore)) {
01239                 ssid =
01240                     ps_search_dict(ngs)->lcFwdTable[rhmm->diphone][bestbp_rc_ptr->lc];
01241                 hmm_enter(&rhmm->hmm, newscore,
01242                           bestbp_rc_ptr->path, nf);
01243                 if (hmm_is_mpx(&rhmm->hmm)) {
01244                     rhmm->hmm.s.mpx_ssid[0] = ssid;
01245                 }
01246             }
01247         }
01248     }
01249 
01250     /*
01251      * Single phone words; no right context for these.  Cannot use bestbp_rc as
01252      * LM scores have to be included.  First find best transition to these words.
01253      */
01254     for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01255         w = ngs->single_phone_wid[i];
01256         ngs->last_ltrans[w].dscr = (int32) 0x80000000;
01257     }
01258     for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01259         bpe = &(ngs->bp_table[bp]);
01260         if (!bpe->valid)
01261             continue;
01262 
01263         rcpermtab =
01264             (bpe->r_diph >=
01265              0) ? ps_search_dict(ngs)->rcFwdPermTable[bpe->r_diph] : ngs->zeroPermTab;
01266         rcss = ngs->bscore_stack + bpe->s_idx;
01267 
01268         for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01269             int32 n_used;
01270             w = ngs->single_phone_wid[i];
01271             de = ps_search_dict(ngs)->dict_list[w];
01272 
01273             newscore = rcss[rcpermtab[de->ci_phone_ids[0]]];
01274             newscore += ngram_tg_score(ngs->lmset, de->wid, bpe->real_wid,
01275                                        bpe->prev_real_wid, &n_used);
01276 
01277             if (ngs->last_ltrans[w].dscr < newscore) {
01278                 ngs->last_ltrans[w].dscr = newscore;
01279                 ngs->last_ltrans[w].bp = bp;
01280             }
01281         }
01282     }
01283 
01284     /* Now transition to in-LM single phone words */
01285     for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01286         w = ngs->single_phone_wid[i];
01287         rhmm = (root_chan_t *) ngs->word_chan[w];
01288         if ((newscore = ngs->last_ltrans[w].dscr + ngs->pip) > thresh) {
01289             bpe = ngs->bp_table + ngs->last_ltrans[w].bp;
01290             pde = ps_search_dict(ngs)->dict_list[bpe->wid];
01291 
01292             if ((hmm_frame(&rhmm->hmm) < frame_idx)
01293                 || (hmm_in_score(&rhmm->hmm) < newscore)) {
01294                 hmm_enter(&rhmm->hmm,
01295                           newscore, ngs->last_ltrans[w].bp, nf);
01296                 if (hmm_is_mpx(&rhmm->hmm)) {
01297                     rhmm->hmm.s.mpx_ssid[0] =
01298                         ps_search_dict(ngs)->lcFwdTable[rhmm->diphone][pde->ci_phone_ids[pde->len - 1]];
01299                 }
01300             }
01301         }
01302     }
01303 
01304     /* Remaining words: <sil>, noise words.  No mpx for these! */
01305     bestbp_rc_ptr = &(ngs->bestbp_rc[ps_search_acmod(ngs)->mdef->sil]);
01306     newscore = bestbp_rc_ptr->score + ngs->silpen + ngs->pip;
01307     if (newscore > thresh) {
01308         w = ps_search_silence_wid(ngs);
01309         rhmm = (root_chan_t *) ngs->word_chan[w];
01310         if ((hmm_frame(&rhmm->hmm) < frame_idx)
01311             || (hmm_in_score(&rhmm->hmm) < newscore)) {
01312             hmm_enter(&rhmm->hmm,
01313                       newscore, bestbp_rc_ptr->path, nf);
01314         }
01315     }
01316     newscore = bestbp_rc_ptr->score + ngs->fillpen + ngs->pip;
01317     if (newscore > thresh) {
01318         /* FIXME FIXME: This depends on having the noise words
01319          * immediately following silence in the dictionary... */
01320         for (w = ps_search_silence_wid(ngs) + 1; w < ps_search_n_words(ngs); w++) {
01321             rhmm = (root_chan_t *) ngs->word_chan[w];
01322             /* If this was not actually a single-phone word, rhmm will be NULL. */
01323             if (rhmm == NULL)
01324                 continue;
01325             if ((hmm_frame(&rhmm->hmm) < frame_idx)
01326                 || (hmm_in_score(&rhmm->hmm) < newscore)) {
01327                 hmm_enter(&rhmm->hmm,
01328                           newscore, bestbp_rc_ptr->path, nf);
01329             }
01330         }
01331     }
01332 }
01333 
01334 static void
01335 deactivate_channels(ngram_search_t *ngs, int frame_idx)
01336 {
01337     root_chan_t *rhmm;
01338     int i;
01339 
01340     /* Clear score[] of pruned root channels */
01341     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01342         if (hmm_frame(&rhmm->hmm) == frame_idx) {
01343             hmm_clear_scores(&rhmm->hmm);
01344         }
01345     }
01346     /* Clear score[] of pruned single-phone channels */
01347     for (i = 0; i < ngs->n_1ph_words; i++) {
01348         int32 w = ngs->single_phone_wid[i];
01349         rhmm = (root_chan_t *) ngs->word_chan[w];
01350         if (hmm_frame(&rhmm->hmm) == frame_idx) {
01351             hmm_clear_scores(&rhmm->hmm);
01352         }
01353     }
01354 }
01355 
01356 int
01357 ngram_fwdtree_search(ngram_search_t *ngs)
01358 {
01359     int16 const *senscr;
01360     int frame_idx, best_senid;
01361     int16 best_senscr;
01362 
01363     /* Determine if we actually have a frame to process. */
01364     if (ps_search_acmod(ngs)->n_feat_frame == 0)
01365         return 0;
01366 
01367     /* Activate our HMMs for the current frame if need be. */
01368     if (!ps_search_acmod(ngs)->compallsen)
01369         compute_sen_active(ngs, acmod_frame_idx(ps_search_acmod(ngs)));
01370 
01371     /* Compute GMM scores for the current frame. */
01372     senscr = acmod_score(ps_search_acmod(ngs), &frame_idx,
01373                          &best_senscr, &best_senid);
01374     ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
01375 
01376     /* Mark backpointer table for current frame. */
01377     ngram_search_mark_bptable(ngs, frame_idx);
01378 
01379     /* Renormalize if necessary (FIXME: Make sure to test this) */
01380     if (ngs->best_score + (2 * ngs->beam) < WORST_SCORE) {
01381         E_INFO("Renormalizing Scores at frame %d, best score %d\n",
01382                frame_idx, ngs->best_score);
01383         renormalize_scores(ngs, frame_idx, ngs->best_score);
01384     }
01385 
01386     /* Evaluate HMMs */
01387     evaluate_channels(ngs, senscr, frame_idx);
01388     /* Prune HMMs and do phone transitions. */
01389     prune_channels(ngs, frame_idx);
01390     /* Do absolute pruning on word exits. */
01391     bptable_maxwpf(ngs, frame_idx);
01392     /* Do word transitions. */
01393     word_transition(ngs, frame_idx);
01394     /* Deactivate pruned HMMs. */
01395     deactivate_channels(ngs, frame_idx);
01396 
01397     ++ngs->n_frame;
01398     /* Return the number of frames processed. */
01399     return 1;
01400 }
01401 
01402 void
01403 ngram_fwdtree_finish(ngram_search_t *ngs)
01404 {
01405     int32 i, w, cf, *awl;
01406     root_chan_t *rhmm;
01407     chan_t *hmm, **acl;
01408 
01409     /* This is the number of frames processed. */
01410     cf = acmod_frame_idx(ps_search_acmod(ngs));
01411     /* Add a mark in the backpointer table for one past the final frame. */
01412     ngram_search_mark_bptable(ngs, cf);
01413 
01414     /* Deactivate channels lined up for the next frame */
01415     /* First, root channels of HMM tree */
01416     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01417         hmm_clear(&rhmm->hmm);
01418     }
01419 
01420     /* nonroot channels of HMM tree */
01421     i = ngs->n_active_chan[cf & 0x1];
01422     acl = ngs->active_chan_list[cf & 0x1];
01423     for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
01424         hmm_clear(&hmm->hmm);
01425     }
01426 
01427     /* word channels */
01428     i = ngs->n_active_word[cf & 0x1];
01429     awl = ngs->active_word_list[cf & 0x1];
01430     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
01431         /* Don't accidentally free single-phone words! */
01432         if (ps_search_dict(ngs)->dict_list[w]->len == 1)
01433             continue;
01434         bitvec_clear(ngs->word_active, w);
01435         if (ngs->word_chan[w] == NULL)
01436             continue;
01437         ngram_search_free_all_rc(ngs, w);
01438     }
01439 
01440     /*
01441      * The previous search code did a postprocessing of the
01442      * backpointer table here, but we will postpone this until it is
01443      * absolutely necessary, i.e. when generating a word graph.
01444      * Likewise we don't actually have to decide what the exit word is
01445      * until somebody requests a backtrace.
01446      */
01447 
01448     /* Print out some statistics. */
01449     if (cf > 0) {
01450         E_INFO("%8d words recognized (%d/fr)\n",
01451                ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
01452         E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt,
01453                (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
01454         E_INFO("%8d channels searched (%d/fr), %d 1st, %d last\n",
01455                ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval,
01456                (ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval) / (cf + 1),
01457                ngs->st.n_root_chan_eval, ngs->st.n_last_chan_eval);
01458         E_INFO("%8d words for which last channels evaluated (%d/fr)\n",
01459                ngs->st.n_word_lastchan_eval,
01460                ngs->st.n_word_lastchan_eval / (cf + 1));
01461         E_INFO("%8d candidate words for entering last phone (%d/fr)\n",
01462                ngs->st.n_lastphn_cand_utt, ngs->st.n_lastphn_cand_utt / (cf + 1));
01463     }
01464 }

Generated on Thu Jan 27 2011 for PocketSphinx by  doxygen 1.7.1