PocketSphinx  0.6
src/libpocketsphinx/ngram_search.c
Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00042 /* System headers. */
00043 #include <string.h>
00044 #include <assert.h>
00045 
00046 /* SphinxBase headers. */
00047 #include <sphinxbase/ckd_alloc.h>
00048 #include <sphinxbase/listelem_alloc.h>
00049 #include <sphinxbase/err.h>
00050 
00051 /* Local headers. */
00052 #include "pocketsphinx_internal.h"
00053 #include "ps_lattice_internal.h"
00054 #include "ngram_search.h"
00055 #include "ngram_search_fwdtree.h"
00056 #include "ngram_search_fwdflat.h"
00057 
00058 static int ngram_search_start(ps_search_t *search);
00059 static int ngram_search_step(ps_search_t *search, int frame_idx);
00060 static int ngram_search_finish(ps_search_t *search);
00061 static int ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p);
00062 static char const *ngram_search_hyp(ps_search_t *search, int32 *out_score);
00063 static int32 ngram_search_prob(ps_search_t *search);
00064 static ps_seg_t *ngram_search_seg_iter(ps_search_t *search, int32 *out_score);
00065 
00066 static ps_searchfuncs_t ngram_funcs = {
00067     /* name: */   "ngram",
00068     /* start: */  ngram_search_start,
00069     /* step: */   ngram_search_step,
00070     /* finish: */ ngram_search_finish,
00071     /* reinit: */ ngram_search_reinit,
00072     /* free: */   ngram_search_free,
00073     /* lattice: */  ngram_search_lattice,
00074     /* hyp: */      ngram_search_hyp,
00075     /* prob: */     ngram_search_prob,
00076     /* seg_iter: */ ngram_search_seg_iter,
00077 };
00078 
00079 static void
00080 ngram_search_update_widmap(ngram_search_t *ngs)
00081 {
00082     const char **words;
00083     int32 i, n_words;
00084 
00085     /* It's okay to include fillers since they won't be in the LM */
00086     n_words = ps_search_n_words(ngs);
00087     words = ckd_calloc(n_words, sizeof(*words));
00088     /* This will include alternates, again, that's okay since they aren't in the LM */
00089     for (i = 0; i < n_words; ++i)
00090         words[i] = (const char *)dict_wordstr(ps_search_dict(ngs), i);
00091     ngram_model_set_map_words(ngs->lmset, words, n_words);
00092     ckd_free(words);
00093 }
00094 
00095 static void
00096 ngram_search_calc_beams(ngram_search_t *ngs)
00097 {
00098     cmd_ln_t *config;
00099     acmod_t *acmod;
00100 
00101     config = ps_search_config(ngs);
00102     acmod = ps_search_acmod(ngs);
00103 
00104     /* Log beam widths. */
00105     ngs->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"))>>SENSCR_SHIFT;
00106     ngs->wbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"))>>SENSCR_SHIFT;
00107     ngs->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"))>>SENSCR_SHIFT;
00108     ngs->lpbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lpbeam"))>>SENSCR_SHIFT;
00109     ngs->lponlybeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lponlybeam"))>>SENSCR_SHIFT;
00110     ngs->fwdflatbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatbeam"))>>SENSCR_SHIFT;
00111     ngs->fwdflatwbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatwbeam"))>>SENSCR_SHIFT;
00112 
00113     /* Absolute pruning parameters. */
00114     ngs->maxwpf = cmd_ln_int32_r(config, "-maxwpf");
00115     ngs->maxhmmpf = cmd_ln_int32_r(config, "-maxhmmpf");
00116 
00117     /* Various penalties which may or may not be useful. */
00118     ngs->wip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip")) >>SENSCR_SHIFT;
00119     ngs->nwpen = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-nwpen")) >>SENSCR_SHIFT;
00120     ngs->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip")) >>SENSCR_SHIFT;
00121     ngs->silpen = ngs->pip
00122         + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-silprob"))>>SENSCR_SHIFT);
00123     ngs->fillpen = ngs->pip
00124         + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-fillprob"))>>SENSCR_SHIFT);
00125 
00126     /* Language weight ratios for fwdflat and bestpath search. */
00127     ngs->fwdflat_fwdtree_lw_ratio =
00128         cmd_ln_float32_r(config, "-fwdflatlw")
00129         / cmd_ln_float32_r(config, "-lw");
00130     ngs->bestpath_fwdtree_lw_ratio =
00131         cmd_ln_float32_r(config, "-bestpathlw")
00132         / cmd_ln_float32_r(config, "-lw");
00133 
00134     /* Acoustic score scale for posterior probabilities. */
00135     ngs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
00136 }
00137 
00138 ps_search_t *
00139 ngram_search_init(cmd_ln_t *config,
00140                   acmod_t *acmod,
00141                   dict_t *dict,
00142                   dict2pid_t *d2p)
00143 {
00144     ngram_search_t *ngs;
00145     const char *path;
00146 
00147     ngs = ckd_calloc(1, sizeof(*ngs));
00148     ps_search_init(&ngs->base, &ngram_funcs, config, acmod, dict, d2p);
00149     ngs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
00150                                    acmod->tmat->tp, NULL, acmod->mdef->sseq);
00151     if (ngs->hmmctx == NULL) {
00152         ps_search_free(ps_search_base(ngs));
00153         return NULL;
00154     }
00155     ngs->chan_alloc = listelem_alloc_init(sizeof(chan_t));
00156     ngs->root_chan_alloc = listelem_alloc_init(sizeof(root_chan_t));
00157     ngs->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t));
00158 
00159     /* Calculate various beam widths and such. */
00160     ngram_search_calc_beams(ngs);
00161 
00162     /* Allocate a billion different tables for stuff. */
00163     ngs->word_chan = ckd_calloc(dict_size(dict),
00164                                 sizeof(*ngs->word_chan));
00165     ngs->word_lat_idx = ckd_calloc(dict_size(dict),
00166                                    sizeof(*ngs->word_lat_idx));
00167     ngs->word_active = bitvec_alloc(dict_size(dict));
00168     ngs->last_ltrans = ckd_calloc(dict_size(dict),
00169                                   sizeof(*ngs->last_ltrans));
00170 
00171     /* FIXME: All these structures need to be made dynamic with
00172      * garbage collection. */
00173     ngs->bp_table_size = cmd_ln_int32_r(config, "-latsize");
00174     ngs->bp_table = ckd_calloc(ngs->bp_table_size,
00175                                sizeof(*ngs->bp_table));
00176     /* FIXME: This thing is frickin' huge. */
00177     ngs->bscore_stack_size = ngs->bp_table_size * 20;
00178     ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size,
00179                                    sizeof(*ngs->bscore_stack));
00180     ngs->n_frame_alloc = 256;
00181     ngs->bp_table_idx = ckd_calloc(ngs->n_frame_alloc + 1,
00182                                    sizeof(*ngs->bp_table_idx));
00183     ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
00184 
00185     /* Allocate active word list array */
00186     ngs->active_word_list = ckd_calloc_2d(2, dict_size(dict),
00187                                           sizeof(**ngs->active_word_list));
00188 
00189     /* Load language model(s) */
00190     if ((path = cmd_ln_str_r(config, "-lmctl"))) {
00191         ngs->lmset = ngram_model_set_read(config, path, acmod->lmath);
00192         if (ngs->lmset == NULL) {
00193             E_ERROR("Failed to read language model control file: %s\n",
00194                     path);
00195             goto error_out;
00196         }
00197         /* Set the default language model if needed. */
00198         if ((path = cmd_ln_str_r(config, "-lmname"))) {
00199             ngram_model_set_select(ngs->lmset, path);
00200         }
00201     }
00202     else if ((path = cmd_ln_str_r(config, "-lm"))) {
00203         static const char *name = "default";
00204         ngram_model_t *lm;
00205 
00206         lm = ngram_model_read(config, path, NGRAM_AUTO, acmod->lmath);
00207         if (lm == NULL) {
00208             E_ERROR("Failed to read language model file: %s\n", path);
00209             goto error_out;
00210         }
00211         ngs->lmset = ngram_model_set_init(config,
00212                                           &lm, (char **)&name,
00213                                           NULL, 1);
00214         if (ngs->lmset == NULL) {
00215             E_ERROR("Failed to initialize language model set\n");
00216             goto error_out;
00217         }
00218     }
00219     if (ngs->lmset != NULL
00220         && ngram_wid(ngs->lmset, S3_FINISH_WORD) == ngram_unknown_wid(ngs->lmset)) {
00221         E_ERROR("Language model/set does not contain </s>, recognition will fail\n");
00222         goto error_out;
00223     }
00224 
00225     /* Create word mappings. */
00226     ngram_search_update_widmap(ngs);
00227 
00228     /* Initialize fwdtree, fwdflat, bestpath modules if necessary. */
00229     if (cmd_ln_boolean_r(config, "-fwdtree")) {
00230         ngram_fwdtree_init(ngs);
00231         ngs->fwdtree = TRUE;
00232         ngs->fwdtree_perf.name = "fwdtree";
00233         ptmr_init(&ngs->fwdtree_perf);
00234     }
00235     if (cmd_ln_boolean_r(config, "-fwdflat")) {
00236         ngram_fwdflat_init(ngs);
00237         ngs->fwdflat = TRUE;
00238         ngs->fwdflat_perf.name = "fwdflat";
00239         ptmr_init(&ngs->fwdflat_perf);
00240     }
00241     if (cmd_ln_boolean_r(config, "-bestpath")) {
00242         ngs->bestpath = TRUE;
00243         ngs->bestpath_perf.name = "bestpath";
00244         ptmr_init(&ngs->bestpath_perf);
00245     }
00246 
00247     return (ps_search_t *)ngs;
00248 
00249 error_out:
00250     ngram_search_free((ps_search_t *)ngs);
00251     return NULL;
00252 }
00253 
00254 static int
00255 ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
00256 {
00257     ngram_search_t *ngs = (ngram_search_t *)search;
00258     int old_n_words;
00259     int rv = 0;
00260 
00261     /* Update the number of words. */
00262     old_n_words = search->n_words;
00263     if (old_n_words != dict_size(dict)) {
00264         search->n_words = dict_size(dict);
00265         /* Reallocate these temporary arrays. */
00266         ckd_free(ngs->word_lat_idx);
00267         ckd_free(ngs->word_active);
00268         ckd_free(ngs->last_ltrans);
00269         ckd_free_2d(ngs->active_word_list);
00270         ngs->word_lat_idx = ckd_calloc(search->n_words, sizeof(*ngs->word_lat_idx));
00271         ngs->word_active = bitvec_alloc(search->n_words);
00272         ngs->last_ltrans = ckd_calloc(search->n_words, sizeof(*ngs->last_ltrans));
00273         ngs->active_word_list
00274             = ckd_calloc_2d(2, search->n_words,
00275                             sizeof(**ngs->active_word_list));
00276     }
00277 
00278     /* Free old dict2pid, dict */
00279     ps_search_base_reinit(search, dict, d2p);
00280     
00281     if (ngs->lmset == NULL)
00282         return;
00283 
00284     /* Update beam widths. */
00285     ngram_search_calc_beams(ngs);
00286 
00287     /* Update word mappings. */
00288     ngram_search_update_widmap(ngs);
00289 
00290     /* Now rebuild lextrees. */
00291     if (ngs->fwdtree) {
00292         if ((rv = ngram_fwdtree_reinit(ngs)) < 0)
00293             return rv;
00294     }
00295     if (ngs->fwdflat) {
00296         if ((rv = ngram_fwdflat_reinit(ngs)) < 0)
00297             return rv;
00298     }
00299 
00300     return rv;
00301 }
00302 
00303 void
00304 ngram_search_free(ps_search_t *search)
00305 {
00306     ngram_search_t *ngs = (ngram_search_t *)search;
00307 
00308     ps_search_deinit(search);
00309     if (ngs->fwdtree)
00310         ngram_fwdtree_deinit(ngs);
00311     if (ngs->fwdflat)
00312         ngram_fwdflat_deinit(ngs);
00313     if (ngs->bestpath) {
00314         double n_speech = (double)ngs->n_tot_frame
00315             / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
00316 
00317         E_INFO("TOTAL bestpath %.2f CPU %.3f xRT\n",
00318                ngs->bestpath_perf.t_tot_cpu,
00319                ngs->bestpath_perf.t_tot_cpu / n_speech);
00320         E_INFO("TOTAL bestpath %.2f wall %.3f xRT\n",
00321                ngs->bestpath_perf.t_tot_elapsed,
00322                ngs->bestpath_perf.t_tot_elapsed / n_speech);
00323     }
00324 
00325     hmm_context_free(ngs->hmmctx);
00326     listelem_alloc_free(ngs->chan_alloc);
00327     listelem_alloc_free(ngs->root_chan_alloc);
00328     listelem_alloc_free(ngs->latnode_alloc);
00329     ngram_model_free(ngs->lmset);
00330 
00331     ckd_free(ngs->word_chan);
00332     ckd_free(ngs->word_lat_idx);
00333     bitvec_free(ngs->word_active);
00334     ckd_free(ngs->bp_table);
00335     ckd_free(ngs->bscore_stack);
00336     if (ngs->bp_table_idx != NULL)
00337         ckd_free(ngs->bp_table_idx - 1);
00338     ckd_free_2d(ngs->active_word_list);
00339     ckd_free(ngs->last_ltrans);
00340     ckd_free(ngs);
00341 }
00342 
00343 int
00344 ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
00345 {
00346     if (frame_idx >= ngs->n_frame_alloc) {
00347         ngs->n_frame_alloc *= 2;
00348         ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1,
00349                                         (ngs->n_frame_alloc + 1)
00350                                         * sizeof(*ngs->bp_table_idx));
00351         if (ngs->frm_wordlist) {
00352             ngs->frm_wordlist = ckd_realloc(ngs->frm_wordlist,
00353                                             ngs->n_frame_alloc
00354                                             * sizeof(*ngs->frm_wordlist));
00355         }
00356         ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
00357     }
00358     ngs->bp_table_idx[frame_idx] = ngs->bpidx;
00359     return ngs->bpidx;
00360 }
00361 
00362 static void
00363 set_real_wid(ngram_search_t *ngs, int32 bp)
00364 {
00365     bptbl_t *ent, *prev;
00366 
00367     assert(bp != NO_BP);
00368     ent = ngs->bp_table + bp;
00369     if (ent->bp == NO_BP)
00370         prev = NULL;
00371     else
00372         prev = ngs->bp_table + ent->bp;
00373 
00374     /* Propagate lm state for fillers, rotate it for words. */
00375     if (dict_filler_word(ps_search_dict(ngs), ent->wid)) {
00376         if (prev != NULL) {
00377             ent->real_wid = prev->real_wid;
00378             ent->prev_real_wid = prev->prev_real_wid;
00379         }
00380         else {
00381             ent->real_wid = dict_basewid(ps_search_dict(ngs),
00382                                          ent->wid);
00383             ent->prev_real_wid = BAD_S3WID;
00384         }
00385     }
00386     else {
00387         ent->real_wid = dict_basewid(ps_search_dict(ngs), ent->wid);
00388         if (prev != NULL)
00389             ent->prev_real_wid = prev->real_wid;
00390         else
00391             ent->prev_real_wid = BAD_S3WID;
00392     }
00393 }
00394 
00395 void
00396 ngram_search_save_bp(ngram_search_t *ngs, int frame_idx,
00397                      int32 w, int32 score, int32 path, int32 rc)
00398 {
00399     int32 bp;
00400 
00401     /* Look for an existing exit for this word in this frame.  The
00402      * only reason one would exist is from a different right context
00403      * triphone, but of course that happens quite frequently. */
00404     bp = ngs->word_lat_idx[w];
00405     if (bp != NO_BP) {
00406         /* Keep only the best scoring one, we will reconstruct the
00407          * others from the right context scores - usually the history
00408          * is not lost. */
00409         if (ngs->bp_table[bp].score WORSE_THAN score) {
00410             assert(path != bp); /* Pathological. */
00411             if (ngs->bp_table[bp].bp != path) {
00412                 int32 bplh[2], newlh[2];
00413                 /* But, sometimes, the history *is* lost.  If we wanted to
00414                  * do exact language model scoring we'd have to preserve
00415                  * these alternate histories. */
00416                 E_DEBUG(2,("Updating path history %d => %d frame %d\n",
00417                            ngs->bp_table[bp].bp, path, frame_idx));
00418                 bplh[0] = ngs->bp_table[bp].bp == -1
00419                     ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].prev_real_wid;
00420                 bplh[1] = ngs->bp_table[bp].bp == -1
00421                     ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].real_wid;
00422                 newlh[0] = path == -1
00423                     ? -1 : ngs->bp_table[path].prev_real_wid;
00424                 newlh[1] = path == -1
00425                     ? -1 : ngs->bp_table[path].real_wid;
00426                 /* Actually it's worth checking how often the actual
00427                  * language model state changes. */
00428                 if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) {
00429                     /* It's fairly rare that the actual language model
00430                      * state changes, but it does happen some
00431                      * times. */
00432                     E_DEBUG(1, ("Updating language model state %s,%s => %s,%s frame %d\n",
00433                                 dict_wordstr(ps_search_dict(ngs), bplh[0]),
00434                                 dict_wordstr(ps_search_dict(ngs), bplh[1]),
00435                                 dict_wordstr(ps_search_dict(ngs), newlh[0]),
00436                                 dict_wordstr(ps_search_dict(ngs), newlh[1]),
00437                                 frame_idx));
00438                     set_real_wid(ngs, bp);
00439                 }
00440                 ngs->bp_table[bp].bp = path;
00441             }
00442             ngs->bp_table[bp].score = score;
00443         }
00444         /* But do keep track of scores for all right contexts, since
00445          * we need them to determine the starting path scores for any
00446          * successors of this word exit. */
00447         if (ngs->bp_table[bp].s_idx != -1)
00448             ngs->bscore_stack[ngs->bp_table[bp].s_idx + rc] = score;
00449     }
00450     else {
00451         int32 i, rcsize;
00452         bptbl_t *be;
00453 
00454         /* This might happen if recognition fails. */
00455         if (ngs->bpidx == NO_BP) {
00456             E_ERROR("No entries in backpointer table!");
00457             return;
00458         }
00459 
00460         /* Expand the backpointer tables if necessary. */
00461         if (ngs->bpidx >= ngs->bp_table_size) {
00462             ngs->bp_table_size *= 2;
00463             ngs->bp_table = ckd_realloc(ngs->bp_table,
00464                                         ngs->bp_table_size
00465                                         * sizeof(*ngs->bp_table));
00466             E_INFO("Resized backpointer table to %d entries\n", ngs->bp_table_size);
00467         }
00468         if (ngs->bss_head >= ngs->bscore_stack_size
00469             - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) {
00470             ngs->bscore_stack_size *= 2;
00471             ngs->bscore_stack = ckd_realloc(ngs->bscore_stack,
00472                                             ngs->bscore_stack_size
00473                                             * sizeof(*ngs->bscore_stack));
00474             E_INFO("Resized score stack to %d entries\n", ngs->bscore_stack_size);
00475         }
00476 
00477         ngs->word_lat_idx[w] = ngs->bpidx;
00478         be = &(ngs->bp_table[ngs->bpidx]);
00479         be->wid = w;
00480         be->frame = frame_idx;
00481         be->bp = path;
00482         be->score = score;
00483         be->s_idx = ngs->bss_head;
00484         be->valid = TRUE;
00485         assert(path != ngs->bpidx);
00486 
00487         /* DICT2PID */
00488         /* Get diphone ID for final phone and number of ssids corresponding to it. */
00489         be->last_phone = dict_last_phone(ps_search_dict(ngs),w);
00490         if (dict_is_single_phone(ps_search_dict(ngs), w)) {
00491             be->last2_phone = -1;
00492             be->s_idx = -1;
00493             rcsize = 0;
00494         }
00495         else {
00496             be->last2_phone = dict_second_last_phone(ps_search_dict(ngs),w);
00497             rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
00498                                     be->last_phone, be->last2_phone)->n_ssid;
00499         }
00500         /* Allocate some space on the bscore_stack for all of these triphones. */
00501         for (i = 0; i < rcsize; ++i)
00502             ngs->bscore_stack[ngs->bss_head + i] = WORST_SCORE;
00503         if (rcsize)
00504             ngs->bscore_stack[ngs->bss_head + rc] = score;
00505         set_real_wid(ngs, ngs->bpidx);
00506 
00507         ngs->bpidx++;
00508         ngs->bss_head += rcsize;
00509     }
00510 }
00511 
00512 int
00513 ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score)
00514 {
00515     /* End of backpointers for this frame. */
00516     int end_bpidx;
00517     int best_exit, bp;
00518     int32 best_score;
00519 
00520     /* No hypothesis means no exit node! */
00521     if (ngs->n_frame == 0)
00522         return NO_BP;
00523 
00524     if (frame_idx == -1 || frame_idx >= ngs->n_frame)
00525         frame_idx = ngs->n_frame - 1;
00526     end_bpidx = ngs->bp_table_idx[frame_idx];
00527 
00528     best_score = WORST_SCORE;
00529     best_exit = NO_BP;
00530 
00531     /* Scan back to find a frame with some backpointers in it. */
00532     while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx)
00533         --frame_idx;
00534     /* This is NOT an error, it just means there is no hypothesis yet. */
00535     if (frame_idx < 0)
00536         return NO_BP;
00537 
00538     /* Now find the entry for </s> OR the best scoring entry. */
00539     assert(end_bpidx < ngs->bp_table_size);
00540     for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) {
00541         if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs)
00542             || ngs->bp_table[bp].score BETTER_THAN best_score) {
00543             best_score = ngs->bp_table[bp].score;
00544             best_exit = bp;
00545         }
00546         if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs))
00547             break;
00548     }
00549 
00550     if (out_best_score) *out_best_score = best_score;
00551     return best_exit;
00552 }
00553 
00554 char const *
00555 ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx)
00556 {
00557     ps_search_t *base = ps_search_base(ngs);
00558     char *c;
00559     size_t len;
00560     int bp;
00561 
00562     if (bpidx == NO_BP)
00563         return NULL;
00564 
00565     bp = bpidx;
00566     len = 0;
00567     while (bp != NO_BP) {
00568         bptbl_t *be = &ngs->bp_table[bp];
00569         bp = be->bp;
00570         if (dict_real_word(ps_search_dict(ngs), be->wid))
00571             len += strlen(dict_basestr(ps_search_dict(ngs), be->wid)) + 1;
00572     }
00573 
00574     ckd_free(base->hyp_str);
00575     if (len == 0) {
00576         base->hyp_str = NULL;
00577         return base->hyp_str;
00578     }
00579     base->hyp_str = ckd_calloc(1, len);
00580 
00581     bp = bpidx;
00582     c = base->hyp_str + len - 1;
00583     while (bp != NO_BP) {
00584         bptbl_t *be = &ngs->bp_table[bp];
00585         size_t len;
00586 
00587         bp = be->bp;
00588         if (dict_real_word(ps_search_dict(ngs), be->wid)) {
00589             len = strlen(dict_basestr(ps_search_dict(ngs), be->wid));
00590             c -= len;
00591             memcpy(c, dict_basestr(ps_search_dict(ngs), be->wid), len);
00592             if (c > base->hyp_str) {
00593                 --c;
00594                 *c = ' ';
00595             }
00596         }
00597     }
00598 
00599     return base->hyp_str;
00600 }
00601 
00602 void
00603 ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
00604 {
00605     chan_t *hmm, *thmm;
00606     xwdssid_t *rssid;
00607     int32 i, tmatid, ciphone;
00608 
00609     /* DICT2PID */
00610     /* Get pointer to array of triphones for final diphone. */
00611     assert(!dict_is_single_phone(ps_search_dict(ngs), w));
00612     ciphone = dict_last_phone(ps_search_dict(ngs),w);
00613     rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
00614                            ciphone,
00615                            dict_second_last_phone(ps_search_dict(ngs),w));
00616     tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
00617     hmm = ngs->word_chan[w];
00618     if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->hmm) != rssid->ssid[0])) {
00619         hmm = listelem_malloc(ngs->chan_alloc);
00620         hmm->next = ngs->word_chan[w];
00621         ngs->word_chan[w] = hmm;
00622 
00623         hmm->info.rc_id = 0;
00624         hmm->ciphone = ciphone;
00625         hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[0], tmatid);
00626         E_DEBUG(3,("allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n",
00627                    rssid->ssid[0], hmm->ciphone,
00628                    dict_second_last_phone(ps_search_dict(ngs),w),
00629                    dict_wordstr(ps_search_dict(ngs),w)));
00630     }
00631     for (i = 1; i < rssid->n_ssid; ++i) {
00632         if ((hmm->next == NULL) || (hmm_nonmpx_ssid(&hmm->next->hmm) != rssid->ssid[i])) {
00633             thmm = listelem_malloc(ngs->chan_alloc);
00634             thmm->next = hmm->next;
00635             hmm->next = thmm;
00636             hmm = thmm;
00637 
00638             hmm->info.rc_id = i;
00639             hmm->ciphone = ciphone;
00640             hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[i], tmatid);
00641             E_DEBUG(3,("allocated rc_id %d ssid %d ciphone %d lc %d word %s\n",
00642                        i, rssid->ssid[i], hmm->ciphone,
00643                        dict_second_last_phone(ps_search_dict(ngs),w),
00644                        dict_wordstr(ps_search_dict(ngs),w)));
00645         }
00646         else
00647             hmm = hmm->next;
00648     }
00649 }
00650 
00651 void
00652 ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
00653 {
00654     chan_t *hmm, *thmm;
00655 
00656     for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
00657         thmm = hmm->next;
00658         hmm_deinit(&hmm->hmm);
00659         listelem_free(ngs->chan_alloc, hmm);
00660     }
00661     ngs->word_chan[w] = NULL;
00662 }
00663 
00664 int32
00665 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
00666 {
00667     /* DICT2PID */
00668     /* Get the mapping from right context phone ID to index in the
00669      * right context table and the bscore_stack. */
00670     if (pbe->last2_phone == -1) {
00671         /* No right context for single phone predecessor words. */
00672         return pbe->score;
00673     }
00674     else {
00675         xwdssid_t *rssid;
00676         /* Find the index for the last diphone of the previous word +
00677          * the first phone of the current word. */
00678         rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
00679                                pbe->last_phone, pbe->last2_phone);
00680         /* This may be WORST_SCORE, which means that there was no exit
00681          * with rcphone as right context. */
00682         return ngs->bscore_stack[pbe->s_idx + rssid->cimap[rcphone]];
00683     }
00684 }
00685 
00686 /*
00687  * Compute acoustic and LM scores for a BPTable entry (segment).
00688  */
00689 void
00690 ngram_compute_seg_score(ngram_search_t *ngs, bptbl_t *be, float32 lwf,
00691                         int32 *out_ascr, int32 *out_lscr)
00692 {
00693     bptbl_t *pbe;
00694     int32 start_score;
00695 
00696     /* Start of utterance. */
00697     if (be->bp == NO_BP) {
00698         *out_ascr = be->score;
00699         *out_lscr = 0;
00700         return;
00701     }
00702 
00703     /* Otherwise, calculate lscr and ascr. */
00704     pbe = ngs->bp_table + be->bp;
00705     start_score = ngram_search_exit_score(ngs, pbe,
00706                                  dict_first_phone(ps_search_dict(ngs),be->wid));
00707     assert(start_score BETTER_THAN WORST_SCORE);
00708 
00709     /* FIXME: These result in positive acoustic scores when filler
00710        words have non-filler pronunciations.  That whole business
00711        is still pretty much broken but at least it doesn't
00712        segfault. */
00713     if (be->wid == ps_search_silence_wid(ngs)) {
00714         *out_lscr = ngs->silpen;
00715     }
00716     else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
00717         *out_lscr = ngs->fillpen;
00718     }
00719     else {
00720         int32 n_used;
00721         *out_lscr = ngram_tg_score(ngs->lmset,
00722                                    be->real_wid,
00723                                    pbe->real_wid,
00724                                    pbe->prev_real_wid,
00725                                    &n_used)>>SENSCR_SHIFT;
00726         *out_lscr = *out_lscr * lwf;
00727     }
00728     *out_ascr = be->score - start_score - *out_lscr;
00729 }
00730 
00731 static int
00732 ngram_search_start(ps_search_t *search)
00733 {
00734     ngram_search_t *ngs = (ngram_search_t *)search;
00735 
00736     ngs->done = FALSE;
00737     ngram_model_flush(ngs->lmset);
00738     if (ngs->fwdtree)
00739         ngram_fwdtree_start(ngs);
00740     else if (ngs->fwdflat)
00741         ngram_fwdflat_start(ngs);
00742     else
00743         return -1;
00744     return 0;
00745 }
00746 
00747 static int
00748 ngram_search_step(ps_search_t *search, int frame_idx)
00749 {
00750     ngram_search_t *ngs = (ngram_search_t *)search;
00751 
00752     if (ngs->fwdtree)
00753         return ngram_fwdtree_search(ngs, frame_idx);
00754     else if (ngs->fwdflat)
00755         return ngram_fwdflat_search(ngs, frame_idx);
00756     else
00757         return -1;
00758 }
00759 
00760 void
00761 dump_bptable(ngram_search_t *ngs)
00762 {
00763     int i;
00764     E_INFO("Backpointer table (%d entries):\n", ngs->bpidx);
00765     for (i = 0; i < ngs->bpidx; ++i) {
00766         bptbl_t *bpe = ngs->bp_table + i;
00767         int j, rcsize;
00768 
00769         E_INFO_NOFN("%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d",
00770                     i, dict_wordstr(ps_search_dict(ngs), bpe->wid),
00771                     (bpe->bp == -1
00772                      ? 0 : ngs->bp_table[bpe->bp].frame + 1),
00773                     bpe->frame, bpe->score, bpe->bp,
00774                     bpe->real_wid, bpe->prev_real_wid);
00775 
00776         if (bpe->last2_phone == -1)
00777             rcsize = 0;
00778         else
00779             rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
00780                                     bpe->last_phone, bpe->last2_phone)->n_ssid;
00781         if (rcsize) {
00782             E_INFOCONT("\tbss");
00783             for (j = 0; j < rcsize; ++j)
00784                 if (ngs->bscore_stack[bpe->s_idx + j] != WORST_SCORE)
00785                     E_INFOCONT(" %d", bpe->score - ngs->bscore_stack[bpe->s_idx + j]);
00786         }
00787         E_INFOCONT("\n");
00788     }
00789 }
00790 
00791 static int
00792 ngram_search_finish(ps_search_t *search)
00793 {
00794     ngram_search_t *ngs = (ngram_search_t *)search;
00795 
00796     ngs->n_tot_frame += ngs->n_frame;
00797     if (ngs->fwdtree) {
00798         ngram_fwdtree_finish(ngs);
00799         /* dump_bptable(ngs); */
00800 
00801         /* Now do fwdflat search in its entirety, if requested. */
00802         if (ngs->fwdflat) {
00803             int i;
00804             /* Rewind the acoustic model. */
00805             if (acmod_rewind(ps_search_acmod(ngs)) < 0)
00806                 return -1;
00807             /* Now redo search. */
00808             ngram_fwdflat_start(ngs);
00809             i = 0;
00810             while (ps_search_acmod(ngs)->n_feat_frame > 0) {
00811                 int nfr;
00812                 if ((nfr = ngram_fwdflat_search(ngs, i)) < 0)
00813                     return nfr;
00814                 acmod_advance(ps_search_acmod(ngs));
00815                 ++i;
00816             }
00817             ngram_fwdflat_finish(ngs);
00818             /* And now, we should have a result... */
00819             /* dump_bptable(ngs); */
00820         }
00821     }
00822     else if (ngs->fwdflat) {
00823         ngram_fwdflat_finish(ngs);
00824     }
00825 
00826     /* Mark the current utterance as done. */
00827     ngs->done = TRUE;
00828     return 0;
00829 }
00830 
00831 static ps_latlink_t *
00832 ngram_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
00833 {
00834     ngram_search_t *ngs = (ngram_search_t *)search;
00835 
00836     if (search->last_link == NULL) {
00837         search->last_link = ps_lattice_bestpath(search->dag, ngs->lmset,
00838                                                 ngs->bestpath_fwdtree_lw_ratio,
00839                                                 ngs->ascale);
00840         if (search->last_link == NULL)
00841             return NULL;
00842         /* Also calculate betas so we can fill in the posterior
00843          * probability field in the segmentation. */
00844         if (search->post == 0)
00845             search->post = ps_lattice_posterior(search->dag, ngs->lmset,
00846                                                 ngs->ascale);
00847     }
00848     if (out_score)
00849         *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
00850     return search->last_link;
00851 }
00852 
00853 static char const *
00854 ngram_search_hyp(ps_search_t *search, int32 *out_score)
00855 {
00856     ngram_search_t *ngs = (ngram_search_t *)search;
00857 
00858     /* Only do bestpath search if the utterance is complete. */
00859     if (ngs->bestpath && ngs->done) {
00860         ps_lattice_t *dag;
00861         ps_latlink_t *link;
00862         char const *hyp;
00863         double n_speech;
00864 
00865         ptmr_reset(&ngs->bestpath_perf);
00866         ptmr_start(&ngs->bestpath_perf);
00867         if ((dag = ngram_search_lattice(search)) == NULL)
00868             return NULL;
00869         if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL)
00870             return NULL;
00871         hyp = ps_lattice_hyp(dag, link);
00872         ptmr_stop(&ngs->bestpath_perf);
00873         n_speech = (double)dag->n_frames
00874             / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
00875         E_INFO("bestpath %.2f CPU %.3f xRT\n",
00876                ngs->bestpath_perf.t_cpu,
00877                ngs->bestpath_perf.t_cpu / n_speech);
00878         E_INFO("bestpath %.2f wall %.3f xRT\n",
00879                ngs->bestpath_perf.t_elapsed,
00880                ngs->bestpath_perf.t_elapsed / n_speech);
00881         return hyp;
00882     }
00883     else {
00884         int32 bpidx;
00885 
00886         /* fwdtree and fwdflat use same backpointer table. */
00887         bpidx = ngram_search_find_exit(ngs, -1, out_score);
00888         if (bpidx != NO_BP)
00889             return ngram_search_bp_hyp(ngs, bpidx);
00890     }
00891 
00892     return NULL;
00893 }
00894 
00895 static void
00896 ngram_search_bp2itor(ps_seg_t *seg, int bp)
00897 {
00898     ngram_search_t *ngs = (ngram_search_t *)seg->search;
00899     bptbl_t *be, *pbe;
00900 
00901     be = &ngs->bp_table[bp];
00902     pbe = be->bp == -1 ? NULL : &ngs->bp_table[be->bp];
00903     seg->word = dict_wordstr(ps_search_dict(ngs), be->wid);
00904     seg->ef = be->frame;
00905     seg->sf = pbe ? pbe->frame + 1 : 0;
00906     seg->prob = 0; /* Bogus value... */
00907     /* Compute acoustic and LM scores for this segment. */
00908     if (pbe == NULL) {
00909         seg->ascr = be->score;
00910         seg->lscr = 0;
00911         seg->lback = 0;
00912     }
00913     else {
00914         int32 start_score;
00915 
00916         /* Find ending path score of previous word. */
00917         start_score = ngram_search_exit_score(ngs, pbe,
00918                                      dict_first_phone(ps_search_dict(ngs), be->wid));
00919         assert(start_score BETTER_THAN WORST_SCORE);
00920         if (be->wid == ps_search_silence_wid(ngs)) {
00921             seg->lscr = ngs->silpen;
00922         }
00923         else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
00924             seg->lscr = ngs->fillpen;
00925         }
00926         else {
00927             seg->lscr = ngram_tg_score(ngs->lmset,
00928                                        be->real_wid,
00929                                        pbe->real_wid,
00930                                        pbe->prev_real_wid,
00931                                        &seg->lback)>>SENSCR_SHIFT;
00932             seg->lscr = (int32)(seg->lscr * seg->lwf);
00933         }
00934         seg->ascr = be->score - start_score - seg->lscr;
00935     }
00936 }
00937 
00938 static void
00939 ngram_bp_seg_free(ps_seg_t *seg)
00940 {
00941     bptbl_seg_t *itor = (bptbl_seg_t *)seg;
00942     
00943     ckd_free(itor->bpidx);
00944     ckd_free(itor);
00945 }
00946 
00947 static ps_seg_t *
00948 ngram_bp_seg_next(ps_seg_t *seg)
00949 {
00950     bptbl_seg_t *itor = (bptbl_seg_t *)seg;
00951 
00952     if (++itor->cur == itor->n_bpidx) {
00953         ngram_bp_seg_free(seg);
00954         return NULL;
00955     }
00956 
00957     ngram_search_bp2itor(seg, itor->bpidx[itor->cur]);
00958     return seg;
00959 }
00960 
00961 static ps_segfuncs_t ngram_bp_segfuncs = {
00962     /* seg_next */ ngram_bp_seg_next,
00963     /* seg_free */ ngram_bp_seg_free
00964 };
00965 
00966 static ps_seg_t *
00967 ngram_search_bp_iter(ngram_search_t *ngs, int bpidx, float32 lwf)
00968 {
00969     bptbl_seg_t *itor;
00970     int bp, cur;
00971 
00972     /* Calling this an "iterator" is a bit of a misnomer since we have
00973      * to get the entire backtrace in order to produce it.  On the
00974      * other hand, all we actually need is the bptbl IDs, and we can
00975      * allocate a fixed-size array of them. */
00976     itor = ckd_calloc(1, sizeof(*itor));
00977     itor->base.vt = &ngram_bp_segfuncs;
00978     itor->base.search = ps_search_base(ngs);
00979     itor->base.lwf = lwf;
00980     itor->n_bpidx = 0;
00981     bp = bpidx;
00982     while (bp != NO_BP) {
00983         bptbl_t *be = &ngs->bp_table[bp];
00984         bp = be->bp;
00985         ++itor->n_bpidx;
00986     }
00987     if (itor->n_bpidx == 0) {
00988         ckd_free(itor);
00989         return NULL;
00990     }
00991     itor->bpidx = ckd_calloc(itor->n_bpidx, sizeof(*itor->bpidx));
00992     cur = itor->n_bpidx - 1;
00993     bp = bpidx;
00994     while (bp != NO_BP) {
00995         bptbl_t *be = &ngs->bp_table[bp];
00996         itor->bpidx[cur] = bp;
00997         bp = be->bp;
00998         --cur;
00999     }
01000 
01001     /* Fill in relevant fields for first element. */
01002     ngram_search_bp2itor((ps_seg_t *)itor, itor->bpidx[0]);
01003 
01004     return (ps_seg_t *)itor;
01005 }
01006 
01007 static ps_seg_t *
01008 ngram_search_seg_iter(ps_search_t *search, int32 *out_score)
01009 {
01010     ngram_search_t *ngs = (ngram_search_t *)search;
01011 
01012     /* Only do bestpath search if the utterance is done. */
01013     if (ngs->bestpath && ngs->done) {
01014         ps_lattice_t *dag;
01015         ps_latlink_t *link;
01016         double n_speech;
01017         ps_seg_t *itor;
01018 
01019         ptmr_reset(&ngs->bestpath_perf);
01020         ptmr_start(&ngs->bestpath_perf);
01021         if ((dag = ngram_search_lattice(search)) == NULL)
01022             return NULL;
01023         if ((link = ngram_search_bestpath(search, out_score, TRUE)) == NULL)
01024             return NULL;
01025         itor = ps_lattice_seg_iter(dag, link,
01026                                    ngs->bestpath_fwdtree_lw_ratio);
01027         ptmr_stop(&ngs->bestpath_perf);
01028         n_speech = (double)dag->n_frames
01029             / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
01030         E_INFO("bestpath %.2f CPU %.3f xRT\n",
01031                ngs->bestpath_perf.t_cpu,
01032                ngs->bestpath_perf.t_cpu / n_speech);
01033         E_INFO("bestpath %.2f wall %.3f xRT\n",
01034                ngs->bestpath_perf.t_elapsed,
01035                ngs->bestpath_perf.t_elapsed / n_speech);
01036         return itor;
01037     }
01038     else {
01039         int32 bpidx;
01040 
01041         /* fwdtree and fwdflat use same backpointer table. */
01042         bpidx = ngram_search_find_exit(ngs, -1, out_score);
01043         return ngram_search_bp_iter(ngs, bpidx,
01044                                     /* but different language weights... */
01045                                     (ngs->done && ngs->fwdflat)
01046                                     ? ngs->fwdflat_fwdtree_lw_ratio : 1.0);
01047     }
01048 
01049     return NULL;
01050 }
01051 
01052 static int32
01053 ngram_search_prob(ps_search_t *search)
01054 {
01055     ngram_search_t *ngs = (ngram_search_t *)search;
01056 
01057     /* Only do bestpath search if the utterance is done. */
01058     if (ngs->bestpath && ngs->done) {
01059         ps_lattice_t *dag;
01060         ps_latlink_t *link;
01061 
01062         if ((dag = ngram_search_lattice(search)) == NULL)
01063             return 0;
01064         if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
01065             return 0;
01066         return search->post;
01067     }
01068     else {
01069         /* FIXME: Give some kind of good estimate here, eventually. */
01070         return 0;
01071     }
01072 }
01073 
01074 static void
01075 create_dag_nodes(ngram_search_t *ngs, ps_lattice_t *dag)
01076 {
01077     bptbl_t *bp_ptr;
01078     int32 i;
01079 
01080     for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) {
01081         int32 sf, ef, wid;
01082         ps_latnode_t *node;
01083 
01084         /* Skip invalid backpointers (these result from -maxwpf pruning) */
01085         if (!bp_ptr->valid)
01086             continue;
01087 
01088         sf = (bp_ptr->bp < 0) ? 0 : ngs->bp_table[bp_ptr->bp].frame + 1;
01089         ef = bp_ptr->frame;
01090         wid = bp_ptr->wid;
01091 
01092         assert(ef < dag->n_frames);
01093         /* Skip non-final </s> entries. */
01094         if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->n_frames - 1))
01095             continue;
01096 
01097         /* Skip if word not in LM */
01098         if ((!dict_filler_word(ps_search_dict(ngs), wid))
01099             && (!ngram_model_set_known_wid(ngs->lmset,
01100                                            dict_basewid(ps_search_dict(ngs), wid))))
01101             continue;
01102 
01103         /* See if bptbl entry <wid,sf> already in lattice */
01104         for (node = dag->nodes; node; node = node->next) {
01105             if ((node->wid == wid) && (node->sf == sf))
01106                 break;
01107         }
01108 
01109         /* For the moment, store bptbl indices in node.{fef,lef} */
01110         if (node)
01111             node->lef = i;
01112         else {
01113             /* New node; link to head of list */
01114             node = listelem_malloc(dag->latnode_alloc);
01115             node->wid = wid;
01116             node->sf = sf; /* This is a frame index. */
01117             node->fef = node->lef = i; /* These are backpointer indices (argh) */
01118             node->reachable = FALSE;
01119             node->entries = NULL;
01120             node->exits = NULL;
01121 
01122             /* NOTE: This creates the list of nodes in reverse
01123              * topological order, i.e. a node always precedes its
01124              * antecedents in this list. */
01125             node->next = dag->nodes;
01126             dag->nodes = node;
01127             ++dag->n_nodes;
01128         }
01129     }
01130 }
01131 
01132 static ps_latnode_t *
01133 find_start_node(ngram_search_t *ngs, ps_lattice_t *dag)
01134 {
01135     ps_latnode_t *node;
01136 
01137     /* Find start node <s>.0 */
01138     for (node = dag->nodes; node; node = node->next) {
01139         if ((node->wid == ps_search_start_wid(ngs)) && (node->sf == 0))
01140             break;
01141     }
01142     if (!node) {
01143         /* This is probably impossible. */
01144         E_ERROR("Couldn't find <s> in first frame\n");
01145         return NULL;
01146     }
01147     return node;
01148 }
01149 
01150 static ps_latnode_t *
01151 find_end_node(ngram_search_t *ngs, ps_lattice_t *dag, float32 lwf)
01152 {
01153     ps_latnode_t *node;
01154     int32 ef, bestbp, bp, bestscore;
01155 
01156     /* Find final node </s>.last_frame; nothing can follow this node */
01157     for (node = dag->nodes; node; node = node->next) {
01158         int32 lef = ngs->bp_table[node->lef].frame;
01159         if ((node->wid == ps_search_finish_wid(ngs))
01160             && (lef == dag->n_frames - 1))
01161             break;
01162     }
01163     if (node != NULL)
01164         return node;
01165 
01166     /* It is quite likely that no </s> exited in the last frame.  So,
01167      * find the node corresponding to the best exit. */
01168     /* Find the last frame containing a word exit. */
01169     for (ef = dag->n_frames - 1;
01170          ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx;
01171          --ef);
01172     if (ef < 0) {
01173         E_ERROR("Empty backpointer table: can not build DAG.\n");
01174         return NULL;
01175     }
01176 
01177     /* Find best word exit in that frame. */
01178     bestscore = WORST_SCORE;
01179     bestbp = NO_BP;
01180     for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) {
01181         int32 n_used, l_scr, wid, prev_wid;
01182         wid = ngs->bp_table[bp].real_wid;
01183         prev_wid = ngs->bp_table[bp].prev_real_wid;
01184         /* Always prefer </s>, of which there will only be one per frame. */
01185         if (wid == ps_search_finish_wid(ngs)) {
01186             bestbp = bp;
01187             break;
01188         }
01189         l_scr = ngram_tg_score(ngs->lmset, ps_search_finish_wid(ngs),
01190                                wid, prev_wid, &n_used) >>SENSCR_SHIFT;
01191         l_scr = l_scr * lwf;
01192         if (ngs->bp_table[bp].score + l_scr BETTER_THAN bestscore) {
01193             bestscore = ngs->bp_table[bp].score + l_scr;
01194             bestbp = bp;
01195         }
01196     }
01197     if (bestbp == NO_BP) {
01198         E_ERROR("No word exits found in last frame (%d), assuming no recognition\n", ef);
01199         return NULL;
01200     }
01201     E_INFO("</s> not found in last frame, using %s.%d instead\n",
01202            dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid), ef);
01203 
01204     /* Now find the node that corresponds to it. */
01205     for (node = dag->nodes; node; node = node->next) {
01206         if (node->lef == bestbp)
01207             return node;
01208     }
01209 
01210     /* FIXME: This seems to happen a lot! */
01211     E_ERROR("Failed to find DAG node corresponding to %s\n",
01212            dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid));
01213     return NULL;
01214 }
01215 
01216 /*
01217  * Build lattice from bptable.
01218  */
01219 ps_lattice_t *
01220 ngram_search_lattice(ps_search_t *search)
01221 {
01222     int32 i, score, ascr, lscr;
01223     ps_latnode_t *node, *from, *to;
01224     ngram_search_t *ngs;
01225     ps_lattice_t *dag;
01226     int min_endfr, nlink;
01227     float lwf;
01228 
01229     ngs = (ngram_search_t *)search;
01230     min_endfr = cmd_ln_int32_r(ps_search_config(search), "-min_endfr");
01231 
01232     /* If the best score is WORST_SCORE or worse, there is no way to
01233      * make a lattice. */
01234     if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE)
01235         return NULL;
01236 
01237     /* Check to see if a lattice has previously been created over the
01238      * same number of frames, and reuse it if so. */
01239     if (search->dag && search->dag->n_frames == ngs->n_frame)
01240         return search->dag;
01241 
01242     /* Nope, create a new one. */
01243     ps_lattice_free(search->dag);
01244     search->dag = NULL;
01245     dag = ps_lattice_init_search(search, ngs->n_frame);
01246     /* Compute these such that they agree with the fwdtree language weight. */
01247     lwf = ngs->fwdflat ? ngs->fwdflat_fwdtree_lw_ratio : 1.0;
01248     create_dag_nodes(ngs, dag);
01249     if ((dag->start = find_start_node(ngs, dag)) == NULL)
01250         goto error_out;
01251     if ((dag->end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL)
01252         goto error_out;
01253     E_INFO("lattice start node %s.%d end node %s.%d\n",
01254            dict_wordstr(search->dict, dag->start->wid), dag->start->sf,
01255            dict_wordstr(search->dict, dag->end->wid), dag->end->sf);
01256 
01257     ngram_compute_seg_score(ngs, ngs->bp_table + dag->end->lef, lwf,
01258                             &dag->final_node_ascr, &lscr);
01259 
01260     /*
01261      * At this point, dag->nodes is ordered such that nodes earlier in
01262      * the list can follow (in time) those later in the list, but not
01263      * vice versa (see above - also note that adjacency is purely
01264      * determined by time which is why we can make this claim).  Now
01265      * create precedence links and simultanesously mark all nodes that
01266      * can reach dag->end.  (All nodes are reached from dag->start
01267      * simply by definition - they were created that way).
01268      *
01269      * Note that this also means that any nodes before dag->end in the
01270      * list can be discarded, meaning that dag->end will always be
01271      * equal to dag->nodes (FIXME: except when loading from a file but
01272      * we can fix that...)
01273      */
01274     i = 0;
01275     while (dag->nodes && dag->nodes != dag->end) {
01276         ps_latnode_t *next = dag->nodes->next;
01277         listelem_free(dag->latnode_alloc, dag->nodes);
01278         dag->nodes = next;
01279         ++i;
01280     }
01281     E_INFO("Eliminated %d nodes before end node\n", i);
01282     dag->end->reachable = TRUE;
01283     nlink = 0;
01284     for (to = dag->end; to; to = to->next) {
01285         int fef, lef;
01286 
01287         /* Skip if not reachable; it will never be reachable from dag->end */
01288         if (!to->reachable)
01289             continue;
01290 
01291         /* Prune nodes with too few endpoints - heuristic
01292            borrowed from Sphinx3 */
01293         fef = ngs->bp_table[to->fef].frame;
01294         lef = ngs->bp_table[to->lef].frame;
01295         if (to != dag->end && lef - fef < min_endfr) {
01296             to->reachable = FALSE;
01297             continue;
01298         }
01299 
01300         /* Find predecessors of to : from->fef+1 <= to->sf <= from->lef+1 */
01301         for (from = to->next; from; from = from->next) {
01302             bptbl_t *from_bpe;
01303 
01304             fef = ngs->bp_table[from->fef].frame;
01305             lef = ngs->bp_table[from->lef].frame;
01306 
01307             if ((to->sf <= fef) || (to->sf > lef + 1))
01308                 continue;
01309             if (lef - fef < min_endfr) {
01310                 assert(!from->reachable);
01311                 continue;
01312             }
01313 
01314             /* Find bptable entry for "from" that exactly precedes "to" */
01315             i = from->fef;
01316             from_bpe = ngs->bp_table + i;
01317             for (; i <= from->lef; i++, from_bpe++) {
01318                 if (from_bpe->wid != from->wid)
01319                     continue;
01320                 if (from_bpe->frame >= to->sf - 1)
01321                     break;
01322             }
01323 
01324             if ((i > from->lef) || (from_bpe->frame != to->sf - 1))
01325                 continue;
01326 
01327             /* Find acoustic score from.sf->to.sf-1 with right context = to */
01328             /* This gives us from_bpe's best acoustic score. */
01329             ngram_compute_seg_score(ngs, from_bpe, lwf,
01330                                     &ascr, &lscr);
01331             /* Now find the exact path score for from->to, including
01332              * the appropriate final triphone.  In fact this might not
01333              * exist. */
01334             score = ngram_search_exit_score(ngs, from_bpe,
01335                                             dict_first_phone(ps_search_dict(ngs), to->wid));
01336             /* Does not exist.  Can't create a link here. */
01337             if (score == WORST_SCORE)
01338                 continue;
01339             /* Adjust the arc score to match the correct triphone. */
01340             else
01341                 score = ascr + (score - from_bpe->score);
01342             if (score BETTER_THAN 0) {
01343                 /* Scores must be negative, or Bad Things will happen.
01344                    In general, they are, except in corner cases
01345                    involving filler words.  We don't want to throw any
01346                    links away so we'll keep these, but with some
01347                    arbitrarily improbable but recognizable score. */
01348                 ps_lattice_link(dag, from, to, -424242, from_bpe->frame);
01349                 ++nlink;
01350                 from->reachable = TRUE;
01351             }
01352             else if (score BETTER_THAN WORST_SCORE) {
01353                 ps_lattice_link(dag, from, to, score, from_bpe->frame);
01354                 ++nlink;
01355                 from->reachable = TRUE;
01356             }
01357         }
01358     }
01359 
01360     /* There must be at least one path between dag->start and dag->end */
01361     if (!dag->start->reachable) {
01362         E_ERROR("End node of lattice isolated; unreachable\n");
01363         goto error_out;
01364     }
01365 
01366     for (node = dag->nodes; node; node = node->next) {
01367         /* Change node->{fef,lef} from bptbl indices to frames. */
01368         node->fef = ngs->bp_table[node->fef].frame;
01369         node->lef = ngs->bp_table[node->lef].frame;
01370         /* Find base wid for nodes. */
01371         node->basewid = dict_basewid(search->dict, node->wid);
01372     }
01373 
01374     /* Link nodes with alternate pronunciations at the same timepoint. */
01375     for (node = dag->nodes; node; node = node->next) {
01376         ps_latnode_t *alt;
01377         /* Scan forward to find the next alternate, then stop. */
01378         for (alt = node->next; alt && alt->sf == node->sf; alt = alt->next) {
01379             if (alt->basewid == node->basewid) {
01380                 alt->alt = node->alt;
01381                 node->alt = alt;
01382                 break;
01383             }
01384         }
01385     }
01386     E_INFO("Lattice has %d nodes, %d links\n", dag->n_nodes, nlink);
01387 
01388     /* Minor hack: If the final node is a filler word and not </s>,
01389      * then set its base word ID to </s>, so that the language model
01390      * scores won't be screwed up. */
01391     if (dict_filler_word(ps_search_dict(ngs), dag->end->wid))
01392         dag->end->basewid = ps_search_finish_wid(ngs);
01393 
01394     /* Free nodes unreachable from dag->end and their links */
01395     ps_lattice_delete_unreachable(dag);
01396 
01397     /* Build links around silence and filler words, since they do not
01398      * exist in the language model. */
01399     ps_lattice_bypass_fillers(dag, ngs->silpen, ngs->fillpen);
01400 
01401     search->dag = dag;
01402     return dag;
01403 
01404 error_out:
01405     ps_lattice_free(dag);
01406     return NULL;
01407 }