• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libpocketsphinx/ngram_search.c

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00042 /* System headers. */
00043 #include <string.h>
00044 #include <assert.h>
00045 
00046 /* SphinxBase headers. */
00047 #include <ckd_alloc.h>
00048 #include <listelem_alloc.h>
00049 
00050 /* Local headers. */
00051 #include "pocketsphinx_internal.h"
00052 #include "ps_lattice_internal.h"
00053 #include "ngram_search.h"
00054 #include "ngram_search_fwdtree.h"
00055 #include "ngram_search_fwdflat.h"
00056 
00057 static int ngram_search_start(ps_search_t *search);
00058 static int ngram_search_step(ps_search_t *search);
00059 static int ngram_search_finish(ps_search_t *search);
00060 static int ngram_search_reinit(ps_search_t *search);
00061 static char const *ngram_search_hyp(ps_search_t *search, int32 *out_score);
00062 static int32 ngram_search_prob(ps_search_t *search);
00063 static ps_seg_t *ngram_search_seg_iter(ps_search_t *search, int32 *out_score);
00064 
00065 static ps_searchfuncs_t ngram_funcs = {
00066     /* name: */   "ngram",
00067     /* start: */  ngram_search_start,
00068     /* step: */   ngram_search_step,
00069     /* finish: */ ngram_search_finish,
00070     /* reinit: */ ngram_search_reinit,
00071     /* free: */   ngram_search_free,
00072     /* lattice: */  ngram_search_lattice,
00073     /* hyp: */      ngram_search_hyp,
00074     /* prob: */     ngram_search_prob,
00075     /* seg_iter: */ ngram_search_seg_iter,
00076 };
00077 
00078 static void
00079 ngram_search_update_widmap(ngram_search_t *ngs)
00080 {
00081     char const **words;
00082     int32 i, n_words;
00083 
00084     /* It's okay to include fillers since they won't be in the LM */
00085     n_words = ps_search_n_words(ngs);
00086     words = ckd_calloc(n_words, sizeof(*words));
00087     /* This will include alternates, again, that's okay since they aren't in the LM */
00088     for (i = 0; i < n_words; ++i)
00089         words[i] = (const char *)dict_word_str(ps_search_dict(ngs), i);
00090     ngram_model_set_map_words(ngs->lmset, words, n_words);
00091     ckd_free(words);
00092 }
00093 
00094 static void
00095 ngram_search_calc_beams(ngram_search_t *ngs)
00096 {
00097     cmd_ln_t *config;
00098     acmod_t *acmod;
00099 
00100     config = ps_search_config(ngs);
00101     acmod = ps_search_acmod(ngs);
00102 
00103     /* Log beam widths. */
00104     ngs->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"));
00105     ngs->wbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"));
00106     ngs->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"));
00107     ngs->lpbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lpbeam"));
00108     ngs->lponlybeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lponlybeam"));
00109     ngs->fwdflatbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatbeam"));
00110     ngs->fwdflatwbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatwbeam"));
00111 
00112     /* Absolute pruning parameters. */
00113     ngs->maxwpf = cmd_ln_int32_r(config, "-maxwpf");
00114     ngs->maxhmmpf = cmd_ln_int32_r(config, "-maxhmmpf");
00115 
00116     /* Various penalties which may or may not be useful. */
00117     ngs->wip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip"));
00118     ngs->nwpen = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-nwpen"));
00119     ngs->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip"));
00120     ngs->silpen = ngs->pip
00121         + logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-silprob"));
00122     ngs->fillpen = ngs->pip
00123         + logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-fillprob"));
00124 
00125     /* Language weight ratios for fwdflat and bestpath search. */
00126     ngs->fwdflat_fwdtree_lw_ratio =
00127         cmd_ln_float32_r(config, "-fwdflatlw")
00128         / cmd_ln_float32_r(config, "-lw");
00129     ngs->bestpath_fwdtree_lw_ratio =
00130         cmd_ln_float32_r(config, "-bestpathlw")
00131         / cmd_ln_float32_r(config, "-lw");
00132 
00133     /* Acoustic score scale for posterior probabilities. */
00134     ngs->ascale = 1.0f / cmd_ln_float32_r(config, "-ascale");
00135 }
00136 
00137 ps_search_t *
00138 ngram_search_init(cmd_ln_t *config,
00139                   acmod_t *acmod,
00140                   dict_t *dict)
00141 {
00142     ngram_search_t *ngs;
00143     const char *path;
00144 
00145     ngs = ckd_calloc(1, sizeof(*ngs));
00146     ps_search_init(&ngs->base, &ngram_funcs, config, acmod, dict);
00147     ngs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
00148                                    acmod->tmat->tp, NULL, acmod->mdef->sseq);
00149     ngs->chan_alloc = listelem_alloc_init(sizeof(chan_t));
00150     ngs->root_chan_alloc = listelem_alloc_init(sizeof(root_chan_t));
00151     ngs->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t));
00152 
00153     /* Calculate various beam widths and such. */
00154     ngram_search_calc_beams(ngs);
00155 
00156     /* Allocate a billion different tables for stuff. */
00157     ngs->word_chan = ckd_calloc(dict->dict_entry_count,
00158                                 sizeof(*ngs->word_chan));
00159     ngs->word_lat_idx = ckd_calloc(dict->dict_entry_count,
00160                                    sizeof(*ngs->word_lat_idx));
00161     ngs->zeroPermTab = ckd_calloc(bin_mdef_n_ciphone(acmod->mdef),
00162                                   sizeof(*ngs->zeroPermTab));
00163     ngs->word_active = bitvec_alloc(dict->dict_entry_count);
00164     ngs->last_ltrans = ckd_calloc(dict->dict_entry_count,
00165                                   sizeof(*ngs->last_ltrans));
00166 
00167     /* FIXME: All these structures need to be made dynamic with
00168      * garbage collection. */
00169     ngs->bp_table_size = cmd_ln_int32_r(config, "-latsize");
00170     ngs->bp_table = ckd_calloc(ngs->bp_table_size,
00171                                sizeof(*ngs->bp_table));
00172     /* FIXME: This thing is frickin' huge. */
00173     ngs->bscore_stack_size = ngs->bp_table_size * 20;
00174     ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size,
00175                                    sizeof(*ngs->bscore_stack));
00176     ngs->n_frame_alloc = 256;
00177     ngs->bp_table_idx = ckd_calloc(ngs->n_frame_alloc + 1,
00178                                    sizeof(*ngs->bp_table_idx));
00179     ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
00180 
00181     /* Allocate active word list array */
00182     ngs->active_word_list = ckd_calloc_2d(2, dict->dict_entry_count,
00183                                           sizeof(**ngs->active_word_list));
00184 
00185     /* Load language model(s) */
00186     if ((path = cmd_ln_str_r(config, "-lmctl"))) {
00187         ngs->lmset = ngram_model_set_read(config, path, acmod->lmath);
00188         if (ngs->lmset == NULL) {
00189             E_ERROR("Failed to read language model control file: %s\n",
00190                     path);
00191             goto error_out;
00192         }
00193         /* Set the default language model if needed. */
00194         if ((path = cmd_ln_str_r(config, "-lmname"))) {
00195             ngram_model_set_select(ngs->lmset, path);
00196         }
00197     }
00198     else if ((path = cmd_ln_str_r(config, "-lm"))) {
00199         static const char *name = "default";
00200         ngram_model_t *lm;
00201 
00202         lm = ngram_model_read(config, path, NGRAM_AUTO, acmod->lmath);
00203         if (lm == NULL) {
00204             E_ERROR("Failed to read language model file: %s\n", path);
00205             goto error_out;
00206         }
00207         ngs->lmset = ngram_model_set_init(config,
00208                                           &lm, (char **)&name,
00209                                           NULL, 1);
00210         if (ngs->lmset == NULL) {
00211             E_ERROR("Failed to initialize language model set\n");
00212             goto error_out;
00213         }
00214     }
00215 
00216     /* Create word mappings. */
00217     ngram_search_update_widmap(ngs);
00218 
00219     /* Initialize fwdtree, fwdflat, bestpath modules if necessary. */
00220     if (cmd_ln_boolean_r(config, "-fwdtree")) {
00221         ngram_fwdtree_init(ngs);
00222         ngs->fwdtree = TRUE;
00223     }
00224     if (cmd_ln_boolean_r(config, "-fwdflat")) {
00225         ngram_fwdflat_init(ngs);
00226         ngs->fwdflat = TRUE;
00227     }
00228     if (cmd_ln_boolean_r(config, "-bestpath")) {
00229         ngs->bestpath = TRUE;
00230     }
00231     return (ps_search_t *)ngs;
00232 
00233 error_out:
00234     ngram_search_free((ps_search_t *)ngs);
00235     return NULL;
00236 }
00237 
00238 static int
00239 ngram_search_reinit(ps_search_t *search)
00240 {
00241     ngram_search_t *ngs = (ngram_search_t *)search;
00242     int rv = 0;
00243 
00244     /*
00245      * NOTE!!! This is not a general-purpose reinit function.  It only
00246      * deals with updates to the language model set and beam widths.
00247      */
00248 
00249     /* Update beam widths. */
00250     ngram_search_calc_beams(ngs);
00251 
00252     /* Update word mappings. */
00253     ngram_search_update_widmap(ngs);
00254 
00255     /* Now rebuild lextrees or what have you. */
00256     if (ngs->fwdtree) {
00257         if ((rv = ngram_fwdtree_reinit(ngs)) < 0)
00258             return rv;
00259     }
00260     if (ngs->fwdflat) {
00261         if ((rv = ngram_fwdflat_reinit(ngs)) < 0)
00262             return rv;
00263     }
00264 
00265     return rv;
00266 }
00267 
00268 void
00269 ngram_search_free(ps_search_t *search)
00270 {
00271     ngram_search_t *ngs = (ngram_search_t *)search;
00272 
00273     ps_search_deinit(search);
00274     if (ngs->fwdtree)
00275         ngram_fwdtree_deinit(ngs);
00276     if (ngs->fwdflat)
00277         ngram_fwdflat_deinit(ngs);
00278 
00279     hmm_context_free(ngs->hmmctx);
00280     listelem_alloc_free(ngs->chan_alloc);
00281     listelem_alloc_free(ngs->root_chan_alloc);
00282     listelem_alloc_free(ngs->latnode_alloc);
00283     ngram_model_free(ngs->lmset);
00284 
00285     ckd_free(ngs->word_chan);
00286     ckd_free(ngs->word_lat_idx);
00287     ckd_free(ngs->zeroPermTab);
00288     bitvec_free(ngs->word_active);
00289     ckd_free(ngs->bp_table);
00290     ckd_free(ngs->bscore_stack);
00291     ckd_free(ngs->bp_table_idx - 1);
00292     ckd_free_2d(ngs->active_word_list);
00293     ckd_free(ngs->last_ltrans);
00294     ckd_free(ngs);
00295 }
00296 
00297 int
00298 ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
00299 {
00300     if (frame_idx >= ngs->n_frame_alloc) {
00301         ngs->n_frame_alloc *= 2;
00302         ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1,
00303                                         (ngs->n_frame_alloc + 1)
00304                                         * sizeof(*ngs->bp_table_idx));
00305         if (ngs->frm_wordlist) {
00306             ngs->frm_wordlist = ckd_realloc(ngs->frm_wordlist,
00307                                             ngs->n_frame_alloc
00308                                             * sizeof(*ngs->frm_wordlist));
00309         }
00310         ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
00311     }
00312     ngs->bp_table_idx[frame_idx] = ngs->bpidx;
00313     return ngs->bpidx;
00314 }
00315 
00319 static void
00320 cache_bptable_paths(ngram_search_t *ngs, int32 bp)
00321 {
00322     int32 w, prev_bp;
00323     bptbl_t *bpe;
00324 
00325     bpe = &(ngs->bp_table[bp]);
00326     prev_bp = bp;
00327     w = bpe->wid;
00328     /* FIXME: This isn't the ideal way to tell if it's a filler. */
00329     while (w >= ps_search_silence_wid(ngs)) {
00330         prev_bp = ngs->bp_table[prev_bp].bp;
00331         w = ngs->bp_table[prev_bp].wid;
00332     }
00333     bpe->real_wid = dict_base_wid(ps_search_dict(ngs), w);
00334 
00335     prev_bp = ngs->bp_table[prev_bp].bp;
00336     bpe->prev_real_wid =
00337         (prev_bp != NO_BP) ? ngs->bp_table[prev_bp].real_wid : -1;
00338 }
00339 
00340 void
00341 ngram_search_save_bp(ngram_search_t *ngs, int frame_idx,
00342                      int32 w, int32 score, int32 path, int32 rc)
00343 {
00344     int32 _bp_;
00345 
00346     _bp_ = ngs->word_lat_idx[w];
00347     if (_bp_ != NO_BP) {
00348         if (ngs->bp_table[_bp_].score < score) {
00349             if (ngs->bp_table[_bp_].bp != path) {
00350                 ngs->bp_table[_bp_].bp = path;
00351                 cache_bptable_paths(ngs, _bp_);
00352             }
00353             ngs->bp_table[_bp_].score = score;
00354         }
00355         ngs->bscore_stack[ngs->bp_table[_bp_].s_idx + rc] = score;
00356     }
00357     else {
00358         int32 i, rcsize, *bss;
00359         dict_entry_t *de;
00360         bptbl_t *bpe;
00361 
00362         /* Expand the backpointer tables if necessary. */
00363         if (ngs->bpidx >= ngs->bp_table_size) {
00364             ngs->bp_table_size *= 2;
00365             ngs->bp_table = ckd_realloc(ngs->bp_table,
00366                                         ngs->bp_table_size
00367                                         * sizeof(*ngs->bp_table));
00368             E_INFO("Resized backpointer table to %d entries\n", ngs->bp_table_size);
00369         }
00370         if (ngs->bss_head >= ngs->bscore_stack_size
00371             - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) {
00372             ngs->bscore_stack_size *= 2;
00373             ngs->bscore_stack = ckd_realloc(ngs->bscore_stack,
00374                                             ngs->bscore_stack_size
00375                                             * sizeof(*ngs->bscore_stack));
00376             E_INFO("Resized score stack to %d entries\n", ngs->bscore_stack_size);
00377         }
00378 
00379         de = ps_search_dict(ngs)->dict_list[w];
00380         ngs->word_lat_idx[w] = ngs->bpidx;
00381         bpe = &(ngs->bp_table[ngs->bpidx]);
00382         bpe->wid = w;
00383         bpe->frame = frame_idx;
00384         bpe->bp = path;
00385         bpe->score = score;
00386         bpe->s_idx = ngs->bss_head;
00387         bpe->valid = TRUE;
00388 
00389         if ((de->len != 1) && (de->mpx)) {
00390             bpe->r_diph = de->phone_ids[de->len - 1];
00391             rcsize = ps_search_dict(ngs)->rcFwdSizeTable[bpe->r_diph];
00392         }
00393         else {
00394             bpe->r_diph = -1;
00395             rcsize = 1;
00396         }
00397         for (i = rcsize, bss = ngs->bscore_stack + ngs->bss_head; i > 0; --i, bss++)
00398             *bss = WORST_SCORE;
00399         ngs->bscore_stack[ngs->bss_head + rc] = score;
00400         cache_bptable_paths(ngs, ngs->bpidx);
00401 
00402         ngs->bpidx++;
00403         ngs->bss_head += rcsize;
00404     }
00405 }
00406 
00407 int
00408 ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score)
00409 {
00410     /* End of backpointers for this frame. */
00411     int end_bpidx;
00412     int best_exit, bp;
00413     int32 best_score;
00414 
00415     /* No hypothesis means no exit node! */
00416     if (ngs->n_frame == 0)
00417         return NO_BP;
00418 
00419     if (frame_idx == -1 || frame_idx >= ngs->n_frame)
00420         frame_idx = ngs->n_frame - 1;
00421     end_bpidx = ngs->bp_table_idx[frame_idx];
00422 
00423     /* FIXME: WORST_SCORE has to go away and be replaced with a log-zero number. */
00424     best_score = WORST_SCORE;
00425     best_exit = NO_BP;
00426 
00427     /* Scan back to find a frame with some backpointers in it. */
00428     while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx)
00429         --frame_idx;
00430     /* This is NOT an error, it just means there is no hypothesis yet. */
00431     if (frame_idx < 0)
00432         return NO_BP;
00433 
00434     /* Now find the entry for </s> OR the best scoring entry. */
00435     assert(end_bpidx < ngs->bp_table_size);
00436     for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) {
00437         if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs)
00438             || ngs->bp_table[bp].score > best_score) {
00439             best_score = ngs->bp_table[bp].score;
00440             best_exit = bp;
00441         }
00442         if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs))
00443             break;
00444     }
00445 
00446     if (out_best_score) *out_best_score = best_score;
00447     return best_exit;
00448 }
00449 
00450 char const *
00451 ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx)
00452 {
00453     ps_search_t *base = ps_search_base(ngs);
00454     char *c;
00455     size_t len;
00456     int bp;
00457 
00458     if (bpidx == NO_BP)
00459         return NULL;
00460 
00461     bp = bpidx;
00462     len = 0;
00463     while (bp != NO_BP) {
00464         bptbl_t *be = &ngs->bp_table[bp];
00465         bp = be->bp;
00466         if (ISA_REAL_WORD(ngs, be->wid))
00467             len += strlen(dict_base_str(ps_search_dict(ngs), be->wid)) + 1;
00468     }
00469 
00470     ckd_free(base->hyp_str);
00471     base->hyp_str = ckd_calloc(1, len);
00472     bp = bpidx;
00473     c = base->hyp_str + len - 1;
00474     while (bp != NO_BP) {
00475         bptbl_t *be = &ngs->bp_table[bp];
00476         size_t len;
00477 
00478         bp = be->bp;
00479         if (ISA_REAL_WORD(ngs, be->wid)) {
00480             len = strlen(dict_base_str(ps_search_dict(ngs), be->wid));
00481             c -= len;
00482             memcpy(c, dict_base_str(ps_search_dict(ngs), be->wid), len);
00483             if (c > base->hyp_str) {
00484                 --c;
00485                 *c = ' ';
00486             }
00487         }
00488     }
00489 
00490     return base->hyp_str;
00491 }
00492 
00493 void
00494 ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
00495 {
00496     dict_entry_t *de;
00497     chan_t *hmm, *thmm;
00498     uint16 *sseq_rc;             /* list of sseqid for all possible right context for w */
00499     int32 i;
00500 
00501     de = ps_search_dict(ngs)->dict_list[w];
00502 
00503     assert(de->mpx);
00504 
00505     sseq_rc = ps_search_dict(ngs)->rcFwdTable[de->phone_ids[de->len - 1]];
00506 
00507     hmm = ngs->word_chan[w];
00508     if ((hmm == NULL) || (hmm->hmm.s.ssid != *sseq_rc)) {
00509         hmm = listelem_malloc(ngs->chan_alloc);
00510         hmm->next = ngs->word_chan[w];
00511         ngs->word_chan[w] = hmm;
00512 
00513         hmm->info.rc_id = 0;
00514         hmm->ciphone = de->ci_phone_ids[de->len - 1];
00515         hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, *sseq_rc, hmm->ciphone);
00516     }
00517     for (i = 1, sseq_rc++; *sseq_rc != 65535; sseq_rc++, i++) {
00518         if ((hmm->next == NULL) || (hmm->next->hmm.s.ssid != *sseq_rc)) {
00519             thmm = listelem_malloc(ngs->chan_alloc);
00520             thmm->next = hmm->next;
00521             hmm->next = thmm;
00522             hmm = thmm;
00523 
00524             hmm->info.rc_id = i;
00525             hmm->ciphone = de->ci_phone_ids[de->len - 1];
00526             hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, *sseq_rc, hmm->ciphone);
00527         }
00528         else
00529             hmm = hmm->next;
00530     }
00531 }
00532 
00533 void
00534 ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
00535 {
00536     chan_t *hmm, *thmm;
00537 
00538     for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
00539         thmm = hmm->next;
00540         hmm_deinit(&hmm->hmm);
00541         listelem_free(ngs->chan_alloc, hmm);
00542     }
00543     ngs->word_chan[w] = NULL;
00544 }
00545 
00546 /*
00547  * Compute acoustic and LM scores for each BPTable entry (segment).
00548  */
00549 void
00550 ngram_compute_seg_scores(ngram_search_t *ngs, float32 lwf)
00551 {
00552     int32 bp, start_score;
00553     bptbl_t *bpe, *p_bpe;
00554     uint16 *rcpermtab;
00555     dict_entry_t *de;
00556 
00557     for (bp = 0; bp < ngs->bpidx; bp++) {
00558         bpe = ngs->bp_table + bp;
00559 
00560         /* Start of utterance. */
00561         if (bpe->bp == NO_BP) {
00562             bpe->ascr = bpe->score;
00563             bpe->lscr = 0;
00564             continue;
00565         }
00566 
00567         /* Otherwise, calculate lscr and ascr. */
00568         de = ps_search_dict(ngs)->dict_list[bpe->wid];
00569         p_bpe = ngs->bp_table + bpe->bp;
00570         rcpermtab = (p_bpe->r_diph >= 0) ?
00571             ps_search_dict(ngs)->rcFwdPermTable[p_bpe->r_diph] : ngs->zeroPermTab;
00572         start_score =
00573             ngs->bscore_stack[p_bpe->s_idx + rcpermtab[de->ci_phone_ids[0]]];
00574         /* FIXME: WORST_SCORE shouldn't propagate, but sometimes it
00575            does.  We cannot allow it to go any further because that
00576            will result in positive acoustic scores.  Tracing the
00577            source of this may be a bit involved. */
00578         if (start_score == WORST_SCORE)
00579             start_score = 0;
00580 
00581         /* FIXME: These result in positive acoustic scores when filler
00582            words have non-filler pronunciations.  That whole business
00583            is still pretty much broken but at least it doesn't
00584            segfault. */
00585         if (bpe->wid == ps_search_silence_wid(ngs)) {
00586             bpe->lscr = ngs->silpen;
00587         }
00588         else if (ISA_FILLER_WORD(ngs, bpe->wid)) {
00589             bpe->lscr = ngs->fillpen;
00590         }
00591         else {
00592             int32 n_used;
00593             bpe->lscr = ngram_tg_score(ngs->lmset, de->wid,
00594                                        p_bpe->real_wid,
00595                                        p_bpe->prev_real_wid, &n_used);
00596                         /* FIXME: Floating-point conversion = SLOW */
00597             bpe->lscr = (int32)(bpe->lscr * lwf);
00598         }
00599         bpe->ascr = bpe->score - start_score - bpe->lscr;
00600     }
00601 }
00602 
00603 static int
00604 ngram_search_start(ps_search_t *search)
00605 {
00606     ngram_search_t *ngs = (ngram_search_t *)search;
00607 
00608     ngs->done = FALSE;
00609     ngram_model_flush(ngs->lmset);
00610     if (ngs->fwdtree)
00611         ngram_fwdtree_start(ngs);
00612     else if (ngs->fwdflat)
00613         ngram_fwdflat_start(ngs);
00614     else
00615         return -1;
00616     return 0;
00617 }
00618 
00619 static int
00620 ngram_search_step(ps_search_t *search)
00621 {
00622     ngram_search_t *ngs = (ngram_search_t *)search;
00623 
00624     if (ngs->fwdtree)
00625         return ngram_fwdtree_search(ngs);
00626     else if (ngs->fwdflat)
00627         return ngram_fwdflat_search(ngs);
00628     else
00629         return -1;
00630 }
00631 
00632 static int
00633 ngram_search_finish(ps_search_t *search)
00634 {
00635     ngram_search_t *ngs = (ngram_search_t *)search;
00636 
00637     if (ngs->fwdtree) {
00638         ngram_fwdtree_finish(ngs);
00639 
00640         /* Now do fwdflat search in its entirety, if requested. */
00641         if (ngs->fwdflat) {
00642             int nfr;
00643 
00644             /* Rewind the acoustic model. */
00645             acmod_rewind(ps_search_acmod(ngs));
00646             /* Now redo search. */
00647             ngram_fwdflat_start(ngs);
00648             while ((nfr = ngram_fwdflat_search(ngs)) > 0) {
00649                 /* Do nothing! */
00650             }
00651             if (nfr < 0)
00652                 return nfr;
00653             ngram_fwdflat_finish(ngs);
00654             /* And now, we should have a result... */
00655         }
00656     }
00657     else if (ngs->fwdflat) {
00658         ngram_fwdflat_finish(ngs);
00659     }
00660 
00661     /* Mark the current utterance as done. */
00662     ngs->done = TRUE;
00663     return 0;
00664 }
00665 
00666 static ps_latlink_t *
00667 ngram_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
00668 {
00669     ngram_search_t *ngs = (ngram_search_t *)search;
00670 
00671     if (search->last_link == NULL) {
00672         search->last_link = ps_lattice_bestpath(search->dag, ngs->lmset,
00673                                                 ngs->bestpath_fwdtree_lw_ratio,
00674                                                 ngs->ascale);
00675         if (search->last_link == NULL)
00676             return NULL;
00677         /* Also calculate betas so we can fill in the posterior
00678          * probability field in the segmentation. */
00679         if (search->post == 0)
00680             search->post = ps_lattice_posterior(search->dag, ngs->lmset,
00681                                                 ngs->ascale);
00682     }
00683     if (out_score)
00684         *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
00685     return search->last_link;
00686 }
00687 
00688 static char const *
00689 ngram_search_hyp(ps_search_t *search, int32 *out_score)
00690 {
00691     ngram_search_t *ngs = (ngram_search_t *)search;
00692 
00693     /* Only do bestpath search if the utterance is complete. */
00694     if (ngs->bestpath && ngs->done) {
00695         ps_lattice_t *dag;
00696         ps_latlink_t *link;
00697 
00698         if ((dag = ngram_search_lattice(search)) == NULL)
00699             return NULL;
00700         if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL)
00701             return NULL;
00702         return ps_lattice_hyp(dag, link);
00703     }
00704     else {
00705         int32 bpidx;
00706 
00707         /* fwdtree and fwdflat use same backpointer table. */
00708         bpidx = ngram_search_find_exit(ngs, -1, out_score);
00709         if (bpidx != NO_BP)
00710             return ngram_search_bp_hyp(ngs, bpidx);
00711     }
00712 
00713     return NULL;
00714 }
00715 
00716 static void
00717 ngram_search_bp2itor(ps_seg_t *seg, int bp)
00718 {
00719     ngram_search_t *ngs = (ngram_search_t *)seg->search;
00720     bptbl_t *be, *pbe;
00721 
00722     be = &ngs->bp_table[bp];
00723     pbe = be->bp == -1 ? NULL : &ngs->bp_table[be->bp];
00724     seg->word = dict_word_str(ps_search_dict(ngs), be->wid);
00725     seg->ef = be->frame;
00726     seg->sf = pbe ? pbe->frame + 1 : 0;
00727     seg->prob = 0; /* Bogus value... */
00728     /* Compute acoustic and LM scores for this segment. */
00729     if (pbe == NULL) {
00730         seg->ascr = be->ascr = be->score;
00731         seg->lscr = be->lscr = 0;
00732         seg->lback = 0;
00733     }
00734     else {
00735         dict_entry_t *de;
00736         uint16 *rcpermtab;
00737         int32 start_score;
00738 
00739         de = seg->search->dict->dict_list[be->wid];
00740         /* Find ending path score of previous word. */
00741         rcpermtab = (pbe->r_diph >= 0)
00742             ? seg->search->dict->rcFwdPermTable[pbe->r_diph]
00743             : ngs->zeroPermTab;
00744         start_score = ngs->bscore_stack[pbe->s_idx + rcpermtab[de->ci_phone_ids[0]]];
00745         if (be->wid == ps_search_silence_wid(ngs)) {
00746             be->lscr = ngs->silpen;
00747         }
00748         else if (ISA_FILLER_WORD(ngs, be->wid)) {
00749             be->lscr = ngs->fillpen;
00750         }
00751         else {
00752             be->lscr = ngram_tg_score(ngs->lmset, de->wid,
00753                                       pbe->real_wid,
00754                                       pbe->prev_real_wid, &seg->lback);
00755             be->lscr = (int32)(be->lscr * seg->lwf);
00756         }
00757         be->ascr = be->score - start_score - be->lscr;
00758         seg->ascr = be->ascr;
00759         seg->lscr = be->lscr;
00760     }
00761 }
00762 
00763 static void
00764 ngram_bp_seg_free(ps_seg_t *seg)
00765 {
00766     bptbl_seg_t *itor = (bptbl_seg_t *)seg;
00767     
00768     ckd_free(itor->bpidx);
00769     ckd_free(itor);
00770 }
00771 
00772 static ps_seg_t *
00773 ngram_bp_seg_next(ps_seg_t *seg)
00774 {
00775     bptbl_seg_t *itor = (bptbl_seg_t *)seg;
00776 
00777     if (++itor->cur == itor->n_bpidx) {
00778         ngram_bp_seg_free(seg);
00779         return NULL;
00780     }
00781 
00782     ngram_search_bp2itor(seg, itor->bpidx[itor->cur]);
00783     return seg;
00784 }
00785 
00786 static ps_segfuncs_t ngram_bp_segfuncs = {
00787     /* seg_next */ ngram_bp_seg_next,
00788     /* seg_free */ ngram_bp_seg_free
00789 };
00790 
00791 static ps_seg_t *
00792 ngram_search_bp_iter(ngram_search_t *ngs, int bpidx, float32 lwf)
00793 {
00794     bptbl_seg_t *itor;
00795     int bp, cur;
00796 
00797     /* Calling this an "iterator" is a bit of a misnomer since we have
00798      * to get the entire backtrace in order to produce it.  On the
00799      * other hand, all we actually need is the bptbl IDs, and we can
00800      * allocate a fixed-size array of them. */
00801     itor = ckd_calloc(1, sizeof(*itor));
00802     itor->base.vt = &ngram_bp_segfuncs;
00803     itor->base.search = ps_search_base(ngs);
00804     itor->base.lwf = lwf;
00805     itor->n_bpidx = 0;
00806     bp = bpidx;
00807     while (bp != NO_BP) {
00808         bptbl_t *be = &ngs->bp_table[bp];
00809         bp = be->bp;
00810         ++itor->n_bpidx;
00811     }
00812     if (itor->n_bpidx == 0) {
00813         ckd_free(itor);
00814         return NULL;
00815     }
00816     itor->bpidx = ckd_calloc(itor->n_bpidx, sizeof(*itor->bpidx));
00817     cur = itor->n_bpidx - 1;
00818     bp = bpidx;
00819     while (bp != NO_BP) {
00820         bptbl_t *be = &ngs->bp_table[bp];
00821         itor->bpidx[cur] = bp;
00822         bp = be->bp;
00823         --cur;
00824     }
00825 
00826     /* Fill in relevant fields for first element. */
00827     ngram_search_bp2itor((ps_seg_t *)itor, itor->bpidx[0]);
00828 
00829     return (ps_seg_t *)itor;
00830 }
00831 
00832 static ps_seg_t *
00833 ngram_search_seg_iter(ps_search_t *search, int32 *out_score)
00834 {
00835     ngram_search_t *ngs = (ngram_search_t *)search;
00836 
00837     /* Only do bestpath search if the utterance is done. */
00838     if (ngs->bestpath && ngs->done) {
00839         ps_lattice_t *dag;
00840         ps_latlink_t *link;
00841 
00842         if ((dag = ngram_search_lattice(search)) == NULL)
00843             return NULL;
00844         if ((link = ngram_search_bestpath(search, out_score, TRUE)) == NULL)
00845             return NULL;
00846         return ps_lattice_seg_iter(dag, link,
00847                                    ngs->bestpath_fwdtree_lw_ratio);
00848     }
00849     else {
00850         int32 bpidx;
00851 
00852         /* fwdtree and fwdflat use same backpointer table. */
00853         bpidx = ngram_search_find_exit(ngs, -1, out_score);
00854         return ngram_search_bp_iter(ngs, bpidx,
00855                                     /* but different language weights... */
00856                                     (ngs->done && ngs->fwdflat)
00857                                     ? ngs->fwdflat_fwdtree_lw_ratio : 1.0f);
00858     }
00859 
00860     return NULL;
00861 }
00862 
00863 static int32
00864 ngram_search_prob(ps_search_t *search)
00865 {
00866     ngram_search_t *ngs = (ngram_search_t *)search;
00867 
00868     /* Only do bestpath search if the utterance is done. */
00869     if (ngs->bestpath && ngs->done) {
00870         ps_lattice_t *dag;
00871         ps_latlink_t *link;
00872 
00873         if ((dag = ngram_search_lattice(search)) == NULL)
00874             return 0;
00875         if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
00876             return 0;
00877         return search->post;
00878     }
00879     else {
00880         /* FIXME: Give some kind of good estimate here, eventually. */
00881         return 0;
00882     }
00883 }
00884 
00885 static void
00886 create_dag_nodes(ngram_search_t *ngs, ps_lattice_t *dag)
00887 {
00888     bptbl_t *bp_ptr;
00889     int32 i;
00890 
00891     for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) {
00892         int32 sf, ef, wid;
00893         ps_latnode_t *node;
00894 
00895         if (!bp_ptr->valid)
00896             continue;
00897 
00898         sf = (bp_ptr->bp < 0) ? 0 : ngs->bp_table[bp_ptr->bp].frame + 1;
00899         ef = bp_ptr->frame;
00900         wid = bp_ptr->wid;
00901 
00902         assert(ef < dag->n_frames);
00903         /* Skip non-final </s> entries. */
00904         if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->n_frames - 1))
00905             continue;
00906 
00907         /* Skip if word not in LM */
00908         if ((!ISA_FILLER_WORD(ngs, wid))
00909             && (!ngram_model_set_known_wid(ngs->lmset,
00910                                            dict_base_wid(ps_search_dict(ngs), wid))))
00911             continue;
00912 
00913         /* See if bptbl entry <wid,sf> already in lattice */
00914         for (node = dag->nodes; node; node = node->next) {
00915             if ((node->wid == wid) && (node->sf == sf))
00916                 break;
00917         }
00918 
00919         /* For the moment, store bptbl indices in node.{fef,lef} */
00920         if (node)
00921             node->lef = i;
00922         else {
00923             /* New node; link to head of list */
00924             node = listelem_malloc(dag->latnode_alloc);
00925             node->wid = wid;
00926             node->sf = sf; /* This is a frame index. */
00927             node->fef = node->lef = i; /* These are backpointer indices (argh) */
00928             node->reachable = FALSE;
00929             node->entries = NULL;
00930             node->exits = NULL;
00931 
00932             node->next = dag->nodes;
00933             dag->nodes = node;
00934         }
00935     }
00936 }
00937 
00938 static ps_latnode_t *
00939 find_start_node(ngram_search_t *ngs, ps_lattice_t *dag)
00940 {
00941     ps_latnode_t *node;
00942 
00943     /* Find start node <s>.0 */
00944     for (node = dag->nodes; node; node = node->next) {
00945         if ((node->wid == ps_search_start_wid(ngs)) && (node->sf == 0))
00946             break;
00947     }
00948     if (!node) {
00949         /* This is probably impossible. */
00950         E_ERROR("Couldn't find <s> in first frame\n");
00951         return NULL;
00952     }
00953     return node;
00954 }
00955 
00956 static ps_latnode_t *
00957 find_end_node(ngram_search_t *ngs, ps_lattice_t *dag, float32 lwf)
00958 {
00959     ps_latnode_t *node;
00960     int32 ef, bestbp, bp, bestscore;
00961 
00962     /* Find final node </s>.last_frame; nothing can follow this node */
00963     for (node = dag->nodes; node; node = node->next) {
00964         int32 lef = ngs->bp_table[node->lef].frame;
00965         if ((node->wid == ps_search_finish_wid(ngs))
00966             && (lef == dag->n_frames - 1))
00967             break;
00968     }
00969     if (node != NULL)
00970         return node;
00971 
00972     /* It is quite likely that no </s> exited in the last frame.  So,
00973      * find the node corresponding to the best exit. */
00974     /* Find the last frame containing a word exit. */
00975     for (ef = dag->n_frames - 1;
00976          ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx;
00977          --ef);
00978     if (ef < 0) {
00979         E_ERROR("Empty backpointer table: can not build DAG.\n");
00980         return NULL;
00981     }
00982 
00983     /* Find best word exit in that frame. */
00984     bestscore = WORST_SCORE;
00985     bestbp = NO_BP;
00986     for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) {
00987         int32 n_used, l_scr;
00988         l_scr = ngram_tg_score(ngs->lmset, ps_search_finish_wid(ngs),
00989                                ngs->bp_table[bp].real_wid,
00990                                ngs->bp_table[bp].prev_real_wid,
00991                                &n_used);
00992                 /* FIXME: Floating-point conversion = SLOW */
00993         l_scr = (int32)(l_scr * lwf);
00994 
00995         if (ngs->bp_table[bp].score + l_scr > bestscore) {
00996             bestscore = ngs->bp_table[bp].score + l_scr;
00997             bestbp = bp;
00998         }
00999     }
01000     E_WARN("</s> not found in last frame, using %s instead\n",
01001            dict_base_str(ps_search_dict(ngs), ngs->bp_table[bestbp].wid));
01002 
01003     /* Now find the node that corresponds to it. */
01004     for (node = dag->nodes; node; node = node->next) {
01005         if (node->lef == bestbp)
01006             return node;
01007     }
01008 
01009     /* FIXME: This seems to happen a lot! */
01010     E_ERROR("Failed to find DAG node corresponding to %s\n",
01011            dict_base_str(ps_search_dict(ngs), ngs->bp_table[bestbp].wid));
01012     return NULL;
01013 }
01014 
01015 /*
01016  * Build lattice from bptable.
01017  */
01018 ps_lattice_t *
01019 ngram_search_lattice(ps_search_t *search)
01020 {
01021     int32 i, ef, lef, score, bss_offset;
01022     ps_latnode_t *node, *from, *to;
01023     ngram_search_t *ngs;
01024     ps_lattice_t *dag;
01025 
01026     ngs = (ngram_search_t *)search;
01027 
01028     /* Check to see if a lattice has previously been created over the
01029      * same number of frames, and reuse it if so. */
01030     if (search->dag && search->dag->n_frames == ngs->n_frame)
01031         return search->dag;
01032 
01033     /* Nope, create a new one. */
01034     ps_lattice_free(search->dag);
01035     search->dag = NULL;
01036     dag = ps_lattice_init_search(search, ngs->n_frame);
01037     /* Compute these such that they agree with the fwdtree language weight. */
01038     ngram_compute_seg_scores(ngs,
01039                              ngs->fwdflat
01040                              ? ngs->fwdflat_fwdtree_lw_ratio : 1.0);
01041     create_dag_nodes(ngs, dag);
01042     if ((dag->start = find_start_node(ngs, dag)) == NULL)
01043         goto error_out;
01044     if ((dag->end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL)
01045         goto error_out;
01046     E_INFO("lattice start node %s.%d end node %s.%d\n",
01047            dict_word_str(search->dict, dag->start->wid), dag->start->sf,
01048            dict_word_str(search->dict, dag->end->wid), dag->end->sf);
01049 
01050     dag->final_node_ascr = ngs->bp_table[dag->end->lef].ascr;
01051 
01052     /*
01053      * At this point, dag->nodes is ordered such that nodes earlier in
01054      * the list can follow (in time) those later in the list, but not
01055      * vice versa.  Now create precedence links and simultanesously
01056      * mark all nodes that can reach dag->end.  (All nodes are reached
01057      * from dag->start; no problem there.)
01058      */
01059     dag->end->reachable = TRUE;
01060     for (to = dag->end; to; to = to->next) {
01061         /* Skip if not reachable; it will never be reachable from dag->end */
01062         if (!to->reachable)
01063             continue;
01064 
01065         /* Find predecessors of to : from->fef+1 <= to->sf <= from->lef+1 */
01066         for (from = to->next; from; from = from->next) {
01067             bptbl_t *bp_ptr;
01068 
01069             ef = ngs->bp_table[from->fef].frame;
01070             lef = ngs->bp_table[from->lef].frame;
01071 
01072             if ((to->sf <= ef) || (to->sf > lef + 1))
01073                 continue;
01074 
01075             /* Find bptable entry for "from" that exactly precedes "to" */
01076             i = from->fef;
01077             bp_ptr = ngs->bp_table + i;
01078             for (; i <= from->lef; i++, bp_ptr++) {
01079                 if (bp_ptr->wid != from->wid)
01080                     continue;
01081                 if (bp_ptr->frame >= to->sf - 1)
01082                     break;
01083             }
01084 
01085             if ((i > from->lef) || (bp_ptr->frame != to->sf - 1))
01086                 continue;
01087 
01088             /* Find acoustic score from.sf->to.sf-1 with right context = to */
01089             if (bp_ptr->r_diph >= 0)
01090                 bss_offset =
01091                     search->dict->rcFwdPermTable[bp_ptr->r_diph]
01092                     [search->dict->dict_list[to->wid]->ci_phone_ids[0]];
01093             else
01094                 bss_offset = 0;
01095             score =
01096                 (ngs->bscore_stack[bp_ptr->s_idx + bss_offset] - bp_ptr->score) +
01097                 bp_ptr->ascr;
01098             if (score > 0) {
01099                 /* Scores must be negative, or Bad Things will happen.
01100                    In general, they are, except in corner cases
01101                    involving filler words.  We don't want to throw any
01102                    links away so we'll keep these, but with some
01103                    arbitrarily improbable but recognizable score. */
01104                 ps_lattice_link(dag, from, to, -424242, bp_ptr->frame);
01105                 from->reachable = TRUE;
01106             }
01107             else if (score > WORST_SCORE) {
01108                 ps_lattice_link(dag, from, to, score, bp_ptr->frame);
01109                 from->reachable = TRUE;
01110             }
01111         }
01112     }
01113 
01114     /* There must be at least one path between dag->start and dag->end */
01115     if (!dag->start->reachable) {
01116         E_ERROR("End node of lattice isolated; unreachable\n");
01117         goto error_out;
01118     }
01119 
01120     for (node = dag->nodes; node; node = node->next) {
01121         /* Change node->{fef,lef} from bptbl indices to frames. */
01122         node->fef = ngs->bp_table[node->fef].frame;
01123         node->lef = ngs->bp_table[node->lef].frame;
01124         /* Find base wid for nodes. */
01125         node->basewid = dict_base_wid(search->dict, node->wid);
01126     }
01127 
01128     /* Minor hack: If the final node is a filler word and not </s>,
01129      * then set its base word ID to </s>, so that the language model
01130      * scores won't be screwed up. */
01131     if (ISA_FILLER_WORD(ngs, dag->end->wid))
01132         dag->end->basewid = ps_search_finish_wid(ngs);
01133 
01134     /* Free nodes unreachable from dag->end and their links */
01135     ps_lattice_delete_unreachable(dag);
01136 
01137     /* Build links around silence and filler words, since they do not
01138      * exist in the language model. */
01139     ps_lattice_bypass_fillers(dag, ngs->silpen, ngs->fillpen);
01140 
01141     search->dag = dag;
01142     return dag;
01143 
01144 error_out:
01145     ps_lattice_free(dag);
01146     return NULL;
01147 }

Generated on Thu Jan 27 2011 for PocketSphinx by  doxygen 1.7.1