PocketSphinx  0.6
fsg_search.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
34 
35 /*
36  * fsg_search.c -- Search structures for FSM decoding.
37  *
38  * **********************************************
39  * CMU ARPA Speech Project
40  *
41  * Copyright (c) 2004 Carnegie Mellon University.
42  * ALL RIGHTS RESERVED.
43  * **********************************************
44  *
45  * HISTORY
46  *
47  * 18-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
48  * Started.
49  */
50 
51 /* System headers. */
52 #include <stdio.h>
53 #include <string.h>
54 #include <assert.h>
55 
56 /* SphinxBase headers. */
57 #include <sphinxbase/err.h>
58 #include <sphinxbase/ckd_alloc.h>
59 #include <sphinxbase/strfuncs.h>
60 #include <sphinxbase/cmd_ln.h>
61 
62 /* Local headers. */
63 #include "pocketsphinx_internal.h"
64 #include "ps_lattice_internal.h"
65 #include "fsg_search_internal.h"
66 #include "fsg_history.h"
67 #include "fsg_lextree.h"
68 
69 /* Turn this on for detailed debugging dump */
70 #define __FSG_DBG__ 0
71 #define __FSG_DBG_CHAN__ 0
72 
73 static ps_seg_t *fsg_search_seg_iter(ps_search_t *search, int32 *out_score);
74 static ps_lattice_t *fsg_search_lattice(ps_search_t *search);
75 static int fsg_search_prob(ps_search_t *search);
76 
77 static ps_searchfuncs_t fsg_funcs = {
78  /* name: */ "fsg",
79  /* start: */ fsg_search_start,
80  /* step: */ fsg_search_step,
81  /* finish: */ fsg_search_finish,
82  /* reinit: */ fsg_search_reinit,
83  /* free: */ fsg_search_free,
84  /* lattice: */ fsg_search_lattice,
85  /* hyp: */ fsg_search_hyp,
86  /* prob: */ fsg_search_prob,
87  /* seg_iter: */ fsg_search_seg_iter,
88 };
89 
91 fsg_search_init(cmd_ln_t *config,
92  acmod_t *acmod,
93  dict_t *dict,
94  dict2pid_t *d2p)
95 {
96  fsg_search_t *fsgs;
97  char const *path;
98 
99  fsgs = ckd_calloc(1, sizeof(*fsgs));
100  ps_search_init(ps_search_base(fsgs), &fsg_funcs, config, acmod, dict, d2p);
101 
102  /* Initialize HMM context. */
103  fsgs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
104  acmod->tmat->tp, NULL, acmod->mdef->sseq);
105  if (fsgs->hmmctx == NULL) {
106  ps_search_free(ps_search_base(fsgs));
107  return NULL;
108  }
109 
110  /* Intialize the search history object */
111  fsgs->history = fsg_history_init(NULL, dict);
112  fsgs->frame = -1;
113 
114  /* Initialize FSG table. */
115  fsgs->fsgs = hash_table_new(5, HASH_CASE_YES);
116 
117  /* Get search pruning parameters */
118  fsgs->beam_factor = 1.0f;
119  fsgs->beam = fsgs->beam_orig
120  = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"))
121  >> SENSCR_SHIFT;
122  fsgs->pbeam = fsgs->pbeam_orig
123  = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"))
124  >> SENSCR_SHIFT;
125  fsgs->wbeam = fsgs->wbeam_orig
126  = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"))
127  >> SENSCR_SHIFT;
128 
129  /* LM related weights/penalties */
130  fsgs->lw = cmd_ln_float32_r(config, "-lw");
131  fsgs->pip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip"))
132  * fsgs->lw)
133  >> SENSCR_SHIFT;
134  fsgs->wip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip"))
135  * fsgs->lw)
136  >> SENSCR_SHIFT;
137 
138  /* Best path search (and confidence annotation)? */
139  if (cmd_ln_boolean_r(config, "-bestpath"))
140  fsgs->bestpath = TRUE;
141 
142  /* Acoustic score scale for posterior probabilities. */
143  fsgs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
144 
145  E_INFO("FSG(beam: %d, pbeam: %d, wbeam: %d; wip: %d, pip: %d)\n",
146  fsgs->beam_orig, fsgs->pbeam_orig, fsgs->wbeam_orig,
147  fsgs->wip, fsgs->pip);
148 
149  /* Load an FSG if one was specified in config */
150  if ((path = cmd_ln_str_r(config, "-fsg"))) {
151  fsg_model_t *fsg;
152 
153  if ((fsg = fsg_model_readfile(path, acmod->lmath, fsgs->lw)) == NULL)
154  goto error_out;
155  if (fsg_set_add(fsgs, fsg_model_name(fsg), fsg) != fsg) {
156  fsg_model_free(fsg);
157  goto error_out;
158  }
159  if (fsg_set_select(fsgs, fsg_model_name(fsg)) == NULL)
160  goto error_out;
161  if (fsg_search_reinit(ps_search_base(fsgs),
162  ps_search_dict(fsgs),
163  ps_search_dict2pid(fsgs)) < 0)
164  goto error_out;
165  }
166  /* Or load a JSGF grammar */
167  else if ((path = cmd_ln_str_r(config, "-jsgf"))) {
168  fsg_model_t *fsg;
169  jsgf_rule_t *rule;
170  char const *toprule;
171 
172  if ((fsgs->jsgf = jsgf_parse_file(path, NULL)) == NULL)
173  goto error_out;
174 
175  rule = NULL;
176  /* Take the -toprule if specified. */
177  if ((toprule = cmd_ln_str_r(config, "-toprule"))) {
178  char *anglerule;
179  anglerule = string_join("<", toprule, ">", NULL);
180  rule = jsgf_get_rule(fsgs->jsgf, anglerule);
181  ckd_free(anglerule);
182  if (rule == NULL) {
183  E_ERROR("Start rule %s not found\n", toprule);
184  goto error_out;
185  }
186  }
187  /* Otherwise, take the first public rule. */
188  else {
189  jsgf_rule_iter_t *itor;
190 
191  for (itor = jsgf_rule_iter(fsgs->jsgf); itor;
192  itor = jsgf_rule_iter_next(itor)) {
193  rule = jsgf_rule_iter_rule(itor);
194  if (jsgf_rule_public(rule))
195  break;
196  }
197  if (rule == NULL) {
198  E_ERROR("No public rules found in %s\n", path);
199  goto error_out;
200  }
201  }
202  fsg = jsgf_build_fsg(fsgs->jsgf, rule, acmod->lmath, fsgs->lw);
203  if (fsg_set_add(fsgs, fsg_model_name(fsg), fsg) != fsg) {
204  fsg_model_free(fsg);
205  goto error_out;
206  }
207  if (fsg_set_select(fsgs, fsg_model_name(fsg)) == NULL)
208  goto error_out;
209  if (fsg_search_reinit(ps_search_base(fsgs),
210  ps_search_dict(fsgs),
211  ps_search_dict2pid(fsgs)) < 0)
212  goto error_out;
213  }
214 
215  return ps_search_base(fsgs);
216 
217 error_out:
218  fsg_search_free(ps_search_base(fsgs));
219  return NULL;
220 }
221 
222 void
223 fsg_search_free(ps_search_t *search)
224 {
225  fsg_search_t *fsgs = (fsg_search_t *)search;
226  hash_iter_t *itor;
227 
228  ps_search_deinit(search);
229  if (fsgs->jsgf)
230  jsgf_grammar_free(fsgs->jsgf);
231  fsg_lextree_free(fsgs->lextree);
232  if (fsgs->history) {
233  fsg_history_reset(fsgs->history);
234  fsg_history_set_fsg(fsgs->history, NULL, NULL);
235  fsg_history_free(fsgs->history);
236  }
237  if (fsgs->fsgs) {
238  for (itor = hash_table_iter(fsgs->fsgs);
239  itor; itor = hash_table_iter_next(itor)) {
240  fsg_model_t *fsg = (fsg_model_t *) hash_entry_val(itor->ent);
241  fsg_model_free(fsg);
242  }
243  hash_table_free(fsgs->fsgs);
244  }
245  hmm_context_free(fsgs->hmmctx);
246  ckd_free(fsgs);
247 }
248 
249 int
250 fsg_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
251 {
252  fsg_search_t *fsgs = (fsg_search_t *)search;
253 
254  /* Free the old lextree */
255  if (fsgs->lextree)
256  fsg_lextree_free(fsgs->lextree);
257 
258  /* Free old dict2pid, dict */
259  ps_search_base_reinit(search, dict, d2p);
260 
261  /* Nothing to update */
262  if (fsgs->fsg == NULL)
263  return;
264 
265  /* Update the number of words (not used by this module though). */
266  search->n_words = dict_size(dict);
267 
268  /* Allocate new lextree for the given FSG */
269  fsgs->lextree = fsg_lextree_init(fsgs->fsg, dict, d2p,
270  ps_search_acmod(fsgs)->mdef,
271  fsgs->hmmctx, fsgs->wip, fsgs->pip);
272 
273  /* Inform the history module of the new fsg */
274  fsg_history_set_fsg(fsgs->history, fsgs->fsg, dict);
275 
276  return 0;
277 }
278 
279 
280 static int
281 fsg_search_add_silences(fsg_search_t *fsgs, fsg_model_t *fsg)
282 {
283  dict_t *dict;
284  int32 wid;
285  int n_sil;
286 
287  dict = ps_search_dict(fsgs);
288  /*
289  * NOTE: Unlike N-Gram search, we do not use explicit start and
290  * end symbols. This is because the start and end nodes are
291  * defined in the grammar. We do add silence/filler self-loops to
292  * all states in order to allow for silence between words and at
293  * the beginning and end of utterances.
294  *
295  * This has some implications for word graph generation, namely,
296  * that there can (and usually will) be multiple start and end
297  * states in the word graph. We therefore do add explicit start
298  * and end nodes to the graph.
299  */
300  /* Add silence self-loops to all states. */
301  fsg_model_add_silence(fsg, "<sil>", -1,
302  cmd_ln_float32_r(ps_search_config(fsgs), "-silprob"));
303  n_sil = 0;
304  /* Add self-loops for all other fillers. */
305  for (wid = dict_filler_start(dict); wid < dict_filler_end(dict); ++wid) {
306  char const *word = dict_wordstr(dict, wid);
307  if (wid == dict_startwid(dict) || wid == dict_finishwid(dict))
308  continue;
309  fsg_model_add_silence(fsg, word, -1,
310  cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob"));
311  ++n_sil;
312  }
313 
314  return n_sil;
315 }
316 
317 /* Scans the dictionary and check if all words are present. */
318 static int
319 fsg_search_check_dict(fsg_search_t *fsgs, fsg_model_t *fsg)
320 {
321  dict_t *dict;
322  int i;
323 
324  dict = ps_search_dict(fsgs);
325  for (i = 0; i < fsg_model_n_word(fsg); ++i) {
326  char const *word;
327  int32 wid;
328 
329  word = fsg_model_word_str(fsg, i);
330  wid = dict_wordid(dict, word);
331  if (wid == BAD_S3WID) {
332  E_ERROR("The word '%s' is missing in the dictionary\n", word);
333  return FALSE;
334  }
335  }
336 
337  return TRUE;
338 }
339 
340 static int
341 fsg_search_add_altpron(fsg_search_t *fsgs, fsg_model_t *fsg)
342 {
343  dict_t *dict;
344  int n_alt, n_word;
345  int i;
346 
347  dict = ps_search_dict(fsgs);
348  /* Scan FSG's vocabulary for words that have alternate pronunciations. */
349  n_alt = 0;
350  n_word = fsg_model_n_word(fsg);
351  for (i = 0; i < n_word; ++i) {
352  char const *word;
353  int32 wid;
354 
355  word = fsg_model_word_str(fsg, i);
356  wid = dict_wordid(dict, word);
357  if (wid != BAD_S3WID) {
358  while ((wid = dict_nextalt(dict, wid)) != BAD_S3WID) {
359  n_alt += fsg_model_add_alt(fsg, word, dict_wordstr(dict, wid));
360  }
361  }
362  }
363 
364  E_INFO("Added %d alternate word transitions\n", n_alt);
365  return n_alt;
366 }
367 
368 fsg_model_t *
369 fsg_set_get_fsg(fsg_search_t *fsgs, const char *name)
370 {
371  void *val;
372 
373  if (hash_table_lookup(fsgs->fsgs, name, &val) < 0)
374  return NULL;
375  return (fsg_model_t *)val;
376 }
377 
378 fsg_model_t *
379 fsg_set_add(fsg_search_t *fsgs, char const *name, fsg_model_t *fsg)
380 {
381  if (name == NULL)
382  name = fsg_model_name(fsg);
383 
384  if (!fsg_search_check_dict(fsgs, fsg))
385  return NULL;
386 
387  /* Add silence transitions and alternate words. */
388  if (cmd_ln_boolean_r(ps_search_config(fsgs), "-fsgusefiller")
389  && !fsg_model_has_sil(fsg))
390  fsg_search_add_silences(fsgs, fsg);
391  if (cmd_ln_boolean_r(ps_search_config(fsgs), "-fsgusealtpron")
392  && !fsg_model_has_alt(fsg))
393  fsg_search_add_altpron(fsgs, fsg);
394 
395  return (fsg_model_t *)hash_table_enter(fsgs->fsgs, name, fsg);
396 }
397 
398 
399 fsg_model_t *
400 fsg_set_remove_byname(fsg_search_t *fsgs, char const *key)
401 {
402  fsg_model_t *oldfsg;
403  void *val;
404 
405  /* Look for the matching FSG. */
406  if (hash_table_lookup(fsgs->fsgs, key, &val) < 0) {
407  E_ERROR("FSG `%s' to be deleted not found\n", key);
408  return NULL;
409  }
410  oldfsg = val;
411 
412  /* Remove it from the FSG table. */
413  hash_table_delete(fsgs->fsgs, key);
414  /* If this was the currently active FSG, also delete other stuff */
415  if (fsgs->fsg == oldfsg) {
416  fsg_lextree_free(fsgs->lextree);
417  fsgs->lextree = NULL;
418  fsg_history_set_fsg(fsgs->history, NULL, NULL);
419  fsgs->fsg = NULL;
420  }
421  return oldfsg;
422 }
423 
424 
425 fsg_model_t *
426 fsg_set_remove(fsg_search_t *fsgs, fsg_model_t *fsg)
427 {
428  char const *key;
429  hash_iter_t *itor;
430 
431  key = NULL;
432  for (itor = hash_table_iter(fsgs->fsgs);
433  itor; itor = hash_table_iter_next(itor)) {
434  fsg_model_t *oldfsg;
435 
436  oldfsg = (fsg_model_t *) hash_entry_val(itor->ent);
437  if (oldfsg == fsg) {
438  key = hash_entry_key(itor->ent);
439  hash_table_iter_free(itor);
440  break;
441  }
442  }
443  if (key == NULL) {
444  E_WARN("FSG '%s' to be deleted not found\n", fsg_model_name(fsg));
445  return NULL;
446  }
447  else
448  return fsg_set_remove_byname(fsgs, key);
449 }
450 
451 
452 fsg_model_t *
453 fsg_set_select(fsg_search_t *fsgs, const char *name)
454 {
455  fsg_model_t *fsg;
456 
457  fsg = fsg_set_get_fsg(fsgs, name);
458  if (fsg == NULL) {
459  E_ERROR("FSG '%s' not known; cannot make it current\n", name);
460  return NULL;
461  }
462  fsgs->fsg = fsg;
463  return fsg;
464 }
465 
468 {
469  return hash_table_iter(fsgs->fsgs);
470 }
471 
474 {
475  return hash_table_iter_next(itor);
476 }
477 
478 fsg_model_t *
480 {
481  return ((fsg_model_t *)itor->ent->val);
482 }
483 
484 void
486 {
487  hash_table_iter_free(itor);
488 }
489 
490 static void
491 fsg_search_sen_active(fsg_search_t *fsgs)
492 {
493  gnode_t *gn;
494  fsg_pnode_t *pnode;
495  hmm_t *hmm;
496 
497  acmod_clear_active(ps_search_acmod(fsgs));
498 
499  for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
500  pnode = (fsg_pnode_t *) gnode_ptr(gn);
501  hmm = fsg_pnode_hmmptr(pnode);
502  assert(hmm_frame(hmm) == fsgs->frame);
503  acmod_activate_hmm(ps_search_acmod(fsgs), hmm);
504  }
505 }
506 
507 
508 /*
509  * Evaluate all the active HMMs.
510  * (Executed once per frame.)
511  */
512 static void
513 fsg_search_hmm_eval(fsg_search_t *fsgs)
514 {
515  gnode_t *gn;
516  fsg_pnode_t *pnode;
517  hmm_t *hmm;
518  int32 bestscore;
519  int32 n, maxhmmpf;
520 
521  bestscore = WORST_SCORE;
522 
523  if (!fsgs->pnode_active) {
524  E_ERROR("Frame %d: No active HMM!!\n", fsgs->frame);
525  return;
526  }
527 
528  for (n = 0, gn = fsgs->pnode_active; gn; gn = gnode_next(gn), n++) {
529  int32 score;
530 
531  pnode = (fsg_pnode_t *) gnode_ptr(gn);
532  hmm = fsg_pnode_hmmptr(pnode);
533  assert(hmm_frame(hmm) == fsgs->frame);
534 
535 #if __FSG_DBG__
536  E_INFO("pnode(%08x) active @frm %5d\n", (int32) pnode,
537  fsgs->frame);
538  hmm_dump(hmm, stdout);
539 #endif
540  score = hmm_vit_eval(hmm);
541 #if __FSG_DBG_CHAN__
542  E_INFO("pnode(%08x) after eval @frm %5d\n",
543  (int32) pnode, fsgs->frame);
544  hmm_dump(hmm, stdout);
545 #endif
546 
547  if (score BETTER_THAN bestscore)
548  bestscore = score;
549  }
550 
551 #if __FSG_DBG__
552  E_INFO("[%5d] %6d HMM; bestscr: %11d\n", fsgs->frame, n, bestscore);
553 #endif
554  fsgs->n_hmm_eval += n;
555 
556  /* Adjust beams if #active HMMs larger than absolute threshold */
557  maxhmmpf = cmd_ln_int32_r(ps_search_config(fsgs), "-maxhmmpf");
558  if (maxhmmpf != -1 && n > maxhmmpf) {
559  /*
560  * Too many HMMs active; reduce the beam factor applied to the default
561  * beams, but not if the factor is already at a floor (0.1).
562  */
563  if (fsgs->beam_factor > 0.1) { /* Hack!! Hardwired constant 0.1 */
564  fsgs->beam_factor *= 0.9f; /* Hack!! Hardwired constant 0.9 */
565  fsgs->beam =
566  (int32) (fsgs->beam_orig * fsgs->beam_factor);
567  fsgs->pbeam =
568  (int32) (fsgs->pbeam_orig * fsgs->beam_factor);
569  fsgs->wbeam =
570  (int32) (fsgs->wbeam_orig * fsgs->beam_factor);
571  }
572  }
573  else {
574  fsgs->beam_factor = 1.0f;
575  fsgs->beam = fsgs->beam_orig;
576  fsgs->pbeam = fsgs->pbeam_orig;
577  fsgs->wbeam = fsgs->wbeam_orig;
578  }
579 
580  if (n > fsg_lextree_n_pnode(fsgs->lextree))
581  E_FATAL("PANIC! Frame %d: #HMM evaluated(%d) > #PNodes(%d)\n",
582  fsgs->frame, n, fsg_lextree_n_pnode(fsgs->lextree));
583 
584  fsgs->bestscore = bestscore;
585 }
586 
587 
588 static void
589 fsg_search_pnode_trans(fsg_search_t *fsgs, fsg_pnode_t * pnode)
590 {
591  fsg_pnode_t *child;
592  hmm_t *hmm;
593  int32 newscore, thresh, nf;
594 
595  assert(pnode);
596  assert(!fsg_pnode_leaf(pnode));
597 
598  nf = fsgs->frame + 1;
599  thresh = fsgs->bestscore + fsgs->beam;
600 
601  hmm = fsg_pnode_hmmptr(pnode);
602 
603  for (child = fsg_pnode_succ(pnode);
604  child; child = fsg_pnode_sibling(child)) {
605  newscore = hmm_out_score(hmm) + child->logs2prob;
606 
607  if ((newscore BETTER_THAN thresh)
608  && (newscore BETTER_THAN hmm_in_score(&child->hmm))) {
609  /* Incoming score > pruning threshold and > target's existing score */
610  if (hmm_frame(&child->hmm) < nf) {
611  /* Child node not yet activated; do so */
612  fsgs->pnode_active_next =
613  glist_add_ptr(fsgs->pnode_active_next,
614  (void *) child);
615  }
616 
617  hmm_enter(&child->hmm, newscore, hmm_out_history(hmm), nf);
618  }
619  }
620 }
621 
622 
623 static void
624 fsg_search_pnode_exit(fsg_search_t *fsgs, fsg_pnode_t * pnode)
625 {
626  hmm_t *hmm;
627  fsg_link_t *fl;
628  int32 wid;
629  fsg_pnode_ctxt_t ctxt;
630 
631  assert(pnode);
632  assert(fsg_pnode_leaf(pnode));
633 
634  hmm = fsg_pnode_hmmptr(pnode);
635  fl = fsg_pnode_fsglink(pnode);
636  assert(fl);
637 
638  wid = fsg_link_wid(fl);
639  assert(wid >= 0);
640 
641 #if __FSG_DBG__
642  E_INFO("[%5d] Exit(%08x) %10d(score) %5d(pred)\n",
643  fsgs->frame, (int32) pnode,
644  hmm_out_score(hmm), hmm_out_history(hmm));
645 #endif
646 
647  /*
648  * Check if this is filler or single phone word; these do not model right
649  * context (i.e., the exit score applies to all right contexts).
650  */
651  if (fsg_model_is_filler(fsgs->fsg, wid)
652  /* FIXME: This might be slow due to repeated calls to dict_to_id(). */
653  || (dict_is_single_phone(ps_search_dict(fsgs),
654  dict_wordid(ps_search_dict(fsgs),
655  fsg_model_word_str(fsgs->fsg, wid))))) {
656  /* Create a dummy context structure that applies to all right contexts */
657  fsg_pnode_add_all_ctxt(&ctxt);
658 
659  /* Create history table entry for this word exit */
660  fsg_history_entry_add(fsgs->history,
661  fl,
662  fsgs->frame,
663  hmm_out_score(hmm),
664  hmm_out_history(hmm),
665  pnode->ci_ext, ctxt);
666 
667  }
668  else {
669  /* Create history table entry for this word exit */
670  fsg_history_entry_add(fsgs->history,
671  fl,
672  fsgs->frame,
673  hmm_out_score(hmm),
674  hmm_out_history(hmm),
675  pnode->ci_ext, pnode->ctxt);
676  }
677 }
678 
679 
680 /*
681  * (Beam) prune the just evaluated HMMs, determine which ones remain
682  * active, which ones transition to successors, which ones exit and
683  * terminate in their respective destination FSM states.
684  * (Executed once per frame.)
685  */
686 static void
687 fsg_search_hmm_prune_prop(fsg_search_t *fsgs)
688 {
689  gnode_t *gn;
690  fsg_pnode_t *pnode;
691  hmm_t *hmm;
692  int32 thresh, word_thresh, phone_thresh;
693 
694  assert(fsgs->pnode_active_next == NULL);
695 
696  thresh = fsgs->bestscore + fsgs->beam;
697  phone_thresh = fsgs->bestscore + fsgs->pbeam;
698  word_thresh = fsgs->bestscore + fsgs->wbeam;
699 
700  for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
701  pnode = (fsg_pnode_t *) gnode_ptr(gn);
702  hmm = fsg_pnode_hmmptr(pnode);
703 
704  if (hmm_bestscore(hmm) >= thresh) {
705  /* Keep this HMM active in the next frame */
706  if (hmm_frame(hmm) == fsgs->frame) {
707  hmm_frame(hmm) = fsgs->frame + 1;
708  fsgs->pnode_active_next =
709  glist_add_ptr(fsgs->pnode_active_next,
710  (void *) pnode);
711  }
712  else {
713  assert(hmm_frame(hmm) == fsgs->frame + 1);
714  }
715 
716  if (!fsg_pnode_leaf(pnode)) {
717  if (hmm_out_score(hmm) >= phone_thresh) {
718  /* Transition out of this phone into its children */
719  fsg_search_pnode_trans(fsgs, pnode);
720  }
721  }
722  else {
723  if (hmm_out_score(hmm) >= word_thresh) {
724  /* Transition out of leaf node into destination FSG state */
725  fsg_search_pnode_exit(fsgs, pnode);
726  }
727  }
728  }
729  }
730 }
731 
732 
733 /*
734  * Propagate newly created history entries through null transitions.
735  */
736 static void
737 fsg_search_null_prop(fsg_search_t *fsgs)
738 {
739  int32 bpidx, n_entries, thresh, newscore;
740  fsg_hist_entry_t *hist_entry;
741  fsg_link_t *l;
742  int32 s;
743  fsg_model_t *fsg;
744 
745  fsg = fsgs->fsg;
746  thresh = fsgs->bestscore + fsgs->wbeam; /* Which beam really?? */
747 
748  n_entries = fsg_history_n_entries(fsgs->history);
749 
750  for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) {
751  fsg_arciter_t *itor;
752  hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
753 
754  l = fsg_hist_entry_fsglink(hist_entry);
755 
756  /* Destination FSG state for history entry */
757  s = l ? fsg_link_to_state(l) : fsg_model_start_state(fsg);
758 
759  /*
760  * Check null transitions from d to all other states. (Only need to
761  * propagate one step, since FSG contains transitive closure of null
762  * transitions.)
763  */
764  /* Add all links from from_state to dst */
765  for (itor = fsg_model_arcs(fsg, s); itor;
766  itor = fsg_arciter_next(itor)) {
767  fsg_link_t *l = fsg_arciter_get(itor);
768 
769  /* FIXME: Need to deal with tag transitions somehow. */
770  if (fsg_link_wid(l) != -1)
771  continue;
772  newscore =
773  fsg_hist_entry_score(hist_entry) +
774  (fsg_link_logs2prob(l) >> SENSCR_SHIFT);
775 
776  if (newscore >= thresh) {
777  fsg_history_entry_add(fsgs->history, l,
778  fsg_hist_entry_frame(hist_entry),
779  newscore,
780  bpidx,
781  fsg_hist_entry_lc(hist_entry),
782  fsg_hist_entry_rc(hist_entry));
783  }
784  }
785  }
786 }
787 
788 
789 /*
790  * Perform cross-word transitions; propagate each history entry created in this
791  * frame to lextree roots attached to the target FSG state for that entry.
792  */
793 static void
794 fsg_search_word_trans(fsg_search_t *fsgs)
795 {
796  int32 bpidx, n_entries;
797  fsg_hist_entry_t *hist_entry;
798  fsg_link_t *l;
799  int32 score, newscore, thresh, nf, d;
800  fsg_pnode_t *root;
801  int32 lc, rc;
802 
803  n_entries = fsg_history_n_entries(fsgs->history);
804 
805  thresh = fsgs->bestscore + fsgs->beam;
806  nf = fsgs->frame + 1;
807 
808  for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) {
809  hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
810  assert(hist_entry);
811  score = fsg_hist_entry_score(hist_entry);
812  assert(fsgs->frame == fsg_hist_entry_frame(hist_entry));
813 
814  l = fsg_hist_entry_fsglink(hist_entry);
815 
816  /* Destination state for hist_entry */
817  d = l ? fsg_link_to_state(l) : fsg_model_start_state(fsgs->
818  fsg);
819 
820  lc = fsg_hist_entry_lc(hist_entry);
821 
822  /* Transition to all root nodes attached to state d */
823  for (root = fsg_lextree_root(fsgs->lextree, d);
824  root; root = root->sibling) {
825  rc = root->ci_ext;
826 
827  if ((root->ctxt.bv[lc >> 5] & (1 << (lc & 0x001f))) &&
828  (hist_entry->rc.bv[rc >> 5] & (1 << (rc & 0x001f)))) {
829  /*
830  * Last CIphone of history entry is in left-context list supported by
831  * target root node, and
832  * first CIphone of target root node is in right context list supported
833  * by history entry;
834  * So the transition can go ahead (if new score is good enough).
835  */
836  newscore = score + root->logs2prob;
837 
838  if ((newscore BETTER_THAN thresh)
839  && (newscore BETTER_THAN hmm_in_score(&root->hmm))) {
840  if (hmm_frame(&root->hmm) < nf) {
841  /* Newly activated node; add to active list */
842  fsgs->pnode_active_next =
843  glist_add_ptr(fsgs->pnode_active_next,
844  (void *) root);
845 #if __FSG_DBG__
846  E_INFO
847  ("[%5d] WordTrans bpidx[%d] -> pnode[%08x] (activated)\n",
848  fsgs->frame, bpidx, (int32) root);
849 #endif
850  }
851  else {
852 #if __FSG_DBG__
853  E_INFO
854  ("[%5d] WordTrans bpidx[%d] -> pnode[%08x]\n",
855  fsgs->frame, bpidx, (int32) root);
856 #endif
857  }
858 
859  hmm_enter(&root->hmm, newscore, bpidx, nf);
860  }
861  }
862  }
863  }
864 }
865 
866 
867 int
868 fsg_search_step(ps_search_t *search, int frame_idx)
869 {
870  fsg_search_t *fsgs = (fsg_search_t *)search;
871  int16 const *senscr;
872  acmod_t *acmod = search->acmod;
873  gnode_t *gn;
874  fsg_pnode_t *pnode;
875  hmm_t *hmm;
876 
877  /* Activate our HMMs for the current frame if need be. */
878  if (!acmod->compallsen)
879  fsg_search_sen_active(fsgs);
880  /* Compute GMM scores for the current frame. */
881  senscr = acmod_score(acmod, &frame_idx);
882  fsgs->n_sen_eval += acmod->n_senone_active;
883  hmm_context_set_senscore(fsgs->hmmctx, senscr);
884 
885  /* Mark backpointer table for current frame. */
886  fsgs->bpidx_start = fsg_history_n_entries(fsgs->history);
887 
888  /* Evaluate all active pnodes (HMMs) */
889  fsg_search_hmm_eval(fsgs);
890 
891  /*
892  * Prune and propagate the HMMs evaluated; create history entries for
893  * word exits. The words exits are tentative, and may be pruned; make
894  * the survivors permanent via fsg_history_end_frame().
895  */
896  fsg_search_hmm_prune_prop(fsgs);
897  fsg_history_end_frame(fsgs->history);
898 
899  /*
900  * Propagate new history entries through any null transitions, creating
901  * new history entries, and then make the survivors permanent.
902  */
903  fsg_search_null_prop(fsgs);
904  fsg_history_end_frame(fsgs->history);
905 
906  /*
907  * Perform cross-word transitions; propagate each history entry across its
908  * terminating state to the root nodes of the lextree attached to the state.
909  */
910  fsg_search_word_trans(fsgs);
911 
912  /*
913  * We've now come full circle, HMM and FSG states have been updated for
914  * the next frame.
915  * Update the active lists, deactivate any currently active HMMs that
916  * did not survive into the next frame
917  */
918  for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
919  pnode = (fsg_pnode_t *) gnode_ptr(gn);
920  hmm = fsg_pnode_hmmptr(pnode);
921 
922  if (hmm_frame(hmm) == fsgs->frame) {
923  /* This HMM NOT activated for the next frame; reset it */
925  }
926  else {
927  assert(hmm_frame(hmm) == (fsgs->frame + 1));
928  }
929  }
930 
931  /* Free the currently active list */
932  glist_free(fsgs->pnode_active);
933 
934  /* Make the next-frame active list the current one */
935  fsgs->pnode_active = fsgs->pnode_active_next;
936  fsgs->pnode_active_next = NULL;
937 
938  /* End of this frame; ready for the next */
939  ++fsgs->frame;
940 
941  return 1;
942 }
943 
944 
945 /*
946  * Set all HMMs to inactive, clear active lists, initialize FSM start
947  * state to be the only active node.
948  * (Executed at the start of each utterance.)
949  */
950 int
951 fsg_search_start(ps_search_t *search)
952 {
953  fsg_search_t *fsgs = (fsg_search_t *)search;
954  int32 silcipid;
955  fsg_pnode_ctxt_t ctxt;
956 
957  /* Reset dynamic adjustment factor for beams */
958  fsgs->beam_factor = 1.0f;
959  fsgs->beam = fsgs->beam_orig;
960  fsgs->pbeam = fsgs->pbeam_orig;
961  fsgs->wbeam = fsgs->wbeam_orig;
962 
963  silcipid = bin_mdef_ciphone_id(ps_search_acmod(fsgs)->mdef, "SIL");
964 
965  /* Initialize EVERYTHING to be inactive */
966  assert(fsgs->pnode_active == NULL);
967  assert(fsgs->pnode_active_next == NULL);
968 
969  fsg_history_reset(fsgs->history);
970  fsg_history_utt_start(fsgs->history);
971  fsgs->final = FALSE;
972 
973  /* Dummy context structure that allows all right contexts to use this entry */
974  fsg_pnode_add_all_ctxt(&ctxt);
975 
976  /* Create dummy history entry leading to start state */
977  fsgs->frame = -1;
978  fsgs->bestscore = 0;
979  fsg_history_entry_add(fsgs->history,
980  NULL, -1, 0, -1, silcipid, ctxt);
981  fsgs->bpidx_start = 0;
982 
983  /* Propagate dummy history entry through NULL transitions from start state */
984  fsg_search_null_prop(fsgs);
985 
986  /* Perform word transitions from this dummy history entry */
987  fsg_search_word_trans(fsgs);
988 
989  /* Make the next-frame active list the current one */
990  fsgs->pnode_active = fsgs->pnode_active_next;
991  fsgs->pnode_active_next = NULL;
992 
993  ++fsgs->frame;
994 
995  fsgs->n_hmm_eval = 0;
996  fsgs->n_sen_eval = 0;
997 
998  return 0;
999 }
1000 
1001 /*
1002  * Cleanup at the end of each utterance.
1003  */
1004 int
1005 fsg_search_finish(ps_search_t *search)
1006 {
1007  fsg_search_t *fsgs = (fsg_search_t *)search;
1008  gnode_t *gn;
1009  fsg_pnode_t *pnode;
1010  int32 n_hist;
1011 
1012  /* Deactivate all nodes in the current and next-frame active lists */
1013  for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
1014  pnode = (fsg_pnode_t *) gnode_ptr(gn);
1016  }
1017  for (gn = fsgs->pnode_active_next; gn; gn = gnode_next(gn)) {
1018  pnode = (fsg_pnode_t *) gnode_ptr(gn);
1020  }
1021 
1022  glist_free(fsgs->pnode_active);
1023  fsgs->pnode_active = NULL;
1024  glist_free(fsgs->pnode_active_next);
1025  fsgs->pnode_active_next = NULL;
1026 
1027  fsgs->final = TRUE;
1028 
1029  n_hist = fsg_history_n_entries(fsgs->history);
1030  E_INFO
1031  ("%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n\n",
1032  fsgs->frame, fsgs->n_hmm_eval,
1033  (fsgs->frame > 0) ? fsgs->n_hmm_eval / fsgs->frame : 0,
1034  fsgs->n_sen_eval,
1035  (fsgs->frame > 0) ? fsgs->n_sen_eval / fsgs->frame : 0,
1036  n_hist, (fsgs->frame > 0) ? n_hist / fsgs->frame : 0);
1037 
1038  return 0;
1039 }
1040 
1041 static int
1042 fsg_search_find_exit(fsg_search_t *fsgs, int frame_idx, int final, int32 *out_score)
1043 {
1044  fsg_hist_entry_t *hist_entry;
1045  fsg_model_t *fsg;
1046  int bpidx, frm, last_frm, besthist;
1047  int32 bestscore;
1048 
1049  if (frame_idx == -1)
1050  frame_idx = fsgs->frame - 1;
1051  last_frm = frm = frame_idx;
1052 
1053  /* Scan backwards to find a word exit in frame_idx. */
1054  bpidx = fsg_history_n_entries(fsgs->history) - 1;
1055  while (bpidx > 0) {
1056  hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
1057  if (fsg_hist_entry_frame(hist_entry) <= frame_idx) {
1058  frm = last_frm = fsg_hist_entry_frame(hist_entry);
1059  break;
1060  }
1061  }
1062 
1063  /* No hypothesis (yet). */
1064  if (bpidx <= 0)
1065  return bpidx;
1066 
1067  /* Now find best word exit in this frame. */
1068  bestscore = INT_MIN;
1069  besthist = -1;
1070  fsg = fsgs->fsg;
1071  while (frm == last_frm) {
1072  fsg_link_t *fl;
1073  int32 score;
1074 
1075  fl = fsg_hist_entry_fsglink(hist_entry);
1076  score = fsg_hist_entry_score(hist_entry);
1077 
1078  if (fl == NULL)
1079  break;
1080 
1081  if (score BETTER_THAN bestscore) {
1082  /* Only enforce the final state constraint if this is a final hypothesis. */
1083  if ((!final)
1084  || fsg_link_to_state(fl) == fsg_model_final_state(fsg)) {
1085  bestscore = score;
1086  besthist = bpidx;
1087  }
1088  }
1089 
1090  --bpidx;
1091  if (bpidx < 0)
1092  break;
1093  hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
1094  frm = fsg_hist_entry_frame(hist_entry);
1095  }
1096 
1097  /* Final state not reached. */
1098  if (besthist == -1) {
1099  E_ERROR("Final state not reached in frame %d\n", frame_idx);
1100  return -1;
1101  }
1102 
1103  /* This here's the one we want. */
1104  if (out_score)
1105  *out_score = bestscore;
1106  return besthist;
1107 }
1108 
1109 /* FIXME: Mostly duplicated with ngram_search_bestpath(). */
1110 static ps_latlink_t *
1111 fsg_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
1112 {
1113  fsg_search_t *fsgs = (fsg_search_t *)search;
1114 
1115  if (search->last_link == NULL) {
1116  search->last_link = ps_lattice_bestpath(search->dag, NULL,
1117  1.0, fsgs->ascale);
1118  if (search->last_link == NULL)
1119  return NULL;
1120  /* Also calculate betas so we can fill in the posterior
1121  * probability field in the segmentation. */
1122  if (search->post == 0)
1123  search->post = ps_lattice_posterior(search->dag, NULL, fsgs->ascale);
1124  }
1125  if (out_score)
1126  *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
1127  return search->last_link;
1128 }
1129 
1130 char const *
1131 fsg_search_hyp(ps_search_t *search, int32 *out_score)
1132 {
1133  fsg_search_t *fsgs = (fsg_search_t *)search;
1134  dict_t *dict = ps_search_dict(search);
1135  char *c;
1136  size_t len;
1137  int bp, bpidx;
1138 
1139  /* Get last backpointer table index. */
1140  bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, out_score);
1141  /* No hypothesis (yet). */
1142  if (bpidx <= 0)
1143  return NULL;
1144 
1145  /* If bestpath is enabled and the utterance is complete, then run it. */
1146  if (fsgs->bestpath && fsgs->final) {
1147  ps_lattice_t *dag;
1148  ps_latlink_t *link;
1149 
1150  if ((dag = fsg_search_lattice(search)) == NULL) {
1151  E_WARN("Failed to obtain the lattice while bestpath enabled\n");
1152  return NULL;
1153  }
1154  if ((link = fsg_search_bestpath(search, out_score, FALSE)) == NULL) {
1155  E_WARN("Failed to bestpath in a lattice\n");
1156  return NULL;
1157  }
1158  return ps_lattice_hyp(dag, link);
1159  }
1160 
1161  bp = bpidx;
1162  len = 0;
1163  while (bp > 0) {
1164  fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1165  fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry);
1166  char const *baseword;
1167  int32 wid;
1168 
1169  bp = fsg_hist_entry_pred(hist_entry);
1170  wid = fsg_link_wid(fl);
1171  if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid))
1172  continue;
1173  baseword = dict_basestr(dict,
1174  dict_wordid(dict,
1175  fsg_model_word_str(fsgs->fsg, wid)));
1176  len += strlen(baseword) + 1;
1177  }
1178 
1179  ckd_free(search->hyp_str);
1180  if (len == 0) {
1181  search->hyp_str = NULL;
1182  return search->hyp_str;
1183  }
1184  search->hyp_str = ckd_calloc(1, len);
1185 
1186  bp = bpidx;
1187  c = search->hyp_str + len - 1;
1188  while (bp > 0) {
1189  fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1190  fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry);
1191  char const *baseword;
1192  int32 wid;
1193 
1194  bp = fsg_hist_entry_pred(hist_entry);
1195  wid = fsg_link_wid(fl);
1196  if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid))
1197  continue;
1198  baseword = dict_basestr(dict,
1199  dict_wordid(dict,
1200  fsg_model_word_str(fsgs->fsg, wid)));
1201  len = strlen(baseword);
1202  c -= len;
1203  memcpy(c, baseword, len);
1204  if (c > search->hyp_str) {
1205  --c;
1206  *c = ' ';
1207  }
1208  }
1209 
1210  return search->hyp_str;
1211 }
1212 
1213 static void
1214 fsg_seg_bp2itor(ps_seg_t *seg, fsg_hist_entry_t *hist_entry)
1215 {
1216  fsg_search_t *fsgs = (fsg_search_t *)seg->search;
1217  fsg_hist_entry_t *ph = NULL;
1218  int32 bp;
1219 
1220  if ((bp = fsg_hist_entry_pred(hist_entry)) >= 0)
1221  ph = fsg_history_entry_get(fsgs->history, bp);
1222  seg->word = fsg_model_word_str(fsgs->fsg, hist_entry->fsglink->wid);
1223  seg->ef = fsg_hist_entry_frame(hist_entry);
1224  seg->sf = ph ? fsg_hist_entry_frame(ph) + 1 : 0;
1225  /* This is kind of silly but it happens for null transitions. */
1226  if (seg->sf > seg->ef) seg->sf = seg->ef;
1227  seg->prob = 0; /* Bogus value... */
1228  /* "Language model" score = transition probability. */
1229  seg->lback = 1;
1230  seg->lscr = hist_entry->fsglink->logs2prob;
1231  if (ph) {
1232  /* FIXME: Not sure exactly how cross-word triphones are handled. */
1233  seg->ascr = hist_entry->score - ph->score - seg->lscr;
1234  }
1235  else
1236  seg->ascr = hist_entry->score - seg->lscr;
1237 }
1238 
1239 
1240 static void
1241 fsg_seg_free(ps_seg_t *seg)
1242 {
1243  fsg_seg_t *itor = (fsg_seg_t *)seg;
1244  ckd_free(itor->hist);
1245  ckd_free(itor);
1246 }
1247 
1248 static ps_seg_t *
1249 fsg_seg_next(ps_seg_t *seg)
1250 {
1251  fsg_seg_t *itor = (fsg_seg_t *)seg;
1252 
1253  if (++itor->cur == itor->n_hist) {
1254  fsg_seg_free(seg);
1255  return NULL;
1256  }
1257 
1258  fsg_seg_bp2itor(seg, itor->hist[itor->cur]);
1259  return seg;
1260 }
1261 
1262 static ps_segfuncs_t fsg_segfuncs = {
1263  /* seg_next */ fsg_seg_next,
1264  /* seg_free */ fsg_seg_free
1265 };
1266 
1267 static ps_seg_t *
1268 fsg_search_seg_iter(ps_search_t *search, int32 *out_score)
1269 {
1270  fsg_search_t *fsgs = (fsg_search_t *)search;
1271  fsg_seg_t *itor;
1272  int bp, bpidx, cur;
1273 
1274  bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, out_score);
1275  /* No hypothesis (yet). */
1276  if (bpidx <= 0)
1277  return NULL;
1278 
1279  /* If bestpath is enabled and the utterance is complete, then run it. */
1280  if (fsgs->bestpath && fsgs->final) {
1281  ps_lattice_t *dag;
1282  ps_latlink_t *link;
1283 
1284  if ((dag = fsg_search_lattice(search)) == NULL)
1285  return NULL;
1286  if ((link = fsg_search_bestpath(search, out_score, TRUE)) == NULL)
1287  return NULL;
1288  return ps_lattice_seg_iter(dag, link, 1.0);
1289  }
1290 
1291  /* Calling this an "iterator" is a bit of a misnomer since we have
1292  * to get the entire backtrace in order to produce it. On the
1293  * other hand, all we actually need is the bptbl IDs, and we can
1294  * allocate a fixed-size array of them. */
1295  itor = ckd_calloc(1, sizeof(*itor));
1296  itor->base.vt = &fsg_segfuncs;
1297  itor->base.search = search;
1298  itor->base.lwf = 1.0;
1299  itor->n_hist = 0;
1300  bp = bpidx;
1301  while (bp > 0) {
1302  fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1303  bp = fsg_hist_entry_pred(hist_entry);
1304  ++itor->n_hist;
1305  }
1306  if (itor->n_hist == 0) {
1307  ckd_free(itor);
1308  return NULL;
1309  }
1310  itor->hist = ckd_calloc(itor->n_hist, sizeof(*itor->hist));
1311  cur = itor->n_hist - 1;
1312  bp = bpidx;
1313  while (bp > 0) {
1314  fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
1315  itor->hist[cur] = hist_entry;
1316  bp = fsg_hist_entry_pred(hist_entry);
1317  --cur;
1318  }
1319 
1320  /* Fill in relevant fields for first element. */
1321  fsg_seg_bp2itor((ps_seg_t *)itor, itor->hist[0]);
1322 
1323  return (ps_seg_t *)itor;
1324 }
1325 
1326 static int
1327 fsg_search_prob(ps_search_t *search)
1328 {
1329  fsg_search_t *fsgs = (fsg_search_t *)search;
1330 
1331  /* If bestpath is enabled and the utterance is complete, then run it. */
1332  if (fsgs->bestpath && fsgs->final) {
1333  ps_lattice_t *dag;
1334  ps_latlink_t *link;
1335 
1336  if ((dag = fsg_search_lattice(search)) == NULL)
1337  return 0;
1338  if ((link = fsg_search_bestpath(search, NULL, TRUE)) == NULL)
1339  return 0;
1340  return search->post;
1341  }
1342  else {
1343  /* FIXME: Give some kind of good estimate here, eventually. */
1344  return 0;
1345  }
1346 }
1347 
1348 static ps_latnode_t *
1349 new_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int ef, int32 wid, int32 ascr)
1350 {
1351  ps_latnode_t *node;
1352 
1353  for (node = dag->nodes; node; node = node->next)
1354  if (node->sf == sf && node->wid == wid)
1355  break;
1356 
1357  if (node) {
1358  /* Update end frames. */
1359  if (node->lef == -1 || node->lef < ef)
1360  node->lef = ef;
1361  if (node->fef == -1 || node->fef > ef)
1362  node->fef = ef;
1363  /* Update best link score. */
1364  if (ascr BETTER_THAN node->info.best_exit)
1365  node->info.best_exit = ascr;
1366  }
1367  else {
1368  /* New node; link to head of list */
1369  node = listelem_malloc(dag->latnode_alloc);
1370  node->wid = wid;
1371  node->sf = sf;
1372  node->fef = node->lef = ef;
1373  node->reachable = FALSE;
1374  node->entries = NULL;
1375  node->exits = NULL;
1376  node->info.best_exit = ascr;
1377 
1378  node->next = dag->nodes;
1379  dag->nodes = node;
1380  ++dag->n_nodes;
1381  }
1382 
1383  return node;
1384 }
1385 
1386 static ps_latnode_t *
1387 find_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int32 wid)
1388 {
1389  ps_latnode_t *node;
1390 
1391  for (node = dag->nodes; node; node = node->next)
1392  if (node->sf == sf && node->wid == wid)
1393  break;
1394  return node;
1395 }
1396 
1397 static ps_latnode_t *
1398 find_start_node(fsg_search_t *fsgs, ps_lattice_t *dag)
1399 {
1400  ps_latnode_t *node;
1401  glist_t start = NULL;
1402  int nstart = 0;
1403 
1404  /* Look for all nodes starting in frame zero with some exits. */
1405  for (node = dag->nodes; node; node = node->next) {
1406  if (node->sf == 0 && node->exits) {
1407  E_INFO("Start node %s.%d:%d:%d\n",
1408  fsg_model_word_str(fsgs->fsg, node->wid),
1409  node->sf, node->fef, node->lef);
1410  start = glist_add_ptr(start, node);
1411  ++nstart;
1412  }
1413  }
1414 
1415  /* If there was more than one start node candidate, then we need
1416  * to create an artificial start node with epsilon transitions to
1417  * all of them. */
1418  if (nstart == 1) {
1419  node = gnode_ptr(start);
1420  }
1421  else {
1422  gnode_t *st;
1423  int wid;
1424 
1425  wid = fsg_model_word_add(fsgs->fsg, "<s>");
1426  if (fsgs->fsg->silwords)
1427  bitvec_set(fsgs->fsg->silwords, wid);
1428  node = new_node(dag, fsgs->fsg, 0, 0, wid, 0);
1429  for (st = start; st; st = gnode_next(st))
1430  ps_lattice_link(dag, node, gnode_ptr(st), 0, 0);
1431  }
1432  glist_free(start);
1433  return node;
1434 }
1435 
1436 static ps_latnode_t *
1437 find_end_node(fsg_search_t *fsgs, ps_lattice_t *dag)
1438 {
1439  ps_latnode_t *node;
1440  glist_t end = NULL;
1441  int nend = 0;
1442 
1443  /* Look for all nodes ending in last frame with some entries. */
1444  for (node = dag->nodes; node; node = node->next) {
1445  if (node->lef == dag->n_frames - 1 && node->entries) {
1446  E_INFO("End node %s.%d:%d:%d (%d)\n",
1447  fsg_model_word_str(fsgs->fsg, node->wid),
1448  node->sf, node->fef, node->lef, node->info.best_exit);
1449  end = glist_add_ptr(end, node);
1450  ++nend;
1451  }
1452  }
1453 
1454  if (nend == 1) {
1455  node = gnode_ptr(end);
1456  }
1457  else if (nend == 0) {
1458  ps_latnode_t *last = NULL;
1459  int ef = 0;
1460 
1461  /* If there were no end node candidates, then just use the
1462  * node with the last exit frame. */
1463  for (node = dag->nodes; node; node = node->next) {
1464  if (node->lef > ef && node->entries) {
1465  last = node;
1466  ef = node->lef;
1467  }
1468  }
1469  node = last;
1470  if (node)
1471  E_INFO("End node %s.%d:%d:%d (%d)\n",
1472  fsg_model_word_str(fsgs->fsg, node->wid),
1473  node->sf, node->fef, node->lef, node->info.best_exit);
1474  }
1475  else {
1476  /* If there was more than one end node candidate, then we need
1477  * to create an artificial end node with epsilon transitions
1478  * out of all of them. */
1479  gnode_t *st;
1480  int wid;
1481 
1482  wid = fsg_model_word_add(fsgs->fsg, "</s>");
1483  if (fsgs->fsg->silwords)
1484  bitvec_set(fsgs->fsg->silwords, wid);
1485  node = new_node(dag, fsgs->fsg, fsgs->frame, fsgs->frame, wid, 0);
1486  /* Use the "best" (in reality it will be the only) exit link
1487  * score from this final node as the link score. */
1488  for (st = end; st; st = gnode_next(st)) {
1489  ps_latnode_t *src = gnode_ptr(st);
1490  ps_lattice_link(dag, src, node, src->info.best_exit, fsgs->frame);
1491  }
1492  }
1493  glist_free(end);
1494  return node;
1495 }
1496 
1497 static void
1498 mark_reachable(ps_lattice_t *dag, ps_latnode_t *end)
1499 {
1500  glist_t q;
1501 
1502  /* It doesn't matter which order we do this in. */
1503  end->reachable = TRUE;
1504  q = glist_add_ptr(NULL, end);
1505  while (q) {
1506  ps_latnode_t *node = gnode_ptr(q);
1507  latlink_list_t *x;
1508 
1509  /* Pop the front of the list. */
1510  q = gnode_free(q, NULL);
1511  /* Expand all its predecessors that haven't been seen yet. */
1512  for (x = node->entries; x; x = x->next) {
1513  ps_latnode_t *next = x->link->from;
1514  if (!next->reachable) {
1515  next->reachable = TRUE;
1516  q = glist_add_ptr(q, next);
1517  }
1518  }
1519  }
1520 }
1521 
1530 static ps_lattice_t *
1531 fsg_search_lattice(ps_search_t *search)
1532 {
1533  fsg_search_t *fsgs;
1534  fsg_model_t *fsg;
1535  ps_latnode_t *node;
1536  ps_lattice_t *dag;
1537  int32 i, n;
1538 
1539  fsgs = (fsg_search_t *)search;
1540 
1541  /* Check to see if a lattice has previously been created over the
1542  * same number of frames, and reuse it if so. */
1543  if (search->dag && search->dag->n_frames == fsgs->frame)
1544  return search->dag;
1545 
1546  /* Nope, create a new one. */
1547  ps_lattice_free(search->dag);
1548  search->dag = NULL;
1549  dag = ps_lattice_init_search(search, fsgs->frame);
1550  fsg = fsgs->fsg;
1551 
1552  /*
1553  * Each history table entry represents a link in the word graph.
1554  * The set of nodes is determined by the number of unique
1555  * (word,start-frame) pairs in the history table. So we will
1556  * first find all those nodes.
1557  */
1558  n = fsg_history_n_entries(fsgs->history);
1559  for (i = 0; i < n; ++i) {
1560  fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i);
1561  ps_latnode_t *node;
1562  int32 ascr;
1563  int sf;
1564 
1565  /* Skip null transitions. */
1566  if (fh->fsglink == NULL || fh->fsglink->wid == -1)
1567  continue;
1568 
1569  /* Find the start node of this link. */
1570  if (fh->pred) {
1571  fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred);
1572  /* FIXME: We include the transition score in the lattice
1573  * link score. This is because of the practical
1574  * difficulty of obtaining it separately in bestpath or
1575  * forward-backward search, and because it is essentially
1576  * a unigram probability, so there is no need to treat it
1577  * separately from the acoustic score. However, it's not
1578  * clear that this will actually yield correct results.*/
1579  ascr = fh->score - pfh->score;
1580  sf = pfh->frame + 1;
1581  }
1582  else {
1583  ascr = fh->score;
1584  sf = 0;
1585  }
1586 
1587  /*
1588  * Note that although scores are tied to links rather than
1589  * nodes, it's possible that there are no links out of the
1590  * destination node, and thus we need to preserve its score in
1591  * case it turns out to be utterance-final.
1592  */
1593  node = new_node(dag, fsg, sf, fh->frame, fh->fsglink->wid, ascr);
1594  }
1595 
1596  /*
1597  * Now, we will create links only to nodes that actually exist.
1598  */
1599  n = fsg_history_n_entries(fsgs->history);
1600  for (i = 0; i < n; ++i) {
1601  fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i);
1602  fsg_arciter_t *itor;
1603  ps_latnode_t *src, *dest;
1604  int32 ascr;
1605  int sf;
1606 
1607  /* Skip null transitions. */
1608  if (fh->fsglink == NULL || fh->fsglink->wid == -1)
1609  continue;
1610 
1611  /* Find the start node of this link and calculate its link score. */
1612  if (fh->pred) {
1613  fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred);
1614  sf = pfh->frame + 1;
1615  ascr = fh->score - pfh->score;
1616  }
1617  else {
1618  ascr = fh->score;
1619  sf = 0;
1620  }
1621  src = find_node(dag, fsg, sf, fh->fsglink->wid);
1622 
1623  sf = fh->frame + 1;
1624  for (itor = fsg_model_arcs(fsg, fsg_link_to_state(fh->fsglink));
1625  itor; itor = fsg_arciter_next(itor)) {
1626  fsg_link_t *link = fsg_arciter_get(itor);
1627 
1628  /* FIXME: Need to figure out what to do about tag transitions. */
1629  if (link->wid >= 0) {
1630  /*
1631  * For each non-epsilon link following this one, look for a
1632  * matching node in the lattice and link to it.
1633  */
1634  if ((dest = find_node(dag, fsg, sf, link->wid)) != NULL)
1635  ps_lattice_link(dag, src, dest, ascr, fh->frame);
1636  }
1637  else {
1638  /*
1639  * Transitive closure on nulls has already been done, so we
1640  * just need to look one link forward from them.
1641  */
1642  fsg_arciter_t *itor2;
1643 
1644  /* Add all non-null links out of j. */
1645  for (itor2 = fsg_model_arcs(fsg, fsg_link_to_state(link));
1646  itor2; itor2 = fsg_arciter_next(itor2)) {
1647  fsg_link_t *link = fsg_arciter_get(itor2);
1648  if (link->wid == -1)
1649  continue;
1650  if ((dest = find_node(dag, fsg, sf, link->wid)) != NULL)
1651  ps_lattice_link(dag, src, dest, ascr, fh->frame);
1652  }
1653  }
1654  }
1655  }
1656 
1657  /* Figure out which nodes are the start and end nodes. */
1658  if ((dag->start = find_start_node(fsgs, dag)) == NULL)
1659  goto error_out;
1660  if ((dag->end = find_end_node(fsgs, dag)) == NULL)
1661  goto error_out;
1662  E_INFO("lattice start node %s.%d end node %s.%d\n",
1663  fsg_model_word_str(fsg, dag->start->wid), dag->start->sf,
1664  fsg_model_word_str(fsg, dag->end->wid), dag->end->sf);
1665  /* FIXME: Need to calculate final_node_ascr here. */
1666 
1667  /*
1668  * Convert word IDs from FSG to dictionary.
1669  */
1670  for (node = dag->nodes; node; node = node->next) {
1671  node->wid = dict_wordid(dag->search->dict,
1672  fsg_model_word_str(fsg, node->wid));
1673  node->basewid = dict_basewid(dag->search->dict, node->wid);
1674  }
1675 
1676  /*
1677  * Now we are done, because the links in the graph are uniquely
1678  * defined by the history table. However we should remove any
1679  * nodes which are not reachable from the end node of the FSG.
1680  * Everything is reachable from the start node by definition.
1681  */
1682  mark_reachable(dag, dag->end);
1683 
1685  {
1686  int32 silpen, fillpen;
1687 
1688  silpen = (int32)(logmath_log(fsg->lmath,
1689  cmd_ln_float32_r(ps_search_config(fsgs), "-silprob"))
1690  * fsg->lw)
1691  >> SENSCR_SHIFT;
1692  fillpen = (int32)(logmath_log(fsg->lmath,
1693  cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob"))
1694  * fsg->lw)
1695  >> SENSCR_SHIFT;
1696  ps_lattice_bypass_fillers(dag, silpen, fillpen);
1697  }
1698  search->dag = dag;
1699  return dag;
1700 
1701 error_out:
1702  ps_lattice_free(dag);
1703  return NULL;
1704 
1705 }