PocketSphinx  0.6
src/libpocketsphinx/fsg_lextree.c
Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2010 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00020  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00021  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00022  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00023  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00025  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00026  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00027  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00028  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00029  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  * ====================================================================
00032  *
00033  */
00041 /* System headers. */
00042 #include <stdio.h>
00043 #include <string.h>
00044 #include <assert.h>
00045 
00046 /* SphinxBase headers. */
00047 #include <sphinxbase/ckd_alloc.h>
00048 #include <sphinxbase/err.h>
00049 
00050 /* Local headers. */
00051 #include "fsg_lextree.h"
00052 
00053 #define __FSG_DBG__             0
00054 
00055 /* A linklist structure that is actually used to build local lextrees at grammar nodes */
00056 typedef struct fsg_glist_linklist_t {
00057     int32    ci, rc;
00058     glist_t  glist;
00059     struct   fsg_glist_linklist_t *next;
00060 } fsg_glist_linklist_t;
00061 
00068 static fsg_pnode_t *fsg_psubtree_init(fsg_lextree_t *tree,
00069                                       fsg_model_t *fsg,
00070                                       int32 from_state,
00071                                       fsg_pnode_t **alloc_head);
00072 
00077 static void fsg_psubtree_free(fsg_pnode_t *alloc_head);
00078 
00083 static void fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t *alloc_head, FILE *fp);
00084 
00088 static void
00089 fsg_lextree_lc_rc(fsg_lextree_t *lextree)
00090 {
00091     int32 s, i, j;
00092     int32 n_ci;
00093     fsg_model_t *fsg;
00094     int32 silcipid;
00095     int32 len;
00096 
00097     silcipid = bin_mdef_silphone(lextree->mdef);
00098     assert(silcipid >= 0);
00099     n_ci = bin_mdef_n_ciphone(lextree->mdef);
00100 
00101     fsg = lextree->fsg;
00102     /*
00103      * lextree->lc[s] = set of left context CIphones for state s.  Similarly, rc[s]
00104      * for right context CIphones.
00105      */
00106     lextree->lc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->lc));
00107     lextree->rc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->rc));
00108     E_INFO("Allocated %d bytes (%d KiB) for left and right context phones\n",
00109            fsg->n_state * (n_ci + 1) * 2,
00110            fsg->n_state * (n_ci + 1) * 2 / 1024);
00111 
00112 
00113     for (s = 0; s < fsg->n_state; s++) {
00114         fsg_arciter_t *itor;
00115         for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) {
00116             fsg_link_t *l = fsg_arciter_get(itor);
00117             int32 dictwid; 
00119             if (fsg_link_wid(l) >= 0) {
00120                 dictwid = dict_wordid(lextree->dict,
00121                                       fsg_model_word_str(lextree->fsg, l->wid));
00122 
00123                 /*
00124                  * Add the first CIphone of l->wid to the rclist of state s, and
00125                  * the last CIphone to lclist of state d.
00126                  * (Filler phones are a pain to deal with.  There is no direct
00127                  * marking of a filler phone; but only filler words are supposed to
00128                  * use such phones, so we use that fact.  HACK!!  FRAGILE!!)
00129                  */
00130                 if (fsg_model_is_filler(fsg, fsg_link_wid(l))) {
00131                     /* Filler phone; use silence phone as context */
00132                     lextree->rc[fsg_link_from_state(l)][silcipid] = 1;
00133                     lextree->lc[fsg_link_to_state(l)][silcipid] = 1;
00134                 }
00135                 else {
00136                     len = dict_pronlen(lextree->dict, dictwid);
00137                     lextree->rc[fsg_link_from_state(l)][dict_pron(lextree->dict, dictwid, 0)] = 1;
00138                     lextree->lc[fsg_link_to_state(l)][dict_pron(lextree->dict, dictwid, len - 1)] = 1;
00139                 }
00140             }
00141 
00142             /*
00143              * Add SIL phone to the lclist and rclist of each state.  Strictly
00144              * speaking, only needed at start and final states, respectively, but
00145              * all states considered since the user may change the start and final
00146              * states.  In any case, most applications would have a silence self
00147              * loop at each state, hence these would be needed anyway.
00148              */
00149             lextree->lc[fsg_link_from_state(l)][silcipid] = 1;
00150             lextree->rc[fsg_link_from_state(l)][silcipid] = 1;
00151         }
00152     }
00153 
00154     /*
00155      * Propagate lc and rc lists past null transitions.  (Since FSG contains
00156      * null transitions closure, no need to worry about a chain of successive
00157      * null transitions.  Right??)
00158      *
00159      * This can't be joined with the previous loop because we first calculate 
00160      * contexts and only then we can propagate them.
00161      */
00162     for (s = 0; s < fsg->n_state; s++) {
00163         fsg_arciter_t *itor;
00164         for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) {
00165             fsg_link_t *l = fsg_arciter_get(itor);
00166             if (fsg_link_wid(l) < 0) {
00167 
00168                 /*
00169                  * lclist(d) |= lclist(s), because all the words ending up at s, can
00170                  * now also end at d, becoming the left context for words leaving d.
00171                  */
00172                 for (i = 0; i < n_ci; i++)
00173                     lextree->lc[fsg_link_to_state(l)][i] |= lextree->lc[fsg_link_from_state(l)][i];
00174                 /*
00175                  * Similarly, rclist(s) |= rclist(d), because all the words leaving d
00176                  * can equivalently leave s, becoming the right context for words
00177                  * ending up at s.
00178                  */
00179                 for (i = 0; i < n_ci; i++)
00180                     lextree->rc[fsg_link_from_state(l)][i] |= lextree->rc[fsg_link_to_state(l)][i];
00181             }
00182         }
00183     }
00184 
00185     /* Convert the bit-vector representation into a list */
00186     for (s = 0; s < fsg->n_state; s++) {
00187         j = 0;
00188         for (i = 0; i < n_ci; i++) {
00189             if (lextree->lc[s][i]) {
00190                 lextree->lc[s][j] = i;
00191                 j++;
00192             }
00193         }
00194         lextree->lc[s][j] = -1;     /* Terminate the list */
00195 
00196         j = 0;
00197         for (i = 0; i < n_ci; i++) {
00198             if (lextree->rc[s][i]) {
00199                 lextree->rc[s][j] = i;
00200                 j++;
00201             }
00202         }
00203         lextree->rc[s][j] = -1;     /* Terminate the list */
00204     }
00205 }
00206 
00207 /*
00208  * For now, allocate the entire lextree statically.
00209  */
00210 fsg_lextree_t *
00211 fsg_lextree_init(fsg_model_t * fsg, dict_t *dict, dict2pid_t *d2p,
00212                  bin_mdef_t *mdef, hmm_context_t *ctx,
00213                  int32 wip, int32 pip)
00214 {
00215     int32 s, n_leaves;
00216     fsg_lextree_t *lextree;
00217     fsg_pnode_t *pn;
00218 
00219     lextree = ckd_calloc(1, sizeof(fsg_lextree_t));
00220     lextree->fsg = fsg;
00221     lextree->root = ckd_calloc(fsg_model_n_state(fsg),
00222                                sizeof(fsg_pnode_t *));
00223     lextree->alloc_head = ckd_calloc(fsg_model_n_state(fsg),
00224                                      sizeof(fsg_pnode_t *));
00225     lextree->ctx = ctx;
00226     lextree->dict = dict;
00227     lextree->d2p = d2p;
00228     lextree->mdef = mdef;
00229     lextree->wip = wip;
00230     lextree->pip = pip;
00231 
00232     /* Compute lc and rc for fsg. */
00233     fsg_lextree_lc_rc(lextree);
00234 
00235     /* Create lextree for each state, i.e. an HMM network that
00236      * represents words for all arcs exiting that state.  Note that
00237      * for a dense grammar such as an N-gram model, this will
00238      * rapidly exhaust all available memory. */
00239     lextree->n_pnode = 0;
00240     n_leaves = 0;
00241     for (s = 0; s < fsg_model_n_state(fsg); s++) {
00242         lextree->root[s] =
00243             fsg_psubtree_init(lextree, fsg, s, &(lextree->alloc_head[s]));
00244 
00245         for (pn = lextree->alloc_head[s]; pn; pn = pn->alloc_next) {
00246             lextree->n_pnode++;
00247             if (pn->leaf)
00248                 ++n_leaves;
00249         }
00250     }
00251     E_INFO("%d HMM nodes in lextree (%d leaves)\n",
00252            lextree->n_pnode, n_leaves);
00253     E_INFO("Allocated %d bytes (%d KiB) for all lextree nodes\n",
00254            lextree->n_pnode * sizeof(fsg_pnode_t),
00255            lextree->n_pnode * sizeof(fsg_pnode_t) / 1024);
00256     E_INFO("Allocated %d bytes (%d KiB) for lextree leafnodes\n",
00257            n_leaves * sizeof(fsg_pnode_t),
00258            n_leaves * sizeof(fsg_pnode_t) / 1024);
00259 
00260 #if __FSG_DBG__
00261     fsg_lextree_dump(lextree, stdout);
00262 #endif
00263 
00264     return lextree;
00265 }
00266 
00267 
00268 void
00269 fsg_lextree_dump(fsg_lextree_t * lextree, FILE * fp)
00270 {
00271     int32 s;
00272 
00273     for (s = 0; s < fsg_model_n_state(lextree->fsg); s++) {
00274         fprintf(fp, "State %5d root %p\n", s, lextree->root[s]);
00275         fsg_psubtree_dump(lextree, lextree->alloc_head[s], fp);
00276     }
00277     fflush(fp);
00278 }
00279 
00280 
00281 void
00282 fsg_lextree_free(fsg_lextree_t * lextree)
00283 {
00284     int32 s;
00285 
00286     if (lextree == NULL)
00287         return;
00288 
00289     if (lextree->fsg)
00290         for (s = 0; s < fsg_model_n_state(lextree->fsg); s++)
00291             fsg_psubtree_free(lextree->alloc_head[s]);
00292 
00293     ckd_free_2d(lextree->lc);
00294     ckd_free_2d(lextree->rc);
00295     ckd_free(lextree->root);
00296     ckd_free(lextree->alloc_head);
00297     ckd_free(lextree);
00298 }
00299 
00300 /******************************
00301  * psubtree stuff starts here *
00302  ******************************/
00303 
00304 void fsg_glist_linklist_free(fsg_glist_linklist_t *glist)
00305 {
00306     if (glist) {
00307         fsg_glist_linklist_t *nxtglist;
00308         if (glist->glist)
00309             glist_free(glist->glist);
00310         nxtglist = glist->next;
00311         while (nxtglist) {
00312             ckd_free(glist);
00313             glist = nxtglist;
00314             if (glist->glist)
00315                 glist_free(glist->glist);
00316             nxtglist = glist->next;
00317         }
00318         ckd_free(glist);
00319     }
00320     return;
00321 }
00322 
00323 void
00324 fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t * ctxt)
00325 {
00326     int32 i;
00327 
00328     for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
00329         ctxt->bv[i] = 0xffffffff;
00330 }
00331 
00332 
00333 /*
00334  * fsg_pnode_ctxt_sub(fsg_pnode_ctxt_t * src, fsg_pnode_ctxt_t * sub)
00335  * This has been moved into a macro in fsg_psubtree.h 
00336  * because it is called so frequently!
00337  */
00338 
00339 
00340 /*
00341  * Add the word emitted by the given transition (fsglink) to the given lextree
00342  * (rooted at root), and return the new lextree root.  (There may actually be
00343  * several root nodes, maintained in a linked list via fsg_pnode_t.sibling.
00344  * "root" is the head of this list.)
00345  * lclist, rclist: sets of left and right context phones for this link.
00346  * alloc_head: head of a linear list of all allocated pnodes for the parent
00347  * FSG state, kept elsewhere and updated by this routine.
00348  */
00349 static fsg_pnode_t *
00350 psubtree_add_trans(fsg_lextree_t *lextree, 
00351                    fsg_pnode_t * root,
00352                    fsg_glist_linklist_t **curglist,
00353                    fsg_link_t * fsglink,
00354                    int16 *lclist, int16 *rclist,
00355                    fsg_pnode_t ** alloc_head)
00356 {
00357     int32 silcipid;             /* Silence CI phone ID */
00358     int32 pronlen;              /* Pronunciation length */
00359     int32 wid;                  /* FSG (not dictionary!!) word ID */
00360     int32 dictwid;              /* Dictionary (not FSG!!) word ID */
00361     int32 ssid;                 /* Senone Sequence ID */
00362     int32 tmatid;
00363     gnode_t *gn;
00364     fsg_pnode_t *pnode, *pred, *head;
00365     int32 n_ci, p, lc, rc;
00366     glist_t lc_pnodelist;       /* Temp pnodes list for different left contexts */
00367     glist_t rc_pnodelist;       /* Temp pnodes list for different right contexts */
00368     int32 i, j;
00369     int n_lc_alloc = 0, n_int_alloc = 0, n_rc_alloc = 0;
00370 
00371     silcipid = bin_mdef_silphone(lextree->mdef);
00372     n_ci = bin_mdef_n_ciphone(lextree->mdef);
00373 
00374     wid = fsg_link_wid(fsglink);
00375     assert(wid >= 0);           /* Cannot be a null transition */
00376     dictwid = dict_wordid(lextree->dict,
00377                           fsg_model_word_str(lextree->fsg, wid));
00378     pronlen = dict_pronlen(lextree->dict, dictwid);
00379     assert(pronlen >= 1);
00380 
00381     assert(lclist[0] >= 0);     /* At least one phonetic context provided */
00382     assert(rclist[0] >= 0);
00383 
00384     head = *alloc_head;
00385     pred = NULL;
00386 
00387     if (pronlen == 1) {         /* Single-phone word */
00388         int ci = dict_first_phone(lextree->dict, dictwid);
00389         /* Only non-filler words are mpx */
00390         if (dict_filler_word(lextree->dict, dictwid)) {
00391             /*
00392              * Left diphone ID for single-phone words already assumes SIL is right
00393              * context; only left contexts need to be handled.
00394              */
00395             lc_pnodelist = NULL;
00396 
00397             for (i = 0; lclist[i] >= 0; i++) {
00398                 lc = lclist[i];
00399                 ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid);
00400                 tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid));
00401                 /* Check if this ssid already allocated for some other context */
00402                 for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
00403                     pnode = (fsg_pnode_t *) gnode_ptr(gn);
00404 
00405                     if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) {
00406                         /* already allocated; share it for this context phone */
00407                         fsg_pnode_add_ctxt(pnode, lc);
00408                         break;
00409                     }
00410                 }
00411 
00412                 if (!gn) {      /* ssid not already allocated */
00413                     pnode =
00414                         (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
00415                     pnode->ctx = lextree->ctx;
00416                     pnode->next.fsglink = fsglink;
00417                     pnode->logs2prob =
00418                         (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
00419                         + lextree->wip + lextree->pip;
00420                     pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
00421                     pnode->ppos = 0;
00422                     pnode->leaf = TRUE;
00423                     pnode->sibling = root;      /* All root nodes linked together */
00424                     fsg_pnode_add_ctxt(pnode, lc);      /* Initially zeroed by calloc above */
00425                     pnode->alloc_next = head;
00426                     head = pnode;
00427                     root = pnode;
00428                     ++n_lc_alloc;
00429 
00430                     hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
00431 
00432                     lc_pnodelist =
00433                         glist_add_ptr(lc_pnodelist, (void *) pnode);
00434                 }
00435             }
00436 
00437             glist_free(lc_pnodelist);
00438         }
00439         else {                  /* Filler word; no context modelled */
00440             ssid = bin_mdef_pid2ssid(lextree->mdef, ci); /* probably the same... */
00441             tmatid = bin_mdef_pid2tmatid(lextree->mdef, ci);
00442 
00443             pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
00444             pnode->ctx = lextree->ctx;
00445             pnode->next.fsglink = fsglink;
00446             pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
00447                 + lextree->wip + lextree->pip;
00448             pnode->ci_ext = silcipid;   /* Presents SIL as context to neighbors */
00449             pnode->ppos = 0;
00450             pnode->leaf = TRUE;
00451             pnode->sibling = root;
00452             fsg_pnode_add_all_ctxt(&(pnode->ctxt));
00453             pnode->alloc_next = head;
00454             head = pnode;
00455             root = pnode;
00456             ++n_int_alloc;
00457 
00458             hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
00459         }
00460     }
00461     else {                      /* Multi-phone word */
00462         fsg_pnode_t **ssid_pnode_map;       /* Temp array of ssid->pnode mapping */
00463         ssid_pnode_map =
00464             (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *));
00465         lc_pnodelist = NULL;
00466         rc_pnodelist = NULL;
00467 
00468         for (p = 0; p < pronlen; p++) {
00469             int ci = dict_pron(lextree->dict, dictwid, p);
00470             if (p == 0) {       /* Root phone, handle required left contexts */
00471                 /* Find if we already have an lc_pnodelist for the first phone of this word */
00472                 fsg_glist_linklist_t *glist;
00473 
00474                 rc = dict_pron(lextree->dict, dictwid, 1);
00475                 for (glist = *curglist;
00476                      glist && glist->glist && glist->ci != ci && glist->rc != rc;
00477                      glist = glist->next)
00478                     ;
00479                 if (glist && glist->ci == ci && glist->rc == rc && glist->glist) {
00480                     /* We've found a valid glist. Hook to it and move to next phoneme */
00481                     E_DEBUG(2,("Found match for (%d,%d)\n", ci, rc));
00482                     lc_pnodelist = glist->glist;
00483                     /* Set the predecessor node for the future tree first */
00484                     pred = (fsg_pnode_t *) gnode_ptr(lc_pnodelist);
00485                     continue;
00486                 }
00487                 else {
00488                     /* Two cases that can bring us here
00489                      * a. glist == NULL, i.e. end of current list. Create new entry.
00490                      * b. glist->glist == NULL, i.e. first entry into list.
00491                      */
00492                     if (glist == NULL) { /* Case a; reduce it to case b by allocing glist */
00493                         glist = (fsg_glist_linklist_t*) ckd_calloc(1, sizeof(fsg_glist_linklist_t));
00494                         glist->next = *curglist;
00495                         *curglist = glist;
00496                     }
00497                     glist->ci = ci;
00498                     glist->rc = rc;
00499                     lc_pnodelist = glist->glist = NULL; /* Gets created below */
00500                 }
00501 
00502                 for (i = 0; lclist[i] >= 0; i++) {
00503                     lc = lclist[i];
00504                     ssid = dict2pid_ldiph_lc(lextree->d2p, ci, rc, lc);
00505                     tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid));
00506                     /* Compression is not done by d2p, so we do it
00507                      * here.  This might be slow, but it might not
00508                      * be... we'll see. */
00509                     pnode = ssid_pnode_map[0];
00510                     for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) {
00511                         pnode = ssid_pnode_map[j];
00512                         if (hmm_nonmpx_ssid(&pnode->hmm) == ssid)
00513                             break;
00514                     }
00515                     assert(j < n_ci);
00516                     if (!pnode) {       /* Allocate pnode for this new ssid */
00517                         pnode =
00518                             (fsg_pnode_t *) ckd_calloc(1,
00519                                                        sizeof
00520                                                        (fsg_pnode_t));
00521                         pnode->ctx = lextree->ctx;
00522                         /* This bit is tricky! For now we'll put the prob in the final link only */
00523                         /* pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
00524                            + lextree->wip + lextree->pip; */
00525                         pnode->logs2prob = lextree->wip + lextree->pip;
00526                         pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
00527                         pnode->ppos = 0;
00528                         pnode->leaf = FALSE;
00529                         pnode->sibling = root;  /* All root nodes linked together */
00530                         pnode->alloc_next = head;
00531                         head = pnode;
00532                         root = pnode;
00533                         ++n_lc_alloc;
00534 
00535                         hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
00536 
00537                         lc_pnodelist =
00538                             glist_add_ptr(lc_pnodelist, (void *) pnode);
00539                         ssid_pnode_map[j] = pnode;
00540                     }
00541                     fsg_pnode_add_ctxt(pnode, lc);
00542                 }
00543                 /* Put the lc_pnodelist back into glist */
00544                 glist->glist = lc_pnodelist;
00545 
00546                 /* The predecessor node for the future tree is the root */
00547                 pred = root;
00548             }
00549             else if (p != pronlen - 1) {        /* Word internal phone */
00550                 fsg_pnode_t    *pnodeyoungest;
00551 
00552                 ssid = dict2pid_internal(lextree->d2p, dictwid, p);
00553                 tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_pron (lextree->dict, dictwid, p));
00554                 /* First check if we already have this ssid in our tree */
00555                 pnode = pred->next.succ;
00556                 pnodeyoungest = pnode; /* The youngest sibling */
00557                 while (pnode && (hmm_nonmpx_ssid(&pnode->hmm) != ssid || pnode->leaf)) {
00558                     pnode = pnode->sibling;
00559                 }
00560                 if (pnode && (hmm_nonmpx_ssid(&pnode->hmm) == ssid && !pnode->leaf)) {
00561                     /* Found the ssid; go to next phoneme */
00562                     E_DEBUG(2,("Found match for %d\n", ci));
00563                     pred = pnode;
00564                     continue;
00565                 }
00566 
00567                 /* pnode not found, allocate it */
00568                 pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
00569                 pnode->ctx = lextree->ctx;
00570                 pnode->logs2prob = lextree->pip;
00571                 pnode->ci_ext = dict_pron(lextree->dict, dictwid, p);
00572                 pnode->ppos = p;
00573                 pnode->leaf = FALSE;
00574                 pnode->sibling = pnodeyoungest; /* May be NULL */
00575                 if (p == 1) {   /* Predecessor = set of root nodes for left ctxts */
00576                     for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
00577                         pred = (fsg_pnode_t *) gnode_ptr(gn);
00578                         pred->next.succ = pnode;
00579                     }
00580                 }
00581                 else {          /* Predecessor = word internal node */
00582                     pred->next.succ = pnode;
00583                 }
00584                 pnode->alloc_next = head;
00585                 head = pnode;
00586                 ++n_int_alloc;
00587 
00588                 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
00589 
00590                 pred = pnode;
00591             }
00592             else {              /* Leaf phone, handle required right contexts */
00593                 /* Note, leaf phones are not part of the tree */
00594                 xwdssid_t *rssid;
00595                 memset((void *) ssid_pnode_map, 0,
00596                        n_ci * sizeof(fsg_pnode_t *));
00597                 lc = dict_pron(lextree->dict, dictwid, p-1);
00598                 rssid = dict2pid_rssid(lextree->d2p, ci, lc);
00599                 tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_pron (lextree->dict, dictwid, p));
00600 
00601                 for (i = 0; rclist[i] >= 0; i++) {
00602                     rc = rclist[i];
00603 
00604                     j = rssid->cimap[rc];
00605                     ssid = rssid->ssid[j];
00606                     pnode = ssid_pnode_map[j];
00607 
00608                     if (!pnode) {       /* Allocate pnode for this new ssid */
00609                         pnode =
00610                             (fsg_pnode_t *) ckd_calloc(1,
00611                                                        sizeof
00612                                                        (fsg_pnode_t));
00613                         pnode->ctx = lextree->ctx;
00614                         /* We are plugging the word prob here. Ugly */
00615                         /* pnode->logs2prob = lextree->pip; */
00616                         pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
00617                             + lextree->pip;
00618                         pnode->ci_ext = dict_pron(lextree->dict, dictwid, p);
00619                         pnode->ppos = p;
00620                         pnode->leaf = TRUE;
00621                         pnode->sibling = rc_pnodelist ?
00622                             (fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL;
00623                         pnode->next.fsglink = fsglink;
00624                         pnode->alloc_next = head;
00625                         head = pnode;
00626                         ++n_rc_alloc;
00627 
00628                         hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
00629 
00630                         rc_pnodelist =
00631                             glist_add_ptr(rc_pnodelist, (void *) pnode);
00632                         ssid_pnode_map[j] = pnode;
00633                     }
00634                     else {
00635                         assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid);
00636                     }
00637                     fsg_pnode_add_ctxt(pnode, rc);
00638                 }
00639 
00640                 if (p == 1) {   /* Predecessor = set of root nodes for left ctxts */
00641                     for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
00642                         pred = (fsg_pnode_t *) gnode_ptr(gn);
00643                         if (!pred->next.succ)
00644                             pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
00645                         else {
00646                             /* Link to the end of the sibling chain */
00647                             fsg_pnode_t *succ = pred->next.succ;
00648                             while (succ->sibling) succ = succ->sibling;
00649                             succ->sibling = (fsg_pnode_t*) gnode_ptr(rc_pnodelist);
00650                             /* Since all entries of lc_pnodelist point
00651                                to the same array, sufficient to update it once */
00652                             break; 
00653                         }
00654                     }
00655                 }
00656                 else {          /* Predecessor = word internal node */
00657                     if (!pred->next.succ)
00658                         pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
00659                     else {
00660                         /* Link to the end of the sibling chain */
00661                         fsg_pnode_t *succ = pred->next.succ;
00662                         while (succ->sibling) succ = succ->sibling;
00663                         succ->sibling = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
00664                     }
00665                 }
00666             }
00667         }
00668 
00669         ckd_free((void *) ssid_pnode_map);
00670         /* glist_free(lc_pnodelist);  Nope; this gets freed outside */
00671         glist_free(rc_pnodelist);
00672     }
00673 
00674     E_DEBUG(2,("Allocated %d HMMs (%d lc, %d rc, %d internal)\n",
00675                n_lc_alloc + n_rc_alloc + n_int_alloc,
00676                n_lc_alloc, n_rc_alloc, n_int_alloc));
00677     *alloc_head = head;
00678 
00679     return root;
00680 }
00681 
00682 
00683 static fsg_pnode_t *
00684 fsg_psubtree_init(fsg_lextree_t *lextree,
00685                   fsg_model_t * fsg, int32 from_state,
00686                   fsg_pnode_t ** alloc_head)
00687 {
00688     int32 dst;
00689     gnode_t *gn;
00690     fsg_link_t *fsglink;
00691     fsg_pnode_t *root;
00692     int32 n_ci, n_arc;
00693     fsg_glist_linklist_t *glist = NULL;
00694 
00695     root = NULL;
00696     assert(*alloc_head == NULL);
00697 
00698     n_ci = bin_mdef_n_ciphone(lextree->mdef);
00699     if (n_ci > (FSG_PNODE_CTXT_BVSZ * 32)) {
00700         E_FATAL
00701             ("#phones > %d; increase FSG_PNODE_CTXT_BVSZ and recompile\n",
00702              FSG_PNODE_CTXT_BVSZ * 32);
00703     }
00704     n_arc = 0;
00705     for (dst = 0; dst < fsg_model_n_state(fsg); dst++) {
00706         /* Add all links from from_state to dst */
00707         for (gn = fsg_model_trans(fsg, from_state, dst); gn;
00708              gn = gnode_next(gn)) {
00709             /* Add word emitted by this transition (fsglink) to lextree */
00710             fsglink = (fsg_link_t *) gnode_ptr(gn);
00711 
00712             assert(fsg_link_wid(fsglink) >= 0);     /* Cannot be a null trans */
00713 
00714             E_DEBUG(2,("Building lextree for arc from %d to %d: %s\n",
00715                        from_state, dst, fsg_model_word_str(fsg, fsg_link_wid(fsglink))));
00716             root = psubtree_add_trans(lextree, root, &glist, fsglink,
00717                                       lextree->lc[from_state],
00718                                       lextree->rc[dst],
00719                                       alloc_head);
00720             ++n_arc;
00721         }
00722     }
00723     E_DEBUG(2,("State %d has %d outgoing arcs\n", from_state, n_arc));
00724 
00725     fsg_glist_linklist_free(glist);
00726 
00727     return root;
00728 }
00729 
00730 
00731 static void
00732 fsg_psubtree_free(fsg_pnode_t * head)
00733 {
00734     fsg_pnode_t *next;
00735 
00736     while (head) {
00737         next = head->alloc_next;
00738         hmm_deinit(&head->hmm);
00739         ckd_free(head);
00740         head = next;
00741     }
00742 }
00743 
00744 void fsg_psubtree_dump_node(fsg_lextree_t *tree, fsg_pnode_t *node, FILE *fp)
00745 {    
00746     int32 i;
00747     fsg_link_t *tl;
00748 
00749     /* Indentation */
00750     for (i = 0; i <= node->ppos; i++)
00751         fprintf(fp, "  ");
00752 
00753     fprintf(fp, "%p.@", node);    /* Pointer used as node
00754                          * ID */
00755     fprintf(fp, " %5d.SS", hmm_nonmpx_ssid(&node->hmm));
00756     fprintf(fp, " %10d.LP", node->logs2prob);
00757     fprintf(fp, " %p.SIB", node->sibling);
00758     fprintf(fp, " %s.%d", bin_mdef_ciphone_str(tree->mdef, node->ci_ext), node->ppos);
00759     if ((node->ppos == 0) || node->leaf) {
00760         fprintf(fp, " [");
00761         for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
00762             fprintf(fp, "%08x", node->ctxt.bv[i]);
00763         fprintf(fp, "]");
00764     }
00765     if (node->leaf) {
00766         tl = node->next.fsglink;
00767         fprintf(fp, " {%s[%d->%d](%d)}",
00768                 fsg_model_word_str(tree->fsg, tl->wid),
00769                 tl->from_state, tl->to_state, tl->logs2prob);
00770     } else {
00771         fprintf(fp, " %p.NXT", node->next.succ);
00772     }
00773     fprintf(fp, "\n");
00774 
00775     return;
00776 }
00777 
00778 void 
00779 fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t *root, FILE * fp)
00780 {
00781     fsg_pnode_t *succ;
00782 
00783     if (root == NULL) return;
00784     if (root->ppos == 0) {
00785         while(root->sibling && root->sibling->next.succ == root->next.succ) {
00786             fsg_psubtree_dump_node(tree, root, fp);
00787             root = root->sibling;
00788         }
00789         fflush(fp);
00790     }
00791     
00792     fsg_psubtree_dump_node(tree, root, fp);
00793 
00794     if (root->leaf) {
00795         if (root->ppos == 0 && root->sibling) { // For single-phone words
00796             fsg_psubtree_dump(tree, root->sibling,fp);
00797         }
00798         return;
00799     }
00800 
00801     succ = root->next.succ;
00802     while(succ) {
00803         fsg_psubtree_dump(tree, succ,fp);
00804         succ = succ->sibling;
00805     }
00806 
00807     if (root->ppos == 0) {
00808         fsg_psubtree_dump(tree, root->sibling,fp);
00809         fflush(fp);
00810     }
00811 
00812     return;
00813 }
00814 
00815 void
00816 fsg_psubtree_pnode_deactivate(fsg_pnode_t * pnode)
00817 {
00818     hmm_clear(&pnode->hmm);
00819 }