PocketSphinx  0.6
src/libpocketsphinx/fsg_lextree.h
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00020  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00021  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00022  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00023  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00025  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00026  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00027  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00028  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00029  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  * ====================================================================
00032  *
00033  */
00034 /*
00035  * fsg_lextree.h -- The collection of all the lextrees for the entire FSM.
00036  * 
00037  * **********************************************
00038  * CMU ARPA Speech Project
00039  *
00040  * Copyright (c) 2004 Carnegie Mellon University.
00041  * ALL RIGHTS RESERVED.
00042  * **********************************************
00043  * 
00044  * HISTORY
00045  * 
00046  * $Log: fsg_lextree.h,v $
00047  * Revision 1.1.1.1  2006/05/23 18:45:02  dhuggins
00048  * re-importation
00049  *
00050  * Revision 1.1  2004/07/16 00:57:12  egouvea
00051  * Added Ravi's implementation of FSG support.
00052  *
00053  * Revision 1.3  2004/06/23 20:32:16  rkm
00054  * *** empty log message ***
00055  *
00056  * Revision 1.2  2004/05/27 14:22:57  rkm
00057  * FSG cross-word triphones completed (but for single-phone words)
00058  *
00059  * Revision 1.1.1.1  2004/03/01 14:30:31  rkm
00060  *
00061  *
00062  * Revision 1.1  2004/02/23 15:53:45  rkm
00063  * Renamed from fst to fsg
00064  *
00065  * Revision 1.2  2004/02/19 21:16:54  rkm
00066  * Added fsg_search.{c,h}
00067  *
00068  * Revision 1.1  2004/02/18 15:02:34  rkm
00069  * Added fsg_lextree.{c,h}
00070  *
00071  * 
00072  * 18-Feb-2004  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
00073  *              Started.
00074  */
00075 
00076 
00077 #ifndef __S2_FSG_LEXTREE_H__
00078 #define __S2_FSG_LEXTREE_H__
00079 
00080 /* SphinxBase headers. */
00081 #include <sphinxbase/cmd_ln.h>
00082 #include <sphinxbase/fsg_model.h>
00083 
00084 /* Local headers. */
00085 #include "hmm.h"
00086 #include "dict.h"
00087 #include "dict2pid.h"
00088 
00089 /*
00090  * **HACK-ALERT**!!  Compile-time constant determining the size of the
00091  * bitvector fsg_pnode_t.fsg_pnode_ctxt_t.bv.  (See below.)
00092  * But it makes memory allocation simpler and more efficient.
00093  */
00094 #define FSG_PNODE_CTXT_BVSZ     2
00095 
00096 typedef struct {
00097     uint32 bv[FSG_PNODE_CTXT_BVSZ];
00098 } fsg_pnode_ctxt_t;
00099 
00100 
00101 /*
00102  * All transitions (words) out of any given FSG state represented are by a
00103  * phonetic prefix lextree (except for epsilon or null transitions; they
00104  * are not part of the lextree).  Lextree leaf nodes represent individual
00105  * FSG transitions, so no sharing is allowed at the leaf nodes.  The FSG
00106  * transition probs are distributed along the lextree: the prob at a node
00107  * is the max of the probs of all leaf nodes (and, hence, FSG transitions)
00108  * reachable from that node.
00109  * 
00110  * To conserve memory, the underlying HMMs with state-level information are
00111  * allocated only as needed.  Root and leaf nodes must also account for all
00112  * the possible phonetic contexts, with an independent HMM for each distinct
00113  * context.
00114  */
00115 typedef struct fsg_pnode_s {
00116     /*
00117      * If this is not a leaf node, the first successor (child) node.  Otherwise
00118      * the parent FSG transition for which this is the leaf node (for figuring
00119      * the FSG destination state, and word emitted by the transition).  A node
00120      * may have several children.  The succ ptr gives just the first; the rest
00121      * are linked via the sibling ptr below.
00122      */
00123     union {
00124         struct fsg_pnode_s *succ;
00125         fsg_link_t *fsglink;
00126     } next;
00127   
00128     /*
00129      * For simplicity of memory management (i.e., freeing the pnodes), all
00130      * pnodes allocated for all transitions out of a state are maintained in a
00131      * linear linked list through the alloc_next pointer.
00132      */
00133     struct fsg_pnode_s *alloc_next;
00134   
00135     /*
00136      * The next node that is also a child of the parent of this node; NULL if
00137      * none.
00138      */
00139     struct fsg_pnode_s *sibling;
00140 
00141     /*
00142      * The transition (log) probability to be incurred upon transitioning to
00143      * this node.  (Transition probabilities are really associated with the
00144      * transitions.  But a lextree node has exactly one incoming transition.
00145      * Hence, the prob can be associated with the node.)
00146      * This is a logs2(prob) value, and includes the language weight.
00147      */
00148     int32 logs2prob;
00149   
00150     /*
00151      * The root and leaf positions associated with any transition have to deal
00152      * with multiple phonetic contexts.  However, different contexts may result
00153      * in the same SSID (senone-seq ID), and can share a single pnode with that
00154      * SSID.  But the pnode should track the set of context CI phones that share
00155      * it.  Hence the fsg_pnode_ctxt_t bit-vector set-representation.  (For
00156      * simplicity of implementation, its size is a compile-time constant for
00157      * now.)  Single phone words would need a 2-D array of context, but that's
00158      * too expensive.  For now, they simply use SIL as right context, so only
00159      * the left context is properly modelled.
00160      * (For word-internal phones, this field is unused, of course.)
00161      */
00162     fsg_pnode_ctxt_t ctxt;
00163   
00164     uint16 ci_ext;      /* This node's CIphone as viewed externally (context) */
00165     uint8 ppos; /* Phoneme position in pronunciation */
00166     uint8 leaf; /* Whether this is a leaf node */
00167   
00168     /* HMM-state-level stuff here */
00169     hmm_context_t *ctx;
00170     hmm_t hmm;
00171 } fsg_pnode_t;
00172 
00173 /* Access macros */
00174 #define fsg_pnode_leaf(p)       ((p)->leaf)
00175 #define fsg_pnode_logs2prob(p)  ((p)->logs2prob)
00176 #define fsg_pnode_succ(p)       ((p)->next.succ)
00177 #define fsg_pnode_fsglink(p)    ((p)->next.fsglink)
00178 #define fsg_pnode_sibling(p)    ((p)->sibling)
00179 #define fsg_pnode_hmmptr(p)     (&((p)->hmm))
00180 #define fsg_pnode_ci_ext(p)     ((p)->ci_ext)
00181 #define fsg_pnode_ppos(p)       ((p)->ppos)
00182 #define fsg_pnode_leaf(p)       ((p)->leaf)
00183 #define fsg_pnode_ctxt(p)       ((p)->ctxt)
00184 
00185 #define fsg_pnode_add_ctxt(p,c) ((p)->ctxt.bv[(c)>>5] |= (1 << ((c)&0x001f)))
00186 
00187 /*
00188  * The following is macroized because its called very frequently
00189  * ::: uint32 fsg_pnode_ctxt_sub (fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub);
00190  */
00191 /*
00192  * Subtract bitvector sub from bitvector src (src updated with the result).
00193  * Return 0 if result is all 0, non-zero otherwise.
00194  */
00195 
00196 #if (FSG_PNODE_CTXT_BVSZ == 1)
00197     #define FSG_PNODE_CTXT_SUB(src,sub) \
00198     ((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0]))
00199 #elif (FSG_PNODE_CTXT_BVSZ == 2)
00200     #define FSG_PNODE_CTXT_SUB(src,sub) \
00201     (((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0])) | \
00202      ((src)->bv[1] = (~((sub)->bv[1]) & (src)->bv[1])))
00203 #elif (FSG_PNODE_CTXT_BVSZ == 4)
00204     #define FSG_PNODE_CTXT_SUB(src,sub) \
00205     (((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0]))  | \
00206      ((src)->bv[1] = (~((sub)->bv[1]) & (src)->bv[1]))  | \
00207      ((src)->bv[2] = (~((sub)->bv[2]) & (src)->bv[2]))  | \
00208      ((src)->bv[3] = (~((sub)->bv[3]) & (src)->bv[3])))
00209 #endif
00210 
00214 typedef struct fsg_lextree_s {
00215     fsg_model_t *fsg;   
00216     hmm_context_t *ctx; 
00217     dict_t *dict;     
00218     dict2pid_t *d2p;    
00219     bin_mdef_t *mdef;   
00221     /*
00222      * Left and right CIphone sets for each state.
00223      * Left context CIphones for a state S: If word W transitions into S, W's
00224      * final CIphone is in S's {lc}.  Words transitioning out of S must consider
00225      * these left context CIphones.
00226      * Similarly, right contexts for state S: If word W transitions out of S,
00227      * W's first CIphone is in S's {rc}.  Words transitioning into S must consider
00228      * these right contexts.
00229      * 
00230      * NOTE: Words may transition into and out of S INDIRECTLY, with intermediate
00231      *   null transitions.
00232      * NOTE: Single-phone words are difficult; only SILENCE right context is
00233      *   modelled for them.
00234      * NOTE: Non-silence filler phones aren't included in these sets.  Filler
00235      *   words don't use context, and present the SILENCE phone as context to
00236      *   adjacent words.
00237      */
00238     int16 **lc;         
00239     int16 **rc;         
00241     fsg_pnode_t **root; /* root[s] = lextree representing all transitions
00242                            out of state s.  Note that the "tree" for each
00243                            state is actually a collection of trees, linked
00244                            via fsg_pnode_t.sibling (root[s]->sibling) */
00245     fsg_pnode_t **alloc_head;   /* alloc_head[s] = head of linear list of all
00246                                    pnodes allocated for state s */
00247     int32 n_pnode;      /* #HMM nodes in search structure */
00248     int32 wip;
00249     int32 pip;
00250 } fsg_lextree_t;
00251 
00252 /* Access macros */
00253 #define fsg_lextree_root(lt,s)  ((lt)->root[s])
00254 #define fsg_lextree_n_pnode(lt) ((lt)->n_pnode)
00255 
00259 fsg_lextree_t *fsg_lextree_init(fsg_model_t *fsg, dict_t *dict,
00260                                 dict2pid_t *d2p,
00261                                 bin_mdef_t *mdef, hmm_context_t *ctx,
00262                                 int32 wip, int32 pip);
00263 
00267 void fsg_lextree_free(fsg_lextree_t *fsg);
00268 
00272 void fsg_lextree_dump(fsg_lextree_t *fsg, FILE *fh);
00273 
00277 void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode);
00278 
00282 void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt);
00283 
00284 
00285 
00286 #endif