• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libpocketsphinx/fsg_lextree.h

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00020  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00021  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00022  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00023  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00025  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00026  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00027  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00028  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00029  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  * ====================================================================
00032  *
00033  */
00034 /*
00035  * fsg_lextree.h -- The collection of all the lextrees for the entire FSM.
00036  * 
00037  * **********************************************
00038  * CMU ARPA Speech Project
00039  *
00040  * Copyright (c) 2004 Carnegie Mellon University.
00041  * ALL RIGHTS RESERVED.
00042  * **********************************************
00043  * 
00044  * HISTORY
00045  * 
00046  * $Log: fsg_lextree.h,v $
00047  * Revision 1.1.1.1  2006/05/23 18:45:02  dhuggins
00048  * re-importation
00049  *
00050  * Revision 1.1  2004/07/16 00:57:12  egouvea
00051  * Added Ravi's implementation of FSG support.
00052  *
00053  * Revision 1.3  2004/06/23 20:32:16  rkm
00054  * *** empty log message ***
00055  *
00056  * Revision 1.2  2004/05/27 14:22:57  rkm
00057  * FSG cross-word triphones completed (but for single-phone words)
00058  *
00059  * Revision 1.1.1.1  2004/03/01 14:30:31  rkm
00060  *
00061  *
00062  * Revision 1.1  2004/02/23 15:53:45  rkm
00063  * Renamed from fst to fsg
00064  *
00065  * Revision 1.2  2004/02/19 21:16:54  rkm
00066  * Added fsg_search.{c,h}
00067  *
00068  * Revision 1.1  2004/02/18 15:02:34  rkm
00069  * Added fsg_lextree.{c,h}
00070  *
00071  * 
00072  * 18-Feb-2004  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
00073  *              Started.
00074  */
00075 
00076 
00077 #ifndef __S2_FSG_LEXTREE_H__
00078 #define __S2_FSG_LEXTREE_H__
00079 
00080 /* SphinxBase headers. */
00081 #include <cmd_ln.h>
00082 #include <fsg_model.h>
00083 
00084 /* Local headers. */
00085 #include "hmm.h"
00086 #include "dict.h"
00087 
00088 /*
00089  * **HACK-ALERT**!!  Compile-time constant determining the size of the
00090  * bitvector fsg_pnode_t.fsg_pnode_ctxt_t.bv.  (See below.)
00091  * But it makes memory allocation simpler and more efficient.
00092  */
00093 #define FSG_PNODE_CTXT_BVSZ     2
00094 
00095 typedef struct {
00096     uint32 bv[FSG_PNODE_CTXT_BVSZ];
00097 } fsg_pnode_ctxt_t;
00098 
00099 
00100 /*
00101  * All transitions (words) out of any given FSG state represented are by a
00102  * phonetic prefix lextree (except for epsilon or null transitions; they
00103  * are not part of the lextree).  Lextree leaf nodes represent individual
00104  * FSG transitions, so no sharing is allowed at the leaf nodes.  The FSG
00105  * transition probs are distributed along the lextree: the prob at a node
00106  * is the max of the probs of all leaf nodes (and, hence, FSG transitions)
00107  * reachable from that node.
00108  * 
00109  * To conserve memory, the underlying HMMs with state-level information are
00110  * allocated only as needed.  Root and leaf nodes must also account for all
00111  * the possible phonetic contexts, with an independent HMM for each distinct
00112  * context.
00113  */
00114 typedef struct fsg_pnode_s {
00115     /*
00116      * If this is not a leaf node, the first successor (child) node.  Otherwise
00117      * the parent FSG transition for which this is the leaf node (for figuring
00118      * the FSG destination state, and word emitted by the transition).  A node
00119      * may have several children.  The succ ptr gives just the first; the rest
00120      * are linked via the sibling ptr below.
00121      */
00122     union {
00123         struct fsg_pnode_s *succ;
00124         fsg_link_t *fsglink;
00125     } next;
00126   
00127     /*
00128      * For simplicity of memory management (i.e., freeing the pnodes), all
00129      * pnodes allocated for all transitions out of a state are maintained in a
00130      * linear linked list through the alloc_next pointer.
00131      */
00132     struct fsg_pnode_s *alloc_next;
00133   
00134     /*
00135      * The next node that is also a child of the parent of this node; NULL if
00136      * none.
00137      */
00138     struct fsg_pnode_s *sibling;
00139 
00140     /*
00141      * The transition (log) probability to be incurred upon transitioning to
00142      * this node.  (Transition probabilities are really associated with the
00143      * transitions.  But a lextree node has exactly one incoming transition.
00144      * Hence, the prob can be associated with the node.)
00145      * This is a logs2(prob) value, and includes the language weight.
00146      */
00147     int32 logs2prob;
00148   
00149     /*
00150      * The root and leaf positions associated with any transition have to deal
00151      * with multiple phonetic contexts.  However, different contexts may result
00152      * in the same SSID (senone-seq ID), and can share a single pnode with that
00153      * SSID.  But the pnode should track the set of context CI phones that share
00154      * it.  Hence the fsg_pnode_ctxt_t bit-vector set-representation.  (For
00155      * simplicity of implementation, its size is a compile-time constant for
00156      * now.)  Single phone words would need a 2-D array of context, but that's
00157      * too expensive.  For now, they simply use SIL as right context, so only
00158      * the left context is properly modelled.
00159      * (For word-internal phones, this field is unused, of course.)
00160      */
00161     fsg_pnode_ctxt_t ctxt;
00162   
00163     uint16 ci_ext;      /* This node's CIphone as viewed externally (context) */
00164     uint8 ppos; /* Phoneme position in pronunciation */
00165     uint8 leaf; /* Whether this is a leaf node */
00166   
00167     /* HMM-state-level stuff here */
00168     hmm_context_t *ctx;
00169     hmm_t hmm;
00170 } fsg_pnode_t;
00171 
00172 /* Access macros */
00173 #define fsg_pnode_leaf(p)       ((p)->leaf)
00174 #define fsg_pnode_logs2prob(p)  ((p)->logs2prob)
00175 #define fsg_pnode_succ(p)       ((p)->next.succ)
00176 #define fsg_pnode_fsglink(p)    ((p)->next.fsglink)
00177 #define fsg_pnode_sibling(p)    ((p)->sibling)
00178 #define fsg_pnode_hmmptr(p)     (&((p)->hmm))
00179 #define fsg_pnode_ci_ext(p)     ((p)->ci_ext)
00180 #define fsg_pnode_ppos(p)       ((p)->ppos)
00181 #define fsg_pnode_leaf(p)       ((p)->leaf)
00182 #define fsg_pnode_ctxt(p)       ((p)->ctxt)
00183 
00184 #define fsg_pnode_add_ctxt(p,c) ((p)->ctxt.bv[(c)>>5] |= (1 << ((c)&0x001f)))
00185 
00189 typedef struct fsg_lextree_s {
00190     fsg_model_t *fsg;   
00191     hmm_context_t *ctx; 
00192     dict_t *dict;       
00193     bin_mdef_t *mdef;   
00195     /*
00196      * Left and right CIphone sets for each state.
00197      * Left context CIphones for a state S: If word W transitions into S, W's
00198      * final CIphone is in S's {lc}.  Words transitioning out of S must consider
00199      * these left context CIphones.
00200      * Similarly, right contexts for state S: If word W transitions out of S,
00201      * W's first CIphone is in S's {rc}.  Words transitioning into S must consider
00202      * these right contexts.
00203      * 
00204      * NOTE: Words may transition into and out of S INDIRECTLY, with intermediate
00205      *   null transitions.
00206      * NOTE: Single-phone words are difficult; only SILENCE right context is
00207      *   modelled for them.
00208      * NOTE: Non-silence filler phones aren't included in these sets.  Filler
00209      *   words don't use context, and present the SILENCE phone as context to
00210      *   adjacent words.
00211      */
00212     int16 **lc;         
00213     int16 **rc;         
00215     fsg_pnode_t **root; /* root[s] = lextree representing all transitions
00216                            out of state s.  Note that the "tree" for each
00217                            state is actually a collection of trees, linked
00218                            via fsg_pnode_t.sibling (root[s]->sibling) */
00219     fsg_pnode_t **alloc_head;   /* alloc_head[s] = head of linear list of all
00220                                    pnodes allocated for state s */
00221     int32 n_pnode;      /* #HMM nodes in search structure */
00222     int32 wip;
00223     int32 pip;
00224 } fsg_lextree_t;
00225 
00226 /* Access macros */
00227 #define fsg_lextree_root(lt,s)  ((lt)->root[s])
00228 #define fsg_lextree_n_pnode(lt) ((lt)->n_pnode)
00229 
00233 fsg_lextree_t *fsg_lextree_init(fsg_model_t *fsg, dict_t *dict,
00234                                 bin_mdef_t *mdef, hmm_context_t *ctx,
00235                                 int32 wip, int32 pip);
00236 
00240 void fsg_lextree_free(fsg_lextree_t *fsg);
00241 
00245 void fsg_lextree_dump(fsg_lextree_t *fsg, FILE *fh);
00246 
00250 void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode);
00251 
00256 uint32 fsg_pnode_ctxt_sub(fsg_pnode_ctxt_t * src, fsg_pnode_ctxt_t * sub);
00257 
00261 void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt);
00262 
00263 
00264 
00265 #endif

Generated on Thu Jan 27 2011 for PocketSphinx by  doxygen 1.7.1