fsg_search.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
34 /*
35  * fsg_search.h -- Search structures for FSM decoding.
36  *
37  * **********************************************
38  * CMU ARPA Speech Project
39  *
40  * Copyright (c) 2004 Carnegie Mellon University.
41  * ALL RIGHTS RESERVED.
42  * **********************************************
43  *
44  * HISTORY
45  *
46  * $Log: fsg_search.h,v $
47  * Revision 1.2 2006/02/23 05:12:43 arthchan2003
48  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: Adaptation of Sphinx 2's FSG search into Sphinx 3
49  *
50  * Revision 1.1.2.7 2006/01/16 18:20:46 arthchan2003
51  * Remove junks in the code, change the reporting from printf to log_hypstr.
52  *
53  * Revision 1.1.2.6 2005/07/24 19:34:46 arthchan2003
54  * Removed search_hyp_t, used srch_hyp_t instead
55  *
56  * Revision 1.1.2.5 2005/07/24 01:34:54 arthchan2003
57  * Mode 2 is basically running. Still need to fix function such as resulting and build the correct utterance ID
58  *
59  * Revision 1.1.2.4 2005/07/17 05:44:32 arthchan2003
60  * Added dag_write_header so that DAG header writer could be shared between 3.x and 3.0. However, because the backtrack pointer structure is different in 3.x and 3.0. The DAG writer still can't be shared yet.
61  *
62  * Revision 1.1.2.3 2005/07/13 18:39:48 arthchan2003
63  * (For Fun) Remove the hmm_t hack. Consider each s2 global functions one-by-one and replace them by sphinx 3's macro. There are 8 minor HACKs where functions need to be removed temporarily. Also, there are three major hacks. 1, there are no concept of "phone" in sphinx3 dict_t, there is only ciphone. That is to say we need to build it ourselves. 2, sphinx2 dict_t will be a bunch of left and right context tables. This is currently bypass. 3, the fsg routine is using fsg_hmm_t which is just a duplication of CHAN_T in sphinx2, I will guess using hmm_evaluate should be a good replacement. But I haven't figure it out yet.
64  *
65  * Revision 1.1.2.2 2005/06/28 07:01:20 arthchan2003
66  * General fix of fsg routines to make a prototype of fsg_init and fsg_read. Not completed. The number of empty functions in fsg_search is now decreased from 35 to 30.
67  *
68  * Revision 1.1.2.1 2005/06/27 05:26:29 arthchan2003
69  * Sphinx 2 fsg manipulation routines. Compiled with faked functions. Currently fended off from users.
70  *
71  * Revision 1.2 2004/07/23 23:36:34 egouvea
72  * Ravi's merge, with the latest fixes in the FSG code, and making the log files generated by FSG, LM, and allphone have the same 'look and feel', with the backtrace information presented consistently
73  *
74  * Revision 1.6 2004/07/20 13:40:55 rkm
75  * Added FSG get/set start/final state functions.
76  *
77  * Revision 1.5 2004/07/16 19:55:28 rkm
78  * Added state information to hypothesis.
79  *
80  * Revision 1.1 2004/07/16 00:57:12 egouvea
81  * Added Ravi's implementation of FSG support.
82  *
83  * Revision 1.4 2004/07/07 13:56:33 rkm
84  * Added reporting of (acoustic score - best senone score)/frame
85  *
86  * Revision 1.3 2004/06/22 15:36:12 rkm
87  * Added partial result handling in FSG mode
88  *
89  * Revision 1.2 2004/05/27 14:22:57 rkm
90  * FSG cross-word triphones completed (but for single-phone words)
91  *
92  * Revision 1.1.1.1 2004/03/01 14:30:31 rkm
93  *
94  *
95  * Revision 1.6 2004/02/27 16:15:13 rkm
96  * Added FSG switching
97  *
98  * Revision 1.5 2004/02/27 15:05:21 rkm
99  * *** empty log message ***
100  *
101  * Revision 1.4 2004/02/26 14:48:20 rkm
102  * *** empty log message ***
103  *
104  * Revision 1.3 2004/02/26 01:14:48 rkm
105  * *** empty log message ***
106  *
107  * Revision 1.2 2004/02/24 18:13:05 rkm
108  * Added NULL transition handling
109  *
110  * Revision 1.1 2004/02/23 15:53:45 rkm
111  * Renamed from fst to fsg
112  *
113  * Revision 1.1 2004/02/19 21:16:54 rkm
114  * Added fsg_search.{c,h}
115  *
116  *
117  * 18-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
118  * Started.
119  */
120 
121 
122 #ifndef __S2_FSG_SEARCH_H__
123 #define __S2_FSG_SEARCH_H__
124 
/* NOTE(review): HYP_SZ is not referenced in the visible part of this header;
 * presumably a hypothesis buffer/array size used by the implementation --
 * confirm against callers. */
125 #define HYP_SZ 1024
126 
127 
128 #include <stdio.h>
129 
130 #include <cmd_ln.h>
131 #include <logmath.h>
132 #include <s3types.h>
133 #include <glist.h>
134 #include <word_fsg.h>
135 #include <fsg_lextree.h>
136 #include <fsg_history.h>
137 #include <ascr.h>
138 #include <search.h>
139 #include <dict.h>
140 #include <mdef.h>
141 #include <tmat.h>
142 #include <hmm.h>
143 
144 
145 #ifdef __cplusplus
146 extern "C" {
147 #endif
148 #if 0
149 /* Fool Emacs. */
150 }
151 #endif
152 
/*
 * FSG search object: holds the list of loaded FSGs, the currently active FSG
 * together with its lextree and Viterbi history, the per-frame active pnode
 * lists, the pruning thresholds, and the per-utterance results (hyp, ascr,
 * lscr, frame).
 *
 * NOTE(review): this listing skips source lines 182, 185, 188 and 192-195.
 * The cross-reference index at the end of this page attributes the members
 * hmmctx (182), isUsealtpron (185), isReportAltpron (188), dict (192),
 * mdef (193), tmat (194) and am_score_pool (195) to those lines, so the
 * struct as shown here is incomplete -- consult the original header.
 */
153 typedef struct fsg_search_s {
154  glist_t fsglist; /* List of all FSGs loaded */
155 
156  word_fsg_t *fsg; /* Currently active FSG; NULL if none. One
157  must be made active before starting FSG
158  decoding */
159  fsg_lextree_t *lextree; /* Lextree structure for the currently
160  active FSG */
161  fsg_history_t *history; /* For storing the Viterbi search history */
162 
163  glist_t pnode_active; /* Those active in this frame */
164  glist_t pnode_active_next; /* Those activated for the next frame */
165 
166  int32 beam; /* Global threshold */
167  int32 pbeam; /* Threshold for phone transition */
168  int32 wbeam; /* Threshold for word exit */
169 
170  int32 frame; /* Current frame */
171 
172  int32 bestscore; /* For beam pruning */
173  int32 bpidx_start; /* First history entry index this frame */
174 
175  srch_hyp_t *hyp; /* Search hypothesis */
176  int32 ascr, lscr; /* Total acoustic and lm score for utt */
177 
178  int32 n_hmm_eval; /* Total HMMs evaluated this utt */
179 
180  int32 state; /* Whether IDLE or BUSY */
181 
183 
184  /* Added by Arthur at 20050627. The members below carry no comments in
     the original; the notes on them are assumptions to be confirmed
     against fsg_search.c. */
186  int32 isUseFiller; /* presumably a flag: whether filler words are used -- confirm */
187  int32 isBacktrace; /* presumably a flag: whether backtrace output is produced -- confirm */
189  char* DumpLatdir; /* presumably a directory for dumped lattices -- confirm */
190  int32 n_ci_phone; /* presumably the number of CI phones -- confirm */
191 
196  char* uttid; /* presumably the ID of the current utterance -- confirm */
197  int32 *senscale; /* NOTE(review): undocumented; likely per-frame senone score scaling -- confirm */
199  FILE* matchfp; /* presumably the output stream for match results -- confirm */
200  FILE* matchsegfp; /* presumably the output stream for match segmentations -- confirm */
201 
202  cmd_ln_t *config; /* Command-line/configuration object (cmd_ln.h) */
203  logmath_t *logmath; /* Log-math object (logmath.h) */
204 } fsg_search_t;
205 
206 
207 /* Access macros */
/* fsg_search_frame(s): yields the `frame` member of the search structure
 * pointed to by s (documented in the struct as the current frame). */
208 #define fsg_search_frame(s) ((s)->frame)
209 
210 
211 /*
212  * Create, initialize and return a search module for the given FSG.
213  * If no FSG is given (i.e., the argument is NULL), a search structure is
214  * still created. If an FSG is provided, it is made the currently active
215  * FSG.
     *
     * NOTE(review): srch_struct is an untyped pointer; the object it is
     * expected to reference is not visible in this header -- confirm against
     * the implementation before relying on it.
216  */
217 fsg_search_t *fsg_search_init (word_fsg_t *, void *srch_struct);
218 
219 
224 
225 /*
226  * Lookup the FSG associated with the given name and return it, or NULL if
227  * no match found.
228  */
230 
231 
232 /*
233  * Add the given FSG to the collection of FSGs known to this search object.
234  * The given fsg is simply added to the collection. It is not automatically
235  * made the currently active one.
236  * The name of the new FSG must not match any of the existing ones. If so,
237  * FALSE is returned. If successfully added, TRUE is returned.
238  */
240 
241 
242 /*
243  * Delete the given FSG from the known collection. Free the FSG itself,
244  * and if it was the currently active FSG, also free the associated search
245  * structures and leave the current FSG undefined.
246  */
248 
249 
250 /* Like fsg_search_del_fsg(), but identifies the FSG by its name.
     * NOTE(review): the int return convention is not stated here; presumably
     * TRUE/FALSE like the other FSG management calls in this header -- confirm. */
251 int fsg_search_del_fsg_byname (fsg_search_t *, char *name);
252 
253 
254 /*
255  * Switch to a new FSG (identified by its string name). Must not be invoked
256  * when search is busy (i.e., in the midst of an utterance). That's an error
257  * and FALSE is returned. If successful, returns TRUE.
258  */
260 
261 
262 /*
263  * Deallocate search structure.
264  */
266 
267 
268 /*
269  * Prepare the FSG search structure for beginning decoding of the next
270  * utterance.
271  */
273 
274 
275 /*
276  * Windup and clean the FSG search structure after utterance. Fill in the
277  * results of search: fsg_search_t.{hyp,ascr,lscr,frame}. (But some fields
278  * of hyp are left unfilled for now: conf, latden, phone_perp.)
279  */
281 
282 
283 /*
284  * Step one frame forward through the Viterbi search.
285  */
287 
288 
289 /*
290  * Compute the partial or final Viterbi backtrace result. (The result can
291  * be retrieved using the API functions search_result or search_get_hyp().)
292  * If "check_fsg_final_state" is TRUE, the backtrace starts from the best
293  * history entry ending in the final state (if it exists). Otherwise it
294  * starts from the best entry, regardless of the terminating state (usually
295  * used for partial results).
296  */
298  int check_fsg_final_state);
299 
300 /*
301  * Return the start (or final) state of the currently active FSG, if any.
302  * Otherwise return -1.
303  */
306 
307 
308 /*
309  * Set the start (or final) state of the currently active FSG, if any, to the
310  * given state. This operation can only be done in between utterances, not
311  * in the midst of one. Return the previous start (or final) state if
312  * successful. Return -1 on any error.
313  */
314 int32 fsg_search_set_start_state (fsg_search_t *, int32 state);
315 int32 fsg_search_set_final_state (fsg_search_t *, int32 state);
316 
317 
/* NOTE(review): undocumented in the original. The name suggests it computes
 * or flags the set of active senones for the given search object -- confirm
 * against fsg_search.c. */
318 void fsg_search_sen_active (fsg_search_t *search);
319 
320 #ifdef __cplusplus
321 }
322 #endif
323 
324 
325 #endif
fsg_search_t * fsg_search_init(word_fsg_t *, void *srch_struct)
structure for storing the model definition.
Definition: mdef.h:184
glist_t pnode_active_next
Definition: fsg_search.h:164
Definition: fsg_search.h:153
int32 fsg_search_get_start_state(fsg_search_t *)
fsg_history_t * history
Definition: fsg_search.h:161
int32 beam
Definition: fsg_search.h:166
a hypothesis structure
The temporary header file for sphinx 3 functions.
void fsg_search_utt_start(fsg_search_t *)
cmd_ln_t * config
Definition: fsg_search.h:202
int32 * senscale
Definition: fsg_search.h:197
int32 fsg_search_get_final_state(fsg_search_t *)
Definition: word_fsg.h:187
int32 wbeam
Definition: fsg_search.h:168
Operations on dictionary.
int32 lscr
Definition: fsg_search.h:176
char * DumpLatdir
Definition: fsg_search.h:189
int32 n_hmm_eval
Definition: fsg_search.h:178
ascr_t * am_score_pool
Definition: fsg_search.h:195
void fsg_search_history_backtrace(fsg_search_t *search, int check_fsg_final_state)
int32 ascr
Definition: fsg_search.h:176
void fsg_search_sen_active(fsg_search_t *search)
HMM data structure and operation.
srch_hyp_t * hyp
Definition: fsg_search.h:175
int32 pbeam
Definition: fsg_search.h:167
int32 state
Definition: fsg_search.h:180
tmat_t * tmat
Definition: fsg_search.h:194
hmm_context_t * hmmctx
Definition: fsg_search.h:182
void fsg_search_utt_end(fsg_search_t *)
int32 fsg_search_set_final_state(fsg_search_t *, int32 state)
glist_t fsglist
Definition: fsg_search.h:154
int32 isUsealtpron
Definition: fsg_search.h:185
Definition: ascr.h:99
int32 isUseFiller
Definition: fsg_search.h:186
int32 fsg_search_set_start_state(fsg_search_t *, int32 state)
int fsg_search_del_fsg_byname(fsg_search_t *, char *name)
Size definitions of semantic units. Common to both the s3 and s3.X decoders.
FILE * matchfp
Definition: fsg_search.h:199
Shared information between a set of HMMs.
Definition: fsg_lextree.h:117
Definition: fsg_history.h:171
fsg_lextree_t * lextree
Definition: fsg_search.h:159
int32 frame
Definition: fsg_search.h:170
a structure for a dictionary.
Definition: dict.h:146
Transition matrix data structure.
int32 isReportAltpron
Definition: fsg_search.h:188
int32 n_ci_phone
Definition: fsg_search.h:190
int32 bestscore
Definition: fsg_search.h:172
int32 bpidx_start
Definition: fsg_search.h:173
Transition matrix data structure. All phone HMMs are assumed to have the same topology.
Definition: tmat.h:113
void fsg_search_free(fsg_search_t *s)
int fsg_search_del_fsg(fsg_search_t *, word_fsg_t *)
Model definition.
glist_t pnode_active
Definition: fsg_search.h:163
word_fsg_t * fsg
Definition: fsg_search.h:156
char * uttid
Definition: fsg_search.h:196
Wrapper to hold senone scores.
int fsg_search_add_fsg(fsg_search_t *, word_fsg_t *)
struct fsg_search_s fsg_search_t
FILE * matchsegfp
Definition: fsg_search.h:200
mdef_t * mdef
Definition: fsg_search.h:193
int32 isBacktrace
Definition: fsg_search.h:187
logmath_t * logmath
Definition: fsg_search.h:203
int fsg_search_set_current_fsg(fsg_search_t *, char *)
word_fsg_t * fsg_search_fsgname_to_fsg(fsg_search_t *, char *name)
dict_t * dict
Definition: fsg_search.h:192
void fsg_search_frame_fwd(fsg_search_t *)