s3_cfg.h
Go to the documentation of this file.
1 /* ====================================================================
2  * Copyright (c) 1996-2005 Carnegie Mellon University. All rights
3  * reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in
14  * the documentation and/or other materials provided with the
15  * distribution.
16  *
17  * This work was supported in part by funding from the Defense Advanced
18  * Research Projects Agency and the National Science Foundation of the
19  * United States of America, and the CMU Sphinx Speech Consortium.
20  *
21  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
22  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
25  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * ====================================================================
34  */
35 
36 /* Sphinx3 Context Free Grammar Parser
37  *
38  * The purpose here is to create a parser that can handle multiple input
39  * streams at the same time. The implementation is based on the Earley
40  * algorithm.
41  *
42  * The legal rules are in the form
43  *
44  * 0.33 $rule1 N product1 product2 ... productN
45  *
46  * 0.33 is a float32 giving the score (or probability) of this rule being
47  * applied, $rule1 is the name of the non-terminal to be expanded, N is the
48  * number of products, and product[1...N] are the (non-)terminals that $rule1 expands to.
49  *
50  * The rules are read from a file, in case that is not obvious from the
51  * API.
52  */
53 
54 #ifndef _S3_CONTEXT_FREE_GRAMMAR_H
55 #define _S3_CONTEXT_FREE_GRAMMAR_H
56 
57 #include <stdio.h>
58 
59 #include <logmath.h>
60 #include "prim_type.h"
61 #include "hash_table.h"
62 #include "s3_arraylist.h"
63 #include "fsg.h"
64 
65 #ifdef __cplusplus
66 extern "C" {
67 #endif
68 
/*
 * Size limits, initial capacities and hash-table sizes.
 * NOTE(review): what each value bounds is read off the macro name only;
 * the code that consumes them lives outside this header.
 */
#define S3_CFG_MAX_RULE_STR_LEN 1023
#define S3_CFG_MAX_ITEM_STR_LEN 40
#define S3_CFG_MAX_ITEM_COUNT 20
#define S3_CFG_INITIAL_RULE_COUNT 1
#define S3_CFG_INITIAL_RULE_SET_COUNT 50
#define S3_CFG_INITIAL_PARSE_SET_COUNT 20
#define S3_CFG_PARSE_HASH_SIZE 251
#define S3_CFG_INITIAL_STATE_SET_COUNT 20
#define S3_CFG_INITIAL_TERM_COUNT 50
#define S3_CFG_NAME_HASH_SIZE 4091

/* Sentinel and starting values for rule scores. */
#define S3_CFG_INVALID_SCORE 1.0f
#define S3_CFG_INITIAL_SCORE 0.0f

/*
 * Item-id encoding: the top bit of a 32-bit id marks a terminal, the low
 * 31 bits carry the index (see s3_cfg_is_terminal / s3_cfg_id2index below).
 * Non-terminal names start with S3_CFG_NONTERM_PREFIX.
 */
#define S3_CFG_NONTERM_PREFIX '$'
#define S3_CFG_TERM_BIT 0x80000000
#define S3_CFG_INDEX_MASK 0x7FFFFFFF

/* Id value reserved to mean "no item"; equal to S3_CFG_INDEX_MASK. */
#define S3_CFG_INVALID_ID 0x7FFFFFFF

/*
 * Reserved items and their printable names.  PSTART and START are
 * non-terminals (terminal bit clear); EOR, EOI and NIL are terminals.
 */
#define S3_CFG_PSTART_ITEM 0x00000000
#define S3_CFG_PSTART_ITEM_STR "$PSTART"
#define S3_CFG_START_ITEM 0x00000001
#define S3_CFG_START_ITEM_STR "$START"
#define S3_CFG_EOR_ITEM (0x00000002 | S3_CFG_TERM_BIT)
#define S3_CFG_EOR_ITEM_STR "#EOR#"
#define S3_CFG_EOI_ITEM (0x00000003 | S3_CFG_TERM_BIT)
#define S3_CFG_EOI_ITEM_STR "#EOI#"
#define S3_CFG_NIL_ITEM (0x00000004 | S3_CFG_TERM_BIT)
#define S3_CFG_NIL_ITEM_STR "#NIL#"

/*
 * Brace initializer for the rule $PSTART -> $START #EOR#.
 * NOTE(review): the field order here should be checked against the
 * current layout of s3_cfg_rule_t before this macro is used.
 */
#define S3_CFG_START_RULE \
  { S3_CFG_PSTART_ITEM, 0.0f, { S3_CFG_START_ITEM, S3_CFG_EOR_ITEM }, 1 }

/* Bit flags selecting automatic pruning modes. */
#define S3_CFG_AUTO_PRUNE_SCORE 0x00000001
#define S3_CFG_AUTO_PRUNE_RANK 0x00000002

/*
 * Helper macros.  Each argument is parenthesized so that an expression
 * argument (e.g. `a | b` or `&state`) is evaluated as a unit instead of
 * being re-associated with the operator inside the macro body.
 */

/* Non-zero when the parse/state pointed to by x has no entries. */
#define s3_cfg_is_null_parse(x) ((x)->entries.count == 0)

/* Non-zero when item id x has the terminal bit set. */
#define s3_cfg_is_terminal(x) ((x) & S3_CFG_TERM_BIT)

/* Strip the terminal bit from item id x, leaving the index. */
#define s3_cfg_id2index(x) ((x) & S3_CFG_INDEX_MASK)
111 
/*
 * Identifier type for grammar items.  The S3_CFG_TERM_BIT and
 * S3_CFG_INDEX_MASK macros split a value of this type into a terminal
 * flag (top bit) and an index (low 31 bits).
 * NOTE(review): uint32 is not defined in this header; presumably it comes
 * from prim_type.h -- confirm.
 */
112 typedef uint32 s3_cfg_id_t;
113 
/*
 * A single grammar rule together with its scores.
 * NOTE(review): listing lines 115 and 124 are absent from this extraction.
 * The cross-reference index at the end of this file lists
 * `s3_cfg_id_t src` (s3_cfg.h:115) and `s3_cfg_id_t *products`
 * (s3_cfg.h:124) at those positions; restore them from the real header
 * before compiling.
 */
114 typedef struct s3_cfg_rule_s {
116 
117  /* arbitrary floating point score */
118  float32 score;
119  /* normalized probability score */
120  float32 prob_score;
121  /* probability fed to logs3 */
122  int32 log_score;
123 
 /* presumably the number of products in this rule -- TODO confirm */
125  int len;
126 } s3_cfg_rule_t;
127 
/*
 * Per-item record.
 * NOTE(review): listing lines 129, 131 and 132 are absent from this
 * extraction.  The cross-reference index at the end of this file lists
 * `s3_cfg_id_t id` (s3_cfg.h:129), `s3_arraylist_t rules` (s3_cfg.h:131)
 * and `s3_cfg_rule_t *nil_rule` (s3_cfg.h:132) at those positions.
 */
128 typedef struct {
 /* item name string; presumably the textual form used in rule files -- confirm */
130  char *name;
133 } s3_cfg_item_t;
134 
135 struct s3_cfg_state_s;
136 typedef struct s3_cfg_entry_s {
138  int dot;
140  int32 score;
144 
145 
146 typedef struct s3_cfg_state_s {
151 
156 
159 
/*
 * Top-level grammar object passed to every s3_cfg_* function declared in
 * this header.
 * NOTE(review): listing lines 161 and 162 are absent from this extraction.
 * The cross-reference index at the end of this file lists
 * `s3_arraylist_t rules` (s3_cfg.h:161) and `s3_arraylist_t item_info`
 * (s3_cfg.h:162) at those positions.
 */
160 typedef struct {
 /* hash table; the field name suggests it maps item names to ids -- confirm */
163  hash_table_t *name2id;
164 
 /* NOTE(review): element meaning and array length are not visible here */
165  int8 *predictions;
166 } s3_cfg_t;
167 
/*
 * NOTE(review): the original API comments for the declarations below are
 * missing from this listing.  The notes here state only what the
 * signatures show; anything beyond that is marked as an assumption.
 */

/* Takes a caller-supplied s3_cfg_t; presumably initializes it -- confirm. */
174 void
175 s3_cfg_init(s3_cfg_t *_cfg);
176 
177 
/* Takes an s3_cfg_t; presumably releases what s3_cfg_init set up -- confirm. */
184 void
185 s3_cfg_close(s3_cfg_t *_cfg);
186 
187 
/*
 * Takes a file name and returns a pointer to an s3_cfg_t.  Presumably reads
 * the rule format described at the top of this file -- confirm.  Failure
 * behavior and ownership of the result are not visible here.
 */
196 s3_cfg_t *
197 s3_cfg_read_simple(const char *_fn);
198 
199 
/*
 * Takes a file name and returns a pointer to an s3_cfg_t.  The name
 * suggests an SRGS grammar file -- confirm.
 */
208 s3_cfg_t *
209 s3_cfg_read_srgs(const char *_fn);
210 
211 
/* Takes a grammar and a file name; presumably the inverse of s3_cfg_read_simple -- confirm. */
219 void
220 s3_cfg_write_simple(s3_cfg_t *_cfg, const char *_fn);
221 
222 
/*
 * Takes a grammar and an integer expansion limit and returns a pointer to
 * an s2_fsg_t (declared in fsg.h).  How _max_expansion bounds the
 * conversion is not visible here.
 */
230 s2_fsg_t *
231 s3_cfg_convert_to_fsg(s3_cfg_t *_cfg, int _max_expansion);
232 
233 
234 /*
235  * NOTE(review): undocumented in the original.  Takes a grammar and a
 * logmath_t; presumably recomputes rule scores with it -- confirm.
236  */
237 void
238 s3_cfg_rescore(s3_cfg_t *_cfg, logmath_t *logmath);
240 
250 
251 
260 
261 
268 void
270 
271 
272 /*
273  *
274  */
275 void
277 
278 
288 
289 
301 s3_cfg_add_rule(s3_cfg_t *_cfg, s3_cfg_id_t _src, float32 _fake_score,
302  s3_cfg_id_t *_products);
303 
304 
/*
 * NOTE(review): the original API comments for the declarations below are
 * missing from this listing.  The notes here state only what the
 * signatures show; anything beyond that is marked as an assumption.
 */

/* Takes a grammar and a logmath_t; presumably finalizes the rule set after rules are added -- confirm. */
312 void
313 s3_cfg_compile_rules(s3_cfg_t *_cfg, logmath_t *logmath);
314 
315 
/* Takes a grammar, one rule and an output stream; presumably writes the rule to _out -- confirm. */
323 void
324 s3_cfg_print_rule(s3_cfg_t *_cfg, s3_cfg_rule_t *_rule, FILE *_out);
325 
326 
/* Takes a grammar, one parse entry and an output stream; presumably writes the entry to _out -- confirm. */
335 void
336 s3_cfg_print_entry(s3_cfg_t *_cfg, s3_cfg_entry_t *_entry, FILE *_out);
337 
338 
/* Takes a grammar, a parse (given as an entry) and an output stream; presumably writes the parse to _out -- confirm. */
347 void
348 s3_cfg_print_parse(s3_cfg_t *_cfg, s3_cfg_entry_t *_parse, FILE *_out);
349 
350 
360 s3_cfg_str2id(s3_cfg_t *_cfg, char *_item);
361 
362 
370 const char *
372 
373 #ifdef __cplusplus
374 }
375 #endif
376 #endif
377 
s3_cfg_id_t input
Definition: s3_cfg.h:147
s3_cfg_rule_t * rule
Definition: s3_cfg.h:137
void s3_cfg_free_parse_tree(s3_cfg_t *_cfg, s3_cfg_state_t *_parse)
int32 log_score
Definition: s3_cfg.h:122
void s3_cfg_close(s3_cfg_t *_cfg)
s3_cfg_entry_t * best_completed_entry
Definition: s3_cfg.h:152
s3_arraylist_t entries
Definition: s3_cfg.h:148
s3_cfg_id_t src
Definition: s3_cfg.h:115
int8 * predictions
Definition: s3_cfg.h:165
Definition: s3_cfg.h:136
struct s3_cfg_entry_s * complete
Definition: s3_cfg.h:142
void s3_cfg_rescore(s3_cfg_t *_cfg, logmath_t *logmath)
hash_table_t * name2id
Definition: s3_cfg.h:163
s3_cfg_state_t * s3_cfg_input_term(s3_cfg_t *_cfg, s3_cfg_state_t *_cur, s3_cfg_id_t _term)
S3DECODER_EXPORT s3_cfg_t * s3_cfg_read_simple(const char *_fn)
Definition: s3_cfg.h:114
S3DECODER_EXPORT void s3_cfg_compile_rules(s3_cfg_t *_cfg, logmath_t *logmath)
struct s3_cfg_state_s s3_cfg_state_t
s3_arraylist_t item_info
Definition: s3_cfg.h:162
s3_cfg_entry_t * best_overall_parse
Definition: s3_cfg.h:155
int num_expanded
Definition: s3_cfg.h:157
s3_arraylist_t expansions
Definition: s3_cfg.h:149
Definition: fsg.h:58
s3_cfg_t * s3_cfg_read_srgs(const char *_fn)
void s3_cfg_print_parse(s3_cfg_t *_cfg, s3_cfg_entry_t *_parse, FILE *_out)
Definition: s3_cfg.h:160
void s3_cfg_print_rule(s3_cfg_t *_cfg, s3_cfg_rule_t *_rule, FILE *_out)
struct s3_cfg_state_s * back
Definition: s3_cfg.h:150
void s3_cfg_init(s3_cfg_t *_cfg)
uint32 s3_cfg_id_t
Definition: s3_cfg.h:112
void s3_cfg_print_entry(s3_cfg_t *_cfg, s3_cfg_entry_t *_entry, FILE *_out)
struct s3_cfg_state_s * origin
Definition: s3_cfg.h:139
s3_cfg_entry_t * best_overall_entry
Definition: s3_cfg.h:153
struct s3_cfg_entry_s * back
Definition: s3_cfg.h:141
s3_cfg_state_t * s3_cfg_create_parse(s3_cfg_t *_cfg)
s3_arraylist_t rules
Definition: s3_cfg.h:161
struct s3_cfg_rule_s s3_cfg_rule_t
#define S3DECODER_EXPORT
Definition: sphinx3_export.h:15
void s3_cfg_write_simple(s3_cfg_t *_cfg, const char *_fn)
S3DECODER_EXPORT s2_fsg_t * s3_cfg_convert_to_fsg(s3_cfg_t *_cfg, int _max_expansion)
Definition: s3_cfg.h:146
char * name
Definition: s3_cfg.h:130
int len
Definition: s3_cfg.h:125
int32 score
Definition: s3_cfg.h:140
s3_cfg_id_t * products
Definition: s3_cfg.h:124
s3_cfg_id_t id
Definition: s3_cfg.h:129
s3_arraylist_t rules
Definition: s3_cfg.h:131
s3_cfg_rule_t * nil_rule
Definition: s3_cfg.h:132
s3_cfg_item_t * s3_cfg_get_term_info(s3_cfg_t *_cfg, s3_cfg_id_t _id)
struct s3_cfg_entry_s s3_cfg_entry_t
s3_cfg_id_t s3_cfg_str2id(s3_cfg_t *_cfg, char *_item)
s3_cfg_entry_t * best_completed_parse
Definition: s3_cfg.h:154
Definition: s3_arraylist.h:16
s3_cfg_rule_t * s3_cfg_add_rule(s3_cfg_t *_cfg, s3_cfg_id_t _src, float32 _fake_score, s3_cfg_id_t *_products)
float32 prob_score
Definition: s3_cfg.h:120
float32 score
Definition: s3_cfg.h:118
int dot
Definition: s3_cfg.h:138
Definition: s3_cfg.h:128
void s3_cfg_free_parse(s3_cfg_t *_cfg, s3_cfg_state_t *_parse)
const char * s3_cfg_id2str(s3_cfg_t *_cfg, s3_cfg_id_t _id)