47 #include <sphinxbase/err.h> 48 #include <sphinxbase/strfuncs.h> 49 #include <sphinxbase/filename.h> 50 #include <sphinxbase/pio.h> 51 #include <sphinxbase/jsgf.h> 52 #include <sphinxbase/hash_table.h> 55 #include "cmdln_macro.h" 60 #include "kws_search.h" 61 #include "fsg_search_internal.h" 65 #include "allphone_search.h" 67 static const arg_t ps_args_def[] = {
74 file_exists(
const char *path)
78 tmp = fopen(path,
"rb");
85 hmmdir_exists(
const char *path)
88 char *mdef = string_join(path,
"/mdef", NULL);
90 tmp = fopen(mdef,
"rb");
98 ps_expand_file_config(
ps_decoder_t *ps,
const char *arg,
const char *extra_arg,
99 const char *hmmdir,
const char *file)
102 if ((val = cmd_ln_str_r(ps->
config, arg)) != NULL) {
103 cmd_ln_set_str_extra_r(ps->
config, extra_arg, val);
104 }
else if (hmmdir == NULL) {
105 cmd_ln_set_str_extra_r(ps->
config, extra_arg, NULL);
107 char *tmp = string_join(hmmdir,
"/", file, NULL);
108 if (file_exists(tmp))
109 cmd_ln_set_str_extra_r(ps->
config, extra_arg, tmp);
111 cmd_ln_set_str_extra_r(ps->
config, extra_arg, NULL);
117 static const arg_t feat_defn[] = {
118 waveform_to_cepstral_command_line_macro(),
119 cepstral_to_feature_command_line_macro(),
126 char const *hmmdir, *featparams;
129 #ifdef __ADSPBLACKFIN__ 130 E_INFO(
"Will not use mmap() on uClinux/Blackfin.");
131 cmd_ln_set_boolean_r(ps->
config,
"-mmap", FALSE);
135 hmmdir = cmd_ln_str_r(ps->
config,
"-hmm");
136 ps_expand_file_config(ps,
"-mdef",
"_mdef", hmmdir,
"mdef");
137 ps_expand_file_config(ps,
"-mean",
"_mean", hmmdir,
"means");
138 ps_expand_file_config(ps,
"-var",
"_var", hmmdir,
"variances");
139 ps_expand_file_config(ps,
"-tmat",
"_tmat", hmmdir,
"transition_matrices");
140 ps_expand_file_config(ps,
"-mixw",
"_mixw", hmmdir,
"mixture_weights");
141 ps_expand_file_config(ps,
"-sendump",
"_sendump", hmmdir,
"sendump");
142 ps_expand_file_config(ps,
"-fdict",
"_fdict", hmmdir,
"noisedict");
143 ps_expand_file_config(ps,
"-lda",
"_lda", hmmdir,
"feature_transform");
144 ps_expand_file_config(ps,
"-featparams",
"_featparams", hmmdir,
"feat.params");
145 ps_expand_file_config(ps,
"-senmgau",
"_senmgau", hmmdir,
"senmgau");
148 if ((featparams = cmd_ln_str_r(ps->
config,
"_featparams"))) {
150 cmd_ln_parse_file_r(ps->
config, feat_defn, featparams, FALSE))
151 E_INFO(
"Parsed model-specific feature parameters from %s\n",
156 if (err_get_logfp() != NULL) {
157 cmd_ln_print_values_r(ps->
config, err_get_logfp(),
ps_args());
165 hash_iter_t *search_it;
166 for (search_it = hash_table_iter(ps->
searches); search_it;
167 search_it = hash_table_iter_next(search_it)) {
168 ps_search_free(hash_entry_val(search_it->ent));
181 hash_table_lookup(ps->
searches, name, &search);
191 const char *hmmdir = cmd_ln_str_r(config,
"-hmm");
192 const char *lmfile = cmd_ln_str_r(config,
"-lm");
193 const char *dictfile = cmd_ln_str_r(config,
"-dict");
195 if (hmmdir == NULL && hmmdir_exists(MODELDIR
"/en-us/en-us")) {
196 hmmdir = MODELDIR
"/en-us/en-us";
197 cmd_ln_set_str_r(config,
"-hmm", hmmdir);
200 if (lmfile == NULL && !cmd_ln_str_r(config,
"-fsg")
201 && !cmd_ln_str_r(config,
"-jsgf")
202 && !cmd_ln_str_r(config,
"-lmctl")
203 && !cmd_ln_str_r(config,
"-kws")
204 && !cmd_ln_str_r(config,
"-keyphrase")
205 && file_exists(MODELDIR
"/en-us/en-us.lm.bin")) {
206 lmfile = MODELDIR
"/en-us/en-us.lm.bin";
207 cmd_ln_set_str_r(config,
"-lm", lmfile);
210 if (dictfile == NULL && file_exists(MODELDIR
"/en-us/cmudict-en-us.dict")) {
211 dictfile = MODELDIR
"/en-us/cmudict-en-us.dict";
212 cmd_ln_set_str_r(config,
"-dict", dictfile);
221 const char *keyphrase;
224 if (config && config != ps->
config) {
225 cmd_ln_free_r(ps->
config);
226 ps->
config = cmd_ln_retain(config);
229 err_set_debug_level(cmd_ln_int32_r(ps->
config,
"-debug"));
232 if (config && cmd_ln_str_r(ps->
config,
"-logfn")) {
233 if (err_set_logfile(cmd_ln_str_r(ps->
config,
"-logfn")) < 0) {
234 E_ERROR(
"Cannot redirect log output\n");
244 ps_expand_model_config(ps);
247 ps_free_searches(ps);
248 ps->
searches = hash_table_new(3, HASH_CASE_YES);
263 if (ps->
lmath == NULL
264 || (logmath_get_base(ps->
lmath) !=
265 (float64)cmd_ln_float32_r(ps->
config,
"-logbase"))) {
267 logmath_free(ps->
lmath);
268 ps->
lmath = logmath_init
269 ((float64)cmd_ln_float32_r(ps->
config,
"-logbase"), 0,
270 cmd_ln_boolean_r(ps->
config,
"-bestpath"));
280 if (cmd_ln_int32_r(ps->
config,
"-pl_window") > 0) {
298 lw = cmd_ln_float32_r(ps->
config,
"-lw");
304 if ((keyphrase = cmd_ln_str_r(ps->
config,
"-keyphrase"))) {
310 if ((path = cmd_ln_str_r(ps->
config,
"-kws"))) {
317 if ((path = cmd_ln_str_r(ps->
config,
"-fsg"))) {
318 fsg_model_t *fsg = fsg_model_readfile(path, ps->
lmath, lw);
330 if ((path = cmd_ln_str_r(ps->
config,
"-jsgf"))) {
336 if ((path = cmd_ln_str_r(ps->
config,
"-allphone"))) {
342 if ((path = cmd_ln_str_r(ps->
config,
"-lm")) &&
343 !cmd_ln_boolean_r(ps->
config,
"-allphone")) {
349 if ((path = cmd_ln_str_r(ps->
config,
"-lmctl"))) {
351 ngram_model_t *lmset;
352 ngram_model_set_iter_t *lmset_it;
354 if (!(lmset = ngram_model_set_read(ps->
config, path, ps->
lmath))) {
355 E_ERROR(
"Failed to read language model control file: %s\n", path);
359 for(lmset_it = ngram_model_set_iter(lmset);
360 lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) {
361 ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name);
362 E_INFO(
"adding search %s\n", name);
364 ngram_model_set_iter_free(lmset_it);
365 ngram_model_free(lmset);
369 ngram_model_free(lmset);
371 name = cmd_ln_str_r(ps->
config,
"-lmname");
375 E_ERROR(
"No default LM name (-lmname) for `-lmctl'\n");
381 ps->
perf.name =
"decode";
382 ptmr_init(&ps->
perf);
393 E_ERROR(
"No configuration specified");
397 ps = ckd_calloc(1,
sizeof(*ps));
426 ps_free_searches(ps);
430 logmath_free(ps->
lmath);
431 cmd_ln_free_r(ps->
config);
472 E_ERROR(
"Cannot change search while decoding, end utterance first\n");
476 if (!(search = ps_find_search(ps, name))) {
482 if (!strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search))) {
494 hash_iter_t *search_it;
495 const char* name = NULL;
496 for (search_it = hash_table_iter(ps->
searches); search_it;
497 search_it = hash_table_iter_next(search_it)) {
498 if (hash_entry_val(search_it->ent) == ps->
search) {
499 name = hash_entry_key(search_it->ent);
514 ps_search_free(search);
533 return (
const char*)(((hash_iter_t *)itor)->ent->key);
539 hash_table_iter_free((hash_iter_t *)itor);
546 if (search && strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search)))
555 if (search && strcmp(PS_SEARCH_TYPE_FSG, ps_search_type(search)))
564 if (search && strcmp(PS_SEARCH_TYPE_KWS, ps_search_type(search)))
566 return search ? kws_search_get_keyphrases(search) : NULL;
578 old_search = (
ps_search_t *) hash_table_replace(ps->
searches, ps_search_name(search), search);
579 if (old_search != search)
580 ps_search_free(old_search);
590 return set_search_internal(ps, search);
599 lm = ngram_model_read(ps->
config, path, NGRAM_AUTO, ps->
lmath);
604 ngram_model_free(lm);
613 return set_search_internal(ps, search);
624 lm = ngram_model_read(ps->
config, path, NGRAM_AUTO, ps->
lmath);
627 ngram_model_free(lm);
635 search = kws_search_init(name, NULL, keyfile, ps->
config, ps->
acmod, ps->
dict, ps->
d2p);
636 return set_search_internal(ps, search);
643 search = kws_search_init(name, keyphrase, NULL, ps->
config, ps->
acmod, ps->
dict, ps->
d2p);
644 return set_search_internal(ps, search);
652 return set_search_internal(ps, search);
661 jsgf_t *jsgf = jsgf_parse_file(path, NULL);
670 if ((toprule = cmd_ln_str_r(ps->
config,
"-toprule"))) {
671 rule = jsgf_get_rule(jsgf, toprule);
673 E_ERROR(
"Start rule %s not found\n", toprule);
674 jsgf_grammar_free(jsgf);
678 rule = jsgf_get_public_rule(jsgf);
680 E_ERROR(
"No public rules found in %s\n", path);
681 jsgf_grammar_free(jsgf);
686 lw = cmd_ln_float32_r(ps->
config,
"-lw");
687 fsg = jsgf_build_fsg(jsgf, rule, ps->
lmath, lw);
690 jsgf_grammar_free(jsgf);
700 jsgf_t *jsgf = jsgf_parse_string(jsgf_string, NULL);
709 if ((toprule = cmd_ln_str_r(ps->
config,
"-toprule"))) {
710 rule = jsgf_get_rule(jsgf, toprule);
712 E_ERROR(
"Start rule %s not found\n", toprule);
716 rule = jsgf_get_public_rule(jsgf);
718 E_ERROR(
"No public rules found in input string\n");
723 lw = cmd_ln_float32_r(ps->
config,
"-lw");
724 fsg = jsgf_build_fsg(jsgf, rule, ps->
lmath, lw);
733 char const *fdictfile,
char const *format)
737 hash_iter_t *search_it;
742 newconfig = cmd_ln_init(NULL,
ps_args(), TRUE, NULL);
743 cmd_ln_set_boolean_r(newconfig,
"-dictcase",
744 cmd_ln_boolean_r(ps->
config,
"-dictcase"));
745 cmd_ln_set_str_r(newconfig,
"-dict", dictfile);
747 cmd_ln_set_str_extra_r(newconfig,
"_fdict", fdictfile);
749 cmd_ln_set_str_extra_r(newconfig,
"_fdict",
750 cmd_ln_str_r(ps->
config,
"_fdict"));
754 cmd_ln_free_r(newconfig);
760 cmd_ln_free_r(newconfig);
766 cmd_ln_free_r(newconfig);
773 for (search_it = hash_table_iter(ps->
searches); search_it;
774 search_it = hash_table_iter_next(search_it)) {
775 if (ps_search_reinit(hash_entry_val(search_it->ent), dict, d2p) < 0) {
776 hash_table_iter_free(search_it);
799 hash_iter_t *search_it;
800 char **phonestr, *tmp;
804 tmp = ckd_salloc(phones);
805 np = str2words(tmp, NULL, 0);
806 phonestr = ckd_calloc(np,
sizeof(*phonestr));
807 str2words(tmp, phonestr, np);
808 pron = ckd_calloc(np,
sizeof(*pron));
809 for (i = 0; i < np; ++i) {
812 E_ERROR(
"Unknown phone %s in phone string %s\n",
836 for (search_it = hash_table_iter(ps->
searches); search_it;
837 search_it = hash_table_iter_next(search_it)) {
838 ps_search_t *search = hash_entry_val(search_it->ent);
839 if (!strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search))) {
841 if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID) {
842 hash_table_iter_free(search_it);
848 if ((rv = ps_search_reinit(search, ps->
dict, ps->
d2p) < 0)) {
849 hash_table_iter_free(search_it);
871 for (phlen = j = 0; j < dict_pronlen(dict, wid); ++j)
873 phones = ckd_calloc(1, phlen);
874 for (j = 0; j < dict_pronlen(dict, wid); ++j) {
876 if (j != dict_pronlen(dict, wid) - 1)
887 long total, pos, endpos;
894 if (maxsamps != -1) {
895 data = ckd_calloc(maxsamps,
sizeof(*data));
896 total = fread(data,
sizeof(*data), maxsamps, rawfh);
899 }
else if ((pos = ftell(rawfh)) >= 0) {
900 fseek(rawfh, 0, SEEK_END);
901 endpos = ftell(rawfh);
902 fseek(rawfh, pos, SEEK_SET);
903 maxsamps = endpos - pos;
905 data = ckd_calloc(maxsamps,
sizeof(*data));
906 total = fread(data,
sizeof(*data), maxsamps, rawfh);
912 while (!feof(rawfh)) {
916 nread = fread(data,
sizeof(*data),
sizeof(data)/
sizeof(*data), rawfh);
939 E_ERROR(
"Utterance already started\n");
944 E_ERROR(
"No search module is selected, did you forget to " 945 "specify a language model or grammar?\n");
949 ptmr_reset(&ps->
perf);
950 ptmr_start(&ps->
perf);
952 sprintf(uttid,
"%09u", ps->
uttno);
967 char *logfn = string_join(ps->
mfclogdir,
"/",
968 uttid,
".mfc", NULL);
970 E_INFO(
"Writing MFCC file: %s\n", logfn);
971 if ((mfcfh = fopen(logfn,
"wb")) == NULL) {
972 E_ERROR_SYSTEM(
"Failed to open MFCC file %s", logfn);
980 char *logfn = string_join(ps->
rawlogdir,
"/",
981 uttid,
".raw", NULL);
983 E_INFO(
"Writing raw audio file: %s\n", logfn);
984 if ((rawfh = fopen(logfn,
"wb")) == NULL) {
985 E_ERROR_SYSTEM(
"Failed to open raw audio file %s", logfn);
993 char *logfn = string_join(ps->
senlogdir,
"/",
994 uttid,
".sen", NULL);
996 E_INFO(
"Writing senone score file: %s\n", logfn);
997 if ((senfh = fopen(logfn,
"wb")) == NULL) {
998 E_ERROR_SYSTEM(
"Failed to open senone score file %s", logfn);
1010 return ps_search_start(ps->
search);
1025 if ((k = ps_search_step(ps->
search,
1038 int nfr, n_searchfr;
1044 if ((nfr = ps_search_forward(ps)) < 0) {
1066 E_ERROR(
"Failed to process data, utterance is not started. Use start_utt to start it\n");
1078 &n_samples, full_utt)) < 0)
1084 if ((nfr = ps_search_forward(ps)) < 0)
1109 &n_frames, full_utt)) < 0)
1115 if ((nfr = ps_search_forward(ps)) < 0)
1129 E_ERROR(
"Utterance is not started\n");
1135 if ((rv = ps_search_forward(ps)) < 0) {
1136 ptmr_stop(&ps->
perf);
1141 if ((rv = ps_search_finish(ps->
phone_loop)) < 0) {
1142 ptmr_stop(&ps->
perf);
1149 i < ps->acmod->output_frame; ++i)
1150 ps_search_step(ps->
search, i);
1153 if ((rv = ps_search_finish(ps->
search)) < 0) {
1154 ptmr_stop(&ps->
perf);
1157 ptmr_stop(&ps->
perf);
1160 if (cmd_ln_boolean_r(ps->
config,
"-backtrace")) {
1168 E_INFO(
"%s (%d)\n", hyp, score);
1169 E_INFO_NOFN(
"%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
1170 "word",
"start",
"end",
"pprob",
"ascr",
"lscr",
"lback");
1175 int32 post, lscr, ascr, lback;
1180 E_INFO_NOFN(
"%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
1194 ptmr_start(&ps->
perf);
1195 hyp = ps_search_hyp(ps->
search, out_best_score);
1196 ptmr_stop(&ps->
perf);
1205 ptmr_start(&ps->
perf);
1206 prob = ps_search_prob(ps->
search);
1207 ptmr_stop(&ps->
perf);
1216 ptmr_start(&ps->
perf);
1217 itor = ps_search_seg_iter(ps->
search);
1218 ptmr_stop(&ps->
perf);
1225 return ps_search_seg_next(seg);
1239 if (out_sf) *out_sf = seg->
sf + uf;
1240 if (out_ef) *out_ef = seg->
ef + uf;
1246 if (out_ascr) *out_ascr = seg->
ascr;
1247 if (out_lscr) *out_lscr = seg->
lscr;
1248 if (out_lback) *out_lback = seg->
lback;
1255 ps_search_seg_free(seg);
1261 return ps_search_lattice(ps->
search);
1268 ngram_model_t *lmset;
1280 if (0 != strcmp(ps_search_type(ps->
search), PS_SEARCH_TYPE_NGRAM)) {
1317 assert(nbest != NULL);
1319 if (nbest->top == NULL)
1321 if (out_score) *out_score = nbest->top->
score;
1328 if (nbest->top == NULL)
1342 double *out_ncpu,
double *out_nwall)
1346 frate = cmd_ln_int32_r(ps->
config,
"-frate");
1348 *out_ncpu = ps->
perf.t_cpu;
1349 *out_nwall = ps->
perf.t_elapsed;
1354 double *out_ncpu,
double *out_nwall)
1358 frate = cmd_ln_int32_r(ps->
config,
"-frate");
1359 *out_nspeech = (double)ps->
n_frame / frate;
1360 *out_ncpu = ps->
perf.t_tot_cpu;
1361 *out_nwall = ps->
perf.t_tot_elapsed;
1367 return fe_get_vad_state(ps->
acmod->
fe);
1378 search->name = ckd_salloc(name);
1379 search->type = ckd_salloc(type);
1382 search->
acmod = acmod;
1389 search->
start_wid = dict_startwid(dict);
1395 search->
dict = NULL;
1406 ckd_free(search->name);
1407 ckd_free(search->type);
1423 search->
start_wid = dict_startwid(dict);
1429 search->
dict = NULL;
void acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.
Implementation of FSG search (and "FSG set") structure.
ptmr_t perf
Performance counter for all of decoding.
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
Internal implementation of PocketSphinx decoder.
POCKETSPHINX_EXPORT void ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall)
Get performance information for the current utterance.
POCKETSPHINX_EXPORT feat_t * ps_get_feat(ps_decoder_t *ps)
Get the dynamic feature computation object for this decoder.
int acmod_read_scores(acmod_t *acmod)
Read one frame of scores from senone score dump file.
void ps_astar_finish(ps_astar_t *nbest)
Finish N-best search, releasing resources associated with it.
POCKETSPHINX_EXPORT void ps_set_rawdata_size(ps_decoder_t *ps, int32 size)
Sets the limit of the raw audio data to store in the decoder so that it can be retrieved later with ps_get_rawdata().
POCKETSPHINX_EXPORT ngram_model_t * ps_get_lm(ps_decoder_t *ps, const char *name)
Get the language model set object for this decoder.
ps_latpath_t * ps_astar_next(ps_astar_t *nbest)
Find next best hypothesis of A* on a word graph.
char const * ps_astar_hyp(ps_astar_t *nbest, ps_latpath_t *path)
Get hypothesis string from A* search.
POCKETSPHINX_EXPORT int ps_process_cep(ps_decoder_t *ps, mfcc_t **data, int n_frames, int no_search, int full_utt)
Decode acoustic feature data.
Base structure for search module.
POCKETSPHINX_EXPORT arg_t const * ps_args(void)
Returns the argument definitions used in ps_init().
Utterance started, no data yet.
dict_t * dict
Pronunciation dictionary.
int acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
Set up a senone score dump file for input.
POCKETSPHINX_EXPORT const char * ps_get_kws(ps_decoder_t *ps, const char *name)
Get the current Key phrase to spot.
int32 acmod_stream_offset(acmod_t *acmod)
Get the offset of the utterance start of the current stream, helpful for stream-wide timing...
POCKETSPHINX_EXPORT int ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg)
Adds new search based on finite state grammar.
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
ps_mllr_t * acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
Adapt acoustic model using a linear transform.
int acmod_process_cep(acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
Feed acoustic feature data into the acoustic model for scoring.
POCKETSPHINX_EXPORT int ps_load_dict(ps_decoder_t *ps, char const *dictfile, char const *fdictfile, char const *format)
Reload the pronunciation dictionary from a file.
POCKETSPHINX_EXPORT void ps_nbest_free(ps_nbest_t *nbest)
Finish N-best search early, releasing resources.
int32 silence_wid
Silence word ID.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
acmod_t * acmod
Acoustic model.
int dict_free(dict_t *d)
Release a pointer to a dictionary.
Main header file for the PocketSphinx decoder.
void acmod_set_rawdata_size(acmod_t *acmod, int32 size)
Sets the limit of the raw audio data to store.
POCKETSPHINX_EXPORT void ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
Get inclusive start and end frames from a segmentation iterator.
void acmod_start_stream(acmod_t *acmod)
Reset the current stream.
POCKETSPHINX_EXPORT ps_mllr_t * ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr)
Adapt current acoustic model using a linear transform.
int32 finish_wid
Finish word ID.
fe_t * fe
Acoustic feature computation.
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
int32 lscr
Language model score.
int32 n_words
Number of words known to search (may be less than in the dictionary)
POCKETSPHINX_EXPORT int ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile)
Adds keyphrases from a file to spotting.
POCKETSPHINX_EXPORT uint8 ps_get_in_speech(ps_decoder_t *ps)
Checks if the last fed audio buffer contained speech.
Utterance ended, still buffering.
POCKETSPHINX_EXPORT int ps_save_dict(ps_decoder_t *ps, char const *dictfile, char const *format)
Dump the current pronunciation dictionary to a file.
#define BAD_S3WID
Dictionary word id.
POCKETSPHINX_EXPORT ps_search_iter_t * ps_search_iter(ps_decoder_t *ps)
Returns iterator over current searches.
char const * mfclogdir
Log directory for MFCC files.
POCKETSPHINX_EXPORT void ps_default_search_args(cmd_ln_t *)
Sets default grammar and language model if they are not set explicitly and are present in the default...
Word graph search implementation.
POCKETSPHINX_EXPORT char const * ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
Get the hypothesis string from an N-best list iterator.
POCKETSPHINX_EXPORT char * ps_lookup_word(ps_decoder_t *ps, const char *word)
Look up the word in the dictionary and return its phone transcription.
int refcount
Reference count.
int dict2pid_free(dict2pid_t *d2p)
Free the memory dict2pid structure.
const char * dict_ciphone_str(dict_t *d, s3wid_t wid, int32 pos)
Return value: CI phone string for the given word, phone position.
POCKETSPHINX_EXPORT ps_lattice_t * ps_get_lattice(ps_decoder_t *ps)
Get word lattice.
POCKETSPHINX_EXPORT ps_nbest_t * ps_nbest_next(ps_nbest_t *nbest)
Move an N-best list iterator forward.
int32 prob
Log posterior probability.
dict_t * dict_retain(dict_t *d)
Retain a pointer to a dict_t.
POCKETSPHINX_EXPORT long ps_decode_raw(ps_decoder_t *ps, FILE *rawfh, long maxsamps)
Decode a raw audio stream.
char const * word
Word string (pointer into dictionary hash)
int acmod_set_senfh(acmod_t *acmod, FILE *logfh)
Start logging senone scores to a filehandle.
acmod_t * acmod
Acoustic model.
ps_search_t * search
Search object from whence this came.
ps_search_t * search
Currently active search module.
Lexicon tree based Viterbi search.
hash_table_t * searches
Set of search modules.
int acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
Start logging raw audio to a filehandle.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int dict_write(dict_t *dict, char const *filename, char const *format)
Write dictionary to a file.
POCKETSPHINX_EXPORT int32 ps_get_prob(ps_decoder_t *ps)
Get posterior probability.
logmath_t * lmath
Log math computation.
int32 start_wid
Start word ID.
POCKETSPHINX_EXPORT ps_seg_t * ps_seg_next(ps_seg_t *seg)
Get the next segment in a word segmentation.
N-Gram search module structure.
POCKETSPHINX_EXPORT int ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm)
Adds new search based on N-gram language model.
dict2pid_t * d2p
Dictionary to senone mappings.
POCKETSPHINX_EXPORT char const * ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score)
Get hypothesis string and path score.
char const * rawlogdir
Log directory for audio files.
void ps_search_base_free(ps_search_t *search)
Free search.
POCKETSPHINX_EXPORT int ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_string)
Adds new search using JSGF model.
frame_idx_t output_frame
Index of next frame of dynamic features.
POCKETSPHINX_EXPORT int ps_free(ps_decoder_t *ps)
Finalize the decoder.
int16 s3cipid_t
Size definitions for more semantically meaningful units.
N-Gram based multi-pass search ("FBS")
POCKETSPHINX_EXPORT void ps_seg_free(ps_seg_t *seg)
Finish iterating over a word segmentation early, freeing resources.
int32 ascr
Acoustic score.
cmd_ln_t * config
Configuration.
int acmod_end_utt(acmod_t *acmod)
Mark the end of an utterance.
POCKETSPHINX_EXPORT ps_decoder_t * ps_retain(ps_decoder_t *ps)
Retain a pointer to the decoder.
int dict2pid_add_word(dict2pid_t *d2p, int32 wid)
Add a word to the dict2pid structure (after adding it to dict).
int acmod_advance(acmod_t *acmod)
Advance the frame index.
POCKETSPHINX_EXPORT ps_seg_t * ps_nbest_seg(ps_nbest_t *nbest)
Get the word segmentation from an N-best list iterator.
uint8 state
State of utterance processing.
POCKETSPHINX_EXPORT ps_seg_t * ps_seg_iter(ps_decoder_t *ps)
Get an iterator over the word segmentation for the best hypothesis.
int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
Start logging MFCCs to a filehandle.
void acmod_free(acmod_t *acmod)
Finalize an acoustic model.
POCKETSPHINX_EXPORT cmd_ln_t * ps_get_config(ps_decoder_t *ps)
Get the configuration object for this decoder.
POCKETSPHINX_EXPORT int ps_set_lm_file(ps_decoder_t *ps, const char *name, const char *path)
Adds new search based on N-gram language model.
ps_search_t * phone_loop
Phone loop search for lookahead.
Feature space linear transform structure.
POCKETSPHINX_EXPORT int ps_process_raw(ps_decoder_t *ps, int16 const *data, size_t n_samples, int no_search, int full_utt)
Decode raw audio data.
POCKETSPHINX_EXPORT int ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path)
Adds new search based on phone N-gram language model.
A structure for a dictionary.
feat_t * fcb
Dynamic feature computation.
Word graph structure used in bestpath/nbest search.
char const * senlogdir
Log directory for senone score files.
ps_searchfuncs_t * vt
V-table of search methods.
POCKETSPHINX_EXPORT int ps_unset_search(ps_decoder_t *ps, const char *name)
Unsets the search and releases related resources.
ps_astar_t * ps_astar_start(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, int sf, int ef, int w1, int w2)
Begin N-Gram based A* search on a word graph.
uint32 n_frame
Total number of frames processed.
POCKETSPHINX_EXPORT ps_search_iter_t * ps_search_iter_next(ps_search_iter_t *itor)
Updates search iterator to point to the next position.
POCKETSPHINX_EXPORT int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
Reinitialize the decoder with updated configuration.
POCKETSPHINX_EXPORT int ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path)
Adds new search using JSGF model.
Fast and rough context-independent phoneme loop search.
dict2pid_t * d2p
Dictionary to senone mapping.
POCKETSPHINX_EXPORT fsg_model_t * ps_get_fsg(ps_decoder_t *ps, const char *name)
Get the finite-state grammar set object for this decoder.
dict2pid_t * dict2pid_retain(dict2pid_t *d2p)
Retain a pointer to dict2pid.
POCKETSPHINX_EXPORT int ps_end_utt(ps_decoder_t *ps)
End utterance processing.
int32 post
Utterance posterior probability.
char * hyp_str
Current hypothesis string.
Partial path structure used in N-best (A*) search.
dict_t * dict
Pronunciation dictionary.
int acmod_process_raw(acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
TODO: Set queue length for utterance processing.
Flat lexicon based Viterbi search.
POCKETSPHINX_EXPORT int ps_add_word(ps_decoder_t *ps, char const *word, char const *phones, int update)
Add a word to the pronunciation dictionary.
int32 lback
Language model backoff.
POCKETSPHINX_EXPORT void ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall)
Get overall performance information.
POCKETSPHINX_EXPORT logmath_t * ps_get_logmath(ps_decoder_t *ps)
Get the log-math computation object for this decoder.
int acmod_start_utt(acmod_t *acmod)
Mark the start of an utterance.
POCKETSPHINX_EXPORT int ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase)
Adds new keyphrase to spot.
POCKETSPHINX_EXPORT ps_nbest_t * ps_nbest(ps_decoder_t *ps)
Get an iterator over the best hypotheses.
POCKETSPHINX_EXPORT int32 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
Get language, acoustic, and posterior probabilities from a segmentation iterator. ...
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
bin_mdef_t * mdef
Model definition.
POCKETSPHINX_EXPORT void ps_search_iter_free(ps_search_iter_t *itor)
Delete an unfinished search iterator.
ps_latlink_t * last_link
Final link in best path.
dict2pid_t * dict2pid_build(bin_mdef_t *mdef, dict_t *dict)
Build the dict2pid structure for the given model/dictionary.
POCKETSPHINX_EXPORT int ps_start_stream(ps_decoder_t *ps)
Start processing of the stream of speech.
POCKETSPHINX_EXPORT int ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm)
Adds new search based on phone N-gram language model.
acmod_t * acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
Initialize an acoustic model.
frame_idx_t n_feat_frame
Number of frames active in feat_buf.
POCKETSPHINX_EXPORT fe_t * ps_get_fe(ps_decoder_t *ps)
Get the feature extraction object for this decoder.
V-table for search algorithm.
POCKETSPHINX_EXPORT int ps_start_utt(ps_decoder_t *ps)
Start utterance processing.
ps_search_t * pls
Phoneme loop for lookahead.
ps_seg_t * ps_astar_seg_iter(ps_astar_t *astar, ps_latpath_t *path, float32 lwf)
Get hypothesis segmentation from A* search.
ps_lattice_t * dag
Current hypothesis word graph.
POCKETSPHINX_EXPORT ps_decoder_t * ps_init(cmd_ln_t *config)
Initialize the decoder from a configuration object.
Base structure for hypothesis segmentation iterator.
cmd_ln_t * config
Configuration.
#define dict_size(d)
Packaged macro access to dictionary members.
int32 score
Exact score from start node up to node->sf.
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
POCKETSPHINX_EXPORT const char * ps_search_iter_val(ps_search_iter_t *itor)
Retrieves the name of the search the iterator points to.
POCKETSPHINX_EXPORT int ps_get_n_frames(ps_decoder_t *ps)
Get the number of frames of data searched.
ps_search_t * ngram_search_init(const char *name, ngram_model_t *lm, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
Acoustic model structure.
Building composite triphone (as well as word internal triphones) with the dictionary.
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
POCKETSPHINX_EXPORT int ps_decode_senscr(ps_decoder_t *ps, FILE *senfh)
Decode a senone score dump file.
POCKETSPHINX_EXPORT char const * ps_seg_word(ps_seg_t *seg)
Get word string from a segmentation iterator.
uint32 uttno
Utterance counter.
frame_idx_t sf
Start frame.
POCKETSPHINX_EXPORT const char * ps_get_search(ps_decoder_t *ps)
Returns the name of the current search in the decoder.
int pl_window
Window size for phoneme lookahead.
POCKETSPHINX_EXPORT void ps_get_rawdata(ps_decoder_t *ps, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.
POCKETSPHINX_EXPORT int ps_set_search(ps_decoder_t *ps, const char *name)
Activates the search with the provided name.