PocketSphinx  0.6
src/libpocketsphinx/pocketsphinx.c
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /* System headers. */
00039 #include <stdio.h>
00040 #include <assert.h>
00041 
00042 /* SphinxBase headers. */
00043 #include <sphinxbase/err.h>
00044 #include <sphinxbase/strfuncs.h>
00045 #include <sphinxbase/filename.h>
00046 #include <sphinxbase/pio.h>
00047 
00048 /* Local headers. */
00049 #include "cmdln_macro.h"
00050 #include "pocketsphinx_internal.h"
00051 #include "ps_lattice_internal.h"
00052 #include "phone_loop_search.h"
00053 #include "fsg_search_internal.h"
00054 #include "ngram_search.h"
00055 #include "ngram_search_fwdtree.h"
00056 #include "ngram_search_fwdflat.h"
00057 
00058 static const arg_t ps_args_def[] = {
00059     POCKETSPHINX_OPTIONS,
00060     CMDLN_EMPTY_OPTION
00061 };
00062 
/* Report whether `path` can be opened for binary reading.
 *
 * This is an fopen() probe rather than a true existence test: a file
 * that exists but is unreadable is reported as missing.
 *
 * Returns 1 if the open succeeded, 0 otherwise. */
static int
file_exists(const char *path)
{
    FILE *fh = fopen(path, "rb");

    if (fh == NULL)
        return 0;
    fclose(fh);
    return 1;
}
00073 
/* Report whether `path` looks like an acoustic model directory, i.e.
 * whether the file "<path>/mdef" can be opened for reading.
 *
 * Returns 1 if it can, 0 otherwise. */
static int
hmmdir_exists(const char *path)
{
    char *mdef_path;
    FILE *fh;
    int found;

    mdef_path = string_join(path, "/mdef", NULL);
    fh = fopen(mdef_path, "rb");
    found = (fh != NULL);
    if (found)
        fclose(fh);
    ckd_free(mdef_path);
    return found;
}
00085 
00086 static void
00087 ps_add_file(ps_decoder_t *ps, const char *arg,
00088             const char *hmmdir, const char *file)
00089 {
00090     char *tmp = string_join(hmmdir, "/", file, NULL);
00091 
00092     if (cmd_ln_str_r(ps->config, arg) == NULL && file_exists(tmp))
00093         cmd_ln_set_str_r(ps->config, arg, tmp);
00094     ckd_free(tmp);
00095 }
00096 
00097 static void
00098 ps_init_defaults(ps_decoder_t *ps)
00099 {
00100     char const *hmmdir, *lmfile, *dictfile;
00101 
00102     /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */
00103 #ifdef __ADSPBLACKFIN__
00104     E_INFO("Will not use mmap() on uClinux/Blackfin.");
00105     cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE);
00106 #endif
00107 
00108 #ifdef MODELDIR
00109     /* Set default acoustic and language models. */
00110     hmmdir = cmd_ln_str_r(ps->config, "-hmm");
00111     lmfile = cmd_ln_str_r(ps->config, "-lm");
00112     dictfile = cmd_ln_str_r(ps->config, "-dict");
00113     if (hmmdir == NULL && hmmdir_exists(MODELDIR "/hmm/en_US/hub4wsj_sc_8k")) {
00114         hmmdir = MODELDIR "/hmm/en_US/hub4wsj_sc_8k";
00115         cmd_ln_set_str_r(ps->config, "-hmm", hmmdir);
00116     }
00117     if (lmfile == NULL && !cmd_ln_str_r(ps->config, "-fsg")
00118         && !cmd_ln_str_r(ps->config, "-jsgf")
00119         && file_exists(MODELDIR "/lm/en_US/hub4.5000.DMP")) {
00120         lmfile = MODELDIR "/lm/en_US/hub4.5000.DMP";
00121         cmd_ln_set_str_r(ps->config, "-lm", lmfile);
00122     }
00123     if (dictfile == NULL && file_exists(MODELDIR "/lm/en_US/cmu07a.dic")) {
00124         dictfile = MODELDIR "/lm/en_US/cmu07a.dic";
00125         cmd_ln_set_str_r(ps->config, "-dict", dictfile);
00126     }
00127 
00128     /* Expand acoustic and language model filenames relative to installation path. */
00129     if (hmmdir && !path_is_absolute(hmmdir) && !hmmdir_exists(hmmdir)) {
00130         char *tmphmm = string_join(MODELDIR "/hmm/", hmmdir, NULL);
00131         if (hmmdir_exists(tmphmm)) {
00132             cmd_ln_set_str_r(ps->config, "-hmm", tmphmm);
00133         } else {
00134             E_ERROR("Failed to find mdef file inside the model folder specified with -hmm '%s'\n", hmmdir);
00135         }
00136         ckd_free(tmphmm);
00137     }
00138     if (lmfile && !path_is_absolute(lmfile) && !file_exists(lmfile)) {
00139         char *tmplm = string_join(MODELDIR "/lm/", lmfile, NULL);
00140         cmd_ln_set_str_r(ps->config, "-lm", tmplm);
00141         ckd_free(tmplm);
00142     }
00143     if (dictfile && !path_is_absolute(dictfile) && !file_exists(dictfile)) {
00144         char *tmpdict = string_join(MODELDIR "/lm/", dictfile, NULL);
00145         cmd_ln_set_str_r(ps->config, "-dict", tmpdict);
00146         ckd_free(tmpdict);
00147     }
00148 #endif
00149 
00150     /* Get acoustic model filenames and add them to the command-line */
00151     if ((hmmdir = cmd_ln_str_r(ps->config, "-hmm")) != NULL) {
00152         ps_add_file(ps, "-mdef", hmmdir, "mdef");
00153         ps_add_file(ps, "-mean", hmmdir, "means");
00154         ps_add_file(ps, "-var", hmmdir, "variances");
00155         ps_add_file(ps, "-tmat", hmmdir, "transition_matrices");
00156         ps_add_file(ps, "-mixw", hmmdir, "mixture_weights");
00157         ps_add_file(ps, "-sendump", hmmdir, "sendump");
00158         ps_add_file(ps, "-fdict", hmmdir, "noisedict");
00159         ps_add_file(ps, "-lda", hmmdir, "feature_transform");
00160         ps_add_file(ps, "-featparams", hmmdir, "feat.params");
00161         ps_add_file(ps, "-senmgau", hmmdir, "senmgau");
00162     }
00163 }
00164 
00165 static void
00166 ps_free_searches(ps_decoder_t *ps)
00167 {
00168     gnode_t *gn;
00169 
00170     if (ps->searches == NULL)
00171         return;
00172 
00173     for (gn = ps->searches; gn; gn = gnode_next(gn))
00174         ps_search_free(gnode_ptr(gn));
00175     glist_free(ps->searches);
00176     ps->searches = NULL;
00177     ps->search = NULL;
00178 }
00179 
00180 static ps_search_t *
00181 ps_find_search(ps_decoder_t *ps, char const *name)
00182 {
00183     gnode_t *gn;
00184 
00185     for (gn = ps->searches; gn; gn = gnode_next(gn)) {
00186         if (0 == strcmp(ps_search_name(gnode_ptr(gn)), name))
00187             return (ps_search_t *)gnode_ptr(gn);
00188     }
00189     return NULL;
00190 }
00191 
00192 int
00193 ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
00194 {
00195     char const *lmfile, *lmctl = NULL;
00196 
00197     if (config && config != ps->config) {
00198         cmd_ln_free_r(ps->config);
00199         ps->config = config;
00200     }
00201 #ifndef _WIN32_WCE
00202     /* Set up logging. */
00203     if (cmd_ln_str_r(ps->config, "-logfn"))
00204         err_set_logfile(cmd_ln_str_r(ps->config, "-logfn"));
00205 #endif
00206     err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
00207     ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
00208     ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
00209     ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");
00210 
00211     /* Fill in some default arguments. */
00212     ps_init_defaults(ps);
00213 
00214     /* Free old searches (do this before other reinit) */
00215     ps_free_searches(ps);
00216 
00217     /* Free old acmod. */
00218     acmod_free(ps->acmod);
00219     ps->acmod = NULL;
00220 
00221     /* Free old dictionary (must be done after the two things above) */
00222     dict_free(ps->dict);
00223     ps->dict = NULL;
00224 
00225 
00226     /* Logmath computation (used in acmod and search) */
00227     if (ps->lmath == NULL
00228         || (logmath_get_base(ps->lmath) != 
00229             (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
00230         if (ps->lmath)
00231             logmath_free(ps->lmath);
00232         ps->lmath = logmath_init
00233             ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
00234              cmd_ln_boolean_r(ps->config, "-bestpath"));
00235     }
00236 
00237     /* Acoustic model (this is basically everything that
00238      * uttproc.c, senscr.c, and others used to do) */
00239     if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
00240         return -1;
00241     /* Make the acmod's feature buffer growable if we are doing two-pass search. */
00242     if (cmd_ln_boolean_r(ps->config, "-fwdflat")
00243         && cmd_ln_boolean_r(ps->config, "-fwdtree"))
00244         acmod_set_grow(ps->acmod, TRUE);
00245 
00246     if ((ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window"))) {
00247         /* Initialize an auxiliary phone loop search, which will run in
00248          * "parallel" with FSG or N-Gram search. */
00249         if ((ps->phone_loop = phone_loop_search_init(ps->config,
00250                                                      ps->acmod, ps->dict)) == NULL)
00251             return -1;
00252         ps->searches = glist_add_ptr(ps->searches, ps->phone_loop);
00253     }
00254 
00255     /* Dictionary and triphone mappings (depends on acmod). */
00256     /* FIXME: pass config, change arguments, implement LTS, etc. */
00257     if ((ps->dict = dict_init(ps->config, ps->acmod->mdef)) == NULL)
00258         return -1;
00259 
00260     /* Determine whether we are starting out in FSG or N-Gram search mode. */
00261     if (cmd_ln_str_r(ps->config, "-fsg") || cmd_ln_str_r(ps->config, "-jsgf")) {
00262         ps_search_t *fsgs;
00263 
00264         if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
00265             return -1;
00266         if ((fsgs = fsg_search_init(ps->config, ps->acmod, ps->dict, ps->d2p)) == NULL)
00267             return -1;
00268         fsgs->pls = ps->phone_loop;
00269         ps->searches = glist_add_ptr(ps->searches, fsgs);
00270         ps->search = fsgs;
00271     }
00272     else if ((lmfile = cmd_ln_str_r(ps->config, "-lm"))
00273              || (lmctl = cmd_ln_str_r(ps->config, "-lmctl"))) {
00274         ps_search_t *ngs;
00275 
00276         if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
00277             return -1;
00278         if ((ngs = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p)) == NULL)
00279             return -1;
00280         ngs->pls = ps->phone_loop;
00281         ps->searches = glist_add_ptr(ps->searches, ngs);
00282         ps->search = ngs;
00283     }
00284     /* Otherwise, we will initialize the search whenever the user
00285      * decides to load an FSG or a language model. */
00286     else {
00287         if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
00288             return -1;
00289     }
00290 
00291     /* Initialize performance timer. */
00292     ps->perf.name = "decode";
00293     ptmr_init(&ps->perf);
00294 
00295     return 0;
00296 }
00297 
00298 ps_decoder_t *
00299 ps_init(cmd_ln_t *config)
00300 {
00301     ps_decoder_t *ps;
00302 
00303     ps = ckd_calloc(1, sizeof(*ps));
00304     ps->refcount = 1;
00305     if (ps_reinit(ps, config) < 0) {
00306         ps_free(ps);
00307         return NULL;
00308     }
00309     return ps;
00310 }
00311 
00312 arg_t const *
00313 ps_args(void)
00314 {
00315     return ps_args_def;
00316 }
00317 
00318 ps_decoder_t *
00319 ps_retain(ps_decoder_t *ps)
00320 {
00321     ++ps->refcount;
00322     return ps;
00323 }
00324 
00325 int
00326 ps_free(ps_decoder_t *ps)
00327 {
00328     gnode_t *gn;
00329 
00330     if (ps == NULL)
00331         return 0;
00332     if (--ps->refcount > 0)
00333         return ps->refcount;
00334     for (gn = ps->searches; gn; gn = gnode_next(gn))
00335         ps_search_free(gnode_ptr(gn));
00336     glist_free(ps->searches);
00337     dict_free(ps->dict);
00338     dict2pid_free(ps->d2p);
00339     acmod_free(ps->acmod);
00340     logmath_free(ps->lmath);
00341     cmd_ln_free_r(ps->config);
00342     ckd_free(ps->uttid);
00343     ckd_free(ps);
00344     return 0;
00345 }
00346 
00347 char const *
00348 ps_get_uttid(ps_decoder_t *ps)
00349 {
00350     return ps->uttid;
00351 }
00352 
00353 cmd_ln_t *
00354 ps_get_config(ps_decoder_t *ps)
00355 {
00356     return ps->config;
00357 }
00358 
00359 logmath_t *
00360 ps_get_logmath(ps_decoder_t *ps)
00361 {
00362     return ps->lmath;
00363 }
00364 
00365 fe_t *
00366 ps_get_fe(ps_decoder_t *ps)
00367 {
00368     return ps->acmod->fe;
00369 }
00370 
00371 feat_t *
00372 ps_get_feat(ps_decoder_t *ps)
00373 {
00374     return ps->acmod->fcb;
00375 }
00376 
00377 ps_mllr_t *
00378 ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr)
00379 {
00380     return acmod_update_mllr(ps->acmod, mllr);
00381 }
00382 
00383 ngram_model_t *
00384 ps_get_lmset(ps_decoder_t *ps)
00385 {
00386     if (ps->search == NULL
00387         || 0 != strcmp(ps_search_name(ps->search), "ngram"))
00388         return NULL;
00389     return ((ngram_search_t *)ps->search)->lmset;
00390 }
00391 
00392 ngram_model_t *
00393 ps_update_lmset(ps_decoder_t *ps, ngram_model_t *lmset)
00394 {
00395     ngram_search_t *ngs;
00396     ps_search_t *search;
00397 
00398     /* Look for N-Gram search. */
00399     search = ps_find_search(ps, "ngram");
00400     if (search == NULL) {
00401         /* Initialize N-Gram search. */
00402         search = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p);
00403         if (search == NULL)
00404             return NULL;
00405         search->pls = ps->phone_loop;
00406         ps->searches = glist_add_ptr(ps->searches, search);
00407         ngs = (ngram_search_t *)search;
00408     }
00409     else {
00410         ngs = (ngram_search_t *)search;
00411         /* Free any previous lmset if this is a new one. */
00412         if (ngs->lmset != NULL && ngs->lmset != lmset)
00413             ngram_model_free(ngs->lmset);
00414         ngs->lmset = lmset;
00415         /* Tell N-Gram search to update its view of the world. */
00416         if (ps_search_reinit(search, ps->dict, ps->d2p) < 0)
00417             return NULL;
00418     }
00419     ps->search = search;
00420     return ngs->lmset;
00421 }
00422 
00423 fsg_set_t *
00424 ps_get_fsgset(ps_decoder_t *ps)
00425 {
00426     if (ps->search == NULL
00427         || 0 != strcmp(ps_search_name(ps->search), "fsg"))
00428         return NULL;
00429     return (fsg_set_t *)ps->search;
00430 }
00431 
00432 fsg_set_t *
00433 ps_update_fsgset(ps_decoder_t *ps)
00434 {
00435     ps_search_t *search;
00436 
00437     /* Look for FSG search. */
00438     search = ps_find_search(ps, "fsg");
00439     if (search == NULL) {
00440         /* Initialize FSG search. */
00441         search = fsg_search_init(ps->config,
00442                                  ps->acmod, ps->dict, ps->d2p);
00443         search->pls = ps->phone_loop;
00444         ps->searches = glist_add_ptr(ps->searches, search);
00445     }
00446     else {
00447         /* Tell FSG search to update its view of the world. */
00448         if (ps_search_reinit(search, ps->dict, ps->d2p) < 0)
00449             return NULL;
00450     }
00451     ps->search = search;
00452     return (fsg_set_t *)search;
00453 }
00454 
00455 int
00456 ps_load_dict(ps_decoder_t *ps, char const *dictfile,
00457              char const *fdictfile, char const *format)
00458 {
00459     cmd_ln_t *newconfig;
00460     dict2pid_t *d2p;
00461     dict_t *dict;
00462     gnode_t *gn;
00463     int rv;
00464 
00465     /* Create a new scratch config to load this dict (so existing one
00466      * won't be affected if it fails) */
00467     newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL);
00468     cmd_ln_set_boolean_r(newconfig, "-dictcase",
00469                          cmd_ln_boolean_r(ps->config, "-dictcase"));
00470     cmd_ln_set_str_r(newconfig, "-dict", dictfile);
00471     if (fdictfile)
00472         cmd_ln_set_str_r(newconfig, "-fdict", fdictfile);
00473     else
00474         cmd_ln_set_str_r(newconfig, "-fdict",
00475                          cmd_ln_str_r(ps->config, "-fdict"));
00476 
00477     /* Try to load it. */
00478     if ((dict = dict_init(newconfig, ps->acmod->mdef)) == NULL) {
00479         cmd_ln_free_r(newconfig);
00480         return -1;
00481     }
00482 
00483     /* Reinit the dict2pid. */
00484     if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) {
00485         cmd_ln_free_r(newconfig);
00486         return -1;
00487     }
00488 
00489     /* Success!  Update the existing config to reflect new dicts and
00490      * drop everything into place. */
00491     cmd_ln_free_r(newconfig);
00492     cmd_ln_set_str_r(ps->config, "-dict", dictfile);
00493     if (fdictfile)
00494         cmd_ln_set_str_r(ps->config, "-fdict", fdictfile);
00495     dict_free(ps->dict);
00496     ps->dict = dict;
00497     dict2pid_free(ps->d2p);
00498     ps->d2p = d2p;
00499 
00500     /* And tell all searches to reconfigure themselves. */
00501     for (gn = ps->searches; gn; gn = gnode_next(gn)) {
00502         ps_search_t *search = gnode_ptr(gn);
00503         if ((rv = ps_search_reinit(search, dict, d2p)) < 0)
00504             return rv;
00505     }
00506 
00507     return 0;
00508 }
00509 
00510 int
00511 ps_save_dict(ps_decoder_t *ps, char const *dictfile,
00512              char const *format)
00513 {
00514     return dict_write(ps->dict, dictfile, format);
00515 }
00516 
00517 int
00518 ps_add_word(ps_decoder_t *ps,
00519             char const *word,
00520             char const *phones,
00521             int update)
00522 {
00523     int32 wid, lmwid;
00524     ngram_model_t *lmset;
00525     s3cipid_t *pron;
00526     char **phonestr, *tmp;
00527     int np, i, rv;
00528 
00529     /* Parse phones into an array of phone IDs. */
00530     tmp = ckd_salloc(phones);
00531     np = str2words(tmp, NULL, 0);
00532     phonestr = ckd_calloc(np, sizeof(*phonestr));
00533     str2words(tmp, phonestr, np);
00534     pron = ckd_calloc(np, sizeof(*pron));
00535     for (i = 0; i < np; ++i) {
00536         pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
00537         if (pron[i] == -1) {
00538             E_ERROR("Unknown phone %s in phone string %s\n",
00539                     phonestr[i], tmp);
00540             ckd_free(phonestr);
00541             ckd_free(tmp);
00542             ckd_free(pron);
00543             return -1;
00544         }
00545     }
00546     /* No longer needed. */
00547     ckd_free(phonestr);
00548     ckd_free(tmp);
00549 
00550     /* Add it to the dictionary. */
00551     if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) {
00552         ckd_free(pron);
00553         return -1;
00554     }
00555     /* No longer needed. */
00556     ckd_free(pron);
00557 
00558     /* Now we also have to add it to dict2pid. */
00559     dict2pid_add_word(ps->d2p, wid);
00560 
00561     if ((lmset = ps_get_lmset(ps)) != NULL) {
00562         /* Add it to the LM set (meaning, the current LM).  In a perfect
00563          * world, this would result in the same WID, but because of the
00564          * weird way that word IDs are handled, it doesn't. */
00565         if ((lmwid = ngram_model_add_word(lmset, word, 1.0))
00566             == NGRAM_INVALID_WID)
00567             return -1;
00568     }
00569  
00570     /* Rebuild the widmap and search tree if requested. */
00571     if (update) {
00572         if ((rv = ps_search_reinit(ps->search, ps->dict, ps->d2p) < 0))
00573             return rv;
00574     }
00575     return wid;
00576 }
00577 
00578 int
00579 ps_decode_raw(ps_decoder_t *ps, FILE *rawfh,
00580               char const *uttid, long maxsamps)
00581 {
00582     long total, pos;
00583 
00584     ps_start_utt(ps, uttid);
00585     /* If this file is seekable or maxsamps is specified, then decode
00586      * the whole thing at once. */
00587     if (maxsamps != -1 || (pos = ftell(rawfh)) >= 0) {
00588         int16 *data;
00589 
00590         if (maxsamps == -1) {
00591             long endpos;
00592             fseek(rawfh, 0, SEEK_END);
00593             endpos = ftell(rawfh);
00594             fseek(rawfh, pos, SEEK_SET);
00595             maxsamps = endpos - pos;
00596         }
00597         data = ckd_calloc(maxsamps, sizeof(*data));
00598         total = fread(data, sizeof(*data), maxsamps, rawfh);
00599         ps_process_raw(ps, data, total, FALSE, TRUE);
00600         ckd_free(data);
00601     }
00602     else {
00603         /* Otherwise decode it in a stream. */
00604         total = 0;
00605         while (!feof(rawfh)) {
00606             int16 data[256];
00607             size_t nread;
00608 
00609             nread = fread(data, sizeof(*data), sizeof(data)/sizeof(*data), rawfh);
00610             ps_process_raw(ps, data, nread, FALSE, FALSE);
00611             total += nread;
00612         }
00613     }
00614     ps_end_utt(ps);
00615     return total;
00616 }
00617 
00618 int
00619 ps_start_utt(ps_decoder_t *ps, char const *uttid)
00620 {
00621     int rv;
00622 
00623     if (ps->search == NULL) {
00624         E_ERROR("No search module is selected, did you forget to "
00625                 "specify a language model or grammar?\n");
00626         return -1;
00627     }
00628 
00629     ptmr_reset(&ps->perf);
00630     ptmr_start(&ps->perf);
00631 
00632     if (uttid) {
00633         ckd_free(ps->uttid);
00634         ps->uttid = ckd_salloc(uttid);
00635     }
00636     else {
00637         char nuttid[16];
00638         ckd_free(ps->uttid);
00639         sprintf(nuttid, "%09u", ps->uttno);
00640         ps->uttid = ckd_salloc(nuttid);
00641         ++ps->uttno;
00642     }
00643     /* Remove any residual word lattice and hypothesis. */
00644     ps_lattice_free(ps->search->dag);
00645     ps->search->dag = NULL;
00646     ps->search->last_link = NULL;
00647     ps->search->post = 0;
00648     ckd_free(ps->search->hyp_str);
00649     ps->search->hyp_str = NULL;
00650 
00651     if ((rv = acmod_start_utt(ps->acmod)) < 0)
00652         return rv;
00653 
00654     /* Start logging features and audio if requested. */
00655     if (ps->mfclogdir) {
00656         char *logfn = string_join(ps->mfclogdir, "/",
00657                                   ps->uttid, ".mfc", NULL);
00658         FILE *mfcfh;
00659         E_INFO("Writing MFCC log file: %s\n", logfn);
00660         if ((mfcfh = fopen(logfn, "wb")) == NULL) {
00661             E_ERROR_SYSTEM("Failed to open MFCC log file %s", logfn);
00662             ckd_free(logfn);
00663             return -1;
00664         }
00665         ckd_free(logfn);
00666         acmod_set_mfcfh(ps->acmod, mfcfh);
00667     }
00668     if (ps->rawlogdir) {
00669         char *logfn = string_join(ps->rawlogdir, "/",
00670                                   ps->uttid, ".raw", NULL);
00671         FILE *rawfh;
00672         E_INFO("Writing raw audio log file: %s\n", logfn);
00673         if ((rawfh = fopen(logfn, "wb")) == NULL) {
00674             E_ERROR_SYSTEM("Failed to open raw audio log file %s", logfn);
00675             ckd_free(logfn);
00676             return -1;
00677         }
00678         ckd_free(logfn);
00679         acmod_set_rawfh(ps->acmod, rawfh);
00680     }
00681     if (ps->senlogdir) {
00682         char *logfn = string_join(ps->senlogdir, "/",
00683                                   ps->uttid, ".sen", NULL);
00684         FILE *senfh;
00685         E_INFO("Writing senone score log file: %s\n", logfn);
00686         if ((senfh = fopen(logfn, "wb")) == NULL) {
00687             E_ERROR_SYSTEM("Failed to open senone score log file %s", logfn);
00688             ckd_free(logfn);
00689             return -1;
00690         }
00691         ckd_free(logfn);
00692         acmod_set_senfh(ps->acmod, senfh);
00693     }
00694 
00695     /* Start auxiliary phone loop search. */
00696     if (ps->phone_loop)
00697         ps_search_start(ps->phone_loop);
00698 
00699     return ps_search_start(ps->search);
00700 }
00701 
00702 static int
00703 ps_search_forward(ps_decoder_t *ps)
00704 {
00705     int nfr;
00706 
00707     nfr = 0;
00708     while (ps->acmod->n_feat_frame > 0) {
00709         int k;
00710         if (ps->phone_loop)
00711             if ((k = ps_search_step(ps->phone_loop, ps->acmod->output_frame)) < 0)
00712                 return k;
00713         if (ps->acmod->output_frame >= ps->pl_window)
00714             if ((k = ps_search_step(ps->search,
00715                                     ps->acmod->output_frame - ps->pl_window)) < 0)
00716                 return k;
00717         acmod_advance(ps->acmod);
00718         ++ps->n_frame;
00719         ++nfr;
00720     }
00721     return nfr;
00722 }
00723 
00724 int
00725 ps_decode_senscr(ps_decoder_t *ps, FILE *senfh,
00726                  char const *uttid)
00727 {
00728     int nfr, n_searchfr;
00729 
00730     ps_start_utt(ps, uttid);
00731     n_searchfr = 0;
00732     acmod_set_insenfh(ps->acmod, senfh);
00733     while ((nfr = acmod_read_scores(ps->acmod)) > 0) {
00734         if ((nfr = ps_search_forward(ps)) < 0) {
00735             ps_end_utt(ps);
00736             return nfr;
00737         }
00738         n_searchfr += nfr;
00739     }
00740     ps_end_utt(ps);
00741     acmod_set_insenfh(ps->acmod, NULL);
00742 
00743     return n_searchfr;
00744 }
00745 
00746 int
00747 ps_process_raw(ps_decoder_t *ps,
00748                int16 const *data,
00749                size_t n_samples,
00750                int no_search,
00751                int full_utt)
00752 {
00753     int n_searchfr = 0;
00754 
00755     if (no_search)
00756         acmod_set_grow(ps->acmod, TRUE);
00757 
00758     while (n_samples) {
00759         int nfr;
00760 
00761         /* Process some data into features. */
00762         if ((nfr = acmod_process_raw(ps->acmod, &data,
00763                                      &n_samples, full_utt)) < 0)
00764             return nfr;
00765 
00766         /* Score and search as much data as possible */
00767         if (no_search)
00768             continue;
00769         if ((nfr = ps_search_forward(ps)) < 0)
00770             return nfr;
00771         n_searchfr += nfr;
00772     }
00773 
00774     return n_searchfr;
00775 }
00776 
00777 int
00778 ps_process_cep(ps_decoder_t *ps,
00779                mfcc_t **data,
00780                int32 n_frames,
00781                int no_search,
00782                int full_utt)
00783 {
00784     int n_searchfr = 0;
00785 
00786     if (no_search)
00787         acmod_set_grow(ps->acmod, TRUE);
00788 
00789     while (n_frames) {
00790         int nfr;
00791 
00792         /* Process some data into features. */
00793         if ((nfr = acmod_process_cep(ps->acmod, &data,
00794                                      &n_frames, full_utt)) < 0)
00795             return nfr;
00796 
00797         /* Score and search as much data as possible */
00798         if (no_search)
00799             continue;
00800         if ((nfr = ps_search_forward(ps)) < 0)
00801             return nfr;
00802         n_searchfr += nfr;
00803     }
00804 
00805     return n_searchfr;
00806 }
00807 
00808 int
00809 ps_end_utt(ps_decoder_t *ps)
00810 {
00811     int rv, i;
00812 
00813     acmod_end_utt(ps->acmod);
00814 
00815     /* Search any remaining frames. */
00816     if ((rv = ps_search_forward(ps)) < 0) {
00817         ptmr_stop(&ps->perf);
00818         return rv;
00819     }
00820     /* Finish phone loop search. */
00821     if (ps->phone_loop) {
00822         if ((rv = ps_search_finish(ps->phone_loop)) < 0) {
00823             ptmr_stop(&ps->perf);
00824             return rv;
00825         }
00826     }
00827     /* Search any frames remaining in the lookahead window. */
00828     for (i = ps->acmod->output_frame - ps->pl_window;
00829          i < ps->acmod->output_frame; ++i)
00830         ps_search_step(ps->search, i);
00831     /* Finish main search. */
00832     if ((rv = ps_search_finish(ps->search)) < 0) {
00833         ptmr_stop(&ps->perf);
00834         return rv;
00835     }
00836     ptmr_stop(&ps->perf);
00837 
00838     /* Log a backtrace if requested. */
00839     if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
00840         char const *uttid, *hyp;
00841         ps_seg_t *seg;
00842         int32 score;
00843 
00844         hyp = ps_get_hyp(ps, &score, &uttid);
00845         E_INFO("%s: %s (%d)\n", uttid, hyp, score);
00846         E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
00847                     "word", "start", "end", "pprob", "ascr", "lscr", "lback");
00848         for (seg = ps_seg_iter(ps, &score); seg;
00849              seg = ps_seg_next(seg)) {
00850             char const *word;
00851             int sf, ef;
00852             int32 post, lscr, ascr, lback;
00853 
00854             word = ps_seg_word(seg);
00855             ps_seg_frames(seg, &sf, &ef);
00856             post = ps_seg_prob(seg, &ascr, &lscr, &lback);
00857             E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
00858                         word, sf, ef, logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback);
00859         }
00860     }
00861     return rv;
00862 }
00863 
00864 char const *
00865 ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score, char const **out_uttid)
00866 {
00867     char const *hyp;
00868 
00869     ptmr_start(&ps->perf);
00870     hyp = ps_search_hyp(ps->search, out_best_score);
00871     if (out_uttid)
00872         *out_uttid = ps->uttid;
00873     ptmr_stop(&ps->perf);
00874     return hyp;
00875 }
00876 
00877 int32
00878 ps_get_prob(ps_decoder_t *ps, char const **out_uttid)
00879 {
00880     int32 prob;
00881 
00882     ptmr_start(&ps->perf);
00883     prob = ps_search_prob(ps->search);
00884     if (out_uttid)
00885         *out_uttid = ps->uttid;
00886     ptmr_stop(&ps->perf);
00887     return prob;
00888 }
00889 
00890 ps_seg_t *
00891 ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score)
00892 {
00893     ps_seg_t *itor;
00894 
00895     ptmr_start(&ps->perf);
00896     itor = ps_search_seg_iter(ps->search, out_best_score);
00897     ptmr_stop(&ps->perf);
00898     return itor;
00899 }
00900 
00901 ps_seg_t *
00902 ps_seg_next(ps_seg_t *seg)
00903 {
00904     return ps_search_seg_next(seg);
00905 }
00906 
00907 char const *
00908 ps_seg_word(ps_seg_t *seg)
00909 {
00910     return seg->word;
00911 }
00912 
00913 void
00914 ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
00915 {
00916     if (out_sf) *out_sf = seg->sf;
00917     if (out_ef) *out_ef = seg->ef;
00918 }
00919 
00920 int32
00921 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
00922 {
00923     if (out_ascr) *out_ascr = seg->ascr;
00924     if (out_lscr) *out_lscr = seg->lscr;
00925     if (out_lback) *out_lback = seg->lback;
00926     return seg->prob;
00927 }
00928 
00929 void
00930 ps_seg_free(ps_seg_t *seg)
00931 {
00932     ps_search_seg_free(seg);
00933 }
00934 
00935 ps_lattice_t *
00936 ps_get_lattice(ps_decoder_t *ps)
00937 {
00938     return ps_search_lattice(ps->search);
00939 }
00940 
00941 ps_nbest_t *
00942 ps_nbest(ps_decoder_t *ps, int sf, int ef,
00943          char const *ctx1, char const *ctx2)
00944 {
00945     ps_lattice_t *dag;
00946     ngram_model_t *lmset;
00947     ps_astar_t *nbest;
00948     float32 lwf;
00949     int32 w1, w2;
00950 
00951     if (ps->search == NULL)
00952         return NULL;
00953     if ((dag = ps_get_lattice(ps)) == NULL)
00954         return NULL;
00955 
00956     /* FIXME: This is all quite specific to N-Gram search.  Either we
00957      * should make N-best a method for each search module or it needs
00958      * to be abstracted to work for N-Gram and FSG. */
00959     if (0 != strcmp(ps_search_name(ps->search), "ngram")) {
00960         lmset = NULL;
00961         lwf = 1.0f;
00962     }
00963     else {
00964         lmset = ((ngram_search_t *)ps->search)->lmset;
00965         lwf = ((ngram_search_t *)ps->search)->bestpath_fwdtree_lw_ratio;
00966     }
00967 
00968     w1 = ctx1 ? dict_wordid(ps_search_dict(ps->search), ctx1) : -1;
00969     w2 = ctx2 ? dict_wordid(ps_search_dict(ps->search), ctx2) : -1;
00970     nbest = ps_astar_start(dag, lmset, lwf, sf, ef, w1, w2);
00971 
00972     return (ps_nbest_t *)nbest;
00973 }
00974 
00975 void
00976 ps_nbest_free(ps_nbest_t *nbest)
00977 {
00978     ps_astar_finish(nbest);
00979 }
00980 
00981 ps_nbest_t *
00982 ps_nbest_next(ps_nbest_t *nbest)
00983 {
00984     ps_latpath_t *next;
00985 
00986     next = ps_astar_next(nbest);
00987     if (next == NULL) {
00988         ps_nbest_free(nbest);
00989         return NULL;
00990     }
00991     return nbest;
00992 }
00993 
00994 char const *
00995 ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
00996 {
00997     if (nbest->top == NULL)
00998         return NULL;
00999     if (out_score) *out_score = nbest->top->score;
01000     return ps_astar_hyp(nbest, nbest->top);
01001 }
01002 
01003 ps_seg_t *
01004 ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score)
01005 {
01006     if (nbest->top == NULL)
01007         return NULL;
01008     if (out_score) *out_score = nbest->top->score;
01009     return ps_astar_seg_iter(nbest, nbest->top, 1.0);
01010 }
01011 
01012 int
01013 ps_get_n_frames(ps_decoder_t *ps)
01014 {
01015     return ps->acmod->output_frame + 1;
01016 }
01017 
01018 void
01019 ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech,
01020                 double *out_ncpu, double *out_nwall)
01021 {
01022     int32 frate;
01023 
01024     frate = cmd_ln_int32_r(ps->config, "-frate");
01025     *out_nspeech = (double)ps->acmod->output_frame / frate;
01026     *out_ncpu = ps->perf.t_cpu;
01027     *out_nwall = ps->perf.t_elapsed;
01028 }
01029 
01030 void
01031 ps_get_all_time(ps_decoder_t *ps, double *out_nspeech,
01032                 double *out_ncpu, double *out_nwall)
01033 {
01034     int32 frate;
01035 
01036     frate = cmd_ln_int32_r(ps->config, "-frate");
01037     *out_nspeech = (double)ps->n_frame / frate;
01038     *out_ncpu = ps->perf.t_tot_cpu;
01039     *out_nwall = ps->perf.t_tot_elapsed;
01040 }
01041 
01042 void
01043 ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt,
01044                cmd_ln_t *config, acmod_t *acmod, dict_t *dict,
01045                dict2pid_t *d2p)
01046 {
01047     search->vt = vt;
01048     search->config = config;
01049     search->acmod = acmod;
01050     if (d2p)
01051         search->d2p = dict2pid_retain(d2p);
01052     else
01053         search->d2p = NULL;
01054     if (dict) {
01055         search->dict = dict_retain(dict);
01056         search->start_wid = dict_startwid(dict);
01057         search->finish_wid = dict_finishwid(dict);
01058         search->silence_wid = dict_silwid(dict);
01059         search->n_words = dict_size(dict);
01060     }
01061     else {
01062         search->dict = NULL;
01063         search->start_wid = search->finish_wid = search->silence_wid = -1;
01064         search->n_words = 0;
01065     }
01066 }
01067 
01068 void
01069 ps_search_base_reinit(ps_search_t *search, dict_t *dict,
01070                       dict2pid_t *d2p)
01071 {
01072     dict_free(search->dict);
01073     dict2pid_free(search->d2p);
01074     /* FIXME: _retain() should just return NULL if passed NULL. */
01075     if (dict) {
01076         search->dict = dict_retain(dict);
01077         search->start_wid = dict_startwid(dict);
01078         search->finish_wid = dict_finishwid(dict);
01079         search->silence_wid = dict_silwid(dict);
01080         search->n_words = dict_size(dict);
01081     }
01082     else {
01083         search->dict = NULL;
01084         search->start_wid = search->finish_wid = search->silence_wid = -1;
01085         search->n_words = 0;
01086     }
01087     if (d2p)
01088         search->d2p = dict2pid_retain(d2p);
01089     else
01090         search->d2p = NULL;
01091 }
01092 
01093 
01094 void
01095 ps_search_deinit(ps_search_t *search)
01096 {
01097     /* FIXME: We will have refcounting on acmod, config, etc, at which
01098      * point we will free them here too. */
01099     dict_free(search->dict);
01100     dict2pid_free(search->d2p);
01101     ckd_free(search->hyp_str);
01102     ps_lattice_free(search->dag);
01103 }