• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libpocketsphinx/pocketsphinx.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /* System headers. */
00039 #include <stdio.h>
00040 #include <assert.h>
00041 
00042 /* SphinxBase headers. */
00043 #include <err.h>
00044 #include <strfuncs.h>
00045 #include <filename.h>
00046 #include <pio.h>
00047 
00048 /* Local headers. */
00049 #include "cmdln_macro.h"
00050 #include "pocketsphinx_internal.h"
00051 #include "ps_lattice_internal.h"
00052 #include "fsg_search_internal.h"
00053 #include "ngram_search.h"
00054 #include "ngram_search_fwdtree.h"
00055 #include "ngram_search_fwdflat.h"
00056 
00057 static const arg_t ps_args_def[] = {
00058     POCKETSPHINX_OPTIONS,
00059     CMDLN_EMPTY_OPTION
00060 };
00061 
/*
 * Test whether a file can be opened for reading.
 *
 * There is no obviously portable existence check, so this simply tries
 * to open the path in binary read mode and closes it again.
 *
 * Returns 1 if the open succeeded, 0 otherwise.
 */
static int
file_exists(const char *path)
{
    FILE *fh = fopen(path, "rb");

    if (fh == NULL)
        return 0;
    fclose(fh);
    return 1;
}
00072 
00073 static void
00074 ps_add_file(ps_decoder_t *ps, const char *arg,
00075             const char *hmmdir, const char *file)
00076 {
00077     char *tmp = string_join(hmmdir, "/", file, NULL);
00078 
00079     if (cmd_ln_str_r(ps->config, arg) == NULL && file_exists(tmp))
00080         cmd_ln_set_str_r(ps->config, arg, tmp);
00081     ckd_free(tmp);
00082 }
00083 
00084 static void
00085 ps_init_defaults(ps_decoder_t *ps)
00086 {
00087     char const *hmmdir;
00088 
00089     /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */
00090 #ifdef __ADSPBLACKFIN__
00091     E_INFO("Will not use mmap() on uClinux/Blackfin.");
00092     cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE);
00093 #endif
00094     /* Get acoustic model filenames and add them to the command-line */
00095     if ((hmmdir = cmd_ln_str_r(ps->config, "-hmm")) != NULL) {
00096         ps_add_file(ps, "-mdef", hmmdir, "mdef");
00097         ps_add_file(ps, "-mean", hmmdir, "means");
00098         ps_add_file(ps, "-var", hmmdir, "variances");
00099         ps_add_file(ps, "-tmat", hmmdir, "transition_matrices");
00100         ps_add_file(ps, "-mixw", hmmdir, "mixture_weights");
00101         ps_add_file(ps, "-sendump", hmmdir, "sendump");
00102         ps_add_file(ps, "-kdtree", hmmdir, "kdtrees");
00103         ps_add_file(ps, "-fdict", hmmdir, "noisedict");
00104         ps_add_file(ps, "-lda", hmmdir, "feature_transform");
00105         ps_add_file(ps, "-featparams", hmmdir, "feat.params");
00106     }
00107 }
00108 
00109 int
00110 ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
00111 {
00112     char const *lmfile, *lmctl = NULL;
00113     gnode_t *gn;
00114 
00115     if (config && config != ps->config) {
00116         cmd_ln_free_r(ps->config);
00117         ps->config = config;
00118     }
00119     /* Set up logging. */
00120     if (cmd_ln_str_r(ps->config, "-logfn"))
00121         err_set_logfile(cmd_ln_str_r(ps->config, "-logfn"));
00122     ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
00123     ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
00124 
00125     /* Fill in some default arguments. */
00126     ps_init_defaults(ps);
00127 
00128     /* Free old searches (do this before other reinit) */
00129     if (ps->searches) {
00130         for (gn = ps->searches; gn; gn = gnode_next(gn))
00131             ps_search_free(gnode_ptr(gn));
00132         glist_free(ps->searches);
00133         ps->searches = NULL;
00134         ps->search = NULL;
00135     }
00136     /* Free old acmod. */
00137     if (ps->acmod) {
00138         acmod_free(ps->acmod);
00139         ps->acmod = NULL;
00140     }
00141     /* Free old dictionary (must be done after the two things above) */
00142     if (ps->dict) {
00143         dict_free(ps->dict);
00144         ps->dict = NULL;
00145     }
00146 
00147     /* Logmath computation (used in acmod and search) */
00148     if (ps->lmath == NULL
00149         || (logmath_get_base(ps->lmath) != 
00150             (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
00151         if (ps->lmath)
00152             logmath_free(ps->lmath);
00153         ps->lmath = logmath_init
00154             ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
00155              cmd_ln_boolean_r(ps->config, "-bestpath"));
00156     }
00157 
00158     /* Acoustic model (this is basically everything that
00159      * uttproc.c, senscr.c, and others used to do) */
00160     if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
00161         return -1;
00162     /* Make the acmod's feature buffer growable if we are doing two-pass search. */
00163     if (cmd_ln_boolean_r(ps->config, "-fwdflat")
00164         && cmd_ln_boolean_r(ps->config, "-fwdtree"))
00165         acmod_set_grow(ps->acmod, TRUE);
00166 
00167     /* Dictionary and triphone mappings (depends on acmod). */
00168     if ((ps->dict = dict_init(ps->config, ps->acmod->mdef)) == NULL)
00169         return -1;
00170 
00171     /* Determine whether we are starting out in FSG or N-Gram search mode. */
00172     if (cmd_ln_str_r(ps->config, "-fsg") || cmd_ln_str_r(ps->config, "-jsgf")) {
00173         ps_search_t *fsgs;
00174 
00175         if ((fsgs = fsg_search_init(ps->config, ps->acmod, ps->dict)) == NULL)
00176             return -1;
00177         ps->searches = glist_add_ptr(ps->searches, fsgs);
00178         ps->search = fsgs;
00179     }
00180     else if ((lmfile = cmd_ln_str_r(ps->config, "-lm"))
00181              || (lmctl = cmd_ln_str_r(ps->config, "-lmctl"))) {
00182         ps_search_t *ngs;
00183 
00184         if ((ngs = ngram_search_init(ps->config, ps->acmod, ps->dict)) == NULL)
00185             return -1;
00186         ps->searches = glist_add_ptr(ps->searches, ngs);
00187         ps->search = ngs;
00188     }
00189     /* Otherwise, we will initialize the search whenever the user
00190      * decides to load an FSG or a language model. */
00191 
00192     /* Initialize performance timer. */
00193     ps->perf.name = "decode";
00194     ptmr_init(&ps->perf);
00195 
00196     return 0;
00197 }
00198 
00199 ps_decoder_t *
00200 ps_init(cmd_ln_t *config)
00201 {
00202     ps_decoder_t *ps;
00203 
00204     ps = ckd_calloc(1, sizeof(*ps));
00205     ps->refcount = 1;
00206     if (ps_reinit(ps, config) < 0) {
00207         ps_free(ps);
00208         return NULL;
00209     }
00210     return ps;
00211 }
00212 
00213 arg_t const *
00214 ps_args(void)
00215 {
00216     return ps_args_def;
00217 }
00218 
00219 ps_decoder_t *
00220 ps_retain(ps_decoder_t *ps)
00221 {
00222     ++ps->refcount;
00223     return ps;
00224 }
00225 
00226 int
00227 ps_free(ps_decoder_t *ps)
00228 {
00229     gnode_t *gn;
00230 
00231     if (ps == NULL)
00232         return 0;
00233     if (--ps->refcount > 0)
00234         return ps->refcount;
00235     for (gn = ps->searches; gn; gn = gnode_next(gn))
00236         ps_search_free(gnode_ptr(gn));
00237     glist_free(ps->searches);
00238     dict_free(ps->dict);
00239     acmod_free(ps->acmod);
00240     logmath_free(ps->lmath);
00241     cmd_ln_free_r(ps->config);
00242     ckd_free(ps->uttid);
00243     ckd_free(ps);
00244     return 0;
00245 }
00246 
00247 char const *
00248 ps_get_uttid(ps_decoder_t *ps)
00249 {
00250     return ps->uttid;
00251 }
00252 
00253 cmd_ln_t *
00254 ps_get_config(ps_decoder_t *ps)
00255 {
00256     return ps->config;
00257 }
00258 
00259 logmath_t *
00260 ps_get_logmath(ps_decoder_t *ps)
00261 {
00262     return ps->lmath;
00263 }
00264 
00265 fe_t *
00266 ps_get_fe(ps_decoder_t *ps)
00267 {
00268     return ps->acmod->fe;
00269 }
00270 
00271 feat_t *
00272 ps_get_feat(ps_decoder_t *ps)
00273 {
00274     return ps->acmod->fcb;
00275 }
00276 
00277 ngram_model_t *
00278 ps_get_lmset(ps_decoder_t *ps)
00279 {
00280     if (ps->search == NULL
00281         || 0 != strcmp(ps_search_name(ps->search), "ngram"))
00282         return NULL;
00283     return ((ngram_search_t *)ps->search)->lmset;
00284 }
00285 
00286 ngram_model_t *
00287 ps_update_lmset(ps_decoder_t *ps, ngram_model_t *lmset)
00288 {
00289     ngram_search_t *ngs;
00290     gnode_t *gn;
00291 
00292     /* Look for N-Gram search. */
00293     for (gn = ps->searches; gn; gn = gnode_next(gn)) {
00294         if (0 == strcmp(ps_search_name(gnode_ptr(gn)), "ngram"))
00295             break;
00296     }
00297     if (gn == NULL) {
00298         /* Initialize N-Gram search. */
00299         ngs = (ngram_search_t *)ngram_search_init(ps->config,
00300                                                   ps->acmod, ps->dict);
00301         if (ngs == NULL)
00302             return NULL;
00303         ps->searches = glist_add_ptr(ps->searches, ngs);
00304     }
00305     else {
00306         ngs = gnode_ptr(gn);
00307         /* Free any previous lmset if this is a new one. */
00308         if (ngs->lmset != NULL && ngs->lmset != lmset)
00309             ngram_model_free(ngs->lmset);
00310         ngs->lmset = lmset;
00311         /* Tell N-Gram search to update its view of the world. */
00312         if (ps_search_reinit(ps_search_base(ngs)) < 0)
00313             return NULL;
00314     }
00315     ps->search = ps_search_base(ngs);
00316     return ngs->lmset;
00317 }
00318 
00319 fsg_set_t *
00320 ps_get_fsgset(ps_decoder_t *ps)
00321 {
00322     if (ps->search == NULL
00323         || 0 != strcmp(ps_search_name(ps->search), "fsg"))
00324         return NULL;
00325     return (fsg_set_t *)ps->search;
00326 }
00327 
00328 fsg_set_t *
00329 ps_update_fsgset(ps_decoder_t *ps)
00330 {
00331     gnode_t *gn;
00332     fsg_search_t *fsgs;
00333 
00334     /* Look for FSG search. */
00335     for (gn = ps->searches; gn; gn = gnode_next(gn)) {
00336         if (0 == strcmp(ps_search_name(gnode_ptr(gn)), "fsg"))
00337             break;
00338     }
00339     if (gn == NULL) {
00340         /* Initialize FSG search. */
00341         fsgs = (fsg_search_t *)fsg_search_init(ps->config,
00342                                                ps->acmod, ps->dict);
00343         ps->searches = glist_add_ptr(ps->searches, fsgs);
00344     }
00345     else {
00346         /* Tell FSG search to update its view of the world. */
00347         fsgs = gnode_ptr(gn);
00348         if (ps_search_reinit(ps_search_base(fsgs)) < 0)
00349             return NULL;
00350     }
00351     ps->search = ps_search_base(fsgs);
00352     return (fsg_set_t *)fsgs;
00353 }
00354 
00355 int
00356 ps_add_word(ps_decoder_t *ps,
00357             char const *word,
00358             char const *phones,
00359             int update)
00360 {
00361     int32 wid, lmwid;
00362     ngram_model_t *lmset;
00363     char *pron;
00364     int rv;
00365 
00366     pron = ckd_salloc(phones);
00367     if ((wid = dict_add_word(ps->dict, word, pron)) == -1) {
00368         ckd_free(pron);
00369         return -1;
00370     }
00371     ckd_free(pron);
00372 
00373     if ((lmset = ps_get_lmset(ps)) != NULL) {
00374         /* FIXME: There is a way more efficient way to do this, since all
00375          * we did was replace a placeholder string with the new word
00376          * string - therefore what we ought to do is add it directly to
00377          * the current LM, then update the mapping without reallocating
00378          * everything. */
00379         /* Add it to the LM set (meaning, the current LM).  In a perfect
00380          * world, this would result in the same WID, but because of the
00381          * weird way that word IDs are handled, it doesn't. */
00382         if ((lmwid = ngram_model_add_word(lmset, word, 1.0))
00383             == NGRAM_INVALID_WID)
00384             return -1;
00385     }
00386  
00387     /* Rebuild the widmap and search tree if requested. */
00388     if (update) {
00389         if ((rv = ps_search_reinit(ps->search) < 0))
00390             return rv;
00391     }
00392     return wid;
00393 }
00394 
00395 int
00396 ps_decode_raw(ps_decoder_t *ps, FILE *rawfh,
00397               char const *uttid, long maxsamps)
00398 {
00399     long total, pos;
00400 
00401     ps_start_utt(ps, uttid);
00402     /* If this file is seekable or maxsamps is specified, then decode
00403      * the whole thing at once. */
00404     if (maxsamps != -1 || (pos = ftell(rawfh)) >= 0) {
00405         int16 *data;
00406 
00407         if (maxsamps == -1) {
00408             long endpos;
00409             fseek(rawfh, 0, SEEK_END);
00410             endpos = ftell(rawfh);
00411             fseek(rawfh, pos, SEEK_SET);
00412             maxsamps = endpos - pos;
00413         }
00414         data = ckd_calloc(maxsamps, sizeof(*data));
00415         total = fread(data, sizeof(*data), maxsamps, rawfh);
00416         ps_process_raw(ps, data, total, FALSE, TRUE);
00417         ckd_free(data);
00418     }
00419     else {
00420         /* Otherwise decode it in a stream. */
00421         total = 0;
00422         while (!feof(rawfh)) {
00423             int16 data[256];
00424             size_t nread;
00425 
00426             nread = fread(data, sizeof(*data), sizeof(data)/sizeof(*data), rawfh);
00427             ps_process_raw(ps, data, nread, FALSE, FALSE);
00428             total += nread;
00429         }
00430     }
00431     ps_end_utt(ps);
00432     return total;
00433 }
00434 
00435 int
00436 ps_start_utt(ps_decoder_t *ps, char const *uttid)
00437 {
00438     FILE *mfcfh = NULL;
00439     FILE *rawfh = NULL;
00440     int rv;
00441 
00442     if (ps->search == NULL) {
00443         E_ERROR("No search module is selected, did you forget to "
00444                 "specify a language model or grammar?\n");
00445         return -1;
00446     }
00447 
00448     ptmr_reset(&ps->perf);
00449     ptmr_start(&ps->perf);
00450 
00451     if (uttid) {
00452         ckd_free(ps->uttid);
00453         ps->uttid = ckd_salloc(uttid);
00454     }
00455     else {
00456         char nuttid[16];
00457         ckd_free(ps->uttid);
00458         sprintf(nuttid, "%09u", ps->uttno);
00459         ps->uttid = ckd_salloc(nuttid);
00460         ++ps->uttno;
00461     }
00462     /* Remove any residual word lattice and hypothesis. */
00463     ps_lattice_free(ps->search->dag);
00464     ps->search->dag = NULL;
00465     ps->search->last_link = NULL;
00466     ps->search->post = 0;
00467     ckd_free(ps->search->hyp_str);
00468     ps->search->hyp_str = NULL;
00469 
00470     if ((rv = acmod_start_utt(ps->acmod)) < 0)
00471         return rv;
00472 
00473     /* Start logging features and audio if requested. */
00474     if (ps->mfclogdir) {
00475         char *logfn = string_join(ps->mfclogdir, "/",
00476                                   ps->uttid, ".mfc", NULL);
00477         E_INFO("Writing MFCC log file: %s\n", logfn);
00478         if ((mfcfh = fopen(logfn, "wb")) == NULL) {
00479             E_ERROR_SYSTEM("Failed to open MFCC log file %s", logfn);
00480             ckd_free(logfn);
00481             return -1;
00482         }
00483         ckd_free(logfn);
00484         acmod_set_mfcfh(ps->acmod, mfcfh);
00485     }
00486     if (ps->rawlogdir) {
00487         char *logfn = string_join(ps->rawlogdir, "/",
00488                                   ps->uttid, ".raw", NULL);
00489         E_INFO("Writing raw audio log file: %s\n", logfn);
00490         if ((rawfh = fopen(logfn, "wb")) == NULL) {
00491             E_ERROR_SYSTEM("Failed to open raw audio log file %s", logfn);
00492             ckd_free(logfn);
00493             return -1;
00494         }
00495         ckd_free(logfn);
00496         acmod_set_rawfh(ps->acmod, rawfh);
00497     }
00498 
00499     return ps_search_start(ps->search);
00500 }
00501 
00502 int
00503 ps_process_raw(ps_decoder_t *ps,
00504                int16 const *data,
00505                size_t n_samples,
00506                int no_search,
00507                int full_utt)
00508 {
00509     int n_searchfr = 0;
00510 
00511     if (no_search)
00512         acmod_set_grow(ps->acmod, TRUE);
00513 
00514     while (n_samples) {
00515         int nfr;
00516 
00517         /* Process some data into features. */
00518         if ((nfr = acmod_process_raw(ps->acmod, &data,
00519                                      &n_samples, full_utt)) < 0)
00520             return nfr;
00521 
00522         /* Score and search as much data as possible */
00523         if (!no_search) {
00524             while ((nfr = ps_search_step(ps->search)) > 0) {
00525                 n_searchfr += nfr;
00526             }
00527             if (nfr < 0)
00528                 return nfr;
00529         }
00530     }
00531 
00532     ps->n_frame += n_searchfr;
00533     return n_searchfr;
00534 }
00535 
00536 int
00537 ps_process_cep(ps_decoder_t *ps,
00538                mfcc_t **data,
00539                int32 n_frames,
00540                int no_search,
00541                int full_utt)
00542 {
00543     int n_searchfr = 0;
00544 
00545     if (no_search)
00546         acmod_set_grow(ps->acmod, TRUE);
00547 
00548     while (n_frames) {
00549         int nfr;
00550 
00551         /* Process some data into features. */
00552         if ((nfr = acmod_process_cep(ps->acmod, &data,
00553                                      &n_frames, full_utt)) < 0)
00554             return nfr;
00555 
00556         /* Score and search as much data as possible */
00557         if (!no_search) {
00558             while ((nfr = ps_search_step(ps->search)) > 0) {
00559                 n_searchfr += nfr;
00560             }
00561             if (nfr < 0)
00562                 return nfr;
00563         }
00564     }
00565 
00566     ps->n_frame += n_searchfr;
00567     return n_searchfr;
00568 }
00569 
00570 int
00571 ps_end_utt(ps_decoder_t *ps)
00572 {
00573     int rv;
00574 
00575     acmod_end_utt(ps->acmod);
00576     while ((rv = ps_search_step(ps->search)) > 0) {
00577     }
00578     if (rv < 0) {
00579         ptmr_stop(&ps->perf);
00580         return rv;
00581     }
00582     rv = ps_search_finish(ps->search);
00583     ptmr_stop(&ps->perf);
00584 
00585     /* Log a backtrace if requested. */
00586     if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
00587         char const *uttid, *hyp;
00588         ps_seg_t *seg;
00589         int32 score;
00590 
00591         hyp = ps_get_hyp(ps, &score, &uttid);
00592         E_INFO("%s: %s (%d)\n", uttid, hyp, score);
00593         E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
00594                     "word", "start", "end", "pprob", "ascr", "lscr", "lback");
00595         for (seg = ps_seg_iter(ps, &score); seg;
00596              seg = ps_seg_next(seg)) {
00597             char const *word;
00598             int sf, ef;
00599             int32 post, lscr, ascr, lback;
00600 
00601             word = ps_seg_word(seg);
00602             ps_seg_frames(seg, &sf, &ef);
00603             post = ps_seg_prob(seg, &ascr, &lscr, &lback);
00604             E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
00605                         word, sf, ef, logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback);
00606         }
00607     }
00608     return rv;
00609 }
00610 
00611 char const *
00612 ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score, char const **out_uttid)
00613 {
00614     char const *hyp;
00615 
00616     ptmr_start(&ps->perf);
00617     hyp = ps_search_hyp(ps->search, out_best_score);
00618     if (out_uttid)
00619         *out_uttid = ps->uttid;
00620     ptmr_stop(&ps->perf);
00621     return hyp;
00622 }
00623 
00624 int32
00625 ps_get_prob(ps_decoder_t *ps, char const **out_uttid)
00626 {
00627     int32 prob;
00628 
00629     ptmr_start(&ps->perf);
00630     prob = ps_search_prob(ps->search);
00631     if (out_uttid)
00632         *out_uttid = ps->uttid;
00633     ptmr_stop(&ps->perf);
00634     return prob;
00635 }
00636 
00637 ps_seg_t *
00638 ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score)
00639 {
00640     ps_seg_t *itor;
00641 
00642     ptmr_start(&ps->perf);
00643     itor = ps_search_seg_iter(ps->search, out_best_score);
00644     ptmr_stop(&ps->perf);
00645     return itor;
00646 }
00647 
00648 ps_seg_t *
00649 ps_seg_next(ps_seg_t *seg)
00650 {
00651     return ps_search_seg_next(seg);
00652 }
00653 
00654 char const *
00655 ps_seg_word(ps_seg_t *seg)
00656 {
00657     return seg->word;
00658 }
00659 
00660 void
00661 ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
00662 {
00663     if (out_sf) *out_sf = seg->sf;
00664     if (out_ef) *out_ef = seg->ef;
00665 }
00666 
00667 int32
00668 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
00669 {
00670     if (out_ascr) *out_ascr = seg->ascr;
00671     if (out_lscr) *out_lscr = seg->lscr;
00672     if (out_lback) *out_lback = seg->lback;
00673     return seg->prob;
00674 }
00675 
00676 void
00677 ps_seg_free(ps_seg_t *seg)
00678 {
00679     ps_search_seg_free(seg);
00680 }
00681 
00682 ps_lattice_t *
00683 ps_get_lattice(ps_decoder_t *ps)
00684 {
00685     return ps_search_lattice(ps->search);
00686 }
00687 
00688 ps_nbest_t *
00689 ps_nbest(ps_decoder_t *ps, int sf, int ef,
00690          char const *ctx1, char const *ctx2)
00691 {
00692     ps_lattice_t *dag;
00693     ngram_model_t *lmset;
00694     ps_astar_t *nbest;
00695     float32 lwf;
00696     int32 w1, w2;
00697 
00698     if (ps->search == NULL)
00699         return NULL;
00700     if ((dag = ps_get_lattice(ps)) == NULL)
00701         return NULL;
00702 
00703     /* FIXME: This is all quite specific to N-Gram search.  Either we
00704      * should make N-best a method for each search module or it needs
00705      * to be abstracted to work for N-Gram and FSG. */
00706     if (0 != strcmp(ps_search_name(ps->search), "ngram")) {
00707         lmset = NULL;
00708         lwf = 1.0f;
00709     }
00710     else {
00711         lmset = ((ngram_search_t *)ps->search)->lmset;
00712         lwf = ((ngram_search_t *)ps->search)->bestpath_fwdtree_lw_ratio;
00713     }
00714 
00715     w1 = ctx1 ? dict_to_id(ps_search_dict(ps->search), ctx1) : -1;
00716     w2 = ctx2 ? dict_to_id(ps_search_dict(ps->search), ctx2) : -1;
00717     nbest = ps_astar_start(dag, lmset, lwf, sf, ef, w1, w2);
00718 
00719     return (ps_nbest_t *)nbest;
00720 }
00721 
00722 void
00723 ps_nbest_free(ps_nbest_t *nbest)
00724 {
00725     ps_astar_finish(nbest);
00726 }
00727 
00728 ps_nbest_t *
00729 ps_nbest_next(ps_nbest_t *nbest)
00730 {
00731     ps_latpath_t *next;
00732 
00733     next = ps_astar_next(nbest);
00734     if (next == NULL) {
00735         ps_nbest_free(nbest);
00736         return NULL;
00737     }
00738     return nbest;
00739 }
00740 
00741 char const *
00742 ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
00743 {
00744     if (nbest->paths_done == NULL)
00745         return NULL;
00746     if (out_score) *out_score = nbest->paths_done->score;
00747     return ps_astar_hyp(nbest, nbest->paths_done);
00748 }
00749 
00750 ps_seg_t *
00751 ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score)
00752 {
00753     if (nbest->paths_done == NULL)
00754         return NULL;
00755     if (out_score) *out_score = nbest->paths_done->score;
00756     return ps_astar_seg_iter(nbest, nbest->paths_done, 1.0);
00757 }
00758 
00759 int
00760 ps_get_n_frames(ps_decoder_t *ps)
00761 {
00762     return ps->acmod->output_frame + 1;
00763 }
00764 
00765 void
00766 ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech,
00767                 double *out_ncpu, double *out_nwall)
00768 {
00769     int32 frate;
00770 
00771     frate = cmd_ln_int32_r(ps->config, "-frate");
00772     *out_nspeech = (double)ps->acmod->output_frame / frate;
00773     *out_ncpu = ps->perf.t_cpu;
00774     *out_nwall = ps->perf.t_elapsed;
00775 }
00776 
00777 void
00778 ps_get_all_time(ps_decoder_t *ps, double *out_nspeech,
00779                 double *out_ncpu, double *out_nwall)
00780 {
00781     int32 frate;
00782 
00783     frate = cmd_ln_int32_r(ps->config, "-frate");
00784     *out_nspeech = (double)ps->n_frame / frate;
00785     *out_ncpu = ps->perf.t_tot_cpu;
00786     *out_nwall = ps->perf.t_tot_elapsed;
00787 }
00788 
00789 void
00790 ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt,
00791                cmd_ln_t *config, acmod_t *acmod, dict_t *dict)
00792 {
00793     search->vt = vt;
00794     search->config = config;
00795     search->acmod = acmod;
00796     search->dict = dict;
00797     search->start_wid = dict_to_id(dict, "<s>");
00798     search->finish_wid = dict_to_id(dict, "</s>");
00799     search->silence_wid = dict_to_id(dict, "<sil>");
00800 }
00801 
00802 void
00803 ps_search_deinit(ps_search_t *search)
00804 {
00805     /* FIXME: We will have refcounting on acmod, config, etc, at which
00806      * point we will free them here too. */
00807     ckd_free(search->hyp_str);
00808     ps_lattice_free(search->dag);
00809 }

Generated on Thu Jan 27 2011 for PocketSphinx by  doxygen 1.7.1