• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libpocketsphinx/acmod.c

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 
00044 /* System headers. */
00045 #include <assert.h>
00046 
00047 /* SphinxBase headers. */
00048 #include <prim_type.h>
00049 #include <cmd_ln.h>
00050 #include <strfuncs.h>
00051 #include <string.h>
00052 #include <byteorder.h>
00053 #include <feat.h>
00054 
00055 /* Local headers. */
00056 #include "cmdln_macro.h"
00057 #include "acmod.h"
00058 
00059 /* Feature and front-end parameters that may be in feat.params.
 * acmod_init_am() hands this table to cmd_ln_parse_file_r() when it parses
 * the file named by the -featparams option. */
00060 static const arg_t feat_defn[] = {
00061     waveform_to_cepstral_command_line_macro(),
00062     cepstral_to_feature_command_line_macro(),
00063     POCKETSPHINX_DEBUG_OPTIONS,
00064     CMDLN_EMPTY_OPTION
00065 };
00066 
/* NOTE(review): in builds that do not define WORDS_BIGENDIAN themselves,
 * this fallback sets it to 1, so every `!WORDS_BIGENDIAN` test in this file
 * is false and the byte-swapping code in acmod_end_utt() and acmod_log_mfc()
 * is never executed.  Confirm whether 0 was the intended default. */
00067 #ifndef WORDS_BIGENDIAN
00068 #define WORDS_BIGENDIAN 1
00069 #endif
00070 
/* Forward declarations of static helpers that are defined further down in
 * this file but called before their definitions. */
00071 static int32 acmod_flags2list(acmod_t *acmod);
00072 static int32 acmod_process_mfcbuf(acmod_t *acmod);
00073 
00074 static int
00075 acmod_init_am(acmod_t *acmod)
00076 {
00077     char const *mdeffn, *tmatfn, *featparams;
00078 
00079     /* Look for feat.params in acoustic model dir. */
00080     if ((featparams = cmd_ln_str_r(acmod->config, "-featparams"))) {
00081         if (cmd_ln_parse_file_r(acmod->config, feat_defn, featparams, FALSE) != NULL) {
00082             E_INFO("Parsed model-specific feature parameters from %s\n", featparams);
00083         }
00084     }
00085 
00086     /* Read model definition. */
00087     if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
00088         E_ERROR("Must specify -mdef or -hmm\n");
00089         return -1;
00090     }
00091 
00092     if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
00093         E_ERROR("Failed to read model definition from %s\n", mdeffn);
00094         return -1;
00095     }
00096 
00097     /* Read transition matrices. */
00098     if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
00099         E_ERROR("No tmat file specified\n");
00100         return -1;
00101     }
00102     acmod->tmat = tmat_init(tmatfn, acmod->lmath,
00103                             cmd_ln_float32_r(acmod->config, "-tmatfloor"),
00104                             TRUE);
00105 
00106     /* Read the acoustic models. */
00107     if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
00108         || (cmd_ln_str_r(acmod->config, "-var") == NULL)
00109         || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) {
00110         E_ERROR("No mean/var/tmat files specified\n");
00111         return -1;
00112     }
00113 
00114     E_INFO("Attempting to use SCGMM computation module\n");
00115     acmod->mgau
00116         = s2_semi_mgau_init(acmod->config, acmod->lmath, acmod->mdef);
00117     if (acmod->mgau) {
00118         char const *kdtreefn = cmd_ln_str_r(acmod->config, "-kdtree");
00119         if (kdtreefn)
00120             s2_semi_mgau_load_kdtree(acmod->mgau, kdtreefn,
00121                                      cmd_ln_int32_r(acmod->config, "-kdmaxdepth"),
00122                                      cmd_ln_int32_r(acmod->config, "-kdmaxbbi"));
00123         acmod->frame_eval = (frame_eval_t)&s2_semi_mgau_frame_eval;
00124         acmod->mgau_free = (void *)&s2_semi_mgau_free;
00125     }
00126     else {
00127         E_INFO("Falling back to general multi-stream GMM computation\n");
00128         acmod->mgau =
00129             ms_mgau_init(acmod->config, acmod->lmath);
00130         acmod->frame_eval = (frame_eval_t)&ms_cont_mgau_frame_eval;
00131         acmod->mgau_free = (void *)&ms_mgau_free;
00132     }
00133 
00134     return 0;
00135 }
00136 
00137 static int
00138 acmod_init_feat(acmod_t *acmod)
00139 {
00140     acmod->fcb = 
00141         feat_init(cmd_ln_str_r(acmod->config, "-feat"),
00142                   cmn_type_from_str(cmd_ln_str_r(acmod->config,"-cmn")),
00143                   cmd_ln_boolean_r(acmod->config, "-varnorm"),
00144                   agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
00145                   1, cmd_ln_int32_r(acmod->config, "-ceplen"));
00146     if (acmod->fcb == NULL)
00147         return -1;
00148 
00149     if (cmd_ln_str_r(acmod->config, "-lda")) {
00150         E_INFO("Reading linear feature transformation from %s\n",
00151                cmd_ln_str_r(acmod->config, "-lda"));
00152         if (feat_read_lda(acmod->fcb,
00153                           cmd_ln_str_r(acmod->config, "-lda"),
00154                           cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
00155             return -1;
00156     }
00157 
00158     if (cmd_ln_str_r(acmod->config, "-svspec")) {
00159         int32 **subvecs;
00160         E_INFO("Using subvector specification %s\n", 
00161                cmd_ln_str_r(acmod->config, "-svspec"));
00162         if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->config, "-svspec"))) == NULL)
00163             return -1;
00164         if ((feat_set_subvecs(acmod->fcb, subvecs)) < 0)
00165             return -1;
00166     }
00167 
00168     if (cmd_ln_exists_r(acmod->config, "-agcthresh")
00169         && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) {
00170         agc_set_threshold(acmod->fcb->agc_struct,
00171                           cmd_ln_float32_r(acmod->config, "-agcthresh"));
00172     }
00173 
00174     if (cmd_ln_exists_r(acmod->config, "-cmninit")) {
00175         char *c, *cc, *vallist;
00176         int32 nvals;
00177 
00178         vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
00179         c = vallist;
00180         nvals = 0;
00181         while (nvals < acmod->fcb->cmn_struct->veclen
00182                && (cc = strchr(c, ',')) != NULL) {
00183             *cc = '\0';
00184             acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC((float32)atof(c));
00185             c = cc + 1;
00186             ++nvals;
00187         }
00188         if (nvals < acmod->fcb->cmn_struct->veclen && *c != '\0') {
00189             acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC((float32)atof(c));
00190         }
00191         ckd_free(vallist);
00192     }
00193     return 0;
00194 }
00195 
00196 int
00197 acmod_fe_mismatch(acmod_t *acmod, fe_t *fe)
00198 {
00199     /* Output vector dimension needs to be the same. */
00200     if (cmd_ln_int32_r(acmod->config, "-ceplen") != fe_get_output_size(fe))
00201         return TRUE;
00202     /* Feature parameters need to be the same. */
00203     /* ... */
00204     return FALSE;
00205 }
00206 
00207 int
00208 acmod_feat_mismatch(acmod_t *acmod, feat_t *fcb)
00209 {
00210     /* Feature type needs to be the same. */
00211     if (0 != strcmp(cmd_ln_str_r(acmod->config, "-feat"), feat_name(fcb)))
00212         return TRUE;
00213     /* Input vector dimension needs to be the same. */
00214     if (cmd_ln_int32_r(acmod->config, "-ceplen") != feat_cepsize(fcb))
00215         return TRUE;
00216     /* FIXME: Need to check LDA and stuff too. */
00217     return FALSE;
00218 }
00219 
00220 acmod_t *
00221 acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
00222 {
00223     acmod_t *acmod;
00224 
00225     acmod = ckd_calloc(1, sizeof(*acmod));
00226     acmod->config = config;
00227     acmod->lmath = lmath;
00228     acmod->state = ACMOD_IDLE;
00229 
00230     /* Load acoustic model parameters. */
00231     if (acmod_init_am(acmod) < 0)
00232         goto error_out;
00233 
00234     if (fe) {
00235         if (acmod_fe_mismatch(acmod, fe))
00236             goto error_out;
00237         fe_retain(fe);
00238         acmod->fe = fe;
00239     }
00240     else {
00241         /* Initialize a new front end. */
00242         cmd_ln_retain(config);
00243         acmod->fe = fe_init_auto_r(config);
00244         if (acmod->fe == NULL)
00245             goto error_out;
00246     }
00247     if (fcb) {
00248         if (acmod_feat_mismatch(acmod, fcb))
00249             goto error_out;
00250         feat_retain(fcb);
00251         acmod->fcb = fcb;
00252     }
00253     else {
00254         /* Initialize a new fcb. */
00255         if (acmod_init_feat(acmod) < 0)
00256             goto error_out;
00257     }
00258 
00259     /* The MFCC buffer needs to be at least as large as the dynamic
00260      * feature window.  */
00261     acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1;
00262     acmod->mfc_buf = (mfcc_t **)
00263         ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize,
00264                       sizeof(**acmod->mfc_buf));
00265 
00266     /* Feature buffer has to be at least as large as MFCC buffer. */
00267     acmod->n_feat_alloc = acmod->n_mfc_alloc;
00268     acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc);
00269 
00270     /* Senone computation stuff. */
00271     acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
00272                                                      sizeof(*acmod->senone_scores));
00273     acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
00274     acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
00275                                                      sizeof(*acmod->senone_active));
00276     acmod->log_zero = logmath_get_zero(acmod->lmath);
00277     acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");
00278     return acmod;
00279 
00280 error_out:
00281     acmod_free(acmod);
00282     return NULL;
00283 }
00284 
00285 void
00286 acmod_free(acmod_t *acmod)
00287 {
00288     if (acmod == NULL)
00289         return;
00290 
00291     feat_free(acmod->fcb);
00292     fe_free(acmod->fe);
00293 
00294     if (acmod->mfc_buf)
00295         ckd_free_2d((void **)acmod->mfc_buf);
00296     if (acmod->feat_buf)
00297         feat_array_free(acmod->feat_buf);
00298 
00299     if (acmod->mfcfh)
00300         fclose(acmod->mfcfh);
00301     if (acmod->rawfh)
00302         fclose(acmod->rawfh);
00303 
00304     ckd_free(acmod->senone_scores);
00305     ckd_free(acmod->senone_active_vec);
00306     ckd_free(acmod->senone_active);
00307 
00308     if (acmod->mdef)
00309         bin_mdef_free(acmod->mdef);
00310     if (acmod->tmat)
00311         tmat_free(acmod->tmat);
00312 
00313     if (acmod->mgau_free)
00314         (*acmod->mgau_free)(acmod->mgau);
00315     
00316     ckd_free(acmod);
00317 }
00318 
00319 int
00320 acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
00321 {
00322     int rv = 0;
00323 
00324     if (acmod->mfcfh)
00325         fclose(acmod->mfcfh);
00326     acmod->mfcfh = logfh;
00327     fwrite(&rv, 4, 1, acmod->mfcfh);
00328     return rv;
00329 }
00330 
00331 int
00332 acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
00333 {
00334     if (acmod->rawfh)
00335         fclose(acmod->rawfh);
00336     acmod->rawfh = logfh;
00337     return 0;
00338 }
00339 
00340 void
00341 acmod_grow_feat_buf(acmod_t *acmod, int nfr)
00342 {
00343     mfcc_t ***new_feat_buf;
00344 
00345     new_feat_buf = feat_array_alloc(acmod->fcb, nfr);
00346     if (acmod->n_feat_frame || acmod->grow_feat) {
00347         memcpy(new_feat_buf[0][0], acmod->feat_buf[0][0],
00348                (acmod->n_feat_alloc
00349                 * feat_dimension(acmod->fcb)
00350                 * sizeof(***acmod->feat_buf)));
00351     }
00352     feat_array_free(acmod->feat_buf);
00353     acmod->feat_buf = new_feat_buf;
00354     acmod->n_feat_alloc = nfr;
00355 }
00356 
00357 int
00358 acmod_set_grow(acmod_t *acmod, int grow_feat)
00359 {
00360     int tmp = acmod->grow_feat;
00361     acmod->grow_feat = grow_feat;
00362 
00363     /* Expand feat_buf to a reasonable size to start with. */
00364     if (grow_feat && acmod->n_feat_alloc < 128)
00365         acmod_grow_feat_buf(acmod, 128);
00366 
00367     return tmp;
00368 }
00369 
00370 int
00371 acmod_start_utt(acmod_t *acmod)
00372 {
00373     fe_start_utt(acmod->fe);
00374     acmod->state = ACMOD_STARTED;
00375     acmod->n_mfc_frame = 0;
00376     acmod->n_feat_frame = 0;
00377     acmod->mfc_outidx = 0;
00378     acmod->feat_outidx = 0;
00379     acmod->output_frame = 0;
00380     return 0;
00381 }
00382 
00383 int
00384 acmod_end_utt(acmod_t *acmod)
00385 {
00386     int32 nfr = 0;
00387 
00388     acmod->state = ACMOD_ENDED;
00389     if (acmod->n_mfc_frame < acmod->n_mfc_alloc) {
00390         int inptr;
00391         /* Where to start writing them (circular buffer) */
00392         inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
00393         /* nfr is always either zero or one. */
00394         fe_end_utt(acmod->fe, acmod->mfc_buf[inptr], &nfr);
00395         acmod->n_mfc_frame += nfr;
00396         /* Process whatever's left, and any leadout. */
00397         if (nfr)
00398             nfr = acmod_process_mfcbuf(acmod);
00399     }
00400     if (acmod->mfcfh) {
00401         int32 outlen, rv;
00402         outlen = (ftell(acmod->mfcfh) - 4) / 4;
00403         if (!WORDS_BIGENDIAN)
00404             SWAP_INT32(&outlen);
00405         /* Try to seek and write */
00406         if ((rv = fseek(acmod->mfcfh, 0, SEEK_SET)) == 0) {
00407             fwrite(&outlen, 4, 1, acmod->mfcfh);
00408         }
00409         fclose(acmod->mfcfh);
00410         acmod->mfcfh = NULL;
00411     }
00412     if (acmod->rawfh) {
00413         fclose(acmod->rawfh);
00414         acmod->rawfh = NULL;
00415     }
00416 
00417     return nfr;
00418 }
00419 
00420 static int
00421 acmod_log_mfc(acmod_t *acmod,
00422               mfcc_t **cep, int n_frames)
00423 {
00424     int i, n;
00425     int32 *ptr = (int32 *)cep[0];
00426 
00427     n = n_frames * feat_cepsize(acmod->fcb);
00428     /* Swap bytes. */
00429     if (!WORDS_BIGENDIAN) {
00430         for (i = 0; i < (n * sizeof(mfcc_t)); ++i) {
00431             SWAP_INT32(ptr + i);
00432         }
00433     }
00434     /* Write features. */
00435     if (fwrite(cep[0], sizeof(mfcc_t), n, acmod->mfcfh) != n) {
00436         E_ERROR_SYSTEM("Failed to write %d values to log file", n);
00437     }
00438 
00439     /* Swap them back. */
00440     if (!WORDS_BIGENDIAN) {
00441         for (i = 0; i < (n * sizeof(mfcc_t)); ++i) {
00442             SWAP_INT32(ptr + i);
00443         }
00444     }
00445     return 0;
00446 }
00447 
00448 static int
00449 acmod_process_full_cep(acmod_t *acmod,
00450                        mfcc_t ***inout_cep,
00451                        int *inout_n_frames)
00452 {
00453     int32 nfr;
00454 
00455     /* Write to log file. */
00456     if (acmod->mfcfh)
00457         acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
00458 
00459     /* Resize feat_buf to fit. */
00460     if (acmod->n_feat_alloc < *inout_n_frames) {
00461         feat_array_free(acmod->feat_buf);
00462         acmod->feat_buf = feat_array_alloc(acmod->fcb, *inout_n_frames);
00463         acmod->n_feat_alloc = *inout_n_frames;
00464         acmod->n_feat_frame = 0;
00465         acmod->feat_outidx = 0;
00466     }
00467     /* Make dynamic features. */
00468     nfr = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, inout_n_frames,
00469                                TRUE, TRUE, acmod->feat_buf);
00470     acmod->n_feat_frame = nfr;
00471     *inout_cep += *inout_n_frames;
00472     *inout_n_frames = 0;
00473     return nfr;
00474 }
00475 
00476 static int
00477 acmod_process_full_raw(acmod_t *acmod,
00478                        int16 const **inout_raw,
00479                        size_t *inout_n_samps)
00480 {
00481     int32 nfr, ntail;
00482     mfcc_t **cepptr;
00483 
00484     /* Write to logging file if any. */
00485     if (acmod->rawfh)
00486         fwrite(*inout_raw, 2, *inout_n_samps, acmod->rawfh);
00487     /* Resize mfc_buf to fit. */
00488     if (fe_process_frames(acmod->fe, NULL, inout_n_samps, NULL, &nfr) < 0)
00489         return -1;
00490     if (acmod->n_mfc_alloc < nfr + 1) {
00491         ckd_free_2d(acmod->mfc_buf);
00492         acmod->mfc_buf = ckd_calloc_2d(nfr + 1, fe_get_output_size(acmod->fe),
00493                                        sizeof(**acmod->mfc_buf));
00494         acmod->n_mfc_alloc = nfr + 1;
00495     }
00496     acmod->n_mfc_frame = 0;
00497     acmod->mfc_outidx = 0;
00498     fe_start_utt(acmod->fe);
00499     if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
00500                           acmod->mfc_buf, &nfr) < 0)
00501         return -1;
00502     fe_end_utt(acmod->fe, acmod->mfc_buf[nfr], &ntail);
00503     nfr += ntail;
00504 
00505     cepptr = acmod->mfc_buf;
00506     nfr = acmod_process_full_cep(acmod, &cepptr, &nfr);
00507     acmod->n_mfc_frame = 0;
00508     return nfr;
00509 }
00510 
00514 static int32
00515 acmod_process_mfcbuf(acmod_t *acmod)
00516 {
00517     mfcc_t **mfcptr;
00518     int32 ncep;
00519 
00520     ncep = acmod->n_mfc_frame;
00521     /* Also do this in two parts because of the circular mfc_buf. */
00522     if (acmod->mfc_outidx + ncep > acmod->n_mfc_alloc) {
00523         int32 ncep1 = acmod->n_mfc_alloc - acmod->mfc_outidx;
00524         int saved_state = acmod->state;
00525 
00526         /* Make sure we don't end the utterance here. */
00527         if (acmod->state == ACMOD_ENDED)
00528             acmod->state = ACMOD_PROCESSING;
00529         mfcptr = acmod->mfc_buf + acmod->mfc_outidx;
00530         ncep1 = acmod_process_cep(acmod, &mfcptr, &ncep1, FALSE);
00531         /* It's possible that not all available frames were filled. */
00532         ncep -= ncep1;
00533         acmod->n_mfc_frame -= ncep1;
00534         acmod->mfc_outidx += ncep1;
00535         acmod->mfc_outidx %= acmod->n_mfc_alloc;
00536         /* Restore original state (could this really be the end) */
00537         acmod->state = saved_state;
00538     }
00539     mfcptr = acmod->mfc_buf + acmod->mfc_outidx;
00540     ncep = acmod_process_cep(acmod, &mfcptr, &ncep, FALSE);
00541     acmod->n_mfc_frame -= ncep;
00542     acmod->mfc_outidx += ncep;
00543     acmod->mfc_outidx %= acmod->n_mfc_alloc;
00544     return ncep;
00545 }
00546 
00547 int
00548 acmod_process_raw(acmod_t *acmod,
00549                   int16 const **inout_raw,
00550                   size_t *inout_n_samps,
00551                   int full_utt)
00552 {
00553     int32 ncep;
00554 
00555     /* If this is a full utterance, process it all at once. */
00556     if (full_utt)
00557         return acmod_process_full_raw(acmod, inout_raw, inout_n_samps);
00558 
00559     /* Write to logging file if any. */
00560     if (acmod->rawfh)
00561         fwrite(*inout_raw, 2, *inout_n_samps, acmod->rawfh);
00562     /* Append MFCCs to the end of any that are previously in there
00563      * (in practice, there will probably be none) */
00564     if (inout_n_samps && *inout_n_samps) {
00565         int inptr;
00566 
00567         /* Total number of frames available. */
00568         ncep = acmod->n_mfc_alloc - acmod->n_mfc_frame;
00569         /* Where to start writing them (circular buffer) */
00570         inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
00571 
00572         /* Write them in two parts if there is wraparound. */
00573         if (inptr + ncep > acmod->n_mfc_alloc) {
00574             int32 ncep1 = acmod->n_mfc_alloc - inptr;
00575             if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
00576                                   acmod->mfc_buf + inptr, &ncep1) < 0)
00577                 return -1;
00578             acmod->n_mfc_frame += ncep1;
00579             /* It's possible that not all available frames were filled. */
00580             ncep -= ncep1;
00581             inptr += ncep1;
00582             inptr %= acmod->n_mfc_alloc;
00583         }
00584         if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
00585                               acmod->mfc_buf + inptr, &ncep) < 0)
00586             return -1;
00587         acmod->n_mfc_frame += ncep;
00588     }
00589 
00590     /* Hand things off to acmod_process_cep. */
00591     return acmod_process_mfcbuf(acmod);
00592 }
00593 
/**
 * Convert cepstral frames to dynamic feature frames and append them to the
 * feature buffer.
 *
 * With full_utt set, everything is delegated to acmod_process_full_cep().
 * Otherwise the frames are logged (if an MFCC log file is attached), room
 * is made in feat_buf (by growing it, or by accepting fewer input frames),
 * and feat_s2mfc2feat_live() is called once, or twice when the write
 * position would run past the end of the buffer.
 *
 * @param acmod          Acoustic model object.
 * @param inout_cep      In: first input frame.  Out: advanced past the
 *                       frames that were consumed.
 * @param inout_n_frames In: number of input frames.  Out: reduced by the
 *                       number of frames consumed.
 * @param full_utt       Non-zero to process a complete utterance at once.
 * @return In the non-full_utt case, the number of input frames consumed
 *         (original count minus remaining count), or -1 if
 *         feat_s2mfc2feat_live() returns a negative value; with full_utt,
 *         whatever acmod_process_full_cep() returns.
 */
00594 int
00595 acmod_process_cep(acmod_t *acmod,
00596                   mfcc_t ***inout_cep,
00597                   int *inout_n_frames,
00598                   int full_utt)
00599 {
00600     int32 nfeat, ncep, inptr;
00601     int orig_n_frames;
00602 
00603     /* If this is a full utterance, process it all at once. */
00604     if (full_utt)
00605         return acmod_process_full_cep(acmod, inout_cep, inout_n_frames);
00606 
00607     /* Write to log file. */
00608     if (acmod->mfcfh)
00609         acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
00610 
00611     /* Maximum number of frames we're going to generate. */
00612     orig_n_frames = ncep = nfeat = *inout_n_frames;
00613 
    /* Adjust the expected feature count by the feature window size: more
     * at the end of an utterance, fewer at its start. */
00614     /* FIXME: This behaviour isn't guaranteed... */
00615     if (acmod->state == ACMOD_ENDED)
00616         nfeat += feat_window_size(acmod->fcb);
00617     else if (acmod->state == ACMOD_STARTED)
00618         nfeat -= feat_window_size(acmod->fcb);
00619 
00620     /* Clamp number of features to fit available space. */
00621     if (nfeat > acmod->n_feat_alloc - acmod->n_feat_frame) {
00622         /* Grow it as needed - we have to grow it at the end of an
00623          * utterance because we can't return a short read there. */
00624         if (acmod->grow_feat || acmod->state == ACMOD_ENDED)
00625             acmod_grow_feat_buf(acmod, acmod->n_feat_alloc + nfeat);
00626         else
            /* Otherwise accept fewer input frames, by the size of the
             * shortfall. */
00627             ncep -= (nfeat - (acmod->n_feat_alloc - acmod->n_feat_frame));
00628     }
00629 
00630     /* Where to start writing in the feature buffer. */
00631     if (acmod->grow_feat) {
00632         /* Grow to avoid wraparound if grow_feat == TRUE. */
00633         inptr = acmod->feat_outidx + acmod->n_feat_frame;
00634         while (inptr + nfeat > acmod->n_feat_alloc)
00635             acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
00636     }
00637     else {
        /* Circular buffer: wrap the write position. */
00638         inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
00639     }
00640 
00641     /* Write them in two parts if there is wraparound. */
00642     if (inptr + nfeat > acmod->n_feat_alloc) {
00643         int32 ncep1 = acmod->n_feat_alloc - inptr;
00644         int saved_state = acmod->state;
00645 
00646         /* Make sure we don't end the utterance here. */
00647         if (acmod->state == ACMOD_ENDED)
00648             acmod->state = ACMOD_PROCESSING;
00649         nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
00650                                      &ncep1,
00651                                      (acmod->state == ACMOD_STARTED),
00652                                      (acmod->state == ACMOD_ENDED),
00653                                      acmod->feat_buf + inptr);
        /* NOTE(review): this early return skips the state restore below,
         * so a state that was ACMOD_ENDED on entry is left as
         * ACMOD_PROCESSING on this error path - confirm that is intended. */
00654         if (nfeat < 0)
00655             return -1;
00656         /* Move the output feature pointer forward. */
00657         acmod->n_feat_frame += nfeat;
00658         inptr += nfeat;
00659         inptr %= acmod->n_feat_alloc;
00660         /* Move the input feature pointers forward. */
00661         *inout_n_frames -= ncep1;
00662         *inout_cep += ncep1;
00663         ncep -= ncep1;
00664         /* Restore original state (could this really be the end) */
00665         acmod->state = saved_state;
00666     }
00667 
    /* Remaining (or only) part, written at inptr. */
00668     nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
00669                                  &ncep,
00670                                  (acmod->state == ACMOD_STARTED),
00671                                  (acmod->state == ACMOD_ENDED),
00672                                  acmod->feat_buf + inptr);
00673     if (nfeat < 0)
00674         return -1;
00675     acmod->n_feat_frame += nfeat;
00676     /* Move the input feature pointers forward. */
00677     *inout_n_frames -= ncep;
00678     *inout_cep += ncep;
    /* The first batch of an utterance has now been handled. */
00679     if (acmod->state == ACMOD_STARTED)
00680         acmod->state = ACMOD_PROCESSING;
00681     return orig_n_frames - *inout_n_frames;
00682 }
00683 
00684 int
00685 acmod_process_feat(acmod_t *acmod,
00686                    mfcc_t **feat)
00687 {
00688     int i, inptr;
00689 
00690     if (acmod->n_feat_frame == acmod->n_feat_alloc) {
00691         if (acmod->grow_feat)
00692             acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
00693         else
00694             return 0;
00695     }
00696 
00697     inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
00698     for (i = 0; i < feat_dimension1(acmod->fcb); ++i)
00699         memcpy(acmod->feat_buf[inptr][i],
00700                feat[i], feat_dimension2(acmod->fcb, i) * sizeof(**feat));
00701     ++acmod->n_feat_frame;
00702 
00703     return 1;
00704 }
00705 
00706 int
00707 acmod_frame_idx(acmod_t *acmod)
00708 {
00709     return acmod->output_frame;
00710 }
00711 
00712 int
00713 acmod_rewind(acmod_t *acmod)
00714 {
00715     /* If the feature buffer is circular, this is not possible. */
00716     if (acmod->output_frame > acmod->n_feat_alloc)
00717         return -1;
00718 
00719     /* Frames consumed + frames available */
00720     acmod->n_feat_frame = acmod->feat_outidx + acmod->n_feat_frame;
00721 
00722     /* Reset output pointers. */
00723     acmod->feat_outidx = 0;
00724     acmod->output_frame = 0;
00725 
00726     return 0;
00727 }
00728 
00729 int16 const *
00730 acmod_score(acmod_t *acmod,
00731             int *out_frame_idx,
00732             int16 *out_best_score,
00733             int32 *out_best_senid)
00734 {
00735     /* No frames available to score. */
00736     if (acmod->n_feat_frame == 0)
00737         return NULL;
00738 
00739     /* Build active senone list. */
00740     acmod_flags2list(acmod);
00741 
00742     /* Wrap around output pointer.  It is very important that we do
00743      * this *before* scoring instead of after, in order for
00744      * acmod_rewind() to work - i.e. the output pointer always needs
00745      * to reflect the number of frames processed when the buffer is
00746      * not circular. */
00747     if (acmod->feat_outidx == acmod->n_feat_alloc)
00748         acmod->feat_outidx = 0;
00749     /* Generate scores for the next available frame */
00750     *out_best_score = 
00751         (*acmod->frame_eval)(acmod->mgau,
00752                              acmod->senone_scores,
00753                              acmod->senone_active,
00754                              acmod->n_senone_active,
00755                              acmod->feat_buf[acmod->feat_outidx],
00756                              acmod->output_frame,
00757                              acmod->compallsen,
00758                              out_best_senid);
00759     /* Advance the output pointers. */
00760     ++acmod->feat_outidx;
00761     --acmod->n_feat_frame;
00762 
00763     *out_frame_idx = acmod->output_frame;
00764     ++acmod->output_frame;
00765 
00766     return acmod->senone_scores;
00767 }
00768 
00769 void
00770 acmod_clear_active(acmod_t *acmod)
00771 {
00772     bitvec_clear_all(acmod->senone_active_vec, bin_mdef_n_sen(acmod->mdef));
00773     acmod->n_senone_active = 0;
00774 }
00775 
/*
 * Set the activity bit of the senone for emitting state i of HMM h in
 * acoustic model a.
 *
 * MPX_BITVEC_SET reads the per-state senone sequence ID (s.mpx_ssid[i]) and
 * skips states whose ID is -1; NONMPX_BITVEC_SET uses the single s.ssid.
 *
 * Both are wrapped in do { } while (0) so that each expands to exactly one
 * statement and can be used safely in unbraced if/else bodies.
 */
#define MPX_BITVEC_SET(a,h,i)                                           \
    do {                                                                \
        if ((h)->s.mpx_ssid[(i)] != -1)                                 \
            bitvec_set((a)->senone_active_vec,                          \
                       bin_mdef_sseq2sen((a)->mdef,                     \
                                         (h)->s.mpx_ssid[(i)], (i)));   \
    } while (0)
#define NONMPX_BITVEC_SET(a,h,i)                                        \
    do {                                                                \
        bitvec_set((a)->senone_active_vec,                              \
                   bin_mdef_sseq2sen((a)->mdef, (h)->s.ssid, (i)));     \
    } while (0)
00783 
00784 void
00785 acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
00786 {
00787     int i;
00788 
00789     if (hmm_is_mpx(hmm)) {
00790         switch (hmm_n_emit_state(hmm)) {
00791         case 5:
00792             MPX_BITVEC_SET(acmod, hmm, 4);
00793             MPX_BITVEC_SET(acmod, hmm, 3);
00794         case 3:
00795             MPX_BITVEC_SET(acmod, hmm, 2);
00796             MPX_BITVEC_SET(acmod, hmm, 1);
00797             MPX_BITVEC_SET(acmod, hmm, 0);
00798             break;
00799         default:
00800             for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
00801                 MPX_BITVEC_SET(acmod, hmm, i);
00802             }
00803         }
00804     }
00805     else {
00806         switch (hmm_n_emit_state(hmm)) {
00807         case 5:
00808             NONMPX_BITVEC_SET(acmod, hmm, 4);
00809             NONMPX_BITVEC_SET(acmod, hmm, 3);
00810         case 3:
00811             NONMPX_BITVEC_SET(acmod, hmm, 2);
00812             NONMPX_BITVEC_SET(acmod, hmm, 1);
00813             NONMPX_BITVEC_SET(acmod, hmm, 0);
00814             break;
00815         default:
00816             for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
00817                 NONMPX_BITVEC_SET(acmod, hmm, i);
00818             }
00819         }
00820     }
00821 }
00822 
00823 static int32
00824 acmod_flags2list(acmod_t *acmod)
00825 {
00826     int32 w, n, b, total_dists, total_words, extra_bits;
00827     bitvec_t *flagptr;
00828 
00829     total_dists = bin_mdef_n_sen(acmod->mdef);
00830     if (acmod->compallsen) {
00831         acmod->n_senone_active = total_dists;
00832         return total_dists;
00833     }
00834     total_words = total_dists / BITVEC_BITS;
00835     extra_bits = total_dists % BITVEC_BITS;
00836     w = n = 0;
00837     for (flagptr = acmod->senone_active_vec; w < total_words; ++w, ++flagptr) {
00838         if (*flagptr == 0)
00839             continue;
00840         for (b = 0; b < BITVEC_BITS; ++b)
00841             if (*flagptr & (1UL << b))
00842                 acmod->senone_active[n++] = w * BITVEC_BITS + b;
00843     }
00844 
00845     for (b = 0; b < extra_bits; ++b)
00846         if (*flagptr & (1UL << b))
00847             acmod->senone_active[n++] = w * BITVEC_BITS + b;
00848 
00849     acmod->n_senone_active = n;
00850     return n;
00851 }
00852 
00853 int const *
00854 acmod_active_list(acmod_t *acmod, int32 *out_n_active)
00855 {
00856     acmod_flags2list(acmod);
00857     if (out_n_active) *out_n_active = acmod->n_senone_active;
00858     return acmod->senone_active;
00859 }

Generated on Thu Jan 27 2011 for PocketSphinx by  doxygen 1.7.1