PocketSphinx  0.6
src/libpocketsphinx/phone_loop_search.c
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00042 #include <sphinxbase/err.h>
00043 
00044 #include "phone_loop_search.h"
00045 
00046 static int phone_loop_search_start(ps_search_t *search);
00047 static int phone_loop_search_step(ps_search_t *search, int frame_idx);
00048 static int phone_loop_search_finish(ps_search_t *search);
00049 static int phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p);
00050 static void phone_loop_search_free(ps_search_t *search);
00051 static char const *phone_loop_search_hyp(ps_search_t *search, int32 *out_score);
00052 static int32 phone_loop_search_prob(ps_search_t *search);
00053 static ps_seg_t *phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score);
00054 
00055 static ps_searchfuncs_t phone_loop_search_funcs = {
00056     /* name: */   "phone_loop",
00057     /* start: */  phone_loop_search_start,
00058     /* step: */   phone_loop_search_step,
00059     /* finish: */ phone_loop_search_finish,
00060     /* reinit: */ phone_loop_search_reinit,
00061     /* free: */   phone_loop_search_free,
00062     /* lattice: */  NULL,
00063     /* hyp: */      phone_loop_search_hyp,
00064     /* prob: */     phone_loop_search_prob,
00065     /* seg_iter: */ phone_loop_search_seg_iter,
00066 };
00067 
00068 static int
00069 phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
00070 {
00071     phone_loop_search_t *pls = (phone_loop_search_t *)search;
00072     cmd_ln_t *config = ps_search_config(search);
00073     acmod_t *acmod = ps_search_acmod(search);
00074     int i;
00075 
00076     /* Free old dict2pid, dict, if necessary. */
00077     ps_search_base_reinit(search, dict, d2p);
00078 
00079     /* Initialize HMM context. */
00080     if (pls->hmmctx)
00081         hmm_context_free(pls->hmmctx);
00082     pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
00083                                    acmod->tmat->tp, NULL, acmod->mdef->sseq);
00084     if (pls->hmmctx == NULL)
00085         return -1;
00086 
00087     /* Initialize phone HMMs. */
00088     if (pls->phones) {
00089         for (i = 0; i < pls->n_phones; ++i)
00090             hmm_deinit((hmm_t *)&pls->phones[i]);
00091         ckd_free(pls->phones);
00092     }
00093     pls->n_phones = bin_mdef_n_ciphone(acmod->mdef);
00094     pls->phones = ckd_calloc(pls->n_phones, sizeof(*pls->phones));
00095     for (i = 0; i < pls->n_phones; ++i) {
00096         pls->phones[i].ciphone = i;
00097         hmm_init(pls->hmmctx, (hmm_t *)&pls->phones[i],
00098                  FALSE,
00099                  bin_mdef_pid2ssid(acmod->mdef, i),
00100                  bin_mdef_pid2tmatid(acmod->mdef, i));
00101     }
00102     pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam"));
00103     pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam"));
00104     pls->pip = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pip"));
00105     E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
00106            pls->beam, pls->pbeam, pls->pip);
00107 
00108     return 0;
00109 }
00110 
00111 ps_search_t *
00112 phone_loop_search_init(cmd_ln_t *config,
00113                        acmod_t *acmod,
00114                        dict_t *dict)
00115 {
00116     phone_loop_search_t *pls;
00117 
00118     /* Allocate and initialize. */
00119     pls = ckd_calloc(1, sizeof(*pls));
00120     ps_search_init(ps_search_base(pls), &phone_loop_search_funcs,
00121                    config, acmod, dict, NULL);
00122     phone_loop_search_reinit(ps_search_base(pls), ps_search_dict(pls),
00123                              ps_search_dict2pid(pls));
00124 
00125     return ps_search_base(pls);
00126 }
00127 
00128 static void
00129 phone_loop_search_free_renorm(phone_loop_search_t *pls)
00130 {
00131     gnode_t *gn;
00132     for (gn = pls->renorm; gn; gn = gnode_next(gn))
00133         ckd_free(gnode_ptr(gn));
00134     glist_free(pls->renorm);
00135     pls->renorm = NULL;
00136 }
00137 
00138 static void
00139 phone_loop_search_free(ps_search_t *search)
00140 {
00141     phone_loop_search_t *pls = (phone_loop_search_t *)search;
00142     int i;
00143 
00144     ps_search_deinit(search);
00145     for (i = 0; i < pls->n_phones; ++i)
00146         hmm_deinit((hmm_t *)&pls->phones[i]);
00147     phone_loop_search_free_renorm(pls);
00148     ckd_free(pls->phones);
00149     hmm_context_free(pls->hmmctx);
00150     ckd_free(pls);
00151 }
00152 
00153 static int
00154 phone_loop_search_start(ps_search_t *search)
00155 {
00156     phone_loop_search_t *pls = (phone_loop_search_t *)search;
00157     int i;
00158 
00159     /* Reset and enter all phone HMMs. */
00160     for (i = 0; i < pls->n_phones; ++i) {
00161         hmm_t *hmm = (hmm_t *)&pls->phones[i];
00162         hmm_clear(hmm);
00163         hmm_enter(hmm, 0, -1, 0);
00164     }
00165     phone_loop_search_free_renorm(pls);
00166     pls->best_score = 0;
00167 
00168     return 0;
00169 }
00170 
00171 static void
00172 renormalize_hmms(phone_loop_search_t *pls, int frame_idx, int32 norm)
00173 {
00174     phone_loop_renorm_t *rn = ckd_calloc(1, sizeof(*rn));
00175     int i;
00176 
00177     pls->renorm = glist_add_ptr(pls->renorm, rn);
00178     rn->frame_idx = frame_idx;
00179     rn->norm = norm;
00180 
00181     for (i = 0; i < pls->n_phones; ++i) {
00182         hmm_normalize((hmm_t *)&pls->phones[i], norm);
00183     }
00184 }
00185 
00186 static int32
00187 evaluate_hmms(phone_loop_search_t *pls, int16 const *senscr, int frame_idx)
00188 {
00189     int32 bs = WORST_SCORE;
00190     int i, bi;
00191 
00192     hmm_context_set_senscore(pls->hmmctx, senscr);
00193 
00194     bi = 0;
00195     for (i = 0; i < pls->n_phones; ++i) {
00196         hmm_t *hmm = (hmm_t *)&pls->phones[i];
00197         int32 score;
00198 
00199         if (hmm_frame(hmm) < frame_idx)
00200             continue;
00201         score = hmm_vit_eval(hmm);
00202         if (score BETTER_THAN bs) {
00203             bs = score;
00204             bi = i;
00205         }
00206     }
00207     pls->best_score = bs;
00208     return bs;
00209 }
00210 
00211 static void
00212 prune_hmms(phone_loop_search_t *pls, int frame_idx)
00213 {
00214     int32 thresh = pls->best_score + pls->beam;
00215     int nf = frame_idx + 1;
00216     int i;
00217 
00218     /* Check all phones to see if they remain active in the next frame. */
00219     for (i = 0; i < pls->n_phones; ++i) {
00220         hmm_t *hmm = (hmm_t *)&pls->phones[i];
00221 
00222         if (hmm_frame(hmm) < frame_idx)
00223             continue;
00224         /* Retain if score better than threshold. */
00225         if (hmm_bestscore(hmm) BETTER_THAN thresh) {
00226             hmm_frame(hmm) = nf;
00227         }
00228         else
00229             hmm_clear_scores(hmm);
00230     }
00231 }
00232 
00233 static void
00234 phone_transition(phone_loop_search_t *pls, int frame_idx)
00235 {
00236     int32 thresh = pls->best_score + pls->pbeam;
00237     int nf = frame_idx + 1;
00238     int i;
00239 
00240     /* Now transition out of phones whose last states are inside the
00241      * phone transition beam. */
00242     for (i = 0; i < pls->n_phones; ++i) {
00243         hmm_t *hmm = (hmm_t *)&pls->phones[i];
00244         int32 newphone_score;
00245         int j;
00246 
00247         if (hmm_frame(hmm) != nf)
00248             continue;
00249 
00250         newphone_score = hmm_out_score(hmm) + pls->pip;
00251         if (newphone_score BETTER_THAN thresh) {
00252             /* Transition into all phones using the usual Viterbi rule. */
00253             for (j = 0; j < pls->n_phones; ++j) {
00254                 hmm_t *nhmm = (hmm_t *)&pls->phones[j];
00255 
00256                 if (hmm_frame(nhmm) < frame_idx
00257                     || newphone_score BETTER_THAN hmm_in_score(nhmm)) {
00258                     hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf);
00259                 }
00260             }
00261         }
00262     }
00263 }
00264 
00265 static int
00266 phone_loop_search_step(ps_search_t *search, int frame_idx)
00267 {
00268     phone_loop_search_t *pls = (phone_loop_search_t *)search;
00269     acmod_t *acmod = ps_search_acmod(search);
00270     int16 const *senscr;
00271     int i;
00272 
00273     /* All CI senones are active all the time. */
00274     if (!ps_search_acmod(pls)->compallsen)
00275         for (i = 0; i < pls->n_phones; ++i)
00276             acmod_activate_hmm(acmod, (hmm_t *)&pls->phones[i]);
00277 
00278     /* Calculate senone scores for current frame. */
00279     senscr = acmod_score(acmod, &frame_idx);
00280 
00281     /* Renormalize, if necessary. */
00282     if (pls->best_score + (2 * pls->beam) WORSE_THAN WORST_SCORE) {
00283         E_INFO("Renormalizing Scores at frame %d, best score %d\n",
00284                frame_idx, pls->best_score);
00285         renormalize_hmms(pls, frame_idx, pls->best_score);
00286     }
00287 
00288     /* Evaluate phone HMMs for current frame. */
00289     pls->best_score = evaluate_hmms(pls, senscr, frame_idx);
00290 
00291     /* Prune phone HMMs. */
00292     prune_hmms(pls, frame_idx);
00293 
00294     /* Do phone transitions. */
00295     phone_transition(pls, frame_idx);
00296 
00297     return 0;
00298 }
00299 
00300 static int
00301 phone_loop_search_finish(ps_search_t *search)
00302 {
00303     /* Actually nothing to do here really. */
00304     return 0;
00305 }
00306 
00307 static char const *
00308 phone_loop_search_hyp(ps_search_t *search, int32 *out_score)
00309 {
00310     E_WARN("Hypotheses are not returned from phone loop search");
00311     return NULL;
00312 }
00313 
00314 static int32
00315 phone_loop_search_prob(ps_search_t *search)
00316 {
00317     /* FIXME: Actually... they ought to be. */
00318     E_WARN("Posterior probabilities are not returned from phone loop search");
00319     return 0;
00320 }
00321 
00322 static ps_seg_t *
00323 phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score)
00324 {
00325     E_WARN("Hypotheses are not returned from phone loop search");
00326     return NULL;
00327 }