PocketSphinx
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 2008 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 00042 #include <sphinxbase/err.h> 00043 00044 #include "phone_loop_search.h" 00045 00046 static int phone_loop_search_start(ps_search_t *search); 00047 static int phone_loop_search_step(ps_search_t *search, int frame_idx); 00048 static int phone_loop_search_finish(ps_search_t *search); 00049 static int phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p); 00050 static void phone_loop_search_free(ps_search_t *search); 00051 static char const *phone_loop_search_hyp(ps_search_t *search, int32 *out_score); 00052 static int32 phone_loop_search_prob(ps_search_t *search); 00053 static ps_seg_t *phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score); 00054 00055 static ps_searchfuncs_t phone_loop_search_funcs = { 00056 /* name: */ "phone_loop", 00057 /* start: */ phone_loop_search_start, 00058 /* step: */ phone_loop_search_step, 00059 /* finish: */ phone_loop_search_finish, 00060 /* reinit: */ phone_loop_search_reinit, 00061 /* free: */ phone_loop_search_free, 00062 /* lattice: */ NULL, 00063 /* hyp: */ phone_loop_search_hyp, 00064 /* prob: */ phone_loop_search_prob, 00065 /* seg_iter: */ phone_loop_search_seg_iter, 00066 }; 00067 00068 static int 00069 phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p) 00070 { 00071 phone_loop_search_t *pls = (phone_loop_search_t *)search; 00072 cmd_ln_t *config = ps_search_config(search); 00073 acmod_t *acmod = ps_search_acmod(search); 00074 int i; 00075 00076 /* Free old dict2pid, dict, if necessary. */ 00077 ps_search_base_reinit(search, dict, d2p); 00078 00079 /* Initialize HMM context. */ 00080 if (pls->hmmctx) 00081 hmm_context_free(pls->hmmctx); 00082 pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef), 00083 acmod->tmat->tp, NULL, acmod->mdef->sseq); 00084 if (pls->hmmctx == NULL) 00085 return -1; 00086 00087 /* Initialize phone HMMs. */ 00088 if (pls->phones) { 00089 for (i = 0; i < pls->n_phones; ++i) 00090 hmm_deinit((hmm_t *)&pls->phones[i]); 00091 ckd_free(pls->phones); 00092 } 00093 pls->n_phones = bin_mdef_n_ciphone(acmod->mdef); 00094 pls->phones = ckd_calloc(pls->n_phones, sizeof(*pls->phones)); 00095 for (i = 0; i < pls->n_phones; ++i) { 00096 pls->phones[i].ciphone = i; 00097 hmm_init(pls->hmmctx, (hmm_t *)&pls->phones[i], 00098 FALSE, 00099 bin_mdef_pid2ssid(acmod->mdef, i), 00100 bin_mdef_pid2tmatid(acmod->mdef, i)); 00101 } 00102 pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam")); 00103 pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam")); 00104 pls->pip = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pip")); 00105 E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n", 00106 pls->beam, pls->pbeam, pls->pip); 00107 00108 return 0; 00109 } 00110 00111 ps_search_t * 00112 phone_loop_search_init(cmd_ln_t *config, 00113 acmod_t *acmod, 00114 dict_t *dict) 00115 { 00116 phone_loop_search_t *pls; 00117 00118 /* Allocate and initialize. */ 00119 pls = ckd_calloc(1, sizeof(*pls)); 00120 ps_search_init(ps_search_base(pls), &phone_loop_search_funcs, 00121 config, acmod, dict, NULL); 00122 phone_loop_search_reinit(ps_search_base(pls), ps_search_dict(pls), 00123 ps_search_dict2pid(pls)); 00124 00125 return ps_search_base(pls); 00126 } 00127 00128 static void 00129 phone_loop_search_free_renorm(phone_loop_search_t *pls) 00130 { 00131 gnode_t *gn; 00132 for (gn = pls->renorm; gn; gn = gnode_next(gn)) 00133 ckd_free(gnode_ptr(gn)); 00134 glist_free(pls->renorm); 00135 pls->renorm = NULL; 00136 } 00137 00138 static void 00139 phone_loop_search_free(ps_search_t *search) 00140 { 00141 phone_loop_search_t *pls = (phone_loop_search_t *)search; 00142 int i; 00143 00144 ps_search_deinit(search); 00145 for (i = 0; i < pls->n_phones; ++i) 00146 hmm_deinit((hmm_t *)&pls->phones[i]); 00147 phone_loop_search_free_renorm(pls); 00148 ckd_free(pls->phones); 00149 hmm_context_free(pls->hmmctx); 00150 ckd_free(pls); 00151 } 00152 00153 static int 00154 phone_loop_search_start(ps_search_t *search) 00155 { 00156 phone_loop_search_t *pls = (phone_loop_search_t *)search; 00157 int i; 00158 00159 /* Reset and enter all phone HMMs. */ 00160 for (i = 0; i < pls->n_phones; ++i) { 00161 hmm_t *hmm = (hmm_t *)&pls->phones[i]; 00162 hmm_clear(hmm); 00163 hmm_enter(hmm, 0, -1, 0); 00164 } 00165 phone_loop_search_free_renorm(pls); 00166 pls->best_score = 0; 00167 00168 return 0; 00169 } 00170 00171 static void 00172 renormalize_hmms(phone_loop_search_t *pls, int frame_idx, int32 norm) 00173 { 00174 phone_loop_renorm_t *rn = ckd_calloc(1, sizeof(*rn)); 00175 int i; 00176 00177 pls->renorm = glist_add_ptr(pls->renorm, rn); 00178 rn->frame_idx = frame_idx; 00179 rn->norm = norm; 00180 00181 for (i = 0; i < pls->n_phones; ++i) { 00182 hmm_normalize((hmm_t *)&pls->phones[i], norm); 00183 } 00184 } 00185 00186 static int32 00187 evaluate_hmms(phone_loop_search_t *pls, int16 const *senscr, int frame_idx) 00188 { 00189 int32 bs = WORST_SCORE; 00190 int i, bi; 00191 00192 hmm_context_set_senscore(pls->hmmctx, senscr); 00193 00194 bi = 0; 00195 for (i = 0; i < pls->n_phones; ++i) { 00196 hmm_t *hmm = (hmm_t *)&pls->phones[i]; 00197 int32 score; 00198 00199 if (hmm_frame(hmm) < frame_idx) 00200 continue; 00201 score = hmm_vit_eval(hmm); 00202 if (score BETTER_THAN bs) { 00203 bs = score; 00204 bi = i; 00205 } 00206 } 00207 pls->best_score = bs; 00208 return bs; 00209 } 00210 00211 static void 00212 prune_hmms(phone_loop_search_t *pls, int frame_idx) 00213 { 00214 int32 thresh = pls->best_score + pls->beam; 00215 int nf = frame_idx + 1; 00216 int i; 00217 00218 /* Check all phones to see if they remain active in the next frame. */ 00219 for (i = 0; i < pls->n_phones; ++i) { 00220 hmm_t *hmm = (hmm_t *)&pls->phones[i]; 00221 00222 if (hmm_frame(hmm) < frame_idx) 00223 continue; 00224 /* Retain if score better than threshold. */ 00225 if (hmm_bestscore(hmm) BETTER_THAN thresh) { 00226 hmm_frame(hmm) = nf; 00227 } 00228 else 00229 hmm_clear_scores(hmm); 00230 } 00231 } 00232 00233 static void 00234 phone_transition(phone_loop_search_t *pls, int frame_idx) 00235 { 00236 int32 thresh = pls->best_score + pls->pbeam; 00237 int nf = frame_idx + 1; 00238 int i; 00239 00240 /* Now transition out of phones whose last states are inside the 00241 * phone transition beam. */ 00242 for (i = 0; i < pls->n_phones; ++i) { 00243 hmm_t *hmm = (hmm_t *)&pls->phones[i]; 00244 int32 newphone_score; 00245 int j; 00246 00247 if (hmm_frame(hmm) != nf) 00248 continue; 00249 00250 newphone_score = hmm_out_score(hmm) + pls->pip; 00251 if (newphone_score BETTER_THAN thresh) { 00252 /* Transition into all phones using the usual Viterbi rule. */ 00253 for (j = 0; j < pls->n_phones; ++j) { 00254 hmm_t *nhmm = (hmm_t *)&pls->phones[j]; 00255 00256 if (hmm_frame(nhmm) < frame_idx 00257 || newphone_score BETTER_THAN hmm_in_score(nhmm)) { 00258 hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf); 00259 } 00260 } 00261 } 00262 } 00263 } 00264 00265 static int 00266 phone_loop_search_step(ps_search_t *search, int frame_idx) 00267 { 00268 phone_loop_search_t *pls = (phone_loop_search_t *)search; 00269 acmod_t *acmod = ps_search_acmod(search); 00270 int16 const *senscr; 00271 int i; 00272 00273 /* All CI senones are active all the time. */ 00274 if (!ps_search_acmod(pls)->compallsen) 00275 for (i = 0; i < pls->n_phones; ++i) 00276 acmod_activate_hmm(acmod, (hmm_t *)&pls->phones[i]); 00277 00278 /* Calculate senone scores for current frame. */ 00279 senscr = acmod_score(acmod, &frame_idx); 00280 00281 /* Renormalize, if necessary. */ 00282 if (pls->best_score + (2 * pls->beam) WORSE_THAN WORST_SCORE) { 00283 E_INFO("Renormalizing Scores at frame %d, best score %d\n", 00284 frame_idx, pls->best_score); 00285 renormalize_hmms(pls, frame_idx, pls->best_score); 00286 } 00287 00288 /* Evaluate phone HMMs for current frame. */ 00289 pls->best_score = evaluate_hmms(pls, senscr, frame_idx); 00290 00291 /* Prune phone HMMs. */ 00292 prune_hmms(pls, frame_idx); 00293 00294 /* Do phone transitions. */ 00295 phone_transition(pls, frame_idx); 00296 00297 return 0; 00298 } 00299 00300 static int 00301 phone_loop_search_finish(ps_search_t *search) 00302 { 00303 /* Actually nothing to do here really. */ 00304 return 0; 00305 } 00306 00307 static char const * 00308 phone_loop_search_hyp(ps_search_t *search, int32 *out_score) 00309 { 00310 E_WARN("Hypotheses are not returned from phone loop search"); 00311 return NULL; 00312 } 00313 00314 static int32 00315 phone_loop_search_prob(ps_search_t *search) 00316 { 00317 /* FIXME: Actually... they ought to be. */ 00318 E_WARN("Posterior probabilities are not returned from phone loop search"); 00319 return 0; 00320 } 00321 00322 static ps_seg_t * 00323 phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score) 00324 { 00325 E_WARN("Hypotheses are not returned from phone loop search"); 00326 return NULL; 00327 }