PocketSphinx
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * ms_mgau.c -- Essentially a wrapper that wrap up gauden and 00039 * senone. It supports multi-stream. 00040 * 00041 * 00042 * ********************************************** 00043 * CMU ARPA Speech Project 00044 * 00045 * Copyright (c) 1997 Carnegie Mellon University. 00046 * ALL RIGHTS RESERVED. 00047 * ********************************************** 00048 * HISTORY 00049 * $Log$ 00050 * Revision 1.2 2006/02/22 16:56:01 arthchan2003 00051 * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: Added ms_mgau.[ch] into the trunk. It is a wrapper of ms_gauden and ms_senone 00052 * 00053 * Revision 1.1.2.4 2005/09/25 18:55:19 arthchan2003 00054 * Added a flag to turn on and off precomputation. 00055 * 00056 * Revision 1.1.2.3 2005/08/03 18:53:44 dhdfu 00057 * Add memory deallocation functions. Also move all the initialization 00058 * of ms_mgau_model_t into ms_mgau_init (duh!), which entails removing it 00059 * from decode_anytopo and friends. 00060 * 00061 * Revision 1.1.2.2 2005/08/02 21:05:38 arthchan2003 00062 * 1, Added dist and mgau_active as intermediate variable for computation. 2, Added ms_cont_mgau_frame_eval, which is a multi stream version of GMM computation mainly s3.0 family of tools. 3, Fixed dox-doc. 00063 * 00064 * Revision 1.1.2.1 2005/07/20 19:37:09 arthchan2003 00065 * Added a multi-stream cont_mgau (ms_mgau) which is a wrapper of both gauden and senone. Add ms_mgau_init and model_set_mllr. This allow eliminating 600 lines of code in decode_anytopo/align/allphone. 00066 * 00067 * 00068 * 00069 */ 00070 00071 /* Local headers. */ 00072 #include "ms_mgau.h" 00073 00074 static ps_mgaufuncs_t ms_mgau_funcs = { 00075 "ms", 00076 &ms_cont_mgau_frame_eval, /* frame_eval */ 00077 &ms_mgau_mllr_transform, /* transform */ 00078 &ms_mgau_free /* free */ 00079 }; 00080 00081 ps_mgau_t * 00082 ms_mgau_init(cmd_ln_t *config, logmath_t *lmath, bin_mdef_t *mdef) 00083 { 00084 /* Codebooks */ 00085 ms_mgau_model_t *msg; 00086 ps_mgau_t *mg; 00087 gauden_t *g; 00088 senone_t *s; 00089 00090 msg = (ms_mgau_model_t *) ckd_calloc(1, sizeof(ms_mgau_model_t)); 00091 msg->config = config; 00092 msg->g = NULL; 00093 msg->s = NULL; 00094 00095 g = msg->g = gauden_init(cmd_ln_str_r(config, "-mean"), 00096 cmd_ln_str_r(config, "-var"), 00097 cmd_ln_float32_r(config, "-varfloor"), 00098 lmath); 00099 s = msg->s = senone_init(msg->g, 00100 cmd_ln_str_r(config, "-mixw"), 00101 cmd_ln_str_r(config, "-senmgau"), 00102 cmd_ln_float32_r(config, "-mixwfloor"), 00103 lmath, mdef); 00104 00105 s->aw = cmd_ln_int32_r(config, "-aw"); 00106 00107 /* Verify senone parameters against gauden parameters */ 00108 if (s->n_feat != g->n_feat) 00109 E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat, 00110 s->n_feat); 00111 if (s->n_cw != g->n_density) 00112 E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n", 00113 g->n_density, s->n_cw); 00114 if (s->n_gauden > g->n_mgau) 00115 E_FATAL("Senones need more codebooks (%d) than present (%d)\n", 00116 s->n_gauden, g->n_mgau); 00117 if (s->n_gauden < g->n_mgau) 00118 E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n", 00119 s->n_gauden, g->n_mgau); 00120 00121 msg->topn = cmd_ln_int32_r(config, "-topn"); 00122 E_INFO("The value of topn: %d\n", msg->topn); 00123 if (msg->topn == 0 || msg->topn > msg->g->n_density) { 00124 E_WARN 00125 ("-topn argument (%d) invalid or > #density codewords (%d); set to latter\n", 00126 msg->topn, msg->g->n_density); 00127 msg->topn = msg->g->n_density; 00128 } 00129 00130 msg->dist = (gauden_dist_t ***) 00131 ckd_calloc_3d(g->n_mgau, g->n_feat, msg->topn, 00132 sizeof(gauden_dist_t)); 00133 msg->mgau_active = ckd_calloc(g->n_mgau, sizeof(int8)); 00134 00135 mg = (ps_mgau_t *)msg; 00136 mg->vt = &ms_mgau_funcs; 00137 return mg; 00138 } 00139 00140 void 00141 ms_mgau_free(ps_mgau_t * mg) 00142 { 00143 ms_mgau_model_t *msg = (ms_mgau_model_t *)mg; 00144 if (msg == NULL) 00145 return; 00146 00147 gauden_free(msg->g); 00148 senone_free(msg->s); 00149 ckd_free_3d((void *) msg->dist); 00150 ckd_free(msg->mgau_active); 00151 ckd_free(msg); 00152 } 00153 00154 int 00155 ms_mgau_mllr_transform(ps_mgau_t *s, 00156 ps_mllr_t *mllr) 00157 { 00158 ms_mgau_model_t *msg = (ms_mgau_model_t *)s; 00159 return gauden_mllr_transform(msg->g, mllr, msg->config); 00160 } 00161 00162 int32 00163 ms_cont_mgau_frame_eval(ps_mgau_t * mg, 00164 int16 *senscr, 00165 uint8 *senone_active, 00166 int32 n_senone_active, 00167 mfcc_t ** feat, 00168 int32 frame, 00169 int32 compallsen) 00170 { 00171 ms_mgau_model_t *msg = (ms_mgau_model_t *)mg; 00172 int32 gid; 00173 int32 topn; 00174 int32 best; 00175 gauden_t *g; 00176 senone_t *sen; 00177 00178 topn = ms_mgau_topn(msg); 00179 g = ms_mgau_gauden(msg); 00180 sen = ms_mgau_senone(msg); 00181 00182 if (compallsen) { 00183 int32 s; 00184 00185 for (gid = 0; gid < g->n_mgau; gid++) 00186 gauden_dist(g, gid, topn, feat, msg->dist[gid]); 00187 00188 best = (int32) 0x7fffffff; 00189 for (s = 0; s < sen->n_sen; s++) { 00190 senscr[s] = senone_eval(sen, s, msg->dist[sen->mgau[s]], topn); 00191 if (best > senscr[s]) { 00192 best = senscr[s]; 00193 } 00194 } 00195 00196 /* Normalize senone scores */ 00197 for (s = 0; s < sen->n_sen; s++) { 00198 int32 bs = senscr[s] - best; 00199 if (bs > 32767) 00200 bs = 32767; 00201 if (bs < -32768) 00202 bs = -32768; 00203 senscr[s] = bs; 00204 } 00205 } 00206 else { 00207 int32 i, n; 00208 /* Flag all active mixture-gaussian codebooks */ 00209 for (gid = 0; gid < g->n_mgau; gid++) 00210 msg->mgau_active[gid] = 0; 00211 00212 n = 0; 00213 for (i = 0; i < n_senone_active; i++) { 00214 /* senone_active consists of deltas. */ 00215 int32 s = senone_active[i] + n; 00216 msg->mgau_active[sen->mgau[s]] = 1; 00217 n = s; 00218 } 00219 00220 /* Compute topn gaussian density values (for active codebooks) */ 00221 for (gid = 0; gid < g->n_mgau; gid++) { 00222 if (msg->mgau_active[gid]) 00223 gauden_dist(g, gid, topn, feat, msg->dist[gid]); 00224 } 00225 00226 best = (int32) 0x7fffffff; 00227 n = 0; 00228 for (i = 0; i < n_senone_active; i++) { 00229 int32 s = senone_active[i] + n; 00230 senscr[s] = senone_eval(sen, s, msg->dist[sen->mgau[s]], topn); 00231 if (best > senscr[s]) { 00232 best = senscr[s]; 00233 } 00234 n = s; 00235 } 00236 00237 /* Normalize senone scores */ 00238 n = 0; 00239 for (i = 0; i < n_senone_active; i++) { 00240 int32 s = senone_active[i] + n; 00241 int32 bs = senscr[s] - best; 00242 if (bs > 32767) 00243 bs = 32767; 00244 if (bs < -32768) 00245 bs = -32768; 00246 senscr[s] = bs; 00247 n = s; 00248 } 00249 } 00250 00251 return 0; 00252 }