PocketSphinx
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 00038 /* System headers */ 00039 #include <stdio.h> 00040 #include <stdlib.h> 00041 #include <string.h> 00042 #include <assert.h> 00043 #include <limits.h> 00044 #include <math.h> 00045 #if defined(__ADSPBLACKFIN__) 00046 #elif !defined(_WIN32_WCE) 00047 #include <sys/types.h> 00048 #endif 00049 00050 #ifndef M_PI 00051 #define M_PI 3.14159265358979323846 00052 #endif 00053 00054 /* SphinxBase headers */ 00055 #include <sphinx_config.h> 00056 #include <sphinxbase/cmd_ln.h> 00057 #include <sphinxbase/fixpoint.h> 00058 #include <sphinxbase/ckd_alloc.h> 00059 #include <sphinxbase/bio.h> 00060 #include <sphinxbase/err.h> 00061 #include <sphinxbase/prim_type.h> 00062 00063 /* Local headers */ 00064 #include "s2_semi_mgau.h" 00065 #include "tied_mgau_common.h" 00066 #include "posixwin32.h" 00067 00068 static ps_mgaufuncs_t s2_semi_mgau_funcs = { 00069 "s2_semi", 00070 &s2_semi_mgau_frame_eval, /* frame_eval */ 00071 &s2_semi_mgau_mllr_transform, /* transform */ 00072 &s2_semi_mgau_free /* free */ 00073 }; 00074 00075 struct vqFeature_s { 00076 int32 score; /* score or distance */ 00077 int32 codeword; /* codeword (vector index) */ 00078 }; 00079 00080 static void 00081 eval_topn(s2_semi_mgau_t *s, int32 feat, mfcc_t *z) 00082 { 00083 int i, ceplen; 00084 vqFeature_t *topn; 00085 00086 topn = s->f[feat]; 00087 ceplen = s->veclen[feat]; 00088 00089 for (i = 0; i < s->max_topn; i++) { 00090 mfcc_t *mean, diff, sqdiff, compl; /* diff, diff^2, component likelihood */ 00091 vqFeature_t vtmp; 00092 mfcc_t *var, d; 00093 mfcc_t *obs; 00094 int32 cw, j; 00095 00096 cw = topn[i].codeword; 00097 mean = s->means[feat][0] + cw * ceplen; 00098 var = s->vars[feat][0] + cw * ceplen; 00099 d = s->dets[feat][cw]; 00100 obs = z; 00101 for (j = 0; j < ceplen; j++) { 00102 diff = *obs++ - *mean++; 00103 sqdiff = MFCCMUL(diff, diff); 00104 compl = MFCCMUL(sqdiff, *var); 00105 d = GMMSUB(d, compl); 00106 ++var; 00107 } 00108 topn[i].score = (int32)d; 00109 if (i == 0) 00110 continue; 00111 vtmp = topn[i]; 00112 for (j = i - 1; j >= 0 && (int32)d > topn[j].score; j--) { 00113 topn[j + 1] = topn[j]; 00114 } 00115 topn[j + 1] = vtmp; 00116 } 00117 } 00118 00119 static void 00120 eval_cb(s2_semi_mgau_t *s, int32 feat, mfcc_t *z) 00121 { 00122 vqFeature_t *worst, *best, *topn; 00123 mfcc_t *mean; 00124 mfcc_t *var, *det, *detP, *detE; 00125 int32 i, ceplen; 00126 00127 best = topn = s->f[feat]; 00128 worst = topn + (s->max_topn - 1); 00129 mean = s->means[feat][0]; 00130 var = s->vars[feat][0]; 00131 det = s->dets[feat]; 00132 detE = det + s->n_density; 00133 ceplen = s->veclen[feat]; 00134 00135 for (detP = det; detP < detE; ++detP) { 00136 mfcc_t diff, sqdiff, compl; /* diff, diff^2, component likelihood */ 00137 mfcc_t d; 00138 mfcc_t *obs; 00139 vqFeature_t *cur; 00140 int32 cw, j; 00141 00142 d = *detP; 00143 obs = z; 00144 cw = detP - det; 00145 for (j = 0; (j < ceplen) && (d >= worst->score); ++j) { 00146 diff = *obs++ - *mean++; 00147 sqdiff = MFCCMUL(diff, diff); 00148 compl = MFCCMUL(sqdiff, *var); 00149 d = GMMSUB(d, compl); 00150 ++var; 00151 } 00152 if (j < ceplen) { 00153 /* terminated early, so not in topn */ 00154 mean += (ceplen - j); 00155 var += (ceplen - j); 00156 continue; 00157 } 00158 if ((int32)d < worst->score) 00159 continue; 00160 for (i = 0; i < s->max_topn; i++) { 00161 /* already there, so don't need to insert */ 00162 if (topn[i].codeword == cw) 00163 break; 00164 } 00165 if (i < s->max_topn) 00166 continue; /* already there. Don't insert */ 00167 /* remaining code inserts codeword and dist in correct spot */ 00168 for (cur = worst - 1; cur >= best && (int32)d >= cur->score; --cur) 00169 memcpy(cur + 1, cur, sizeof(vqFeature_t)); 00170 ++cur; 00171 cur->codeword = cw; 00172 cur->score = (int32)d; 00173 } 00174 } 00175 00176 static void 00177 mgau_dist(s2_semi_mgau_t * s, int32 frame, int32 feat, mfcc_t * z) 00178 { 00179 eval_topn(s, feat, z); 00180 00181 /* If this frame is skipped, do nothing else. */ 00182 if (frame % s->ds_ratio) 00183 return; 00184 00185 /* Evaluate the rest of the codebook (or subset thereof). */ 00186 eval_cb(s, feat, z); 00187 } 00188 00189 static int 00190 mgau_norm(s2_semi_mgau_t *s, int feat) 00191 { 00192 int32 norm; 00193 int j; 00194 00195 /* Compute quantized normalizing constant. */ 00196 norm = s->f[feat][0].score >> SENSCR_SHIFT; 00197 00198 /* Normalize the scores, negate them, and clamp their dynamic range. */ 00199 for (j = 0; j < s->max_topn; ++j) { 00200 s->f[feat][j].score = -((s->f[feat][j].score >> SENSCR_SHIFT) - norm); 00201 if (s->f[feat][j].score > MAX_NEG_ASCR) 00202 s->f[feat][j].score = MAX_NEG_ASCR; 00203 if (s->topn_beam[feat] && s->f[feat][j].score > s->topn_beam[feat]) 00204 break; 00205 } 00206 return j; 00207 } 00208 00209 static int32 00210 get_scores_8b_feat_6(s2_semi_mgau_t * s, int i, 00211 int16 *senone_scores, uint8 *senone_active, 00212 int32 n_senone_active) 00213 { 00214 int32 j, l; 00215 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5; 00216 00217 pid_cw0 = s->mixw[i][s->f[i][0].codeword]; 00218 pid_cw1 = s->mixw[i][s->f[i][1].codeword]; 00219 pid_cw2 = s->mixw[i][s->f[i][2].codeword]; 00220 pid_cw3 = s->mixw[i][s->f[i][3].codeword]; 00221 pid_cw4 = s->mixw[i][s->f[i][4].codeword]; 00222 pid_cw5 = s->mixw[i][s->f[i][5].codeword]; 00223 00224 for (l = j = 0; j < n_senone_active; j++) { 00225 int sen = senone_active[j] + l; 00226 int32 tmp = pid_cw0[sen] + s->f[i][0].score; 00227 00228 tmp = fast_logmath_add(s->lmath_8b, tmp, 00229 pid_cw1[sen] + s->f[i][1].score); 00230 tmp = fast_logmath_add(s->lmath_8b, tmp, 00231 pid_cw2[sen] + s->f[i][2].score); 00232 tmp = fast_logmath_add(s->lmath_8b, tmp, 00233 pid_cw3[sen] + s->f[i][3].score); 00234 tmp = fast_logmath_add(s->lmath_8b, tmp, 00235 pid_cw4[sen] + s->f[i][4].score); 00236 tmp = fast_logmath_add(s->lmath_8b, tmp, 00237 pid_cw5[sen] + s->f[i][5].score); 00238 00239 senone_scores[sen] += tmp; 00240 l = sen; 00241 } 00242 return 0; 00243 } 00244 00245 static int32 00246 get_scores_8b_feat_5(s2_semi_mgau_t * s, int i, 00247 int16 *senone_scores, uint8 *senone_active, 00248 int32 n_senone_active) 00249 { 00250 int32 j, l; 00251 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4; 00252 00253 pid_cw0 = s->mixw[i][s->f[i][0].codeword]; 00254 pid_cw1 = s->mixw[i][s->f[i][1].codeword]; 00255 pid_cw2 = s->mixw[i][s->f[i][2].codeword]; 00256 pid_cw3 = s->mixw[i][s->f[i][3].codeword]; 00257 pid_cw4 = s->mixw[i][s->f[i][4].codeword]; 00258 00259 for (l = j = 0; j < n_senone_active; j++) { 00260 int sen = senone_active[j] + l; 00261 int32 tmp = pid_cw0[sen] + s->f[i][0].score; 00262 00263 tmp = fast_logmath_add(s->lmath_8b, tmp, 00264 pid_cw1[sen] + s->f[i][1].score); 00265 tmp = fast_logmath_add(s->lmath_8b, tmp, 00266 pid_cw2[sen] + s->f[i][2].score); 00267 tmp = fast_logmath_add(s->lmath_8b, tmp, 00268 pid_cw3[sen] + s->f[i][3].score); 00269 tmp = fast_logmath_add(s->lmath_8b, tmp, 00270 pid_cw4[sen] + s->f[i][4].score); 00271 00272 senone_scores[sen] += tmp; 00273 l = sen; 00274 } 00275 return 0; 00276 } 00277 00278 static int32 00279 get_scores_8b_feat_4(s2_semi_mgau_t * s, int i, 00280 int16 *senone_scores, uint8 *senone_active, 00281 int32 n_senone_active) 00282 { 00283 int32 j, l; 00284 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3; 00285 00286 pid_cw0 = s->mixw[i][s->f[i][0].codeword]; 00287 pid_cw1 = s->mixw[i][s->f[i][1].codeword]; 00288 pid_cw2 = s->mixw[i][s->f[i][2].codeword]; 00289 pid_cw3 = s->mixw[i][s->f[i][3].codeword]; 00290 00291 for (l = j = 0; j < n_senone_active; j++) { 00292 int sen = senone_active[j] + l; 00293 int32 tmp = pid_cw0[sen] + s->f[i][0].score; 00294 00295 tmp = fast_logmath_add(s->lmath_8b, tmp, 00296 pid_cw1[sen] + s->f[i][1].score); 00297 tmp = fast_logmath_add(s->lmath_8b, tmp, 00298 pid_cw2[sen] + s->f[i][2].score); 00299 tmp = fast_logmath_add(s->lmath_8b, tmp, 00300 pid_cw3[sen] + s->f[i][3].score); 00301 00302 senone_scores[sen] += tmp; 00303 l = sen; 00304 } 00305 return 0; 00306 } 00307 00308 static int32 00309 get_scores_8b_feat_3(s2_semi_mgau_t * s, int i, 00310 int16 *senone_scores, uint8 *senone_active, 00311 int32 n_senone_active) 00312 { 00313 int32 j, l; 00314 uint8 *pid_cw0, *pid_cw1, *pid_cw2; 00315 00316 pid_cw0 = s->mixw[i][s->f[i][0].codeword]; 00317 pid_cw1 = s->mixw[i][s->f[i][1].codeword]; 00318 pid_cw2 = s->mixw[i][s->f[i][2].codeword]; 00319 00320 for (l = j = 0; j < n_senone_active; j++) { 00321 int sen = senone_active[j] + l; 00322 int32 tmp = pid_cw0[sen] + s->f[i][0].score; 00323 00324 tmp = fast_logmath_add(s->lmath_8b, tmp, 00325 pid_cw1[sen] + s->f[i][1].score); 00326 tmp = fast_logmath_add(s->lmath_8b, tmp, 00327 pid_cw2[sen] + s->f[i][2].score); 00328 00329 senone_scores[sen] += tmp; 00330 l = sen; 00331 } 00332 return 0; 00333 } 00334 00335 static int32 00336 get_scores_8b_feat_2(s2_semi_mgau_t * s, int i, 00337 int16 *senone_scores, uint8 *senone_active, 00338 int32 n_senone_active) 00339 { 00340 int32 j, l; 00341 uint8 *pid_cw0, *pid_cw1; 00342 00343 pid_cw0 = s->mixw[i][s->f[i][0].codeword]; 00344 pid_cw1 = s->mixw[i][s->f[i][1].codeword]; 00345 00346 for (l = j = 0; j < n_senone_active; j++) { 00347 int sen = senone_active[j] + l; 00348 int32 tmp = pid_cw0[sen] + s->f[i][0].score; 00349 00350 tmp = fast_logmath_add(s->lmath_8b, tmp, 00351 pid_cw1[sen] + s->f[i][1].score); 00352 00353 senone_scores[sen] += tmp; 00354 l = sen; 00355 } 00356 return 0; 00357 } 00358 00359 static int32 00360 get_scores_8b_feat_1(s2_semi_mgau_t * s, int i, 00361 int16 *senone_scores, uint8 *senone_active, 00362 int32 n_senone_active) 00363 { 00364 int32 j, l; 00365 uint8 *pid_cw0; 00366 00367 pid_cw0 = s->mixw[i][s->f[i][0].codeword]; 00368 for (l = j = 0; j < n_senone_active; j++) { 00369 int sen = senone_active[j] + l; 00370 int32 tmp = pid_cw0[sen] + s->f[i][0].score; 00371 senone_scores[sen] += tmp; 00372 l = sen; 00373 } 00374 return 0; 00375 } 00376 00377 static int32 00378 get_scores_8b_feat_any(s2_semi_mgau_t * s, int i, int topn, 00379 int16 *senone_scores, uint8 *senone_active, 00380 int32 n_senone_active) 00381 { 00382 int32 j, k, l; 00383 00384 for (l = j = 0; j < n_senone_active; j++) { 00385 int sen = senone_active[j] + l; 00386 uint8 *pid_cw; 00387 int32 tmp; 00388 pid_cw = s->mixw[i][s->f[i][0].codeword]; 00389 tmp = pid_cw[sen] + s->f[i][0].score; 00390 for (k = 1; k < topn; ++k) { 00391 pid_cw = s->mixw[i][s->f[i][k].codeword]; 00392 tmp = fast_logmath_add(s->lmath_8b, tmp, 00393 pid_cw[sen] + s->f[i][k].score); 00394 } 00395 senone_scores[sen] += tmp; 00396 l = sen; 00397 } 00398 return 0; 00399 } 00400 00401 static int32 00402 get_scores_8b_feat(s2_semi_mgau_t * s, int i, int topn, 00403 int16 *senone_scores, uint8 *senone_active, int32 n_senone_active) 00404 { 00405 switch (topn) { 00406 case 6: 00407 return get_scores_8b_feat_6(s, i, senone_scores, 00408 senone_active, n_senone_active); 00409 case 5: 00410 return get_scores_8b_feat_5(s, i, senone_scores, 00411 senone_active, n_senone_active); 00412 case 4: 00413 return get_scores_8b_feat_4(s, i, senone_scores, 00414 senone_active, n_senone_active); 00415 case 3: 00416 return get_scores_8b_feat_3(s, i, senone_scores, 00417 senone_active, n_senone_active); 00418 case 2: 00419 return get_scores_8b_feat_2(s, i, senone_scores, 00420 senone_active, n_senone_active); 00421 case 1: 00422 return get_scores_8b_feat_1(s, i, senone_scores, 00423 senone_active, n_senone_active); 00424 default: 00425 return get_scores_8b_feat_any(s, i, topn, senone_scores, 00426 senone_active, n_senone_active); 00427 } 00428 } 00429 00430 static int32 00431 get_scores_8b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores) 00432 { 00433 int32 j, k; 00434 00435 for (j = 0; j < s->n_sen; j++) { 00436 uint8 *pid_cw; 00437 int32 tmp; 00438 pid_cw = s->mixw[i][s->f[i][0].codeword]; 00439 tmp = pid_cw[j] + s->f[i][0].score; 00440 for (k = 1; k < topn; ++k) { 00441 pid_cw = s->mixw[i][s->f[i][k].codeword]; 00442 tmp = fast_logmath_add(s->lmath_8b, tmp, 00443 pid_cw[j] + s->f[i][k].score); 00444 } 00445 senone_scores[j] += tmp; 00446 } 00447 return 0; 00448 } 00449 00450 static int32 00451 get_scores_4b_feat_6(s2_semi_mgau_t * s, int i, 00452 int16 *senone_scores, uint8 *senone_active, 00453 int32 n_senone_active) 00454 { 00455 int32 j, l; 00456 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5; 00457 uint8 w_den[6][16]; 00458 00459 /* Precompute scaled densities. */ 00460 for (j = 0; j < 16; ++j) { 00461 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score; 00462 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score; 00463 w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score; 00464 w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score; 00465 w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score; 00466 w_den[5][j] = s->mixw_cb[j] + s->f[i][5].score; 00467 } 00468 00469 pid_cw0 = s->mixw[i][s->f[i][0].codeword]; 00470 pid_cw1 = s->mixw[i][s->f[i][1].codeword]; 00471 pid_cw2 = s->mixw[i][s->f[i][2].codeword]; 00472 pid_cw3 = s->mixw[i][s->f[i][3].codeword]; 00473 pid_cw4 = s->mixw[i][s->f[i][4].codeword]; 00474 pid_cw5 = s->mixw[i][s->f[i][5].codeword]; 00475 00476 for (l = j = 0; j < n_senone_active; j++) { 00477 int n = senone_active[j] + l; 00478 int tmp, cw; 00479 00480 if (n & 1) { 00481 cw = pid_cw0[n/2] >> 4; 00482 tmp = w_den[0][cw]; 00483 cw = pid_cw1[n/2] >> 4; 00484 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); 00485 cw = pid_cw2[n/2] >> 4; 00486 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); 00487 cw = pid_cw3[n/2] >> 4; 00488 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]); 00489 cw = pid_cw4[n/2] >> 4; 00490 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]); 00491 cw = pid_cw5[n/2] >> 4; 00492 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]); 00493 } 00494 else { 00495 cw = pid_cw0[n/2] & 0x0f; 00496 tmp = w_den[0][cw]; 00497 cw = pid_cw1[n/2] & 0x0f; 00498 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); 00499 cw = pid_cw2[n/2] & 0x0f; 00500 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); 00501 cw = pid_cw3[n/2] & 0x0f; 00502 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]); 00503 cw = pid_cw4[n/2] & 0x0f; 00504 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]); 00505 cw = pid_cw5[n/2] & 0x0f; 00506 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]); 00507 } 00508 senone_scores[n] += tmp; 00509 l = n; 00510 } 00511 return 0; 00512 } 00513 00514 static int32 00515 get_scores_4b_feat_5(s2_semi_mgau_t * s, int i, 00516 int16 *senone_scores, uint8 *senone_active, 00517 int32 n_senone_active) 00518 { 00519 int32 j, l; 00520 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4; 00521 uint8 w_den[5][16]; 00522 00523 /* Precompute scaled densities. */ 00524 for (j = 0; j < 16; ++j) { 00525 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score; 00526 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score; 00527 w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score; 00528 w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score; 00529 w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score; 00530 } 00531 00532 pid_cw0 = s->mixw[i][s->f[i][0].codeword]; 00533 pid_cw1 = s->mixw[i][s->f[i][1].codeword]; 00534 pid_cw2 = s->mixw[i][s->f[i][2].codeword]; 00535 pid_cw3 = s->mixw[i][s->f[i][3].codeword]; 00536 pid_cw4 = s->mixw[i][s->f[i][4].codeword]; 00537 00538 for (l = j = 0; j < n_senone_active; j++) { 00539 int n = senone_active[j] + l; 00540 int tmp, cw; 00541 00542 if (n & 1) { 00543 cw = pid_cw0[n/2] >> 4; 00544 tmp = w_den[0][cw]; 00545 cw = pid_cw1[n/2] >> 4; 00546 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); 00547 cw = pid_cw2[n/2] >> 4; 00548 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); 00549 cw = pid_cw3[n/2] >> 4; 00550 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]); 00551 cw = pid_cw4[n/2] >> 4; 00552 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]); 00553 } 00554 else { 00555 cw = pid_cw0[n/2] & 0x0f; 00556 tmp = w_den[0][cw]; 00557 cw = pid_cw1[n/2] & 0x0f; 00558 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); 00559 cw = pid_cw2[n/2] & 0x0f; 00560 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); 00561 cw = pid_cw3[n/2] & 0x0f; 00562 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]); 00563 cw = pid_cw4[n/2] & 0x0f; 00564 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]); 00565 } 00566 senone_scores[n] += tmp; 00567 l = n; 00568 } 00569 return 0; 00570 } 00571 00572 static int32 00573 get_scores_4b_feat_4(s2_semi_mgau_t * s, int i, 00574 int16 *senone_scores, uint8 *senone_active, 00575 int32 n_senone_active) 00576 { 00577 int32 j, l; 00578 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3; 00579 uint8 w_den[4][16]; 00580 00581 /* Precompute scaled densities. */ 00582 for (j = 0; j < 16; ++j) { 00583 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score; 00584 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score; 00585 w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score; 00586 w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score; 00587 } 00588 00589 pid_cw0 = s->mixw[i][s->f[i][0].codeword]; 00590 pid_cw1 = s->mixw[i][s->f[i][1].codeword]; 00591 pid_cw2 = s->mixw[i][s->f[i][2].codeword]; 00592 pid_cw3 = s->mixw[i][s->f[i][3].codeword]; 00593 00594 for (l = j = 0; j < n_senone_active; j++) { 00595 int n = senone_active[j] + l; 00596 int tmp, cw; 00597 00598 if (n & 1) { 00599 cw = pid_cw0[n/2] >> 4; 00600 tmp = w_den[0][cw]; 00601 cw = pid_cw1[n/2] >> 4; 00602 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); 00603 cw = pid_cw2[n/2] >> 4; 00604 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); 00605 cw = pid_cw3[n/2] >> 4; 00606 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]); 00607 } 00608 else { 00609 cw = pid_cw0[n/2] & 0x0f; 00610 tmp = w_den[0][cw]; 00611 cw = pid_cw1[n/2] & 0x0f; 00612 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); 00613 cw = pid_cw2[n/2] & 0x0f; 00614 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); 00615 cw = pid_cw3[n/2] & 0x0f; 00616 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]); 00617 } 00618 senone_scores[n] += tmp; 00619 l = n; 00620 } 00621 return 0; 00622 } 00623 00624 static int32 00625 get_scores_4b_feat_3(s2_semi_mgau_t * s, int i, 00626 int16 *senone_scores, uint8 *senone_active, 00627 int32 n_senone_active) 00628 { 00629 int32 j, l; 00630 uint8 *pid_cw0, *pid_cw1, *pid_cw2; 00631 uint8 w_den[3][16]; 00632 00633 /* Precompute scaled densities. */ 00634 for (j = 0; j < 16; ++j) { 00635 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score; 00636 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score; 00637 w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score; 00638 } 00639 00640 pid_cw0 = s->mixw[i][s->f[i][0].codeword]; 00641 pid_cw1 = s->mixw[i][s->f[i][1].codeword]; 00642 pid_cw2 = s->mixw[i][s->f[i][2].codeword]; 00643 00644 for (l = j = 0; j < n_senone_active; j++) { 00645 int n = senone_active[j] + l; 00646 int tmp, cw; 00647 00648 if (n & 1) { 00649 cw = pid_cw0[n/2] >> 4; 00650 tmp = w_den[0][cw]; 00651 cw = pid_cw1[n/2] >> 4; 00652 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); 00653 cw = pid_cw2[n/2] >> 4; 00654 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); 00655 } 00656 else { 00657 cw = pid_cw0[n/2] & 0x0f; 00658 tmp = w_den[0][cw]; 00659 cw = pid_cw1[n/2] & 0x0f; 00660 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); 00661 cw = pid_cw2[n/2] & 0x0f; 00662 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); 00663 } 00664 senone_scores[n] += tmp; 00665 l = n; 00666 } 00667 return 0; 00668 } 00669 00670 static int32 00671 get_scores_4b_feat_2(s2_semi_mgau_t * s, int i, 00672 int16 *senone_scores, uint8 *senone_active, 00673 int32 n_senone_active) 00674 { 00675 int32 j, l; 00676 uint8 *pid_cw0, *pid_cw1; 00677 uint8 w_den[2][16]; 00678 00679 /* Precompute scaled densities. */ 00680 for (j = 0; j < 16; ++j) { 00681 w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score; 00682 w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score; 00683 } 00684 00685 pid_cw0 = s->mixw[i][s->f[i][0].codeword]; 00686 pid_cw1 = s->mixw[i][s->f[i][1].codeword]; 00687 00688 for (l = j = 0; j < n_senone_active; j++) { 00689 int n = senone_active[j] + l; 00690 int tmp, cw; 00691 00692 if (n & 1) { 00693 cw = pid_cw0[n/2] >> 4; 00694 tmp = w_den[0][cw]; 00695 cw = pid_cw1[n/2] >> 4; 00696 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); 00697 } 00698 else { 00699 cw = pid_cw0[n/2] & 0x0f; 00700 tmp = w_den[0][cw]; 00701 cw = pid_cw1[n/2] & 0x0f; 00702 tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); 00703 } 00704 senone_scores[n] += tmp; 00705 l = n; 00706 } 00707 return 0; 00708 } 00709 00710 static int32 00711 get_scores_4b_feat_1(s2_semi_mgau_t * s, int i, 00712 int16 *senone_scores, uint8 *senone_active, 00713 int32 n_senone_active) 00714 { 00715 int32 j, l; 00716 uint8 *pid_cw0; 00717 uint8 w_den[16]; 00718 00719 /* Precompute scaled densities. */ 00720 for (j = 0; j < 16; ++j) { 00721 w_den[j] = s->mixw_cb[j] + s->f[i][0].score; 00722 } 00723 00724 pid_cw0 = s->mixw[i][s->f[i][0].codeword]; 00725 00726 for (l = j = 0; j < n_senone_active; j++) { 00727 int n = senone_active[j] + l; 00728 int tmp, cw; 00729 00730 if (n & 1) { 00731 cw = pid_cw0[n/2] >> 4; 00732 tmp = w_den[cw]; 00733 } 00734 else { 00735 cw = pid_cw0[n/2] & 0x0f; 00736 tmp = w_den[cw]; 00737 } 00738 senone_scores[n] += tmp; 00739 l = n; 00740 } 00741 return 0; 00742 } 00743 00744 static int32 00745 get_scores_4b_feat_any(s2_semi_mgau_t * s, int i, int topn, 00746 int16 *senone_scores, uint8 *senone_active, 00747 int32 n_senone_active) 00748 { 00749 int32 j, k, l; 00750 00751 for (l = j = 0; j < n_senone_active; j++) { 00752 int n = senone_active[j] + l; 00753 int tmp, cw; 00754 uint8 *pid_cw; 00755 00756 pid_cw = s->mixw[i][s->f[i][0].codeword]; 00757 if (n & 1) 00758 cw = pid_cw[n/2] >> 4; 00759 else 00760 cw = pid_cw[n/2] & 0x0f; 00761 tmp = s->mixw_cb[cw] + s->f[i][0].score; 00762 for (k = 1; k < topn; ++k) { 00763 pid_cw = s->mixw[i][s->f[i][k].codeword]; 00764 if (n & 1) 00765 cw = pid_cw[n/2] >> 4; 00766 else 00767 cw = pid_cw[n/2] & 0x0f; 00768 tmp = fast_logmath_add(s->lmath_8b, tmp, 00769 s->mixw_cb[cw] + s->f[i][k].score); 00770 } 00771 senone_scores[n] += tmp; 00772 l = n; 00773 } 00774 return 0; 00775 } 00776 00777 static int32 00778 get_scores_4b_feat(s2_semi_mgau_t * s, int i, int topn, 00779 int16 *senone_scores, uint8 *senone_active, int32 n_senone_active) 00780 { 00781 switch (topn) { 00782 case 6: 00783 return get_scores_4b_feat_6(s, i, senone_scores, 00784 senone_active, n_senone_active); 00785 case 5: 00786 return get_scores_4b_feat_5(s, i, senone_scores, 00787 senone_active, n_senone_active); 00788 case 4: 00789 return get_scores_4b_feat_4(s, i, senone_scores, 00790 senone_active, n_senone_active); 00791 case 3: 00792 return get_scores_4b_feat_3(s, i, senone_scores, 00793 senone_active, n_senone_active); 00794 case 2: 00795 return get_scores_4b_feat_2(s, i, senone_scores, 00796 senone_active, n_senone_active); 00797 case 1: 00798 return get_scores_4b_feat_1(s, i, senone_scores, 00799 senone_active, n_senone_active); 00800 default: 00801 return get_scores_4b_feat_any(s, i, topn, senone_scores, 00802 senone_active, n_senone_active); 00803 } 00804 } 00805 00806 static int32 00807 get_scores_4b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores) 00808 { 00809 int j, last_sen; 00810 00811 j = 0; 00812 /* Number of senones is always even, but don't overrun if it isn't. */ 00813 last_sen = s->n_sen & ~1; 00814 while (j < last_sen) { 00815 uint8 *pid_cw; 00816 int32 tmp0, tmp1; 00817 int k; 00818 00819 pid_cw = s->mixw[i][s->f[i][0].codeword]; 00820 tmp0 = s->mixw_cb[pid_cw[j/2] & 0x0f] + s->f[i][0].score; 00821 tmp1 = s->mixw_cb[pid_cw[j/2] >> 4] + s->f[i][0].score; 00822 for (k = 1; k < topn; ++k) { 00823 int32 w_den0, w_den1; 00824 00825 pid_cw = s->mixw[i][s->f[i][k].codeword]; 00826 w_den0 = s->mixw_cb[pid_cw[j/2] & 0x0f] + s->f[i][k].score; 00827 w_den1 = s->mixw_cb[pid_cw[j/2] >> 4] + s->f[i][k].score; 00828 tmp0 = fast_logmath_add(s->lmath_8b, tmp0, w_den0); 00829 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, w_den1); 00830 } 00831 senone_scores[j++] += tmp0; 00832 senone_scores[j++] += tmp1; 00833 } 00834 return 0; 00835 } 00836 00837 /* 00838 * Compute senone scores for the active senones. 00839 */ 00840 int32 00841 s2_semi_mgau_frame_eval(ps_mgau_t *ps, 00842 int16 *senone_scores, 00843 uint8 *senone_active, 00844 int32 n_senone_active, 00845 mfcc_t ** featbuf, int32 frame, 00846 int32 compallsen) 00847 { 00848 s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps; 00849 int i, topn_idx; 00850 00851 memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores)); 00852 /* No bounds checking is done here, which just means you'll get 00853 * semi-random crap if you request a frame in the future or one 00854 * that's too far in the past. */ 00855 topn_idx = frame % s->n_topn_hist; 00856 s->f = s->topn_hist[topn_idx]; 00857 for (i = 0; i < s->n_feat; ++i) { 00858 /* For past frames this will already be computed. */ 00859 if (frame >= ps_mgau_base(ps)->frame_idx) { 00860 vqFeature_t **lastf; 00861 if (topn_idx == 0) 00862 lastf = s->topn_hist[s->n_topn_hist-1]; 00863 else 00864 lastf = s->topn_hist[topn_idx-1]; 00865 memcpy(s->f[i], lastf[i], sizeof(vqFeature_t) * s->max_topn); 00866 mgau_dist(s, frame, i, featbuf[i]); 00867 s->topn_hist_n[topn_idx][i] = mgau_norm(s, i); 00868 } 00869 if (s->mixw_cb) { 00870 if (compallsen) 00871 get_scores_4b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores); 00872 else 00873 get_scores_4b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores, 00874 senone_active, n_senone_active); 00875 } 00876 else { 00877 if (compallsen) 00878 get_scores_8b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores); 00879 else 00880 get_scores_8b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores, 00881 senone_active, n_senone_active); 00882 } 00883 } 00884 00885 return 0; 00886 } 00887 00888 static int32 00889 read_sendump(s2_semi_mgau_t *s, bin_mdef_t *mdef, char const *file) 00890 { 00891 FILE *fp; 00892 char line[1000]; 00893 int32 i, n, r, c; 00894 int32 do_swap, do_mmap; 00895 size_t filesize, offset; 00896 int n_clust = 0; 00897 int n_feat = s->n_feat; 00898 int n_density = s->n_density; 00899 int n_sen = bin_mdef_n_sen(mdef); 00900 int n_bits = 8; 00901 00902 s->n_sen = n_sen; /* FIXME: Should have been done earlier */ 00903 do_mmap = cmd_ln_boolean_r(s->config, "-mmap"); 00904 00905 if ((fp = fopen(file, "rb")) == NULL) 00906 return -1; 00907 00908 E_INFO("Loading senones from dump file %s\n", file); 00909 /* Read title size, title */ 00910 if (fread(&n, sizeof(int32), 1, fp) != 1) { 00911 E_ERROR_SYSTEM("Failed to read title size from %s", file); 00912 goto error_out; 00913 } 00914 /* This is extremely bogus */ 00915 do_swap = 0; 00916 if (n < 1 || n > 999) { 00917 SWAP_INT32(&n); 00918 if (n < 1 || n > 999) { 00919 E_ERROR("Title length %x in dump file %s out of range\n", n, file); 00920 goto error_out; 00921 } 00922 do_swap = 1; 00923 } 00924 if (fread(line, sizeof(char), n, fp) != n) { 00925 E_ERROR_SYSTEM("Cannot read title"); 00926 goto error_out; 00927 } 00928 if (line[n - 1] != '\0') { 00929 E_ERROR("Bad title in dump file\n"); 00930 goto error_out; 00931 } 00932 E_INFO("%s\n", line); 00933 00934 /* Read header size, header */ 00935 if (fread(&n, sizeof(n), 1, fp) != 1) { 00936 E_ERROR_SYSTEM("Failed to read header size from %s", file); 00937 goto error_out; 00938 } 00939 if (do_swap) SWAP_INT32(&n); 00940 if (fread(line, sizeof(char), n, fp) != n) { 00941 E_ERROR_SYSTEM("Cannot read header"); 00942 goto error_out; 00943 } 00944 if (line[n - 1] != '\0') { 00945 E_ERROR("Bad header in dump file\n"); 00946 goto error_out; 00947 } 00948 00949 /* Read other header strings until string length = 0 */ 00950 for (;;) { 00951 if (fread(&n, sizeof(n), 1, fp) != 1) { 00952 E_ERROR_SYSTEM("Failed to read header string size from %s", file); 00953 goto error_out; 00954 } 00955 if (do_swap) SWAP_INT32(&n); 00956 if (n == 0) 00957 break; 00958 if (fread(line, sizeof(char), n, fp) != n) { 00959 E_ERROR_SYSTEM("Cannot read header"); 00960 goto error_out; 00961 } 00962 /* Look for a cluster count, if present */ 00963 if (!strncmp(line, "feature_count ", strlen("feature_count "))) { 00964 n_feat = atoi(line + strlen("feature_count ")); 00965 } 00966 if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) { 00967 n_density = atoi(line + strlen("mixture_count ")); 00968 } 00969 if (!strncmp(line, "model_count ", strlen("model_count "))) { 00970 n_sen = atoi(line + strlen("model_count ")); 00971 } 00972 if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) { 00973 n_clust = atoi(line + strlen("cluster_count ")); 00974 } 00975 if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) { 00976 n_bits = atoi(line + strlen("cluster_bits ")); 00977 } 00978 } 00979 00980 /* Defaults for #rows, #columns in mixw array. */ 00981 c = n_sen; 00982 r = n_density; 00983 if (n_clust == 0) { 00984 /* Older mixw files have them here, and they might be padded. */ 00985 if (fread(&r, sizeof(r), 1, fp) != 1) { 00986 E_ERROR_SYSTEM("Cannot read #rows"); 00987 goto error_out; 00988 } 00989 if (do_swap) SWAP_INT32(&r); 00990 if (fread(&c, sizeof(c), 1, fp) != 1) { 00991 E_ERROR_SYSTEM("Cannot read #columns"); 00992 goto error_out; 00993 } 00994 if (do_swap) SWAP_INT32(&c); 00995 E_INFO("Rows: %d, Columns: %d\n", r, c); 00996 } 00997 00998 if (n_feat != s->n_feat) { 00999 E_ERROR("Number of feature streams mismatch: %d != %d\n", 01000 n_feat, s->n_feat); 01001 goto error_out; 01002 } 01003 if (n_density != s->n_density) { 01004 E_ERROR("Number of densities mismatch: %d != %d\n", 01005 n_density, s->n_density); 01006 goto error_out; 01007 } 01008 if (n_sen != s->n_sen) { 01009 E_ERROR("Number of senones mismatch: %d != %d\n", 01010 n_sen, s->n_sen); 01011 goto error_out; 01012 } 01013 01014 if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) { 01015 E_ERROR("Cluster count must be 0, 15, or 16\n"); 01016 goto error_out; 01017 } 01018 if (n_clust == 15) 01019 ++n_clust; 01020 01021 if (!((n_bits == 8) || (n_bits == 4))) { 01022 E_ERROR("Cluster count must be 4 or 8\n"); 01023 goto error_out; 01024 } 01025 01026 if (do_mmap) { 01027 E_INFO("Using memory-mapped I/O for senones\n"); 01028 } 01029 offset = ftell(fp); 01030 fseek(fp, 0, SEEK_END); 01031 filesize = ftell(fp); 01032 fseek(fp, offset, SEEK_SET); 01033 01034 /* Allocate memory for pdfs (or memory map them) */ 01035 if (do_mmap) { 01036 s->sendump_mmap = mmio_file_read(file); 01037 /* Get cluster codebook if any. */ 01038 if (n_clust) { 01039 s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset; 01040 offset += n_clust; 01041 } 01042 } 01043 else { 01044 /* Get cluster codebook if any. */ 01045 if (n_clust) { 01046 s->mixw_cb = ckd_calloc(1, n_clust); 01047 if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) { 01048 E_ERROR("Failed to read %d bytes from sendump\n", n_clust); 01049 goto error_out; 01050 } 01051 } 01052 } 01053 01054 /* Set up pointers, or read, or whatever */ 01055 if (s->sendump_mmap) { 01056 s->mixw = ckd_calloc_2d(s->n_feat, n_density, sizeof(*s->mixw)); 01057 for (n = 0; n < n_feat; n++) { 01058 int step = c; 01059 if (n_bits == 4) 01060 step = (step + 1) / 2; 01061 for (i = 0; i < r; i++) { 01062 s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset; 01063 offset += step; 01064 } 01065 } 01066 } 01067 else { 01068 s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw)); 01069 /* Read pdf values and ids */ 01070 for (n = 0; n < n_feat; n++) { 01071 int step = c; 01072 if (n_bits == 4) 01073 step = (step + 1) / 2; 01074 for (i = 0; i < r; i++) { 01075 if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp) 01076 != (size_t) step) { 01077 E_ERROR("Failed to read %d bytes from sendump\n", step); 01078 goto error_out; 01079 } 01080 } 01081 } 01082 } 01083 01084 fclose(fp); 01085 return 0; 01086 error_out: 01087 fclose(fp); 01088 return -1; 01089 } 01090 01091 static int32 01092 read_mixw(s2_semi_mgau_t * s, char const *file_name, double SmoothMin) 01093 { 01094 char **argname, **argval; 01095 char eofchk; 01096 FILE *fp; 01097 int32 byteswap, chksum_present; 01098 uint32 chksum; 01099 float32 *pdf; 01100 int32 i, f, c, n; 01101 int32 n_sen; 01102 int32 n_feat; 01103 int32 n_comp; 01104 int32 n_err; 01105 01106 E_INFO("Reading mixture weights file '%s'\n", file_name); 01107 01108 if ((fp = fopen(file_name, "rb")) == NULL) 01109 E_FATAL("Failed to open mixture weights file '%s' for reading: %s\n", file_name, strerror(errno)); 01110 01111 /* Read header, including argument-value info and 32-bit byteorder magic */ 01112 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) 01113 E_FATAL("Failed to read header from file '%s'\n", file_name); 01114 01115 /* Parse argument-value list */ 01116 chksum_present = 0; 01117 for (i = 0; argname[i]; i++) { 01118 if (strcmp(argname[i], "version") == 0) { 01119 if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0) 01120 E_WARN("Version mismatch(%s): %s, expecting %s\n", 01121 file_name, argval[i], MGAU_MIXW_VERSION); 01122 } 01123 else if (strcmp(argname[i], "chksum0") == 0) { 01124 chksum_present = 1; /* Ignore the associated value */ 01125 } 01126 } 01127 bio_hdrarg_free(argname, argval); 01128 argname = argval = NULL; 01129 01130 chksum = 0; 01131 01132 /* Read #senones, #features, #codewords, arraysize */ 01133 if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1) 01134 || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != 01135 1) 01136 || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) != 01137 1) 01138 || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) { 01139 E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name); 01140 } 01141 if (n_feat != s->n_feat) 01142 E_FATAL("#Features streams(%d) != %d\n", n_feat, s->n_feat); 01143 if (n != n_sen * n_feat * n_comp) { 01144 E_FATAL 01145 ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n", 01146 file_name, i, n_sen, n_feat, n_comp); 01147 } 01148 01149 /* n_sen = number of mixture weights per codeword, which is 01150 * fixed at the number of senones since we have only one codebook. 01151 */ 01152 s->n_sen = n_sen; 01153 01154 /* Quantized mixture weight arrays. */ 01155 s->mixw = ckd_calloc_3d(s->n_feat, s->n_density, n_sen, sizeof(***s->mixw)); 01156 01157 /* Temporary structure to read in floats before conversion to (int32) logs3 */ 01158 pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32)); 01159 01160 /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */ 01161 n_err = 0; 01162 for (i = 0; i < n_sen; i++) { 01163 for (f = 0; f < n_feat; f++) { 01164 if (bio_fread((void *) pdf, sizeof(float32), 01165 n_comp, fp, byteswap, &chksum) != n_comp) { 01166 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name); 01167 } 01168 01169 /* Normalize and floor */ 01170 if (vector_sum_norm(pdf, n_comp) <= 0.0) 01171 n_err++; 01172 vector_floor(pdf, n_comp, SmoothMin); 01173 vector_sum_norm(pdf, n_comp); 01174 01175 /* Convert to LOG, quantize, and transpose */ 01176 for (c = 0; c < n_comp; c++) { 01177 int32 qscr; 01178 01179 qscr = -logmath_log(s->lmath_8b, pdf[c]); 01180 if ((qscr > MAX_NEG_MIXW) || (qscr < 0)) 01181 qscr = MAX_NEG_MIXW; 01182 s->mixw[f][c][i] = qscr; 01183 } 01184 } 01185 } 01186 if (n_err > 0) 01187 E_WARN("Weight normalization failed for %d senones\n", n_err); 01188 01189 ckd_free(pdf); 01190 01191 if (chksum_present) 01192 bio_verify_chksum(fp, byteswap, chksum); 01193 01194 if (fread(&eofchk, 1, 1, fp) == 1) 01195 E_FATAL("More data than expected in %s\n", file_name); 01196 01197 fclose(fp); 01198 01199 E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp); 01200 return n_sen; 01201 } 01202 01203 01204 static int 01205 split_topn(char const *str, uint8 *out, int nfeat) 01206 { 01207 char *topn_list = ckd_salloc(str); 01208 char *c, *cc; 01209 int i, maxn; 01210 01211 c = topn_list; 01212 i = 0; 01213 maxn = 0; 01214 while (i < nfeat && (cc = strchr(c, ',')) != NULL) { 01215 *cc = '\0'; 01216 out[i] = atoi(c); 01217 if (out[i] > maxn) maxn = out[i]; 01218 c = cc + 1; 01219 ++i; 01220 } 01221 if (i < nfeat && *c != '\0') { 01222 out[i] = atoi(c); 01223 if (out[i] > maxn) maxn = out[i]; 01224 ++i; 01225 } 01226 while (i < nfeat) 01227 out[i++] = maxn; 01228 01229 ckd_free(topn_list); 01230 return maxn; 01231 } 01232 01233 01234 ps_mgau_t * 01235 s2_semi_mgau_init(acmod_t *acmod) 01236 { 01237 s2_semi_mgau_t *s; 01238 ps_mgau_t *ps; 01239 char const *sendump_path; 01240 int i; 01241 01242 s = ckd_calloc(1, sizeof(*s)); 01243 s->config = acmod->config; 01244 01245 s->lmath = logmath_retain(acmod->lmath); 01246 /* Log-add table. */ 01247 s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE); 01248 if (s->lmath_8b == NULL) 01249 goto error_out; 01250 /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */ 01251 if (logmath_get_width(s->lmath_8b) != 1) { 01252 E_ERROR("Log base %f is too small to represent add table in 8 bits\n", 01253 logmath_get_base(s->lmath_8b)); 01254 goto error_out; 01255 } 01256 01257 /* Read means and variances. */ 01258 if ((s->g = gauden_init(cmd_ln_str_r(s->config, "-mean"), 01259 cmd_ln_str_r(s->config, "-var"), 01260 cmd_ln_float32_r(s->config, "-varfloor"), 01261 s->lmath)) == NULL) 01262 goto error_out; 01263 /* Currently only a single codebook is supported. */ 01264 if (s->g->n_mgau != 1) 01265 goto error_out; 01266 /* FIXME: maintaining pointers for convenience for now */ 01267 s->means = s->g->mean[0]; 01268 s->vars = s->g->var[0]; 01269 s->dets = s->g->det[0]; 01270 s->veclen = s->g->featlen; 01271 /* Verify n_feat and veclen, against acmod. */ 01272 s->n_feat = s->g->n_feat; 01273 if (s->n_feat != feat_dimension1(acmod->fcb)) { 01274 E_ERROR("Number of streams does not match: %d != %d\n", 01275 s->n_feat, feat_dimension(acmod->fcb)); 01276 goto error_out; 01277 } 01278 for (i = 0; i < s->n_feat; ++i) { 01279 if (s->veclen[i] != feat_dimension2(acmod->fcb, i)) { 01280 E_ERROR("Dimension of stream %d does not match: %d != %d\n", 01281 s->veclen[i], feat_dimension2(acmod->fcb, i)); 01282 goto error_out; 01283 } 01284 } 01285 s->n_density = s->g->n_density; 01286 /* Read mixture weights */ 01287 if ((sendump_path = cmd_ln_str_r(s->config, "-sendump"))) { 01288 if (read_sendump(s, acmod->mdef, sendump_path) < 0) { 01289 goto error_out; 01290 } 01291 } 01292 else { 01293 if (read_mixw(s, cmd_ln_str_r(s->config, "-mixw"), 01294 cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) { 01295 goto error_out; 01296 } 01297 } 01298 s->ds_ratio = cmd_ln_int32_r(s->config, "-ds"); 01299 01300 /* Determine top-N for each feature */ 01301 s->topn_beam = ckd_calloc(s->n_feat, sizeof(*s->topn_beam)); 01302 s->max_topn = cmd_ln_int32_r(s->config, "-topn"); 01303 split_topn(cmd_ln_str_r(s->config, "-topn_beam"), s->topn_beam, s->n_feat); 01304 E_INFO("Maximum top-N: %d ", s->max_topn); 01305 E_INFOCONT("Top-N beams:"); 01306 for (i = 0; i < s->n_feat; ++i) { 01307 E_INFOCONT(" %d", s->topn_beam[i]); 01308 } 01309 E_INFOCONT("\n"); 01310 01311 /* Top-N scores from recent frames */ 01312 s->n_topn_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2; 01313 s->topn_hist = (vqFeature_t ***) 01314 ckd_calloc_3d(s->n_topn_hist, s->n_feat, s->max_topn, 01315 sizeof(***s->topn_hist)); 01316 s->topn_hist_n = ckd_calloc_2d(s->n_topn_hist, s->n_feat, 01317 sizeof(**s->topn_hist_n)); 01318 for (i = 0; i < s->n_topn_hist; ++i) { 01319 int j; 01320 for (j = 0; j < s->n_feat; ++j) { 01321 int k; 01322 for (k = 0; k < s->max_topn; ++k) { 01323 s->topn_hist[i][j][k].score = WORST_DIST; 01324 s->topn_hist[i][j][k].codeword = k; 01325 } 01326 } 01327 } 01328 01329 ps = (ps_mgau_t *)s; 01330 ps->vt = &s2_semi_mgau_funcs; 01331 return ps; 01332 error_out: 01333 s2_semi_mgau_free(ps_mgau_base(s)); 01334 return NULL; 01335 } 01336 01337 int 01338 s2_semi_mgau_mllr_transform(ps_mgau_t *ps, 01339 ps_mllr_t *mllr) 01340 { 01341 s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps; 01342 return gauden_mllr_transform(s->g, mllr, s->config); 01343 } 01344 01345 void 01346 s2_semi_mgau_free(ps_mgau_t *ps) 01347 { 01348 s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps; 01349 01350 logmath_free(s->lmath); 01351 logmath_free(s->lmath_8b); 01352 if (s->sendump_mmap) { 01353 ckd_free_2d(s->mixw); 01354 mmio_file_unmap(s->sendump_mmap); 01355 } 01356 else { 01357 ckd_free_3d(s->mixw); 01358 } 01359 gauden_free(s->g); 01360 ckd_free(s->topn_beam); 01361 ckd_free_2d(s->topn_hist_n); 01362 ckd_free_3d((void **)s->topn_hist); 01363 ckd_free(s); 01364 }