PocketSphinx
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2010 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 00038 /* System headers */ 00039 #include <stdio.h> 00040 #include <stdlib.h> 00041 #include <string.h> 00042 #include <assert.h> 00043 #include <limits.h> 00044 #include <math.h> 00045 #if defined(__ADSPBLACKFIN__) 00046 #elif !defined(_WIN32_WCE) 00047 #include <sys/types.h> 00048 #endif 00049 00050 #ifndef M_PI 00051 #define M_PI 3.14159265358979323846 00052 #endif 00053 00054 /* SphinxBase headers */ 00055 #include <sphinx_config.h> 00056 #include <sphinxbase/cmd_ln.h> 00057 #include <sphinxbase/fixpoint.h> 00058 #include <sphinxbase/ckd_alloc.h> 00059 #include <sphinxbase/bio.h> 00060 #include <sphinxbase/err.h> 00061 #include <sphinxbase/prim_type.h> 00062 00063 /* Local headers */ 00064 #include "tied_mgau_common.h" 00065 #include "ptm_mgau.h" 00066 #include "posixwin32.h" 00067 00068 static ps_mgaufuncs_t ptm_mgau_funcs = { 00069 "ptm", 00070 &ptm_mgau_frame_eval, /* frame_eval */ 00071 &ptm_mgau_mllr_transform, /* transform */ 00072 &ptm_mgau_free /* free */ 00073 }; 00074 00075 #define COMPUTE_GMM_MAP(_idx) \ 00076 diff[_idx] = obs[_idx] - mean[_idx]; \ 00077 sqdiff[_idx] = MFCCMUL(diff[_idx], diff[_idx]); \ 00078 compl[_idx] = MFCCMUL(sqdiff[_idx], var[_idx]); 00079 #define COMPUTE_GMM_REDUCE(_idx) \ 00080 d = GMMSUB(d, compl[_idx]); 00081 00082 static void 00083 insertion_sort_topn(ptm_topn_t *topn, int i, int32 d) 00084 { 00085 ptm_topn_t vtmp; 00086 int j; 00087 00088 topn[i].score = d; 00089 if (i == 0) 00090 return; 00091 vtmp = topn[i]; 00092 for (j = i - 1; j >= 0 && d > topn[j].score; j--) { 00093 topn[j + 1] = topn[j]; 00094 } 00095 topn[j + 1] = vtmp; 00096 } 00097 00098 static int 00099 eval_topn(ptm_mgau_t *s, int cb, int feat, mfcc_t *z) 00100 { 00101 ptm_topn_t *topn; 00102 int i, ceplen; 00103 00104 topn = s->f->topn[cb][feat]; 00105 ceplen = s->g->featlen[feat]; 00106 00107 for (i = 0; i < s->max_topn; i++) { 00108 mfcc_t *mean, diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */ 00109 mfcc_t *var, d; 00110 mfcc_t *obs; 00111 int32 cw, j; 00112 00113 cw = topn[i].cw; 00114 mean = s->g->mean[cb][feat][0] + cw * ceplen; 00115 var = s->g->var[cb][feat][0] + cw * ceplen; 00116 d = s->g->det[cb][feat][cw]; 00117 obs = z; 00118 for (j = 0; j < ceplen % 4; ++j) { 00119 diff[0] = *obs++ - *mean++; 00120 sqdiff[0] = MFCCMUL(diff[0], diff[0]); 00121 compl[0] = MFCCMUL(sqdiff[0], *var); 00122 d = GMMSUB(d, compl[0]); 00123 ++var; 00124 } 00125 /* We could vectorize this but it's unlikely to make much 00126 * difference as the outer loop here isn't very big. */ 00127 for (;j < ceplen; j += 4) { 00128 COMPUTE_GMM_MAP(0); 00129 COMPUTE_GMM_MAP(1); 00130 COMPUTE_GMM_MAP(2); 00131 COMPUTE_GMM_MAP(3); 00132 COMPUTE_GMM_REDUCE(0); 00133 COMPUTE_GMM_REDUCE(1); 00134 COMPUTE_GMM_REDUCE(2); 00135 COMPUTE_GMM_REDUCE(3); 00136 var += 4; 00137 obs += 4; 00138 mean += 4; 00139 } 00140 insertion_sort_topn(topn, i, (int32)d); 00141 } 00142 00143 return topn[0].score; 00144 } 00145 00146 /* This looks bad, but it actually isn't. Less than 1% of eval_cb's 00147 * time is spent doing this. */ 00148 static void 00149 insertion_sort_cb(ptm_topn_t **cur, ptm_topn_t *worst, ptm_topn_t *best, 00150 int cw, int32 intd) 00151 { 00152 for (*cur = worst - 1; *cur >= best && intd >= (*cur)->score; --*cur) 00153 memcpy(*cur + 1, *cur, sizeof(**cur)); 00154 ++*cur; 00155 (*cur)->cw = cw; 00156 (*cur)->score = intd; 00157 } 00158 00159 static int 00160 eval_cb(ptm_mgau_t *s, int cb, int feat, mfcc_t *z) 00161 { 00162 ptm_topn_t *worst, *best, *topn; 00163 mfcc_t *mean; 00164 mfcc_t *var, *det, *detP, *detE; 00165 int32 i, ceplen; 00166 00167 best = topn = s->f->topn[cb][feat]; 00168 worst = topn + (s->max_topn - 1); 00169 mean = s->g->mean[cb][feat][0]; 00170 var = s->g->var[cb][feat][0]; 00171 det = s->g->det[cb][feat]; 00172 detE = det + s->g->n_density; 00173 ceplen = s->g->featlen[feat]; 00174 00175 for (detP = det; detP < detE; ++detP) { 00176 mfcc_t diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */ 00177 mfcc_t d, thresh; 00178 mfcc_t *obs; 00179 ptm_topn_t *cur; 00180 int32 cw, j; 00181 00182 d = *detP; 00183 thresh = (mfcc_t) worst->score; /* Avoid int-to-float conversions */ 00184 obs = z; 00185 cw = detP - det; 00186 00187 /* Unroll the loop starting with the first dimension(s). In 00188 * theory this might be a bit faster if this Gaussian gets 00189 * "knocked out" by C0. In practice not. */ 00190 for (j = 0; (j < ceplen % 4) && (d >= thresh); ++j) { 00191 diff[0] = *obs++ - *mean++; 00192 sqdiff[0] = MFCCMUL(diff[0], diff[0]); 00193 compl[0] = MFCCMUL(sqdiff[0], *var++); 00194 d = GMMSUB(d, compl[0]); 00195 } 00196 /* Now do 4 dimensions at a time. You'd think that GCC would 00197 * vectorize this? Apparently not. And it's right, because 00198 * that won't make this any faster, at least on x86-64. */ 00199 for (; j < ceplen && d >= thresh; j += 4) { 00200 COMPUTE_GMM_MAP(0); 00201 COMPUTE_GMM_MAP(1); 00202 COMPUTE_GMM_MAP(2); 00203 COMPUTE_GMM_MAP(3); 00204 COMPUTE_GMM_REDUCE(0); 00205 COMPUTE_GMM_REDUCE(1); 00206 COMPUTE_GMM_REDUCE(2); 00207 COMPUTE_GMM_REDUCE(3); 00208 var += 4; 00209 obs += 4; 00210 mean += 4; 00211 } 00212 if (j < ceplen) { 00213 /* terminated early, so not in topn */ 00214 mean += (ceplen - j); 00215 var += (ceplen - j); 00216 continue; 00217 } 00218 if (d < thresh) 00219 continue; 00220 for (i = 0; i < s->max_topn; i++) { 00221 /* already there, so don't need to insert */ 00222 if (topn[i].cw == cw) 00223 break; 00224 } 00225 if (i < s->max_topn) 00226 continue; /* already there. Don't insert */ 00227 insertion_sort_cb(&cur, worst, best, cw, (int32)d); 00228 } 00229 00230 return best->score; 00231 } 00232 00236 static int 00237 ptm_mgau_codebook_eval(ptm_mgau_t *s, mfcc_t **z, int frame) 00238 { 00239 int i, j; 00240 00241 /* First evaluate top-N from previous frame. */ 00242 for (i = 0; i < s->g->n_mgau; ++i) 00243 for (j = 0; j < s->g->n_feat; ++j) 00244 eval_topn(s, i, j, z[j]); 00245 00246 /* If frame downsampling is in effect, possibly do nothing else. */ 00247 if (frame % s->ds_ratio) 00248 return 0; 00249 00250 /* Evaluate remaining codebooks. */ 00251 for (i = 0; i < s->g->n_mgau; ++i) { 00252 if (bitvec_is_clear(s->f->mgau_active, i)) 00253 continue; 00254 for (j = 0; j < s->g->n_feat; ++j) { 00255 eval_cb(s, i, j, z[j]); 00256 } 00257 } 00258 00259 /* Normalize densities to produce "posterior probabilities", 00260 * i.e. things with a reasonable dynamic range, then scale and 00261 * clamp them to the acceptable range. This is actually done 00262 * solely to ensure that we can use fast_logmath_add(). Note that 00263 * unless we share the same normalizer across all codebooks for 00264 * each feature stream we get defective scores (that's why these 00265 * loops are inside out - doing it per-feature should give us 00266 * greater precision). */ 00267 for (j = 0; j < s->g->n_feat; ++j) { 00268 int32 norm = 0x7fffffff; 00269 for (i = 0; i < s->g->n_mgau; ++i) { 00270 if (bitvec_is_clear(s->f->mgau_active, i)) 00271 continue; 00272 if (norm > s->f->topn[i][j][0].score >> SENSCR_SHIFT) 00273 norm = s->f->topn[i][j][0].score >> SENSCR_SHIFT; 00274 } 00275 assert(norm != 0x7fffffff); 00276 for (i = 0; i < s->g->n_mgau; ++i) { 00277 int32 k; 00278 if (bitvec_is_clear(s->f->mgau_active, i)) 00279 continue; 00280 for (k = 0; k < s->max_topn; ++k) { 00281 s->f->topn[i][j][k].score >>= SENSCR_SHIFT; 00282 s->f->topn[i][j][k].score -= norm; 00283 s->f->topn[i][j][k].score = -s->f->topn[i][j][k].score; 00284 if (s->f->topn[i][j][k].score > MAX_NEG_ASCR) 00285 s->f->topn[i][j][k].score = MAX_NEG_ASCR; 00286 } 00287 } 00288 } 00289 00290 return 0; 00291 } 00292 00293 static int 00294 ptm_mgau_calc_cb_active(ptm_mgau_t *s, uint8 *senone_active, 00295 int32 n_senone_active, int compallsen) 00296 { 00297 int i, lastsen; 00298 00299 if (compallsen) { 00300 bitvec_set_all(s->f->mgau_active, s->g->n_mgau); 00301 return 0; 00302 } 00303 bitvec_clear_all(s->f->mgau_active, s->g->n_mgau); 00304 for (lastsen = i = 0; i < n_senone_active; ++i) { 00305 int sen = senone_active[i] + lastsen; 00306 int cb = s->sen2cb[sen]; 00307 bitvec_set(s->f->mgau_active, cb); 00308 lastsen = sen; 00309 } 00310 E_DEBUG(1, ("Active codebooks:")); 00311 for (i = 0; i < s->g->n_mgau; ++i) { 00312 if (bitvec_is_clear(s->f->mgau_active, i)) 00313 continue; 00314 E_DEBUGCONT(1, (" %d", i)); 00315 } 00316 E_DEBUGCONT(1, ("\n")); 00317 return 0; 00318 } 00319 00323 static int 00324 ptm_mgau_senone_eval(ptm_mgau_t *s, int16 *senone_scores, 00325 uint8 *senone_active, int32 n_senone_active, 00326 int compall) 00327 { 00328 int i, lastsen, bestscore; 00329 00330 memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores)); 00331 /* FIXME: This is the non-cache-efficient way to do this. We want 00332 * to evaluate one codeword at a time but this requires us to have 00333 * a reverse codebook to senone mapping, which we don't have 00334 * (yet), since different codebooks have different top-N 00335 * codewords. */ 00336 if (compall) 00337 n_senone_active = s->n_sen; 00338 bestscore = 0x7fffffff; 00339 for (lastsen = i = 0; i < n_senone_active; ++i) { 00340 int sen, f, cb; 00341 int ascore; 00342 00343 if (compall) 00344 sen = i; 00345 else 00346 sen = senone_active[i] + lastsen; 00347 lastsen = sen; 00348 cb = s->sen2cb[sen]; 00349 00350 if (bitvec_is_clear(s->f->mgau_active, cb)) { 00351 int j; 00352 /* Because senone_active is deltas we can't really "knock 00353 * out" senones from pruned codebooks, and in any case, 00354 * it wouldn't make any difference to the search code, 00355 * which doesn't expect senone_active to change. */ 00356 for (f = 0; f < s->g->n_feat; ++f) { 00357 for (j = 0; j < s->max_topn; ++j) { 00358 s->f->topn[cb][f][j].score = MAX_NEG_ASCR; 00359 } 00360 } 00361 } 00362 /* For each feature, log-sum codeword scores + mixw to get 00363 * feature density, then sum (multiply) to get ascore */ 00364 ascore = 0; 00365 for (f = 0; f < s->g->n_feat; ++f) { 00366 ptm_topn_t *topn; 00367 int j, fden = 0; 00368 topn = s->f->topn[cb][f]; 00369 for (j = 0; j < s->max_topn; ++j) { 00370 int mixw; 00371 /* Find mixture weight for this codeword. */ 00372 if (s->mixw_cb) { 00373 int dcw = s->mixw[f][topn[j].cw][sen/2]; 00374 dcw = (dcw & 1) ? dcw >> 4 : dcw & 0x0f; 00375 mixw = s->mixw_cb[dcw]; 00376 } 00377 else { 00378 mixw = s->mixw[f][topn[j].cw][sen]; 00379 } 00380 if (j == 0) 00381 fden = mixw + topn[j].score; 00382 else 00383 fden = fast_logmath_add(s->lmath_8b, fden, 00384 mixw + topn[j].score); 00385 E_DEBUG(3, ("fden[%d][%d] l+= %d + %d = %d\n", 00386 sen, f, mixw, topn[j].score, fden)); 00387 } 00388 ascore += fden; 00389 } 00390 if (ascore < bestscore) bestscore = ascore; 00391 senone_scores[sen] = ascore; 00392 } 00393 /* Normalize the scores again (finishing the job we started above 00394 * in ptm_mgau_codebook_eval...) */ 00395 for (i = 0; i < s->n_sen; ++i) { 00396 senone_scores[i] -= bestscore; 00397 } 00398 00399 return 0; 00400 } 00401 00405 int32 00406 ptm_mgau_frame_eval(ps_mgau_t *ps, 00407 int16 *senone_scores, 00408 uint8 *senone_active, 00409 int32 n_senone_active, 00410 mfcc_t ** featbuf, int32 frame, 00411 int32 compallsen) 00412 { 00413 ptm_mgau_t *s = (ptm_mgau_t *)ps; 00414 int fast_eval_idx; 00415 00416 /* Find the appropriate frame in the rotating history buffer 00417 * corresponding to the requested input frame. No bounds checking 00418 * is done here, which just means you'll get semi-random crap if 00419 * you request a frame in the future or one that's too far in the 00420 * past. Since the history buffer is just used for fast match 00421 * that might not be fatal. */ 00422 fast_eval_idx = frame % s->n_fast_hist; 00423 s->f = s->hist + fast_eval_idx; 00424 /* Compute the top-N codewords for every codebook, unless this 00425 * is a past frame, in which case we already have them (we 00426 * hope!) */ 00427 if (frame >= ps_mgau_base(ps)->frame_idx) { 00428 ptm_fast_eval_t *lastf; 00429 /* Get the previous frame's top-N information (on the 00430 * first frame of the input this is just all WORST_DIST, 00431 * no harm in that) */ 00432 if (fast_eval_idx == 0) 00433 lastf = s->hist + s->n_fast_hist - 1; 00434 else 00435 lastf = s->hist + fast_eval_idx - 1; 00436 /* Copy in initial top-N info */ 00437 memcpy(s->f->topn[0][0], lastf->topn[0][0], 00438 s->g->n_mgau * s->g->n_feat * s->max_topn * sizeof(ptm_topn_t)); 00439 /* Generate initial active codebook list (this might not be 00440 * necessary) */ 00441 ptm_mgau_calc_cb_active(s, senone_active, n_senone_active, compallsen); 00442 /* Now evaluate top-N, prune, and evaluate remaining codebooks. */ 00443 ptm_mgau_codebook_eval(s, featbuf, frame); 00444 } 00445 /* Evaluate intersection of active senones and active codebooks. */ 00446 ptm_mgau_senone_eval(s, senone_scores, senone_active, 00447 n_senone_active, compallsen); 00448 00449 return 0; 00450 } 00451 00452 static int32 00453 read_sendump(ptm_mgau_t *s, bin_mdef_t *mdef, char const *file) 00454 { 00455 FILE *fp; 00456 char line[1000]; 00457 int32 i, n, r, c; 00458 int32 do_swap, do_mmap; 00459 size_t filesize, offset; 00460 int n_clust = 0; 00461 int n_feat = s->g->n_feat; 00462 int n_density = s->g->n_density; 00463 int n_sen = bin_mdef_n_sen(mdef); 00464 int n_bits = 8; 00465 00466 s->n_sen = n_sen; /* FIXME: Should have been done earlier */ 00467 do_mmap = cmd_ln_boolean_r(s->config, "-mmap"); 00468 00469 if ((fp = fopen(file, "rb")) == NULL) 00470 return -1; 00471 00472 E_INFO("Loading senones from dump file %s\n", file); 00473 /* Read title size, title */ 00474 if (fread(&n, sizeof(int32), 1, fp) != 1) { 00475 E_ERROR_SYSTEM("Failed to read title size from %s", file); 00476 goto error_out; 00477 } 00478 /* This is extremely bogus */ 00479 do_swap = 0; 00480 if (n < 1 || n > 999) { 00481 SWAP_INT32(&n); 00482 if (n < 1 || n > 999) { 00483 E_ERROR("Title length %x in dump file %s out of range\n", n, file); 00484 goto error_out; 00485 } 00486 do_swap = 1; 00487 } 00488 if (fread(line, sizeof(char), n, fp) != n) { 00489 E_ERROR_SYSTEM("Cannot read title"); 00490 goto error_out; 00491 } 00492 if (line[n - 1] != '\0') { 00493 E_ERROR("Bad title in dump file\n"); 00494 goto error_out; 00495 } 00496 E_INFO("%s\n", line); 00497 00498 /* Read header size, header */ 00499 if (fread(&n, sizeof(n), 1, fp) != 1) { 00500 E_ERROR_SYSTEM("Failed to read header size from %s", file); 00501 goto error_out; 00502 } 00503 if (do_swap) SWAP_INT32(&n); 00504 if (fread(line, sizeof(char), n, fp) != n) { 00505 E_ERROR_SYSTEM("Cannot read header"); 00506 goto error_out; 00507 } 00508 if (line[n - 1] != '\0') { 00509 E_ERROR("Bad header in dump file\n"); 00510 goto error_out; 00511 } 00512 00513 /* Read other header strings until string length = 0 */ 00514 for (;;) { 00515 if (fread(&n, sizeof(n), 1, fp) != 1) { 00516 E_ERROR_SYSTEM("Failed to read header string size from %s", file); 00517 goto error_out; 00518 } 00519 if (do_swap) SWAP_INT32(&n); 00520 if (n == 0) 00521 break; 00522 if (fread(line, sizeof(char), n, fp) != n) { 00523 E_ERROR_SYSTEM("Cannot read header"); 00524 goto error_out; 00525 } 00526 /* Look for a cluster count, if present */ 00527 if (!strncmp(line, "feature_count ", strlen("feature_count "))) { 00528 n_feat = atoi(line + strlen("feature_count ")); 00529 } 00530 if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) { 00531 n_density = atoi(line + strlen("mixture_count ")); 00532 } 00533 if (!strncmp(line, "model_count ", strlen("model_count "))) { 00534 n_sen = atoi(line + strlen("model_count ")); 00535 } 00536 if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) { 00537 n_clust = atoi(line + strlen("cluster_count ")); 00538 } 00539 if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) { 00540 n_bits = atoi(line + strlen("cluster_bits ")); 00541 } 00542 } 00543 00544 /* Defaults for #rows, #columns in mixw array. */ 00545 c = n_sen; 00546 r = n_density; 00547 if (n_clust == 0) { 00548 /* Older mixw files have them here, and they might be padded. */ 00549 if (fread(&r, sizeof(r), 1, fp) != 1) { 00550 E_ERROR_SYSTEM("Cannot read #rows"); 00551 goto error_out; 00552 } 00553 if (do_swap) SWAP_INT32(&r); 00554 if (fread(&c, sizeof(c), 1, fp) != 1) { 00555 E_ERROR_SYSTEM("Cannot read #columns"); 00556 goto error_out; 00557 } 00558 if (do_swap) SWAP_INT32(&c); 00559 E_INFO("Rows: %d, Columns: %d\n", r, c); 00560 } 00561 00562 if (n_feat != s->g->n_feat) { 00563 E_ERROR("Number of feature streams mismatch: %d != %d\n", 00564 n_feat, s->g->n_feat); 00565 goto error_out; 00566 } 00567 if (n_density != s->g->n_density) { 00568 E_ERROR("Number of densities mismatch: %d != %d\n", 00569 n_density, s->g->n_density); 00570 goto error_out; 00571 } 00572 if (n_sen != s->n_sen) { 00573 E_ERROR("Number of senones mismatch: %d != %d\n", 00574 n_sen, s->n_sen); 00575 goto error_out; 00576 } 00577 00578 if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) { 00579 E_ERROR("Cluster count must be 0, 15, or 16\n"); 00580 goto error_out; 00581 } 00582 if (n_clust == 15) 00583 ++n_clust; 00584 00585 if (!((n_bits == 8) || (n_bits == 4))) { 00586 E_ERROR("Cluster count must be 4 or 8\n"); 00587 goto error_out; 00588 } 00589 00590 if (do_mmap) { 00591 E_INFO("Using memory-mapped I/O for senones\n"); 00592 } 00593 offset = ftell(fp); 00594 fseek(fp, 0, SEEK_END); 00595 filesize = ftell(fp); 00596 fseek(fp, offset, SEEK_SET); 00597 00598 /* Allocate memory for pdfs (or memory map them) */ 00599 if (do_mmap) { 00600 s->sendump_mmap = mmio_file_read(file); 00601 /* Get cluster codebook if any. */ 00602 if (n_clust) { 00603 s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset; 00604 offset += n_clust; 00605 } 00606 } 00607 else { 00608 /* Get cluster codebook if any. */ 00609 if (n_clust) { 00610 s->mixw_cb = ckd_calloc(1, n_clust); 00611 if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) { 00612 E_ERROR("Failed to read %d bytes from sendump\n", n_clust); 00613 goto error_out; 00614 } 00615 } 00616 } 00617 00618 /* Set up pointers, or read, or whatever */ 00619 if (s->sendump_mmap) { 00620 s->mixw = ckd_calloc_2d(n_feat, n_density, sizeof(*s->mixw)); 00621 for (n = 0; n < n_feat; n++) { 00622 int step = c; 00623 if (n_bits == 4) 00624 step = (step + 1) / 2; 00625 for (i = 0; i < r; i++) { 00626 s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset; 00627 offset += step; 00628 } 00629 } 00630 } 00631 else { 00632 s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw)); 00633 /* Read pdf values and ids */ 00634 for (n = 0; n < n_feat; n++) { 00635 int step = c; 00636 if (n_bits == 4) 00637 step = (step + 1) / 2; 00638 for (i = 0; i < r; i++) { 00639 if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp) 00640 != (size_t) step) { 00641 E_ERROR("Failed to read %d bytes from sendump\n", step); 00642 goto error_out; 00643 } 00644 } 00645 } 00646 } 00647 00648 fclose(fp); 00649 return 0; 00650 error_out: 00651 fclose(fp); 00652 return -1; 00653 } 00654 00655 static int32 00656 read_mixw(ptm_mgau_t * s, char const *file_name, double SmoothMin) 00657 { 00658 char **argname, **argval; 00659 char eofchk; 00660 FILE *fp; 00661 int32 byteswap, chksum_present; 00662 uint32 chksum; 00663 float32 *pdf; 00664 int32 i, f, c, n; 00665 int32 n_sen; 00666 int32 n_feat; 00667 int32 n_comp; 00668 int32 n_err; 00669 00670 E_INFO("Reading mixture weights file '%s'\n", file_name); 00671 00672 if ((fp = fopen(file_name, "rb")) == NULL) 00673 E_FATAL("Failed to open mixture file '%s' for reading: %s\n", file_name, strerror(errno)); 00674 00675 /* Read header, including argument-value info and 32-bit byteorder magic */ 00676 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) 00677 E_FATAL("Failed to read header from '%s'\n", file_name); 00678 00679 /* Parse argument-value list */ 00680 chksum_present = 0; 00681 for (i = 0; argname[i]; i++) { 00682 if (strcmp(argname[i], "version") == 0) { 00683 if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0) 00684 E_WARN("Version mismatch(%s): %s, expecting %s\n", 00685 file_name, argval[i], MGAU_MIXW_VERSION); 00686 } 00687 else if (strcmp(argname[i], "chksum0") == 0) { 00688 chksum_present = 1; /* Ignore the associated value */ 00689 } 00690 } 00691 bio_hdrarg_free(argname, argval); 00692 argname = argval = NULL; 00693 00694 chksum = 0; 00695 00696 /* Read #senones, #features, #codewords, arraysize */ 00697 if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1) 00698 || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != 00699 1) 00700 || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) != 00701 1) 00702 || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) { 00703 E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name); 00704 } 00705 if (n_feat != s->g->n_feat) 00706 E_FATAL("#Features streams(%d) != %d\n", n_feat, s->g->n_feat); 00707 if (n != n_sen * n_feat * n_comp) { 00708 E_FATAL 00709 ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n", 00710 file_name, i, n_sen, n_feat, n_comp); 00711 } 00712 00713 /* n_sen = number of mixture weights per codeword, which is 00714 * fixed at the number of senones since we have only one codebook. 00715 */ 00716 s->n_sen = n_sen; 00717 00718 /* Quantized mixture weight arrays. */ 00719 s->mixw = ckd_calloc_3d(s->g->n_feat, s->g->n_density, 00720 n_sen, sizeof(***s->mixw)); 00721 00722 /* Temporary structure to read in floats before conversion to (int32) logs3 */ 00723 pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32)); 00724 00725 /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */ 00726 n_err = 0; 00727 for (i = 0; i < n_sen; i++) { 00728 for (f = 0; f < n_feat; f++) { 00729 if (bio_fread((void *) pdf, sizeof(float32), 00730 n_comp, fp, byteswap, &chksum) != n_comp) { 00731 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name); 00732 } 00733 00734 /* Normalize and floor */ 00735 if (vector_sum_norm(pdf, n_comp) <= 0.0) 00736 n_err++; 00737 vector_floor(pdf, n_comp, SmoothMin); 00738 vector_sum_norm(pdf, n_comp); 00739 00740 /* Convert to LOG, quantize, and transpose */ 00741 for (c = 0; c < n_comp; c++) { 00742 int32 qscr; 00743 00744 qscr = -logmath_log(s->lmath_8b, pdf[c]); 00745 if ((qscr > MAX_NEG_MIXW) || (qscr < 0)) 00746 qscr = MAX_NEG_MIXW; 00747 s->mixw[f][c][i] = qscr; 00748 } 00749 } 00750 } 00751 if (n_err > 0) 00752 E_WARN("Weight normalization failed for %d senones\n", n_err); 00753 00754 ckd_free(pdf); 00755 00756 if (chksum_present) 00757 bio_verify_chksum(fp, byteswap, chksum); 00758 00759 if (fread(&eofchk, 1, 1, fp) == 1) 00760 E_FATAL("More data than expected in %s\n", file_name); 00761 00762 fclose(fp); 00763 00764 E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp); 00765 return n_sen; 00766 } 00767 00768 ps_mgau_t * 00769 ptm_mgau_init(acmod_t *acmod, bin_mdef_t *mdef) 00770 { 00771 ptm_mgau_t *s; 00772 ps_mgau_t *ps; 00773 char const *sendump_path; 00774 int i; 00775 00776 s = ckd_calloc(1, sizeof(*s)); 00777 s->config = acmod->config; 00778 00779 s->lmath = logmath_retain(acmod->lmath); 00780 /* Log-add table. */ 00781 s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE); 00782 if (s->lmath_8b == NULL) 00783 goto error_out; 00784 /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */ 00785 if (logmath_get_width(s->lmath_8b) != 1) { 00786 E_ERROR("Log base %f is too small to represent add table in 8 bits\n", 00787 logmath_get_base(s->lmath_8b)); 00788 goto error_out; 00789 } 00790 00791 /* Read means and variances. */ 00792 if ((s->g = gauden_init(cmd_ln_str_r(s->config, "-mean"), 00793 cmd_ln_str_r(s->config, "-var"), 00794 cmd_ln_float32_r(s->config, "-varfloor"), 00795 s->lmath)) == NULL) 00796 goto error_out; 00797 /* We only support 256 codebooks or less (like 640k or 2GB, this 00798 * should be enough for anyone) */ 00799 if (s->g->n_mgau > 256) { 00800 E_INFO("Number of codebooks exceeds 256: %d\n", s->g->n_mgau); 00801 goto error_out; 00802 } 00803 if (s->g->n_mgau != bin_mdef_n_ciphone(mdef)) { 00804 E_INFO("Number of codebooks doesn't match number of ciphones, doesn't look like PTM: %d %d\n", s->g->n_mgau, bin_mdef_n_ciphone(mdef)); 00805 goto error_out; 00806 } 00807 /* Verify n_feat and veclen, against acmod. */ 00808 if (s->g->n_feat != feat_dimension1(acmod->fcb)) { 00809 E_ERROR("Number of streams does not match: %d != %d\n", 00810 s->g->n_feat, feat_dimension(acmod->fcb)); 00811 goto error_out; 00812 } 00813 for (i = 0; i < s->g->n_feat; ++i) { 00814 if (s->g->featlen[i] != feat_dimension2(acmod->fcb, i)) { 00815 E_ERROR("Dimension of stream %d does not match: %d != %d\n", 00816 s->g->featlen[i], feat_dimension2(acmod->fcb, i)); 00817 goto error_out; 00818 } 00819 } 00820 /* Read mixture weights. */ 00821 if ((sendump_path = cmd_ln_str_r(s->config, "-sendump"))) { 00822 if (read_sendump(s, acmod->mdef, sendump_path) < 0) { 00823 goto error_out; 00824 } 00825 } 00826 else { 00827 if (read_mixw(s, cmd_ln_str_r(s->config, "-mixw"), 00828 cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) { 00829 goto error_out; 00830 } 00831 } 00832 s->ds_ratio = cmd_ln_int32_r(s->config, "-ds"); 00833 s->max_topn = cmd_ln_int32_r(s->config, "-topn"); 00834 E_INFO("Maximum top-N: %d\n", s->max_topn); 00835 00836 /* Assume mapping of senones to their base phones, though this 00837 * will become more flexible in the future. */ 00838 s->sen2cb = ckd_calloc(s->n_sen, sizeof(*s->sen2cb)); 00839 for (i = 0; i < s->n_sen; ++i) 00840 s->sen2cb[i] = bin_mdef_sen2cimap(acmod->mdef, i); 00841 00842 /* Allocate fast-match history buffers. We need enough for the 00843 * phoneme lookahead window, plus the current frame, plus one for 00844 * good measure? (FIXME: I don't remember why) */ 00845 s->n_fast_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2; 00846 s->hist = ckd_calloc(s->n_fast_hist, sizeof(*s->hist)); 00847 /* s->f will be a rotating pointer into s->hist. */ 00848 s->f = s->hist; 00849 for (i = 0; i < s->n_fast_hist; ++i) { 00850 int j, k, m; 00851 /* Top-N codewords for every codebook and feature. */ 00852 s->hist[i].topn = ckd_calloc_3d(s->g->n_mgau, s->g->n_feat, 00853 s->max_topn, sizeof(ptm_topn_t)); 00854 /* Initialize them to sane (yet arbitrary) defaults. */ 00855 for (j = 0; j < s->g->n_mgau; ++j) { 00856 for (k = 0; k < s->g->n_feat; ++k) { 00857 for (m = 0; m < s->max_topn; ++m) { 00858 s->hist[i].topn[j][k][m].cw = m; 00859 s->hist[i].topn[j][k][m].score = WORST_DIST; 00860 } 00861 } 00862 } 00863 /* Active codebook mapping (just codebook, not features, 00864 at least not yet) */ 00865 s->hist[i].mgau_active = bitvec_alloc(s->g->n_mgau); 00866 /* Start with them all on, prune them later. */ 00867 bitvec_set_all(s->hist[i].mgau_active, s->g->n_mgau); 00868 } 00869 00870 ps = (ps_mgau_t *)s; 00871 ps->vt = &ptm_mgau_funcs; 00872 return ps; 00873 error_out: 00874 ptm_mgau_free(ps_mgau_base(s)); 00875 return NULL; 00876 } 00877 00878 int 00879 ptm_mgau_mllr_transform(ps_mgau_t *ps, 00880 ps_mllr_t *mllr) 00881 { 00882 ptm_mgau_t *s = (ptm_mgau_t *)ps; 00883 return gauden_mllr_transform(s->g, mllr, s->config); 00884 } 00885 00886 void 00887 ptm_mgau_free(ps_mgau_t *ps) 00888 { 00889 ptm_mgau_t *s = (ptm_mgau_t *)ps; 00890 00891 logmath_free(s->lmath); 00892 logmath_free(s->lmath_8b); 00893 if (s->sendump_mmap) { 00894 ckd_free_2d(s->mixw); 00895 mmio_file_unmap(s->sendump_mmap); 00896 } 00897 else { 00898 ckd_free_3d(s->mixw); 00899 } 00900 ckd_free(s->sen2cb); 00901 gauden_free(s->g); 00902 ckd_free(s); 00903 }