PocketSphinx
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 00038 /* System headers. */ 00039 #include <string.h> 00040 #include <stdio.h> 00041 #include <assert.h> 00042 00043 /* SphinxBase headers. */ 00044 #include <sphinxbase/bio.h> 00045 00046 /* Local headers. */ 00047 #include "ms_senone.h" 00048 00049 00050 #define MIXW_PARAM_VERSION "1.0" 00051 #define SPDEF_PARAM_VERSION "1.2" 00052 00053 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ == 199901L) 00054 #define LOGMATH_INLINE inline 00055 #elif defined(__GNUC__) 00056 #define LOGMATH_INLINE static inline 00057 #elif defined(_MSC_VER) 00058 #define LOGMATH_INLINE __inline 00059 #else 00060 #define LOGMATH_INLINE static 00061 #endif 00062 00063 static int32 00064 senone_mgau_map_read(senone_t * s, char const *file_name) 00065 { 00066 FILE *fp; 00067 int32 byteswap, chksum_present, n_gauden_present; 00068 uint32 chksum; 00069 int32 i; 00070 char eofchk; 00071 char **argname, **argval; 00072 void *ptr; 00073 float32 v; 00074 00075 E_INFO("Reading senone gauden-codebook map file: %s\n", file_name); 00076 00077 if ((fp = fopen(file_name, "rb")) == NULL) 00078 E_FATAL_SYSTEM("Failed to open map file '%s' for reading", file_name); 00079 00080 /* Read header, including argument-value info and 32-bit byteorder magic */ 00081 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) 00082 E_FATAL("Failed to read header from file '%s'\n", file_name); 00083 00084 /* Parse argument-value list */ 00085 chksum_present = 0; 00086 n_gauden_present = 0; 00087 for (i = 0; argname[i]; i++) { 00088 if (strcmp(argname[i], "version") == 0) { 00089 if (strcmp(argval[i], SPDEF_PARAM_VERSION) != 0) { 00090 E_WARN("Version mismatch(%s): %s, expecting %s\n", 00091 file_name, argval[i], SPDEF_PARAM_VERSION); 00092 } 00093 00094 /* HACK!! Convert version# to float32 and take appropriate action */ 00095 if (sscanf(argval[i], "%f", &v) != 1) 00096 E_FATAL("%s: Bad version no. string: %s\n", file_name, 00097 argval[i]); 00098 00099 n_gauden_present = (v > 1.1) ? 1 : 0; 00100 } 00101 else if (strcmp(argname[i], "chksum0") == 0) { 00102 chksum_present = 1; /* Ignore the associated value */ 00103 } 00104 } 00105 bio_hdrarg_free(argname, argval); 00106 argname = argval = NULL; 00107 00108 chksum = 0; 00109 00110 /* Read #gauden (if version matches) */ 00111 if (n_gauden_present) { 00112 E_INFO("Reading number of codebooks from %s\n", file_name); 00113 if (bio_fread 00114 (&(s->n_gauden), sizeof(int32), 1, fp, byteswap, &chksum) != 1) 00115 E_FATAL("fread(%s) (#gauden) failed\n", file_name); 00116 } 00117 00118 /* Read 1d array data */ 00119 if (bio_fread_1d(&ptr, sizeof(uint32), &(s->n_sen), fp, 00120 byteswap, &chksum) < 0) { 00121 E_FATAL("bio_fread_1d(%s) failed\n", file_name); 00122 } 00123 s->mgau = ptr; 00124 E_INFO("Mapping %d senones to %d codebooks\n", s->n_sen, s->n_gauden); 00125 00126 /* Infer n_gauden if not present in this version */ 00127 if (!n_gauden_present) { 00128 s->n_gauden = 1; 00129 for (i = 0; i < s->n_sen; i++) 00130 if (s->mgau[i] >= s->n_gauden) 00131 s->n_gauden = s->mgau[i] + 1; 00132 } 00133 00134 if (chksum_present) 00135 bio_verify_chksum(fp, byteswap, chksum); 00136 00137 if (fread(&eofchk, 1, 1, fp) == 1) 00138 E_FATAL("More data than expected in %s: %d\n", file_name, eofchk); 00139 00140 fclose(fp); 00141 00142 E_INFO("Read %d->%d senone-codebook mappings\n", s->n_sen, 00143 s->n_gauden); 00144 00145 return 1; 00146 } 00147 00148 00149 static int32 00150 senone_mixw_read(senone_t * s, char const *file_name, logmath_t *lmath) 00151 { 00152 char eofchk; 00153 FILE *fp; 00154 int32 byteswap, chksum_present; 00155 uint32 chksum; 00156 float32 *pdf; 00157 int32 i, f, c, p, n_err; 00158 char **argname, **argval; 00159 00160 E_INFO("Reading senone mixture weights: %s\n", file_name); 00161 00162 if ((fp = fopen(file_name, "rb")) == NULL) 00163 E_FATAL_SYSTEM("Failed to open mixture weights file '%s' for reading", file_name); 00164 00165 /* Read header, including argument-value info and 32-bit byteorder magic */ 00166 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) 00167 E_FATAL("Failed to read header from file '%s'\n", file_name); 00168 00169 /* Parse argument-value list */ 00170 chksum_present = 0; 00171 for (i = 0; argname[i]; i++) { 00172 if (strcmp(argname[i], "version") == 0) { 00173 if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0) 00174 E_WARN("Version mismatch(%s): %s, expecting %s\n", 00175 file_name, argval[i], MIXW_PARAM_VERSION); 00176 } 00177 else if (strcmp(argname[i], "chksum0") == 0) { 00178 chksum_present = 1; /* Ignore the associated value */ 00179 } 00180 } 00181 bio_hdrarg_free(argname, argval); 00182 argname = argval = NULL; 00183 00184 chksum = 0; 00185 00186 /* Read #senones, #features, #codewords, arraysize */ 00187 if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) != 00188 1) 00189 || 00190 (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap, &chksum) 00191 != 1) 00192 || (bio_fread(&(s->n_cw), sizeof(int32), 1, fp, byteswap, &chksum) 00193 != 1) 00194 || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) { 00195 E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name); 00196 } 00197 if (i != s->n_sen * s->n_feat * s->n_cw) { 00198 E_FATAL 00199 ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n", 00200 file_name, i, s->n_sen, s->n_feat, s->n_cw); 00201 } 00202 00203 /* 00204 * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits. 00205 * All PDF values will be truncated (in the LSB positions) by these many bits. 00206 */ 00207 if ((s->mixwfloor <= 0.0) || (s->mixwfloor >= 1.0)) 00208 E_FATAL("mixwfloor (%e) not in range (0, 1)\n", s->mixwfloor); 00209 00210 /* Use a fixed shift for compatibility with everything else. */ 00211 E_INFO("Truncating senone logs3(pdf) values by %d bits\n", SENSCR_SHIFT); 00212 00213 /* 00214 * Allocate memory for senone PDF data. Organize normally or transposed depending on 00215 * s->n_gauden. 00216 */ 00217 if (s->n_gauden > 1) { 00218 E_INFO("Not transposing mixture weights in memory\n"); 00219 s->pdf = 00220 (senprob_t ***) ckd_calloc_3d(s->n_sen, s->n_feat, s->n_cw, 00221 sizeof(senprob_t)); 00222 } 00223 else { 00224 E_INFO("Transposing mixture weights in memory\n"); 00225 s->pdf = 00226 (senprob_t ***) ckd_calloc_3d(s->n_feat, s->n_cw, s->n_sen, 00227 sizeof(senprob_t)); 00228 } 00229 00230 /* Temporary structure to read in floats */ 00231 pdf = (float32 *) ckd_calloc(s->n_cw, sizeof(float32)); 00232 00233 /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */ 00234 n_err = 0; 00235 for (i = 0; i < s->n_sen; i++) { 00236 for (f = 0; f < s->n_feat; f++) { 00237 if (bio_fread 00238 ((void *) pdf, sizeof(float32), s->n_cw, fp, byteswap, 00239 &chksum) 00240 != s->n_cw) { 00241 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name); 00242 } 00243 00244 /* Normalize and floor */ 00245 if (vector_sum_norm(pdf, s->n_cw) <= 0.0) 00246 n_err++; 00247 vector_floor(pdf, s->n_cw, s->mixwfloor); 00248 vector_sum_norm(pdf, s->n_cw); 00249 00250 /* Convert to logs3, truncate to 8 bits, and store in s->pdf */ 00251 for (c = 0; c < s->n_cw; c++) { 00252 p = -(logmath_log(lmath, pdf[c])); 00253 p += (1 << (SENSCR_SHIFT - 1)) - 1; /* Rounding before truncation */ 00254 00255 if (s->n_gauden > 1) 00256 s->pdf[i][f][c] = 00257 (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255; 00258 else 00259 s->pdf[f][c][i] = 00260 (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255; 00261 } 00262 } 00263 } 00264 if (n_err > 0) 00265 E_WARN("Weight normalization failed for %d senones\n", n_err); 00266 00267 ckd_free(pdf); 00268 00269 if (chksum_present) 00270 bio_verify_chksum(fp, byteswap, chksum); 00271 00272 if (fread(&eofchk, 1, 1, fp) == 1) 00273 E_FATAL("More data than expected in %s\n", file_name); 00274 00275 fclose(fp); 00276 00277 E_INFO 00278 ("Read mixture weights for %d senones: %d features x %d codewords\n", 00279 s->n_sen, s->n_feat, s->n_cw); 00280 00281 return 1; 00282 } 00283 00284 00285 senone_t * 00286 senone_init(gauden_t *g, char const *mixwfile, char const *sen2mgau_map_file, 00287 float32 mixwfloor, logmath_t *lmath, bin_mdef_t *mdef) 00288 { 00289 senone_t *s; 00290 int32 n = 0, i; 00291 00292 s = (senone_t *) ckd_calloc(1, sizeof(senone_t)); 00293 s->lmath = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE); 00294 s->mixwfloor = mixwfloor; 00295 00296 s->n_gauden = g->n_mgau; 00297 if (sen2mgau_map_file) { 00298 if (!(strcmp(sen2mgau_map_file, ".semi.") == 0 00299 || strcmp(sen2mgau_map_file, ".ptm.") == 0 00300 || strcmp(sen2mgau_map_file, ".cont.") == 0)) { 00301 senone_mgau_map_read(s, sen2mgau_map_file); 00302 n = s->n_sen; 00303 } 00304 } 00305 else { 00306 if (s->n_gauden == 1) 00307 sen2mgau_map_file = ".semi."; 00308 else if (s->n_gauden == bin_mdef_n_ciphone(mdef)) 00309 sen2mgau_map_file = ".ptm."; 00310 else 00311 sen2mgau_map_file = ".cont."; 00312 } 00313 00314 senone_mixw_read(s, mixwfile, lmath); 00315 00316 if (strcmp(sen2mgau_map_file, ".semi.") == 0) { 00317 /* All-to-1 senones-codebook mapping */ 00318 E_INFO("Mapping all senones to one codebook\n"); 00319 s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau)); 00320 } 00321 else if (strcmp(sen2mgau_map_file, ".ptm.") == 0) { 00322 /* All-to-ciphone-id senones-codebook mapping */ 00323 E_INFO("Mapping senones to context-independent phone codebooks\n"); 00324 s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau)); 00325 for (i = 0; i < s->n_sen; i++) 00326 s->mgau[i] = bin_mdef_sen2cimap(mdef, i); 00327 } 00328 else if (strcmp(sen2mgau_map_file, ".cont.") == 0 00329 || strcmp(sen2mgau_map_file, ".s3cont.") == 0) { 00330 /* 1-to-1 senone-codebook mapping */ 00331 E_INFO("Mapping senones to individual codebooks\n"); 00332 if (s->n_sen <= 1) 00333 E_FATAL("#senone=%d; must be >1\n", s->n_sen); 00334 00335 s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau)); 00336 for (i = 0; i < s->n_sen; i++) 00337 s->mgau[i] = i; 00338 /* Not sure why this is here, it probably does nothing. */ 00339 s->n_gauden = s->n_sen; 00340 } 00341 else { 00342 if (s->n_sen != n) 00343 E_FATAL("#senones inconsistent: %d in %s; %d in %s\n", 00344 n, sen2mgau_map_file, s->n_sen, mixwfile); 00345 } 00346 00347 s->featscr = NULL; 00348 return s; 00349 } 00350 00351 void 00352 senone_free(senone_t * s) 00353 { 00354 if (s == NULL) 00355 return; 00356 if (s->pdf) 00357 ckd_free_3d((void *) s->pdf); 00358 if (s->mgau) 00359 ckd_free(s->mgau); 00360 if (s->featscr) 00361 ckd_free(s->featscr); 00362 logmath_free(s->lmath); 00363 ckd_free(s); 00364 } 00365 00366 00367 /* 00368 * Compute senone score for one senone. 00369 * NOTE: Remember that senone PDF tables contain SCALED, NEGATED logs3 values. 00370 * NOTE: Remember also that PDF data may be transposed or not depending on s->n_gauden. 00371 */ 00372 int32 00373 senone_eval(senone_t * s, int id, gauden_dist_t ** dist, int32 n_top) 00374 { 00375 int32 scr; /* total senone score */ 00376 int32 fden; /* Gaussian density */ 00377 int32 fscr; /* senone score for one feature */ 00378 int32 fwscr; /* senone score for one feature, one codeword */ 00379 int32 f, t; 00380 gauden_dist_t *fdist; 00381 00382 assert((id >= 0) && (id < s->n_sen)); 00383 assert((n_top > 0) && (n_top <= s->n_cw)); 00384 00385 scr = 0; 00386 00387 for (f = 0; f < s->n_feat; f++) { 00388 int top; 00389 fdist = dist[f]; 00390 00391 /* Top codeword for feature f */ 00392 top = fden = ((int32)fdist[0].dist + ((1<<SENSCR_SHIFT) - 1)) >> SENSCR_SHIFT; 00393 fscr = (s->n_gauden > 1) 00394 ? (fden + -s->pdf[id][f][fdist[0].id]) /* untransposed */ 00395 : (fden + -s->pdf[f][fdist[0].id][id]); /* transposed */ 00396 E_DEBUG(1, ("fden[%d][%d] l+= %d + %d = %d\n", 00397 id, f, -(fscr - fden), -(fden-top), -(fscr-top))); 00398 /* Remaining of n_top codewords for feature f */ 00399 for (t = 1; t < n_top; t++) { 00400 fden = ((int32)fdist[t].dist + ((1<<SENSCR_SHIFT) - 1)) >> SENSCR_SHIFT; 00401 fwscr = (s->n_gauden > 1) ? 00402 (fden + -s->pdf[id][f][fdist[t].id]) : 00403 (fden + -s->pdf[f][fdist[t].id][id]); 00404 fscr = logmath_add(s->lmath, fscr, fwscr); 00405 E_DEBUG(1, ("fden[%d][%d] l+= %d + %d = %d\n", 00406 id, f, -(fwscr - fden), -(fden-top), -(fscr-top))); 00407 } 00408 /* Senone scores are also scaled, negated logs3 values. Hence 00409 * we have to negate the stuff we calculated above. */ 00410 scr -= fscr; 00411 } 00412 /* Downscale scores. */ 00413 scr /= s->aw; 00414 00415 /* Avoid overflowing int16 */ 00416 if (scr > 32767) 00417 scr = 32767; 00418 if (scr < -32768) 00419 scr = -32768; 00420 return scr; 00421 }