• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libpocketsphinx/ms_senone.c

00001 /* ====================================================================
00002  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00003  * reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  *
00009  * 1. Redistributions of source code must retain the above copyright
00010  *    notice, this list of conditions and the following disclaimer. 
00011  *
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  *    notice, this list of conditions and the following disclaimer in
00014  *    the documentation and/or other materials provided with the
00015  *    distribution.
00016  *
00017  * This work was supported in part by funding from the Defense Advanced 
00018  * Research Projects Agency and the National Science Foundation of the 
00019  * United States of America, and the CMU Sphinx Speech Consortium.
00020  *
00021  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00022  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00023  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00024  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00025  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00026  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00027  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00028  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00029  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00030  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00031  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032  *
00033  * ====================================================================
00034  *
00035  */
00036 
00037 /* System headers. */
00038 #include <string.h>
00039 #include <stdio.h>
00040 #include <assert.h>
00041 
00042 /* SphinxBase headers. */
00043 #include <bio.h>
00044 
00045 /* Local headers. */
00046 #include "ms_senone.h"
00047 
00048 
/* Version strings compared against the "version" header argument of the
 * mixture-weight file and of the senone-to-codebook map file respectively. */
#define MIXW_PARAM_VERSION      "1.0"
#define SPDEF_PARAM_VERSION     "1.2"

/*
 * LOGMATH_INLINE: specifier for small file-local helper functions.
 *
 * Any compiler that reports C99 or later gets "static inline".  The test is
 * ">=" so that C11 and newer are covered too, and "static" is included because
 * a bare C99 "inline" definition does not provide an external definition and
 * can therefore fail at link time when the call is not inlined.
 */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
#define LOGMATH_INLINE static inline
#elif defined(__GNUC__)
#define LOGMATH_INLINE static inline
#elif defined(_MSC_VER)
#define LOGMATH_INLINE __inline
#else
#define LOGMATH_INLINE static
#endif
00061 
00062 static int32
00063 senone_mgau_map_read(senone_t * s, char const *file_name)
00064 {
00065     FILE *fp;
00066     int32 byteswap, chksum_present, n_gauden_present;
00067     uint32 chksum;
00068     int32 i;
00069     char eofchk;
00070     char **argname, **argval;
00071     void *ptr;
00072     float32 v;
00073 
00074     E_INFO("Reading senone gauden-codebook map file: %s\n", file_name);
00075 
00076     if ((fp = fopen(file_name, "rb")) == NULL)
00077         E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);
00078 
00079     /* Read header, including argument-value info and 32-bit byteorder magic */
00080     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
00081         E_FATAL("bio_readhdr(%s) failed\n", file_name);
00082 
00083     /* Parse argument-value list */
00084     chksum_present = 0;
00085     n_gauden_present = 0;
00086     for (i = 0; argname[i]; i++) {
00087         if (strcmp(argname[i], "version") == 0) {
00088             if (strcmp(argval[i], SPDEF_PARAM_VERSION) != 0) {
00089                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
00090                        file_name, argval[i], SPDEF_PARAM_VERSION);
00091             }
00092 
00093             /* HACK!! Convert version# to float32 and take appropriate action */
00094             if (sscanf(argval[i], "%f", &v) != 1)
00095                 E_FATAL("%s: Bad version no. string: %s\n", file_name,
00096                         argval[i]);
00097 
00098             n_gauden_present = (v > 1.1) ? 1 : 0;
00099         }
00100         else if (strcmp(argname[i], "chksum0") == 0) {
00101             chksum_present = 1; /* Ignore the associated value */
00102         }
00103     }
00104     bio_hdrarg_free(argname, argval);
00105     argname = argval = NULL;
00106 
00107     chksum = 0;
00108 
00109     /* Read #gauden (if version matches) */
00110     if (n_gauden_present) {
00111         if (bio_fread
00112             (&(s->n_gauden), sizeof(int32), 1, fp, byteswap, &chksum) != 1)
00113             E_FATAL("fread(%s) (#gauden) failed\n", file_name);
00114     }
00115 
00116     /* Read 1d array data */
00117     if (bio_fread_1d(&ptr, sizeof(int16), &(s->n_sen), fp,
00118                      byteswap, &chksum) < 0) {
00119         E_FATAL("bio_fread_1d(%s) failed\n", file_name);
00120     }
00121     s->mgau = ptr;
00122 
00123     /* Infer n_gauden if not present in this version */
00124     if (!n_gauden_present) {
00125         s->n_gauden = 1;
00126         for (i = 0; i < s->n_sen; i++)
00127             if (s->mgau[i] >= s->n_gauden)
00128                 s->n_gauden = s->mgau[i] + 1;
00129     }
00130 
00131     if (chksum_present)
00132         bio_verify_chksum(fp, byteswap, chksum);
00133 
00134     if (fread(&eofchk, 1, 1, fp) == 1)
00135         E_FATAL("More data than expected in %s\n", file_name);
00136 
00137     fclose(fp);
00138 
00139     E_INFO("Read %d->%d senone-codebook mappings\n", s->n_sen,
00140            s->n_gauden);
00141 
00142     return 1;
00143 }
00144 
00145 
00146 static int32
00147 senone_mixw_read(senone_t * s, char const *file_name, logmath_t *lmath)
00148 {
00149     char eofchk;
00150     FILE *fp;
00151     int32 byteswap, chksum_present;
00152     uint32 chksum;
00153     float32 *pdf;
00154     int32 i, f, c, p, n_err;
00155     char **argname, **argval;
00156 
00157     E_INFO("Reading senone mixture weights: %s\n", file_name);
00158 
00159     if ((fp = fopen(file_name, "rb")) == NULL)
00160         E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);
00161 
00162     /* Read header, including argument-value info and 32-bit byteorder magic */
00163     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
00164         E_FATAL("bio_readhdr(%s) failed\n", file_name);
00165 
00166     /* Parse argument-value list */
00167     chksum_present = 0;
00168     for (i = 0; argname[i]; i++) {
00169         if (strcmp(argname[i], "version") == 0) {
00170             if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0)
00171                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
00172                        file_name, argval[i], MIXW_PARAM_VERSION);
00173         }
00174         else if (strcmp(argname[i], "chksum0") == 0) {
00175             chksum_present = 1; /* Ignore the associated value */
00176         }
00177     }
00178     bio_hdrarg_free(argname, argval);
00179     argname = argval = NULL;
00180 
00181     chksum = 0;
00182 
00183     /* Read #senones, #features, #codewords, arraysize */
00184     if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) !=
00185          1)
00186         ||
00187         (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap, &chksum)
00188          != 1)
00189         || (bio_fread(&(s->n_cw), sizeof(int32), 1, fp, byteswap, &chksum)
00190             != 1)
00191         || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
00192         E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
00193     }
00194     if (i != s->n_sen * s->n_feat * s->n_cw) {
00195         E_FATAL
00196             ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
00197              file_name, i, s->n_sen, s->n_feat, s->n_cw);
00198     }
00199 
00200     /*
00201      * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits.
00202      * All PDF values will be truncated (in the LSB positions) by these many bits.
00203      */
00204     if ((s->mixwfloor <= 0.0) || (s->mixwfloor >= 1.0))
00205         E_FATAL("mixwfloor (%e) not in range (0, 1)\n", s->mixwfloor);
00206 
00207     /* Use a fixed shift for compatibility with everything else. */
00208     E_INFO("Truncating senone logs3(pdf) values by %d bits\n", SENSCR_SHIFT);
00209 
00210     /*
00211      * Allocate memory for senone PDF data.  Organize normally or transposed depending on
00212      * s->n_gauden.
00213      */
00214     if (s->n_gauden > 1) {
00215         s->pdf =
00216             (senprob_t ***) ckd_calloc_3d(s->n_sen, s->n_feat, s->n_cw,
00217                                           sizeof(senprob_t));
00218     }
00219     else {
00220         s->pdf =
00221             (senprob_t ***) ckd_calloc_3d(s->n_feat, s->n_cw, s->n_sen,
00222                                           sizeof(senprob_t));
00223     }
00224 
00225     /* Temporary structure to read in floats */
00226     pdf = (float32 *) ckd_calloc(s->n_cw, sizeof(float32));
00227 
00228     /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
00229     n_err = 0;
00230     for (i = 0; i < s->n_sen; i++) {
00231         for (f = 0; f < s->n_feat; f++) {
00232             if (bio_fread
00233                 ((void *) pdf, sizeof(float32), s->n_cw, fp, byteswap,
00234                  &chksum)
00235                 != s->n_cw) {
00236                 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
00237             }
00238 
00239             /* Normalize and floor */
00240             if (vector_sum_norm(pdf, s->n_cw) <= 0.0)
00241                 n_err++;
00242             vector_floor(pdf, s->n_cw, s->mixwfloor);
00243             vector_sum_norm(pdf, s->n_cw);
00244 
00245             /* Convert to logs3, truncate to 8 bits, and store in s->pdf */
00246             for (c = 0; c < s->n_cw; c++) {
00247                 p = -(logmath_log(lmath, pdf[c]));
00248                 p += (1 << (SENSCR_SHIFT - 1)) - 1; /* Rounding before truncation */
00249 
00250                 if (s->n_gauden > 1)
00251                     s->pdf[i][f][c] =
00252                         (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
00253                 else
00254                     s->pdf[f][c][i] =
00255                         (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
00256             }
00257         }
00258     }
00259     if (n_err > 0)
00260         E_ERROR("Weight normalization failed for %d senones\n", n_err);
00261 
00262     ckd_free(pdf);
00263 
00264     if (chksum_present)
00265         bio_verify_chksum(fp, byteswap, chksum);
00266 
00267     if (fread(&eofchk, 1, 1, fp) == 1)
00268         E_FATAL("More data than expected in %s\n", file_name);
00269 
00270     fclose(fp);
00271 
00272     E_INFO
00273         ("Read mixture weights for %d senones: %d features x %d codewords\n",
00274          s->n_sen, s->n_feat, s->n_cw);
00275 
00276     return 1;
00277 }
00278 
00279 
00280 senone_t *
00281 senone_init(gauden_t *g, char const *mixwfile, char const *sen2mgau_map_file,
00282             float32 mixwfloor, logmath_t *lmath)
00283 {
00284     senone_t *s;
00285     int32 n = 0, i;
00286 
00287     s = (senone_t *) ckd_calloc(1, sizeof(senone_t));
00288     s->lmath = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE);
00289     s->mixwfloor = mixwfloor;
00290 
00291     s->n_gauden = g->n_mgau;
00292     if (sen2mgau_map_file) {
00293         if (!(strcmp(sen2mgau_map_file, ".semi.") == 0
00294               || strcmp(sen2mgau_map_file, ".cont.") == 0)) {
00295             senone_mgau_map_read(s, sen2mgau_map_file);
00296             n = s->n_sen;
00297         }
00298     }
00299     else {
00300         if (s->n_gauden == 1)
00301             sen2mgau_map_file = ".semi.";
00302         else
00303             sen2mgau_map_file = ".cont.";
00304     }
00305 
00306     senone_mixw_read(s, mixwfile, lmath);
00307 
00308     if (strcmp(sen2mgau_map_file, ".semi.") == 0) {
00309         /* All-to-1 senones-codebook mapping */
00310         s->mgau = (int16 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
00311     }
00312     else if (strcmp(sen2mgau_map_file, ".cont.") == 0
00313              || strcmp(sen2mgau_map_file, ".s3cont.") == 0) {
00314         /* 1-to-1 senone-codebook mapping */
00315         if (s->n_sen <= 1)
00316             E_FATAL("#senone=%d; must be >1\n", s->n_sen);
00317 
00318         s->mgau = (int16 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
00319         for (i = 0; i < s->n_sen; i++)
00320             s->mgau[i] = i;
00321 
00322         s->n_gauden = s->n_sen;
00323     }
00324     else {
00325         if (s->n_sen != n)
00326             E_FATAL("#senones inconsistent: %d in %s; %d in %s\n",
00327                     n, sen2mgau_map_file, s->n_sen, mixwfile);
00328     }
00329 
00330     s->featscr = NULL;
00331     return s;
00332 }
00333 
00334 void
00335 senone_free(senone_t * s)
00336 {
00337     if (s == NULL)
00338         return;
00339     if (s->pdf)
00340         ckd_free_3d((void *) s->pdf);
00341     if (s->mgau)
00342         ckd_free(s->mgau);
00343     if (s->featscr)
00344         ckd_free(s->featscr);
00345     logmath_free(s->lmath);
00346     ckd_free(s);
00347 }
00348 
00349 
00350 /*
00351  * Compute senone score for one senone.
00352  * NOTE:  Remember that senone PDF tables contain SCALED, NEGATED logs3 values.
00353  * NOTE:  Remember also that PDF data may be transposed or not depending on s->n_gauden.
00354  */
00355 int32
00356 senone_eval(senone_t * s, int id, gauden_dist_t ** dist, int32 n_top)
00357 {
00358     int32 scr;                  /* total senone score */
00359     int32 fden;                 /* Gaussian density */
00360     int32 fscr;                 /* senone score for one feature */
00361     int32 fwscr;                /* senone score for one feature, one codeword */
00362     int32 f, t;
00363     gauden_dist_t *fdist;
00364 
00365     assert((id >= 0) && (id < s->n_sen));
00366     assert((n_top > 0) && (n_top <= s->n_cw));
00367 
00368     scr = 0;
00369 
00370     for (f = 0; f < s->n_feat; f++) {
00371         fdist = dist[f];
00372 
00373         /* Top codeword for feature f */
00374         fden = ((int32)fdist[0].dist) >> SENSCR_SHIFT;
00375         fscr = (s->n_gauden > 1)
00376             ? (fden - s->pdf[id][f][fdist[0].id])  /* untransposed */
00377             : (fden - s->pdf[f][fdist[0].id][id]); /* transposed */
00378 
00379         /* Remaining of n_top codewords for feature f */
00380         for (t = 1; t < n_top; t++) {
00381             fden = ((int32)fdist[t].dist) >> SENSCR_SHIFT;
00382             fwscr = (s->n_gauden > 1) ?
00383                 (fden - s->pdf[id][f][fdist[t].id]) :
00384                 (fden - s->pdf[f][fdist[t].id][id]);
00385             fscr = logmath_add(s->lmath, fscr, fwscr);
00386         }
00387         /* Senone scores are also scaled, negated logs3 values.  Hence
00388          * we have to negate the stuff we calculated above. */
00389         scr -= fscr;
00390     }
00391 
00392     /* Avoid overflowing int16 */
00393     if (scr > 32767)
00394       scr = 32767;
00395     if (scr < -32768)
00396       scr = -32768;
00397     return scr;
00398 }
00399 
00400 
00401 /*
00402  * Optimized for special case of all senones sharing one codebook (perhaps many features).
00403  * In particular, the PDF tables are transposed in memory.
00404  */
00405 void
00406 senone_eval_all(senone_t * s, gauden_dist_t ** dist, int32 n_top,
00407                 int16 * senscr)
00408 {
00409     int32 i, f, k, cwdist, scr;
00410 
00411     senprob_t *pdf;
00412     int32 *featscr = NULL;
00413     featscr = s->featscr;
00414 
00415     assert(s->n_gauden == 1);
00416     assert((n_top > 0) && (n_top <= s->n_cw));
00417 
00418     if ((s->n_feat > 1) && (!featscr))
00419         featscr = (int32 *) ckd_calloc(s->n_sen, sizeof(int32));
00420 
00421     /* Feature 0 */
00422     /* Top-N codeword 0 */
00423     cwdist = ((int32)dist[0][0].dist) >> SENSCR_SHIFT;
00424     pdf = s->pdf[0][dist[0][0].id];
00425 
00426     for (i = 0; i < s->n_sen; i++)
00427         senscr[i] = cwdist - pdf[i];
00428 
00429     /* Remaining top-N codewords */
00430     for (k = 1; k < n_top; k++) {
00431         cwdist = ((int32)dist[0][k].dist) >> SENSCR_SHIFT;
00432         pdf = s->pdf[0][dist[0][k].id];
00433 
00434         for (i = 0; i < s->n_sen; i++) {
00435             scr = cwdist - pdf[i];
00436             senscr[i] = logmath_add(s->lmath, senscr[i], scr);
00437             if (k == n_top - 1)
00438                 senscr[i] = -senscr[i];
00439         }
00440     }
00441 
00442     /* Remaining features */
00443     for (f = 1; f < s->n_feat; f++) {
00444         /* Top-N codeword 0 */
00445         cwdist = ((int32)dist[f][0].dist) >> SENSCR_SHIFT;
00446         pdf = s->pdf[f][dist[f][0].id];
00447 
00448         for (i = 0; i < s->n_sen; i++)
00449             featscr[i] = cwdist - pdf[i];
00450 
00451         /* Remaining top-N codewords */
00452         for (k = 1; k < n_top; k++) {
00453             cwdist = ((int32)dist[f][k].dist) >> SENSCR_SHIFT;
00454             pdf = s->pdf[f][dist[f][k].id];
00455 
00456             for (i = 0; i < s->n_sen; i++) {
00457                 scr = cwdist - pdf[i];
00458                 featscr[i] = logmath_add(s->lmath, featscr[i], scr);
00459             }
00460         }
00461 
00462         for (i = 0; i < s->n_sen; i++)
00463             senscr[i] -= featscr[i];
00464     }
00465 }

Generated on Thu Jan 27 2011 for PocketSphinx by  doxygen 1.7.1