PocketSphinx  0.6
src/libpocketsphinx/ms_senone.c
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /* System headers. */
00039 #include <string.h>
00040 #include <stdio.h>
00041 #include <assert.h>
00042 
00043 /* SphinxBase headers. */
00044 #include <sphinxbase/bio.h>
00045 
00046 /* Local headers. */
00047 #include "ms_senone.h"
00048 
00049 
/*
 * File-format versions expected in the "version" header argument:
 * MIXW_PARAM_VERSION is checked by senone_mixw_read(), and
 * SPDEF_PARAM_VERSION by senone_mgau_map_read().  A mismatch only
 * produces a warning in both readers.
 */
#define MIXW_PARAM_VERSION      "1.0"
#define SPDEF_PARAM_VERSION     "1.2"

/*
 * LOGMATH_INLINE: inline qualifier selected per compiler.
 *
 * The first test matches a compiler reporting exactly C99 (199901L);
 * any other standard version falls through to the compiler-specific
 * checks below, and finally to plain `static`.
 */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ == 199901L)
#  define LOGMATH_INLINE inline
#elif defined(__GNUC__)
#  define LOGMATH_INLINE static inline
#elif defined(_MSC_VER)
#  define LOGMATH_INLINE __inline
#else
#  define LOGMATH_INLINE static
#endif
00062 
00063 static int32
00064 senone_mgau_map_read(senone_t * s, char const *file_name)
00065 {
00066     FILE *fp;
00067     int32 byteswap, chksum_present, n_gauden_present;
00068     uint32 chksum;
00069     int32 i;
00070     char eofchk;
00071     char **argname, **argval;
00072     void *ptr;
00073     float32 v;
00074 
00075     E_INFO("Reading senone gauden-codebook map file: %s\n", file_name);
00076 
00077     if ((fp = fopen(file_name, "rb")) == NULL)
00078         E_FATAL_SYSTEM("Failed to open map file '%s' for reading", file_name);
00079 
00080     /* Read header, including argument-value info and 32-bit byteorder magic */
00081     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
00082         E_FATAL("Failed to read header from file '%s'\n", file_name);
00083 
00084     /* Parse argument-value list */
00085     chksum_present = 0;
00086     n_gauden_present = 0;
00087     for (i = 0; argname[i]; i++) {
00088         if (strcmp(argname[i], "version") == 0) {
00089             if (strcmp(argval[i], SPDEF_PARAM_VERSION) != 0) {
00090                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
00091                        file_name, argval[i], SPDEF_PARAM_VERSION);
00092             }
00093 
00094             /* HACK!! Convert version# to float32 and take appropriate action */
00095             if (sscanf(argval[i], "%f", &v) != 1)
00096                 E_FATAL("%s: Bad version no. string: %s\n", file_name,
00097                         argval[i]);
00098 
00099             n_gauden_present = (v > 1.1) ? 1 : 0;
00100         }
00101         else if (strcmp(argname[i], "chksum0") == 0) {
00102             chksum_present = 1; /* Ignore the associated value */
00103         }
00104     }
00105     bio_hdrarg_free(argname, argval);
00106     argname = argval = NULL;
00107 
00108     chksum = 0;
00109 
00110     /* Read #gauden (if version matches) */
00111     if (n_gauden_present) {
00112         E_INFO("Reading number of codebooks from %s\n", file_name);
00113         if (bio_fread
00114             (&(s->n_gauden), sizeof(int32), 1, fp, byteswap, &chksum) != 1)
00115             E_FATAL("fread(%s) (#gauden) failed\n", file_name);
00116     }
00117 
00118     /* Read 1d array data */
00119     if (bio_fread_1d(&ptr, sizeof(uint32), &(s->n_sen), fp,
00120                      byteswap, &chksum) < 0) {
00121         E_FATAL("bio_fread_1d(%s) failed\n", file_name);
00122     }
00123     s->mgau = ptr;
00124     E_INFO("Mapping %d senones to %d codebooks\n", s->n_sen, s->n_gauden);
00125 
00126     /* Infer n_gauden if not present in this version */
00127     if (!n_gauden_present) {
00128         s->n_gauden = 1;
00129         for (i = 0; i < s->n_sen; i++)
00130             if (s->mgau[i] >= s->n_gauden)
00131                 s->n_gauden = s->mgau[i] + 1;
00132     }
00133 
00134     if (chksum_present)
00135         bio_verify_chksum(fp, byteswap, chksum);
00136 
00137     if (fread(&eofchk, 1, 1, fp) == 1)
00138         E_FATAL("More data than expected in %s: %d\n", file_name, eofchk);
00139 
00140     fclose(fp);
00141 
00142     E_INFO("Read %d->%d senone-codebook mappings\n", s->n_sen,
00143            s->n_gauden);
00144 
00145     return 1;
00146 }
00147 
00148 
00149 static int32
00150 senone_mixw_read(senone_t * s, char const *file_name, logmath_t *lmath)
00151 {
00152     char eofchk;
00153     FILE *fp;
00154     int32 byteswap, chksum_present;
00155     uint32 chksum;
00156     float32 *pdf;
00157     int32 i, f, c, p, n_err;
00158     char **argname, **argval;
00159 
00160     E_INFO("Reading senone mixture weights: %s\n", file_name);
00161 
00162     if ((fp = fopen(file_name, "rb")) == NULL)
00163         E_FATAL_SYSTEM("Failed to open mixture weights file '%s' for reading", file_name);
00164 
00165     /* Read header, including argument-value info and 32-bit byteorder magic */
00166     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
00167         E_FATAL("Failed to read header from file '%s'\n", file_name);
00168 
00169     /* Parse argument-value list */
00170     chksum_present = 0;
00171     for (i = 0; argname[i]; i++) {
00172         if (strcmp(argname[i], "version") == 0) {
00173             if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0)
00174                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
00175                        file_name, argval[i], MIXW_PARAM_VERSION);
00176         }
00177         else if (strcmp(argname[i], "chksum0") == 0) {
00178             chksum_present = 1; /* Ignore the associated value */
00179         }
00180     }
00181     bio_hdrarg_free(argname, argval);
00182     argname = argval = NULL;
00183 
00184     chksum = 0;
00185 
00186     /* Read #senones, #features, #codewords, arraysize */
00187     if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) !=
00188          1)
00189         ||
00190         (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap, &chksum)
00191          != 1)
00192         || (bio_fread(&(s->n_cw), sizeof(int32), 1, fp, byteswap, &chksum)
00193             != 1)
00194         || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
00195         E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
00196     }
00197     if (i != s->n_sen * s->n_feat * s->n_cw) {
00198         E_FATAL
00199             ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
00200              file_name, i, s->n_sen, s->n_feat, s->n_cw);
00201     }
00202 
00203     /*
00204      * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits.
00205      * All PDF values will be truncated (in the LSB positions) by these many bits.
00206      */
00207     if ((s->mixwfloor <= 0.0) || (s->mixwfloor >= 1.0))
00208         E_FATAL("mixwfloor (%e) not in range (0, 1)\n", s->mixwfloor);
00209 
00210     /* Use a fixed shift for compatibility with everything else. */
00211     E_INFO("Truncating senone logs3(pdf) values by %d bits\n", SENSCR_SHIFT);
00212 
00213     /*
00214      * Allocate memory for senone PDF data.  Organize normally or transposed depending on
00215      * s->n_gauden.
00216      */
00217     if (s->n_gauden > 1) {
00218         E_INFO("Not transposing mixture weights in memory\n");
00219         s->pdf =
00220             (senprob_t ***) ckd_calloc_3d(s->n_sen, s->n_feat, s->n_cw,
00221                                           sizeof(senprob_t));
00222     }
00223     else {
00224         E_INFO("Transposing mixture weights in memory\n");
00225         s->pdf =
00226             (senprob_t ***) ckd_calloc_3d(s->n_feat, s->n_cw, s->n_sen,
00227                                           sizeof(senprob_t));
00228     }
00229 
00230     /* Temporary structure to read in floats */
00231     pdf = (float32 *) ckd_calloc(s->n_cw, sizeof(float32));
00232 
00233     /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
00234     n_err = 0;
00235     for (i = 0; i < s->n_sen; i++) {
00236         for (f = 0; f < s->n_feat; f++) {
00237             if (bio_fread
00238                 ((void *) pdf, sizeof(float32), s->n_cw, fp, byteswap,
00239                  &chksum)
00240                 != s->n_cw) {
00241                 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
00242             }
00243 
00244             /* Normalize and floor */
00245             if (vector_sum_norm(pdf, s->n_cw) <= 0.0)
00246                 n_err++;
00247             vector_floor(pdf, s->n_cw, s->mixwfloor);
00248             vector_sum_norm(pdf, s->n_cw);
00249 
00250             /* Convert to logs3, truncate to 8 bits, and store in s->pdf */
00251             for (c = 0; c < s->n_cw; c++) {
00252                 p = -(logmath_log(lmath, pdf[c]));
00253                 p += (1 << (SENSCR_SHIFT - 1)) - 1; /* Rounding before truncation */
00254 
00255                 if (s->n_gauden > 1)
00256                     s->pdf[i][f][c] =
00257                         (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
00258                 else
00259                     s->pdf[f][c][i] =
00260                         (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
00261             }
00262         }
00263     }
00264     if (n_err > 0)
00265         E_WARN("Weight normalization failed for %d senones\n", n_err);
00266 
00267     ckd_free(pdf);
00268 
00269     if (chksum_present)
00270         bio_verify_chksum(fp, byteswap, chksum);
00271 
00272     if (fread(&eofchk, 1, 1, fp) == 1)
00273         E_FATAL("More data than expected in %s\n", file_name);
00274 
00275     fclose(fp);
00276 
00277     E_INFO
00278         ("Read mixture weights for %d senones: %d features x %d codewords\n",
00279          s->n_sen, s->n_feat, s->n_cw);
00280 
00281     return 1;
00282 }
00283 
00284 
00285 senone_t *
00286 senone_init(gauden_t *g, char const *mixwfile, char const *sen2mgau_map_file,
00287             float32 mixwfloor, logmath_t *lmath, bin_mdef_t *mdef)
00288 {
00289     senone_t *s;
00290     int32 n = 0, i;
00291 
00292     s = (senone_t *) ckd_calloc(1, sizeof(senone_t));
00293     s->lmath = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE);
00294     s->mixwfloor = mixwfloor;
00295 
00296     s->n_gauden = g->n_mgau;
00297     if (sen2mgau_map_file) {
00298         if (!(strcmp(sen2mgau_map_file, ".semi.") == 0
00299               || strcmp(sen2mgau_map_file, ".ptm.") == 0
00300               || strcmp(sen2mgau_map_file, ".cont.") == 0)) {
00301             senone_mgau_map_read(s, sen2mgau_map_file);
00302             n = s->n_sen;
00303         }
00304     }
00305     else {
00306         if (s->n_gauden == 1)
00307             sen2mgau_map_file = ".semi.";
00308         else if (s->n_gauden == bin_mdef_n_ciphone(mdef))
00309             sen2mgau_map_file = ".ptm.";
00310         else
00311             sen2mgau_map_file = ".cont.";
00312     }
00313 
00314     senone_mixw_read(s, mixwfile, lmath);
00315 
00316     if (strcmp(sen2mgau_map_file, ".semi.") == 0) {
00317         /* All-to-1 senones-codebook mapping */
00318         E_INFO("Mapping all senones to one codebook\n");
00319         s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
00320     }
00321     else if (strcmp(sen2mgau_map_file, ".ptm.") == 0) {
00322         /* All-to-ciphone-id senones-codebook mapping */
00323         E_INFO("Mapping senones to context-independent phone codebooks\n");
00324         s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
00325         for (i = 0; i < s->n_sen; i++)
00326             s->mgau[i] = bin_mdef_sen2cimap(mdef, i);
00327     }
00328     else if (strcmp(sen2mgau_map_file, ".cont.") == 0
00329              || strcmp(sen2mgau_map_file, ".s3cont.") == 0) {
00330         /* 1-to-1 senone-codebook mapping */
00331         E_INFO("Mapping senones to individual codebooks\n");
00332         if (s->n_sen <= 1)
00333             E_FATAL("#senone=%d; must be >1\n", s->n_sen);
00334 
00335         s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
00336         for (i = 0; i < s->n_sen; i++)
00337             s->mgau[i] = i;
00338         /* Not sure why this is here, it probably does nothing. */
00339         s->n_gauden = s->n_sen;
00340     }
00341     else {
00342         if (s->n_sen != n)
00343             E_FATAL("#senones inconsistent: %d in %s; %d in %s\n",
00344                     n, sen2mgau_map_file, s->n_sen, mixwfile);
00345     }
00346 
00347     s->featscr = NULL;
00348     return s;
00349 }
00350 
00351 void
00352 senone_free(senone_t * s)
00353 {
00354     if (s == NULL)
00355         return;
00356     if (s->pdf)
00357         ckd_free_3d((void *) s->pdf);
00358     if (s->mgau)
00359         ckd_free(s->mgau);
00360     if (s->featscr)
00361         ckd_free(s->featscr);
00362     logmath_free(s->lmath);
00363     ckd_free(s);
00364 }
00365 
00366 
00367 /*
00368  * Compute senone score for one senone.
00369  * NOTE:  Remember that senone PDF tables contain SCALED, NEGATED logs3 values.
00370  * NOTE:  Remember also that PDF data may be transposed or not depending on s->n_gauden.
00371  */
00372 int32
00373 senone_eval(senone_t * s, int id, gauden_dist_t ** dist, int32 n_top)
00374 {
00375     int32 scr;                  /* total senone score */
00376     int32 fden;                 /* Gaussian density */
00377     int32 fscr;                 /* senone score for one feature */
00378     int32 fwscr;                /* senone score for one feature, one codeword */
00379     int32 f, t;
00380     gauden_dist_t *fdist;
00381 
00382     assert((id >= 0) && (id < s->n_sen));
00383     assert((n_top > 0) && (n_top <= s->n_cw));
00384 
00385     scr = 0;
00386 
00387     for (f = 0; f < s->n_feat; f++) {
00388         int top;
00389         fdist = dist[f];
00390 
00391         /* Top codeword for feature f */
00392         top = fden = ((int32)fdist[0].dist + ((1<<SENSCR_SHIFT) - 1)) >> SENSCR_SHIFT;
00393         fscr = (s->n_gauden > 1)
00394             ? (fden + -s->pdf[id][f][fdist[0].id])  /* untransposed */
00395             : (fden + -s->pdf[f][fdist[0].id][id]); /* transposed */
00396         E_DEBUG(1, ("fden[%d][%d] l+= %d + %d = %d\n",
00397                     id, f, -(fscr - fden), -(fden-top), -(fscr-top)));
00398         /* Remaining of n_top codewords for feature f */
00399         for (t = 1; t < n_top; t++) {
00400             fden = ((int32)fdist[t].dist + ((1<<SENSCR_SHIFT) - 1)) >> SENSCR_SHIFT;
00401             fwscr = (s->n_gauden > 1) ?
00402                 (fden + -s->pdf[id][f][fdist[t].id]) :
00403                 (fden + -s->pdf[f][fdist[t].id][id]);
00404             fscr = logmath_add(s->lmath, fscr, fwscr);
00405             E_DEBUG(1, ("fden[%d][%d] l+= %d + %d = %d\n",
00406                         id, f, -(fwscr - fden), -(fden-top), -(fscr-top)));
00407         }
00408         /* Senone scores are also scaled, negated logs3 values.  Hence
00409          * we have to negate the stuff we calculated above. */
00410         scr -= fscr;
00411     }
00412     /* Downscale scores. */
00413     scr /= s->aw;
00414 
00415     /* Avoid overflowing int16 */
00416     if (scr > 32767)
00417       scr = 32767;
00418     if (scr < -32768)
00419       scr = -32768;
00420     return scr;
00421 }