PocketSphinx  0.6
src/libpocketsphinx/ptm_mgau.c
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2010 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /* System headers */
00039 #include <stdio.h>
00040 #include <stdlib.h>
00041 #include <string.h>
00042 #include <assert.h>
00043 #include <limits.h>
00044 #include <math.h>
00045 #if defined(__ADSPBLACKFIN__)
00046 #elif !defined(_WIN32_WCE)
00047 #include <sys/types.h>
00048 #endif
00049 
00050 #ifndef M_PI 
00051 #define M_PI 3.14159265358979323846 
00052 #endif
00053 
00054 /* SphinxBase headers */
00055 #include <sphinx_config.h>
00056 #include <sphinxbase/cmd_ln.h>
00057 #include <sphinxbase/fixpoint.h>
00058 #include <sphinxbase/ckd_alloc.h>
00059 #include <sphinxbase/bio.h>
00060 #include <sphinxbase/err.h>
00061 #include <sphinxbase/prim_type.h>
00062 
00063 /* Local headers */
00064 #include "tied_mgau_common.h"
00065 #include "ptm_mgau.h"
00066 #include "posixwin32.h"
00067 
00068 static ps_mgaufuncs_t ptm_mgau_funcs = {
00069     "ptm",
00070     &ptm_mgau_frame_eval,      /* frame_eval */
00071     &ptm_mgau_mllr_transform,  /* transform */
00072     &ptm_mgau_free             /* free */
00073 };
00074 
00075 #define COMPUTE_GMM_MAP(_idx)                           \
00076     diff[_idx] = obs[_idx] - mean[_idx];                \
00077     sqdiff[_idx] = MFCCMUL(diff[_idx], diff[_idx]);     \
00078     compl[_idx] = MFCCMUL(sqdiff[_idx], var[_idx]);
00079 #define COMPUTE_GMM_REDUCE(_idx)                \
00080     d = GMMSUB(d, compl[_idx]);
00081 
00082 static void
00083 insertion_sort_topn(ptm_topn_t *topn, int i, int32 d)
00084 {
00085     ptm_topn_t vtmp;
00086     int j;
00087 
00088     topn[i].score = d;
00089     if (i == 0)
00090         return;
00091     vtmp = topn[i];
00092     for (j = i - 1; j >= 0 && d > topn[j].score; j--) {
00093         topn[j + 1] = topn[j];
00094     }
00095     topn[j + 1] = vtmp;
00096 }
00097 
00098 static int
00099 eval_topn(ptm_mgau_t *s, int cb, int feat, mfcc_t *z)
00100 {
00101     ptm_topn_t *topn;
00102     int i, ceplen;
00103 
00104     topn = s->f->topn[cb][feat];
00105     ceplen = s->g->featlen[feat];
00106 
00107     for (i = 0; i < s->max_topn; i++) {
00108         mfcc_t *mean, diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */
00109         mfcc_t *var, d;
00110         mfcc_t *obs;
00111         int32 cw, j;
00112 
00113         cw = topn[i].cw;
00114         mean = s->g->mean[cb][feat][0] + cw * ceplen;
00115         var = s->g->var[cb][feat][0] + cw * ceplen;
00116         d = s->g->det[cb][feat][cw];
00117         obs = z;
00118         for (j = 0; j < ceplen % 4; ++j) {
00119             diff[0] = *obs++ - *mean++;
00120             sqdiff[0] = MFCCMUL(diff[0], diff[0]);
00121             compl[0] = MFCCMUL(sqdiff[0], *var);
00122             d = GMMSUB(d, compl[0]);
00123             ++var;
00124         }
00125         /* We could vectorize this but it's unlikely to make much
00126          * difference as the outer loop here isn't very big. */
00127         for (;j < ceplen; j += 4) {
00128             COMPUTE_GMM_MAP(0);
00129             COMPUTE_GMM_MAP(1);
00130             COMPUTE_GMM_MAP(2);
00131             COMPUTE_GMM_MAP(3);
00132             COMPUTE_GMM_REDUCE(0);
00133             COMPUTE_GMM_REDUCE(1);
00134             COMPUTE_GMM_REDUCE(2);
00135             COMPUTE_GMM_REDUCE(3);
00136             var += 4;
00137             obs += 4;
00138             mean += 4;
00139         }
00140         insertion_sort_topn(topn, i, (int32)d);
00141     }
00142 
00143     return topn[0].score;
00144 }
00145 
00146 /* This looks bad, but it actually isn't.  Less than 1% of eval_cb's
00147  * time is spent doing this. */
00148 static void
00149 insertion_sort_cb(ptm_topn_t **cur, ptm_topn_t *worst, ptm_topn_t *best,
00150                   int cw, int32 intd)
00151 {
00152     for (*cur = worst - 1; *cur >= best && intd >= (*cur)->score; --*cur)
00153         memcpy(*cur + 1, *cur, sizeof(**cur));
00154     ++*cur;
00155     (*cur)->cw = cw;
00156     (*cur)->score = intd;
00157 }
00158 
00159 static int
00160 eval_cb(ptm_mgau_t *s, int cb, int feat, mfcc_t *z)
00161 {
00162     ptm_topn_t *worst, *best, *topn;
00163     mfcc_t *mean;
00164     mfcc_t *var, *det, *detP, *detE;
00165     int32 i, ceplen;
00166 
00167     best = topn = s->f->topn[cb][feat];
00168     worst = topn + (s->max_topn - 1);
00169     mean = s->g->mean[cb][feat][0];
00170     var = s->g->var[cb][feat][0];
00171     det = s->g->det[cb][feat];
00172     detE = det + s->g->n_density;
00173     ceplen = s->g->featlen[feat];
00174 
00175     for (detP = det; detP < detE; ++detP) {
00176         mfcc_t diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */
00177         mfcc_t d, thresh;
00178         mfcc_t *obs;
00179         ptm_topn_t *cur;
00180         int32 cw, j;
00181 
00182         d = *detP;
00183         thresh = (mfcc_t) worst->score; /* Avoid int-to-float conversions */
00184         obs = z;
00185         cw = detP - det;
00186 
00187         /* Unroll the loop starting with the first dimension(s).  In
00188          * theory this might be a bit faster if this Gaussian gets
00189          * "knocked out" by C0. In practice not. */
00190         for (j = 0; (j < ceplen % 4) && (d >= thresh); ++j) {
00191             diff[0] = *obs++ - *mean++;
00192             sqdiff[0] = MFCCMUL(diff[0], diff[0]);
00193             compl[0] = MFCCMUL(sqdiff[0], *var++);
00194             d = GMMSUB(d, compl[0]);
00195         }
00196         /* Now do 4 dimensions at a time.  You'd think that GCC would
00197          * vectorize this?  Apparently not.  And it's right, because
00198          * that won't make this any faster, at least on x86-64. */
00199         for (; j < ceplen && d >= thresh; j += 4) {
00200             COMPUTE_GMM_MAP(0);
00201             COMPUTE_GMM_MAP(1);
00202             COMPUTE_GMM_MAP(2);
00203             COMPUTE_GMM_MAP(3);
00204             COMPUTE_GMM_REDUCE(0);
00205             COMPUTE_GMM_REDUCE(1);
00206             COMPUTE_GMM_REDUCE(2);
00207             COMPUTE_GMM_REDUCE(3);
00208             var += 4;
00209             obs += 4;
00210             mean += 4;
00211         }
00212         if (j < ceplen) {
00213             /* terminated early, so not in topn */
00214             mean += (ceplen - j);
00215             var += (ceplen - j);
00216             continue;
00217         }
00218         if (d < thresh)
00219             continue;
00220         for (i = 0; i < s->max_topn; i++) {
00221             /* already there, so don't need to insert */
00222             if (topn[i].cw == cw)
00223                 break;
00224         }
00225         if (i < s->max_topn)
00226             continue;       /* already there.  Don't insert */
00227         insertion_sort_cb(&cur, worst, best, cw, (int32)d);
00228     }
00229 
00230     return best->score;
00231 }
00232 
00236 static int
00237 ptm_mgau_codebook_eval(ptm_mgau_t *s, mfcc_t **z, int frame)
00238 {
00239     int i, j;
00240 
00241     /* First evaluate top-N from previous frame. */
00242     for (i = 0; i < s->g->n_mgau; ++i)
00243         for (j = 0; j < s->g->n_feat; ++j)
00244             eval_topn(s, i, j, z[j]);
00245 
00246     /* If frame downsampling is in effect, possibly do nothing else. */
00247     if (frame % s->ds_ratio)
00248         return 0;
00249 
00250     /* Evaluate remaining codebooks. */
00251     for (i = 0; i < s->g->n_mgau; ++i) {
00252         if (bitvec_is_clear(s->f->mgau_active, i))
00253             continue;
00254         for (j = 0; j < s->g->n_feat; ++j) {
00255             eval_cb(s, i, j, z[j]);
00256         }
00257     }
00258 
00259     /* Normalize densities to produce "posterior probabilities",
00260      * i.e. things with a reasonable dynamic range, then scale and
00261      * clamp them to the acceptable range.  This is actually done
00262      * solely to ensure that we can use fast_logmath_add().  Note that
00263      * unless we share the same normalizer across all codebooks for
00264      * each feature stream we get defective scores (that's why these
00265      * loops are inside out - doing it per-feature should give us
00266      * greater precision). */
00267     for (j = 0; j < s->g->n_feat; ++j) {
00268         int32 norm = 0x7fffffff;
00269         for (i = 0; i < s->g->n_mgau; ++i) {
00270             if (bitvec_is_clear(s->f->mgau_active, i))
00271                 continue;
00272             if (norm > s->f->topn[i][j][0].score >> SENSCR_SHIFT)
00273                 norm = s->f->topn[i][j][0].score >> SENSCR_SHIFT;
00274         }
00275         assert(norm != 0x7fffffff);
00276         for (i = 0; i < s->g->n_mgau; ++i) {
00277             int32 k;
00278             if (bitvec_is_clear(s->f->mgau_active, i))
00279                 continue;
00280             for (k = 0; k < s->max_topn; ++k) {
00281                 s->f->topn[i][j][k].score >>= SENSCR_SHIFT;
00282                 s->f->topn[i][j][k].score -= norm;
00283                 s->f->topn[i][j][k].score = -s->f->topn[i][j][k].score;
00284                 if (s->f->topn[i][j][k].score > MAX_NEG_ASCR) 
00285                     s->f->topn[i][j][k].score = MAX_NEG_ASCR;
00286             }
00287         }
00288     }
00289 
00290     return 0;
00291 }
00292 
00293 static int
00294 ptm_mgau_calc_cb_active(ptm_mgau_t *s, uint8 *senone_active,
00295                         int32 n_senone_active, int compallsen)
00296 {
00297     int i, lastsen;
00298 
00299     if (compallsen) {
00300         bitvec_set_all(s->f->mgau_active, s->g->n_mgau);
00301         return 0;
00302     }
00303     bitvec_clear_all(s->f->mgau_active, s->g->n_mgau);
00304     for (lastsen = i = 0; i < n_senone_active; ++i) {
00305         int sen = senone_active[i] + lastsen;
00306         int cb = s->sen2cb[sen];
00307         bitvec_set(s->f->mgau_active, cb);
00308         lastsen = sen;
00309     }
00310     E_DEBUG(1, ("Active codebooks:"));
00311     for (i = 0; i < s->g->n_mgau; ++i) {
00312         if (bitvec_is_clear(s->f->mgau_active, i))
00313             continue;
00314         E_DEBUGCONT(1, (" %d", i));
00315     }
00316     E_DEBUGCONT(1, ("\n"));
00317     return 0;
00318 }
00319 
00323 static int
00324 ptm_mgau_senone_eval(ptm_mgau_t *s, int16 *senone_scores,
00325                      uint8 *senone_active, int32 n_senone_active,
00326                      int compall)
00327 {
00328     int i, lastsen, bestscore;
00329 
00330     memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
00331     /* FIXME: This is the non-cache-efficient way to do this.  We want
00332      * to evaluate one codeword at a time but this requires us to have
00333      * a reverse codebook to senone mapping, which we don't have
00334      * (yet), since different codebooks have different top-N
00335      * codewords. */
00336     if (compall)
00337         n_senone_active = s->n_sen;
00338     bestscore = 0x7fffffff;
00339     for (lastsen = i = 0; i < n_senone_active; ++i) {
00340         int sen, f, cb;
00341         int ascore;
00342 
00343         if (compall)
00344             sen = i;
00345         else
00346             sen = senone_active[i] + lastsen;
00347         lastsen = sen;
00348         cb = s->sen2cb[sen];
00349 
00350         if (bitvec_is_clear(s->f->mgau_active, cb)) {
00351             int j;
00352             /* Because senone_active is deltas we can't really "knock
00353              * out" senones from pruned codebooks, and in any case,
00354              * it wouldn't make any difference to the search code,
00355              * which doesn't expect senone_active to change. */
00356             for (f = 0; f < s->g->n_feat; ++f) {
00357                 for (j = 0; j < s->max_topn; ++j) {
00358                     s->f->topn[cb][f][j].score = MAX_NEG_ASCR;
00359                 }
00360             }
00361         }
00362         /* For each feature, log-sum codeword scores + mixw to get
00363          * feature density, then sum (multiply) to get ascore */
00364         ascore = 0;
00365         for (f = 0; f < s->g->n_feat; ++f) {
00366             ptm_topn_t *topn;
00367             int j, fden = 0;
00368             topn = s->f->topn[cb][f];
00369             for (j = 0; j < s->max_topn; ++j) {
00370                 int mixw;
00371                 /* Find mixture weight for this codeword. */
00372                 if (s->mixw_cb) {
00373                     int dcw = s->mixw[f][topn[j].cw][sen/2];
00374                     dcw = (dcw & 1) ? dcw >> 4 : dcw & 0x0f;
00375                     mixw = s->mixw_cb[dcw];
00376                 }
00377                 else {
00378                     mixw = s->mixw[f][topn[j].cw][sen];
00379                 }
00380                 if (j == 0)
00381                     fden = mixw + topn[j].score;
00382                 else
00383                     fden = fast_logmath_add(s->lmath_8b, fden,
00384                                        mixw + topn[j].score);
00385                 E_DEBUG(3, ("fden[%d][%d] l+= %d + %d = %d\n",
00386                             sen, f, mixw, topn[j].score, fden));
00387             }
00388             ascore += fden;
00389         }
00390         if (ascore < bestscore) bestscore = ascore;
00391         senone_scores[sen] = ascore;
00392     }
00393     /* Normalize the scores again (finishing the job we started above
00394      * in ptm_mgau_codebook_eval...) */
00395     for (i = 0; i < s->n_sen; ++i) {
00396         senone_scores[i] -= bestscore;
00397     }
00398 
00399     return 0;
00400 }
00401 
00405 int32
00406 ptm_mgau_frame_eval(ps_mgau_t *ps,
00407                     int16 *senone_scores,
00408                     uint8 *senone_active,
00409                     int32 n_senone_active,
00410                     mfcc_t ** featbuf, int32 frame,
00411                     int32 compallsen)
00412 {
00413     ptm_mgau_t *s = (ptm_mgau_t *)ps;
00414     int fast_eval_idx;
00415 
00416     /* Find the appropriate frame in the rotating history buffer
00417      * corresponding to the requested input frame.  No bounds checking
00418      * is done here, which just means you'll get semi-random crap if
00419      * you request a frame in the future or one that's too far in the
00420      * past.  Since the history buffer is just used for fast match
00421      * that might not be fatal. */
00422     fast_eval_idx = frame % s->n_fast_hist;
00423     s->f = s->hist + fast_eval_idx;
00424     /* Compute the top-N codewords for every codebook, unless this
00425      * is a past frame, in which case we already have them (we
00426      * hope!) */
00427     if (frame >= ps_mgau_base(ps)->frame_idx) {
00428         ptm_fast_eval_t *lastf;
00429         /* Get the previous frame's top-N information (on the
00430          * first frame of the input this is just all WORST_DIST,
00431          * no harm in that) */
00432         if (fast_eval_idx == 0)
00433             lastf = s->hist + s->n_fast_hist - 1;
00434         else
00435             lastf = s->hist + fast_eval_idx - 1;
00436         /* Copy in initial top-N info */
00437         memcpy(s->f->topn[0][0], lastf->topn[0][0],
00438                s->g->n_mgau * s->g->n_feat * s->max_topn * sizeof(ptm_topn_t));
00439         /* Generate initial active codebook list (this might not be
00440          * necessary) */
00441         ptm_mgau_calc_cb_active(s, senone_active, n_senone_active, compallsen);
00442         /* Now evaluate top-N, prune, and evaluate remaining codebooks. */
00443         ptm_mgau_codebook_eval(s, featbuf, frame);
00444     }
00445     /* Evaluate intersection of active senones and active codebooks. */
00446     ptm_mgau_senone_eval(s, senone_scores, senone_active,
00447                          n_senone_active, compallsen);
00448 
00449     return 0;
00450 }
00451 
00452 static int32
00453 read_sendump(ptm_mgau_t *s, bin_mdef_t *mdef, char const *file)
00454 {
00455     FILE *fp;
00456     char line[1000];
00457     int32 i, n, r, c;
00458     int32 do_swap, do_mmap;
00459     size_t filesize, offset;
00460     int n_clust = 0;
00461     int n_feat = s->g->n_feat;
00462     int n_density = s->g->n_density;
00463     int n_sen = bin_mdef_n_sen(mdef);
00464     int n_bits = 8;
00465 
00466     s->n_sen = n_sen; /* FIXME: Should have been done earlier */
00467     do_mmap = cmd_ln_boolean_r(s->config, "-mmap");
00468 
00469     if ((fp = fopen(file, "rb")) == NULL)
00470         return -1;
00471 
00472     E_INFO("Loading senones from dump file %s\n", file);
00473     /* Read title size, title */
00474     if (fread(&n, sizeof(int32), 1, fp) != 1) {
00475         E_ERROR_SYSTEM("Failed to read title size from %s", file);
00476         goto error_out;
00477     }
00478     /* This is extremely bogus */
00479     do_swap = 0;
00480     if (n < 1 || n > 999) {
00481         SWAP_INT32(&n);
00482         if (n < 1 || n > 999) {
00483             E_ERROR("Title length %x in dump file %s out of range\n", n, file);
00484             goto error_out;
00485         }
00486         do_swap = 1;
00487     }
00488     if (fread(line, sizeof(char), n, fp) != n) {
00489         E_ERROR_SYSTEM("Cannot read title");
00490         goto error_out;
00491     }
00492     if (line[n - 1] != '\0') {
00493         E_ERROR("Bad title in dump file\n");
00494         goto error_out;
00495     }
00496     E_INFO("%s\n", line);
00497 
00498     /* Read header size, header */
00499     if (fread(&n, sizeof(n), 1, fp) != 1) {
00500         E_ERROR_SYSTEM("Failed to read header size from %s", file);
00501         goto error_out;
00502     }
00503     if (do_swap) SWAP_INT32(&n);
00504     if (fread(line, sizeof(char), n, fp) != n) {
00505         E_ERROR_SYSTEM("Cannot read header");
00506         goto error_out;
00507     }
00508     if (line[n - 1] != '\0') {
00509         E_ERROR("Bad header in dump file\n");
00510         goto error_out;
00511     }
00512 
00513     /* Read other header strings until string length = 0 */
00514     for (;;) {
00515         if (fread(&n, sizeof(n), 1, fp) != 1) {
00516             E_ERROR_SYSTEM("Failed to read header string size from %s", file);
00517             goto error_out;
00518         }
00519         if (do_swap) SWAP_INT32(&n);
00520         if (n == 0)
00521             break;
00522         if (fread(line, sizeof(char), n, fp) != n) {
00523             E_ERROR_SYSTEM("Cannot read header");
00524             goto error_out;
00525         }
00526         /* Look for a cluster count, if present */
00527         if (!strncmp(line, "feature_count ", strlen("feature_count "))) {
00528             n_feat = atoi(line + strlen("feature_count "));
00529         }
00530         if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) {
00531             n_density = atoi(line + strlen("mixture_count "));
00532         }
00533         if (!strncmp(line, "model_count ", strlen("model_count "))) {
00534             n_sen = atoi(line + strlen("model_count "));
00535         }
00536         if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
00537             n_clust = atoi(line + strlen("cluster_count "));
00538         }
00539         if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) {
00540             n_bits = atoi(line + strlen("cluster_bits "));
00541         }
00542     }
00543 
00544     /* Defaults for #rows, #columns in mixw array. */
00545     c = n_sen;
00546     r = n_density;
00547     if (n_clust == 0) {
00548         /* Older mixw files have them here, and they might be padded. */
00549         if (fread(&r, sizeof(r), 1, fp) != 1) {
00550             E_ERROR_SYSTEM("Cannot read #rows");
00551             goto error_out;
00552         }
00553         if (do_swap) SWAP_INT32(&r);
00554         if (fread(&c, sizeof(c), 1, fp) != 1) {
00555             E_ERROR_SYSTEM("Cannot read #columns");
00556             goto error_out;
00557         }
00558         if (do_swap) SWAP_INT32(&c);
00559         E_INFO("Rows: %d, Columns: %d\n", r, c);
00560     }
00561 
00562     if (n_feat != s->g->n_feat) {
00563         E_ERROR("Number of feature streams mismatch: %d != %d\n",
00564                 n_feat, s->g->n_feat);
00565         goto error_out;
00566     }
00567     if (n_density != s->g->n_density) {
00568         E_ERROR("Number of densities mismatch: %d != %d\n",
00569                 n_density, s->g->n_density);
00570         goto error_out;
00571     }
00572     if (n_sen != s->n_sen) {
00573         E_ERROR("Number of senones mismatch: %d != %d\n",
00574                 n_sen, s->n_sen);
00575         goto error_out;
00576     }
00577 
00578     if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
00579         E_ERROR("Cluster count must be 0, 15, or 16\n");
00580         goto error_out;
00581     }
00582     if (n_clust == 15)
00583         ++n_clust;
00584 
00585     if (!((n_bits == 8) || (n_bits == 4))) {
00586         E_ERROR("Cluster count must be 4 or 8\n");
00587         goto error_out;
00588     }
00589 
00590     if (do_mmap) {
00591             E_INFO("Using memory-mapped I/O for senones\n");
00592     }
00593     offset = ftell(fp);
00594     fseek(fp, 0, SEEK_END);
00595     filesize = ftell(fp);
00596     fseek(fp, offset, SEEK_SET);
00597 
00598     /* Allocate memory for pdfs (or memory map them) */
00599     if (do_mmap) {
00600         s->sendump_mmap = mmio_file_read(file);
00601         /* Get cluster codebook if any. */
00602         if (n_clust) {
00603             s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
00604             offset += n_clust;
00605         }
00606     }
00607     else {
00608         /* Get cluster codebook if any. */
00609         if (n_clust) {
00610             s->mixw_cb = ckd_calloc(1, n_clust);
00611             if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) {
00612                 E_ERROR("Failed to read %d bytes from sendump\n", n_clust);
00613                 goto error_out;
00614             }
00615         }
00616     }
00617 
00618     /* Set up pointers, or read, or whatever */
00619     if (s->sendump_mmap) {
00620         s->mixw = ckd_calloc_2d(n_feat, n_density, sizeof(*s->mixw));
00621         for (n = 0; n < n_feat; n++) {
00622             int step = c;
00623             if (n_bits == 4)
00624                 step = (step + 1) / 2;
00625             for (i = 0; i < r; i++) {
00626                 s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
00627                 offset += step;
00628             }
00629         }
00630     }
00631     else {
00632         s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw));
00633         /* Read pdf values and ids */
00634         for (n = 0; n < n_feat; n++) {
00635             int step = c;
00636             if (n_bits == 4)
00637                 step = (step + 1) / 2;
00638             for (i = 0; i < r; i++) {
00639                 if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp)
00640                     != (size_t) step) {
00641                     E_ERROR("Failed to read %d bytes from sendump\n", step);
00642                     goto error_out;
00643                 }
00644             }
00645         }
00646     }
00647 
00648     fclose(fp);
00649     return 0;
00650 error_out:
00651     fclose(fp);
00652     return -1;
00653 }
00654 
00655 static int32
00656 read_mixw(ptm_mgau_t * s, char const *file_name, double SmoothMin)
00657 {
00658     char **argname, **argval;
00659     char eofchk;
00660     FILE *fp;
00661     int32 byteswap, chksum_present;
00662     uint32 chksum;
00663     float32 *pdf;
00664     int32 i, f, c, n;
00665     int32 n_sen;
00666     int32 n_feat;
00667     int32 n_comp;
00668     int32 n_err;
00669 
00670     E_INFO("Reading mixture weights file '%s'\n", file_name);
00671 
00672     if ((fp = fopen(file_name, "rb")) == NULL)
00673         E_FATAL("Failed to open mixture file '%s' for reading: %s\n", file_name, strerror(errno));
00674 
00675     /* Read header, including argument-value info and 32-bit byteorder magic */
00676     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
00677         E_FATAL("Failed to read header from '%s'\n", file_name);
00678 
00679     /* Parse argument-value list */
00680     chksum_present = 0;
00681     for (i = 0; argname[i]; i++) {
00682         if (strcmp(argname[i], "version") == 0) {
00683             if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
00684                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
00685                        file_name, argval[i], MGAU_MIXW_VERSION);
00686         }
00687         else if (strcmp(argname[i], "chksum0") == 0) {
00688             chksum_present = 1; /* Ignore the associated value */
00689         }
00690     }
00691     bio_hdrarg_free(argname, argval);
00692     argname = argval = NULL;
00693 
00694     chksum = 0;
00695 
00696     /* Read #senones, #features, #codewords, arraysize */
00697     if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
00698         || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) !=
00699             1)
00700         || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) !=
00701             1)
00702         || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
00703         E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
00704     }
00705     if (n_feat != s->g->n_feat)
00706         E_FATAL("#Features streams(%d) != %d\n", n_feat, s->g->n_feat);
00707     if (n != n_sen * n_feat * n_comp) {
00708         E_FATAL
00709             ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
00710              file_name, i, n_sen, n_feat, n_comp);
00711     }
00712 
00713     /* n_sen = number of mixture weights per codeword, which is
00714      * fixed at the number of senones since we have only one codebook.
00715      */
00716     s->n_sen = n_sen;
00717 
00718     /* Quantized mixture weight arrays. */
00719     s->mixw = ckd_calloc_3d(s->g->n_feat, s->g->n_density,
00720                             n_sen, sizeof(***s->mixw));
00721 
00722     /* Temporary structure to read in floats before conversion to (int32) logs3 */
00723     pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));
00724 
00725     /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
00726     n_err = 0;
00727     for (i = 0; i < n_sen; i++) {
00728         for (f = 0; f < n_feat; f++) {
00729             if (bio_fread((void *) pdf, sizeof(float32),
00730                           n_comp, fp, byteswap, &chksum) != n_comp) {
00731                 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
00732             }
00733 
00734             /* Normalize and floor */
00735             if (vector_sum_norm(pdf, n_comp) <= 0.0)
00736                 n_err++;
00737             vector_floor(pdf, n_comp, SmoothMin);
00738             vector_sum_norm(pdf, n_comp);
00739 
00740             /* Convert to LOG, quantize, and transpose */
00741             for (c = 0; c < n_comp; c++) {
00742                 int32 qscr;
00743 
00744                 qscr = -logmath_log(s->lmath_8b, pdf[c]);
00745                 if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
00746                     qscr = MAX_NEG_MIXW;
00747                 s->mixw[f][c][i] = qscr;
00748             }
00749         }
00750     }
00751     if (n_err > 0)
00752         E_WARN("Weight normalization failed for %d senones\n", n_err);
00753 
00754     ckd_free(pdf);
00755 
00756     if (chksum_present)
00757         bio_verify_chksum(fp, byteswap, chksum);
00758 
00759     if (fread(&eofchk, 1, 1, fp) == 1)
00760         E_FATAL("More data than expected in %s\n", file_name);
00761 
00762     fclose(fp);
00763 
00764     E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
00765     return n_sen;
00766 }
00767 
00768 ps_mgau_t *
00769 ptm_mgau_init(acmod_t *acmod, bin_mdef_t *mdef)
00770 {
00771     ptm_mgau_t *s;
00772     ps_mgau_t *ps;
00773     char const *sendump_path;
00774     int i;
00775 
00776     s = ckd_calloc(1, sizeof(*s));
00777     s->config = acmod->config;
00778 
00779     s->lmath = logmath_retain(acmod->lmath);
00780     /* Log-add table. */
00781     s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE);
00782     if (s->lmath_8b == NULL)
00783         goto error_out;
00784     /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */
00785     if (logmath_get_width(s->lmath_8b) != 1) {
00786         E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
00787                 logmath_get_base(s->lmath_8b));
00788         goto error_out;
00789     }
00790 
00791     /* Read means and variances. */
00792     if ((s->g = gauden_init(cmd_ln_str_r(s->config, "-mean"),
00793                             cmd_ln_str_r(s->config, "-var"),
00794                             cmd_ln_float32_r(s->config, "-varfloor"),
00795                             s->lmath)) == NULL)
00796         goto error_out;
00797     /* We only support 256 codebooks or less (like 640k or 2GB, this
00798      * should be enough for anyone) */
00799     if (s->g->n_mgau > 256) {
00800         E_INFO("Number of codebooks exceeds 256: %d\n", s->g->n_mgau);
00801         goto error_out;
00802     }
00803     if (s->g->n_mgau != bin_mdef_n_ciphone(mdef)) {
00804         E_INFO("Number of codebooks doesn't match number of ciphones, doesn't look like PTM: %d %d\n", s->g->n_mgau, bin_mdef_n_ciphone(mdef));
00805         goto error_out;
00806     }
00807     /* Verify n_feat and veclen, against acmod. */
00808     if (s->g->n_feat != feat_dimension1(acmod->fcb)) {
00809         E_ERROR("Number of streams does not match: %d != %d\n",
00810                 s->g->n_feat, feat_dimension(acmod->fcb));
00811         goto error_out;
00812     }
00813     for (i = 0; i < s->g->n_feat; ++i) {
00814         if (s->g->featlen[i] != feat_dimension2(acmod->fcb, i)) {
00815             E_ERROR("Dimension of stream %d does not match: %d != %d\n",
00816                     s->g->featlen[i], feat_dimension2(acmod->fcb, i));
00817             goto error_out;
00818         }
00819     }
00820     /* Read mixture weights. */
00821     if ((sendump_path = cmd_ln_str_r(s->config, "-sendump"))) {
00822         if (read_sendump(s, acmod->mdef, sendump_path) < 0) {
00823             goto error_out;
00824         }
00825     }
00826     else {
00827         if (read_mixw(s, cmd_ln_str_r(s->config, "-mixw"),
00828                       cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) {
00829             goto error_out;
00830         }
00831     }
00832     s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");
00833     s->max_topn = cmd_ln_int32_r(s->config, "-topn");
00834     E_INFO("Maximum top-N: %d\n", s->max_topn);
00835 
00836     /* Assume mapping of senones to their base phones, though this
00837      * will become more flexible in the future. */
00838     s->sen2cb = ckd_calloc(s->n_sen, sizeof(*s->sen2cb));
00839     for (i = 0; i < s->n_sen; ++i)
00840         s->sen2cb[i] = bin_mdef_sen2cimap(acmod->mdef, i);
00841 
00842     /* Allocate fast-match history buffers.  We need enough for the
00843      * phoneme lookahead window, plus the current frame, plus one for
00844      * good measure? (FIXME: I don't remember why) */
00845     s->n_fast_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
00846     s->hist = ckd_calloc(s->n_fast_hist, sizeof(*s->hist));
00847     /* s->f will be a rotating pointer into s->hist. */
00848     s->f = s->hist;
00849     for (i = 0; i < s->n_fast_hist; ++i) {
00850         int j, k, m;
00851         /* Top-N codewords for every codebook and feature. */
00852         s->hist[i].topn = ckd_calloc_3d(s->g->n_mgau, s->g->n_feat,
00853                                         s->max_topn, sizeof(ptm_topn_t));
00854         /* Initialize them to sane (yet arbitrary) defaults. */
00855         for (j = 0; j < s->g->n_mgau; ++j) {
00856             for (k = 0; k < s->g->n_feat; ++k) {
00857                 for (m = 0; m < s->max_topn; ++m) {
00858                     s->hist[i].topn[j][k][m].cw = m;
00859                     s->hist[i].topn[j][k][m].score = WORST_DIST;
00860                 }
00861             }
00862         }
00863         /* Active codebook mapping (just codebook, not features,
00864            at least not yet) */
00865         s->hist[i].mgau_active = bitvec_alloc(s->g->n_mgau);
00866         /* Start with them all on, prune them later. */
00867         bitvec_set_all(s->hist[i].mgau_active, s->g->n_mgau);
00868     }
00869 
00870     ps = (ps_mgau_t *)s;
00871     ps->vt = &ptm_mgau_funcs;
00872     return ps;
00873 error_out:
00874     ptm_mgau_free(ps_mgau_base(s));
00875     return NULL;
00876 }
00877 
00878 int
00879 ptm_mgau_mllr_transform(ps_mgau_t *ps,
00880                             ps_mllr_t *mllr)
00881 {
00882     ptm_mgau_t *s = (ptm_mgau_t *)ps;
00883     return gauden_mllr_transform(s->g, mllr, s->config);
00884 }
00885 
00886 void
00887 ptm_mgau_free(ps_mgau_t *ps)
00888 {
00889     ptm_mgau_t *s = (ptm_mgau_t *)ps;
00890 
00891     logmath_free(s->lmath);
00892     logmath_free(s->lmath_8b);
00893     if (s->sendump_mmap) {
00894         ckd_free_2d(s->mixw); 
00895         mmio_file_unmap(s->sendump_mmap);
00896     }
00897     else {
00898         ckd_free_3d(s->mixw);
00899     }
00900     ckd_free(s->sen2cb);
00901     gauden_free(s->g);
00902     ckd_free(s);
00903 }