PocketSphinx  0.6
src/libpocketsphinx/s2_semi_mgau.c
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /* System headers */
00039 #include <stdio.h>
00040 #include <stdlib.h>
00041 #include <string.h>
00042 #include <assert.h>
00043 #include <limits.h>
00044 #include <math.h>
00045 #if defined(__ADSPBLACKFIN__)
00046 #elif !defined(_WIN32_WCE)
00047 #include <sys/types.h>
00048 #endif
00049 
00050 #ifndef M_PI 
00051 #define M_PI 3.14159265358979323846 
00052 #endif
00053 
00054 /* SphinxBase headers */
00055 #include <sphinx_config.h>
00056 #include <sphinxbase/cmd_ln.h>
00057 #include <sphinxbase/fixpoint.h>
00058 #include <sphinxbase/ckd_alloc.h>
00059 #include <sphinxbase/bio.h>
00060 #include <sphinxbase/err.h>
00061 #include <sphinxbase/prim_type.h>
00062 
00063 /* Local headers */
00064 #include "s2_semi_mgau.h"
00065 #include "tied_mgau_common.h"
00066 #include "posixwin32.h"
00067 
00068 static ps_mgaufuncs_t s2_semi_mgau_funcs = {
00069     "s2_semi",
00070     &s2_semi_mgau_frame_eval,      /* frame_eval */
00071     &s2_semi_mgau_mllr_transform,  /* transform */
00072     &s2_semi_mgau_free             /* free */
00073 };
00074 
00075 struct vqFeature_s {
00076     int32 score; /* score or distance */
00077     int32 codeword; /* codeword (vector index) */
00078 };
00079 
00080 static void
00081 eval_topn(s2_semi_mgau_t *s, int32 feat, mfcc_t *z)
00082 {
00083     int i, ceplen;
00084     vqFeature_t *topn;
00085 
00086     topn = s->f[feat];
00087     ceplen = s->veclen[feat];
00088 
00089     for (i = 0; i < s->max_topn; i++) {
00090         mfcc_t *mean, diff, sqdiff, compl; /* diff, diff^2, component likelihood */
00091         vqFeature_t vtmp;
00092         mfcc_t *var, d;
00093         mfcc_t *obs;
00094         int32 cw, j;
00095 
00096         cw = topn[i].codeword;
00097         mean = s->means[feat][0] + cw * ceplen;
00098         var = s->vars[feat][0] + cw * ceplen;
00099         d = s->dets[feat][cw];
00100         obs = z;
00101         for (j = 0; j < ceplen; j++) {
00102             diff = *obs++ - *mean++;
00103             sqdiff = MFCCMUL(diff, diff);
00104             compl = MFCCMUL(sqdiff, *var);
00105             d = GMMSUB(d, compl);
00106             ++var;
00107         }
00108         topn[i].score = (int32)d;
00109         if (i == 0)
00110             continue;
00111         vtmp = topn[i];
00112         for (j = i - 1; j >= 0 && (int32)d > topn[j].score; j--) {
00113             topn[j + 1] = topn[j];
00114         }
00115         topn[j + 1] = vtmp;
00116     }
00117 }
00118 
00119 static void
00120 eval_cb(s2_semi_mgau_t *s, int32 feat, mfcc_t *z)
00121 {
00122     vqFeature_t *worst, *best, *topn;
00123     mfcc_t *mean;
00124     mfcc_t *var, *det, *detP, *detE;
00125     int32 i, ceplen;
00126 
00127     best = topn = s->f[feat];
00128     worst = topn + (s->max_topn - 1);
00129     mean = s->means[feat][0];
00130     var = s->vars[feat][0];
00131     det = s->dets[feat];
00132     detE = det + s->n_density;
00133     ceplen = s->veclen[feat];
00134 
00135     for (detP = det; detP < detE; ++detP) {
00136         mfcc_t diff, sqdiff, compl; /* diff, diff^2, component likelihood */
00137         mfcc_t d;
00138         mfcc_t *obs;
00139         vqFeature_t *cur;
00140         int32 cw, j;
00141 
00142         d = *detP;
00143         obs = z;
00144         cw = detP - det;
00145         for (j = 0; (j < ceplen) && (d >= worst->score); ++j) {
00146             diff = *obs++ - *mean++;
00147             sqdiff = MFCCMUL(diff, diff);
00148             compl = MFCCMUL(sqdiff, *var);
00149             d = GMMSUB(d, compl);
00150             ++var;
00151         }
00152         if (j < ceplen) {
00153             /* terminated early, so not in topn */
00154             mean += (ceplen - j);
00155             var += (ceplen - j);
00156             continue;
00157         }
00158         if ((int32)d < worst->score)
00159             continue;
00160         for (i = 0; i < s->max_topn; i++) {
00161             /* already there, so don't need to insert */
00162             if (topn[i].codeword == cw)
00163                 break;
00164         }
00165         if (i < s->max_topn)
00166             continue;       /* already there.  Don't insert */
00167         /* remaining code inserts codeword and dist in correct spot */
00168         for (cur = worst - 1; cur >= best && (int32)d >= cur->score; --cur)
00169             memcpy(cur + 1, cur, sizeof(vqFeature_t));
00170         ++cur;
00171         cur->codeword = cw;
00172         cur->score = (int32)d;
00173     }
00174 }
00175 
00176 static void
00177 mgau_dist(s2_semi_mgau_t * s, int32 frame, int32 feat, mfcc_t * z)
00178 {
00179     eval_topn(s, feat, z);
00180 
00181     /* If this frame is skipped, do nothing else. */
00182     if (frame % s->ds_ratio)
00183         return;
00184 
00185     /* Evaluate the rest of the codebook (or subset thereof). */
00186     eval_cb(s, feat, z);
00187 }
00188 
00189 static int
00190 mgau_norm(s2_semi_mgau_t *s, int feat)
00191 {
00192     int32 norm;
00193     int j;
00194 
00195     /* Compute quantized normalizing constant. */
00196     norm = s->f[feat][0].score >> SENSCR_SHIFT;
00197 
00198     /* Normalize the scores, negate them, and clamp their dynamic range. */
00199     for (j = 0; j < s->max_topn; ++j) {
00200         s->f[feat][j].score = -((s->f[feat][j].score >> SENSCR_SHIFT) - norm);
00201         if (s->f[feat][j].score > MAX_NEG_ASCR)
00202             s->f[feat][j].score = MAX_NEG_ASCR;
00203         if (s->topn_beam[feat] && s->f[feat][j].score > s->topn_beam[feat])
00204             break;
00205     }
00206     return j;
00207 }
00208 
00209 static int32
00210 get_scores_8b_feat_6(s2_semi_mgau_t * s, int i,
00211                      int16 *senone_scores, uint8 *senone_active,
00212                      int32 n_senone_active)
00213 {
00214     int32 j, l;
00215     uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5;
00216 
00217     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00218     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00219     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00220     pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00221     pid_cw4 = s->mixw[i][s->f[i][4].codeword];
00222     pid_cw5 = s->mixw[i][s->f[i][5].codeword];
00223 
00224     for (l = j = 0; j < n_senone_active; j++) {
00225         int sen = senone_active[j] + l;
00226         int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00227 
00228         tmp = fast_logmath_add(s->lmath_8b, tmp,
00229                                pid_cw1[sen] + s->f[i][1].score);
00230         tmp = fast_logmath_add(s->lmath_8b, tmp,
00231                                pid_cw2[sen] + s->f[i][2].score);
00232         tmp = fast_logmath_add(s->lmath_8b, tmp,
00233                                pid_cw3[sen] + s->f[i][3].score);
00234         tmp = fast_logmath_add(s->lmath_8b, tmp,
00235                                pid_cw4[sen] + s->f[i][4].score);
00236         tmp = fast_logmath_add(s->lmath_8b, tmp,
00237                                pid_cw5[sen] + s->f[i][5].score);
00238 
00239         senone_scores[sen] += tmp;
00240         l = sen;
00241     }
00242     return 0;
00243 }
00244 
00245 static int32
00246 get_scores_8b_feat_5(s2_semi_mgau_t * s, int i,
00247                      int16 *senone_scores, uint8 *senone_active,
00248                      int32 n_senone_active)
00249 {
00250     int32 j, l;
00251     uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4;
00252 
00253     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00254     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00255     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00256     pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00257     pid_cw4 = s->mixw[i][s->f[i][4].codeword];
00258 
00259     for (l = j = 0; j < n_senone_active; j++) {
00260         int sen = senone_active[j] + l;
00261         int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00262 
00263         tmp = fast_logmath_add(s->lmath_8b, tmp,
00264                                pid_cw1[sen] + s->f[i][1].score);
00265         tmp = fast_logmath_add(s->lmath_8b, tmp,
00266                                pid_cw2[sen] + s->f[i][2].score);
00267         tmp = fast_logmath_add(s->lmath_8b, tmp,
00268                                pid_cw3[sen] + s->f[i][3].score);
00269         tmp = fast_logmath_add(s->lmath_8b, tmp,
00270                                pid_cw4[sen] + s->f[i][4].score);
00271 
00272         senone_scores[sen] += tmp;
00273         l = sen;
00274     }
00275     return 0;
00276 }
00277 
00278 static int32
00279 get_scores_8b_feat_4(s2_semi_mgau_t * s, int i,
00280                      int16 *senone_scores, uint8 *senone_active,
00281                      int32 n_senone_active)
00282 {
00283     int32 j, l;
00284     uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
00285 
00286     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00287     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00288     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00289     pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00290 
00291     for (l = j = 0; j < n_senone_active; j++) {
00292         int sen = senone_active[j] + l;
00293         int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00294 
00295         tmp = fast_logmath_add(s->lmath_8b, tmp,
00296                                pid_cw1[sen] + s->f[i][1].score);
00297         tmp = fast_logmath_add(s->lmath_8b, tmp,
00298                                pid_cw2[sen] + s->f[i][2].score);
00299         tmp = fast_logmath_add(s->lmath_8b, tmp,
00300                                pid_cw3[sen] + s->f[i][3].score);
00301 
00302         senone_scores[sen] += tmp;
00303         l = sen;
00304     }
00305     return 0;
00306 }
00307 
00308 static int32
00309 get_scores_8b_feat_3(s2_semi_mgau_t * s, int i,
00310                      int16 *senone_scores, uint8 *senone_active,
00311                      int32 n_senone_active)
00312 {
00313     int32 j, l;
00314     uint8 *pid_cw0, *pid_cw1, *pid_cw2;
00315 
00316     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00317     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00318     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00319 
00320     for (l = j = 0; j < n_senone_active; j++) {
00321         int sen = senone_active[j] + l;
00322         int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00323 
00324         tmp = fast_logmath_add(s->lmath_8b, tmp,
00325                                pid_cw1[sen] + s->f[i][1].score);
00326         tmp = fast_logmath_add(s->lmath_8b, tmp,
00327                                pid_cw2[sen] + s->f[i][2].score);
00328 
00329         senone_scores[sen] += tmp;
00330         l = sen;
00331     }
00332     return 0;
00333 }
00334 
00335 static int32
00336 get_scores_8b_feat_2(s2_semi_mgau_t * s, int i,
00337                      int16 *senone_scores, uint8 *senone_active,
00338                      int32 n_senone_active)
00339 {
00340     int32 j, l;
00341     uint8 *pid_cw0, *pid_cw1;
00342 
00343     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00344     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00345 
00346     for (l = j = 0; j < n_senone_active; j++) {
00347         int sen = senone_active[j] + l;
00348         int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00349 
00350         tmp = fast_logmath_add(s->lmath_8b, tmp,
00351                                pid_cw1[sen] + s->f[i][1].score);
00352 
00353         senone_scores[sen] += tmp;
00354         l = sen;
00355     }
00356     return 0;
00357 }
00358 
00359 static int32
00360 get_scores_8b_feat_1(s2_semi_mgau_t * s, int i,
00361                      int16 *senone_scores, uint8 *senone_active,
00362                      int32 n_senone_active)
00363 {
00364     int32 j, l;
00365     uint8 *pid_cw0;
00366 
00367     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00368     for (l = j = 0; j < n_senone_active; j++) {
00369         int sen = senone_active[j] + l;
00370         int32 tmp = pid_cw0[sen] + s->f[i][0].score;
00371         senone_scores[sen] += tmp;
00372         l = sen;
00373     }
00374     return 0;
00375 }
00376 
00377 static int32
00378 get_scores_8b_feat_any(s2_semi_mgau_t * s, int i, int topn,
00379                        int16 *senone_scores, uint8 *senone_active,
00380                        int32 n_senone_active)
00381 {
00382     int32 j, k, l;
00383 
00384     for (l = j = 0; j < n_senone_active; j++) {
00385         int sen = senone_active[j] + l;
00386         uint8 *pid_cw;
00387         int32 tmp;
00388         pid_cw = s->mixw[i][s->f[i][0].codeword];
00389         tmp = pid_cw[sen] + s->f[i][0].score;
00390         for (k = 1; k < topn; ++k) {
00391             pid_cw = s->mixw[i][s->f[i][k].codeword];
00392             tmp = fast_logmath_add(s->lmath_8b, tmp,
00393                                    pid_cw[sen] + s->f[i][k].score);
00394         }
00395         senone_scores[sen] += tmp;
00396         l = sen;
00397     }
00398     return 0;
00399 }
00400 
00401 static int32
00402 get_scores_8b_feat(s2_semi_mgau_t * s, int i, int topn,
00403                    int16 *senone_scores, uint8 *senone_active, int32 n_senone_active)
00404 {
00405     switch (topn) {
00406     case 6:
00407         return get_scores_8b_feat_6(s, i, senone_scores,
00408                                     senone_active, n_senone_active);
00409     case 5:
00410         return get_scores_8b_feat_5(s, i, senone_scores,
00411                                     senone_active, n_senone_active);
00412     case 4:
00413         return get_scores_8b_feat_4(s, i, senone_scores,
00414                                     senone_active, n_senone_active);
00415     case 3:
00416         return get_scores_8b_feat_3(s, i, senone_scores,
00417                                     senone_active, n_senone_active);
00418     case 2:
00419         return get_scores_8b_feat_2(s, i, senone_scores,
00420                                     senone_active, n_senone_active);
00421     case 1:
00422         return get_scores_8b_feat_1(s, i, senone_scores,
00423                                     senone_active, n_senone_active);
00424     default:
00425         return get_scores_8b_feat_any(s, i, topn, senone_scores,
00426                                       senone_active, n_senone_active);
00427     }
00428 }
00429 
00430 static int32
00431 get_scores_8b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores)
00432 {
00433     int32 j, k;
00434 
00435     for (j = 0; j < s->n_sen; j++) {
00436         uint8 *pid_cw;
00437         int32 tmp;
00438         pid_cw = s->mixw[i][s->f[i][0].codeword];
00439         tmp = pid_cw[j] + s->f[i][0].score;
00440         for (k = 1; k < topn; ++k) {
00441             pid_cw = s->mixw[i][s->f[i][k].codeword];
00442             tmp = fast_logmath_add(s->lmath_8b, tmp,
00443                                    pid_cw[j] + s->f[i][k].score);
00444         }
00445         senone_scores[j] += tmp;
00446     }
00447     return 0;
00448 }
00449 
00450 static int32
00451 get_scores_4b_feat_6(s2_semi_mgau_t * s, int i,
00452                      int16 *senone_scores, uint8 *senone_active,
00453                      int32 n_senone_active)
00454 {
00455     int32 j, l;
00456     uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5;
00457     uint8 w_den[6][16];
00458 
00459     /* Precompute scaled densities. */
00460     for (j = 0; j < 16; ++j) {
00461         w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00462         w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00463         w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
00464         w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
00465         w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score;
00466         w_den[5][j] = s->mixw_cb[j] + s->f[i][5].score;
00467     }
00468 
00469     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00470     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00471     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00472     pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00473     pid_cw4 = s->mixw[i][s->f[i][4].codeword];
00474     pid_cw5 = s->mixw[i][s->f[i][5].codeword];
00475 
00476     for (l = j = 0; j < n_senone_active; j++) {
00477         int n = senone_active[j] + l;
00478         int tmp, cw;
00479 
00480         if (n & 1) {
00481             cw = pid_cw0[n/2] >> 4;
00482             tmp = w_den[0][cw];
00483             cw = pid_cw1[n/2] >> 4;
00484             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00485             cw = pid_cw2[n/2] >> 4;
00486             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00487             cw = pid_cw3[n/2] >> 4;
00488             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00489             cw = pid_cw4[n/2] >> 4;
00490             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
00491             cw = pid_cw5[n/2] >> 4;
00492             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]);
00493         }
00494         else {
00495             cw = pid_cw0[n/2] & 0x0f;
00496             tmp = w_den[0][cw];
00497             cw = pid_cw1[n/2] & 0x0f;
00498             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00499             cw = pid_cw2[n/2] & 0x0f;
00500             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00501             cw = pid_cw3[n/2] & 0x0f;
00502             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00503             cw = pid_cw4[n/2] & 0x0f;
00504             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
00505             cw = pid_cw5[n/2] & 0x0f;
00506             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]);
00507         }
00508         senone_scores[n] += tmp;
00509         l = n;
00510     }
00511     return 0;
00512 }
00513 
00514 static int32
00515 get_scores_4b_feat_5(s2_semi_mgau_t * s, int i,
00516                      int16 *senone_scores, uint8 *senone_active,
00517                      int32 n_senone_active)
00518 {
00519     int32 j, l;
00520     uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4;
00521     uint8 w_den[5][16];
00522 
00523     /* Precompute scaled densities. */
00524     for (j = 0; j < 16; ++j) {
00525         w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00526         w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00527         w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
00528         w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
00529         w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score;
00530     }
00531 
00532     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00533     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00534     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00535     pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00536     pid_cw4 = s->mixw[i][s->f[i][4].codeword];
00537 
00538     for (l = j = 0; j < n_senone_active; j++) {
00539         int n = senone_active[j] + l;
00540         int tmp, cw;
00541 
00542         if (n & 1) {
00543             cw = pid_cw0[n/2] >> 4;
00544             tmp = w_den[0][cw];
00545             cw = pid_cw1[n/2] >> 4;
00546             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00547             cw = pid_cw2[n/2] >> 4;
00548             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00549             cw = pid_cw3[n/2] >> 4;
00550             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00551             cw = pid_cw4[n/2] >> 4;
00552             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
00553         }
00554         else {
00555             cw = pid_cw0[n/2] & 0x0f;
00556             tmp = w_den[0][cw];
00557             cw = pid_cw1[n/2] & 0x0f;
00558             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00559             cw = pid_cw2[n/2] & 0x0f;
00560             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00561             cw = pid_cw3[n/2] & 0x0f;
00562             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00563             cw = pid_cw4[n/2] & 0x0f;
00564             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
00565         }
00566         senone_scores[n] += tmp;
00567         l = n;
00568     }
00569     return 0;
00570 }
00571 
00572 static int32
00573 get_scores_4b_feat_4(s2_semi_mgau_t * s, int i,
00574                      int16 *senone_scores, uint8 *senone_active,
00575                      int32 n_senone_active)
00576 {
00577     int32 j, l;
00578     uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
00579     uint8 w_den[4][16];
00580 
00581     /* Precompute scaled densities. */
00582     for (j = 0; j < 16; ++j) {
00583         w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00584         w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00585         w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
00586         w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
00587     }
00588 
00589     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00590     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00591     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00592     pid_cw3 = s->mixw[i][s->f[i][3].codeword];
00593 
00594     for (l = j = 0; j < n_senone_active; j++) {
00595         int n = senone_active[j] + l;
00596         int tmp, cw;
00597 
00598         if (n & 1) {
00599             cw = pid_cw0[n/2] >> 4;
00600             tmp = w_den[0][cw];
00601             cw = pid_cw1[n/2] >> 4;
00602             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00603             cw = pid_cw2[n/2] >> 4;
00604             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00605             cw = pid_cw3[n/2] >> 4;
00606             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00607         }
00608         else {
00609             cw = pid_cw0[n/2] & 0x0f;
00610             tmp = w_den[0][cw];
00611             cw = pid_cw1[n/2] & 0x0f;
00612             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00613             cw = pid_cw2[n/2] & 0x0f;
00614             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00615             cw = pid_cw3[n/2] & 0x0f;
00616             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
00617         }
00618         senone_scores[n] += tmp;
00619         l = n;
00620     }
00621     return 0;
00622 }
00623 
00624 static int32
00625 get_scores_4b_feat_3(s2_semi_mgau_t * s, int i,
00626                      int16 *senone_scores, uint8 *senone_active,
00627                      int32 n_senone_active)
00628 {
00629     int32 j, l;
00630     uint8 *pid_cw0, *pid_cw1, *pid_cw2;
00631     uint8 w_den[3][16];
00632 
00633     /* Precompute scaled densities. */
00634     for (j = 0; j < 16; ++j) {
00635         w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00636         w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00637         w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
00638     }
00639 
00640     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00641     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00642     pid_cw2 = s->mixw[i][s->f[i][2].codeword];
00643 
00644     for (l = j = 0; j < n_senone_active; j++) {
00645         int n = senone_active[j] + l;
00646         int tmp, cw;
00647 
00648         if (n & 1) {
00649             cw = pid_cw0[n/2] >> 4;
00650             tmp = w_den[0][cw];
00651             cw = pid_cw1[n/2] >> 4;
00652             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00653             cw = pid_cw2[n/2] >> 4;
00654             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00655         }
00656         else {
00657             cw = pid_cw0[n/2] & 0x0f;
00658             tmp = w_den[0][cw];
00659             cw = pid_cw1[n/2] & 0x0f;
00660             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00661             cw = pid_cw2[n/2] & 0x0f;
00662             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
00663         }
00664         senone_scores[n] += tmp;
00665         l = n;
00666     }
00667     return 0;
00668 }
00669 
00670 static int32
00671 get_scores_4b_feat_2(s2_semi_mgau_t * s, int i,
00672                      int16 *senone_scores, uint8 *senone_active,
00673                      int32 n_senone_active)
00674 {
00675     int32 j, l;
00676     uint8 *pid_cw0, *pid_cw1;
00677     uint8 w_den[2][16];
00678 
00679     /* Precompute scaled densities. */
00680     for (j = 0; j < 16; ++j) {
00681         w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
00682         w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
00683     }
00684 
00685     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00686     pid_cw1 = s->mixw[i][s->f[i][1].codeword];
00687 
00688     for (l = j = 0; j < n_senone_active; j++) {
00689         int n = senone_active[j] + l;
00690         int tmp, cw;
00691 
00692         if (n & 1) {
00693             cw = pid_cw0[n/2] >> 4;
00694             tmp = w_den[0][cw];
00695             cw = pid_cw1[n/2] >> 4;
00696             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00697         }
00698         else {
00699             cw = pid_cw0[n/2] & 0x0f;
00700             tmp = w_den[0][cw];
00701             cw = pid_cw1[n/2] & 0x0f;
00702             tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
00703         }
00704         senone_scores[n] += tmp;
00705         l = n;
00706     }
00707     return 0;
00708 }
00709 
00710 static int32
00711 get_scores_4b_feat_1(s2_semi_mgau_t * s, int i,
00712                      int16 *senone_scores, uint8 *senone_active,
00713                      int32 n_senone_active)
00714 {
00715     int32 j, l;
00716     uint8 *pid_cw0;
00717     uint8 w_den[16];
00718 
00719     /* Precompute scaled densities. */
00720     for (j = 0; j < 16; ++j) {
00721         w_den[j] = s->mixw_cb[j] + s->f[i][0].score;
00722     }
00723 
00724     pid_cw0 = s->mixw[i][s->f[i][0].codeword];
00725 
00726     for (l = j = 0; j < n_senone_active; j++) {
00727         int n = senone_active[j] + l;
00728         int tmp, cw;
00729 
00730         if (n & 1) {
00731             cw = pid_cw0[n/2] >> 4;
00732             tmp = w_den[cw];
00733         }
00734         else {
00735             cw = pid_cw0[n/2] & 0x0f;
00736             tmp = w_den[cw];
00737         }
00738         senone_scores[n] += tmp;
00739         l = n;
00740     }
00741     return 0;
00742 }
00743 
00744 static int32
00745 get_scores_4b_feat_any(s2_semi_mgau_t * s, int i, int topn,
00746                        int16 *senone_scores, uint8 *senone_active,
00747                        int32 n_senone_active)
00748 {
00749     int32 j, k, l;
00750 
00751     for (l = j = 0; j < n_senone_active; j++) {
00752         int n = senone_active[j] + l;
00753         int tmp, cw;
00754         uint8 *pid_cw;
00755     
00756         pid_cw = s->mixw[i][s->f[i][0].codeword];
00757         if (n & 1)
00758             cw = pid_cw[n/2] >> 4;
00759         else
00760             cw = pid_cw[n/2] & 0x0f;
00761         tmp = s->mixw_cb[cw] + s->f[i][0].score;
00762         for (k = 1; k < topn; ++k) {
00763             pid_cw = s->mixw[i][s->f[i][k].codeword];
00764             if (n & 1)
00765                 cw = pid_cw[n/2] >> 4;
00766             else
00767                 cw = pid_cw[n/2] & 0x0f;
00768             tmp = fast_logmath_add(s->lmath_8b, tmp,
00769                                    s->mixw_cb[cw] + s->f[i][k].score);
00770         }
00771         senone_scores[n] += tmp;
00772         l = n;
00773     }
00774     return 0;
00775 }
00776 
00777 static int32
00778 get_scores_4b_feat(s2_semi_mgau_t * s, int i, int topn,
00779                    int16 *senone_scores, uint8 *senone_active, int32 n_senone_active)
00780 {
00781     switch (topn) {
00782     case 6:
00783         return get_scores_4b_feat_6(s, i, senone_scores,
00784                                     senone_active, n_senone_active);
00785     case 5:
00786         return get_scores_4b_feat_5(s, i, senone_scores,
00787                                     senone_active, n_senone_active);
00788     case 4:
00789         return get_scores_4b_feat_4(s, i, senone_scores,
00790                                     senone_active, n_senone_active);
00791     case 3:
00792         return get_scores_4b_feat_3(s, i, senone_scores,
00793                                     senone_active, n_senone_active);
00794     case 2:
00795         return get_scores_4b_feat_2(s, i, senone_scores,
00796                                     senone_active, n_senone_active);
00797     case 1:
00798         return get_scores_4b_feat_1(s, i, senone_scores,
00799                                     senone_active, n_senone_active);
00800     default:
00801         return get_scores_4b_feat_any(s, i, topn, senone_scores,
00802                                       senone_active, n_senone_active);
00803     }
00804 }
00805 
00806 static int32
00807 get_scores_4b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores)
00808 {
00809     int j, last_sen;
00810 
00811     j = 0;
00812     /* Number of senones is always even, but don't overrun if it isn't. */
00813     last_sen = s->n_sen & ~1;
00814     while (j < last_sen) {
00815         uint8 *pid_cw;
00816         int32 tmp0, tmp1;
00817         int k;
00818 
00819         pid_cw = s->mixw[i][s->f[i][0].codeword];
00820         tmp0 = s->mixw_cb[pid_cw[j/2] & 0x0f] + s->f[i][0].score;
00821         tmp1 = s->mixw_cb[pid_cw[j/2] >> 4] + s->f[i][0].score;
00822         for (k = 1; k < topn; ++k) {
00823             int32 w_den0, w_den1;
00824 
00825             pid_cw = s->mixw[i][s->f[i][k].codeword];
00826             w_den0 = s->mixw_cb[pid_cw[j/2] & 0x0f] + s->f[i][k].score;
00827             w_den1 = s->mixw_cb[pid_cw[j/2] >> 4] + s->f[i][k].score;
00828             tmp0 = fast_logmath_add(s->lmath_8b, tmp0, w_den0);
00829             tmp1 = fast_logmath_add(s->lmath_8b, tmp1, w_den1);
00830         }
00831         senone_scores[j++] += tmp0;
00832         senone_scores[j++] += tmp1;
00833     }
00834     return 0;
00835 }
00836 
00837 /*
00838  * Compute senone scores for the active senones.
00839  */
00840 int32
00841 s2_semi_mgau_frame_eval(ps_mgau_t *ps,
00842                         int16 *senone_scores,
00843                         uint8 *senone_active,
00844                         int32 n_senone_active,
00845                         mfcc_t ** featbuf, int32 frame,
00846                         int32 compallsen)
00847 {
00848     s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
00849     int i, topn_idx;
00850 
00851     memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
00852     /* No bounds checking is done here, which just means you'll get
00853      * semi-random crap if you request a frame in the future or one
00854      * that's too far in the past. */
00855     topn_idx = frame % s->n_topn_hist;
00856     s->f = s->topn_hist[topn_idx];
00857     for (i = 0; i < s->n_feat; ++i) {
00858         /* For past frames this will already be computed. */
00859         if (frame >= ps_mgau_base(ps)->frame_idx) {
00860             vqFeature_t **lastf;
00861             if (topn_idx == 0)
00862                 lastf = s->topn_hist[s->n_topn_hist-1];
00863             else
00864                 lastf = s->topn_hist[topn_idx-1];
00865             memcpy(s->f[i], lastf[i], sizeof(vqFeature_t) * s->max_topn);
00866             mgau_dist(s, frame, i, featbuf[i]);
00867             s->topn_hist_n[topn_idx][i] = mgau_norm(s, i);
00868         }
00869         if (s->mixw_cb) {
00870             if (compallsen)
00871                 get_scores_4b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores);
00872             else
00873                 get_scores_4b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores,
00874                                    senone_active, n_senone_active);
00875         }
00876         else {
00877             if (compallsen)
00878                 get_scores_8b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores);
00879             else
00880                 get_scores_8b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores,
00881                                    senone_active, n_senone_active);
00882         }
00883     }
00884 
00885     return 0;
00886 }
00887 
00888 static int32
00889 read_sendump(s2_semi_mgau_t *s, bin_mdef_t *mdef, char const *file)
00890 {
00891     FILE *fp;
00892     char line[1000];
00893     int32 i, n, r, c;
00894     int32 do_swap, do_mmap;
00895     size_t filesize, offset;
00896     int n_clust = 0;
00897     int n_feat = s->n_feat;
00898     int n_density = s->n_density;
00899     int n_sen = bin_mdef_n_sen(mdef);
00900     int n_bits = 8;
00901 
00902     s->n_sen = n_sen; /* FIXME: Should have been done earlier */
00903     do_mmap = cmd_ln_boolean_r(s->config, "-mmap");
00904 
00905     if ((fp = fopen(file, "rb")) == NULL)
00906         return -1;
00907 
00908     E_INFO("Loading senones from dump file %s\n", file);
00909     /* Read title size, title */
00910     if (fread(&n, sizeof(int32), 1, fp) != 1) {
00911         E_ERROR_SYSTEM("Failed to read title size from %s", file);
00912         goto error_out;
00913     }
00914     /* This is extremely bogus */
00915     do_swap = 0;
00916     if (n < 1 || n > 999) {
00917         SWAP_INT32(&n);
00918         if (n < 1 || n > 999) {
00919             E_ERROR("Title length %x in dump file %s out of range\n", n, file);
00920             goto error_out;
00921         }
00922         do_swap = 1;
00923     }
00924     if (fread(line, sizeof(char), n, fp) != n) {
00925         E_ERROR_SYSTEM("Cannot read title");
00926         goto error_out;
00927     }
00928     if (line[n - 1] != '\0') {
00929         E_ERROR("Bad title in dump file\n");
00930         goto error_out;
00931     }
00932     E_INFO("%s\n", line);
00933 
00934     /* Read header size, header */
00935     if (fread(&n, sizeof(n), 1, fp) != 1) {
00936         E_ERROR_SYSTEM("Failed to read header size from %s", file);
00937         goto error_out;
00938     }
00939     if (do_swap) SWAP_INT32(&n);
00940     if (fread(line, sizeof(char), n, fp) != n) {
00941         E_ERROR_SYSTEM("Cannot read header");
00942         goto error_out;
00943     }
00944     if (line[n - 1] != '\0') {
00945         E_ERROR("Bad header in dump file\n");
00946         goto error_out;
00947     }
00948 
00949     /* Read other header strings until string length = 0 */
00950     for (;;) {
00951         if (fread(&n, sizeof(n), 1, fp) != 1) {
00952             E_ERROR_SYSTEM("Failed to read header string size from %s", file);
00953             goto error_out;
00954         }
00955         if (do_swap) SWAP_INT32(&n);
00956         if (n == 0)
00957             break;
00958         if (fread(line, sizeof(char), n, fp) != n) {
00959             E_ERROR_SYSTEM("Cannot read header");
00960             goto error_out;
00961         }
00962         /* Look for a cluster count, if present */
00963         if (!strncmp(line, "feature_count ", strlen("feature_count "))) {
00964             n_feat = atoi(line + strlen("feature_count "));
00965         }
00966         if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) {
00967             n_density = atoi(line + strlen("mixture_count "));
00968         }
00969         if (!strncmp(line, "model_count ", strlen("model_count "))) {
00970             n_sen = atoi(line + strlen("model_count "));
00971         }
00972         if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
00973             n_clust = atoi(line + strlen("cluster_count "));
00974         }
00975         if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) {
00976             n_bits = atoi(line + strlen("cluster_bits "));
00977         }
00978     }
00979 
00980     /* Defaults for #rows, #columns in mixw array. */
00981     c = n_sen;
00982     r = n_density;
00983     if (n_clust == 0) {
00984         /* Older mixw files have them here, and they might be padded. */
00985         if (fread(&r, sizeof(r), 1, fp) != 1) {
00986             E_ERROR_SYSTEM("Cannot read #rows");
00987             goto error_out;
00988         }
00989         if (do_swap) SWAP_INT32(&r);
00990         if (fread(&c, sizeof(c), 1, fp) != 1) {
00991             E_ERROR_SYSTEM("Cannot read #columns");
00992             goto error_out;
00993         }
00994         if (do_swap) SWAP_INT32(&c);
00995         E_INFO("Rows: %d, Columns: %d\n", r, c);
00996     }
00997 
00998     if (n_feat != s->n_feat) {
00999         E_ERROR("Number of feature streams mismatch: %d != %d\n",
01000                 n_feat, s->n_feat);
01001         goto error_out;
01002     }
01003     if (n_density != s->n_density) {
01004         E_ERROR("Number of densities mismatch: %d != %d\n",
01005                 n_density, s->n_density);
01006         goto error_out;
01007     }
01008     if (n_sen != s->n_sen) {
01009         E_ERROR("Number of senones mismatch: %d != %d\n",
01010                 n_sen, s->n_sen);
01011         goto error_out;
01012     }
01013 
01014     if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
01015         E_ERROR("Cluster count must be 0, 15, or 16\n");
01016         goto error_out;
01017     }
01018     if (n_clust == 15)
01019         ++n_clust;
01020 
01021     if (!((n_bits == 8) || (n_bits == 4))) {
01022         E_ERROR("Cluster count must be 4 or 8\n");
01023         goto error_out;
01024     }
01025 
01026     if (do_mmap) {
01027             E_INFO("Using memory-mapped I/O for senones\n");
01028     }
01029     offset = ftell(fp);
01030     fseek(fp, 0, SEEK_END);
01031     filesize = ftell(fp);
01032     fseek(fp, offset, SEEK_SET);
01033 
01034     /* Allocate memory for pdfs (or memory map them) */
01035     if (do_mmap) {
01036         s->sendump_mmap = mmio_file_read(file);
01037         /* Get cluster codebook if any. */
01038         if (n_clust) {
01039             s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
01040             offset += n_clust;
01041         }
01042     }
01043     else {
01044         /* Get cluster codebook if any. */
01045         if (n_clust) {
01046             s->mixw_cb = ckd_calloc(1, n_clust);
01047             if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) {
01048                 E_ERROR("Failed to read %d bytes from sendump\n", n_clust);
01049                 goto error_out;
01050             }
01051         }
01052     }
01053 
01054     /* Set up pointers, or read, or whatever */
01055     if (s->sendump_mmap) {
01056         s->mixw = ckd_calloc_2d(s->n_feat, n_density, sizeof(*s->mixw));
01057         for (n = 0; n < n_feat; n++) {
01058             int step = c;
01059             if (n_bits == 4)
01060                 step = (step + 1) / 2;
01061             for (i = 0; i < r; i++) {
01062                 s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
01063                 offset += step;
01064             }
01065         }
01066     }
01067     else {
01068         s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw));
01069         /* Read pdf values and ids */
01070         for (n = 0; n < n_feat; n++) {
01071             int step = c;
01072             if (n_bits == 4)
01073                 step = (step + 1) / 2;
01074             for (i = 0; i < r; i++) {
01075                 if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp)
01076                     != (size_t) step) {
01077                     E_ERROR("Failed to read %d bytes from sendump\n", step);
01078                     goto error_out;
01079                 }
01080             }
01081         }
01082     }
01083 
01084     fclose(fp);
01085     return 0;
01086 error_out:
01087     fclose(fp);
01088     return -1;
01089 }
01090 
01091 static int32
01092 read_mixw(s2_semi_mgau_t * s, char const *file_name, double SmoothMin)
01093 {
01094     char **argname, **argval;
01095     char eofchk;
01096     FILE *fp;
01097     int32 byteswap, chksum_present;
01098     uint32 chksum;
01099     float32 *pdf;
01100     int32 i, f, c, n;
01101     int32 n_sen;
01102     int32 n_feat;
01103     int32 n_comp;
01104     int32 n_err;
01105 
01106     E_INFO("Reading mixture weights file '%s'\n", file_name);
01107 
01108     if ((fp = fopen(file_name, "rb")) == NULL)
01109         E_FATAL("Failed to open mixture weights file '%s' for reading: %s\n", file_name, strerror(errno));
01110 
01111     /* Read header, including argument-value info and 32-bit byteorder magic */
01112     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
01113         E_FATAL("Failed to read header from file '%s'\n", file_name);
01114 
01115     /* Parse argument-value list */
01116     chksum_present = 0;
01117     for (i = 0; argname[i]; i++) {
01118         if (strcmp(argname[i], "version") == 0) {
01119             if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
01120                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
01121                        file_name, argval[i], MGAU_MIXW_VERSION);
01122         }
01123         else if (strcmp(argname[i], "chksum0") == 0) {
01124             chksum_present = 1; /* Ignore the associated value */
01125         }
01126     }
01127     bio_hdrarg_free(argname, argval);
01128     argname = argval = NULL;
01129 
01130     chksum = 0;
01131 
01132     /* Read #senones, #features, #codewords, arraysize */
01133     if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
01134         || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) !=
01135             1)
01136         || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) !=
01137             1)
01138         || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
01139         E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
01140     }
01141     if (n_feat != s->n_feat)
01142         E_FATAL("#Features streams(%d) != %d\n", n_feat, s->n_feat);
01143     if (n != n_sen * n_feat * n_comp) {
01144         E_FATAL
01145             ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
01146              file_name, i, n_sen, n_feat, n_comp);
01147     }
01148 
01149     /* n_sen = number of mixture weights per codeword, which is
01150      * fixed at the number of senones since we have only one codebook.
01151      */
01152     s->n_sen = n_sen;
01153 
01154     /* Quantized mixture weight arrays. */
01155     s->mixw = ckd_calloc_3d(s->n_feat, s->n_density, n_sen, sizeof(***s->mixw));
01156 
01157     /* Temporary structure to read in floats before conversion to (int32) logs3 */
01158     pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));
01159 
01160     /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
01161     n_err = 0;
01162     for (i = 0; i < n_sen; i++) {
01163         for (f = 0; f < n_feat; f++) {
01164             if (bio_fread((void *) pdf, sizeof(float32),
01165                           n_comp, fp, byteswap, &chksum) != n_comp) {
01166                 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
01167             }
01168 
01169             /* Normalize and floor */
01170             if (vector_sum_norm(pdf, n_comp) <= 0.0)
01171                 n_err++;
01172             vector_floor(pdf, n_comp, SmoothMin);
01173             vector_sum_norm(pdf, n_comp);
01174 
01175             /* Convert to LOG, quantize, and transpose */
01176             for (c = 0; c < n_comp; c++) {
01177                 int32 qscr;
01178 
01179                 qscr = -logmath_log(s->lmath_8b, pdf[c]);
01180                 if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
01181                     qscr = MAX_NEG_MIXW;
01182                 s->mixw[f][c][i] = qscr;
01183             }
01184         }
01185     }
01186     if (n_err > 0)
01187         E_WARN("Weight normalization failed for %d senones\n", n_err);
01188 
01189     ckd_free(pdf);
01190 
01191     if (chksum_present)
01192         bio_verify_chksum(fp, byteswap, chksum);
01193 
01194     if (fread(&eofchk, 1, 1, fp) == 1)
01195         E_FATAL("More data than expected in %s\n", file_name);
01196 
01197     fclose(fp);
01198 
01199     E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
01200     return n_sen;
01201 }
01202 
01203 
01204 static int
01205 split_topn(char const *str, uint8 *out, int nfeat)
01206 {
01207     char *topn_list = ckd_salloc(str);
01208     char *c, *cc;
01209     int i, maxn;
01210 
01211     c = topn_list;
01212     i = 0;
01213     maxn = 0;
01214     while (i < nfeat && (cc = strchr(c, ',')) != NULL) {
01215         *cc = '\0';
01216         out[i] = atoi(c);
01217         if (out[i] > maxn) maxn = out[i];
01218         c = cc + 1;
01219         ++i;
01220     }
01221     if (i < nfeat && *c != '\0') {
01222         out[i] = atoi(c);
01223         if (out[i] > maxn) maxn = out[i];
01224         ++i;
01225     }
01226     while (i < nfeat)
01227         out[i++] = maxn;
01228 
01229     ckd_free(topn_list);
01230     return maxn;
01231 }
01232 
01233 
01234 ps_mgau_t *
01235 s2_semi_mgau_init(acmod_t *acmod)
01236 {
01237     s2_semi_mgau_t *s;
01238     ps_mgau_t *ps;
01239     char const *sendump_path;
01240     int i;
01241 
01242     s = ckd_calloc(1, sizeof(*s));
01243     s->config = acmod->config;
01244 
01245     s->lmath = logmath_retain(acmod->lmath);
01246     /* Log-add table. */
01247     s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE);
01248     if (s->lmath_8b == NULL)
01249         goto error_out;
01250     /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */
01251     if (logmath_get_width(s->lmath_8b) != 1) {
01252         E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
01253                 logmath_get_base(s->lmath_8b));
01254         goto error_out;
01255     }
01256 
01257     /* Read means and variances. */
01258     if ((s->g = gauden_init(cmd_ln_str_r(s->config, "-mean"),
01259                             cmd_ln_str_r(s->config, "-var"),
01260                             cmd_ln_float32_r(s->config, "-varfloor"),
01261                             s->lmath)) == NULL)
01262         goto error_out;
01263     /* Currently only a single codebook is supported. */
01264     if (s->g->n_mgau != 1)
01265         goto error_out;
01266     /* FIXME: maintaining pointers for convenience for now */
01267     s->means = s->g->mean[0];
01268     s->vars = s->g->var[0];
01269     s->dets = s->g->det[0];
01270     s->veclen = s->g->featlen;    
01271     /* Verify n_feat and veclen, against acmod. */
01272     s->n_feat = s->g->n_feat;
01273     if (s->n_feat != feat_dimension1(acmod->fcb)) {
01274         E_ERROR("Number of streams does not match: %d != %d\n",
01275                 s->n_feat, feat_dimension(acmod->fcb));
01276         goto error_out;
01277     }
01278     for (i = 0; i < s->n_feat; ++i) {
01279         if (s->veclen[i] != feat_dimension2(acmod->fcb, i)) {
01280             E_ERROR("Dimension of stream %d does not match: %d != %d\n",
01281                     s->veclen[i], feat_dimension2(acmod->fcb, i));
01282             goto error_out;
01283         }
01284     }
01285     s->n_density = s->g->n_density;
01286     /* Read mixture weights */
01287     if ((sendump_path = cmd_ln_str_r(s->config, "-sendump"))) {
01288         if (read_sendump(s, acmod->mdef, sendump_path) < 0) {
01289             goto error_out;
01290         }
01291     }
01292     else {
01293         if (read_mixw(s, cmd_ln_str_r(s->config, "-mixw"),
01294                       cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) {
01295             goto error_out;
01296         }
01297     }
01298     s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");
01299 
01300     /* Determine top-N for each feature */
01301     s->topn_beam = ckd_calloc(s->n_feat, sizeof(*s->topn_beam));
01302     s->max_topn = cmd_ln_int32_r(s->config, "-topn");
01303     split_topn(cmd_ln_str_r(s->config, "-topn_beam"), s->topn_beam, s->n_feat);
01304     E_INFO("Maximum top-N: %d ", s->max_topn);
01305     E_INFOCONT("Top-N beams:");
01306     for (i = 0; i < s->n_feat; ++i) {
01307         E_INFOCONT(" %d", s->topn_beam[i]);
01308     }
01309     E_INFOCONT("\n");
01310 
01311     /* Top-N scores from recent frames */
01312     s->n_topn_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
01313     s->topn_hist = (vqFeature_t ***)
01314         ckd_calloc_3d(s->n_topn_hist, s->n_feat, s->max_topn,
01315                       sizeof(***s->topn_hist));
01316     s->topn_hist_n = ckd_calloc_2d(s->n_topn_hist, s->n_feat,
01317                                    sizeof(**s->topn_hist_n));
01318     for (i = 0; i < s->n_topn_hist; ++i) {
01319         int j;
01320         for (j = 0; j < s->n_feat; ++j) {
01321             int k;
01322             for (k = 0; k < s->max_topn; ++k) {
01323                 s->topn_hist[i][j][k].score = WORST_DIST;
01324                 s->topn_hist[i][j][k].codeword = k;
01325             }
01326         }
01327     }
01328 
01329     ps = (ps_mgau_t *)s;
01330     ps->vt = &s2_semi_mgau_funcs;
01331     return ps;
01332 error_out:
01333     s2_semi_mgau_free(ps_mgau_base(s));
01334     return NULL;
01335 }
01336 
01337 int
01338 s2_semi_mgau_mllr_transform(ps_mgau_t *ps,
01339                             ps_mllr_t *mllr)
01340 {
01341     s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
01342     return gauden_mllr_transform(s->g, mllr, s->config);
01343 }
01344 
01345 void
01346 s2_semi_mgau_free(ps_mgau_t *ps)
01347 {
01348     s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
01349 
01350     logmath_free(s->lmath);
01351     logmath_free(s->lmath_8b);
01352     if (s->sendump_mmap) {
01353         ckd_free_2d(s->mixw); 
01354         mmio_file_unmap(s->sendump_mmap);
01355     }
01356     else {
01357         ckd_free_3d(s->mixw);
01358     }
01359     gauden_free(s->g);
01360     ckd_free(s->topn_beam);
01361     ckd_free_2d(s->topn_hist_n);
01362     ckd_free_3d((void **)s->topn_hist);
01363     ckd_free(s);
01364 }