• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libpocketsphinx/ms_mgau.c

00001 /* ====================================================================
00002  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00003  * reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  *
00009  * 1. Redistributions of source code must retain the above copyright
00010  *    notice, this list of conditions and the following disclaimer. 
00011  *
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  *    notice, this list of conditions and the following disclaimer in
00014  *    the documentation and/or other materials provided with the
00015  *    distribution.
00016  *
00017  * This work was supported in part by funding from the Defense Advanced 
00018  * Research Projects Agency and the National Science Foundation of the 
00019  * United States of America, and the CMU Sphinx Speech Consortium.
00020  *
00021  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00022  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00023  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00024  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00025  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00026  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00027  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00028  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00029  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00030  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00031  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032  *
00033  * ====================================================================
00034  *
00035  */
00036 /*
00037  * ms_mgau.c -- Essentially a wrapper that wraps up gauden and
00038  * senone. It supports multiple streams. 
00039  *
00040  *
00041  * **********************************************
00042  * CMU ARPA Speech Project
00043  *
00044  * Copyright (c) 1997 Carnegie Mellon University.
00045  * ALL RIGHTS RESERVED.
00046  * **********************************************
00047  * HISTORY
00048  * $Log$
00049  * Revision 1.2  2006/02/22  16:56:01  arthchan2003
00050  * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: Added ms_mgau.[ch] into the trunk. It is a wrapper of ms_gauden and ms_senone
00051  * 
00052  * Revision 1.1.2.4  2005/09/25 18:55:19  arthchan2003
00053  * Added a flag to turn on and off precomputation.
00054  *
00055  * Revision 1.1.2.3  2005/08/03 18:53:44  dhdfu
00056  * Add memory deallocation functions.  Also move all the initialization
00057  * of ms_mgau_model_t into ms_mgau_init (duh!), which entails removing it
00058  * from decode_anytopo and friends.
00059  *
00060  * Revision 1.1.2.2  2005/08/02 21:05:38  arthchan2003
00061  * 1, Added dist and mgau_active as intermediate variable for computation. 2, Added ms_cont_mgau_frame_eval, which is a multi stream version of GMM computation mainly s3.0 family of tools. 3, Fixed dox-doc.
00062  *
00063  * Revision 1.1.2.1  2005/07/20 19:37:09  arthchan2003
00064  * Added a multi-stream cont_mgau (ms_mgau) which is a wrapper of both gauden and senone.  Add ms_mgau_init and model_set_mllr.  This allow eliminating 600 lines of code in decode_anytopo/align/allphone.
00065  *
00066  *
00067  *
00068  */
00069 
00070 /* Local headers. */
00071 #include "ms_mgau.h"
00072 
00073 ms_mgau_model_t *
00074 ms_mgau_init(cmd_ln_t *config, logmath_t *lmath)
00075 {
00076     /* Codebooks */
00077     int32 i;
00078     ms_mgau_model_t *msg;
00079     gauden_t *g;
00080     senone_t *s;
00081     mgau2sen_t *m2s;
00082 
00083     msg = (ms_mgau_model_t *) ckd_calloc(1, sizeof(ms_mgau_model_t));
00084 
00085 
00086     msg->g = NULL;
00087     msg->s = NULL;
00088 
00089     msg->g = gauden_init(cmd_ln_str_r(config, "-mean"),
00090                          cmd_ln_str_r(config, "-var"),
00091                          cmd_ln_float32_r(config, "-varfloor"),
00092                          lmath);
00093     msg->s = senone_init(msg->g,
00094                          cmd_ln_str_r(config, "-mixw"), NULL,
00095                          cmd_ln_float32_r(config, "-mixwfloor"), lmath);
00096 
00097     g = ms_mgau_gauden(msg);
00098     s = ms_mgau_senone(msg);
00099 
00100     /* Verify senone parameters against gauden parameters */
00101     if (s->n_feat != g->n_feat)
00102         E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat,
00103                 s->n_feat);
00104     if (s->n_cw != g->n_density)
00105         E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n",
00106                 g->n_density, s->n_cw);
00107     if (s->n_gauden > g->n_mgau)
00108         E_FATAL("Senones need more codebooks (%d) than present (%d)\n",
00109                 s->n_gauden, g->n_mgau);
00110     if (s->n_gauden < g->n_mgau)
00111         E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n",
00112                 s->n_gauden, g->n_mgau);
00113     /* Initialize mapping from mixture Gaussian to senones */
00114     msg->mgau2sen =
00115         (mgau2sen_t **) ckd_calloc(g->n_mgau, sizeof(mgau2sen_t *));
00116     for (i = 0; i < s->n_sen; i++) {
00117         m2s = (mgau2sen_t *) ckd_calloc(1, sizeof(mgau2sen_t));
00118         m2s->sen = i;
00119         m2s->next = msg->mgau2sen[s->mgau[i]];
00120         msg->mgau2sen[s->mgau[i]] = m2s;
00121     }
00122 
00123     msg->topn = cmd_ln_int32_r(config, "-topn");
00124     E_INFO("The value of topn: %d\n", msg->topn);
00125     if (msg->topn == 0 || msg->topn > msg->g->n_density) {
00126         E_WARN
00127             ("-topn argument (%d) invalid or > #density codewords (%d); set to latter\n",
00128              msg->topn, msg->g->n_density);
00129         msg->topn = msg->g->n_density;
00130     }
00131 
00132     msg->dist = (gauden_dist_t ***)
00133         ckd_calloc_3d(g->n_mgau, g->n_feat, msg->topn,
00134                       sizeof(gauden_dist_t));
00135     msg->mgau_active = ckd_calloc(g->n_mgau, sizeof(int8));
00136 
00137     return msg;
00138 }
00139 
00140 void
00141 ms_mgau_free(ms_mgau_model_t * msg)
00142 {
00143     if (msg == NULL)
00144         return;
00145 
00146     gauden_free(msg->g);
00147     senone_free(msg->s);
00148     ckd_free_3d((void *) msg->dist);
00149     ckd_free(msg->mgau_active);
00150     ckd_free(msg);
00151 }
00152 
00153 int32
00154 ms_cont_mgau_frame_eval(ms_mgau_model_t * msg,
00155                         int16 *senscr,
00156                         int32 *senone_active,
00157                         int32 n_senone_active,
00158                         mfcc_t ** feat,
00159                         int32 frame,
00160                         int32 compallsen,
00161                         int32 *bestidx)
00162 {
00163     int32 gid;
00164     int32 i;
00165     int32 topn;
00166     int32 best;
00167     gauden_t *g;
00168     senone_t *sen;
00169 
00170     topn = ms_mgau_topn(msg);
00171     g = ms_mgau_gauden(msg);
00172     sen = ms_mgau_senone(msg);
00173 
00174     /*
00175      * Evaluate gaussian density codebooks and senone scores for input codeword.
00176      * Evaluate only active codebooks and senones. (ignore compallsen...)
00177      */
00178 
00179     /* Flag all active mixture-gaussian codebooks */
00180     for (gid = 0; gid < g->n_mgau; gid++)
00181         msg->mgau_active[gid] = 0;
00182 
00183     for (i = 0; i < n_senone_active; i++) {
00184         int32 s = senone_active[i];
00185         msg->mgau_active[sen->mgau[s]] = 1;
00186     }
00187 
00188     /* Compute topn gaussian density values (for active codebooks) */
00189     for (gid = 0; gid < g->n_mgau; gid++) {
00190         if (msg->mgau_active[gid])
00191             gauden_dist(g, gid, topn, feat, msg->dist[gid]);
00192     }
00193 
00194     best = (int32) 0x7fffffff;
00195     *bestidx = -1;
00196     for (i = 0; i < n_senone_active; i++) {
00197         int32 s = senone_active[i];
00198         senscr[s] = senone_eval(sen, s, msg->dist[sen->mgau[s]], topn);
00199         if (best > senscr[s]) {
00200             best = senscr[s];
00201             *bestidx = s;
00202         }
00203     }
00204 
00205     /* Normalize senone scores */
00206     for (i = 0; i < n_senone_active; i++) {
00207         int32 s = senone_active[i];
00208         int32 bs = senscr[s] - best;
00209         if (bs > 32767)
00210           bs = 32767;
00211         if (bs < -32768)
00212           bs = -32768;
00213         senscr[s] = bs;
00214     }
00215 
00216     return best;
00217 }

Generated on Thu Jan 27 2011 for PocketSphinx by  doxygen 1.7.1