PocketSphinx  0.6
src/libpocketsphinx/tmat.c
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * tmat.c
00039  *
00040  * **********************************************
00041  * CMU ARPA Speech Project
00042  *
00043  * Copyright (c) 1997 Carnegie Mellon University.
00044  * ALL RIGHTS RESERVED.
00045  * **********************************************
00046  * 
00047  * HISTORY
00048  * $Log: tmat.c,v $
00049  * Revision 1.1.1.1  2006/05/23 18:45:01  dhuggins
00050  * re-importation
00051  *
00052  * Revision 1.4  2005/11/14 16:14:34  dhuggins
00053  * Use LOG() instead of logs3() for loading tmats, makes startup
00054  * ***much*** faster.
00055  *
00056  * Revision 1.3  2005/10/10 14:50:35  dhuggins
00057  * Deal properly with empty transition matrices.
00058  *
00059  * Revision 1.2  2005/09/30 15:01:23  dhuggins
00060  * More robust tmat reading - read the tmat in accordance with the fixed s2 topology
00061  *
00062  * Revision 1.1  2005/09/29 21:51:19  dhuggins
00063  * Add support for Sphinx3 tmat files.  Amazingly enough, it Just Works
00064  * (but it isn't terribly robust)
00065  *
00066  * Revision 1.6  2005/07/05 13:12:39  dhdfu
00067  * Add new arguments to logs3_init() in some tests, main_ep
00068  *
00069  * Revision 1.5  2005/06/21 19:23:35  arthchan2003
00070  * 1, Fixed doxygen documentation. 2, Added $ keyword.
00071  *
00072  * Revision 1.5  2005/05/03 04:09:09  archan
00073  * Implemented the heart of word copy search. For every ci-phone, every word end, a tree will be allocated to preserve its pathscore.  This is different from 3.5 or below, only the best score for a particular ci-phone, regardless of the word-ends will be preserved at every frame.  The graph propagation will not collect unused word tree at this point. srch_WST_propagate_wd_lv2 is also as the most stupid in the century.  But well, after all, everything needs a start.  I will then really get the results from the search and see how it looks.
00074  *
00075  * Revision 1.4  2005/04/21 23:50:26  archan
00076  * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in.  At this moment, everything in search mode 5 is already done.  It is time to test the idea whether the search can really be used.
00077  *
00078  * Revision 1.3  2005/03/30 01:22:47  archan
00079  * Fixed mistakes in last updates. Add
00080  *
00081  * 
00082  * 20.Apr.2001  RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
00083  *              Added tmat_free to free allocated memory 
00084  *
00085  * 29-Feb-2000  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
00086  *              Added tmat_chk_1skip(), and made tmat_chk_uppertri() public.
00087  * 
00088  * 10-Dec-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
00089  *              Made tmat_dump() public.
00090  * 
00091  * 11-Mar-97    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
00092  *              Started based on original S3 implementation.
00093  */
00094 
00095 /* System headers. */
00096 #include <string.h>
00097 
00098 /* SphinxBase headers. */
00099 #include <sphinxbase/logmath.h>
00100 #include <sphinxbase/err.h>
00101 #include <sphinxbase/ckd_alloc.h>
00102 #include <sphinxbase/bio.h>
00103 
00104 /* Local headers. */
00105 #include "tmat.h"
00106 #include "hmm.h"
00107 #include "vector.h"
00108 
00109 #define TMAT_PARAM_VERSION              "1.0"
00110 
00111 
00116 static int32 tmat_chk_uppertri(tmat_t *tmat, logmath_t *lmath);
00117 
00118 
00125 static int32 tmat_chk_1skip(tmat_t *tmat, logmath_t *lmath);
00126 
00127 
00128 void
00129 tmat_dump(tmat_t * tmat, FILE * fp)
00130 {
00131     int32 i, src, dst;
00132 
00133     for (i = 0; i < tmat->n_tmat; i++) {
00134         fprintf(fp, "TMAT %d = %d x %d\n", i, tmat->n_state,
00135                 tmat->n_state + 1);
00136         for (src = 0; src < tmat->n_state; src++) {
00137             for (dst = 0; dst <= tmat->n_state; dst++)
00138                 fprintf(fp, " %12d", tmat->tp[i][src][dst]);
00139             fprintf(fp, "\n");
00140         }
00141         fprintf(fp, "\n");
00142     }
00143     fflush(fp);
00144 }
00145 
00146 
00147 /*
00148  * Check model tprob matrices that they conform to upper-triangular assumption;
00149  * i.e. no "backward" transitions allowed.
00150  */
00151 int32
00152 tmat_chk_uppertri(tmat_t * tmat, logmath_t *lmath)
00153 {
00154     int32 i, src, dst;
00155 
00156     /* Check that each tmat is upper-triangular */
00157     for (i = 0; i < tmat->n_tmat; i++) {
00158         for (dst = 0; dst < tmat->n_state; dst++)
00159             for (src = dst + 1; src < tmat->n_state; src++)
00160                 if (tmat->tp[i][src][dst] < 255) {
00161                     E_ERROR("tmat[%d][%d][%d] = %d\n",
00162                             i, src, dst, tmat->tp[i][src][dst]);
00163                     return -1;
00164                 }
00165     }
00166 
00167     return 0;
00168 }
00169 
00170 
00171 int32
00172 tmat_chk_1skip(tmat_t * tmat, logmath_t *lmath)
00173 {
00174     int32 i, src, dst;
00175 
00176     for (i = 0; i < tmat->n_tmat; i++) {
00177         for (src = 0; src < tmat->n_state; src++)
00178             for (dst = src + 3; dst <= tmat->n_state; dst++)
00179                 if (tmat->tp[i][src][dst] < 255) {
00180                     E_ERROR("tmat[%d][%d][%d] = %d\n",
00181                             i, src, dst, tmat->tp[i][src][dst]);
00182                     return -1;
00183                 }
00184     }
00185 
00186     return 0;
00187 }
00188 
00189 
00190 tmat_t *
00191 tmat_init(char const *file_name, logmath_t *lmath, float64 tpfloor, int32 breport)
00192 {
00193     char tmp;
00194     int32 n_src, n_dst, n_tmat;
00195     FILE *fp;
00196     int32 byteswap, chksum_present;
00197     uint32 chksum;
00198     float32 **tp;
00199     int32 i, j, k, tp_per_tmat;
00200     char **argname, **argval;
00201     tmat_t *t;
00202 
00203 
00204     if (breport) {
00205         E_INFO("Reading HMM transition probability matrices: %s\n",
00206                file_name);
00207     }
00208 
00209     t = (tmat_t *) ckd_calloc(1, sizeof(tmat_t));
00210 
00211     if ((fp = fopen(file_name, "rb")) == NULL)
00212         E_FATAL_SYSTEM("Failed to open transition file '%s' for reading", file_name);
00213 
00214     /* Read header, including argument-value info and 32-bit byteorder magic */
00215     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
00216         E_FATAL("Failed to read header from file '%s'\n", file_name);
00217 
00218     /* Parse argument-value list */
00219     chksum_present = 0;
00220     for (i = 0; argname[i]; i++) {
00221         if (strcmp(argname[i], "version") == 0) {
00222             if (strcmp(argval[i], TMAT_PARAM_VERSION) != 0)
00223                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
00224                        file_name, argval[i], TMAT_PARAM_VERSION);
00225         }
00226         else if (strcmp(argname[i], "chksum0") == 0) {
00227             chksum_present = 1; /* Ignore the associated value */
00228         }
00229     }
00230     bio_hdrarg_free(argname, argval);
00231     argname = argval = NULL;
00232 
00233     chksum = 0;
00234 
00235     /* Read #tmat, #from-states, #to-states, arraysize */
00236     if ((bio_fread(&n_tmat, sizeof(int32), 1, fp, byteswap, &chksum)
00237          != 1)
00238         || (bio_fread(&n_src, sizeof(int32), 1, fp, byteswap, &chksum) !=
00239             1)
00240         || (bio_fread(&n_dst, sizeof(int32), 1, fp, byteswap, &chksum) !=
00241             1)
00242         || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
00243         E_FATAL("Failed to read header from '%s'\n", file_name);
00244     }
00245     if (n_tmat >= MAX_INT16)
00246         E_FATAL("%s: Number of transition matrices (%d) exceeds limit (%d)\n", file_name,
00247                 n_tmat, MAX_INT16);
00248     t->n_tmat = n_tmat;
00249     
00250     if (n_dst != n_src + 1)
00251         E_FATAL("%s: Unsupported transition matrix. Number of source states (%d) != number of target states (%d)-1\n", file_name,
00252                 n_src, n_dst);
00253     t->n_state = n_src;
00254 
00255     if (i != t->n_tmat * n_src * n_dst) {
00256         E_FATAL
00257             ("%s: Invalid transitions. Number of coefficients (%d) doesn't match expected array dimension: %d x %d x %d\n",
00258              file_name, i, t->n_tmat, n_src, n_dst);
00259     }
00260 
00261     /* Allocate memory for tmat data */
00262     t->tp = ckd_calloc_3d(t->n_tmat, n_src, n_dst, sizeof(***t->tp));
00263 
00264     /* Temporary structure to read in the float data */
00265     tp = ckd_calloc_2d(n_src, n_dst, sizeof(**tp));
00266 
00267     /* Read transition matrices, normalize and floor them, and convert to log domain */
00268     tp_per_tmat = n_src * n_dst;
00269     for (i = 0; i < t->n_tmat; i++) {
00270         if (bio_fread(tp[0], sizeof(float32), tp_per_tmat, fp,
00271                       byteswap, &chksum) != tp_per_tmat) {
00272             E_FATAL("Failed to read transition matrix %d from '%s'\n", i, file_name);
00273         }
00274 
00275         /* Normalize and floor */
00276         for (j = 0; j < n_src; j++) {
00277             if (vector_sum_norm(tp[j], n_dst) == 0.0)
00278                 E_WARN("Normalization failed for transition matrix %d from state %d\n",
00279                        i, j);
00280             vector_nz_floor(tp[j], n_dst, tpfloor);
00281             vector_sum_norm(tp[j], n_dst);
00282 
00283             /* Convert to logs3. */
00284             for (k = 0; k < n_dst; k++) {
00285                 int ltp;
00286 #if 0 /* No, don't do this!  It will subtly break 3-state HMMs. */
00287                 /* For these ones, we floor them even if they are
00288                  * zero, otherwise HMM evaluation goes nuts. */
00289                 if (k >= j && k-j < 3 && tp[j][k] == 0.0f)
00290                     tp[j][k] = tpfloor;
00291 #endif
00292                 /* Log and quantize them. */
00293                 ltp = -logmath_log(lmath, tp[j][k]) >> SENSCR_SHIFT;
00294                 if (ltp > 255) ltp = 255;
00295                 t->tp[i][j][k] = (uint8)ltp;
00296             }
00297         }
00298     }
00299 
00300     ckd_free_2d(tp);
00301 
00302     if (chksum_present)
00303         bio_verify_chksum(fp, byteswap, chksum);
00304 
00305     if (fread(&tmp, 1, 1, fp) == 1)
00306         E_ERROR("Non-empty file beyond end of data\n");
00307 
00308     fclose(fp);
00309 
00310     if (tmat_chk_uppertri(t, lmath) < 0)
00311         E_FATAL("Tmat not upper triangular\n");
00312     if (tmat_chk_1skip(t, lmath) < 0)
00313         E_FATAL("Topology not Left-to-Right or Bakis\n");
00314 
00315     return t;
00316 }
00317 
00318 void
00319 tmat_report(tmat_t * t)
00320 {
00321     E_INFO_NOFN("Initialization of tmat_t, report:\n");
00322     E_INFO_NOFN("Read %d transition matrices of size %dx%d\n",
00323                 t->n_tmat, t->n_state, t->n_state + 1);
00324     E_INFO_NOFN("\n");
00325 
00326 }
00327 
00328 /* 
00329  *  RAH, Free memory allocated in tmat_init ()
00330  */
00331 void
00332 tmat_free(tmat_t * t)
00333 {
00334     if (t) {
00335         if (t->tp)
00336             ckd_free_3d(t->tp);
00337         ckd_free(t);
00338     }
00339 }