PocketSphinx
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * tmat.c 00039 * 00040 * ********************************************** 00041 * CMU ARPA Speech Project 00042 * 00043 * Copyright (c) 1997 Carnegie Mellon University. 00044 * ALL RIGHTS RESERVED. 00045 * ********************************************** 00046 * 00047 * HISTORY 00048 * $Log: tmat.c,v $ 00049 * Revision 1.1.1.1 2006/05/23 18:45:01 dhuggins 00050 * re-importation 00051 * 00052 * Revision 1.4 2005/11/14 16:14:34 dhuggins 00053 * Use LOG() instead of logs3() for loading tmats, makes startup 00054 * ***much*** faster. 00055 * 00056 * Revision 1.3 2005/10/10 14:50:35 dhuggins 00057 * Deal properly with empty transition matrices. 00058 * 00059 * Revision 1.2 2005/09/30 15:01:23 dhuggins 00060 * More robust tmat reading - read the tmat in accordance with the fixed s2 topology 00061 * 00062 * Revision 1.1 2005/09/29 21:51:19 dhuggins 00063 * Add support for Sphinx3 tmat files. Amazingly enough, it Just Works 00064 * (but it isn't terribly robust) 00065 * 00066 * Revision 1.6 2005/07/05 13:12:39 dhdfu 00067 * Add new arguments to logs3_init() in some tests, main_ep 00068 * 00069 * Revision 1.5 2005/06/21 19:23:35 arthchan2003 00070 * 1, Fixed doxygen documentation. 2, Added $ keyword. 00071 * 00072 * Revision 1.5 2005/05/03 04:09:09 archan 00073 * Implemented the heart of word copy search. For every ci-phone, every word end, a tree will be allocated to preserve its pathscore. This is different from 3.5 or below, only the best score for a particular ci-phone, regardless of the word-ends will be preserved at every frame. The graph propagation will not collect unused word tree at this point. srch_WST_propagate_wd_lv2 is also as the most stupid in the century. But well, after all, everything needs a start. I will then really get the results from the search and see how it looks. 00074 * 00075 * Revision 1.4 2005/04/21 23:50:26 archan 00076 * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used. 00077 * 00078 * Revision 1.3 2005/03/30 01:22:47 archan 00079 * Fixed mistakes in last updates. Add 00080 * 00081 * 00082 * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu) 00083 * Added tmat_free to free allocated memory 00084 * 00085 * 29-Feb-2000 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. 00086 * Added tmat_chk_1skip(), and made tmat_chk_uppertri() public. 00087 * 00088 * 10-Dec-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. 00089 * Made tmat_dump() public. 00090 * 00091 * 11-Mar-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. 00092 * Started based on original S3 implementation. 00093 */ 00094 00095 /* System headers. */ 00096 #include <string.h> 00097 00098 /* SphinxBase headers. */ 00099 #include <sphinxbase/logmath.h> 00100 #include <sphinxbase/err.h> 00101 #include <sphinxbase/ckd_alloc.h> 00102 #include <sphinxbase/bio.h> 00103 00104 /* Local headers. */ 00105 #include "tmat.h" 00106 #include "hmm.h" 00107 #include "vector.h" 00108 00109 #define TMAT_PARAM_VERSION "1.0" 00110 00111 00116 static int32 tmat_chk_uppertri(tmat_t *tmat, logmath_t *lmath); 00117 00118 00125 static int32 tmat_chk_1skip(tmat_t *tmat, logmath_t *lmath); 00126 00127 00128 void 00129 tmat_dump(tmat_t * tmat, FILE * fp) 00130 { 00131 int32 i, src, dst; 00132 00133 for (i = 0; i < tmat->n_tmat; i++) { 00134 fprintf(fp, "TMAT %d = %d x %d\n", i, tmat->n_state, 00135 tmat->n_state + 1); 00136 for (src = 0; src < tmat->n_state; src++) { 00137 for (dst = 0; dst <= tmat->n_state; dst++) 00138 fprintf(fp, " %12d", tmat->tp[i][src][dst]); 00139 fprintf(fp, "\n"); 00140 } 00141 fprintf(fp, "\n"); 00142 } 00143 fflush(fp); 00144 } 00145 00146 00147 /* 00148 * Check model tprob matrices that they conform to upper-triangular assumption; 00149 * i.e. no "backward" transitions allowed. 00150 */ 00151 int32 00152 tmat_chk_uppertri(tmat_t * tmat, logmath_t *lmath) 00153 { 00154 int32 i, src, dst; 00155 00156 /* Check that each tmat is upper-triangular */ 00157 for (i = 0; i < tmat->n_tmat; i++) { 00158 for (dst = 0; dst < tmat->n_state; dst++) 00159 for (src = dst + 1; src < tmat->n_state; src++) 00160 if (tmat->tp[i][src][dst] < 255) { 00161 E_ERROR("tmat[%d][%d][%d] = %d\n", 00162 i, src, dst, tmat->tp[i][src][dst]); 00163 return -1; 00164 } 00165 } 00166 00167 return 0; 00168 } 00169 00170 00171 int32 00172 tmat_chk_1skip(tmat_t * tmat, logmath_t *lmath) 00173 { 00174 int32 i, src, dst; 00175 00176 for (i = 0; i < tmat->n_tmat; i++) { 00177 for (src = 0; src < tmat->n_state; src++) 00178 for (dst = src + 3; dst <= tmat->n_state; dst++) 00179 if (tmat->tp[i][src][dst] < 255) { 00180 E_ERROR("tmat[%d][%d][%d] = %d\n", 00181 i, src, dst, tmat->tp[i][src][dst]); 00182 return -1; 00183 } 00184 } 00185 00186 return 0; 00187 } 00188 00189 00190 tmat_t * 00191 tmat_init(char const *file_name, logmath_t *lmath, float64 tpfloor, int32 breport) 00192 { 00193 char tmp; 00194 int32 n_src, n_dst, n_tmat; 00195 FILE *fp; 00196 int32 byteswap, chksum_present; 00197 uint32 chksum; 00198 float32 **tp; 00199 int32 i, j, k, tp_per_tmat; 00200 char **argname, **argval; 00201 tmat_t *t; 00202 00203 00204 if (breport) { 00205 E_INFO("Reading HMM transition probability matrices: %s\n", 00206 file_name); 00207 } 00208 00209 t = (tmat_t *) ckd_calloc(1, sizeof(tmat_t)); 00210 00211 if ((fp = fopen(file_name, "rb")) == NULL) 00212 E_FATAL_SYSTEM("Failed to open transition file '%s' for reading", file_name); 00213 00214 /* Read header, including argument-value info and 32-bit byteorder magic */ 00215 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) 00216 E_FATAL("Failed to read header from file '%s'\n", file_name); 00217 00218 /* Parse argument-value list */ 00219 chksum_present = 0; 00220 for (i = 0; argname[i]; i++) { 00221 if (strcmp(argname[i], "version") == 0) { 00222 if (strcmp(argval[i], TMAT_PARAM_VERSION) != 0) 00223 E_WARN("Version mismatch(%s): %s, expecting %s\n", 00224 file_name, argval[i], TMAT_PARAM_VERSION); 00225 } 00226 else if (strcmp(argname[i], "chksum0") == 0) { 00227 chksum_present = 1; /* Ignore the associated value */ 00228 } 00229 } 00230 bio_hdrarg_free(argname, argval); 00231 argname = argval = NULL; 00232 00233 chksum = 0; 00234 00235 /* Read #tmat, #from-states, #to-states, arraysize */ 00236 if ((bio_fread(&n_tmat, sizeof(int32), 1, fp, byteswap, &chksum) 00237 != 1) 00238 || (bio_fread(&n_src, sizeof(int32), 1, fp, byteswap, &chksum) != 00239 1) 00240 || (bio_fread(&n_dst, sizeof(int32), 1, fp, byteswap, &chksum) != 00241 1) 00242 || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) { 00243 E_FATAL("Failed to read header from '%s'\n", file_name); 00244 } 00245 if (n_tmat >= MAX_INT16) 00246 E_FATAL("%s: Number of transition matrices (%d) exceeds limit (%d)\n", file_name, 00247 n_tmat, MAX_INT16); 00248 t->n_tmat = n_tmat; 00249 00250 if (n_dst != n_src + 1) 00251 E_FATAL("%s: Unsupported transition matrix. Number of source states (%d) != number of target states (%d)-1\n", file_name, 00252 n_src, n_dst); 00253 t->n_state = n_src; 00254 00255 if (i != t->n_tmat * n_src * n_dst) { 00256 E_FATAL 00257 ("%s: Invalid transitions. Number of coefficients (%d) doesn't match expected array dimension: %d x %d x %d\n", 00258 file_name, i, t->n_tmat, n_src, n_dst); 00259 } 00260 00261 /* Allocate memory for tmat data */ 00262 t->tp = ckd_calloc_3d(t->n_tmat, n_src, n_dst, sizeof(***t->tp)); 00263 00264 /* Temporary structure to read in the float data */ 00265 tp = ckd_calloc_2d(n_src, n_dst, sizeof(**tp)); 00266 00267 /* Read transition matrices, normalize and floor them, and convert to log domain */ 00268 tp_per_tmat = n_src * n_dst; 00269 for (i = 0; i < t->n_tmat; i++) { 00270 if (bio_fread(tp[0], sizeof(float32), tp_per_tmat, fp, 00271 byteswap, &chksum) != tp_per_tmat) { 00272 E_FATAL("Failed to read transition matrix %d from '%s'\n", i, file_name); 00273 } 00274 00275 /* Normalize and floor */ 00276 for (j = 0; j < n_src; j++) { 00277 if (vector_sum_norm(tp[j], n_dst) == 0.0) 00278 E_WARN("Normalization failed for transition matrix %d from state %d\n", 00279 i, j); 00280 vector_nz_floor(tp[j], n_dst, tpfloor); 00281 vector_sum_norm(tp[j], n_dst); 00282 00283 /* Convert to logs3. */ 00284 for (k = 0; k < n_dst; k++) { 00285 int ltp; 00286 #if 0 /* No, don't do this! It will subtly break 3-state HMMs. */ 00287 /* For these ones, we floor them even if they are 00288 * zero, otherwise HMM evaluation goes nuts. */ 00289 if (k >= j && k-j < 3 && tp[j][k] == 0.0f) 00290 tp[j][k] = tpfloor; 00291 #endif 00292 /* Log and quantize them. */ 00293 ltp = -logmath_log(lmath, tp[j][k]) >> SENSCR_SHIFT; 00294 if (ltp > 255) ltp = 255; 00295 t->tp[i][j][k] = (uint8)ltp; 00296 } 00297 } 00298 } 00299 00300 ckd_free_2d(tp); 00301 00302 if (chksum_present) 00303 bio_verify_chksum(fp, byteswap, chksum); 00304 00305 if (fread(&tmp, 1, 1, fp) == 1) 00306 E_ERROR("Non-empty file beyond end of data\n"); 00307 00308 fclose(fp); 00309 00310 if (tmat_chk_uppertri(t, lmath) < 0) 00311 E_FATAL("Tmat not upper triangular\n"); 00312 if (tmat_chk_1skip(t, lmath) < 0) 00313 E_FATAL("Topology not Left-to-Right or Bakis\n"); 00314 00315 return t; 00316 } 00317 00318 void 00319 tmat_report(tmat_t * t) 00320 { 00321 E_INFO_NOFN("Initialization of tmat_t, report:\n"); 00322 E_INFO_NOFN("Read %d transition matrices of size %dx%d\n", 00323 t->n_tmat, t->n_state, t->n_state + 1); 00324 E_INFO_NOFN("\n"); 00325 00326 } 00327 00328 /* 00329 * RAH, Free memory allocated in tmat_init () 00330 */ 00331 void 00332 tmat_free(tmat_t * t) 00333 { 00334 if (t) { 00335 if (t->tp) 00336 ckd_free_3d(t->tp); 00337 ckd_free(t); 00338 } 00339 }