/* PocketSphinx 0.6 */
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 2005 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /********************************************************************* 00038 * 00039 * File: bin_mdef.c 00040 * 00041 * Description: 00042 * Binary format model definition files, with support for 00043 * heterogeneous topologies and variable-size N-phones 00044 * 00045 * Author: 00046 * David Huggins-Daines <dhuggins@cs.cmu.edu> 00047 *********************************************************************/ 00048 00049 /* System headers. */ 00050 #include <stdio.h> 00051 #include <string.h> 00052 #include <assert.h> 00053 00054 /* SphinxBase headers. */ 00055 #include <sphinxbase/prim_type.h> 00056 #include <sphinxbase/ckd_alloc.h> 00057 #include <sphinxbase/byteorder.h> 00058 #include <sphinxbase/case.h> 00059 #include <sphinxbase/err.h> 00060 00061 /* Local headers. */ 00062 #include "mdef.h" 00063 #include "bin_mdef.h" 00064 00065 bin_mdef_t * 00066 bin_mdef_read_text(cmd_ln_t *config, const char *filename) 00067 { 00068 bin_mdef_t *bmdef; 00069 mdef_t *mdef; 00070 int i, nodes, ci_idx, lc_idx, rc_idx; 00071 int nchars; 00072 00073 if ((mdef = mdef_init((char *) filename, TRUE)) == NULL) 00074 return NULL; 00075 00076 /* Enforce some limits. */ 00077 if (mdef->n_sen > BAD_SENID) { 00078 E_ERROR("Number of senones exceeds limit: %d > %d\n", 00079 mdef->n_sen, BAD_SENID); 00080 mdef_free(mdef); 00081 return NULL; 00082 } 00083 if (mdef->n_sseq > BAD_SSID) { 00084 E_ERROR("Number of senone sequences exceeds limit: %d > %d\n", 00085 mdef->n_sseq, BAD_SSID); 00086 mdef_free(mdef); 00087 return NULL; 00088 } 00089 00090 00091 bmdef = ckd_calloc(1, sizeof(*bmdef)); 00092 bmdef->refcnt = 1; 00093 00094 /* Easy stuff. The mdef.c code has done the heavy lifting for us. 
*/ 00095 bmdef->n_ciphone = mdef->n_ciphone; 00096 bmdef->n_phone = mdef->n_phone; 00097 bmdef->n_emit_state = mdef->n_emit_state; 00098 bmdef->n_ci_sen = mdef->n_ci_sen; 00099 bmdef->n_sen = mdef->n_sen; 00100 bmdef->n_tmat = mdef->n_tmat; 00101 bmdef->n_sseq = mdef->n_sseq; 00102 bmdef->sseq = mdef->sseq; 00103 bmdef->cd2cisen = mdef->cd2cisen; 00104 bmdef->sen2cimap = mdef->sen2cimap; 00105 bmdef->n_ctx = 3; /* Triphones only. */ 00106 bmdef->sil = mdef->sil; 00107 mdef->sseq = NULL; /* We are taking over this one. */ 00108 mdef->cd2cisen = NULL; /* And this one. */ 00109 mdef->sen2cimap = NULL; /* And this one. */ 00110 00111 /* Get the phone names. If they are not sorted 00112 * ASCII-betically then we are in a world of hurt and 00113 * therefore will simply refuse to continue. */ 00114 bmdef->ciname = ckd_calloc(bmdef->n_ciphone, sizeof(*bmdef->ciname)); 00115 nchars = 0; 00116 for (i = 0; i < bmdef->n_ciphone; ++i) 00117 nchars += strlen(mdef->ciphone[i].name) + 1; 00118 bmdef->ciname[0] = ckd_calloc(nchars, 1); 00119 strcpy(bmdef->ciname[0], mdef->ciphone[0].name); 00120 for (i = 1; i < bmdef->n_ciphone; ++i) { 00121 bmdef->ciname[i] = 00122 bmdef->ciname[i - 1] + strlen(bmdef->ciname[i - 1]) + 1; 00123 strcpy(bmdef->ciname[i], mdef->ciphone[i].name); 00124 if (i > 0 && strcmp(bmdef->ciname[i - 1], bmdef->ciname[i]) > 0) { 00125 /* FIXME: there should be a solution to this, actually. */ 00126 E_ERROR("Phone names are not in sorted order, sorry."); 00127 bin_mdef_free(bmdef); 00128 return NULL; 00129 } 00130 } 00131 00132 /* Copy over phone information. 
*/ 00133 bmdef->phone = ckd_calloc(bmdef->n_phone, sizeof(*bmdef->phone)); 00134 for (i = 0; i < mdef->n_phone; ++i) { 00135 bmdef->phone[i].ssid = mdef->phone[i].ssid; 00136 bmdef->phone[i].tmat = mdef->phone[i].tmat; 00137 if (i < bmdef->n_ciphone) { 00138 bmdef->phone[i].info.ci.filler = mdef->ciphone[i].filler; 00139 } 00140 else { 00141 bmdef->phone[i].info.cd.wpos = mdef->phone[i].wpos; 00142 bmdef->phone[i].info.cd.ctx[0] = mdef->phone[i].ci; 00143 bmdef->phone[i].info.cd.ctx[1] = mdef->phone[i].lc; 00144 bmdef->phone[i].info.cd.ctx[2] = mdef->phone[i].rc; 00145 } 00146 } 00147 00148 /* Walk the wpos_ci_lclist once to find the total number of 00149 * nodes and the starting locations for each level. */ 00150 nodes = lc_idx = ci_idx = rc_idx = 0; 00151 for (i = 0; i < N_WORD_POSN; ++i) { 00152 int j; 00153 for (j = 0; j < mdef->n_ciphone; ++j) { 00154 ph_lc_t *lc; 00155 00156 for (lc = mdef->wpos_ci_lclist[i][j]; lc; lc = lc->next) { 00157 ph_rc_t *rc; 00158 for (rc = lc->rclist; rc; rc = rc->next) { 00159 ++nodes; /* RC node */ 00160 } 00161 ++nodes; /* LC node */ 00162 ++rc_idx; /* Start of RC nodes (after LC nodes) */ 00163 } 00164 ++nodes; /* CI node */ 00165 ++lc_idx; /* Start of LC nodes (after CI nodes) */ 00166 ++rc_idx; /* Start of RC nodes (after CI and LC nodes) */ 00167 } 00168 ++nodes; /* wpos node */ 00169 ++ci_idx; /* Start of CI nodes (after wpos nodes) */ 00170 ++lc_idx; /* Start of LC nodes (after CI nodes) */ 00171 ++rc_idx; /* STart of RC nodes (after wpos, CI, and LC nodes) */ 00172 } 00173 E_INFO("Allocating %d * %d bytes (%d KiB) for CD tree\n", 00174 nodes, sizeof(*bmdef->cd_tree), 00175 nodes * sizeof(*bmdef->cd_tree) / 1024); 00176 bmdef->n_cd_tree = nodes; 00177 bmdef->cd_tree = ckd_calloc(nodes, sizeof(*bmdef->cd_tree)); 00178 for (i = 0; i < N_WORD_POSN; ++i) { 00179 int j; 00180 00181 bmdef->cd_tree[i].ctx = i; 00182 bmdef->cd_tree[i].n_down = mdef->n_ciphone; 00183 bmdef->cd_tree[i].c.down = ci_idx; 00184 #if 0 00185 E_INFO("%d 
=> %c (%d@%d)\n", 00186 i, (WPOS_NAME)[i], 00187 bmdef->cd_tree[i].n_down, bmdef->cd_tree[i].c.down); 00188 #endif 00189 00190 /* Now we can build the rest of the tree. */ 00191 for (j = 0; j < mdef->n_ciphone; ++j) { 00192 ph_lc_t *lc; 00193 00194 bmdef->cd_tree[ci_idx].ctx = j; 00195 bmdef->cd_tree[ci_idx].c.down = lc_idx; 00196 for (lc = mdef->wpos_ci_lclist[i][j]; lc; lc = lc->next) { 00197 ph_rc_t *rc; 00198 00199 bmdef->cd_tree[lc_idx].ctx = lc->lc; 00200 bmdef->cd_tree[lc_idx].c.down = rc_idx; 00201 for (rc = lc->rclist; rc; rc = rc->next) { 00202 bmdef->cd_tree[rc_idx].ctx = rc->rc; 00203 bmdef->cd_tree[rc_idx].n_down = 0; 00204 bmdef->cd_tree[rc_idx].c.pid = rc->pid; 00205 #if 0 00206 E_INFO("%d => %s %s %s %c (%d@%d)\n", 00207 rc_idx, 00208 bmdef->ciname[j], 00209 bmdef->ciname[lc->lc], 00210 bmdef->ciname[rc->rc], 00211 (WPOS_NAME)[i], 00212 bmdef->cd_tree[rc_idx].n_down, 00213 bmdef->cd_tree[rc_idx].c.down); 00214 #endif 00215 00216 ++bmdef->cd_tree[lc_idx].n_down; 00217 ++rc_idx; 00218 } 00219 /* If there are no triphones here, 00220 * this is considered a leafnode, so 00221 * set the pid to -1. */ 00222 if (bmdef->cd_tree[lc_idx].n_down == 0) 00223 bmdef->cd_tree[lc_idx].c.pid = -1; 00224 #if 0 00225 E_INFO("%d => %s %s %c (%d@%d)\n", 00226 lc_idx, 00227 bmdef->ciname[j], 00228 bmdef->ciname[lc->lc], 00229 (WPOS_NAME)[i], 00230 bmdef->cd_tree[lc_idx].n_down, 00231 bmdef->cd_tree[lc_idx].c.down); 00232 #endif 00233 00234 ++bmdef->cd_tree[ci_idx].n_down; 00235 ++lc_idx; 00236 } 00237 00238 /* As above, so below. 
*/ 00239 if (bmdef->cd_tree[ci_idx].n_down == 0) 00240 bmdef->cd_tree[ci_idx].c.pid = -1; 00241 #if 0 00242 E_INFO("%d => %d=%s (%d@%d)\n", 00243 ci_idx, j, bmdef->ciname[j], 00244 bmdef->cd_tree[ci_idx].n_down, 00245 bmdef->cd_tree[ci_idx].c.down); 00246 #endif 00247 00248 ++ci_idx; 00249 } 00250 } 00251 00252 mdef_free(mdef); 00253 00254 bmdef->alloc_mode = BIN_MDEF_FROM_TEXT; 00255 return bmdef; 00256 } 00257 00258 bin_mdef_t * 00259 bin_mdef_retain(bin_mdef_t *m) 00260 { 00261 ++m->refcnt; 00262 return m; 00263 } 00264 00265 int 00266 bin_mdef_free(bin_mdef_t * m) 00267 { 00268 if (m == NULL) 00269 return 0; 00270 if (--m->refcnt > 0) 00271 return m->refcnt; 00272 00273 switch (m->alloc_mode) { 00274 case BIN_MDEF_FROM_TEXT: 00275 ckd_free(m->ciname[0]); 00276 ckd_free(m->sseq[0]); 00277 ckd_free(m->phone); 00278 ckd_free(m->cd_tree); 00279 break; 00280 case BIN_MDEF_IN_MEMORY: 00281 ckd_free(m->ciname[0]); 00282 break; 00283 case BIN_MDEF_ON_DISK: 00284 break; 00285 } 00286 if (m->filemap) 00287 mmio_file_unmap(m->filemap); 00288 ckd_free(m->cd2cisen); 00289 ckd_free(m->sen2cimap); 00290 ckd_free(m->ciname); 00291 ckd_free(m->sseq); 00292 ckd_free(m); 00293 return 0; 00294 } 00295 00296 static const char format_desc[] = 00297 "BEGIN FILE FORMAT DESCRIPTION\n" 00298 "int32 n_ciphone; /**< Number of base (CI) phones */\n" 00299 "int32 n_phone; /**< Number of base (CI) phones + (CD) triphones */\n" 00300 "int32 n_emit_state; /**< Number of emitting states per phone (0 if heterogeneous) */\n" 00301 "int32 n_ci_sen; /**< Number of CI senones; these are the first */\n" 00302 "int32 n_sen; /**< Number of senones (CI+CD) */\n" 00303 "int32 n_tmat; /**< Number of transition matrices */\n" 00304 "int32 n_sseq; /**< Number of unique senone sequences */\n" 00305 "int32 n_ctx; /**< Number of phones of context */\n" 00306 "int32 n_cd_tree; /**< Number of nodes in CD tree structure */\n" 00307 "int32 sil; /**< CI phone ID for silence */\n" 00308 "char ciphones[][]; /**< CI 
phone strings (null-terminated) */\n" 00309 "char padding[]; /**< Padding to a 4-bytes boundary */\n" 00310 "struct { int16 ctx; int16 n_down; int32 pid/down } cd_tree[];\n" 00311 "struct { int32 ssid; int32 tmat; int8 attr[4] } phones[];\n" 00312 "int16 sseq[]; /**< Unique senone sequences */\n" 00313 "int8 sseq_len[]; /**< Number of states in each sseq (none if homogeneous) */\n" 00314 "END FILE FORMAT DESCRIPTION\n"; 00315 00316 bin_mdef_t * 00317 bin_mdef_read(cmd_ln_t *config, const char *filename) 00318 { 00319 bin_mdef_t *m; 00320 FILE *fh; 00321 size_t tree_start; 00322 int32 val, i, swap, pos, end; 00323 int32 *sseq_size; 00324 int do_mmap; 00325 00326 /* Try to read it as text first. */ 00327 if ((m = bin_mdef_read_text(config, filename)) != NULL) 00328 return m; 00329 00330 E_INFO("Reading binary model definition: %s\n", filename); 00331 if ((fh = fopen(filename, "rb")) == NULL) 00332 return NULL; 00333 00334 if (fread(&val, 4, 1, fh) != 1) { 00335 fclose(fh); 00336 E_ERROR_SYSTEM("Failed to read byte-order marker from %s\n", 00337 filename); 00338 return NULL; 00339 } 00340 swap = 0; 00341 if (val == BIN_MDEF_OTHER_ENDIAN) { 00342 swap = 1; 00343 E_INFO("Must byte-swap %s\n", filename); 00344 } 00345 if (fread(&val, 4, 1, fh) != 1) { 00346 fclose(fh); 00347 E_ERROR_SYSTEM("Failed to read version from %s\n", filename); 00348 return NULL; 00349 } 00350 if (swap) 00351 SWAP_INT32(&val); 00352 if (val > BIN_MDEF_FORMAT_VERSION) { 00353 E_ERROR("File format version %d for %s is newer than library\n", 00354 val, filename); 00355 fclose(fh); 00356 return NULL; 00357 } 00358 if (fread(&val, 4, 1, fh) != 1) { 00359 fclose(fh); 00360 E_ERROR_SYSTEM("Failed to read header length from %s\n", filename); 00361 return NULL; 00362 } 00363 if (swap) 00364 SWAP_INT32(&val); 00365 /* Skip format descriptor. */ 00366 fseek(fh, val, SEEK_CUR); 00367 00368 /* Finally allocate it. 
*/ 00369 m = ckd_calloc(1, sizeof(*m)); 00370 m->refcnt = 1; 00371 00372 /* Check these, to make gcc/glibc shut up. */ 00373 #define FREAD_SWAP32_CHK(dest) \ 00374 if (fread((dest), 4, 1, fh) != 1) { \ 00375 fclose(fh); \ 00376 ckd_free(m); \ 00377 E_ERROR_SYSTEM("Failed to read %s from %s\n", #dest, filename); \ 00378 return NULL; \ 00379 } \ 00380 if (swap) SWAP_INT32(dest); 00381 00382 FREAD_SWAP32_CHK(&m->n_ciphone); 00383 FREAD_SWAP32_CHK(&m->n_phone); 00384 FREAD_SWAP32_CHK(&m->n_emit_state); 00385 FREAD_SWAP32_CHK(&m->n_ci_sen); 00386 FREAD_SWAP32_CHK(&m->n_sen); 00387 FREAD_SWAP32_CHK(&m->n_tmat); 00388 FREAD_SWAP32_CHK(&m->n_sseq); 00389 FREAD_SWAP32_CHK(&m->n_ctx); 00390 FREAD_SWAP32_CHK(&m->n_cd_tree); 00391 FREAD_SWAP32_CHK(&m->sil); 00392 00393 /* CI names are first in the file. */ 00394 m->ciname = ckd_calloc(m->n_ciphone, sizeof(*m->ciname)); 00395 00396 /* Decide whether to read in the whole file or mmap it. */ 00397 do_mmap = config ? cmd_ln_boolean_r(config, "-mmap") : TRUE; 00398 if (swap) { 00399 E_WARN("-mmap specified, but mdef is other-endian. Will not memory-map.\n"); 00400 do_mmap = FALSE; 00401 } 00402 /* Actually try to mmap it. */ 00403 if (do_mmap) { 00404 m->filemap = mmio_file_read(filename); 00405 if (m->filemap == NULL) 00406 do_mmap = FALSE; 00407 } 00408 pos = ftell(fh); 00409 if (do_mmap) { 00410 /* Get the base pointer from the memory map. */ 00411 m->ciname[0] = (char *)mmio_file_ptr(m->filemap) + pos; 00412 /* Success! */ 00413 m->alloc_mode = BIN_MDEF_ON_DISK; 00414 } 00415 else { 00416 /* Read everything into memory. 
*/ 00417 m->alloc_mode = BIN_MDEF_IN_MEMORY; 00418 fseek(fh, 0, SEEK_END); 00419 end = ftell(fh); 00420 fseek(fh, pos, SEEK_SET); 00421 m->ciname[0] = ckd_malloc(end - pos); 00422 if (fread(m->ciname[0], 1, end - pos, fh) != end - pos) 00423 E_FATAL("Failed to read %d bytes of data from %s\n", end - pos, filename); 00424 } 00425 00426 for (i = 1; i < m->n_ciphone; ++i) 00427 m->ciname[i] = m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1; 00428 00429 /* Skip past the padding. */ 00430 tree_start = 00431 m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1 - m->ciname[0]; 00432 tree_start = (tree_start + 3) & ~3; 00433 m->cd_tree = (cd_tree_t *) (m->ciname[0] + tree_start); 00434 if (swap) { 00435 for (i = 0; i < m->n_cd_tree; ++i) { 00436 SWAP_INT16(&m->cd_tree[i].ctx); 00437 SWAP_INT16(&m->cd_tree[i].n_down); 00438 SWAP_INT32(&m->cd_tree[i].c.down); 00439 } 00440 } 00441 m->phone = (mdef_entry_t *) (m->cd_tree + m->n_cd_tree); 00442 if (swap) { 00443 for (i = 0; i < m->n_phone; ++i) { 00444 SWAP_INT32(&m->phone[i].ssid); 00445 SWAP_INT32(&m->phone[i].tmat); 00446 } 00447 } 00448 sseq_size = (int32 *) (m->phone + m->n_phone); 00449 if (swap) 00450 SWAP_INT32(sseq_size); 00451 m->sseq = ckd_calloc(m->n_sseq, sizeof(*m->sseq)); 00452 m->sseq[0] = (uint16 *) (sseq_size + 1); 00453 if (swap) { 00454 for (i = 0; i < *sseq_size; ++i) 00455 SWAP_INT16(m->sseq[0] + i); 00456 } 00457 if (m->n_emit_state) { 00458 for (i = 1; i < m->n_sseq; ++i) 00459 m->sseq[i] = m->sseq[0] + i * m->n_emit_state; 00460 } 00461 else { 00462 m->sseq_len = (uint8 *) (m->sseq[0] + *sseq_size); 00463 for (i = 1; i < m->n_sseq; ++i) 00464 m->sseq[i] = m->sseq[i - 1] + m->sseq_len[i - 1]; 00465 } 00466 00467 /* Now build the CD-to-CI mappings using the senone sequences. 00468 * This is the only really accurate way to do it, though it is 00469 * still inaccurate in the case of heterogeneous topologies or 00470 * cross-state tying. 
*/ 00471 m->cd2cisen = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->cd2cisen)); 00472 m->sen2cimap = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->sen2cimap)); 00473 00474 /* Default mappings (identity, none) */ 00475 for (i = 0; i < m->n_ci_sen; ++i) 00476 m->cd2cisen[i] = i; 00477 for (; i < m->n_sen; ++i) 00478 m->cd2cisen[i] = -1; 00479 for (i = 0; i < m->n_sen; ++i) 00480 m->sen2cimap[i] = -1; 00481 for (i = 0; i < m->n_phone; ++i) { 00482 int32 j, ssid = m->phone[i].ssid; 00483 00484 for (j = 0; j < bin_mdef_n_emit_state_phone(m, i); ++j) { 00485 int s = bin_mdef_sseq2sen(m, ssid, j); 00486 int ci = bin_mdef_pid2ci(m, i); 00487 /* Take the first one and warn if we have cross-state tying. */ 00488 if (m->sen2cimap[s] == -1) 00489 m->sen2cimap[s] = ci; 00490 if (m->sen2cimap[s] != ci) 00491 E_WARN 00492 ("Senone %d is shared between multiple base phones\n", 00493 s); 00494 00495 if (j > bin_mdef_n_emit_state_phone(m, ci)) 00496 E_WARN("CD phone %d has fewer states than CI phone %d\n", 00497 i, ci); 00498 else 00499 m->cd2cisen[s] = 00500 bin_mdef_sseq2sen(m, m->phone[ci].ssid, j); 00501 } 00502 } 00503 00504 /* Set the silence phone. */ 00505 m->sil = bin_mdef_ciphone_id(m, S3_SILENCE_CIPHONE); 00506 00507 E_INFO 00508 ("%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n", 00509 m->n_ciphone, m->n_phone - m->n_ciphone, m->n_emit_state, 00510 m->n_ci_sen, m->n_sen, m->n_sseq); 00511 fclose(fh); 00512 return m; 00513 } 00514 00515 int 00516 bin_mdef_write(bin_mdef_t * m, const char *filename) 00517 { 00518 FILE *fh; 00519 int32 val, i; 00520 00521 if ((fh = fopen(filename, "wb")) == NULL) 00522 return -1; 00523 00524 /* Byteorder marker. */ 00525 val = BIN_MDEF_NATIVE_ENDIAN; 00526 fwrite(&val, 1, 4, fh); 00527 /* Version. */ 00528 val = BIN_MDEF_FORMAT_VERSION; 00529 fwrite(&val, 1, sizeof(val), fh); 00530 00531 /* Round the format descriptor size up to a 4-byte boundary. 
*/ 00532 val = ((sizeof(format_desc) + 3) & ~3); 00533 fwrite(&val, 1, sizeof(val), fh); 00534 fwrite(format_desc, 1, sizeof(format_desc), fh); 00535 /* Pad it with zeros. */ 00536 i = 0; 00537 fwrite(&i, 1, val - sizeof(format_desc), fh); 00538 00539 /* Binary header things. */ 00540 fwrite(&m->n_ciphone, 4, 1, fh); 00541 fwrite(&m->n_phone, 4, 1, fh); 00542 fwrite(&m->n_emit_state, 4, 1, fh); 00543 fwrite(&m->n_ci_sen, 4, 1, fh); 00544 fwrite(&m->n_sen, 4, 1, fh); 00545 fwrite(&m->n_tmat, 4, 1, fh); 00546 fwrite(&m->n_sseq, 4, 1, fh); 00547 fwrite(&m->n_ctx, 4, 1, fh); 00548 fwrite(&m->n_cd_tree, 4, 1, fh); 00549 /* Write this as a 32-bit value to preserve alignment for the 00550 * non-mmap case (we want things aligned both from the 00551 * beginning of the file and the beginning of the phone 00552 * strings). */ 00553 val = m->sil; 00554 fwrite(&val, 4, 1, fh); 00555 00556 /* Phone strings. */ 00557 for (i = 0; i < m->n_ciphone; ++i) 00558 fwrite(m->ciname[i], 1, strlen(m->ciname[i]) + 1, fh); 00559 /* Pad with zeros. 
*/ 00560 val = (ftell(fh) + 3) & ~3; 00561 i = 0; 00562 fwrite(&i, 1, val - ftell(fh), fh); 00563 00564 /* Write CD-tree */ 00565 fwrite(m->cd_tree, sizeof(*m->cd_tree), m->n_cd_tree, fh); 00566 /* Write phones */ 00567 fwrite(m->phone, sizeof(*m->phone), m->n_phone, fh); 00568 if (m->n_emit_state) { 00569 /* Write size of sseq */ 00570 val = m->n_sseq * m->n_emit_state; 00571 fwrite(&val, 4, 1, fh); 00572 00573 /* Write sseq */ 00574 fwrite(m->sseq[0], sizeof(**m->sseq), 00575 m->n_sseq * m->n_emit_state, fh); 00576 } 00577 else { 00578 int32 n; 00579 00580 /* Calcluate size of sseq */ 00581 n = 0; 00582 for (i = 0; i < m->n_sseq; ++i) 00583 n += m->sseq_len[i]; 00584 00585 /* Write size of sseq */ 00586 fwrite(&n, 4, 1, fh); 00587 00588 /* Write sseq */ 00589 fwrite(m->sseq[0], sizeof(**m->sseq), n, fh); 00590 00591 /* Write sseq_len */ 00592 fwrite(m->sseq_len, 1, m->n_sseq, fh); 00593 } 00594 fclose(fh); 00595 00596 return 0; 00597 } 00598 00599 int 00600 bin_mdef_write_text(bin_mdef_t * m, const char *filename) 00601 { 00602 FILE *fh; 00603 int p, i, n_total_state; 00604 00605 if (strcmp(filename, "-") == 0) 00606 fh = stdout; 00607 else { 00608 if ((fh = fopen(filename, "w")) == NULL) 00609 return -1; 00610 } 00611 00612 fprintf(fh, "0.3\n"); 00613 fprintf(fh, "%d n_base\n", m->n_ciphone); 00614 fprintf(fh, "%d n_tri\n", m->n_phone - m->n_ciphone); 00615 if (m->n_emit_state) 00616 n_total_state = m->n_phone * (m->n_emit_state + 1); 00617 else { 00618 n_total_state = 0; 00619 for (i = 0; i < m->n_phone; ++i) 00620 n_total_state += m->sseq_len[m->phone[i].ssid] + 1; 00621 } 00622 fprintf(fh, "%d n_state_map\n", n_total_state); 00623 fprintf(fh, "%d n_tied_state\n", m->n_sen); 00624 fprintf(fh, "%d n_tied_ci_state\n", m->n_ci_sen); 00625 fprintf(fh, "%d n_tied_tmat\n", m->n_tmat); 00626 fprintf(fh, "#\n# Columns definitions\n"); 00627 fprintf(fh, "#%4s %3s %3s %1s %6s %4s %s\n", 00628 "base", "lft", "rt", "p", "attrib", "tmat", 00629 " ... 
state id's ..."); 00630 00631 for (p = 0; p < m->n_ciphone; p++) { 00632 int n_state; 00633 00634 fprintf(fh, "%5s %3s %3s %1s", m->ciname[p], "-", "-", "-"); 00635 00636 if (bin_mdef_is_fillerphone(m, p)) 00637 fprintf(fh, " %6s", "filler"); 00638 else 00639 fprintf(fh, " %6s", "n/a"); 00640 fprintf(fh, " %4d", m->phone[p].tmat); 00641 00642 if (m->n_emit_state) 00643 n_state = m->n_emit_state; 00644 else 00645 n_state = m->sseq_len[m->phone[p].ssid]; 00646 for (i = 0; i < n_state; i++) { 00647 fprintf(fh, " %6u", m->sseq[m->phone[p].ssid][i]); 00648 } 00649 fprintf(fh, " N\n"); 00650 } 00651 00652 00653 for (; p < m->n_phone; p++) { 00654 int n_state; 00655 00656 fprintf(fh, "%5s %3s %3s %c", 00657 m->ciname[m->phone[p].info.cd.ctx[0]], 00658 m->ciname[m->phone[p].info.cd.ctx[1]], 00659 m->ciname[m->phone[p].info.cd.ctx[2]], 00660 (WPOS_NAME)[m->phone[p].info.cd.wpos]); 00661 00662 if (bin_mdef_is_fillerphone(m, p)) 00663 fprintf(fh, " %6s", "filler"); 00664 else 00665 fprintf(fh, " %6s", "n/a"); 00666 fprintf(fh, " %4d", m->phone[p].tmat); 00667 00668 00669 if (m->n_emit_state) 00670 n_state = m->n_emit_state; 00671 else 00672 n_state = m->sseq_len[m->phone[p].ssid]; 00673 for (i = 0; i < n_state; i++) { 00674 fprintf(fh, " %6u", m->sseq[m->phone[p].ssid][i]); 00675 } 00676 fprintf(fh, " N\n"); 00677 } 00678 00679 if (strcmp(filename, "-") != 0) 00680 fclose(fh); 00681 return 0; 00682 } 00683 00684 int 00685 bin_mdef_ciphone_id(bin_mdef_t * m, const char *ciphone) 00686 { 00687 int low, mid, high; 00688 00689 /* Exact binary search on m->ciphone */ 00690 low = 0; 00691 high = m->n_ciphone; 00692 while (low < high) { 00693 int c; 00694 00695 mid = (low + high) / 2; 00696 c = strcmp(ciphone, m->ciname[mid]); 00697 if (c == 0) 00698 return mid; 00699 else if (c > 0) 00700 low = mid + 1; 00701 else if (c < 0) 00702 high = mid; 00703 } 00704 return -1; 00705 } 00706 00707 int 00708 bin_mdef_ciphone_id_nocase(bin_mdef_t * m, const char *ciphone) 00709 { 00710 int low, 
mid, high; 00711 00712 /* Exact binary search on m->ciphone */ 00713 low = 0; 00714 high = m->n_ciphone; 00715 while (low < high) { 00716 int c; 00717 00718 mid = (low + high) / 2; 00719 c = strcmp_nocase(ciphone, m->ciname[mid]); 00720 if (c == 0) 00721 return mid; 00722 else if (c > 0) 00723 low = mid + 1; 00724 else if (c < 0) 00725 high = mid; 00726 } 00727 return -1; 00728 } 00729 00730 const char * 00731 bin_mdef_ciphone_str(bin_mdef_t * m, int32 ci) 00732 { 00733 assert(m != NULL); 00734 assert(ci < m->n_ciphone); 00735 return m->ciname[ci]; 00736 } 00737 00738 int 00739 bin_mdef_phone_id(bin_mdef_t * m, int32 ci, int32 lc, int32 rc, int32 wpos) 00740 { 00741 cd_tree_t *cd_tree; 00742 int level, max; 00743 int16 ctx[4]; 00744 00745 assert(m); 00746 00747 /* In the future, we might back off when context is not available, 00748 * but for now we'll just return the CI phone. */ 00749 if (lc < 0 || rc < 0) 00750 return ci; 00751 00752 assert((ci >= 0) && (ci < m->n_ciphone)); 00753 assert((lc >= 0) && (lc < m->n_ciphone)); 00754 assert((rc >= 0) && (rc < m->n_ciphone)); 00755 assert((wpos >= 0) && (wpos < N_WORD_POSN)); 00756 00757 /* Create a context list, mapping fillers to silence. */ 00758 ctx[0] = wpos; 00759 ctx[1] = ci; 00760 ctx[2] = (m->sil >= 0 00761 && m->phone[lc].info.ci.filler) ? m->sil : lc; 00762 ctx[3] = (m->sil >= 0 00763 && m->phone[rc].info.ci.filler) ? m->sil : rc; 00764 00765 /* Walk down the cd_tree. */ 00766 cd_tree = m->cd_tree; 00767 level = 0; /* What level we are on. */ 00768 max = N_WORD_POSN; /* Number of nodes on this level. 
*/ 00769 while (level < 4) { 00770 int i; 00771 00772 #if 0 00773 E_INFO("Looking for context %d=%s in %d at %d\n", 00774 ctx[level], m->ciname[ctx[level]], 00775 max, cd_tree - m->cd_tree); 00776 #endif 00777 for (i = 0; i < max; ++i) { 00778 #if 0 00779 E_INFO("Look at context %d=%s at %d\n", 00780 cd_tree[i].ctx, 00781 m->ciname[cd_tree[i].ctx], cd_tree + i - m->cd_tree); 00782 #endif 00783 if (cd_tree[i].ctx == ctx[level]) 00784 break; 00785 } 00786 if (i == max) 00787 return -1; 00788 #if 0 00789 E_INFO("Found context %d=%s at %d, n_down=%d, down=%d\n", 00790 ctx[level], m->ciname[ctx[level]], 00791 cd_tree + i - m->cd_tree, 00792 cd_tree[i].n_down, cd_tree[i].c.down); 00793 #endif 00794 /* Leaf node, stop here. */ 00795 if (cd_tree[i].n_down == 0) 00796 return cd_tree[i].c.pid; 00797 00798 /* Go down one level. */ 00799 max = cd_tree[i].n_down; 00800 cd_tree = m->cd_tree + cd_tree[i].c.down; 00801 ++level; 00802 } 00803 /* We probably shouldn't get here. */ 00804 return -1; 00805 } 00806 00807 int 00808 bin_mdef_phone_id_nearest(bin_mdef_t * m, int32 b, int32 l, int32 r, int32 pos) 00809 { 00810 int p, tmppos; 00811 00812 00813 00814 /* In the future, we might back off when context is not available, 00815 * but for now we'll just return the CI phone. 
*/ 00816 if (l < 0 || r < 0) 00817 return b; 00818 00819 p = bin_mdef_phone_id(m, b, l, r, pos); 00820 if (p >= 0) 00821 return p; 00822 00823 /* Exact triphone not found; backoff to other word positions */ 00824 for (tmppos = 0; tmppos < N_WORD_POSN; tmppos++) { 00825 if (tmppos != pos) { 00826 p = bin_mdef_phone_id(m, b, l, r, tmppos); 00827 if (p >= 0) 00828 return p; 00829 } 00830 } 00831 00832 /* Nothing yet; backoff to silence phone if non-silence filler context */ 00833 /* In addition, backoff to silence phone on left/right if in beginning/end position */ 00834 if (m->sil >= 0) { 00835 int newl = l, newr = r; 00836 if (m->phone[(int)l].info.ci.filler 00837 || pos == WORD_POSN_BEGIN || pos == WORD_POSN_SINGLE) 00838 newl = m->sil; 00839 if (m->phone[(int)r].info.ci.filler 00840 || pos == WORD_POSN_END || pos == WORD_POSN_SINGLE) 00841 newr = m->sil; 00842 if ((newl != l) || (newr != r)) { 00843 p = bin_mdef_phone_id(m, b, newl, newr, pos); 00844 if (p >= 0) 00845 return p; 00846 00847 for (tmppos = 0; tmppos < N_WORD_POSN; tmppos++) { 00848 if (tmppos != pos) { 00849 p = bin_mdef_phone_id(m, b, newl, newr, tmppos); 00850 if (p >= 0) 00851 return p; 00852 } 00853 } 00854 } 00855 } 00856 00857 /* Nothing yet; backoff to base phone */ 00858 return b; 00859 } 00860 00861 int 00862 bin_mdef_phone_str(bin_mdef_t * m, int pid, char *buf) 00863 { 00864 char *wpos_name; 00865 00866 assert(m); 00867 assert((pid >= 0) && (pid < m->n_phone)); 00868 wpos_name = WPOS_NAME; 00869 00870 buf[0] = '\0'; 00871 if (pid < m->n_ciphone) 00872 sprintf(buf, "%s", bin_mdef_ciphone_str(m, pid)); 00873 else { 00874 sprintf(buf, "%s %s %s %c", 00875 bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[0]), 00876 bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[1]), 00877 bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[2]), 00878 wpos_name[m->phone[pid].info.cd.wpos]); 00879 } 00880 return 0; 00881 }