PocketSphinx  0.6
src/libpocketsphinx/mdef.c
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /*
00039  * mdef.c -- HMM model definition: base (CI) phones and triphones
00040  *
00041  * **********************************************
00042  * CMU ARPA Speech Project
00043  *
00044  * Copyright (c) 1999 Carnegie Mellon University.
00045  * ALL RIGHTS RESERVED.
00046  * **********************************************
00047  * 
00048  * HISTORY
00049  * 
00050  * 
00051  * 22-Nov-2004  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00052  *              Imported from s3.2, for supporting s3 format continuous
00053  *              acoustic models.
00054  * 
00055  * 14-Oct-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
00056  *              Added mdef_sseq2sen_active().
00057  * 
00058  * 06-May-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
00059  *              In mdef_phone_id(), added backing off to silence phone context from filler
00060  *              context if original triphone not found.
00061  * 
00062  * 30-Apr-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
00063  *              Added senone-sequence id (ssid) to phone_t and appropriate functions to
00064  *              maintain it.  Instead, moved state sequence info to mdef_t.
00065  * 
00066  * 13-Jul-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
00067  *              Added mdef_phone_str().
00068  * 
00069  * 01-Jan-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
00070  *              Allowed mdef_phone_id_nearest to return base phone id if either
00071  *              left or right context (or both) is undefined.
00072  * 
00073  * 01-Jan-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
00074  *              Created.
00075  */
00076 
00077 
00078 /*
00079  * Major assumptions:
00080  *   All phones have same #states, same topology.
00081  *   Every phone has exactly one non-emitting, final state--the last one.
00082  *   CI phones must appear first in model definition file.
00083  */
00084 
00085 /* System headers. */
00086 #include <stdio.h>
00087 #include <string.h>
00088 #include <stdlib.h>
00089 #include <assert.h>
00090 
00091 /* SphinxBase headers. */
00092 #include <sphinxbase/ckd_alloc.h>
00093 #include <sphinxbase/err.h>
00094 
00095 /* Local headers. */
00096 #include "mdef.h"
00097 
00098 
00099 #define MODEL_DEF_VERSION       "0.3"
00100 
00101 static void
00102 ciphone_add(mdef_t * m, char *ci, int p)
00103 {
00104     assert(p < m->n_ciphone);
00105 
00106     m->ciphone[p].name = (char *) ckd_salloc(ci);       /* freed in mdef_free */
00107     if (hash_table_enter(m->ciphone_ht, m->ciphone[p].name,
00108                          (void *)(long)p) != (void *)(long)p)
00109         E_FATAL("hash_table_enter(%s) failed; duplicate CIphone?\n",
00110                 m->ciphone[p].name);
00111 }
00112 
00113 
00114 static ph_lc_t *
00115 find_ph_lc(ph_lc_t * lclist, int lc)
00116 {
00117     ph_lc_t *lcptr;
00118 
00119     for (lcptr = lclist; lcptr && (lcptr->lc != lc); lcptr = lcptr->next);
00120     return lcptr;
00121 }
00122 
00123 
00124 static ph_rc_t *
00125 find_ph_rc(ph_rc_t * rclist, int rc)
00126 {
00127     ph_rc_t *rcptr;
00128 
00129     for (rcptr = rclist; rcptr && (rcptr->rc != rc); rcptr = rcptr->next);
00130     return rcptr;
00131 }
00132 
00133 
00134 static void
00135 triphone_add(mdef_t * m,
00136              int ci, int lc, int rc, word_posn_t wpos,
00137              int p)
00138 {
00139     ph_lc_t *lcptr;
00140     ph_rc_t *rcptr;
00141 
00142     assert(p < m->n_phone);
00143 
00144     /* Fill in phone[p] information (state and tmat mappings added later) */
00145     m->phone[p].ci = ci;
00146     m->phone[p].lc = lc;
00147     m->phone[p].rc = rc;
00148     m->phone[p].wpos = wpos;
00149 
00150     /* Create <ci,lc,rc,wpos> -> p mapping if not a CI phone */
00151     if (p >= m->n_ciphone) {
00152         if ((lcptr = find_ph_lc(m->wpos_ci_lclist[wpos][(int) ci], lc))
00153             == NULL) {
00154             lcptr = (ph_lc_t *) ckd_calloc(1, sizeof(ph_lc_t)); /* freed at mdef_free, I believe */
00155             lcptr->lc = lc;
00156             lcptr->next = m->wpos_ci_lclist[wpos][(int) ci];
00157             m->wpos_ci_lclist[wpos][(int) ci] = lcptr;  /* This is what needs to be freed */
00158         }
00159         if ((rcptr = find_ph_rc(lcptr->rclist, rc)) != NULL) {
00160             __BIGSTACKVARIABLE__ char buf[4096];
00161 
00162             mdef_phone_str(m, rcptr->pid, buf);
00163             E_FATAL("Duplicate triphone: %s\n", buf);
00164         }
00165 
00166         rcptr = (ph_rc_t *) ckd_calloc(1, sizeof(ph_rc_t));     /* freed in mdef_free, I believe */
00167         rcptr->rc = rc;
00168         rcptr->pid = p;
00169         rcptr->next = lcptr->rclist;
00170         lcptr->rclist = rcptr;
00171     }
00172 }
00173 
00174 
00175 int
00176 mdef_ciphone_id(mdef_t * m, char *ci)
00177 {
00178     int32 id;
00179     if (hash_table_lookup_int32(m->ciphone_ht, ci, &id) < 0)
00180         return -1;
00181     return id;
00182 }
00183 
00184 
00185 const char *
00186 mdef_ciphone_str(mdef_t * m, int id)
00187 {
00188     assert(m);
00189     assert((id >= 0) && (id < m->n_ciphone));
00190 
00191     return (m->ciphone[id].name);
00192 }
00193 
00194 
00195 int
00196 mdef_phone_str(mdef_t * m, int pid, char *buf)
00197 {
00198     char *wpos_name;
00199 
00200     assert(m);
00201     assert((pid >= 0) && (pid < m->n_phone));
00202     wpos_name = WPOS_NAME;
00203 
00204     buf[0] = '\0';
00205     if (pid < m->n_ciphone)
00206         sprintf(buf, "%s", mdef_ciphone_str(m, pid));
00207     else {
00208         sprintf(buf, "%s %s %s %c",
00209                 mdef_ciphone_str(m, m->phone[pid].ci),
00210                 mdef_ciphone_str(m, m->phone[pid].lc),
00211                 mdef_ciphone_str(m, m->phone[pid].rc),
00212                 wpos_name[m->phone[pid].wpos]);
00213     }
00214     return 0;
00215 }
00216 
00217 
00218 int
00219 mdef_phone_id(mdef_t * m,
00220               int ci, int lc, int rc, word_posn_t wpos)
00221 {
00222     ph_lc_t *lcptr;
00223     ph_rc_t *rcptr;
00224     int newl, newr;
00225 
00226     assert(m);
00227     assert((ci >= 0) && (ci < m->n_ciphone));
00228     assert((lc >= 0) && (lc < m->n_ciphone));
00229     assert((rc >= 0) && (rc < m->n_ciphone));
00230     assert((wpos >= 0) && (wpos < N_WORD_POSN));
00231 
00232     if (((lcptr =
00233           find_ph_lc(m->wpos_ci_lclist[wpos][(int) ci], lc)) == NULL)
00234         || ((rcptr = find_ph_rc(lcptr->rclist, rc)) == NULL)) {
00235         /* Not found; backoff to silence context if non-silence filler context */
00236         if (m->sil < 0)
00237             return -1;
00238 
00239         newl = m->ciphone[(int) lc].filler ? m->sil : lc;
00240         newr = m->ciphone[(int) rc].filler ? m->sil : rc;
00241         if ((newl == lc) && (newr == rc))
00242             return -1;
00243 
00244         return (mdef_phone_id(m, ci, newl, newr, wpos));
00245     }
00246 
00247     return (rcptr->pid);
00248 }
00249 
00250 int
00251 mdef_is_ciphone(mdef_t * m, int p)
00252 {
00253     assert(m);
00254     assert((p >= 0) && (p < m->n_phone));
00255 
00256     return ((p < m->n_ciphone) ? 1 : 0);
00257 }
00258 
00259 int
00260 mdef_is_cisenone(mdef_t * m, int s)
00261 {
00262     assert(m);
00263     if (s >= m->n_sen) {
00264         return 0;
00265     }
00266     assert(s >= 0);
00267     return ((s == m->cd2cisen[s]) ? 1 : 0);
00268 }
00269 
00270 
00271 /* Parse tmat and state->senone mappings for phone p and fill in structure */
00272 static void
00273 parse_tmat_senmap(mdef_t * m, char *line, int32 off, int p)
00274 {
00275     int32 wlen, n, s;
00276     __BIGSTACKVARIABLE__ char word[1024], *lp;
00277 
00278     lp = line + off;
00279 
00280     /* Read transition matrix id */
00281     if ((sscanf(lp, "%d%n", &n, &wlen) != 1) || (n < 0))
00282         E_FATAL("Missing or bad transition matrix id: %s\n", line);
00283     m->phone[p].tmat = n;
00284     if (m->n_tmat <= n)
00285         E_FATAL("tmat-id(%d) > #tmat in header(%d): %s\n", n, m->n_tmat,
00286                 line);
00287     lp += wlen;
00288 
00289     /* Read senone mappings for each emitting state */
00290     for (n = 0; n < m->n_emit_state; n++) {
00291         if ((sscanf(lp, "%d%n", &s, &wlen) != 1) || (s < 0))
00292             E_FATAL("Missing or bad state[%d]->senone mapping: %s\n", n,
00293                     line);
00294 
00295         /*20040821 ARCHAN, This line is added to allow 3.x/3.0 compatability. */
00296         m->phone[p].state[n] = s;
00297 
00298         if ((p < m->n_ciphone) && (m->n_ci_sen <= s))
00299             E_FATAL("CI-senone-id(%d) > #CI-senones(%d): %s\n", s,
00300                     m->n_ci_sen, line);
00301         if (m->n_sen <= s)
00302             E_FATAL("Senone-id(%d) > #senones(%d): %s\n", s, m->n_sen,
00303                     line);
00304 
00305         m->sseq[p][n] = s;
00306         lp += wlen;
00307     }
00308 
00309     /* Check for the last non-emitting state N */
00310     if ((sscanf(lp, "%s%n", word, &wlen) != 1) || (strcmp(word, "N") != 0))
00311         E_FATAL("Missing non-emitting state spec: %s\n", line);
00312     lp += wlen;
00313 
00314     /* Check for end of line */
00315     if (sscanf(lp, "%s%n", word, &wlen) == 1)
00316         E_FATAL("Non-empty beyond non-emitting final state: %s\n", line);
00317 }
00318 
00319 
00320 static void
00321 parse_base_line(mdef_t * m, char *line, int p)
00322 {
00323     int32 wlen, n;
00324     __BIGSTACKVARIABLE__ char word[1024], *lp;
00325     int ci;
00326 
00327     lp = line;
00328 
00329     /* Read base phone name */
00330     if (sscanf(lp, "%s%n", word, &wlen) != 1)
00331         E_FATAL("Missing base phone name: %s\n", line);
00332     lp += wlen;
00333 
00334     /* Make sure it's not a duplicate */
00335     ci = mdef_ciphone_id(m, word);
00336     if (ci >= 0)
00337         E_FATAL("Duplicate base phone: %s\n", line);
00338 
00339     /* Add ciphone to ciphone table with id p */
00340     ciphone_add(m, word, p);
00341     ci = (int) p;
00342 
00343     /* Read and skip "-" for lc, rc, wpos */
00344     for (n = 0; n < 3; n++) {
00345         if ((sscanf(lp, "%s%n", word, &wlen) != 1)
00346             || (strcmp(word, "-") != 0))
00347             E_FATAL("Bad context info for base phone: %s\n", line);
00348         lp += wlen;
00349     }
00350 
00351     /* Read filler attribute, if present */
00352     if (sscanf(lp, "%s%n", word, &wlen) != 1)
00353         E_FATAL("Missing filler atribute field: %s\n", line);
00354     lp += wlen;
00355     if (strcmp(word, "filler") == 0)
00356         m->ciphone[(int) ci].filler = 1;
00357     else if (strcmp(word, "n/a") == 0)
00358         m->ciphone[(int) ci].filler = 0;
00359     else
00360         E_FATAL("Bad filler attribute field: %s\n", line);
00361 
00362     triphone_add(m, ci, -1, -1, WORD_POSN_UNDEFINED, p);
00363 
00364     /* Parse remainder of line: transition matrix and state->senone mappings */
00365     parse_tmat_senmap(m, line, lp - line, p);
00366 }
00367 
00368 
00369 static void
00370 parse_tri_line(mdef_t * m, char *line, int p)
00371 {
00372     int32 wlen;
00373     __BIGSTACKVARIABLE__ char word[1024], *lp;
00374     int ci, lc, rc;
00375     word_posn_t wpos = WORD_POSN_BEGIN;
00376 
00377     lp = line;
00378 
00379     /* Read base phone name */
00380     if (sscanf(lp, "%s%n", word, &wlen) != 1)
00381         E_FATAL("Missing base phone name: %s\n", line);
00382     lp += wlen;
00383 
00384     ci = mdef_ciphone_id(m, word);
00385     if (ci < 0)
00386         E_FATAL("Unknown base phone: %s\n", line);
00387 
00388     /* Read lc */
00389     if (sscanf(lp, "%s%n", word, &wlen) != 1)
00390         E_FATAL("Missing left context: %s\n", line);
00391     lp += wlen;
00392     lc = mdef_ciphone_id(m, word);
00393     if (lc < 0)
00394         E_FATAL("Unknown left context: %s\n", line);
00395 
00396     /* Read rc */
00397     if (sscanf(lp, "%s%n", word, &wlen) != 1)
00398         E_FATAL("Missing right context: %s\n", line);
00399     lp += wlen;
00400     rc = mdef_ciphone_id(m, word);
00401     if (rc < 0)
00402         E_FATAL("Unknown right  context: %s\n", line);
00403 
00404     /* Read tripone word-position within word */
00405     if ((sscanf(lp, "%s%n", word, &wlen) != 1) || (word[1] != '\0'))
00406         E_FATAL("Missing or bad word-position spec: %s\n", line);
00407     lp += wlen;
00408     switch (word[0]) {
00409     case 'b':
00410         wpos = WORD_POSN_BEGIN;
00411         break;
00412     case 'e':
00413         wpos = WORD_POSN_END;
00414         break;
00415     case 's':
00416         wpos = WORD_POSN_SINGLE;
00417         break;
00418     case 'i':
00419         wpos = WORD_POSN_INTERNAL;
00420         break;
00421     default:
00422         E_FATAL("Bad word-position spec: %s\n", line);
00423     }
00424 
00425     /* Read filler attribute, if present.  Must match base phone attribute */
00426     if (sscanf(lp, "%s%n", word, &wlen) != 1)
00427         E_FATAL("Missing filler attribute field: %s\n", line);
00428     lp += wlen;
00429     if (((strcmp(word, "filler") == 0) && (m->ciphone[(int) ci].filler)) ||
00430         ((strcmp(word, "n/a") == 0) && (!m->ciphone[(int) ci].filler))) {
00431         /* Everything is fine */
00432     }
00433     else
00434         E_FATAL("Bad filler attribute field: %s\n", line);
00435 
00436     triphone_add(m, ci, lc, rc, wpos, p);
00437 
00438     /* Parse remainder of line: transition matrix and state->senone mappings */
00439     parse_tmat_senmap(m, line, lp - line, p);
00440 }
00441 
00442 
00443 static void
00444 sseq_compress(mdef_t * m)
00445 {
00446     hash_table_t *h;
00447     uint16 **sseq;
00448     int32 n_sseq;
00449     int32 p, j, k;
00450     glist_t g;
00451     gnode_t *gn;
00452     hash_entry_t *he;
00453 
00454     k = m->n_emit_state * sizeof(int16);
00455 
00456     h = hash_table_new(m->n_phone, HASH_CASE_YES);
00457     n_sseq = 0;
00458 
00459     /* Identify unique senone-sequence IDs.  BUG: tmat-id not being considered!! */
00460     for (p = 0; p < m->n_phone; p++) {
00461         /* Add senone sequence to hash table */
00462         if (n_sseq
00463             == (j = hash_table_enter_bkey_int32(h, (char *)m->sseq[p], k, n_sseq)))
00464             n_sseq++;
00465 
00466         m->phone[p].ssid = j;
00467     }
00468 
00469     /* Generate compacted sseq table */
00470     sseq = ckd_calloc_2d(n_sseq, m->n_emit_state, sizeof(**sseq)); /* freed in mdef_free() */
00471 
00472     g = hash_table_tolist(h, &j);
00473     assert(j == n_sseq);
00474 
00475     for (gn = g; gn; gn = gnode_next(gn)) {
00476         he = (hash_entry_t *) gnode_ptr(gn);
00477         j = (long)hash_entry_val(he);
00478         memcpy(sseq[j], hash_entry_key(he), k);
00479     }
00480     glist_free(g);
00481 
00482     /* Free the old, temporary senone sequence table, replace with compacted one */
00483     ckd_free_2d(m->sseq);
00484     m->sseq = sseq;
00485     m->n_sseq = n_sseq;
00486 
00487     hash_table_free(h);
00488 }
00489 
00490 
00491 static int32
00492 noncomment_line(char *line, int32 size, FILE * fp)
00493 {
00494     while (fgets(line, size, fp) != NULL) {
00495         if (line[0] != '#')
00496             return 0;
00497     }
00498     return -1;
00499 }
00500 
00501 
00502 /*
00503  * Initialize phones (ci and triphones) and state->senone mappings from .mdef file.
00504  */
00505 mdef_t *
00506 mdef_init(char *mdeffile, int32 breport)
00507 {
00508     FILE *fp;
00509     int32 n_ci, n_tri, n_map, n;
00510     __BIGSTACKVARIABLE__ char tag[1024], buf[1024];
00511     uint16 **senmap;
00512     int p;
00513     int32 s, ci, cd;
00514     mdef_t *m;
00515 
00516     if (!mdeffile)
00517         E_FATAL("No mdef-file\n");
00518 
00519     if (breport)
00520         E_INFO("Reading model definition: %s\n", mdeffile);
00521 
00522     m = (mdef_t *) ckd_calloc(1, sizeof(mdef_t));       /* freed in mdef_free */
00523 
00524     if ((fp = fopen(mdeffile, "r")) == NULL)
00525         E_FATAL_SYSTEM("Failed to open mdef file '%s' for reading", mdeffile);
00526 
00527     if (noncomment_line(buf, sizeof(buf), fp) < 0)
00528         E_FATAL("Empty file: %s\n", mdeffile);
00529 
00530     if (strncmp(buf, "BMDF", 4) == 0 || strncmp(buf, "FDMB", 4) == 0) {
00531         E_INFO
00532             ("Found byte-order mark %.4s, assuming this is a binary mdef file\n",
00533              buf);
00534         fclose(fp);
00535         ckd_free(m);
00536         return NULL;
00537     }
00538     if (strncmp(buf, MODEL_DEF_VERSION, strlen(MODEL_DEF_VERSION)) != 0)
00539         E_FATAL("Version error: Expecing %s, but read %s\n",
00540                 MODEL_DEF_VERSION, buf);
00541 
00542     /* Read #base phones, #triphones, #senone mappings defined in header */
00543     n_ci = -1;
00544     n_tri = -1;
00545     n_map = -1;
00546     m->n_ci_sen = -1;
00547     m->n_sen = -1;
00548     m->n_tmat = -1;
00549     do {
00550         if (noncomment_line(buf, sizeof(buf), fp) < 0)
00551             E_FATAL("Incomplete header\n");
00552 
00553         if ((sscanf(buf, "%d %s", &n, tag) != 2) || (n < 0))
00554             E_FATAL("Error in header: %s\n", buf);
00555 
00556         if (strcmp(tag, "n_base") == 0)
00557             n_ci = n;
00558         else if (strcmp(tag, "n_tri") == 0)
00559             n_tri = n;
00560         else if (strcmp(tag, "n_state_map") == 0)
00561             n_map = n;
00562         else if (strcmp(tag, "n_tied_ci_state") == 0)
00563             m->n_ci_sen = n;
00564         else if (strcmp(tag, "n_tied_state") == 0)
00565             m->n_sen = n;
00566         else if (strcmp(tag, "n_tied_tmat") == 0)
00567             m->n_tmat = n;
00568         else
00569             E_FATAL("Unknown header line: %s\n", buf);
00570     } while ((n_ci < 0) || (n_tri < 0) || (n_map < 0) ||
00571              (m->n_ci_sen < 0) || (m->n_sen < 0) || (m->n_tmat < 0));
00572 
00573     if ((n_ci == 0) || (m->n_ci_sen == 0) || (m->n_tmat == 0)
00574         || (m->n_ci_sen > m->n_sen))
00575         E_FATAL("%s: Error in header\n", mdeffile);
00576 
00577     /* Check typesize limits */
00578     if (n_ci >= MAX_INT16)
00579         E_FATAL("%s: #CI phones (%d) exceeds limit (%d)\n", mdeffile, n_ci,
00580                 MAX_INT16);
00581     if (n_ci + n_tri >= MAX_INT32) /* Comparison is always false... */
00582         E_FATAL("%s: #Phones (%d) exceeds limit (%d)\n", mdeffile,
00583                 n_ci + n_tri, MAX_INT32);
00584     if (m->n_sen >= MAX_INT16)
00585         E_FATAL("%s: #senones (%d) exceeds limit (%d)\n", mdeffile,
00586                 m->n_sen, MAX_INT16);
00587     if (m->n_tmat >= MAX_INT32) /* Comparison is always false... */
00588         E_FATAL("%s: #tmats (%d) exceeds limit (%d)\n", mdeffile,
00589                 m->n_tmat, MAX_INT32);
00590 
00591     m->n_emit_state = (n_map / (n_ci + n_tri)) - 1;
00592     if ((m->n_emit_state + 1) * (n_ci + n_tri) != n_map)
00593         E_FATAL
00594             ("Header error: n_state_map not a multiple of n_ci*n_tri\n");
00595 
00596     /* Initialize ciphone info */
00597     m->n_ciphone = n_ci;
00598     m->ciphone_ht = hash_table_new(n_ci, HASH_CASE_YES);  /* With case-insensitive string names *//* freed in mdef_free */
00599     m->ciphone = (ciphone_t *) ckd_calloc(n_ci, sizeof(ciphone_t));     /* freed in mdef_free */
00600 
00601     /* Initialize phones info (ciphones + triphones) */
00602     m->n_phone = n_ci + n_tri;
00603     m->phone = (phone_t *) ckd_calloc(m->n_phone, sizeof(phone_t));     /* freed in mdef_free */
00604 
00605     /* Allocate space for state->senone map for each phone */
00606     senmap = ckd_calloc_2d(m->n_phone, m->n_emit_state, sizeof(**senmap));      /* freed in mdef_free */
00607     m->sseq = senmap;           /* TEMPORARY; until it is compressed into just the unique ones */
00608 
00609 
00611     /* Flat decoder-specific */
00612     /* Allocate space for state->senone map for each phone */
00613 
00614     /* ARCHAN 20040820, this sacrifice readability and may cause pointer
00615        problems in future. However, this is a less evil than
00616        duplication of code.  This is trick point all the state mapping
00617        to the global mapping and avoid duplicated memory.  
00618      */
00619 
00620     /* S3 xwdpid_compress will compress the below list phone list. 
00621      */
00622 
00623     /* ARCHAN, this part should not be used when one of the recognizer is used. */
00624     m->st2senmap =
00625         (int16 *) ckd_calloc(m->n_phone * m->n_emit_state,
00626                                  sizeof(*m->st2senmap));
00627     for (p = 0; p < m->n_phone; p++)
00628         m->phone[p].state = m->st2senmap + (p * m->n_emit_state);
00629     /******************************************************************************************************/
00630 
00631 
00632     /* Allocate initial space for <ci,lc,rc,wpos> -> pid mapping */
00633     m->wpos_ci_lclist = (ph_lc_t ***) ckd_calloc_2d(N_WORD_POSN, m->n_ciphone, sizeof(ph_lc_t *));      /* freed in mdef_free */
00634 
00635     /*
00636      * Read base phones and triphones.  They'll simply be assigned a running sequence
00637      * number as their "phone-id".  If the phone-id < n_ci, it's a ciphone.
00638      */
00639 
00640     /* Read base phones */
00641     for (p = 0; p < n_ci; p++) {
00642         if (noncomment_line(buf, sizeof(buf), fp) < 0)
00643             E_FATAL("Premature EOF reading CIphone %d\n", p);
00644         parse_base_line(m, buf, p);
00645     }
00646     m->sil = mdef_ciphone_id(m, S3_SILENCE_CIPHONE);
00647 
00648     /* Read triphones, if any */
00649     for (; p < m->n_phone; p++) {
00650         if (noncomment_line(buf, sizeof(buf), fp) < 0)
00651             E_FATAL("Premature EOF reading phone %d\n", p);
00652         parse_tri_line(m, buf, p);
00653     }
00654 
00655     if (noncomment_line(buf, sizeof(buf), fp) >= 0)
00656         E_ERROR("Non-empty file beyond expected #phones (%d)\n",
00657                 m->n_phone);
00658 
00659     /* Build CD senones to CI senones map */
00660     if (m->n_ciphone * m->n_emit_state != m->n_ci_sen)
00661         E_FATAL
00662             ("#CI-senones(%d) != #CI-phone(%d) x #emitting-states(%d)\n",
00663              m->n_ci_sen, m->n_ciphone, m->n_emit_state);
00664     m->cd2cisen = (int16 *) ckd_calloc(m->n_sen, sizeof(*m->cd2cisen)); /* freed in mdef_free */
00665 
00666     m->sen2cimap = (int16 *) ckd_calloc(m->n_sen, sizeof(*m->sen2cimap)); /* freed in mdef_free */
00667 
00668     for (s = 0; s < m->n_sen; s++)
00669         m->sen2cimap[s] = -1;
00670     for (s = 0; s < m->n_ci_sen; s++) { /* CI senones */
00671         m->cd2cisen[s] = s;
00672         m->sen2cimap[s] = s / m->n_emit_state;
00673     }
00674     for (p = n_ci; p < m->n_phone; p++) {       /* CD senones */
00675         for (s = 0; s < m->n_emit_state; s++) {
00676             cd = m->sseq[p][s];
00677             ci = m->sseq[m->phone[p].ci][s];
00678             m->cd2cisen[cd] = ci;
00679             m->sen2cimap[cd] = m->phone[p].ci;
00680         }
00681     }
00682 
00683     sseq_compress(m);
00684     fclose(fp);
00685 
00686     return m;
00687 }
00688 
00689 void
00690 mdef_report(mdef_t * m)
00691 {
00692     E_INFO_NOFN("Initialization of mdef_t, report:\n");
00693     E_INFO_NOFN
00694         ("%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n",
00695          m->n_ciphone, m->n_phone - m->n_ciphone, m->n_emit_state,
00696          m->n_ci_sen, m->n_sen, m->n_sseq);
00697     E_INFO_NOFN("\n");
00698 
00699 }
00700 
00701 /* RAH 4.23.01, Need to step down the ->next list to see if there are
00702    any more things to free
00703  */
00704 
00705 
00706 
00707 /* RAH 4.19.01, Attempt to free memory that was allocated within this module
00708    I have not verified that all the memory has been freed. I've taken only a 
00709    reasonable effort for now.
00710    RAH 4.24.01 - verified that all memory is released.
00711  */
00712 void
00713 mdef_free_recursive_lc(ph_lc_t * lc)
00714 {
00715     if (lc == NULL)
00716         return;
00717 
00718     if (lc->rclist)
00719         mdef_free_recursive_rc(lc->rclist);
00720 
00721     if (lc->next)
00722         mdef_free_recursive_lc(lc->next);
00723 
00724     ckd_free((void *) lc);
00725 }
00726 
00727 void
00728 mdef_free_recursive_rc(ph_rc_t * rc)
00729 {
00730     if (rc == NULL)
00731         return;
00732 
00733     if (rc->next)
00734         mdef_free_recursive_rc(rc->next);
00735 
00736     ckd_free((void *) rc);
00737 }
00738 
00739 
00740 /* RAH, Free memory that was allocated in mdef_init 
00741    Rational purify shows that no leaks exist
00742  */
00743 
00744 void
00745 mdef_free(mdef_t * m)
00746 {
00747     int i, j;
00748 
00749     if (m) {
00750         if (m->sen2cimap)
00751             ckd_free((void *) m->sen2cimap);
00752         if (m->cd2cisen)
00753             ckd_free((void *) m->cd2cisen);
00754 
00755         /* RAH, go down the ->next list and delete all the pieces */
00756         for (i = 0; i < N_WORD_POSN; i++)
00757             for (j = 0; j < m->n_ciphone; j++)
00758                 if (m->wpos_ci_lclist[i][j]) {
00759                     mdef_free_recursive_lc(m->wpos_ci_lclist[i][j]->next);
00760                     mdef_free_recursive_rc(m->wpos_ci_lclist[i][j]->
00761                                            rclist);
00762                 }
00763 
00764         for (i = 0; i < N_WORD_POSN; i++)
00765             for (j = 0; j < m->n_ciphone; j++)
00766                 if (m->wpos_ci_lclist[i][j])
00767                     ckd_free((void *) m->wpos_ci_lclist[i][j]);
00768 
00769 
00770         if (m->wpos_ci_lclist)
00771             ckd_free_2d((void *) m->wpos_ci_lclist);
00772         if (m->sseq)
00773             ckd_free_2d((void *) m->sseq);
00774         /* Free phone context */
00775         if (m->phone)
00776             ckd_free((void *) m->phone);
00777         if (m->ciphone_ht)
00778             hash_table_free(m->ciphone_ht);
00779 
00780         for (i = 0; i < m->n_ciphone; i++) {
00781             if (m->ciphone[i].name)
00782                 ckd_free((void *) m->ciphone[i].name);
00783         }
00784 
00785 
00786         if (m->ciphone)
00787             ckd_free((void *) m->ciphone);
00788 
00789         if (m->st2senmap)
00790             ckd_free((void *) m->st2senmap);
00791 
00792         ckd_free((void *) m);
00793     }
00794 }