92 #include <sphinxbase/ckd_alloc.h>
93 #include <sphinxbase/err.h>
99 #define MODEL_DEF_VERSION "0.3"
/*
 * ciphone_add: add the CI phone named `ci`, with index `p`, to model
 * definition `m`.
 *
 * NOTE(review): only part of this function is visible in this listing (the
 * head of the hash-table call and the trailing E_FATAL argument are missing);
 * the comments below describe the visible lines only.
 */
102 ciphone_add(
mdef_t * m,
char *ci,
int p)
/* The index must lie below the CI-phone count recorded in the model. */
104 assert(p < m->n_ciphone);
/*
 * The index p is widened to long and cast to void * on both sides of the
 * comparison: the result of the (not visible) call is compared against the
 * same value that was passed in.  Presumably the call is hash_table_enter,
 * as the error message below suggests -- confirm against the full source.
 */
108 (
void *)(
long)p) != (
void *)(
long)p)
/* A mismatch is fatal; the message points at a duplicate CI phone name. */
109 E_FATAL(
"hash_table_enter(%s) failed; duplicate CIphone?\n",
/*
 * find_ph_lc: search the linked list starting at `lclist` for the node whose
 * `lc` field equals `lc`.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the function returns lcptr (the match, or NULL if none).
 */
115 find_ph_lc(
ph_lc_t * lclist,
int lc)
/*
 * Empty-bodied loop: follows ->next until a node with a matching lc is found
 * or the list ends, leaving lcptr on the match or NULL.
 */
119 for (lcptr = lclist; lcptr && (lcptr->lc != lc); lcptr = lcptr->next);
/*
 * find_ph_rc: search the linked list starting at `rclist` for the node whose
 * `rc` field equals `rc`.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the function returns rcptr (the match, or NULL if none).
 */
125 find_ph_rc(
ph_rc_t * rclist,
int rc)
/*
 * Empty-bodied loop: follows ->next until a node with a matching rc is found
 * or the list ends, leaving rcptr on the match or NULL.
 */
129 for (rcptr = rclist; rcptr && (rcptr->rc != rc); rcptr = rcptr->next);
142 assert(p < m->n_phone);
159 if ((rcptr = find_ph_rc(lcptr->rclist, rc)) != NULL) {
160 __BIGSTACKVARIABLE__
char buf[4096];
162 mdef_phone_str(m, rcptr->pid, buf);
163 E_FATAL(
"Duplicate triphone: %s\n", buf);
169 rcptr->next = lcptr->rclist;
170 lcptr->rclist = rcptr;
179 if (hash_table_lookup_int32(m->
ciphone_ht, ci, &
id) < 0)
189 assert((
id >= 0) && (id < m->n_ciphone));
/*
 * mdef_phone_str: write a textual form of phone `pid` from model definition
 * `m` into `buf`.
 *
 * The output is produced with sprintf, so no bound on `buf` is enforced here;
 * the caller must supply a buffer large enough for the result.
 *
 * NOTE(review): parts of this function (the else keyword, the final sprintf
 * argument and the return) are not visible in this listing.
 */
196 mdef_phone_str(
mdef_t * m,
int pid,
char *buf)
/* pid must be a valid index into the model's phone table. */
201 assert((pid >= 0) && (pid < m->n_phone));
/* Phone ids below n_ciphone are CI phones: emit just the phone name. */
205 if (pid < m->n_ciphone)
206 sprintf(buf,
"%s", mdef_ciphone_str(m, pid));
/*
 * Other phones are written as three CI-phone names taken from the ci, lc and
 * rc fields of m->phone[pid], followed by one character (the %c argument is
 * not visible in this listing).
 */
208 sprintf(buf,
"%s %s %s %c",
209 mdef_ciphone_str(m, m->
phone[pid].ci),
210 mdef_ciphone_str(m, m->
phone[pid].lc),
211 mdef_ciphone_str(m, m->
phone[pid].
rc),
227 assert((ci >= 0) && (ci < m->n_ciphone));
228 assert((lc >= 0) && (lc < m->n_ciphone));
229 assert((rc >= 0) && (rc < m->n_ciphone));
234 || ((rcptr = find_ph_rc(lcptr->rclist, rc)) == NULL)) {
239 newl = m->
ciphone[(int) lc].filler ? m->
sil : lc;
241 if ((newl == lc) && (newr == rc))
244 return (mdef_phone_id(m, ci, newl, newr, wpos));
254 assert((p >= 0) && (p < m->n_phone));
256 return ((p < m->n_ciphone) ? 1 : 0);
267 return ((s == m->
cd2cisen[s]) ? 1 : 0);
/*
 * parse_tmat_senmap: parse the transition-matrix id and the state->senone
 * mapping from `line` for phone `p`, checking the values against the counts
 * stored in `m`.  Every problem detected in the visible code is reported
 * with E_FATAL.
 *
 * NOTE(review): `off` is presumably the offset within `line` at which parsing
 * starts (callers pass `lp - line`); the line that uses it is not visible in
 * this listing, nor are the loop structure and several of the conditions.
 */
273 parse_tmat_senmap(
mdef_t * m,
char *line, int32 off,
int p)
/* word: scratch buffer for string tokens; lp: current parse position. */
276 __BIGSTACKVARIABLE__
char word[1024], *lp;
/* Read the transition-matrix id; %n records how many characters were consumed. */
281 if ((sscanf(lp,
"%d%n", &n, &wlen) != 1) || (n < 0))
282 E_FATAL(
"Missing or bad transition matrix id: %s\n", line);
/* Id out of range relative to m->n_tmat (the guarding condition is not visible). */
285 E_FATAL(
"tmat-id(%d) > #tmat in header(%d): %s\n", n, m->
n_tmat,
/* Read one non-negative senone id for the current state. */
291 if ((sscanf(lp,
"%d%n", &s, &wlen) != 1) || (s < 0))
292 E_FATAL(
"Missing or bad state[%d]->senone mapping: %s\n", n,
/* For CI phones (p < n_ciphone) the senone id must be below n_ci_sen. */
295 if ((p < m->n_ciphone) && (m->
n_ci_sen <= s))
296 E_FATAL(
"CI-senone-id(%d) > #CI-senones(%d): %s\n", s,
/* Senone id out of range relative to m->n_sen (the guarding condition is not visible). */
299 E_FATAL(
"Senone-id(%d) > #senones(%d): %s\n", s, m->
n_sen,
/* The next token must be the literal "N". */
307 if ((sscanf(lp,
"%s%n", word, &wlen) != 1) || (strcmp(word,
"N") != 0))
308 E_FATAL(
"Missing non-emitting state spec: %s\n", line);
/* Any further token on the line is an error. */
312 if (sscanf(lp,
"%s%n", word, &wlen) == 1)
313 E_FATAL(
"Non-empty beyond non-emitting final state: %s\n", line);
/*
 * parse_base_line: parse one base (CI) phone line for phone index `p` and
 * record it in model definition `m`.
 *
 * Visible steps: read the phone name, look it up with mdef_ciphone_id,
 * register it with ciphone_add, require three "-" context placeholders, read
 * the filler attribute ("filler" or "n/a"), then pass the remainder of the
 * line to parse_tmat_senmap.  Every problem is reported with E_FATAL.
 *
 * NOTE(review): several lines of this function (local declarations, some
 * conditions, parse-position updates) are not visible in this listing; the
 * visible lines are kept as they are.  The only code change is the spelling
 * of "attribute" in the missing-field message, which now matches the same
 * message in parse_tri_line.
 */
318 parse_base_line(
mdef_t * m,
char *line,
int p)
/* word: scratch buffer for string tokens; lp: current parse position. */
321 __BIGSTACKVARIABLE__
char word[1024], *lp;
/* Read the base phone name; %n records how many characters were consumed. */
327 if (sscanf(lp,
"%s%n", word, &wlen) != 1)
328 E_FATAL(
"Missing base phone name: %s\n", line);
/* Look the name up; the duplicate check guarding the next E_FATAL is not visible. */
332 ci = mdef_ciphone_id(m, word);
334 E_FATAL(
"Duplicate base phone: %s\n", line);
/* Register the new CI phone under index p. */
337 ciphone_add(m, word, p);
/* A base phone has no context: the next three tokens must each be "-". */
341 for (n = 0; n < 3; n++) {
342 if ((sscanf(lp,
"%s%n", word, &wlen) != 1)
343 || (strcmp(word,
"-") != 0))
344 E_FATAL(
"Bad context info for base phone: %s\n", line);
/* Read the filler attribute token. */
349 if (sscanf(lp,
"%s%n", word, &wlen) != 1)
350 E_FATAL(
"Missing filler attribute field: %s\n", line);
/* "filler" sets the filler flag; the body of the "n/a" branch is not visible. */
352 if (strcmp(word,
"filler") == 0)
353 m->
ciphone[(int) ci].filler = 1;
354 else if (strcmp(word,
"n/a") == 0)
357 E_FATAL(
"Bad filler attribute field: %s\n", line);
/* Hand the rest of the line, starting at offset lp - line, to the shared parser. */
362 parse_tmat_senmap(m, line, lp - line, p);
/*
 * parse_tri_line: parse one triphone line for phone index `p` and record it
 * in model definition `m`.
 *
 * Visible steps: read the base phone, left context and right context names
 * and resolve each with mdef_ciphone_id, read a one-character word-position
 * spec, read and check the filler attribute, call triphone_add, then pass
 * the remainder of the line to parse_tmat_senmap.  Every problem is reported
 * with E_FATAL.
 *
 * NOTE(review): several lines of this function (local declarations, the
 * conditions guarding some E_FATAL calls, the word-position decoding) are
 * not visible in this listing.
 */
367 parse_tri_line(
mdef_t * m,
char *line,
int p)
/* word: scratch buffer for string tokens; lp: current parse position. */
370 __BIGSTACKVARIABLE__
char word[1024], *lp;
/* Base phone name; %n records how many characters were consumed. */
377 if (sscanf(lp,
"%s%n", word, &wlen) != 1)
378 E_FATAL(
"Missing base phone name: %s\n", line);
381 ci = mdef_ciphone_id(m, word);
383 E_FATAL(
"Unknown base phone: %s\n", line);
/* Left context phone name. */
386 if (sscanf(lp,
"%s%n", word, &wlen) != 1)
387 E_FATAL(
"Missing left context: %s\n", line);
389 lc = mdef_ciphone_id(m, word);
391 E_FATAL(
"Unknown left context: %s\n", line);
/* Right context phone name. */
394 if (sscanf(lp,
"%s%n", word, &wlen) != 1)
395 E_FATAL(
"Missing right context: %s\n", line);
397 rc = mdef_ciphone_id(m, word);
399 E_FATAL(
"Unknown right context: %s\n", line);
/* The word-position spec must be exactly one character long. */
402 if ((sscanf(lp,
"%s%n", word, &wlen) != 1) || (word[1] !=
'\0'))
403 E_FATAL(
"Missing or bad word-position spec: %s\n", line);
/* Reached for an unrecognised word-position character (decoding not visible). */
419 E_FATAL(
"Bad word-position spec: %s\n", line);
/* Filler attribute token. */
423 if (sscanf(lp,
"%s%n", word, &wlen) != 1)
424 E_FATAL(
"Missing filler attribute field: %s\n", line);
/*
 * The attribute is consistent when it is "filler" and the base phone's filler
 * flag is set, or "n/a" and the flag is clear.  The body of this branch is
 * not visible; presumably the E_FATAL below is the else case -- confirm.
 */
426 if (((strcmp(word,
"filler") == 0) && (m->
ciphone[(
int) ci].
filler)) ||
427 ((strcmp(word,
"n/a") == 0) && (!m->
ciphone[(
int) ci].
filler))) {
431 E_FATAL(
"Bad filler attribute field: %s\n", line);
/* Record the triphone under index p. */
433 triphone_add(m, ci, lc, rc, wpos, p);
/* Hand the rest of the line, starting at offset lp - line, to the shared parser. */
436 parse_tmat_senmap(m, line, lp - line, p);
453 h = hash_table_new(m->
n_phone, HASH_CASE_YES);
457 for (p = 0; p < m->
n_phone; p++) {
460 == (j = hash_table_enter_bkey_int32(h, (
char *)m->
sseq[p], k, n_sseq)))
467 sseq = ckd_calloc_2d(n_sseq, m->
n_emit_state,
sizeof(**sseq));
469 g = hash_table_tolist(h, &j);
472 for (gn = g; gn; gn = gnode_next(gn)) {
473 he = (hash_entry_t *) gnode_ptr(gn);
474 j = (long)hash_entry_val(he);
475 memcpy(sseq[j], hash_entry_key(he), k);
480 ckd_free_2d(m->
sseq);
/*
 * noncomment_line: read lines from `fp` into `line` (capacity `size`) with
 * fgets until input is exhausted.
 *
 * Callers in this file treat a negative return value as "no line available"
 * and a non-negative one as success.
 *
 * NOTE(review): the loop body and the return statements are not visible in
 * this listing; presumably comment lines are skipped, as the name suggests --
 * confirm against the full source.
 */
489 noncomment_line(
char *line, int32 size, FILE * fp)
/* fgets returns NULL at end of file or on a read error, which ends the loop. */
491 while (fgets(line, size, fp) != NULL) {
506 int32 n_ci, n_tri, n_map, n;
507 __BIGSTACKVARIABLE__
char tag[1024], buf[1024];
514 E_FATAL(
"No mdef-file\n");
517 E_INFO(
"Reading model definition: %s\n", mdeffile);
521 if ((fp = fopen(mdeffile,
"r")) == NULL)
522 E_FATAL_SYSTEM(
"Failed to open mdef file '%s' for reading", mdeffile);
524 if (noncomment_line(buf,
sizeof(buf), fp) < 0)
525 E_FATAL(
"Empty file: %s\n", mdeffile);
527 if (strncmp(buf,
"BMDF", 4) == 0 || strncmp(buf,
"FDMB", 4) == 0) {
529 (
"Found byte-order mark %.4s, assuming this is a binary mdef file\n",
535 if (strncmp(buf, MODEL_DEF_VERSION, strlen(MODEL_DEF_VERSION)) != 0)
536 E_FATAL(
"Version error: Expecing %s, but read %s\n",
537 MODEL_DEF_VERSION, buf);
547 if (noncomment_line(buf,
sizeof(buf), fp) < 0)
548 E_FATAL(
"Incomplete header\n");
550 if ((sscanf(buf,
"%d %s", &n, tag) != 2) || (n < 0))
551 E_FATAL(
"Error in header: %s\n", buf);
553 if (strcmp(tag,
"n_base") == 0)
555 else if (strcmp(tag,
"n_tri") == 0)
557 else if (strcmp(tag,
"n_state_map") == 0)
559 else if (strcmp(tag,
"n_tied_ci_state") == 0)
561 else if (strcmp(tag,
"n_tied_state") == 0)
563 else if (strcmp(tag,
"n_tied_tmat") == 0)
566 E_FATAL(
"Unknown header line: %s\n", buf);
567 }
while ((n_ci < 0) || (n_tri < 0) || (n_map < 0) ||
572 E_FATAL(
"%s: Error in header\n", mdeffile);
575 if (n_ci >= MAX_INT16)
576 E_FATAL(
"%s: #CI phones (%d) exceeds limit (%d)\n", mdeffile, n_ci,
578 if (n_ci + n_tri >= MAX_INT32)
579 E_FATAL(
"%s: #Phones (%d) exceeds limit (%d)\n", mdeffile,
580 n_ci + n_tri, MAX_INT32);
581 if (m->
n_sen >= MAX_INT16)
582 E_FATAL(
"%s: #senones (%d) exceeds limit (%d)\n", mdeffile,
583 m->
n_sen, MAX_INT16);
584 if (m->
n_tmat >= MAX_INT32)
585 E_FATAL(
"%s: #tmats (%d) exceeds limit (%d)\n", mdeffile,
591 (
"Header error: n_state_map not a multiple of n_ci*n_tri\n");
595 m->
ciphone_ht = hash_table_new(n_ci, HASH_CASE_YES);
615 for (p = 0; p < n_ci; p++) {
616 if (noncomment_line(buf,
sizeof(buf), fp) < 0)
617 E_FATAL(
"Premature EOF reading CIphone %d\n", p);
618 parse_base_line(m, buf, p);
624 if (noncomment_line(buf,
sizeof(buf), fp) < 0)
625 E_FATAL(
"Premature EOF reading phone %d\n", p);
626 parse_tri_line(m, buf, p);
629 if (noncomment_line(buf,
sizeof(buf), fp) >= 0)
630 E_ERROR(
"Non-empty file beyond expected #phones (%d)\n",
636 (
"#CI-senones(%d) != #CI-phone(%d) x #emitting-states(%d)\n",
642 for (s = 0; s < m->
n_sen; s++)
648 for (p = n_ci; p < m->
n_phone; p++) {
666 E_INFO_NOFN(
"Initialization of mdef_t, report:\n");
668 (
"%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n",
693 mdef_free_recursive_rc(lc->rclist);
696 mdef_free_recursive_lc(lc->next);
698 ckd_free((
void *) lc);
708 mdef_free_recursive_rc(rc->next);
710 ckd_free((
void *) rc);
747 ckd_free_2d((
void *) m->
sseq);
750 ckd_free((
void *) m->
phone);
763 ckd_free((
void *) m);