55 #include <sphinxbase/prim_type.h> 56 #include <sphinxbase/ckd_alloc.h> 57 #include <sphinxbase/byteorder.h> 58 #include <sphinxbase/case.h> 59 #include <sphinxbase/err.h> 70 int i, nodes, ci_idx, lc_idx, rc_idx;
73 if ((mdef =
mdef_init((
char *) filename, TRUE)) == NULL)
78 E_ERROR(
"Number of senones exceeds limit: %d > %d\n",
84 E_ERROR(
"Number of senone sequences exceeds limit: %d > %d\n",
91 E_ERROR(
"Number of phones exceeds limit: %d > %d\n",
97 bmdef = ckd_calloc(1,
sizeof(*bmdef));
124 bmdef->
ciname[0] = ckd_calloc(nchars, 1);
128 bmdef->
ciname[i - 1] + strlen(bmdef->
ciname[i - 1]) + 1;
130 if (i > 0 && strcmp(bmdef->
ciname[i - 1], bmdef->
ciname[i]) > 0) {
132 E_ERROR(
"Phone names are not in sorted order, sorry.");
140 for (i = 0; i < mdef->
n_phone; ++i) {
143 if (i < bmdef->n_ciphone) {
148 bmdef->
phone[i].info.cd.ctx[0] = mdef->
phone[i].ci;
149 bmdef->
phone[i].info.cd.ctx[1] = mdef->
phone[i].lc;
156 nodes = lc_idx = ci_idx = rc_idx = 0;
164 for (rc = lc->rclist; rc; rc = rc->next) {
179 E_INFO(
"Allocating %d * %d bytes (%d KiB) for CD tree\n",
180 nodes,
sizeof(*bmdef->
cd_tree),
181 nodes *
sizeof(*bmdef->
cd_tree) / 1024);
191 E_INFO(
"%d => %c (%d@%d)\n",
207 for (rc = lc->rclist; rc; rc = rc->next) {
212 E_INFO(
"%d => %s %s %s %c (%d@%d)\n",
231 E_INFO(
"%d => %s %s %c (%d@%d)\n",
248 E_INFO(
"%d => %d=%s (%d@%d)\n",
249 ci_idx, j, bmdef->
ciname[j],
280 case BIN_MDEF_FROM_TEXT:
282 ckd_free(m->
sseq[0]);
286 case BIN_MDEF_IN_MEMORY:
289 case BIN_MDEF_ON_DISK:
/*
 * Human-readable description of the binary mdef file layout, embedded in
 * the file header itself.
 *
 * As far as the visible writer code shows, the header is emitted as: the
 * byte-order marker, the format version, the length of this text rounded
 * up to a multiple of 4, the text itself (sizeof(format_desc) bytes,
 * including the terminating NUL), and then padding up to the rounded
 * length. The visible reader code reads the header length and seeks past
 * that many bytes, so the text is presumably informational only -- confirm
 * against the full reader.
 *
 * NOTE: editing this string changes sizeof(format_desc) and therefore the
 * bytes written to every newly created file.
 */
302 static const char format_desc[] =
303 "BEGIN FILE FORMAT DESCRIPTION\n" 304 "int32 n_ciphone; /**< Number of base (CI) phones */\n" 305 "int32 n_phone; /**< Number of base (CI) phones + (CD) triphones */\n" 306 "int32 n_emit_state; /**< Number of emitting states per phone (0 if heterogeneous) */\n" 307 "int32 n_ci_sen; /**< Number of CI senones; these are the first */\n" 308 "int32 n_sen; /**< Number of senones (CI+CD) */\n" 309 "int32 n_tmat; /**< Number of transition matrices */\n" 310 "int32 n_sseq; /**< Number of unique senone sequences */\n" 311 "int32 n_ctx; /**< Number of phones of context */\n" 312 "int32 n_cd_tree; /**< Number of nodes in CD tree structure */\n" 313 "int32 sil; /**< CI phone ID for silence */\n" 314 "char ciphones[][]; /**< CI phone strings (null-terminated) */\n" 315 "char padding[]; /**< Padding to a 4-bytes boundary */\n" 316 "struct { int16 ctx; int16 n_down; int32 pid/down } cd_tree[];\n" 317 "struct { int32 ssid; int32 tmat; int8 attr[4] } phones[];\n" 318 "int16 sseq[]; /**< Unique senone sequences */\n" 319 "int8 sseq_len[]; /**< Number of states in each sseq (none if homogeneous) */\n" 320 "END FILE FORMAT DESCRIPTION\n";
328 int32 val, i, do_mmap, swap;
336 E_INFO(
"Reading binary model definition: %s\n", filename);
337 if ((fh = fopen(filename,
"rb")) == NULL)
340 if (fread(&val, 4, 1, fh) != 1) {
342 E_ERROR_SYSTEM(
"Failed to read byte-order marker from %s\n",
347 if (val == BIN_MDEF_OTHER_ENDIAN) {
349 E_INFO(
"Must byte-swap %s\n", filename);
351 if (fread(&val, 4, 1, fh) != 1) {
353 E_ERROR_SYSTEM(
"Failed to read version from %s\n", filename);
358 if (val > BIN_MDEF_FORMAT_VERSION) {
359 E_ERROR(
"File format version %d for %s is newer than library\n",
364 if (fread(&val, 4, 1, fh) != 1) {
366 E_ERROR_SYSTEM(
"Failed to read header length from %s\n", filename);
372 fseek(fh, val, SEEK_CUR);
375 m = ckd_calloc(1,
sizeof(*m));
379 #define FREAD_SWAP32_CHK(dest) \ 380 if (fread((dest), 4, 1, fh) != 1) { \ 383 E_ERROR_SYSTEM("Failed to read %s from %s\n", #dest, filename); \ 386 if (swap) SWAP_INT32(dest); 392 FREAD_SWAP32_CHK(&m->
n_sen);
393 FREAD_SWAP32_CHK(&m->
n_tmat);
394 FREAD_SWAP32_CHK(&m->
n_sseq);
395 FREAD_SWAP32_CHK(&m->
n_ctx);
397 FREAD_SWAP32_CHK(&m->
sil);
403 do_mmap = config ? cmd_ln_boolean_r(config,
"-mmap") : TRUE;
405 E_WARN(
"-mmap specified, but mdef is other-endian. Will not memory-map.\n");
410 m->
filemap = mmio_file_read(filename);
424 fseek(fh, 0, SEEK_END);
426 fseek(fh, pos, SEEK_SET);
427 m->
ciname[0] = ckd_malloc(end - pos);
428 if (fread(m->
ciname[0], 1, end - pos, fh) != end - pos)
429 E_FATAL(
"Failed to read %d bytes of data from %s\n", end - pos, filename);
438 tree_start = (tree_start + 3) & ~3;
449 for (i = 0; i < m->
n_phone; ++i) {
456 SWAP_INT32(sseq_size);
458 m->
sseq[0] = (uint16 *) (sseq_size + 1);
460 for (i = 0; i < *sseq_size; ++i)
461 SWAP_INT16(m->
sseq[0] + i);
464 for (i = 1; i < m->
n_sseq; ++i)
469 for (i = 1; i < m->
n_sseq; ++i)
483 for (; i < m->
n_sen; ++i)
485 for (i = 0; i < m->
n_sen; ++i)
487 for (i = 0; i < m->
n_phone; ++i) {
490 for (j = 0; j < bin_mdef_n_emit_state_phone(m, i); ++j) {
491 int s = bin_mdef_sseq2sen(m, ssid, j);
492 int ci = bin_mdef_pid2ci(m, i);
498 (
"Senone %d is shared between multiple base phones\n",
501 if (j > bin_mdef_n_emit_state_phone(m, ci))
502 E_WARN(
"CD phone %d has fewer states than CI phone %d\n",
506 bin_mdef_sseq2sen(m, m->
phone[ci].
ssid, j);
514 (
"%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n",
527 if ((fh = fopen(filename,
"wb")) == NULL)
531 val = BIN_MDEF_NATIVE_ENDIAN;
532 fwrite(&val, 1, 4, fh);
534 val = BIN_MDEF_FORMAT_VERSION;
535 fwrite(&val, 1,
sizeof(val), fh);
538 val = ((
sizeof(format_desc) + 3) & ~3);
539 fwrite(&val, 1,
sizeof(val), fh);
540 fwrite(format_desc, 1,
sizeof(format_desc), fh);
543 fwrite(&i, 1, val -
sizeof(format_desc), fh);
550 fwrite(&m->
n_sen, 4, 1, fh);
551 fwrite(&m->
n_tmat, 4, 1, fh);
552 fwrite(&m->
n_sseq, 4, 1, fh);
553 fwrite(&m->
n_ctx, 4, 1, fh);
560 fwrite(&val, 4, 1, fh);
566 val = (ftell(fh) + 3) & ~3;
568 fwrite(&i, 1, val - ftell(fh), fh);
577 fwrite(&val, 4, 1, fh);
580 fwrite(m->
sseq[0],
sizeof(**m->
sseq),
588 for (i = 0; i < m->
n_sseq; ++i)
592 fwrite(&n, 4, 1, fh);
595 fwrite(m->
sseq[0],
sizeof(**m->
sseq), n, fh);
609 int p, i, n_total_state;
611 if (strcmp(filename,
"-") == 0)
614 if ((fh = fopen(filename,
"w")) == NULL)
618 fprintf(fh,
"0.3\n");
619 fprintf(fh,
"%d n_base\n", m->
n_ciphone);
625 for (i = 0; i < m->
n_phone; ++i)
628 fprintf(fh,
"%d n_state_map\n", n_total_state);
629 fprintf(fh,
"%d n_tied_state\n", m->
n_sen);
630 fprintf(fh,
"%d n_tied_ci_state\n", m->
n_ci_sen);
631 fprintf(fh,
"%d n_tied_tmat\n", m->
n_tmat);
632 fprintf(fh,
"#\n# Columns definitions\n");
633 fprintf(fh,
"#%4s %3s %3s %1s %6s %4s %s\n",
634 "base",
"lft",
"rt",
"p",
"attrib",
"tmat",
635 " ... state id's ...");
640 fprintf(fh,
"%5s %3s %3s %1s", m->
ciname[p],
"-",
"-",
"-");
642 if (bin_mdef_is_fillerphone(m, p))
643 fprintf(fh,
" %6s",
"filler");
645 fprintf(fh,
" %6s",
"n/a");
652 for (i = 0; i < n_state; i++) {
662 fprintf(fh,
"%5s %3s %3s %c",
668 if (bin_mdef_is_fillerphone(m, p))
669 fprintf(fh,
" %6s",
"filler");
671 fprintf(fh,
" %6s",
"n/a");
679 for (i = 0; i < n_state; i++) {
685 if (strcmp(filename,
"-") != 0)
701 mid = (low + high) / 2;
702 c = strcmp(ciphone, m->
ciname[mid]);
724 mid = (low + high) / 2;
725 c = strcmp_nocase(ciphone, m->
ciname[mid]);
740 assert(ci < m->n_ciphone);
755 if (lc < 0 || rc < 0)
758 assert((ci >= 0) && (ci < m->n_ciphone));
759 assert((lc >= 0) && (lc < m->n_ciphone));
760 assert((rc >= 0) && (rc < m->n_ciphone));
766 ctx[2] = (m->
sil >= 0
767 && m->
phone[lc].info.
ci.filler) ? m->
sil : lc;
768 ctx[3] = (m->
sil >= 0
769 && m->
phone[rc].info.
ci.filler) ? m->
sil : rc;
779 E_INFO(
"Looking for context %d=%s in %d at %d\n",
780 ctx[level], m->
ciname[ctx[level]],
783 for (i = 0; i < max; ++i) {
785 E_INFO(
"Look at context %d=%s at %d\n",
789 if (cd_tree[i].ctx == ctx[level])
795 E_INFO(
"Found context %d=%s at %d, n_down=%d, down=%d\n",
796 ctx[level], m->
ciname[ctx[level]],
801 if (cd_tree[i].n_down == 0)
802 return cd_tree[i].c.
pid;
814 bin_mdef_phone_id_nearest(
bin_mdef_t * m, int32 b, int32 l, int32 r, int32 pos)
841 int newl = l, newr = r;
842 if (m->
phone[(
int)l].info.
ci.filler
845 if (m->
phone[(
int)r].info.
ci.filler
848 if ((newl != l) || (newr != r)) {
873 assert((pid >= 0) && (pid < m->n_phone));
877 if (pid < m->n_ciphone)
880 sprintf(buf,
"%s %s %s %c",
884 wpos_name[m->
phone[pid].info.cd.wpos]);
The main model definition structure.
int16 ctx
Context (word position or CI phone)
int16 n_down
Number of children (0 for leafnode)
int32 n_ciphone
Number of base phones actually present
POCKETSPHINX_EXPORT int bin_mdef_write(bin_mdef_t *m, const char *filename)
Write a binary mdef to a file.
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
word_posn_t wpos
Word position.
int32 n_tmat
Number of transition matrices.
int32 n_sen
Number of senones (CI+CD)
int32 ssid
Senone sequence ID.
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
int32 n_cd_tree
Number of nodes in cd_tree (below)
uint16 ** sseq
Unique senone sequences (2D array built at load time)
#define WPOS_NAME
Printable code for each word position above.
int32 tmat
Transition matrix ID.
cd_tree_t * cd_tree
Tree mapping CD phones to phone IDs.
Binary format model definition files, with support for heterogeneous topologies and variable-size N-p...
#define BAD_SSID
Invalid senone sequence ID (limited to 16 bits for PocketSphinx).
int32 filler
Whether a filler phone; if so, can be substituted by silence phone in left or right context position...
mmio_file_t * filemap
File map for this file (if any)
int32 n_phone
Number of base phones + number of triphones actually present
int16 rc
Base, left, right context ciphones.
char * name
The name of the CI phone.
int16 * cd2cisen
Parent CI-senone id for each senone; the first n_ci_sen are identity mappings; the CD-senones are con...
int32 n_phone
Number of base (CI) phones + (CD) triphones.
uint8 * sseq_len
Number of states in each sseq (NULL for homogeneous)
struct mdef_entry_s::@0::@1 ci
CI phone information - attributes (just "filler" for now)
int32 n_sseq
Number of unique senone sequences.
char ** ciname
CI phone names.
POCKETSPHINX_EXPORT bin_mdef_t * bin_mdef_read(cmd_ln_t *config, const char *filename)
Read a binary mdef from a file.
int32 n_ctx
Number of phones of context.
void mdef_free(mdef_t *mdef)
Free an mdef_t.
#define N_WORD_POSN
total # of word positions (excluding undefined)
int32 n_ci_sen
Number of CI senones; these are the first.
int32 n_ci_sen
Number of CI senones; these are the first
int32 ssid
State sequence (or senone sequence) ID, considering the n_emit_state senone-ids are a unit...
#define BAD_SENID
Invalid senone ID (limited to 16 bits for PocketSphinx).
int bin_mdef_phone_id(bin_mdef_t *m, int32 b, int32 l, int32 r, int32 pos)
In: Word position.
int16 sil
CI phone ID for silence.
int16 * cd2cisen
Parent CI-senone id for each senone.
#define S3_SILENCE_CIPHONE
Hard-coded silence CI phone name.
enum bin_mdef_s::@4 alloc_mode
Allocation mode for this object.
int32 n_emit_state
Number of emitting states per phone (0 for heterogeneous)
Structures for storing the left context.
uint16 ** sseq
Unique state (or senone) sequences in this model, shared among all phones/triphones.
mdef_t * mdef_init(char *mdeffile, int breport)
Initialize the phone structure from the given model definition file.
int32 down
Next level of the tree (offset from start of cd_trees)
int bin_mdef_ciphone_id_nocase(bin_mdef_t *m, const char *ciphone)
Case-insensitive context-independent phone lookup.
int16 * sen2cimap
Parent CI-phone for each senone (CI or CD)
int bin_mdef_free(bin_mdef_t *m)
Release a pointer to a binary mdef.
ciphone_t * ciphone
CI-phone information for all ciphones.
POCKETSPHINX_EXPORT bin_mdef_t * bin_mdef_read_text(cmd_ln_t *config, const char *filename)
Read a text mdef from a file (creating an in-memory binary mdef).
bin_mdef_t * bin_mdef_retain(bin_mdef_t *m)
Retain a pointer to a bin_mdef_t.
int32 pid
Phone ID (leafnode)
int32 n_sen
Number of senones (CI+CD)
int32 tmat
Transition matrix id.
int bin_mdef_phone_str(bin_mdef_t *m, int pid, char *buf)
Create a phone string for the given phone (base or triphone) id in the given buf. ...
int16 * sen2cimap
Parent CI-phone for each senone (CI or CD)
mdef_entry_t * phone
All phone structures.
POCKETSPHINX_EXPORT int bin_mdef_write_text(bin_mdef_t *m, const char *filename)
Write a binary mdef to a text file.
int32 n_ciphone
Number of base (CI) phones.
int32 n_tmat
Number of transition matrices
int32 n_emit_state
Number of emitting states per phone
int16 sil
SILENCE_CIPHONE id.
ph_lc_t *** wpos_ci_lclist
wpos_ci_lclist[wpos][ci] = list of lc for <wpos,ci>.
Structures needed for mapping <ci,lc,rc,wpos> into pid.
phone_t * phone
Information for all ciphones and triphones.