45 #if defined(__ADSPBLACKFIN__)
46 #elif !defined(_WIN32_WCE)
47 #include <sys/types.h>
51 #define M_PI 3.14159265358979323846
55 #include <sphinx_config.h>
56 #include <sphinxbase/cmd_ln.h>
57 #include <sphinxbase/fixpoint.h>
58 #include <sphinxbase/ckd_alloc.h>
59 #include <sphinxbase/bio.h>
60 #include <sphinxbase/err.h>
61 #include <sphinxbase/prim_type.h>
64 #include "s2_semi_mgau.h"
66 #include "posixwin32.h"
70 &s2_semi_mgau_frame_eval,
71 &s2_semi_mgau_mllr_transform,
87 ceplen = s->veclen[feat];
89 for (i = 0; i < s->max_topn; i++) {
90 mfcc_t *mean, diff, sqdiff, compl;
96 cw = topn[i].codeword;
97 mean = s->means[feat][0] + cw * ceplen;
98 var = s->vars[feat][0] + cw * ceplen;
99 d = s->dets[feat][cw];
101 for (j = 0; j < ceplen; j++) {
102 diff = *obs++ - *mean++;
103 sqdiff = MFCCMUL(diff, diff);
104 compl = MFCCMUL(sqdiff, *var);
108 topn[i].score = (int32)d;
112 for (j = i - 1; j >= 0 && (int32)d > topn[j].score; j--) {
113 topn[j + 1] = topn[j];
124 mfcc_t *var, *det, *detP, *detE;
127 best = topn = s->
f[feat];
128 worst = topn + (s->max_topn - 1);
129 mean = s->means[feat][0];
130 var = s->vars[feat][0];
132 detE = det + s->n_density;
133 ceplen = s->veclen[feat];
135 for (detP = det; detP < detE; ++detP) {
136 mfcc_t diff, sqdiff, compl;
145 for (j = 0; (j < ceplen) && (d >= worst->score); ++j) {
146 diff = *obs++ - *mean++;
147 sqdiff = MFCCMUL(diff, diff);
148 compl = MFCCMUL(sqdiff, *var);
154 mean += (ceplen - j);
158 if ((int32)d < worst->score)
160 for (i = 0; i < s->max_topn; i++) {
162 if (topn[i].codeword == cw)
168 for (cur = worst - 1; cur >= best && (int32)d >= cur->score; --cur)
172 cur->score = (int32)d;
/*
 * mgau_dist (excerpt): calls eval_topn() for feature stream `feat` on the
 * vector `z`, and rewrites the score of each of the s->max_topn entries in
 * s->f[feat].
 *
 * NOTE(review): only part of the body is visible in this excerpt; the return
 * type, the declaration of `norm`, and the bodies of both `if` statements
 * are not shown.
 */
177 mgau_dist(
s2_semi_mgau_t * s, int32 frame, int32 feat, mfcc_t * z)
179 eval_topn(s, feat, z);
/* True when `frame` is not a multiple of s->ds_ratio; the action taken is
 * not visible here. */
182 if (frame % s->ds_ratio)
199 for (j = 0; j < s->max_topn; ++j) {
/* New score = -((old score >> SENSCR_SHIFT) - norm): shifted right,
 * offset by `norm`, then negated. */
200 s->
f[feat][j].score = -((s->
f[feat][j].score >>
SENSCR_SHIFT) - norm);
/* A zero topn_beam[feat] disables this comparison entirely. */
203 if (s->topn_beam[feat] && s->
f[feat][j].score > s->topn_beam[feat])
211 int16 *senone_scores, uint8 *senone_active,
212 int32 n_senone_active)
215 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5;
217 pid_cw0 = s->mixw[i][s->
f[i][0].codeword];
218 pid_cw1 = s->mixw[i][s->
f[i][1].codeword];
219 pid_cw2 = s->mixw[i][s->
f[i][2].codeword];
220 pid_cw3 = s->mixw[i][s->
f[i][3].codeword];
221 pid_cw4 = s->mixw[i][s->
f[i][4].codeword];
222 pid_cw5 = s->mixw[i][s->
f[i][5].codeword];
224 for (l = j = 0; j < n_senone_active; j++) {
225 int sen = senone_active[j] + l;
226 int32 tmp = pid_cw0[sen] + s->
f[i][0].score;
229 pid_cw1[sen] + s->
f[i][1].score);
231 pid_cw2[sen] + s->
f[i][2].score);
233 pid_cw3[sen] + s->
f[i][3].score);
235 pid_cw4[sen] + s->
f[i][4].score);
237 pid_cw5[sen] + s->
f[i][5].score);
239 senone_scores[sen] += tmp;
247 int16 *senone_scores, uint8 *senone_active,
248 int32 n_senone_active)
251 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4;
253 pid_cw0 = s->mixw[i][s->
f[i][0].codeword];
254 pid_cw1 = s->mixw[i][s->
f[i][1].codeword];
255 pid_cw2 = s->mixw[i][s->
f[i][2].codeword];
256 pid_cw3 = s->mixw[i][s->
f[i][3].codeword];
257 pid_cw4 = s->mixw[i][s->
f[i][4].codeword];
259 for (l = j = 0; j < n_senone_active; j++) {
260 int sen = senone_active[j] + l;
261 int32 tmp = pid_cw0[sen] + s->
f[i][0].score;
264 pid_cw1[sen] + s->
f[i][1].score);
266 pid_cw2[sen] + s->
f[i][2].score);
268 pid_cw3[sen] + s->
f[i][3].score);
270 pid_cw4[sen] + s->
f[i][4].score);
272 senone_scores[sen] += tmp;
280 int16 *senone_scores, uint8 *senone_active,
281 int32 n_senone_active)
284 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
286 pid_cw0 = s->mixw[i][s->
f[i][0].codeword];
287 pid_cw1 = s->mixw[i][s->
f[i][1].codeword];
288 pid_cw2 = s->mixw[i][s->
f[i][2].codeword];
289 pid_cw3 = s->mixw[i][s->
f[i][3].codeword];
291 for (l = j = 0; j < n_senone_active; j++) {
292 int sen = senone_active[j] + l;
293 int32 tmp = pid_cw0[sen] + s->
f[i][0].score;
296 pid_cw1[sen] + s->
f[i][1].score);
298 pid_cw2[sen] + s->
f[i][2].score);
300 pid_cw3[sen] + s->
f[i][3].score);
302 senone_scores[sen] += tmp;
310 int16 *senone_scores, uint8 *senone_active,
311 int32 n_senone_active)
314 uint8 *pid_cw0, *pid_cw1, *pid_cw2;
316 pid_cw0 = s->mixw[i][s->
f[i][0].codeword];
317 pid_cw1 = s->mixw[i][s->
f[i][1].codeword];
318 pid_cw2 = s->mixw[i][s->
f[i][2].codeword];
320 for (l = j = 0; j < n_senone_active; j++) {
321 int sen = senone_active[j] + l;
322 int32 tmp = pid_cw0[sen] + s->
f[i][0].score;
325 pid_cw1[sen] + s->
f[i][1].score);
327 pid_cw2[sen] + s->
f[i][2].score);
329 senone_scores[sen] += tmp;
337 int16 *senone_scores, uint8 *senone_active,
338 int32 n_senone_active)
341 uint8 *pid_cw0, *pid_cw1;
343 pid_cw0 = s->mixw[i][s->
f[i][0].codeword];
344 pid_cw1 = s->mixw[i][s->
f[i][1].codeword];
346 for (l = j = 0; j < n_senone_active; j++) {
347 int sen = senone_active[j] + l;
348 int32 tmp = pid_cw0[sen] + s->
f[i][0].score;
351 pid_cw1[sen] + s->
f[i][1].score);
353 senone_scores[sen] += tmp;
361 int16 *senone_scores, uint8 *senone_active,
362 int32 n_senone_active)
367 pid_cw0 = s->mixw[i][s->
f[i][0].codeword];
368 for (l = j = 0; j < n_senone_active; j++) {
369 int sen = senone_active[j] + l;
370 int32 tmp = pid_cw0[sen] + s->
f[i][0].score;
371 senone_scores[sen] += tmp;
379 int16 *senone_scores, uint8 *senone_active,
380 int32 n_senone_active)
384 for (l = j = 0; j < n_senone_active; j++) {
385 int sen = senone_active[j] + l;
388 pid_cw = s->mixw[i][s->
f[i][0].codeword];
389 tmp = pid_cw[sen] + s->
f[i][0].score;
390 for (k = 1; k < topn; ++k) {
391 pid_cw = s->mixw[i][s->
f[i][k].codeword];
393 pid_cw[sen] + s->
f[i][k].score);
395 senone_scores[sen] += tmp;
403 int16 *senone_scores, uint8 *senone_active, int32 n_senone_active)
407 return get_scores_8b_feat_6(s, i, senone_scores,
408 senone_active, n_senone_active);
410 return get_scores_8b_feat_5(s, i, senone_scores,
411 senone_active, n_senone_active);
413 return get_scores_8b_feat_4(s, i, senone_scores,
414 senone_active, n_senone_active);
416 return get_scores_8b_feat_3(s, i, senone_scores,
417 senone_active, n_senone_active);
419 return get_scores_8b_feat_2(s, i, senone_scores,
420 senone_active, n_senone_active);
422 return get_scores_8b_feat_1(s, i, senone_scores,
423 senone_active, n_senone_active);
425 return get_scores_8b_feat_any(s, i, topn, senone_scores,
426 senone_active, n_senone_active);
/*
 * get_scores_8b_feat_all (excerpt): for feature stream `i`, adds a value to
 * senone_scores[j] for every senone index j in [0, s->n_sen), using the
 * first `topn` entries of s->f[i].
 *
 * NOTE(review): the statement that merges the k-th term into `tmp` is only
 * partly visible (its opening is outside this excerpt), so the combining
 * operation is not documented here.
 */
431 get_scores_8b_feat_all(
s2_semi_mgau_t * s,
int i,
int topn, int16 *senone_scores)
435 for (j = 0; j < s->n_sen; j++) {
/* Seed `tmp` from entry 0: its mixture-weight row value for senone j plus
 * its score. */
438 pid_cw = s->mixw[i][s->
f[i][0].codeword];
439 tmp = pid_cw[j] + s->
f[i][0].score;
/* Remaining entries 1..topn-1 each contribute (weight row value + score). */
440 for (k = 1; k < topn; ++k) {
441 pid_cw = s->mixw[i][s->
f[i][k].codeword];
443 pid_cw[j] + s->
f[i][k].score);
/* Accumulated, not assigned: the caller's existing value is kept. */
445 senone_scores[j] += tmp;
452 int16 *senone_scores, uint8 *senone_active,
453 int32 n_senone_active)
456 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5;
460 for (j = 0; j < 16; ++j) {
461 w_den[0][j] = s->mixw_cb[j] + s->
f[i][0].score;
462 w_den[1][j] = s->mixw_cb[j] + s->
f[i][1].score;
463 w_den[2][j] = s->mixw_cb[j] + s->
f[i][2].score;
464 w_den[3][j] = s->mixw_cb[j] + s->
f[i][3].score;
465 w_den[4][j] = s->mixw_cb[j] + s->
f[i][4].score;
466 w_den[5][j] = s->mixw_cb[j] + s->
f[i][5].score;
469 pid_cw0 = s->mixw[i][s->
f[i][0].codeword];
470 pid_cw1 = s->mixw[i][s->
f[i][1].codeword];
471 pid_cw2 = s->mixw[i][s->
f[i][2].codeword];
472 pid_cw3 = s->mixw[i][s->
f[i][3].codeword];
473 pid_cw4 = s->mixw[i][s->
f[i][4].codeword];
474 pid_cw5 = s->mixw[i][s->
f[i][5].codeword];
476 for (l = j = 0; j < n_senone_active; j++) {
477 int n = senone_active[j] + l;
481 cw = pid_cw0[n/2] >> 4;
483 cw = pid_cw1[n/2] >> 4;
485 cw = pid_cw2[n/2] >> 4;
487 cw = pid_cw3[n/2] >> 4;
489 cw = pid_cw4[n/2] >> 4;
491 cw = pid_cw5[n/2] >> 4;
495 cw = pid_cw0[n/2] & 0x0f;
497 cw = pid_cw1[n/2] & 0x0f;
499 cw = pid_cw2[n/2] & 0x0f;
501 cw = pid_cw3[n/2] & 0x0f;
503 cw = pid_cw4[n/2] & 0x0f;
505 cw = pid_cw5[n/2] & 0x0f;
508 senone_scores[n] += tmp;
516 int16 *senone_scores, uint8 *senone_active,
517 int32 n_senone_active)
520 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4;
524 for (j = 0; j < 16; ++j) {
525 w_den[0][j] = s->mixw_cb[j] + s->
f[i][0].score;
526 w_den[1][j] = s->mixw_cb[j] + s->
f[i][1].score;
527 w_den[2][j] = s->mixw_cb[j] + s->
f[i][2].score;
528 w_den[3][j] = s->mixw_cb[j] + s->
f[i][3].score;
529 w_den[4][j] = s->mixw_cb[j] + s->
f[i][4].score;
532 pid_cw0 = s->mixw[i][s->
f[i][0].codeword];
533 pid_cw1 = s->mixw[i][s->
f[i][1].codeword];
534 pid_cw2 = s->mixw[i][s->
f[i][2].codeword];
535 pid_cw3 = s->mixw[i][s->
f[i][3].codeword];
536 pid_cw4 = s->mixw[i][s->
f[i][4].codeword];
538 for (l = j = 0; j < n_senone_active; j++) {
539 int n = senone_active[j] + l;
543 cw = pid_cw0[n/2] >> 4;
545 cw = pid_cw1[n/2] >> 4;
547 cw = pid_cw2[n/2] >> 4;
549 cw = pid_cw3[n/2] >> 4;
551 cw = pid_cw4[n/2] >> 4;
555 cw = pid_cw0[n/2] & 0x0f;
557 cw = pid_cw1[n/2] & 0x0f;
559 cw = pid_cw2[n/2] & 0x0f;
561 cw = pid_cw3[n/2] & 0x0f;
563 cw = pid_cw4[n/2] & 0x0f;
566 senone_scores[n] += tmp;
574 int16 *senone_scores, uint8 *senone_active,
575 int32 n_senone_active)
578 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
582 for (j = 0; j < 16; ++j) {
583 w_den[0][j] = s->mixw_cb[j] + s->
f[i][0].score;
584 w_den[1][j] = s->mixw_cb[j] + s->
f[i][1].score;
585 w_den[2][j] = s->mixw_cb[j] + s->
f[i][2].score;
586 w_den[3][j] = s->mixw_cb[j] + s->
f[i][3].score;
589 pid_cw0 = s->mixw[i][s->
f[i][0].codeword];
590 pid_cw1 = s->mixw[i][s->
f[i][1].codeword];
591 pid_cw2 = s->mixw[i][s->
f[i][2].codeword];
592 pid_cw3 = s->mixw[i][s->
f[i][3].codeword];
594 for (l = j = 0; j < n_senone_active; j++) {
595 int n = senone_active[j] + l;
599 cw = pid_cw0[n/2] >> 4;
601 cw = pid_cw1[n/2] >> 4;
603 cw = pid_cw2[n/2] >> 4;
605 cw = pid_cw3[n/2] >> 4;
609 cw = pid_cw0[n/2] & 0x0f;
611 cw = pid_cw1[n/2] & 0x0f;
613 cw = pid_cw2[n/2] & 0x0f;
615 cw = pid_cw3[n/2] & 0x0f;
618 senone_scores[n] += tmp;
626 int16 *senone_scores, uint8 *senone_active,
627 int32 n_senone_active)
630 uint8 *pid_cw0, *pid_cw1, *pid_cw2;
634 for (j = 0; j < 16; ++j) {
635 w_den[0][j] = s->mixw_cb[j] + s->
f[i][0].score;
636 w_den[1][j] = s->mixw_cb[j] + s->
f[i][1].score;
637 w_den[2][j] = s->mixw_cb[j] + s->
f[i][2].score;
640 pid_cw0 = s->mixw[i][s->
f[i][0].codeword];
641 pid_cw1 = s->mixw[i][s->
f[i][1].codeword];
642 pid_cw2 = s->mixw[i][s->
f[i][2].codeword];
644 for (l = j = 0; j < n_senone_active; j++) {
645 int n = senone_active[j] + l;
649 cw = pid_cw0[n/2] >> 4;
651 cw = pid_cw1[n/2] >> 4;
653 cw = pid_cw2[n/2] >> 4;
657 cw = pid_cw0[n/2] & 0x0f;
659 cw = pid_cw1[n/2] & 0x0f;
661 cw = pid_cw2[n/2] & 0x0f;
664 senone_scores[n] += tmp;
672 int16 *senone_scores, uint8 *senone_active,
673 int32 n_senone_active)
676 uint8 *pid_cw0, *pid_cw1;
680 for (j = 0; j < 16; ++j) {
681 w_den[0][j] = s->mixw_cb[j] + s->
f[i][0].score;
682 w_den[1][j] = s->mixw_cb[j] + s->
f[i][1].score;
685 pid_cw0 = s->mixw[i][s->
f[i][0].codeword];
686 pid_cw1 = s->mixw[i][s->
f[i][1].codeword];
688 for (l = j = 0; j < n_senone_active; j++) {
689 int n = senone_active[j] + l;
693 cw = pid_cw0[n/2] >> 4;
695 cw = pid_cw1[n/2] >> 4;
699 cw = pid_cw0[n/2] & 0x0f;
701 cw = pid_cw1[n/2] & 0x0f;
704 senone_scores[n] += tmp;
712 int16 *senone_scores, uint8 *senone_active,
713 int32 n_senone_active)
720 for (j = 0; j < 16; ++j) {
721 w_den[j] = s->mixw_cb[j] + s->
f[i][0].score;
724 pid_cw0 = s->mixw[i][s->
f[i][0].codeword];
726 for (l = j = 0; j < n_senone_active; j++) {
727 int n = senone_active[j] + l;
731 cw = pid_cw0[n/2] >> 4;
735 cw = pid_cw0[n/2] & 0x0f;
738 senone_scores[n] += tmp;
746 int16 *senone_scores, uint8 *senone_active,
747 int32 n_senone_active)
751 for (l = j = 0; j < n_senone_active; j++) {
752 int n = senone_active[j] + l;
756 pid_cw = s->mixw[i][s->
f[i][0].codeword];
758 cw = pid_cw[n/2] >> 4;
760 cw = pid_cw[n/2] & 0x0f;
761 tmp = s->mixw_cb[cw] + s->
f[i][0].score;
762 for (k = 1; k < topn; ++k) {
763 pid_cw = s->mixw[i][s->
f[i][k].codeword];
765 cw = pid_cw[n/2] >> 4;
767 cw = pid_cw[n/2] & 0x0f;
769 s->mixw_cb[cw] + s->
f[i][k].score);
771 senone_scores[n] += tmp;
779 int16 *senone_scores, uint8 *senone_active, int32 n_senone_active)
783 return get_scores_4b_feat_6(s, i, senone_scores,
784 senone_active, n_senone_active);
786 return get_scores_4b_feat_5(s, i, senone_scores,
787 senone_active, n_senone_active);
789 return get_scores_4b_feat_4(s, i, senone_scores,
790 senone_active, n_senone_active);
792 return get_scores_4b_feat_3(s, i, senone_scores,
793 senone_active, n_senone_active);
795 return get_scores_4b_feat_2(s, i, senone_scores,
796 senone_active, n_senone_active);
798 return get_scores_4b_feat_1(s, i, senone_scores,
799 senone_active, n_senone_active);
801 return get_scores_4b_feat_any(s, i, topn, senone_scores,
802 senone_active, n_senone_active);
/*
 * get_scores_4b_feat_all (excerpt): variant of the all-senone scorer for
 * weights stored as 4-bit indices into s->mixw_cb. Each byte pid_cw[j/2]
 * serves two consecutive senones: the low nibble feeds tmp0 (added to
 * senone_scores[j]) and the high nibble feeds tmp1 (added to the next one).
 *
 * NOTE(review): the merge of w_den0/w_den1 into tmp0/tmp1 and the handling
 * of a final odd senone are not visible in this excerpt.
 */
807 get_scores_4b_feat_all(
s2_semi_mgau_t * s,
int i,
int topn, int16 *senone_scores)
/* Clear the lowest bit so the loop below always consumes senones in pairs. */
813 last_sen = s->n_sen & ~1;
814 while (j < last_sen) {
819 pid_cw = s->mixw[i][s->
f[i][0].codeword];
/* Low nibble -> first senone of the pair, high nibble -> second. */
820 tmp0 = s->mixw_cb[pid_cw[j/2] & 0x0f] + s->
f[i][0].score;
821 tmp1 = s->mixw_cb[pid_cw[j/2] >> 4] + s->
f[i][0].score;
822 for (k = 1; k < topn; ++k) {
823 int32 w_den0, w_den1;
825 pid_cw = s->mixw[i][s->
f[i][k].codeword];
826 w_den0 = s->mixw_cb[pid_cw[j/2] & 0x0f] + s->
f[i][k].score;
827 w_den1 = s->mixw_cb[pid_cw[j/2] >> 4] + s->
f[i][k].score;
/* Two post-increments advance j by one full pair per iteration. */
831 senone_scores[j++] += tmp0;
832 senone_scores[j++] += tmp1;
842 int16 *senone_scores,
843 uint8 *senone_active,
844 int32 n_senone_active,
845 mfcc_t ** featbuf, int32 frame,
851 memset(senone_scores, 0, s->n_sen *
sizeof(*senone_scores));
857 for (i = 0; i < s->n_feat; ++i) {
859 if (frame >= ps_mgau_base(ps)->frame_idx) {
865 memcpy(s->
f[i], lastf[i],
sizeof(
vqFeature_t) * s->max_topn);
866 mgau_dist(s, frame, i, featbuf[i]);
871 get_scores_4b_feat_all(s, i, s->
topn_hist_n[topn_idx][i], senone_scores);
873 get_scores_4b_feat(s, i, s->
topn_hist_n[topn_idx][i], senone_scores,
874 senone_active, n_senone_active);
878 get_scores_8b_feat_all(s, i, s->
topn_hist_n[topn_idx][i], senone_scores);
880 get_scores_8b_feat(s, i, s->
topn_hist_n[topn_idx][i], senone_scores,
881 senone_active, n_senone_active);
894 int32 do_swap, do_mmap;
895 size_t filesize, offset;
897 int n_feat = s->n_feat;
898 int n_density = s->n_density;
899 int n_sen = bin_mdef_n_sen(mdef);
903 do_mmap = cmd_ln_boolean_r(s->config,
"-mmap");
905 if ((fp = fopen(file,
"rb")) == NULL)
908 E_INFO(
"Loading senones from dump file %s\n", file);
910 if (fread(&n,
sizeof(int32), 1, fp) != 1) {
911 E_ERROR_SYSTEM(
"Failed to read title size from %s", file);
/*
 * Title-length sanity check: accepted range is 1..999.
 * NOTE(review): the identical test appears on two consecutive visible lines;
 * whatever sits between them is not part of this excerpt. Presumably the
 * first test triggers a byte-swap retry before the second one reports the
 * error - confirm against the full file.
 * The length is logged with %x, i.e. in hexadecimal.
 */
916 if (n < 1 || n > 999) {
918 if (n < 1 || n > 999) {
919 E_ERROR(
"Title length %x in dump file %s out of range\n", n, file);
924 if (fread(line,
sizeof(
char), n, fp) != n) {
925 E_ERROR_SYSTEM(
"Cannot read title");
928 if (line[n - 1] !=
'\0') {
929 E_ERROR(
"Bad title in dump file\n");
932 E_INFO(
"%s\n", line);
935 if (fread(&n,
sizeof(n), 1, fp) != 1) {
936 E_ERROR_SYSTEM(
"Failed to read header size from %s", file);
939 if (do_swap) SWAP_INT32(&n);
940 if (fread(line,
sizeof(
char), n, fp) != n) {
941 E_ERROR_SYSTEM(
"Cannot read header");
944 if (line[n - 1] !=
'\0') {
945 E_ERROR(
"Bad header in dump file\n");
951 if (fread(&n,
sizeof(n), 1, fp) != 1) {
952 E_ERROR_SYSTEM(
"Failed to read header string size from %s", file);
955 if (do_swap) SWAP_INT32(&n);
958 if (fread(line,
sizeof(
char), n, fp) != n) {
959 E_ERROR_SYSTEM(
"Cannot read header");
963 if (!strncmp(line,
"feature_count ", strlen(
"feature_count "))) {
964 n_feat = atoi(line + strlen(
"feature_count "));
966 if (!strncmp(line,
"mixture_count ", strlen(
"mixture_count "))) {
967 n_density = atoi(line + strlen(
"mixture_count "));
969 if (!strncmp(line,
"model_count ", strlen(
"model_count "))) {
970 n_sen = atoi(line + strlen(
"model_count "));
972 if (!strncmp(line,
"cluster_count ", strlen(
"cluster_count "))) {
973 n_clust = atoi(line + strlen(
"cluster_count "));
975 if (!strncmp(line,
"cluster_bits ", strlen(
"cluster_bits "))) {
976 n_bits = atoi(line + strlen(
"cluster_bits "));
985 if (fread(&r,
sizeof(r), 1, fp) != 1) {
986 E_ERROR_SYSTEM(
"Cannot read #rows");
989 if (do_swap) SWAP_INT32(&r);
990 if (fread(&c,
sizeof(c), 1, fp) != 1) {
991 E_ERROR_SYSTEM(
"Cannot read #columns");
994 if (do_swap) SWAP_INT32(&c);
995 E_INFO(
"Rows: %d, Columns: %d\n", r, c);
998 if (n_feat != s->n_feat) {
999 E_ERROR(
"Number of feature streams mismatch: %d != %d\n",
1003 if (n_density != s->n_density) {
1004 E_ERROR(
"Number of densities mismatch: %d != %d\n",
1005 n_density, s->n_density);
1008 if (n_sen != s->n_sen) {
1009 E_ERROR(
"Number of senones mismatch: %d != %d\n",
/* Only cluster counts of 0, 15 or 16 are accepted. */
1014 if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
1015 E_ERROR(
"Cluster count must be 0, 15, or 16\n");
/*
 * Only 4-bit or 8-bit weights are accepted.
 * NOTE(review): this test is on n_bits, yet the message below says
 * "Cluster count"; it looks like it should name the cluster bits instead.
 */
1021 if (!((n_bits == 8) || (n_bits == 4))) {
1022 E_ERROR(
"Cluster count must be 4 or 8\n");
1027 E_INFO(
"Using memory-mapped I/O for senones\n");
1030 fseek(fp, 0, SEEK_END);
1031 filesize = ftell(fp);
1032 fseek(fp, offset, SEEK_SET);
1036 s->sendump_mmap = mmio_file_read(file);
1039 s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
1046 s->mixw_cb = ckd_calloc(1, n_clust);
1047 if (fread(s->mixw_cb, 1, n_clust, fp) != (
size_t) n_clust) {
1048 E_ERROR(
"Failed to read %d bytes from sendump\n", n_clust);
1055 if (s->sendump_mmap) {
1056 s->mixw = ckd_calloc_2d(s->n_feat, n_density,
sizeof(*s->mixw));
1057 for (n = 0; n < n_feat; n++) {
1060 step = (step + 1) / 2;
1061 for (i = 0; i < r; i++) {
1062 s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
1068 s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen,
sizeof(***s->mixw));
1070 for (n = 0; n < n_feat; n++) {
1073 step = (step + 1) / 2;
1074 for (i = 0; i < r; i++) {
1075 if (fread(s->mixw[n][i],
sizeof(***s->mixw), step, fp)
1077 E_ERROR(
"Failed to read %d bytes from sendump\n", step);
1092 read_mixw(
s2_semi_mgau_t * s,
char const *file_name,
double SmoothMin)
1094 char **argname, **argval;
1097 int32 byteswap, chksum_present;
1106 E_INFO(
"Reading mixture weights file '%s'\n", file_name);
1108 if ((fp = fopen(file_name,
"rb")) == NULL)
1109 E_FATAL(
"Failed to open mixture weights file '%s' for reading: %s\n", file_name, strerror(errno));
1112 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
1113 E_FATAL(
"Failed to read header from file '%s'\n", file_name);
1117 for (i = 0; argname[i]; i++) {
1118 if (strcmp(argname[i],
"version") == 0) {
1119 if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
1120 E_WARN(
"Version mismatch(%s): %s, expecting %s\n",
1121 file_name, argval[i], MGAU_MIXW_VERSION);
1123 else if (strcmp(argname[i],
"chksum0") == 0) {
1127 bio_hdrarg_free(argname, argval);
1128 argname = argval = NULL;
1133 if ((bio_fread(&n_sen,
sizeof(int32), 1, fp, byteswap, &chksum) != 1)
1134 || (bio_fread(&n_feat,
sizeof(int32), 1, fp, byteswap, &chksum) !=
1136 || (bio_fread(&n_comp,
sizeof(int32), 1, fp, byteswap, &chksum) !=
1138 || (bio_fread(&n,
sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
1139 E_FATAL(
"bio_fread(%s) (arraysize) failed\n", file_name);
/* The stream count stored in the file must equal the model's s->n_feat. */
1141 if (n_feat != s->n_feat)
1142 E_FATAL(
"#Features streams(%d) != %d\n", n_feat, s->n_feat);
/*
 * The element count `n` read from the file must equal the product of the
 * three header dimensions.
 * NOTE(review): the value tested is `n`, but the argument supplied for
 * "#float32s(%d)" is `i`; this looks like it should be `n`, otherwise the
 * message reports an unrelated number.
 */
1143 if (n != n_sen * n_feat * n_comp) {
1145 (
"%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
1146 file_name, i, n_sen, n_feat, n_comp);
1155 s->mixw = ckd_calloc_3d(s->n_feat, s->n_density, n_sen,
sizeof(***s->mixw));
1158 pdf = (float32 *) ckd_calloc(n_comp,
sizeof(float32));
1162 for (i = 0; i < n_sen; i++) {
1163 for (f = 0; f < n_feat; f++) {
1164 if (bio_fread((
void *) pdf,
sizeof(float32),
1165 n_comp, fp, byteswap, &chksum) != n_comp) {
1166 E_FATAL(
"bio_fread(%s) (arraydata) failed\n", file_name);
1170 if (vector_sum_norm(pdf, n_comp) <= 0.0)
1172 vector_floor(pdf, n_comp, SmoothMin);
1173 vector_sum_norm(pdf, n_comp);
1176 for (c = 0; c < n_comp; c++) {
1179 qscr = -logmath_log(s->lmath_8b, pdf[c]);
1182 s->mixw[f][c][i] = qscr;
1187 E_WARN(
"Weight normalization failed for %d senones\n", n_err);
1192 bio_verify_chksum(fp, byteswap, chksum);
1194 if (fread(&eofchk, 1, 1, fp) == 1)
1195 E_FATAL(
"More data than expected in %s\n", file_name);
1199 E_INFO(
"Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
/*
 * split_topn (excerpt): walks the comma-separated fields of `str`, handling
 * at most `nfeat` of them, and tracks the largest value seen in out[] in
 * `maxn`.
 *
 * NOTE(review): the statements that fill out[i] and advance `c`/`i`, and
 * the return statement, are not visible in this excerpt.
 */
1205 split_topn(
char const *str, uint8 *out,
int nfeat)
/* Work on a private copy of the input; it is released at the end. */
1207 char *topn_list = ckd_salloc(str);
/* One iteration per comma found, stopping once nfeat fields are used. */
1214 while (i < nfeat && (cc = strchr(c,
',')) != NULL) {
1217 if (out[i] > maxn) maxn = out[i];
/* Trailing field after the last comma, if it is non-empty and there is
 * still room. */
1221 if (i < nfeat && *c !=
'\0') {
1223 if (out[i] > maxn) maxn = out[i];
1229 ckd_free(topn_list);
1235 s2_semi_mgau_init(
acmod_t *acmod)
1239 char const *sendump_path;
1242 s = ckd_calloc(1,
sizeof(*s));
1243 s->config = acmod->
config;
1245 s->lmath = logmath_retain(acmod->
lmath);
1248 if (s->lmath_8b == NULL)
1251 if (logmath_get_width(s->lmath_8b) != 1) {
1252 E_ERROR(
"Log base %f is too small to represent add table in 8 bits\n",
1253 logmath_get_base(s->lmath_8b));
1258 if ((s->g = gauden_init(cmd_ln_str_r(s->config,
"-mean"),
1259 cmd_ln_str_r(s->config,
"-var"),
1260 cmd_ln_float32_r(s->config,
"-varfloor"),
1267 s->means = s->g->
mean[0];
1268 s->vars = s->g->
var[0];
1269 s->dets = s->g->
det[0];
1272 s->n_feat = s->g->
n_feat;
/*
 * The Gaussian file's stream count must agree with the feature module.
 * NOTE(review): the comparison calls feat_dimension1(), but the value
 * logged in the message comes from feat_dimension(); if those two return
 * different quantities the message will not show the number that was
 * actually compared - confirm and align them.
 */
1273 if (s->n_feat != feat_dimension1(acmod->
fcb)) {
1274 E_ERROR(
"Number of streams does not match: %d != %d\n",
1275 s->n_feat, feat_dimension(acmod->
fcb));
1278 for (i = 0; i < s->n_feat; ++i) {
1279 if (s->veclen[i] != feat_dimension2(acmod->
fcb, i)) {
1280 E_ERROR(
"Dimension of stream %d does not match: %d != %d\n",
1281 s->veclen[i], feat_dimension2(acmod->
fcb, i));
1287 if ((sendump_path = cmd_ln_str_r(s->config,
"-sendump"))) {
1288 if (read_sendump(s, acmod->
mdef, sendump_path) < 0) {
1293 if (read_mixw(s, cmd_ln_str_r(s->config,
"-mixw"),
1294 cmd_ln_float32_r(s->config,
"-mixwfloor")) < 0) {
1298 s->ds_ratio = cmd_ln_int32_r(s->config,
"-ds");
1301 s->topn_beam = ckd_calloc(s->n_feat,
sizeof(*s->topn_beam));
1302 s->max_topn = cmd_ln_int32_r(s->config,
"-topn");
1303 split_topn(cmd_ln_str_r(s->config,
"-topn_beam"), s->topn_beam, s->n_feat);
1304 E_INFO(
"Maximum top-N: %d ", s->max_topn);
1305 E_INFOCONT(
"Top-N beams:");
1306 for (i = 0; i < s->n_feat; ++i) {
1307 E_INFOCONT(
" %d", s->topn_beam[i]);
1312 s->
n_topn_hist = cmd_ln_int32_r(s->config,
"-pl_window") + 2;
1314 ckd_calloc_3d(s->
n_topn_hist, s->n_feat, s->max_topn,
1320 for (j = 0; j < s->n_feat; ++j) {
1322 for (k = 0; k < s->max_topn; ++k) {
1323 s->
topn_hist[i][j][k].score = WORST_DIST;
1330 ps->
vt = &s2_semi_mgau_funcs;
1333 s2_semi_mgau_free(ps_mgau_base(s));
1338 s2_semi_mgau_mllr_transform(
ps_mgau_t *ps,
1342 return gauden_mllr_transform(s->g, mllr, s->config);
1350 logmath_free(s->lmath);
1351 logmath_free(s->lmath_8b);
/*
 * Release the mixture weights. When the sendump was memory-mapped, s->mixw
 * was allocated with ckd_calloc_2d() and its entries point into the
 * mapping, so only that pointer table is freed before the file is unmapped.
 * NOTE(review): the ckd_free_3d() call presumably belongs to an `else`
 * branch that is not visible in this excerpt - confirm.
 */
1352 if (s->sendump_mmap) {
1353 ckd_free_2d(s->mixw);
1354 mmio_file_unmap(s->sendump_mmap);
1357 ckd_free_3d(s->mixw);
1360 ckd_free(s->topn_beam);