55 my_compare(
const void *a,
const void *b)
58 if (strcmp(*(
char *
const *)a,
"<UNK>") == 0)
60 else if (strcmp(*(
char *
const *)b,
"<UNK>") == 0)
63 return strcmp(*(
char *
const *)a, *(
char *
const *)b);
79 for (i = 0; i <
set->n_models; ++i) {
81 for (j = 0; j < models[i]->
n_words; ++j) {
90 ngram_model_init(base, &ngram_model_set_funcs, lmath, n, hash_table_inuse(vocab));
94 for (gn = hlist; gn; gn = gnode_next(gn)) {
96 base->
word_str[i++] = (
char *)ent->key;
105 sizeof(**set->widmap));
106 for (i = 0; i < base->
n_words; ++i) {
111 for (j = 0; j <
set->n_models; ++j) {
124 const float32 *weights,
137 lmath = models[0]->
lmath;
138 for (i = 1; i < n_models; ++i) {
141 E_ERROR(
"Log-math parameters don't match, will not create LM set\n");
156 for (i = 0; i < n_models; ++i)
164 for (i = 0; i < n_models; ++i) {
165 model->
lms[i] = models[i];
170 if (models[i]->n > n)
177 build_widmap(base, lmath, n);
183 const char *lmctlfile,
189 __BIGSTACKVARIABLE__
char str[1024];
197 if ((ctlfp = fopen(lmctlfile,
"r")) == NULL) {
204 if ((c = strrchr(lmctlfile,
'/')) || (c = strrchr(lmctlfile,
'\\'))) {
207 memcpy(basedir, lmctlfile, c - lmctlfile + 1);
212 E_INFO(
"Reading LM control file '%s'\n", lmctlfile);
214 E_INFO(
"Will prepend '%s' to unqualified paths\n", basedir);
216 if (fscanf(ctlfp,
"%1023s", str) == 1) {
217 if (strcmp(str,
"{") == 0) {
219 while ((fscanf(ctlfp,
"%1023s", str) == 1)
220 && (strcmp(str,
"}") != 0)) {
226 E_INFO(
"Reading classdef from '%s'\n", deffile);
227 if (read_classdef_file(classes, deffile) < 0) {
234 if (strcmp(str,
"}") != 0) {
235 E_ERROR(
"Unexpected EOF in %s\n", lmctlfile);
240 if (fscanf(ctlfp,
"%1023s", str) != 1)
248 while (str[0] !=
'\0') {
252 if (basedir && str[0] !=
'/' && str[0] !=
'\\')
256 E_INFO(
"Reading lm from '%s'\n", lmfile);
262 if (fscanf(ctlfp,
"%1023s", str) != 1) {
263 E_ERROR(
"LMname missing after LMFileName '%s'\n", lmfile);
271 if (fscanf(ctlfp,
"%1023s", str) == 1) {
272 if (strcmp(str,
"{") == 0) {
274 while ((fscanf(ctlfp,
"%1023s", str) == 1) &&
275 (strcmp(str,
"}") != 0)) {
280 E_ERROR(
"Unknown class %s in control file\n", str);
285 classdef->words, classdef->weights,
286 classdef->n_words) < 0) {
289 E_INFO(
"Added class %s containing %d words\n",
290 str, classdef->n_words);
292 if (strcmp(str,
"}") != 0) {
293 E_ERROR(
"Unexpected EOF in %s\n", lmctlfile);
296 if (fscanf(ctlfp,
"%1023s", str) != 1)
317 lm_array =
ckd_calloc(n_models,
sizeof(*lm_array));
318 name_array =
ckd_calloc(n_models,
sizeof(*name_array));
321 for (i = 0; i < n_models; ++i) {
324 lm_node = gnode_next(lm_node);
325 name_node = gnode_next(name_node);
338 for (gn = lms; gn; gn = gnode_next(gn)) {
343 for (gn = lmnames; gn; gn = gnode_next(gn)) {
348 for (gn = hlist; gn; gn = gnode_next(gn)) {
351 classdef_free(he->
val);
364 return set->n_models;
373 if (
set == NULL || set->n_models == 0)
383 if (++itor->cur == itor->set->
n_models) {
400 if (lmname) *lmname = itor->set->
names[itor->cur];
401 return itor->set->
lms[itor->cur];
415 return set->lms[
set->cur];
419 for (i = 0; i <
set->n_models; ++i)
420 if (0 == strcmp(set->names[i], name))
422 if (i == set->n_models)
435 for (i = 0; i <
set->n_models; ++i)
436 if (0 == strcmp(set->names[i], name))
438 if (i == set->n_models)
441 return set->lms[
set->cur];
452 return set->names[
set->cur];
461 if (set->cur == -1 || set_wid >= base->
n_words)
464 return set->widmap[set_wid][
set->cur];
475 else if (set->cur == -1) {
477 for (i = 0; i <
set->n_models; ++i) {
484 return (set->widmap[set_wid][set->cur]
491 const float32 *weights)
496 if (names && weights) {
500 for (i = 0; i <
set->n_models; ++i) {
501 for (j = 0; j <
set->n_models; ++j)
502 if (0 == strcmp(names[i], set->names[j]))
504 if (j == set->n_models) {
505 E_ERROR(
"Unknown LM name %s\n", names[i]);
512 memcpy(set->lweights, weights, set->n_models *
sizeof(*set->lweights));
533 set->lms =
ckd_realloc(set->lms, set->n_models *
sizeof(*set->lms));
534 set->lms[
set->n_models - 1] = model;
535 set->names =
ckd_realloc(set->names, set->n_models *
sizeof(*set->names));
536 set->names[
set->n_models - 1] =
ckd_salloc(name);
538 if (model->
n > base->
n) {
541 (model->
n - 1) *
sizeof(*set->maphist));
545 fprob = weight * 1.0 /
set->n_models;
547 set->n_models *
sizeof(*set->lweights));
553 for (i = 0; i <
set->n_models - 1; ++i)
554 set->lweights[i] += scale;
562 sizeof (**new_widmap));
563 for (i = 0; i < base->
n_words; ++i) {
565 memcpy(new_widmap[i], set->widmap[i],
566 (set->n_models - 1) *
sizeof(**new_widmap));
571 set->widmap = new_widmap;
574 build_widmap(base, base->
lmath, base->
n);
586 int32 lmidx, scale, n, i;
589 for (lmidx = 0; lmidx <
set->n_models; ++lmidx)
590 if (0 == strcmp(name, set->names[lmidx]))
592 if (lmidx == set->n_models)
594 submodel =
set->lms[lmidx];
606 set->names[lmidx] = NULL;
607 for (i = 0; i <
set->n_models; ++i) {
609 set->lms[i] =
set->lms[i+1];
610 set->names[i] =
set->names[i+1];
611 set->lweights[i] =
set->lweights[i+1];
613 set->lweights[i] -= scale;
614 if (set->lms[i]->n > n)
618 set->lms[
set->n_models] = NULL;
619 set->lweights[
set->n_models] = base->
log_zero;
625 for (i = 0; i < base->
n_words; ++i) {
626 memmove(set->widmap[i] + lmidx, set->widmap[i] + lmidx + 1,
627 (set->n_models - lmidx) *
sizeof(**set->widmap));
631 build_widmap(base, base->
lmath, n);
646 for (i = 0; i < base->
n_words; ++i) {
655 set->widmap = (int32 **)
ckd_calloc_2d(n_words, set->n_models,
sizeof(**set->widmap));
657 for (i = 0; i < n_words; ++i) {
661 for (j = 0; j <
set->n_models; ++j) {
668 ngram_model_set_apply_weights(
ngram_model_t *base, float32 lw,
669 float32 wip, float32 uw)
675 for (i = 0; i <
set->n_models; ++i)
682 int32 *history, int32 n_hist,
691 if (n_hist > base->
n - 1)
692 n_hist = base->
n - 1;
695 if (set->cur == -1) {
697 for (i = 0; i <
set->n_models; ++i) {
700 mapwid =
set->widmap[wid][i];
701 for (j = 0; j < n_hist; ++j) {
705 set->maphist[j] =
set->widmap[history[j]][i];
710 mapwid, set->maphist, n_hist, n_used));
716 mapwid =
set->widmap[wid][
set->cur];
717 for (j = 0; j < n_hist; ++j) {
721 set->maphist[j] =
set->widmap[history[j]][
set->cur];
724 mapwid, set->maphist, n_hist, n_used);
732 int32 *history, int32 n_hist,
741 if (n_hist > base->
n - 1)
742 n_hist = base->
n - 1;
745 if (set->cur == -1) {
747 for (i = 0; i <
set->n_models; ++i) {
750 mapwid =
set->widmap[wid][i];
751 for (j = 0; j < n_hist; ++j) {
755 set->maphist[j] =
set->widmap[history[j]][i];
760 mapwid, set->maphist, n_hist, n_used));
766 mapwid =
set->widmap[wid][
set->cur];
767 for (j = 0; j < n_hist; ++j) {
771 set->maphist[j] =
set->widmap[history[j]][
set->cur];
774 mapwid, set->maphist, n_hist, n_used);
782 int32 wid, int32 lweight)
791 newwid =
ckd_calloc(set->n_models,
sizeof(*newwid));
793 for (i = 0; i <
set->n_models; ++i) {
797 if (set->cur == -1 || set->cur == i) {
811 wprob =
ngram_ng_prob(set->lms[i], newwid[i], NULL, 0, &n_hist);
814 else if (set->cur == -1)
828 *
sizeof(**set->widmap));
829 for (i = 0; i < base->
n_words; ++i)
830 set->widmap[i] = set->widmap[0] + i * set->n_models;
831 memcpy(set->widmap[wid], newwid, set->n_models *
sizeof(*newwid));
842 for (i = 0; i <
set->n_models; ++i)
845 for (i = 0; i <
set->n_models; ++i)
859 for (i = 0; i <
set->n_models; ++i)
864 ngram_model_set_free,
865 ngram_model_set_apply_weights,
866 ngram_model_set_score,
867 ngram_model_set_raw_score,
868 ngram_model_set_add_ug,
869 ngram_model_set_flush