00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #include "ckd_alloc.h"
00044 #include "ngram_model_dmp.h"
00045 #include "pio.h"
00046 #include "err.h"
00047 #include "byteorder.h"
00048 #include "listelem_alloc.h"
00049
00050 #include <stdio.h>
00051 #include <string.h>
00052 #include <stdlib.h>
00053 #include <limits.h>
00054
00055 static const char darpa_hdr[] = "Darpa Trigram LM";
00056 static ngram_funcs_t ngram_model_dmp_funcs;
00057
00058 #define TSEG_BASE(m,b) ((m)->lm3g.tseg_base[(b)>>LOG_BG_SEG_SZ])
00059 #define FIRST_BG(m,u) ((m)->lm3g.unigrams[u].bigrams)
00060 #define FIRST_TG(m,b) (TSEG_BASE((m),(b))+((m)->lm3g.bigrams[b].trigrams))
00061
00062 static unigram_t *
00063 new_unigram_table(int32 n_ug)
00064 {
00065 unigram_t *table;
00066 int32 i;
00067
00068 table = ckd_calloc(n_ug, sizeof(unigram_t));
00069 for (i = 0; i < n_ug; i++) {
00070 table[i].prob1.f = -99.0;
00071 table[i].bo_wt1.f = -99.0;
00072 }
00073 return table;
00074 }
00075
00076 ngram_model_t *
00077 ngram_model_dmp_read(cmd_ln_t *config,
00078 const char *file_name,
00079 logmath_t *lmath)
00080 {
00081 ngram_model_t *base;
00082 ngram_model_dmp_t *model;
00083 FILE *fp;
00084 int do_mmap, do_swap;
00085 int32 is_pipe;
00086 int32 i, j, k, vn, n, ts;
00087 int32 n_unigram;
00088 int32 n_bigram;
00089 int32 n_trigram;
00090 char str[1024];
00091 unigram_t *ugptr;
00092 bigram_t *bgptr;
00093 trigram_t *tgptr;
00094 char *tmp_word_str;
00095 char *map_base = NULL;
00096 size_t offset = 0, filesize;
00097
00098 do_mmap = FALSE;
00099 if (config)
00100 do_mmap = cmd_ln_boolean_r(config, "-mmap");
00101
00102 if ((fp = fopen_comp(file_name, "rb", &is_pipe)) == NULL) {
00103 E_ERROR("Dump file %s not found\n", file_name);
00104 return NULL;
00105 }
00106
00107 if (is_pipe && do_mmap) {
00108 E_WARN("Dump file is compressed, will not use memory-mapped I/O\n");
00109 do_mmap = 0;
00110 }
00111
00112 do_swap = FALSE;
00113 fread(&k, sizeof(k), 1, fp);
00114 if (k != strlen(darpa_hdr)+1) {
00115 SWAP_INT32(&k);
00116 if (k != strlen(darpa_hdr)+1) {
00117 E_ERROR("Wrong magic header size number %x: %s is not a dump file\n", k, file_name);
00118 fclose(fp);
00119 return NULL;
00120 }
00121 do_swap = 1;
00122 }
00123 if (fread(str, sizeof(char), k, fp) != (size_t) k) {
00124 E_ERROR("Cannot read header\n");
00125 fclose_comp(fp, is_pipe);
00126 return NULL;
00127 }
00128 if (strncmp(str, darpa_hdr, k) != 0) {
00129 E_ERROR("Wrong header %s: %s is not a dump file\n", darpa_hdr);
00130 fclose(fp);
00131 return NULL;
00132 }
00133
00134 if (do_mmap) {
00135 if (do_swap) {
00136 E_INFO
00137 ("Byteswapping required, will not use memory-mapped I/O for LM file\n");
00138 do_mmap = 0;
00139 }
00140 else {
00141 E_INFO("Will use memory-mapped I/O for LM file\n");
00142 #ifdef __ADSPBLACKFIN__
00143 E_FATAL("memory mapping is not supported at the moment.");
00144 #else
00145 #endif
00146 }
00147 }
00148
00149 fread(&k, sizeof(k), 1, fp);
00150 if (do_swap) SWAP_INT32(&k);
00151 if (fread(str, sizeof(char), k, fp) != (size_t) k) {
00152 E_ERROR("Cannot read LM filename in header\n");
00153 fclose(fp);
00154 return NULL;
00155 }
00156
00157
00158 fread(&vn, sizeof(vn), 1, fp);
00159 if (do_swap) SWAP_INT32(&vn);
00160 if (vn <= 0) {
00161
00162 fread(&ts, sizeof(ts), 1, fp);
00163 if (do_swap) SWAP_INT32(&ts);
00164
00165
00166 for (;;) {
00167 fread(&k, sizeof(k), 1, fp);
00168 if (do_swap) SWAP_INT32(&k);
00169 if (k == 0)
00170 break;
00171 if (fread(str, sizeof(char), k, fp) != (size_t) k) {
00172 E_ERROR("fread(word) failed\n");
00173 fclose(fp);
00174 return NULL;
00175 }
00176 }
00177
00178 fread(&n_unigram, sizeof(n_unigram), 1, fp);
00179 if (do_swap) SWAP_INT32(&n_unigram);
00180 }
00181 else {
00182 n_unigram = vn;
00183 }
00184
00185
00186 fread(&n_bigram, sizeof(n_bigram), 1, fp);
00187 if (do_swap) SWAP_INT32(&n_bigram);
00188 fread(&n_trigram, sizeof(n_trigram), 1, fp);
00189 if (do_swap) SWAP_INT32(&n_trigram);
00190 E_INFO("ngrams 1=%d, 2=%d, 3=%d\n", n_unigram, n_bigram, n_trigram);
00191
00192
00193 model = ckd_calloc(1, sizeof(*model));
00194 base = &model->base;
00195 if (n_trigram > 0)
00196 n = 3;
00197 else if (n_bigram > 0)
00198 n = 2;
00199 else
00200 n = 1;
00201 ngram_model_init(base, &ngram_model_dmp_funcs, lmath, n, n_unigram);
00202 base->n_counts[0] = n_unigram;
00203 base->n_counts[1] = n_bigram;
00204 base->n_counts[2] = n_trigram;
00205
00206
00207
00208 model->lm3g.unigrams = new_unigram_table(n_unigram + 1);
00209 ugptr = model->lm3g.unigrams;
00210 for (i = 0; i <= n_unigram; ++i) {
00211
00212 if (fread(ugptr, sizeof(int32), 1, fp) != 1) {
00213 E_ERROR("fread(mapid[%d]) failed\n", i);
00214 ngram_model_free(base);
00215 fclose_comp(fp, is_pipe);
00216 return NULL;
00217 }
00218
00219 if (fread(ugptr, sizeof(unigram_t), 1, fp) != 1) {
00220 E_ERROR("fread(unigrams) failed\n");
00221 ngram_model_free(base);
00222 fclose_comp(fp, is_pipe);
00223 return NULL;
00224 }
00225
00226 if (do_swap) {
00227 SWAP_INT32(&ugptr->prob1.l);
00228 SWAP_INT32(&ugptr->bo_wt1.l);
00229 SWAP_INT32(&ugptr->bigrams);
00230 }
00231
00232 ugptr->prob1.l = logmath_log10_to_log(lmath, ugptr->prob1.f);
00233 ugptr->bo_wt1.l = logmath_log10_to_log(lmath, ugptr->bo_wt1.f);
00234 ++ugptr;
00235 }
00236 E_INFO("%8d = LM.unigrams(+trailer) read\n", n_unigram);
00237
00238
00239 if (do_mmap) {
00240 offset = ftell(fp);
00241 fseek(fp, 0, SEEK_END);
00242 filesize = ftell(fp);
00243 fseek(fp, offset, SEEK_SET);
00244
00245
00246 if (offset & 0x3) {
00247 E_WARN("-mmap specified, but tseg_base is not word-aligned. Will not memory-map.\n");
00248 do_mmap = FALSE;
00249 }
00250 else {
00251 model->dump_mmap = mmio_file_read(file_name);
00252 if (model->dump_mmap == NULL) {
00253 do_mmap = FALSE;
00254 }
00255 else {
00256 map_base = mmio_file_ptr(model->dump_mmap);
00257 }
00258 }
00259 }
00260
00261
00262 if (do_mmap) {
00263 model->lm3g.bigrams = (bigram_t *) (map_base + offset);
00264 offset += (n_bigram + 1) * sizeof(bigram_t);
00265 }
00266 else {
00267 model->lm3g.bigrams =
00268 ckd_calloc(n_bigram + 1, sizeof(bigram_t));
00269 if (fread(model->lm3g.bigrams, sizeof(bigram_t), n_bigram + 1, fp)
00270 != (size_t) n_bigram + 1) {
00271 E_ERROR("fread(bigrams) failed\n");
00272 ngram_model_free(base);
00273 fclose_comp(fp, is_pipe);
00274 return NULL;
00275 }
00276 if (do_swap) {
00277 for (i = 0, bgptr = model->lm3g.bigrams; i <= n_bigram;
00278 i++, bgptr++) {
00279 SWAP_INT16(&bgptr->wid);
00280 SWAP_INT16(&bgptr->prob2);
00281 SWAP_INT16(&bgptr->bo_wt2);
00282 SWAP_INT16(&bgptr->trigrams);
00283 }
00284 }
00285 }
00286 E_INFO("%8d = LM.bigrams(+trailer) read\n", n_bigram);
00287
00288
00289 if (n_trigram > 0) {
00290 if (do_mmap) {
00291 model->lm3g.trigrams = (trigram_t *) (map_base + offset);
00292 offset += n_trigram * sizeof(trigram_t);
00293 }
00294 else {
00295 model->lm3g.trigrams =
00296 ckd_calloc(n_trigram, sizeof(trigram_t));
00297 if (fread
00298 (model->lm3g.trigrams, sizeof(trigram_t), n_trigram, fp)
00299 != (size_t) n_trigram) {
00300 E_ERROR("fread(trigrams) failed\n");
00301 ngram_model_free(base);
00302 fclose_comp(fp, is_pipe);
00303 return NULL;
00304 }
00305 if (do_swap) {
00306 for (i = 0, tgptr = model->lm3g.trigrams; i < n_trigram;
00307 i++, tgptr++) {
00308 SWAP_INT16(&tgptr->wid);
00309 SWAP_INT16(&tgptr->prob3);
00310 }
00311 }
00312 }
00313 E_INFO("%8d = LM.trigrams read\n", n_trigram);
00314
00315 model->lm3g.tginfo = ckd_calloc(n_unigram, sizeof(tginfo_t *));
00316 model->lm3g.le = listelem_alloc_init(sizeof(tginfo_t));
00317 }
00318
00319
00320 if (do_mmap)
00321 fseek(fp, offset, SEEK_SET);
00322 fread(&k, sizeof(k), 1, fp);
00323 if (do_swap) SWAP_INT32(&k);
00324 model->lm3g.n_prob2 = k;
00325 model->lm3g.prob2 = ckd_calloc(k, sizeof(*model->lm3g.prob2));
00326 if (fread(model->lm3g.prob2, sizeof(*model->lm3g.prob2), k, fp) != (size_t) k) {
00327 E_ERROR("fread(prob2) failed\n");
00328 ngram_model_free(base);
00329 fclose_comp(fp, is_pipe);
00330 return NULL;
00331 }
00332 for (i = 0; i < k; i++) {
00333 if (do_swap)
00334 SWAP_INT32(&model->lm3g.prob2[i].l);
00335
00336 model->lm3g.prob2[i].l = logmath_log10_to_log(lmath, model->lm3g.prob2[i].f);
00337 }
00338 E_INFO("%8d = LM.prob2 entries read\n", k);
00339
00340
00341 if (base->n > 2) {
00342 fread(&k, sizeof(k), 1, fp);
00343 if (do_swap) SWAP_INT32(&k);
00344 model->lm3g.n_bo_wt2 = k;
00345 model->lm3g.bo_wt2 = ckd_calloc(k, sizeof(*model->lm3g.bo_wt2));
00346 if (fread(model->lm3g.bo_wt2, sizeof(*model->lm3g.bo_wt2), k, fp) != (size_t) k) {
00347 E_ERROR("fread(bo_wt2) failed\n");
00348 ngram_model_free(base);
00349 fclose_comp(fp, is_pipe);
00350 return NULL;
00351 }
00352 for (i = 0; i < k; i++) {
00353 if (do_swap)
00354 SWAP_INT32(&model->lm3g.bo_wt2[i].l);
00355
00356 model->lm3g.bo_wt2[i].l = logmath_log10_to_log(lmath, model->lm3g.bo_wt2[i].f);
00357 }
00358 E_INFO("%8d = LM.bo_wt2 entries read\n", k);
00359 }
00360
00361
00362 if (base->n > 2) {
00363 fread(&k, sizeof(k), 1, fp);
00364 if (do_swap) SWAP_INT32(&k);
00365 model->lm3g.n_prob3 = k;
00366 model->lm3g.prob3 = ckd_calloc(k, sizeof(*model->lm3g.prob3));
00367 if (fread(model->lm3g.prob3, sizeof(*model->lm3g.prob3), k, fp) != (size_t) k) {
00368 E_ERROR("fread(prob3) failed\n");
00369 ngram_model_free(base);
00370 fclose_comp(fp, is_pipe);
00371 return NULL;
00372 }
00373 for (i = 0; i < k; i++) {
00374 if (do_swap)
00375 SWAP_INT32(&model->lm3g.prob3[i].l);
00376
00377 model->lm3g.prob3[i].l = logmath_log10_to_log(lmath, model->lm3g.prob3[i].f);
00378 }
00379 E_INFO("%8d = LM.prob3 entries read\n", k);
00380 }
00381
00382
00383 if (do_mmap)
00384 offset = ftell(fp);
00385 if (n_trigram > 0) {
00386 if (do_mmap) {
00387 memcpy(&k, map_base + offset, sizeof(k));
00388 offset += sizeof(int32);
00389 model->lm3g.tseg_base = (int32 *) (map_base + offset);
00390 offset += k * sizeof(int32);
00391 }
00392 else {
00393 k = (n_bigram + 1) / BG_SEG_SZ + 1;
00394 fread(&k, sizeof(k), 1, fp);
00395 if (do_swap) SWAP_INT32(&k);
00396 model->lm3g.tseg_base = ckd_calloc(k, sizeof(int32));
00397 if (fread(model->lm3g.tseg_base, sizeof(int32), k, fp) !=
00398 (size_t) k) {
00399 E_ERROR("fread(tseg_base) failed\n");
00400 ngram_model_free(base);
00401 fclose_comp(fp, is_pipe);
00402 return NULL;
00403 }
00404 if (do_swap)
00405 for (i = 0; i < k; i++)
00406 SWAP_INT32(&model->lm3g.tseg_base[i]);
00407 }
00408 E_INFO("%8d = LM.tseg_base entries read\n", k);
00409 }
00410
00411
00412 if (do_mmap) {
00413 memcpy(&k, map_base + offset, sizeof(k));
00414 offset += sizeof(int32);
00415 tmp_word_str = (char *) (map_base + offset);
00416 offset += k;
00417 }
00418 else {
00419 base->writable = TRUE;
00420 fread(&k, sizeof(k), 1, fp);
00421 if (do_swap) SWAP_INT32(&k);
00422 tmp_word_str = ckd_calloc(k, sizeof(char));
00423 if (fread(tmp_word_str, sizeof(char), k, fp) != (size_t) k) {
00424 E_ERROR("fread(word-string) failed\n");
00425 ngram_model_free(base);
00426 fclose_comp(fp, is_pipe);
00427 return NULL;
00428 }
00429 }
00430
00431
00432 for (i = 0, j = 0; i < k; i++)
00433 if (tmp_word_str[i] == '\0')
00434 j++;
00435 if (j != n_unigram) {
00436 E_ERROR("Error reading word strings (%d doesn't match n_unigrams %d)\n",
00437 j, n_unigram);
00438 ngram_model_free(base);
00439 fclose_comp(fp, is_pipe);
00440 return NULL;
00441 }
00442
00443
00444 if (do_mmap) {
00445 j = 0;
00446 for (i = 0; i < n_unigram; i++) {
00447 base->word_str[i] = tmp_word_str + j;
00448 if (hash_table_enter(base->wid, base->word_str[i],
00449 (void *)(long)i) != (void *)(long)i) {
00450 E_WARN("Duplicate word in dictionary: %s\n", base->word_str[i]);
00451 }
00452 j += strlen(base->word_str[i]) + 1;
00453 }
00454 }
00455 else {
00456 j = 0;
00457 for (i = 0; i < n_unigram; i++) {
00458 base->word_str[i] = ckd_salloc(tmp_word_str + j);
00459 if (hash_table_enter(base->wid, base->word_str[i],
00460 (void *)(long)i) != (void *)(long)i) {
00461 E_WARN("Duplicate word in dictionary: %s\n", base->word_str[i]);
00462 }
00463 j += strlen(base->word_str[i]) + 1;
00464 }
00465 free(tmp_word_str);
00466 }
00467 E_INFO("%8d = ascii word strings read\n", i);
00468
00469 fclose_comp(fp, is_pipe);
00470 return base;
00471 }
00472
00473 int
00474 ngram_model_dmp_write(ngram_model_t *model,
00475 const char *file_name)
00476 {
00477 return -1;
00478 }
00479
00480 static int
00481 ngram_model_dmp_apply_weights(ngram_model_t *base, float32 lw,
00482 float32 wip, float32 uw)
00483 {
00484 ngram_model_dmp_t *model = (ngram_model_dmp_t *)base;
00485 lm3g_apply_weights(base, &model->lm3g, lw, wip, uw);
00486 return 0;
00487 }
00488
00489
00490 #define BINARY_SEARCH_THRESH 16
00491 static int32
00492 find_bg(bigram_t * bg, int32 n, int32 w)
00493 {
00494 int32 i, b, e;
00495
00496
00497 b = 0;
00498 e = n;
00499 while (e - b > BINARY_SEARCH_THRESH) {
00500 i = (b + e) >> 1;
00501 if (bg[i].wid < w)
00502 b = i + 1;
00503 else if (bg[i].wid > w)
00504 e = i;
00505 else
00506 return i;
00507 }
00508
00509
00510 for (i = b; (i < e) && (bg[i].wid != w); i++);
00511 return ((i < e) ? i : -1);
00512 }
00513
00514 static int32
00515 lm3g_bg_score(ngram_model_dmp_t *model,
00516 int32 lw1, int32 lw2, int32 *n_used)
00517 {
00518 int32 i, n, b, score;
00519 bigram_t *bg;
00520
00521 if (lw1 < 0) {
00522 *n_used = 1;
00523 return model->lm3g.unigrams[lw2].prob1.l;
00524 }
00525
00526 b = FIRST_BG(model, lw1);
00527 n = FIRST_BG(model, lw1 + 1) - b;
00528 bg = model->lm3g.bigrams + b;
00529
00530 if ((i = find_bg(bg, n, lw2)) >= 0) {
00531
00532 *n_used = 2;
00533 score = model->lm3g.prob2[bg[i].prob2].l;
00534 }
00535 else {
00536
00537 *n_used = 1;
00538 score = model->lm3g.unigrams[lw1].bo_wt1.l + model->lm3g.unigrams[lw2].prob1.l;
00539 }
00540
00541 return (score);
00542 }
00543
00544 static void
00545 load_tginfo(ngram_model_dmp_t *model, int32 lw1, int32 lw2)
00546 {
00547 int32 i, n, b, t;
00548 bigram_t *bg;
00549 tginfo_t *tginfo;
00550
00551
00552 tginfo = (tginfo_t *) listelem_malloc(model->lm3g.le);
00553 tginfo->w1 = lw1;
00554 tginfo->tg = NULL;
00555 tginfo->next = model->lm3g.tginfo[lw2];
00556 model->lm3g.tginfo[lw2] = tginfo;
00557
00558
00559 b = model->lm3g.unigrams[lw1].bigrams;
00560 n = model->lm3g.unigrams[lw1 + 1].bigrams - b;
00561 bg = model->lm3g.bigrams + b;
00562
00563 if ((n > 0) && ((i = find_bg(bg, n, lw2)) >= 0)) {
00564 tginfo->bowt = model->lm3g.bo_wt2[bg[i].bo_wt2].l;
00565
00566
00567 b += i;
00568 t = FIRST_TG(model, b);
00569
00570 tginfo->tg = model->lm3g.trigrams + t;
00571
00572
00573 tginfo->n_tg = FIRST_TG(model, b + 1) - t;
00574 }
00575 else {
00576 tginfo->bowt = 0;
00577 tginfo->n_tg = 0;
00578 }
00579 }
00580
00581
00582 static int32
00583 find_tg(trigram_t * tg, int32 n, int32 w)
00584 {
00585 int32 i, b, e;
00586
00587 b = 0;
00588 e = n;
00589 while (e - b > BINARY_SEARCH_THRESH) {
00590 i = (b + e) >> 1;
00591 if (tg[i].wid < w)
00592 b = i + 1;
00593 else if (tg[i].wid > w)
00594 e = i;
00595 else
00596 return i;
00597 }
00598
00599 for (i = b; (i < e) && (tg[i].wid != w); i++);
00600 return ((i < e) ? i : -1);
00601 }
00602
00603 static int32
00604 lm3g_tg_score(ngram_model_dmp_t *model, int32 lw1,
00605 int32 lw2, int32 lw3, int32 *n_used)
00606 {
00607 ngram_model_t *base = &model->base;
00608 int32 i, n, score;
00609 trigram_t *tg;
00610 tginfo_t *tginfo, *prev_tginfo;
00611
00612 if ((base->n < 3) || (lw1 < 0))
00613 return (lm3g_bg_score(model, lw2, lw3, n_used));
00614
00615 prev_tginfo = NULL;
00616 for (tginfo = model->lm3g.tginfo[lw2]; tginfo; tginfo = tginfo->next) {
00617 if (tginfo->w1 == lw1)
00618 break;
00619 prev_tginfo = tginfo;
00620 }
00621
00622 if (!tginfo) {
00623 load_tginfo(model, lw1, lw2);
00624 tginfo = model->lm3g.tginfo[lw2];
00625 }
00626 else if (prev_tginfo) {
00627 prev_tginfo->next = tginfo->next;
00628 tginfo->next = model->lm3g.tginfo[lw2];
00629 model->lm3g.tginfo[lw2] = tginfo;
00630 }
00631
00632 tginfo->used = 1;
00633
00634
00635 n = tginfo->n_tg;
00636 tg = tginfo->tg;
00637 if ((i = find_tg(tg, n, lw3)) >= 0) {
00638
00639 *n_used = 3;
00640 score = model->lm3g.prob3[tg[i].prob3].l;
00641 }
00642 else {
00643 score = tginfo->bowt + lm3g_bg_score(model, lw2, lw3, n_used);
00644 }
00645
00646 return (score);
00647 }
00648
00649 static int32
00650 ngram_model_dmp_score(ngram_model_t *base, int32 wid,
00651 int32 *history, int32 n_hist,
00652 int32 *n_used)
00653 {
00654 ngram_model_dmp_t *model = (ngram_model_dmp_t *)base;
00655 switch (n_hist) {
00656 case 0:
00657
00658 *n_used = 1;
00659 return model->lm3g.unigrams[wid].prob1.l;
00660 case 1:
00661 return lm3g_bg_score(model, history[0], wid, n_used);
00662 case 2:
00663 default:
00664
00665 return lm3g_tg_score(model, history[1], history[0], wid, n_used);
00666 }
00667 }
00668
00669 static int32
00670 ngram_model_dmp_raw_score(ngram_model_t *base, int32 wid,
00671 int32 *history, int32 n_hist,
00672 int32 *n_used)
00673 {
00674 ngram_model_dmp_t *model = (ngram_model_dmp_t *)base;
00675 int32 score;
00676
00677 switch (n_hist) {
00678 case 0:
00679
00680 *n_used = 1;
00681
00682 score = model->lm3g.unigrams[wid].prob1.l - base->log_wip;
00683
00684 score = (int32)(score / base->lw);
00685
00686 if (strcmp(base->word_str[wid], "<s>") != 0) {
00687 score = logmath_log(base->lmath,
00688 logmath_exp(base->lmath, score)
00689 - logmath_exp(base->lmath,
00690 base->log_uniform + base->log_uniform_weight));
00691 }
00692 return score;
00693 case 1:
00694 score = lm3g_bg_score(model, history[0], wid, n_used);
00695 break;
00696 case 2:
00697 default:
00698
00699 score = lm3g_tg_score(model, history[1], history[0], wid, n_used);
00700 break;
00701 }
00702
00703 return (int32)((score - base->log_wip) / base->lw);
00704 }
00705
00706 static int32
00707 ngram_model_dmp_add_ug(ngram_model_t *base,
00708 int32 wid, int32 lweight)
00709 {
00710 ngram_model_dmp_t *model = (ngram_model_dmp_t *)base;
00711 return lm3g_add_ug(base, &model->lm3g, wid, lweight);
00712 }
00713
00714 static void
00715 ngram_model_dmp_free(ngram_model_t *base)
00716 {
00717 ngram_model_dmp_t *model = (ngram_model_dmp_t *)base;
00718
00719 ckd_free(model->lm3g.unigrams);
00720 ckd_free(model->lm3g.prob2);
00721 if (model->dump_mmap) {
00722 mmio_file_unmap(model->dump_mmap);
00723 }
00724 else {
00725 ckd_free(model->lm3g.bigrams);
00726 if (base->n > 2) {
00727 ckd_free(model->lm3g.trigrams);
00728 ckd_free(model->lm3g.tseg_base);
00729 }
00730 }
00731 if (base->n > 2) {
00732 ckd_free(model->lm3g.bo_wt2);
00733 ckd_free(model->lm3g.prob3);
00734 }
00735
00736 lm3g_tginfo_free(base, &model->lm3g);
00737 }
00738
00739 static void
00740 ngram_model_dmp_flush(ngram_model_t *base)
00741 {
00742 ngram_model_dmp_t *model = (ngram_model_dmp_t *)base;
00743 lm3g_tginfo_reset(base, &model->lm3g);
00744 }
00745
00746 static ngram_funcs_t ngram_model_dmp_funcs = {
00747 ngram_model_dmp_free,
00748 ngram_model_dmp_apply_weights,
00749 ngram_model_dmp_score,
00750 ngram_model_dmp_raw_score,
00751 ngram_model_dmp_add_ug,
00752 ngram_model_dmp_flush
00753 };