• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libpocketsphinx/dict.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2001 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /* 
00039  * HISTORY
00040  * 
00041  * 05-Nov-98  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie-Mellon University
00042  *              dict_load now terminates program if input dictionary 
00043  *              contains errors.
00044  * 
00045  * 21-Nov-97  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie-Mellon University
00046  *              Bugfix: Noise dictionary was not being considered in figuring
00047  *              dictionary size.
00048  * 
00049  * 18-Nov-97  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie-Mellon University
00050  *              Added ability to modify pronunciation of an existing word in
00051  *              dictionary (in dict_add_word()).
00052  * 
00053  * 10-Aug-97  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie-Mellon University
00054  *              Added check for word already existing in dictionary in 
00055  *              dict_add_word().
00056  * 
00057  * 27-May-97  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie-Mellon University
00058  *              Included Bob Brennan's personaldic handling (similar to 
00059  *              oovdic).
00060  * 
00061  * 11-Apr-97  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie-Mellon University
00062  *              Made changes to replace_dict_entry to handle the addition of
00063  *              alternative pronunciations (linking in alt, wid, fwid fields).
00064  * 
00065  * 02-Apr-97  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie-Mellon University
00066  *              Caused a fatal error if max size exceeded, instead of realloc.
00067  * 
00068  * 08-Dec-95  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie-Mellon University
00069  *      Added function dict_write_oovdict().
00070  * 
00071  * 06-Dec-95  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie-Mellon University
00072  *      Added functions dict_next_alt() and dict_pron().
00073  * 
00074  * Revision 8.5  94/10/11  12:32:03  rkm
00075  * Minor changes.
00076  * 
00077  * Revision 8.4  94/07/29  11:49:59  rkm
00078  * Changed handling of OOV subdictionary (no longer alternatives to <UNK>).
00079  * Added placeholders for dynamic addition of words to dictionary.
00080  * Added dict_add_word () for adding new words to dictionary.
00081  * 
00082  * Revision 8.3  94/04/14  15:08:31  rkm
00083  * Added function dictid_to_str().
00084  * 
00085  * Revision 8.2  94/04/14  14:34:11  rkm
00086  * Added OOV words sub-dictionary.
00087  * 
00088  * Revision 8.1  94/02/15  15:06:26  rkm
00089  * Basically the same as in v7; includes multiple start symbols for
00090  * the LISTEN project.
00091  * 
00092  * 11-Feb-94  M K Ravishankar (rkm) at Carnegie-Mellon University
00093  *      Added multiple start symbols for the LISTEN project.
00094  * 
00095  *  9-Sep-92  Fil Alleva (faa) at Carnegie-Mellon University
00096  *      Added special silences for start_sym and end_sym.
00097  *      These special silences
00098  *      (SILb and SILe) are CI models and as such they create a new context,
00099  *      however since no triphones model these contexts explicitly they are
00100  *      backed off to silence, which is the desired context. Therefore no
00101  *      special measures are taken inside the decoder to handle these
00102  *      special silences.
00103  * 14-Oct-92  Eric Thayer (eht) at Carnegie Mellon University
00104  *      added Ravi's formal declarations for dict_to_id() so int32 -> pointer
00105  *      problem doesn't happen on DEC Alpha
00106  * 14-Oct-92  Eric Thayer (eht) at Carnegie Mellon University
00107  *      added Ravi's changes to make calls into hash.c work properly on Alpha
00108  *      
00109  */
00110 
00111 /* System headers. */
00112 #include <stdio.h>
00113 #include <stdlib.h>
00114 #include <string.h>
00115 #include <assert.h>
00116 
00117 /* SphinxBase headers. */
00118 #include <prim_type.h>
00119 #include <cmd_ln.h>
00120 #include <ckd_alloc.h>
00121 #include <pio.h>
00122 #include <hash_table.h>
00123 #include <err.h>
00124 #include <strfuncs.h>
00125 #include <glist.h>
00126 
00127 /* Local headers. */
00128 #include "dict.h"
00129 
00130 #ifdef DEBUG
00131 #define DFPRINTF(x)             fprintf x
00132 #else
00133 #define DFPRINTF(x)
00134 #endif
00135 
00136 #define QUIT(x)         {fprintf x; exit(-1);}
00137 
00138 static void buildEntryTable(dict_t *dict, glist_t list, uint16 *** table_p);
00139 static void buildExitTable(dict_t *dict, glist_t list, uint16 *** table_p,
00140                            uint16 *** permuTab_p, uint16 ** sizeTab_p);
00141 static int32 addToLeftContextTable(dict_t *dict, char *diphone);
00142 static int32 addToRightContextTable(dict_t *dict, char *diphone);
00143 static dict_entry_t *_new_dict_entry(dict_t *dict,
00144                                      char *word_str,
00145                                      char *pronoun_str,
00146                                      int32 use_context);
00147 static void _dict_list_add(dict_t * dict, dict_entry_t * entry);
00148 static int dict_load(dict_t * dict, bin_mdef_t *mdef,
00149                      char const *filename, int32 * word_id,
00150                      int32 use_context);
00151 
00152 #define MAX_PRONOUN_LEN         150
00153 
00154 static int32
00155 get_dict_size(char const *file)
00156 {
00157     FILE *fp;
00158     __BIGSTACKVARIABLE__ char line[1024];
00159     int32 n;
00160 
00161     if ((fp = fopen(file, "r")) == NULL)
00162         return -1;
00163     for (n = 0;; n++)
00164         if (fgets(line, sizeof(line), fp) == NULL)
00165             break;
00166     fclose(fp);
00167 
00168     return n;
00169 }
00170 
00171 dict_t *
00172 dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
00173 {
00174     dict_t *dict = ckd_calloc(1, sizeof(*dict));
00175     int32 word_id = 0, i, j;
00176     dict_entry_t *entry;
00177     int32 max_new_oov;
00178     void *val;
00179     char const *filename, *n_filename;
00180     int use_context;
00181 
00182     if (bin_mdef_n_sseq(mdef) > 65534) {
00183         E_ERROR("Model definition has more than 65534 unique senone sequences, cannot be used.\n");
00184         return NULL;
00185     }
00186 
00187     dict->config = config;
00188     dict->mdef = mdef;
00189     dict->dict_entry_alloc = listelem_alloc_init(sizeof(dict_entry_t));
00190     filename = cmd_ln_str_r(config, "-dict");
00191     n_filename = cmd_ln_str_r(config, "-fdict");
00192     use_context = !cmd_ln_boolean_r(config, "-usewdphones");
00193 
00194     /*
00195      * Find size of dictionary and set hash and list table size hints.
00196      * (Otherwise, the simple-minded PC malloc library goes berserk.)
00197      */
00198     if ((j = get_dict_size(filename)) < 0){
00199         E_ERROR("Failed to open dictionary file %s\n", filename);
00200         dict_free(dict);
00201         return NULL;
00202     }
00203     if (n_filename)
00204         j += get_dict_size(n_filename);
00205     j += 3;                     /* </s>, <s> and <sil> */
00206     if (dict->dict)
00207         hash_table_free(dict->dict);
00208     if (cmd_ln_boolean_r(config, "-dictcase"))
00209         dict->dict = hash_table_new(j, HASH_CASE_YES);
00210     else
00211         dict->dict = hash_table_new(j, HASH_CASE_NO);
00212 
00213     /* Context table size hint: (#CI*#CI)/2 */
00214     j = bin_mdef_n_ciphone(mdef);
00215     j = ((j * j) >> 1) + 1;
00216     if (use_context) {
00217         if (cmd_ln_boolean_r(config, "-dictcase")) {
00218             dict->lcHT = hash_table_new(j, HASH_CASE_YES);
00219             dict->rcHT = hash_table_new(j, HASH_CASE_YES);
00220         }
00221         else {
00222             dict->lcHT = hash_table_new(j, HASH_CASE_NO);
00223             dict->rcHT = hash_table_new(j, HASH_CASE_NO);
00224         }
00225     }
00226 
00227     /* Placeholders (dummy pronunciations) for new words that can be
00228      * added at runtime.  We can expand this region of the dictionary
00229      * later if need be. */
00230     dict->initial_dummy = dict->first_dummy = word_id;
00231     if ((max_new_oov = cmd_ln_int32_r(dict->config, "-maxnewoov")) > 0)
00232         E_INFO("Allocating %d placeholders for new OOVs\n", max_new_oov);
00233     for (i = 0; i < max_new_oov; i++) {
00234         char tmpstr[100], pronstr[100];
00235 
00236         /* Pick a temporary name that doesn't occur in the LM */
00237         sprintf(tmpstr, "=PLCHLDR%d=", i);
00238 
00239         /* new_dict_entry clobbers pronstr! so need this strcpy in the loop */
00240         strcpy(pronstr, "SIL");
00241         entry = _new_dict_entry(dict, tmpstr, pronstr, use_context);
00242         if (!entry) {
00243             E_ERROR("Failed to add DUMMY(SIL) entry to dictionary\n");
00244             dict_free(dict);
00245             return NULL;
00246         }
00247 
00248         _dict_list_add(dict, entry);
00249         (void)hash_table_enter_int32(dict->dict, entry->word, word_id);
00250         entry->wid = word_id;
00251         word_id++;
00252     }
00253     dict->last_dummy = word_id - 1;
00254 
00255     /* Load dictionaries */
00256     if (dict_load(dict, mdef, filename, &word_id, use_context) != 0) {
00257         dict_free(dict);
00258         return NULL;
00259     }
00260 
00261     /* Make sure that <s>, </s>, and <sil> are always in the dictionary. */
00262     if (hash_table_lookup(dict->dict, "</s>", &val) != 0) {
00263         char pronstr[5];
00264         /*
00265          * Check if there is a special end silence phone.
00266          */
00267         if (-1 == dict_ciphone_id(dict, "SILe")) {
00268             strcpy(pronstr, "SIL");
00269             entry = _new_dict_entry(dict, "</s>", pronstr, FALSE);
00270             if (!entry) {
00271                 E_ERROR("Failed to add </s>(SIL) to dictionary\n");
00272                 dict_free(dict);
00273                 return NULL;
00274             }
00275         }
00276         else {
00277             E_INFO("Using special end silence for </s>\n");
00278             strcpy(pronstr, "SILe");
00279             entry =
00280                 _new_dict_entry(dict, "</s>", pronstr, FALSE);
00281         }
00282         _dict_list_add(dict, entry);
00283         hash_table_enter(dict->dict, entry->word, (void *)(long)word_id);
00284         entry->wid = word_id;
00285         word_id++;
00286     }
00287 
00288     dict->config = config;
00289     /* Mark the start of filler words */
00290     dict->filler_start = word_id;
00291 
00292     /* Add the standard start symbol (<s>) if not already in dict */
00293     if (hash_table_lookup(dict->dict, "<s>", &val) != 0) {
00294         char pronstr[5];
00295         /*
00296          * Check if there is a special begin silence phone.
00297          */
00298         if (-1 == dict_ciphone_id(dict, "SILb")) {
00299             strcpy(pronstr, "SIL");
00300             entry =
00301                 _new_dict_entry(dict, "<s>", pronstr, FALSE);
00302             if (!entry) {
00303                 E_ERROR("Failed to add <s>(SIL) to dictionary\n");
00304                 dict_free(dict);
00305                 return NULL;
00306             }
00307         }
00308         else {
00309             E_INFO("Using special begin silence for <s>\n");
00310             strcpy(pronstr, "SILb");
00311             entry =
00312                 _new_dict_entry(dict, "<s>", pronstr, FALSE);
00313             if (!entry) {
00314                 E_ERROR("Failed to add <s>(SILb) to dictionary\n");
00315                 dict_free(dict);
00316                 return NULL;
00317             }
00318         }
00319         _dict_list_add(dict, entry);
00320         hash_table_enter(dict->dict, entry->word, (void *)(long)word_id);
00321         entry->wid = word_id;
00322         word_id++;
00323     }
00324 
00325     /* Finally create a silence word if it isn't there already. */
00326     if (hash_table_lookup(dict->dict, "<sil>", &val) != 0) {
00327         char pronstr[4];
00328 
00329         strcpy(pronstr, "SIL");
00330         entry = _new_dict_entry(dict, "<sil>", pronstr, FALSE);
00331         if (!entry) {
00332             E_ERROR("Failed to add <sil>(SIL) to dictionary\n");
00333             dict_free(dict);
00334             return NULL;
00335         }
00336         _dict_list_add(dict, entry);
00337         hash_table_enter(dict->dict, entry->word, (void *)(long)word_id);
00338         entry->wid = word_id;
00339         word_id++;
00340     }
00341 
00342     if (n_filename) {
00343         if (dict_load(dict, mdef, n_filename, &word_id, FALSE /* use_context */) != 0) {
00344             dict_free(dict);
00345             return NULL;
00346         }
00347     }
00348 
00349     E_INFO("LEFT CONTEXT TABLES\n");
00350     dict->lcList = glist_reverse(dict->lcList);
00351     buildEntryTable(dict, dict->lcList, &dict->lcFwdTable);
00352     buildExitTable(dict, dict->lcList, &dict->lcBwdTable, &dict->lcBwdPermTable,
00353                    &dict->lcBwdSizeTable);
00354 
00355     E_INFO("RIGHT CONTEXT TABLES\n");
00356     dict->rcList = glist_reverse(dict->rcList);
00357     buildEntryTable(dict, dict->rcList, &dict->rcBwdTable);
00358     buildExitTable(dict, dict->rcList, &dict->rcFwdTable, &dict->rcFwdPermTable,
00359                    &dict->rcFwdSizeTable);
00360 
00361     return dict;
00362 }
00363 
00364 void
00365 dict_free(dict_t * dict)
00366 {
00367     int32 i;
00368     int32 entry_count;
00369     dict_entry_t *entry;
00370     gnode_t *gn;
00371 
00372     if (dict == NULL)
00373         return;
00374 
00375     for (i = 0, gn = dict->lcList; gn; gn = gnode_next(gn), ++i) {
00376         ckd_free(dict->lcFwdTable[i]);
00377         ckd_free(gnode_ptr(gn));
00378     }
00379     ckd_free(dict->lcFwdTable);
00380     ckd_free_2d(dict->lcBwdTable);
00381     ckd_free_2d(dict->lcBwdPermTable);
00382     ckd_free(dict->lcBwdSizeTable);
00383     if (dict->lcHT)
00384         hash_table_free(dict->lcHT);
00385     glist_free(dict->lcList);
00386 
00387     for (i = 0, gn = dict->rcList; gn; gn = gnode_next(gn), ++i) {
00388         ckd_free(dict->rcBwdTable[i]);
00389         ckd_free(gnode_ptr(gn));
00390     }
00391     ckd_free(dict->rcBwdTable);
00392     ckd_free_2d(dict->rcFwdTable);
00393     ckd_free_2d(dict->rcFwdPermTable);
00394     ckd_free(dict->rcFwdSizeTable);
00395     if (dict->rcHT)
00396         hash_table_free(dict->rcHT);
00397     glist_free(dict->rcList);
00398 
00399     entry_count = dict->dict_entry_count;
00400 
00401     for (i = 0; i < entry_count; i++) {
00402         entry = dict->dict_list[i];
00403         ckd_free(entry->word);
00404         ckd_free(entry->phone_ids);
00405         ckd_free(entry->ci_phone_ids);
00406     }
00407     listelem_alloc_free(dict->dict_entry_alloc);
00408     ckd_free(dict->dict_list);
00409     ckd_free(dict->ci_index);
00410     if (dict->dict)
00411         hash_table_free(dict->dict);
00412     ckd_free(dict);
00413 }
00414 
00415 static int
00416 dict_load(dict_t * dict, bin_mdef_t *mdef,
00417           char const *filename, int32 *word_id,
00418           int32 use_context)
00419 {
00420     __BIGSTACKVARIABLE__ char dict_str[1024];
00421     __BIGSTACKVARIABLE__ char pronoun_str[1024];
00422     dict_entry_t *entry;
00423     FILE *fs;
00424     int32 start_wid = *word_id;
00425     int32 err = 0;
00426 
00427     if ((fs = fopen(filename, "r")) == NULL)
00428         return -1;
00429 
00430     pronoun_str[0] = '\0';
00431     while (EOF != fscanf(fs, "%s%[^\n]\n", dict_str, pronoun_str)) {
00432         int32 wid;
00433         /* Check for duplicate before we do anything. */
00434         if (hash_table_lookup_int32(dict->dict, dict_str, &wid) == 0) {
00435             E_WARN("Skipping duplicate definition of %s\n", dict_str);
00436             continue;
00437         }
00438         entry = _new_dict_entry(dict, dict_str, pronoun_str, use_context);
00439         if (!entry) {
00440             E_ERROR("Failed to add %s to dictionary\n", dict_str);
00441             err = 1;
00442             continue;
00443         }
00444 
00445         if (hash_table_enter_int32(dict->dict, entry->word, *word_id) != *word_id) {
00446             E_ERROR("Failed to add %s to dictionary hash!\n", entry->word);
00447             err = 1;
00448             continue;
00449         }
00450         _dict_list_add(dict, entry);
00451         entry->wid = *word_id;
00452         pronoun_str[0] = '\0';
00453         /*
00454          * Look for words of the form ".*(#)". These words are
00455          * alternate pronunciations. Also look for phrases
00456          * concatenated with '_'.
00457          */
00458         {
00459             char *p = strrchr(dict_str, '(');
00460 
00461             /*
00462              * For alternate pron. the last car of the word must be ')'
00463              * This will handle the case where the word is something like
00464              * "(LEFT_PAREN"
00465              */
00466             if (dict_str[strlen(dict_str) - 1] != ')')
00467                 p = NULL;
00468 
00469             if (p != NULL) {
00470                 void *wid;
00471 
00472                 *p = '\0';
00473                 if (hash_table_lookup(dict->dict, dict_str, &wid) != 0) {
00474                     E_ERROR
00475                         ("Missing first pronunciation for [%s]\nThis means that e.g. [%s(2)] was found with no [%s]\nPlease correct the dictionary and re-run.\n",
00476                          dict_str, dict_str, dict_str);
00477                     return -1;
00478                 }
00479                 DFPRINTF((stdout,
00480                           "Alternate transcription for [%s](wid = %d)\n",
00481                           entry->word, (long)wid));
00482                 entry->wid = (long)wid;
00483                 {
00484                     while (dict->dict_list[(long)wid]->alt >= 0)
00485                         wid = (void *)(long)dict->dict_list[(long)wid]->alt;
00486                     dict->dict_list[(long)wid]->alt = *word_id;
00487                 }
00488             }
00489         }
00490 
00491         *word_id = *word_id + 1;
00492     }
00493 
00494     E_INFO("%6d = words in file [%s]\n", *word_id - start_wid, filename);
00495 
00496     if (fs)
00497         fclose(fs);
00498 
00499     return err;
00500 }
00501 
00502 int32
00503 dict_to_id(dict_t * dict, char const *dict_str)
00504 {
00505     int32 dictid;
00506 
00507     if (hash_table_lookup_int32(dict->dict, dict_str, &dictid) < 0)
00508         return NO_WORD;
00509     return dictid;
00510 }
00511 
00512 static dict_entry_t *
00513 _new_dict_entry(dict_t *dict, char *word_str, char *pronoun_str, int32 use_context)
00514 {
00515     dict_entry_t *entry;
00516     __BIGSTACKVARIABLE__ char *phone[MAX_PRONOUN_LEN];
00517     __BIGSTACKVARIABLE__ int32 ciPhoneId[MAX_PRONOUN_LEN];
00518     __BIGSTACKVARIABLE__ int32 triphone_ids[MAX_PRONOUN_LEN];
00519     int32 pronoun_len = 0;
00520     int32 i;
00521     int32 lcTabId;
00522     int32 rcTabId;
00523     __BIGSTACKVARIABLE__ char triphoneStr[80];
00524     __BIGSTACKVARIABLE__ char position[256];         /* phone position */
00525     bin_mdef_t *mdef = dict->mdef;
00526 
00527     memset(position, 0, sizeof(position));      /* zero out the position matrix */
00528 
00529     position[0] = 'b';          /* First phone is at begginging */
00530 
00531     while (1) {
00532         int n;
00533         char delim;
00534 
00535         if (pronoun_len >= MAX_PRONOUN_LEN) {
00536             E_ERROR("'%s': Too many phones for bogus hard-coded limit (%d), skipping\n",
00537                     word_str, MAX_PRONOUN_LEN);
00538             return NULL;
00539         }
00540         n = nextword(pronoun_str, " \t", &phone[pronoun_len], &delim);
00541         if (n < 0)
00542             break;
00543         pronoun_str = phone[pronoun_len] + n + 1;
00544         /*
00545          * An '&' in the phone string indicates that this is a word break and
00546          * and that the previous phone is in the end of word position and the
00547          * next phone is the begining of word position
00548          */
00549         if (phone[pronoun_len][0] == '&') {
00550             position[pronoun_len - 1] = WORD_POSN_END;
00551             position[pronoun_len] = WORD_POSN_BEGIN;
00552             continue;
00553         }
00554         ciPhoneId[pronoun_len] = dict_ciphone_id(dict, phone[pronoun_len]);
00555         if (ciPhoneId[pronoun_len] == -1) {
00556             E_ERROR("'%s': Unknown phone '%s'\n", word_str,
00557                     phone[pronoun_len]);
00558             return NULL;
00559         }
00560         pronoun_len++;
00561         if (delim == '\0')
00562             break;
00563     }
00564 
00565     position[pronoun_len - 1] = WORD_POSN_END;    /* Last phone is at the end */
00566 
00567     /*
00568      * If the position marker sequence 'ee' appears or 'se' appears
00569      * the sequence should be '*s'.
00570      */
00571 
00572     if (position[0] == WORD_POSN_END)     /* Also handle single phone word case */
00573         position[0] = WORD_POSN_SINGLE;
00574 
00575     for (i = 0; i < pronoun_len - 1; i++) {
00576         if (((position[i] == WORD_POSN_END)
00577              || (position[i] == WORD_POSN_SINGLE)) &&
00578             (position[i + 1] == WORD_POSN_END))
00579             position[i + 1] = WORD_POSN_SINGLE;
00580     }
00581 
00582     if (pronoun_len >= 2) {
00583         i = 0;
00584 
00585         if (use_context) {
00586             sprintf(triphoneStr, "%s(%%s,%s)b", phone[i], phone[i + 1]);
00587             lcTabId = addToLeftContextTable(dict, triphoneStr);
00588             triphone_ids[i] = lcTabId;
00589         }
00590         else {
00591             triphone_ids[i] = bin_mdef_phone_id(mdef,
00592                                                 dict_ciphone_id(dict, phone[i]),
00593                                                 -1,
00594                                                 dict_ciphone_id(dict, phone[i+1]),
00595                                                 WORD_POSN_BEGIN);
00596             if (triphone_ids[i] < 0)
00597                 triphone_ids[i] = dict_ciphone_id(dict, phone[i]);
00598             triphone_ids[i] = bin_mdef_pid2ssid(mdef, triphone_ids[i]);
00599         }
00600         assert(triphone_ids[i] >= 0);
00601 
00602         for (i = 1; i < pronoun_len - 1; i++) {
00603             triphone_ids[i] = bin_mdef_phone_id(mdef,
00604                                                 dict_ciphone_id(dict, phone[i]),
00605                                                 dict_ciphone_id(dict, phone[i-1]),
00606                                                 dict_ciphone_id(dict, phone[i+1]),
00607                                                 position[i]);
00608             if (triphone_ids[i] < 0)
00609                 triphone_ids[i] = dict_ciphone_id(dict, phone[i]);
00610             triphone_ids[i] = bin_mdef_pid2ssid(mdef, triphone_ids[i]);
00611             assert(triphone_ids[i] >= 0);
00612         }
00613 
00614         if (use_context) {
00615             sprintf(triphoneStr, "%s(%s,%%s)e", phone[i], phone[i - 1]);
00616             rcTabId = addToRightContextTable(dict, triphoneStr);
00617             triphone_ids[i] = rcTabId;
00618         }
00619         else {
00620             triphone_ids[i] = bin_mdef_phone_id(mdef,
00621                                                 dict_ciphone_id(dict, phone[i]),
00622                                                 dict_ciphone_id(dict, phone[i-1]),
00623                                                 -1,
00624                                                 position[i]);
00625             if (triphone_ids[i] < 0)
00626                 triphone_ids[i] = dict_ciphone_id(dict, phone[i]);
00627             triphone_ids[i] = bin_mdef_pid2ssid(mdef, triphone_ids[i]);
00628         }
00629         assert(triphone_ids[i] >= 0);
00630     }
00631 
00632     /*
00633      * It's too hard to model both contexts so I choose to model only
00634      * the left context.
00635      */
00636     if (pronoun_len == 1) {
00637         if (use_context) {
00638             sprintf(triphoneStr, "%s(%%s,SIL)s", phone[0]);
00639             lcTabId = addToLeftContextTable(dict, triphoneStr);
00640             triphone_ids[0] = lcTabId;
00641             /*
00642              * Put the right context table in the 2 entry
00643              */
00644             sprintf(triphoneStr, "%s(SIL,%%s)s", phone[0]);
00645             rcTabId = addToRightContextTable(dict, triphoneStr);
00646             triphone_ids[1] = rcTabId;
00647         }
00648         else {
00649             triphone_ids[0] = dict_ciphone_id(dict,phone[0]);
00650             triphone_ids[0] = bin_mdef_pid2ssid(mdef,triphone_ids[0]);
00651         }
00652     }
00653 
00654     entry = listelem_malloc(dict->dict_entry_alloc);
00655     entry->word = ckd_salloc(word_str);
00656     entry->len = pronoun_len;
00657     entry->mpx = use_context;
00658     entry->alt = -1;
00659     if (pronoun_len != 0) {
00660         entry->ci_phone_ids =
00661             (int32 *) ckd_calloc((size_t) pronoun_len, sizeof(int32));
00662         memcpy(entry->ci_phone_ids, ciPhoneId,
00663                pronoun_len * sizeof(int32));
00664         /*
00665          * This is a HACK to handle the left right conflict on
00666          * single phone words
00667          */
00668         if (use_context && (pronoun_len == 1))
00669             pronoun_len += 1;
00670         entry->phone_ids =
00671             (int32 *) ckd_calloc((size_t) pronoun_len, sizeof(int32));
00672         memcpy(entry->phone_ids, triphone_ids,
00673                pronoun_len * sizeof(int32));
00674     }
00675     else {
00676         E_WARN("%s has no pronounciation, will treat as dummy word\n",
00677                word_str);
00678     }
00679 
00680     return (entry);
00681 }
00682 
00683 /*
00684  * Replace an existing dictionary entry with the given one.  The existing entry
00685  * might be a dummy placeholder for new OOV words, in which case the new_entry argument
00686  * would be TRUE.  (Some restrictions at this time on words that can be added; the code
00687  * should be self-explanatory.)
00688  * Return 1 if successful, 0 if not.
00689  */
00690 static int32
00691 replace_dict_entry(dict_t * dict,
00692                    dict_entry_t * entry,
00693                    char const *word_str,
00694                    char *pronoun_str,
00695                    int32 use_context,
00696                    int32 new_entry)
00697 {
00698     char *phone[MAX_PRONOUN_LEN];
00699     int32 ciPhoneId[MAX_PRONOUN_LEN];
00700     int32 triphone_ids[MAX_PRONOUN_LEN];
00701     int32 pronoun_len = 0;
00702     int32 i;
00703     char triphoneStr[80];
00704     int32 idx;
00705     int32 basewid;
00706 
00707     /* For the moment assume left/right context words... */
00708     assert(use_context);
00709 
00710     /* For the moment, no phrase dictionary stuff... */
00711     while (1) {
00712         int n;
00713         char delim;
00714 
00715         if (pronoun_len >= MAX_PRONOUN_LEN) {
00716             E_ERROR("'%s': Too many phones for bogus hard-coded limit (%d), skipping\n",
00717                     word_str, MAX_PRONOUN_LEN);
00718             return 0;
00719         }
00720         n = nextword(pronoun_str, " \t", &phone[pronoun_len], &delim);
00721         if (n < 0)
00722             break;
00723         pronoun_str = phone[pronoun_len] + n + 1;
00724 
00725         ciPhoneId[pronoun_len] = dict_ciphone_id(dict, phone[pronoun_len]);
00726         if (ciPhoneId[pronoun_len] == -1) {
00727             E_ERROR("'%s': Unknown phone '%s'\n", word_str,
00728                     phone[pronoun_len]);
00729             return 0;
00730         }
00731         pronoun_len++;
00732         if (delim == '\0')
00733             break;
00734     }
00735 
00736     /* For the moment, no single phone new word... */
00737     if (pronoun_len < 2) {
00738         E_ERROR("Pronunciation string too short\n");
00739         return (0);
00740     }
00741 
00742     /* Check if it's an alternative pronunciation; if so base word must exist */
00743     {
00744         char *p = strrchr(word_str, '(');
00745         if (p && (word_str[strlen(word_str) - 1] == ')')) {
00746             *p = '\0';
00747             if (hash_table_lookup_int32(dict->dict, word_str, &idx)) {
00748                 *p = '(';
00749                 E_ERROR("Base word missing for %s\n", word_str);
00750                 return 0;
00751             }
00752             *p = '(';
00753             basewid = (long)idx;
00754         }
00755         else
00756             basewid = -1;
00757     }
00758 
00759     /* Parse pron; for the moment, the boundary diphones must be already known... */
00760     i = 0;
00761     sprintf(triphoneStr, "%s(%%s,%s)b", phone[i], phone[i + 1]);
00762     if (hash_table_lookup_int32(dict->lcHT, triphoneStr, &idx) < 0) {
00763         E_ERROR("Unknown left diphone '%s'\n", triphoneStr);
00764         return (0);
00765     }
00766     triphone_ids[i] = idx;
00767 
00768     for (i = 1; i < pronoun_len - 1; i++) {
00769         triphone_ids[i] = bin_mdef_phone_id(dict->mdef,
00770                                             dict_ciphone_id(dict, phone[i]),
00771                                             dict_ciphone_id(dict, phone[i-1]),
00772                                             dict_ciphone_id(dict, phone[i+1]),
00773                                             WORD_POSN_INTERNAL);
00774         triphone_ids[i] = bin_mdef_pid2ssid(dict->mdef, triphone_ids[i]);
00775     }
00776 
00777     sprintf(triphoneStr, "%s(%s,%%s)e", phone[i], phone[i - 1]);
00778     if (hash_table_lookup_int32(dict->rcHT, triphoneStr, &idx) < 0) {
00779         E_ERROR("Unknown right diphone '%s'\n", triphoneStr);
00780         return (0);
00781     }
00782     triphone_ids[i] = idx;
00783 
00784     /*
00785      * Set up dictionary entry.  Free the existing attributes (where applicable) and
00786      * replace with new ones.
00787      */
00788     entry->len = pronoun_len;
00789     entry->mpx = use_context;
00790     free(entry->word);
00791     free(entry->ci_phone_ids);
00792     free(entry->phone_ids);
00793     entry->word = ckd_salloc(word_str);
00794     entry->ci_phone_ids =
00795         ckd_calloc((size_t) pronoun_len, sizeof(int32));
00796     entry->phone_ids =
00797         ckd_calloc((size_t) pronoun_len, sizeof(int32));
00798     memcpy(entry->ci_phone_ids, ciPhoneId, pronoun_len * sizeof(int32));
00799     memcpy(entry->phone_ids, triphone_ids, pronoun_len * sizeof(int32));
00800 
00801     /* Update alternatives linking if adding a new entry (not updating existing one) */
00802     if (new_entry) {
00803         entry->alt = -1;
00804         if (basewid >= 0) {
00805             entry->alt = dict->dict_list[(int32) basewid]->alt;
00806             dict->dict_list[(int32) basewid]->alt = entry->wid;
00807             entry->wid = (int32) basewid;
00808         }
00809     }
00810 
00811     return (1);
00812 }
00813 
00814 /*
00815  * Add a new word to the dictionary, replacing a dummy placeholder.  Or replace an
00816  * existing non-dummy word in the dictionary.
00817  * Return the word id of the entry updated if successful.  If any error, return -1.
00818  */
00819 int32
00820 dict_add_word(dict_t * dict, char const *word, char *pron)
00821 {
00822     dict_entry_t *entry;
00823     int32 wid, new_entry;
00824 
00825     /* Word already exists */
00826     new_entry = 0;
00827     if ((wid = dict_to_id(dict, word)) < 0) {
00828         /* FIXME: Do some pointer juggling to make this work? */
00829         /* Or better yet, use a better way to determine what words are
00830          * filler words... */
00831         if (dict->first_dummy > dict->last_dummy) {
00832             E_ERROR("Dictionary full; cannot add word\n");
00833             return -1;
00834         }
00835         wid = dict->first_dummy++;
00836         new_entry = 1;
00837     }
00838 
00839     entry = dict->dict_list[wid];
00840     if (!replace_dict_entry(dict, entry, word, pron, TRUE, new_entry))
00841         return -1;
00842 
00843     (void)hash_table_enter_int32(dict->dict, entry->word, wid);
00844 
00845     return (wid);
00846 }
00847 
00848 static void
00849 _dict_list_add(dict_t * dict, dict_entry_t * entry)
00850 /*------------------------------------------------------------*/
00851 {
00852     if (!dict->dict_list)
00853         dict->dict_list = (dict_entry_t **)
00854             ckd_calloc(hash_table_size(dict->dict), sizeof(dict_entry_t *));
00855 
00856     if (dict->dict_entry_count >= hash_table_size(dict->dict)) {
00857         E_WARN("dict size (%d) exceeded\n", hash_table_size(dict->dict));
00858         dict->dict_list = (dict_entry_t **)
00859             ckd_realloc(dict->dict_list,
00860                         (hash_table_size(dict->dict) + 16) * sizeof(dict_entry_t *));
00861     }
00862 
00863     dict->dict_list[dict->dict_entry_count++] = entry;
00864 }
00865 
00866 static int32
00867 addToContextTable(char *diphone, hash_table_t * table, glist_t *list)
00868 {
00869     int32 idx;
00870     char *cp;
00871 
00872     if (-1 == hash_table_lookup_int32(table, diphone, &idx)) {
00873         cp = ckd_salloc(diphone);
00874         idx = table->inuse;
00875         *list = glist_add_ptr(*list, cp);
00876         (void)hash_table_enter_int32(table, cp, idx);
00877     }
00878     return idx;
00879 }
00880 
00881 static int32
00882 addToLeftContextTable(dict_t *dict, char *diphone)
00883 {
00884     return addToContextTable(diphone, dict->lcHT, &dict->lcList);
00885 }
00886 
00887 static int32
00888 addToRightContextTable(dict_t *dict, char *diphone)
00889 {
00890     return addToContextTable(diphone, dict->rcHT, &dict->rcList);
00891 }
00892 
00893 
/*
 * Split a phone string of the form "ci(lc,rc)pc" into its four fields.
 * The ANSI standard scanf can't deal with empty field matches, hence this
 * hand-written scanner.
 *
 * instr: string to parse.
 * ciph, lc, rc, pc: output buffers for the base phone, left context, right
 *     context and trailing position text.  All four are set to the empty
 *     string before parsing; each must be able to hold the matching part of
 *     instr plus a terminator (no bounds are checked here).
 *
 * Returns the number of the field during which the end of instr was reached
 * (1 = base phone ... 4 = position), i.e. how many fields were written.
 */
static int32
parse_triphone(const char *instr, char *ciph, char *lc, char *rc, char *pc)
{
    /* Character that closes each field, in field order. */
    static const char stop[4] = { '(', ',', ')', '\0' };
    char *field[4];
    const char *src = instr;
    int32 n;

    field[0] = ciph;
    field[1] = lc;
    field[2] = rc;
    field[3] = pc;
    for (n = 0; n < 4; n++)
        field[n][0] = '\0';

    for (n = 0; n < 4; n++) {
        char *dst = field[n];

        while ((*src != stop[n]) && (*src != '\0'))
            *dst++ = *src++;
        *dst = '\0';

        /* Input exhausted inside this field: report how far we got. */
        if (*src == '\0')
            return n + 1;
        src++;                  /* step over the delimiter */
    }

    return 4;
}
00939 
00940 static int32
00941 triphone_to_id(dict_t *dict, char const *phone_str)
00942 {
00943     char *ci, *lc, *rc, *pc;
00944     int32 cipid, lcpid, rcpid, pid;
00945     word_posn_t wpos;
00946     size_t len;
00947 
00948     /* Play it safe - subparts must be shorter than phone_str */
00949     len = strlen(phone_str) + 1;
00950     /* Do one malloc to avoid fragmentation on WinCE (and yet, this
00951      * may still be too many). */
00952     ci = ckd_calloc(len * 4 + 1, 1);
00953     lc = ci + len;
00954     rc = lc + len;
00955     pc = rc + len;
00956 
00957     len = parse_triphone(phone_str, ci, lc, rc, pc);
00958     cipid = dict_ciphone_id(dict, ci);
00959     if (cipid < 0) {
00960         free(ci);
00961         return -1;
00962     }
00963     if (len > 1) {
00964         lcpid = dict_ciphone_id(dict, lc);
00965         rcpid = dict_ciphone_id(dict, rc);
00966         if (lcpid < 0 || rcpid < 0) {
00967             free(ci);
00968             return -1;
00969         }
00970         if (len == 4) {
00971             switch (*pc) {
00972             case 'b':
00973                 wpos = WORD_POSN_BEGIN;
00974                 break;
00975             case 'e':
00976                 wpos = WORD_POSN_END;
00977                 break;
00978             case 's':
00979                 wpos = WORD_POSN_SINGLE;
00980                 break;
00981             default:
00982                 wpos = WORD_POSN_INTERNAL;
00983             }
00984         }
00985         else {
00986             wpos = WORD_POSN_INTERNAL;
00987         }
00988         pid = bin_mdef_phone_id(dict->mdef, cipid, lcpid, rcpid, wpos);
00989     }
00990     else
00991         pid = cipid;
00992 
00993     free(ci);
00994     return pid;
00995 }
00996 
00997 static void
00998 buildEntryTable(dict_t *dict, glist_t list, uint16 *** table_p)
00999 {
01000     int32 i, j;
01001     char triphoneStr[128];
01002     int32 ciCount = bin_mdef_n_ciphone(dict->mdef);
01003     int32 silContext = 0;
01004     int32 triphoneContext = 0;
01005     int32 noContext = 0;
01006     uint16 **table;
01007     gnode_t *gn;
01008     int n;
01009 
01010     *table_p = ckd_calloc(glist_count(list), sizeof(**table_p));
01011     table = *table_p;
01012     n = glist_count(list);
01013     E_INFO("Entry Context table contains\n\t%6d entries\n", n);
01014     E_INFO("\t%6d possible cross word triphones.\n", n * ciCount);
01015 
01016     for (i = 0, gn = list; gn; gn = gnode_next(gn), ++i) {
01017         table[i] = ckd_calloc(ciCount, sizeof(**table));
01018         for (j = 0; j < ciCount; j++) {
01019             int32 phoneid;
01020             /*
01021              * Look for the triphone
01022              */
01023             sprintf(triphoneStr, (char *)gnode_ptr(gn),
01024                     bin_mdef_ciphone_str(dict->mdef, j));
01025             phoneid = triphone_to_id(dict, triphoneStr);
01026             if (phoneid >= 0)
01027                 triphoneContext++;
01028             /*
01029              * If we can't find the desired right context use "SIL"
01030              */
01031             if (phoneid < 0) {
01032                 sprintf(triphoneStr, (char *)gnode_ptr(gn), "SIL");
01033                 phoneid = triphone_to_id(dict, triphoneStr);
01034                 if (phoneid >= 0)
01035                     silContext++;
01036             }
01037             /*
01038              * If we can't find "SIL" use context indepedent
01039              */
01040             if (phoneid < 0) {
01041                 char stmp[32];
01042                 char *p;
01043                 strcpy(stmp, (char *)gnode_ptr(gn));
01044                 p = strchr(stmp, '(');
01045                 *p = '\0';
01046                 phoneid = triphone_to_id(dict, stmp);
01047                 noContext++;
01048             }
01049             table[i][j] = bin_mdef_pid2ssid(dict->mdef, phoneid);
01050         }
01051     }
01052     E_INFO("\t%6d triphones\n\t%6d pseudo diphones\n\t%6d uniphones\n",
01053            triphoneContext, silContext, noContext);
01054 }
01055 
/*
 * qsort() comparator for arrays of uint16 values, ascending.
 *
 * The elements are compared as unsigned 16-bit values, matching both the
 * element type of the tables being sorted and the unsigned ordering used by
 * cmpPT(); reading them as signed would order values >= 32768 before
 * smaller ones and disagree with cmpPT().
 *
 * Returns <0, 0 or >0 as *a is less than, equal to or greater than *b.
 */
static int
cmp(void const *a, void const *b)
{
    uint16 const lhs = *(uint16 const *) a;
    uint16 const rhs = *(uint16 const *) b;

    return (lhs > rhs) - (lhs < rhs);
}
01061 
/*
 * Key table consulted by cmpPT(); must be set before sorting with it.
 * FIXME: Not re-entrant.
 */
static uint16 *linkTable;

/*
 * qsort() comparator for arrays of indices: orders two indices by the
 * linkTable values they refer to.  Returns the difference of those values.
 */
static int
cmpPT(void const *a, void const *b)
{
    int16 const first = *(int16 const *) a;
    int16 const second = *(int16 const *) b;
    int const key_first = linkTable[first];
    int const key_second = linkTable[second];

    return (key_first - key_second);
}
01070 
01071 static void
01072 buildExitTable(dict_t *dict, glist_t list,
01073                uint16 *** table_p,
01074                uint16 *** permuTab_p,
01075                uint16 ** sizeTab_p)
01076 {
01077     int32 i, j, k;
01078     char triphoneStr[128];
01079     int32 ciCount = bin_mdef_n_ciphone(dict->mdef);
01080     int32 silContext = 0;
01081     int32 triphoneContext = 0;
01082     int32 noContext = 0;
01083     int32 entries = 0;
01084     uint16 **table;
01085     uint16 **permuTab;
01086     uint16 *sizeTab;
01087     uint16 ptab[128];
01088     gnode_t *gn;
01089     int32 n;
01090 
01091     n = glist_count(list);
01092     *table_p = ckd_calloc_2d(n, ciCount + 1, sizeof(***table_p));
01093     table = *table_p;
01094     *permuTab_p = ckd_calloc_2d(n, ciCount + 1, sizeof(***permuTab_p));
01095     permuTab = *permuTab_p;
01096     *sizeTab_p = ckd_calloc(n, sizeof(**sizeTab_p));
01097     sizeTab = *sizeTab_p;
01098 
01099     E_INFO("Exit Context table contains\n\t%6d entries\n", n);
01100     E_INFO("\t%6d possible cross word triphones.\n", n * ciCount);
01101 
01102     for (i = 0, gn = list; gn; gn = gnode_next(gn), ++i) {
01103         for (j = 0; j < ciCount; j++) {
01104             int32 phoneid;
01105 
01106             /*
01107              * Look for the triphone
01108              */
01109             sprintf(triphoneStr, (char *)gnode_ptr(gn),
01110                     bin_mdef_ciphone_str(dict->mdef, j));
01111             phoneid = triphone_to_id(dict, triphoneStr);
01112             if (phoneid >= 0)
01113                 triphoneContext++;
01114             /*
01115              * If we can't find the desired context use "SIL"
01116              */
01117             if (phoneid < 0) {
01118                 sprintf(triphoneStr, (char *)gnode_ptr(gn), "SIL");
01119                 phoneid = triphone_to_id(dict, triphoneStr);
01120                 if (phoneid >= 0)
01121                     silContext++;
01122             }
01123             /*
01124              * If we can't find "SIL" use context indepedent
01125              */
01126             if (phoneid < 0) {
01127                 char stmp[32];
01128                 char *p;
01129                 strcpy(stmp, (char *)gnode_ptr(gn));
01130                 p = strchr(stmp, '(');
01131                 *p = '\0';
01132                 phoneid = triphone_to_id(dict, stmp);
01133                 noContext++;
01134             }
01135             table[i][j] = bin_mdef_pid2ssid(dict->mdef, phoneid);
01136         }
01137     }
01138     /*
01139      * Now compress the table to eliminate duplicate entries.
01140      */
01141     for (i = 0; i < n; ++i) {
01142         /*
01143          * Set up the permutation table
01144          */
01145         for (k = 0; k < ciCount; k++) {
01146             ptab[k] = k;
01147         }
01148         linkTable = table[i];
01149         qsort(ptab, ciCount, sizeof(*ptab), cmpPT);
01150 
01151         qsort(table[i], ciCount, sizeof(**table), cmp);
01152         for (k = 0, j = 0; j < ciCount; j++) {
01153             if (table[i][k] != table[i][j]) {
01154                 k = k + 1;
01155                 table[i][k] = table[i][j];
01156             }
01157             /*
01158              * Mirror the compression in the permutation table
01159              */
01160             permuTab[i][ptab[j]] = k;
01161         }
01162         table[i][k + 1] = 65535;   /* End of table Marker */
01163         sizeTab[i] = k + 1;
01164         entries += k + 1;
01165     }
01166     E_INFO("\t%6d triphones\n\t%6d pseudo diphones\n\t%6d uniphones\n",
01167            triphoneContext, silContext, noContext);
01168     E_INFO("\t%6d right context entries\n", entries);
01169     E_INFO("\t%6d ave entries per exit context\n",
01170            ((n == 0) ? 0 : entries / n));
01171 }
01172 
01173 int32
01174 dict_get_num_main_words(dict_t * dict)
01175 {
01176     /* FIXME FIXME: Relies on a particular ordering of the dictionary. */
01177     return dict_to_id(dict, "</s>");
01178 }
01179 
01180 int32
01181 dict_pron(dict_t * dict, int32 w, int32 ** pron)
01182 {
01183     *pron = dict->dict_list[w]->ci_phone_ids;
01184     return (dict->dict_list[w]->len);
01185 }
01186 
01187 int32
01188 dict_next_alt(dict_t * dict, int32 w)
01189 {
01190     return (dict->dict_list[w]->alt);
01191 }
01192 
01193 int32
01194 dict_is_filler_word(dict_t * dict, int32 wid)
01195 {
01196     return (wid >= dict->filler_start);
01197 }

Generated on Thu Jan 27 2011 for PocketSphinx by  doxygen 1.7.1