PocketSphinx
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 00038 #ifndef _S3_DICT_H_ 00039 #define _S3_DICT_H_ 00040 00045 /* SphinxBase headers. */ 00046 #include <sphinxbase/hash_table.h> 00047 00048 /* Local headers. */ 00049 #include "s3types.h" 00050 #include "bin_mdef.h" 00051 #include "pocketsphinx_export.h" 00052 00053 #define S3DICT_INC_SZ 4096 00054 00055 #ifdef __cplusplus 00056 extern "C" { 00057 #endif 00058 #if 0 00059 } /* Fool Emacs into not indenting things. */ 00060 #endif 00061 00066 typedef struct { 00067 char *word; 00068 s3cipid_t *ciphone; 00069 int32 pronlen; 00070 s3wid_t alt; 00071 s3wid_t basewid; 00072 } dictword_t; 00073 00079 typedef struct { 00080 int refcnt; 00081 bin_mdef_t *mdef; 00082 dictword_t *word; 00083 hash_table_t *ht; 00084 int32 max_words; 00085 int32 n_word; 00086 int32 filler_start; 00087 int32 filler_end; 00088 s3wid_t startwid; 00089 s3wid_t finishwid; 00090 s3wid_t silwid; 00091 int nocase; 00092 } dict_t; 00093 00094 00106 dict_t *dict_init(cmd_ln_t *config, 00107 bin_mdef_t *mdef 00108 ); 00109 00113 int dict_write(dict_t *dict, char const *filename, char const *format); 00114 00116 POCKETSPHINX_EXPORT 00117 s3wid_t dict_wordid(dict_t *d, const char *word); 00118 00123 int dict_filler_word(dict_t *d, 00124 s3wid_t w 00125 ); 00126 00130 POCKETSPHINX_EXPORT 00131 int dict_real_word(dict_t *d, 00132 s3wid_t w 00133 ); 00134 00139 s3wid_t dict_add_word(dict_t *d, 00140 char const *word, 00141 s3cipid_t const *p, 00142 int32 np 00143 ); 00144 00148 const char *dict_ciphone_str(dict_t *d, 00149 s3wid_t wid, 00150 int32 pos 00151 ); 00152 00154 #define dict_size(d) ((d)->n_word) 00155 #define dict_num_fillers(d) (dict_filler_end(d) - dict_filler_start(d)) 00156 00161 #define dict_num_real_words(d) \ 00162 (dict_size(d) - (dict_filler_end(d) - dict_filler_start(d)) - 2) 00163 #define dict_basewid(d,w) ((d)->word[w].basewid) 00164 #define dict_wordstr(d,w) ((w) < 0 ? NULL : (d)->word[w].word) 00165 #define dict_basestr(d,w) ((d)->word[dict_basewid(d,w)].word) 00166 #define dict_nextalt(d,w) ((d)->word[w].alt) 00167 #define dict_pronlen(d,w) ((d)->word[w].pronlen) 00168 #define dict_pron(d,w,p) ((d)->word[w].ciphone[p]) 00169 #define dict_filler_start(d) ((d)->filler_start) 00170 #define dict_filler_end(d) ((d)->filler_end) 00171 #define dict_startwid(d) ((d)->startwid) 00172 #define dict_finishwid(d) ((d)->finishwid) 00173 #define dict_silwid(d) ((d)->silwid) 00174 #define dict_is_single_phone(d,w) ((d)->word[w].pronlen == 1) 00175 #define dict_first_phone(d,w) ((d)->word[w].ciphone[0]) 00176 #define dict_second_phone(d,w) ((d)->word[w].ciphone[1]) 00177 #define dict_second_last_phone(d,w) ((d)->word[w].ciphone[(d)->word[w].pronlen - 2]) 00178 #define dict_last_phone(d,w) ((d)->word[w].ciphone[(d)->word[w].pronlen - 1]) 00179 00180 /* Hard-coded special words */ 00181 #define S3_START_WORD "<s>" 00182 #define S3_FINISH_WORD "</s>" 00183 #define S3_SILENCE_WORD "<sil>" 00184 #define S3_UNKNOWN_WORD "<UNK>" 00185 00193 int32 dict_word2basestr(char *word); 00194 00198 dict_t *dict_retain(dict_t *d); 00199 00203 int dict_free(dict_t *d); 00204 00206 void dict_report(dict_t *d 00207 ); 00208 00209 #if 0 00210 { /* Stop indent from complaining */ 00211 #endif 00212 #ifdef __cplusplus 00213 } 00214 #endif 00215 00216 #endif