PocketSphinx  0.6
src/libpocketsphinx/dict.h
Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 #ifndef _S3_DICT_H_
00039 #define _S3_DICT_H_
00040 
00045 /* SphinxBase headers. */
00046 #include <sphinxbase/hash_table.h>
00047 
00048 /* Local headers. */
00049 #include "s3types.h"
00050 #include "bin_mdef.h"
00051 #include "pocketsphinx_export.h"
00052 
/* NOTE(review): judging by the name, the increment used when growing the
 * dictionary's word table.  It is not referenced anywhere in this header, so
 * confirm its unit and use in the implementation file. */
00053 #define S3DICT_INC_SZ 4096
00054 
00055 #ifdef __cplusplus
00056 extern "C" {
00057 #endif
00058 #if 0
00059 } /* Fool Emacs into not indenting things. */
00060 #endif
00061 
/**
 * One dictionary entry: a word string together with its pronunciation.
 * The fields are read through the accessor macros declared further down in
 * this header (dict_wordstr(), dict_pron(), dict_pronlen(), dict_nextalt(),
 * dict_basewid()).
 */
00066 typedef struct {
00067     char *word;         /**< Word string; this is what dict_wordstr() returns. */
00068     s3cipid_t *ciphone; /**< Pronunciation as an array of phone ids, indexed by position (see dict_pron()). */
00069     int32 pronlen;      /**< Pronunciation length; dict_last_phone() reads ciphone[pronlen - 1]. */
00070     s3wid_t alt;        /**< Returned by dict_nextalt(); presumably the id of the next alternative pronunciation -- confirm the end-of-chain value. */
00071     s3wid_t basewid;    /**< Index into dict_t::word of the entry whose string dict_basestr() returns. */
00072 } dictword_t;
00073 
/**
 * The dictionary object: an array of dictword_t entries indexed by word id,
 * plus bookkeeping about special ranges and ids.
 *
 * NOTE(review): comments marked "presumably" are inferred from field names
 * and from the declarations in this header only; confirm them against the
 * implementation.
 */
00079 typedef struct {
00080     int refcnt; /**< Presumably the reference count managed by dict_retain() / dict_free(). */
00081     bin_mdef_t *mdef;   /**< Model definition; same type as the mdef argument of dict_init(). */
00082     dictword_t *word;   /**< Entry array; the accessor macros index it directly with a word id. */
00083     hash_table_t *ht;   /**< Hash table; presumably maps word strings to word ids for dict_wordid(). */
00084     int32 max_words;    /**< Presumably the number of entries currently allocated in word[]. */
00085     int32 n_word;       /**< Number of entries; this is what dict_size() returns. */
00086     int32 filler_start; /**< Start of the filler word range (see dict_filler_start()). */
00087     int32 filler_end;   /**< End of the filler word range (see dict_filler_end()); inclusive or exclusive is not visible here. */
00088     s3wid_t startwid;   /**< Returned by dict_startwid(); presumably the id of S3_START_WORD. */
00089     s3wid_t finishwid;  /**< Returned by dict_finishwid(); presumably the id of S3_FINISH_WORD. */
00090     s3wid_t silwid;     /**< Returned by dict_silwid(); presumably the id of S3_SILENCE_WORD. */
00091     int nocase; /**< Presumably a flag selecting case-insensitive word lookup. */
00092 } dict_t;
00093 
00094 
/**
 * Create a dictionary object.
 *
 * @param config Configuration object (cmd_ln_t).
 * @param mdef   Model definition; dict_t has an mdef field of the same type.
 * @return Pointer to a dict_t.
 *
 * NOTE(review): only the prototype is visible here.  Which configuration
 * options are consulted, whether either argument may be NULL, and what is
 * returned on failure (presumably NULL) must be confirmed in the
 * implementation.
 */
00106 dict_t *dict_init(cmd_ln_t *config, 
00107                   bin_mdef_t *mdef  
00108     );
00109 
/**
 * Write dictionary `dict` to the file named `filename`.
 *
 * NOTE(review): the accepted values of `format` (and whether NULL is
 * allowed) and the meaning of the int return value are not visible in this
 * header; presumably 0 means success -- confirm in the implementation.
 */
00113 int dict_write(dict_t *dict, char const *filename, char const *format);
00114 
/**
 * Look up the word id of the string `word` in dictionary `d`.
 * Declared with POCKETSPHINX_EXPORT.
 *
 * NOTE(review): the value returned when `word` is not in the dictionary is
 * not visible here (presumably a "bad id" sentinel from s3types.h) -- confirm.
 */
00116 POCKETSPHINX_EXPORT
00117 s3wid_t dict_wordid(dict_t *d, const char *word);
00118 
/**
 * Test whether word id `w` is a filler word in dictionary `d`.
 *
 * @param d Dictionary to consult.
 * @param w Word id to test.
 * @return int used as a truth value (presumably non-zero for a filler word).
 *
 * NOTE(review): whether the start and finish words count as fillers cannot
 * be determined from this header -- confirm in the implementation.
 */
00123 int dict_filler_word(dict_t *d,  
00124                      s3wid_t w     
00125     );
00126 
/**
 * Test whether word id `w` is a "real" word in dictionary `d`.
 * Declared with POCKETSPHINX_EXPORT.
 *
 * @param d Dictionary to consult.
 * @param w Word id to test.
 * @return int used as a truth value (presumably non-zero for a real word).
 *
 * NOTE(review): the exact definition of "real" is not visible here;
 * dict_num_real_words() suggests it excludes the filler range and two further
 * entries -- confirm in the implementation.
 */
00130 POCKETSPHINX_EXPORT
00131 int dict_real_word(dict_t *d,  
00132                    s3wid_t w     
00133     );
00134 
/**
 * Add a word and its pronunciation to dictionary `d`.
 *
 * @param d    Dictionary to modify.
 * @param word Word string (not modified through this pointer).
 * @param p    Pronunciation as an array of phone ids (not modified through
 *             this pointer).
 * @param np   Presumably the number of entries in `p`.
 * @return A word id; presumably the id assigned to the new entry.
 *
 * NOTE(review): the failure return value, the handling of duplicate words
 * and whether `word` / `p` are copied are not visible in this header --
 * confirm in the implementation.
 */
00139 s3wid_t dict_add_word(dict_t *d,          
00140                       char const *word,   
00141                       s3cipid_t const *p, 
00142                       int32 np            
00143     );
00144 
/**
 * Return a string for one phone of a word's pronunciation.
 *
 * @param d   Dictionary to consult.
 * @param wid Word id.
 * @param pos Position within the pronunciation of `wid`.
 * @return Read-only string; presumably the name of the phone at `pos`.
 *
 * NOTE(review): range checking of `wid` / `pos` and the owner of the returned
 * string are not visible in this header -- confirm in the implementation.
 */
00148 const char *dict_ciphone_str(dict_t *d, 
00149                              s3wid_t wid,       
00150                              int32 pos          
00151     );
00152 
/** Number of entries in the dictionary (the n_word field). */
00154 #define dict_size(d)            ((d)->n_word)
/** Width of the filler range, computed as filler_end minus filler_start.
 *  NOTE(review): if filler_end turns out to be an inclusive index this is one
 *  less than the number of filler words -- confirm in the implementation. */
00155 #define dict_num_fillers(d)   (dict_filler_end(d) - dict_filler_start(d))
00156 
/** dict_size() minus the filler range width (same expression as
 *  dict_num_fillers()) minus 2.  The constant 2 presumably accounts for the
 *  start and finish words -- confirm. */
00161 #define dict_num_real_words(d)                                          \
00162     (dict_size(d) - (dict_filler_end(d) - dict_filler_start(d)) - 2)
/*
 * Field accessors.  `d` is a dict_t pointer and `w` a word id that is used
 * directly as an index into d->word.  None of these macros checks `w` against
 * the dictionary size (only dict_wordstr() rejects negative ids), and none
 * checks a phone position against pronlen.  Several of them expand an
 * argument more than once, so do not pass expressions with side effects.
 */
/** basewid field of entry `w`. */
00163 #define dict_basewid(d,w)       ((d)->word[w].basewid)
/** Word string of entry `w`, or NULL when `w` is negative (`w` is evaluated twice). */
00164 #define dict_wordstr(d,w)       ((w) < 0 ? NULL : (d)->word[w].word)
/** Word string of the entry selected by dict_basewid(d,w) (`d` is evaluated twice). */
00165 #define dict_basestr(d,w)       ((d)->word[dict_basewid(d,w)].word)
/** alt field of entry `w`. */
00166 #define dict_nextalt(d,w)       ((d)->word[w].alt)
/** Pronunciation length of entry `w`. */
00167 #define dict_pronlen(d,w)       ((d)->word[w].pronlen) 
/** Phone id at position `p` in the pronunciation of entry `w`. */
00168 #define dict_pron(d,w,p)        ((d)->word[w].ciphone[p]) 
/** filler_start field of the dictionary. */
00169 #define dict_filler_start(d)    ((d)->filler_start)
/** filler_end field of the dictionary. */
00170 #define dict_filler_end(d)      ((d)->filler_end)
/** startwid field of the dictionary. */
00171 #define dict_startwid(d)        ((d)->startwid)
/** finishwid field of the dictionary. */
00172 #define dict_finishwid(d)       ((d)->finishwid)
/** silwid field of the dictionary. */
00173 #define dict_silwid(d)          ((d)->silwid)
/** Non-zero when the pronunciation of entry `w` has exactly one phone. */
00174 #define dict_is_single_phone(d,w)       ((d)->word[w].pronlen == 1)
/** Phone id at position 0 of entry `w`. */
00175 #define dict_first_phone(d,w)   ((d)->word[w].ciphone[0])
/** Phone id at position 1 of entry `w`; reads ciphone[1] unconditionally. */
00176 #define dict_second_phone(d,w)  ((d)->word[w].ciphone[1])
/** Phone id at position pronlen - 2 of entry `w` (`d` and `w` are evaluated twice). */
00177 #define dict_second_last_phone(d,w)     ((d)->word[w].ciphone[(d)->word[w].pronlen - 2])
/** Phone id at position pronlen - 1 of entry `w` (`d` and `w` are evaluated twice). */
00178 #define dict_last_phone(d,w)    ((d)->word[w].ciphone[(d)->word[w].pronlen - 1])
00179 
00180 /* Hard-coded special words */
/* NOTE(review): S3_START_WORD, S3_FINISH_WORD and S3_SILENCE_WORD are
 * presumably the words whose ids are kept in dict_t's startwid, finishwid and
 * silwid fields.  S3_UNKNOWN_WORD has no matching field in this header.
 * Confirm both points in the implementation. */
00181 #define S3_START_WORD           "<s>"
00182 #define S3_FINISH_WORD          "</s>"
00183 #define S3_SILENCE_WORD         "<sil>"
00184 #define S3_UNKNOWN_WORD         "<UNK>"
00185 
/**
 * Reduce `word` to its base string.
 *
 * NOTE(review): only the prototype is visible.  The parameter is a non-const
 * char *, so the string is presumably edited in place (e.g. an
 * alternative-pronunciation suffix removed).  The meaning of the int32 return
 * value is not visible here -- confirm both in the implementation.
 */
00193 int32 dict_word2basestr(char *word);
00194 
/**
 * Retain a reference to dictionary `d`.
 *
 * NOTE(review): presumably increments dict_t::refcnt and returns `d`;
 * confirm in the implementation.
 */
00198 dict_t *dict_retain(dict_t *d);
00199 
/**
 * Release a reference to dictionary `d`.
 *
 * NOTE(review): presumably decrements dict_t::refcnt, frees the dictionary
 * when it reaches zero and returns the remaining count; NULL handling is not
 * visible here.  Confirm in the implementation.
 */
00203 int dict_free(dict_t *d);
00204 
/**
 * Report on dictionary `d`.
 *
 * NOTE(review): what is reported and where it is written (presumably a
 * summary sent to the logging facility) is not visible in this header --
 * confirm in the implementation.
 */
00206 void dict_report(dict_t *d 
00207     );
00208 
00209 #if 0
00210 { /* Stop indent from complaining */
00211 #endif
00212 #ifdef __cplusplus
00213 }
00214 #endif
00215 
00216 #endif