PocketSphinx
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 2006 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 00038 /* cmdln_macro.h - Command line definitions for PocketSphinx */ 00039 00040 #ifndef __PS_CMDLN_MACRO_H__ 00041 #define __PS_CMDLN_MACRO_H__ 00042 00043 #include <sphinxbase/cmd_ln.h> 00044 #include <sphinxbase/feat.h> 00045 #include <sphinxbase/fe.h> 00046 00048 #define POCKETSPHINX_OPTIONS \ 00049 waveform_to_cepstral_command_line_macro(), \ 00050 cepstral_to_feature_command_line_macro(), \ 00051 POCKETSPHINX_ACMOD_OPTIONS, \ 00052 POCKETSPHINX_BEAM_OPTIONS, \ 00053 POCKETSPHINX_SEARCH_OPTIONS, \ 00054 POCKETSPHINX_DICT_OPTIONS, \ 00055 POCKETSPHINX_NGRAM_OPTIONS, \ 00056 POCKETSPHINX_FSG_OPTIONS, \ 00057 POCKETSPHINX_DEBUG_OPTIONS 00058 00060 #define POCKETSPHINX_DEBUG_OPTIONS \ 00061 { "-logfn", \ 00062 ARG_STRING, \ 00063 NULL, \ 00064 "File to write log messages in" \ 00065 }, \ 00066 { "-debug", \ 00067 ARG_INT32, \ 00068 NULL, \ 00069 "Verbosity level for debugging messages" \ 00070 }, \ 00071 { "-mfclogdir", \ 00072 ARG_STRING, \ 00073 NULL, \ 00074 "Directory to log feature files to" \ 00075 }, \ 00076 { "-rawlogdir", \ 00077 ARG_STRING, \ 00078 NULL, \ 00079 "Directory to log raw audio files to" }, \ 00080 { "-senlogdir", \ 00081 ARG_STRING, \ 00082 NULL, \ 00083 "Directory to log senone score files to" \ 00084 } 00085 00087 #define POCKETSPHINX_BEAM_OPTIONS \ 00088 { "-beam", \ 00089 ARG_FLOAT64, \ 00090 "1e-48", \ 00091 "Beam width applied to every frame in Viterbi search (smaller values mean wider beam)" }, \ 00092 { "-wbeam", \ 00093 ARG_FLOAT64, \ 00094 "7e-29", \ 00095 "Beam width applied to word exits" }, \ 00096 { "-pbeam", \ 00097 ARG_FLOAT64, \ 00098 "1e-48", \ 00099 "Beam width applied to phone transitions" }, \ 00100 { "-lpbeam", \ 00101 ARG_FLOAT64, \ 00102 "1e-40", \ 00103 "Beam width applied to last phone in words" }, \ 00104 { "-lponlybeam", \ 00105 ARG_FLOAT64, \ 00106 "7e-29", \ 00107 "Beam width applied to last phone in single-phone words" }, \ 00108 { "-fwdflatbeam", \ 00109 ARG_FLOAT64, \ 00110 "1e-64", \ 00111 "Beam width applied to every frame in second-pass flat search" }, \ 00112 { "-fwdflatwbeam", \ 00113 ARG_FLOAT64, \ 00114 "7e-29", \ 00115 "Beam width applied to word exits in second-pass flat search" }, \ 00116 { "-pl_window", \ 00117 ARG_INT32, \ 00118 "0", \ 00119 "Phoneme lookahead window size, in frames" }, \ 00120 { "-pl_beam", \ 00121 ARG_FLOAT64, \ 00122 "1e-10", \ 00123 "Beam width applied to phone loop search for lookahead" }, \ 00124 { "-pl_pbeam", \ 00125 ARG_FLOAT64, \ 00126 "1e-5", \ 00127 "Beam width applied to phone loop transitions for lookahead" } 00128 00130 #define POCKETSPHINX_SEARCH_OPTIONS \ 00131 { "-compallsen", \ 00132 ARG_BOOLEAN, \ 00133 "no", \ 00134 "Compute all senone scores in every frame (can be faster when there are many senones)" }, \ 00135 { "-fwdtree", \ 00136 ARG_BOOLEAN, \ 00137 "yes", \ 00138 "Run forward lexicon-tree search (1st pass)" }, \ 00139 { "-fwdflat", \ 00140 ARG_BOOLEAN, \ 00141 "yes", \ 00142 "Run forward flat-lexicon search over word lattice (2nd pass)" }, \ 00143 { "-bestpath", \ 00144 ARG_BOOLEAN, \ 00145 "yes", \ 00146 "Run bestpath (Dijkstra) search over word lattice (3rd pass)" }, \ 00147 { "-backtrace", \ 00148 ARG_BOOLEAN, \ 00149 "no", \ 00150 "Print results and backtraces to log file." }, \ 00151 { "-latsize", \ 00152 ARG_INT32, \ 00153 "5000", \ 00154 "Initial backpointer table size" }, \ 00155 { "-maxwpf", \ 00156 ARG_INT32, \ 00157 "-1", \ 00158 "Maximum number of distinct word exits at each frame (or -1 for no pruning)" }, \ 00159 { "-maxhmmpf", \ 00160 ARG_INT32, \ 00161 "-1", \ 00162 "Maximum number of active HMMs to maintain at each frame (or -1 for no pruning)" }, \ 00163 { "-min_endfr", \ 00164 ARG_INT32, \ 00165 "0", \ 00166 "Nodes ignored in lattice construction if they persist for fewer than N frames" }, \ 00167 { "-fwdflatefwid", \ 00168 ARG_INT32, \ 00169 "4", \ 00170 "Minimum number of end frames for a word to be searched in fwdflat search" }, \ 00171 { "-fwdflatsfwin", \ 00172 ARG_INT32, \ 00173 "25", \ 00174 "Window of frames in lattice to search for successor words in fwdflat search " } 00175 00177 #define POCKETSPHINX_FSG_OPTIONS \ 00178 { "-fsg", \ 00179 ARG_STRING, \ 00180 NULL, \ 00181 "Sphinx format finite state grammar file"}, \ 00182 { "-jsgf", \ 00183 ARG_STRING, \ 00184 NULL, \ 00185 "JSGF grammar file" }, \ 00186 { "-toprule", \ 00187 ARG_STRING, \ 00188 NULL, \ 00189 "Start rule for JSGF (first public rule is default)" }, \ 00190 { "-fsgusealtpron", \ 00191 ARG_BOOLEAN, \ 00192 "yes", \ 00193 "Add alternate pronunciations to FSG"}, \ 00194 { "-fsgusefiller", \ 00195 ARG_BOOLEAN, \ 00196 "yes", \ 00197 "Insert filler words at each state."} 00198 00200 #define POCKETSPHINX_NGRAM_OPTIONS \ 00201 { "-lm", \ 00202 ARG_STRING, \ 00203 NULL, \ 00204 "Word trigram language model input file" }, \ 00205 { "-lmctl", \ 00206 ARG_STRING, \ 00207 NULL, \ 00208 "Specify a set of language model\n"}, \ 00209 { "-lmname", \ 00210 ARG_STRING, \ 00211 "default", \ 00212 "Which language model in -lmctl to use by default"}, \ 00213 { "-lw", \ 00214 ARG_FLOAT32, \ 00215 "6.5", \ 00216 "Language model probability weight" }, \ 00217 { "-fwdflatlw", \ 00218 ARG_FLOAT32, \ 00219 "8.5", \ 00220 "Language model probability weight for flat lexicon (2nd pass) decoding" }, \ 00221 { "-bestpathlw", \ 00222 ARG_FLOAT32, \ 00223 "9.5", \ 00224 "Language model probability weight for bestpath search" }, \ 00225 { "-ascale", \ 00226 ARG_FLOAT32, \ 00227 "20.0", \ 00228 "Inverse of acoustic model scale for confidence score calculation" }, \ 00229 { "-wip", \ 00230 ARG_FLOAT32, \ 00231 "0.65", \ 00232 "Word insertion penalty" }, \ 00233 { "-nwpen", \ 00234 ARG_FLOAT32, \ 00235 "1.0", \ 00236 "New word transition penalty" }, \ 00237 { "-pip", \ 00238 ARG_FLOAT32, \ 00239 "1.0", \ 00240 "Phone insertion penalty" }, \ 00241 { "-uw", \ 00242 ARG_FLOAT32, \ 00243 "1.0", \ 00244 "Unigram weight" }, \ 00245 { "-silprob", \ 00246 ARG_FLOAT32, \ 00247 "0.005", \ 00248 "Silence word transition probability" }, \ 00249 { "-fillprob", \ 00250 ARG_FLOAT32, \ 00251 "1e-8", \ 00252 "Filler word transition probability" }, \ 00253 { "-bghist", \ 00254 ARG_BOOLEAN, \ 00255 "no", \ 00256 "Bigram-mode: If TRUE only one BP entry/frame; else one per LM state" }, \ 00257 { "-lextreedump", \ 00258 ARG_INT32, \ 00259 "0", \ 00260 "Whether to dump the lextree structure to stderr (for debugging), 1 for Ravi's format, 2 for Dot format, Larger than 2 will be treated as Ravi's format" } 00261 00263 #define POCKETSPHINX_DICT_OPTIONS \ 00264 { "-dict", \ 00265 REQARG_STRING, \ 00266 NULL, \ 00267 "Main pronunciation dictionary (lexicon) input file" }, \ 00268 { "-fdict", \ 00269 ARG_STRING, \ 00270 NULL, \ 00271 "Noise word pronunciation dictionary input file" }, \ 00272 { "-dictcase", \ 00273 ARG_BOOLEAN, \ 00274 "no", \ 00275 "Dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only)" }, \ 00276 { "-maxnewoov", \ 00277 ARG_INT32, \ 00278 "20", \ 00279 "Maximum new OOVs that can be added at run time" }, \ 00280 { "-usewdphones", \ 00281 ARG_BOOLEAN, \ 00282 "no", \ 00283 "Use within-word phones only" } 00284 00286 #define POCKETSPHINX_ACMOD_OPTIONS \ 00287 { "-hmm", \ 00288 ARG_STRING, \ 00289 NULL, \ 00290 "Directory containing acoustic model files."}, \ 00291 { "-featparams", \ 00292 ARG_STRING, \ 00293 NULL, \ 00294 "File containing feature extraction parameters."}, \ 00295 { "-mdef", \ 00296 ARG_STRING, \ 00297 NULL, \ 00298 "Model definition input file" }, \ 00299 { "-senmgau", \ 00300 ARG_STRING, \ 00301 NULL, \ 00302 "Senone to codebook mapping input file (usually not needed)" }, \ 00303 { "-tmat", \ 00304 ARG_STRING, \ 00305 NULL, \ 00306 "HMM state transition matrix input file" }, \ 00307 { "-tmatfloor", \ 00308 ARG_FLOAT32, \ 00309 "0.0001", \ 00310 "HMM state transition probability floor (applied to -tmat file)" }, \ 00311 { "-mean", \ 00312 ARG_STRING, \ 00313 NULL, \ 00314 "Mixture gaussian means input file" }, \ 00315 { "-var", \ 00316 ARG_STRING, \ 00317 NULL, \ 00318 "Mixture gaussian variances input file" }, \ 00319 { "-varfloor", \ 00320 ARG_FLOAT32, \ 00321 "0.0001", \ 00322 "Mixture gaussian variance floor (applied to data from -var file)" }, \ 00323 { "-mixw", \ 00324 ARG_STRING, \ 00325 NULL, \ 00326 "Senone mixture weights input file (uncompressed)" }, \ 00327 { "-mixwfloor", \ 00328 ARG_FLOAT32, \ 00329 "0.0000001", \ 00330 "Senone mixture weights floor (applied to data from -mixw file)" }, \ 00331 { "-aw", \ 00332 ARG_INT32, \ 00333 "1", \ 00334 "Inverse weight applied to acoustic scores." }, \ 00335 { "-sendump", \ 00336 ARG_STRING, \ 00337 NULL, \ 00338 "Senone dump (compressed mixture weights) input file" }, \ 00339 { "-mllr", \ 00340 ARG_STRING, \ 00341 NULL, \ 00342 "MLLR transformation to apply to means and variances" }, \ 00343 { "-mmap", \ 00344 ARG_BOOLEAN, \ 00345 "yes", \ 00346 "Use memory-mapped I/O (if possible) for model files" }, \ 00347 { "-ds", \ 00348 ARG_INT32, \ 00349 "1", \ 00350 "Frame GMM computation downsampling ratio" }, \ 00351 { "-topn", \ 00352 ARG_INT32, \ 00353 "4", \ 00354 "Maximum number of top Gaussians to use in scoring." }, \ 00355 { "-topn_beam", \ 00356 ARG_STRING, \ 00357 "0", \ 00358 "Beam width used to determine top-N Gaussians (or a list, per-feature)" },\ 00359 { "-kdtree", \ 00360 ARG_STRING, \ 00361 NULL, \ 00362 "kd-Tree file for Gaussian selection" }, \ 00363 { "-kdmaxdepth", \ 00364 ARG_INT32, \ 00365 "0", \ 00366 "Maximum depth of kd-Trees to use" }, \ 00367 { "-kdmaxbbi", \ 00368 ARG_INT32, \ 00369 "-1", \ 00370 "Maximum number of Gaussians per leaf node in kd-Trees" }, \ 00371 { "-logbase", \ 00372 ARG_FLOAT32, \ 00373 "1.0001", \ 00374 "Base in which all log-likelihoods calculated" } 00375 00376 #define CMDLN_EMPTY_OPTION { NULL, 0, NULL, NULL } 00377 00378 #endif /* __PS_CMDLN_MACRO_H__ */