PocketSphinx  0.6
include/cmdln_macro.h
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2006 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /* cmdln_macro.h - Command line definitions for PocketSphinx */
00039 
00040 #ifndef __PS_CMDLN_MACRO_H__
00041 #define __PS_CMDLN_MACRO_H__
00042 
00043 #include <sphinxbase/cmd_ln.h>
00044 #include <sphinxbase/feat.h>
00045 #include <sphinxbase/fe.h>
00046 
/**
 * Aggregate option list for PocketSphinx.
 *
 * Expands to a comma-separated sequence consisting of
 * waveform_to_cepstral_command_line_macro(),
 * cepstral_to_feature_command_line_macro(), and then the acoustic model,
 * beam, search, dictionary, N-gram, FSG and debug option groups, in that
 * order.  The expansion has no trailing comma and no terminating entry, so
 * the user must append one where it is needed.
 *
 * NOTE(review): presumably meant to be placed inside an arg_t array
 * initializer from <sphinxbase/cmd_ln.h> -- confirm against that header.
 */
00048 #define POCKETSPHINX_OPTIONS \
00049     waveform_to_cepstral_command_line_macro(), \
00050     cepstral_to_feature_command_line_macro(), \
00051     POCKETSPHINX_ACMOD_OPTIONS, \
00052         POCKETSPHINX_BEAM_OPTIONS,   \
00053         POCKETSPHINX_SEARCH_OPTIONS, \
00054         POCKETSPHINX_DICT_OPTIONS, \
00055         POCKETSPHINX_NGRAM_OPTIONS, \
00056         POCKETSPHINX_FSG_OPTIONS, \
00057         POCKETSPHINX_DEBUG_OPTIONS
00058 
/**
 * Option entries for logging and debugging output.
 *
 * Each entry is a brace-enclosed group of four fields: the option name, an
 * ARG_* type constant, a third field that is either a string or NULL
 * (presumably the default value -- confirm against <sphinxbase/cmd_ln.h>),
 * and a help string.  Every entry in this group has NULL as its third field.
 *
 * Covers -logfn, -debug, -mfclogdir, -rawlogdir and -senlogdir.  The
 * expansion has no trailing comma.
 */
00060 #define POCKETSPHINX_DEBUG_OPTIONS                      \
00061     { "-logfn",                                         \
00062             ARG_STRING,                                 \
00063             NULL,                                       \
00064             "File to write log messages in"             \
00065      },                                                 \
00066     { "-debug",                                         \
00067             ARG_INT32,                                  \
00068             NULL,                                       \
00069             "Verbosity level for debugging messages"    \
00070      },                                                 \
00071      { "-mfclogdir",                                    \
00072              ARG_STRING,                                \
00073              NULL,                                      \
00074              "Directory to log feature files to"        \
00075              },                                         \
00076     { "-rawlogdir",                                     \
00077             ARG_STRING,                                 \
00078             NULL,                                       \
00079             "Directory to log raw audio files to" },    \
00080      { "-senlogdir",                                    \
00081              ARG_STRING,                                \
00082              NULL,                                      \
00083              "Directory to log senone score files to"   \
00084              }
00085 
/**
 * Option entries for search beam widths.
 *
 * Each entry is a brace-enclosed group of four fields: the option name, an
 * ARG_* type constant, a string third field (presumably the default value
 * -- confirm against <sphinxbase/cmd_ln.h>), and a help string.
 *
 * All entries are ARG_FLOAT64 except -pl_window, which is ARG_INT32 and is
 * described as a window size in frames.  As the -beam help text states,
 * smaller values mean a wider beam.  The expansion has no trailing comma.
 */
00087 #define POCKETSPHINX_BEAM_OPTIONS                                       \
00088 { "-beam",                                                              \
00089       ARG_FLOAT64,                                                      \
00090       "1e-48",                                                          \
00091       "Beam width applied to every frame in Viterbi search (smaller values mean wider beam)" }, \
00092 { "-wbeam",                                                             \
00093       ARG_FLOAT64,                                                      \
00094       "7e-29",                                                          \
00095       "Beam width applied to word exits" },                             \
00096 { "-pbeam",                                                             \
00097       ARG_FLOAT64,                                                      \
00098       "1e-48",                                                          \
00099       "Beam width applied to phone transitions" },                      \
00100 { "-lpbeam",                                                            \
00101       ARG_FLOAT64,                                                      \
00102       "1e-40",                                                          \
00103       "Beam width applied to last phone in words" },                    \
00104 { "-lponlybeam",                                                        \
00105       ARG_FLOAT64,                                                      \
00106       "7e-29",                                                          \
00107       "Beam width applied to last phone in single-phone words" },       \
00108 { "-fwdflatbeam",                                                       \
00109       ARG_FLOAT64,                                                      \
00110       "1e-64",                                                          \
00111       "Beam width applied to every frame in second-pass flat search" }, \
00112 { "-fwdflatwbeam",                                                      \
00113       ARG_FLOAT64,                                                      \
00114       "7e-29",                                                          \
00115       "Beam width applied to word exits in second-pass flat search" },  \
00116 { "-pl_window",                                                         \
00117       ARG_INT32,                                                        \
00118       "0",                                                              \
00119       "Phoneme lookahead window size, in frames" },                     \
00120 { "-pl_beam",                                                           \
00121       ARG_FLOAT64,                                                      \
00122       "1e-10",                                                          \
00123       "Beam width applied to phone loop search for lookahead" },        \
00124 { "-pl_pbeam",                                                          \
00125       ARG_FLOAT64,                                                      \
00126       "1e-5",                                                           \
00127       "Beam width applied to phone loop transitions for lookahead" }
00128 
/**
 * Option entries controlling which search passes run and how they prune.
 *
 * Each entry is a brace-enclosed group of four fields: the option name, an
 * ARG_* type constant, a string third field (presumably the default value
 * -- confirm against <sphinxbase/cmd_ln.h>), and a help string.
 *
 * The ARG_BOOLEAN entries (-compallsen, -fwdtree, -fwdflat, -bestpath,
 * -backtrace) use "yes"/"no" strings; the remaining entries are ARG_INT32
 * sizes, limits and window lengths.  Per their help text, -maxwpf and
 * -maxhmmpf take -1 to mean "no pruning".  The expansion has no trailing
 * comma.
 */
00130 #define POCKETSPHINX_SEARCH_OPTIONS \
00131 { "-compallsen",                                                                                \
00132       ARG_BOOLEAN,                                                                              \
00133       "no",                                                                                     \
00134       "Compute all senone scores in every frame (can be faster when there are many senones)" }, \
00135 { "-fwdtree",                                                                                   \
00136       ARG_BOOLEAN,                                                                              \
00137       "yes",                                                                                    \
00138       "Run forward lexicon-tree search (1st pass)" },                                           \
00139 { "-fwdflat",                                                                                   \
00140       ARG_BOOLEAN,                                                                              \
00141       "yes",                                                                                    \
00142       "Run forward flat-lexicon search over word lattice (2nd pass)" },                         \
00143 { "-bestpath",                                                                                  \
00144       ARG_BOOLEAN,                                                                              \
00145       "yes",                                                                                    \
00146       "Run bestpath (Dijkstra) search over word lattice (3rd pass)" },                          \
00147 { "-backtrace",                                                                                 \
00148       ARG_BOOLEAN,                                                                              \
00149       "no",                                                                                     \
00150       "Print results and backtraces to log file." },                                            \
00151 { "-latsize",                                                                                   \
00152       ARG_INT32,                                                                                \
00153       "5000",                                                                                   \
00154       "Initial backpointer table size" },                                                       \
00155 { "-maxwpf",                                                                                    \
00156       ARG_INT32,                                                                                \
00157       "-1",                                                                                     \
00158       "Maximum number of distinct word exits at each frame (or -1 for no pruning)" },           \
00159 { "-maxhmmpf",                                                                                  \
00160       ARG_INT32,                                                                                \
00161       "-1",                                                                                     \
00162       "Maximum number of active HMMs to maintain at each frame (or -1 for no pruning)" },       \
00163 { "-min_endfr",                                                                                 \
00164       ARG_INT32,                                                                                \
00165       "0",                                                                                      \
00166       "Nodes ignored in lattice construction if they persist for fewer than N frames" },        \
00167 { "-fwdflatefwid",                                                                              \
00168       ARG_INT32,                                                                                \
00169       "4",                                                                                      \
00170       "Minimum number of end frames for a word to be searched in fwdflat search" },             \
00171 { "-fwdflatsfwin",                                                                              \
00172       ARG_INT32,                                                                                \
00173       "25",                                                                                     \
00174       "Window of frames in lattice to search for successor words in fwdflat search " }
00175 
/**
 * Option entries for finite state grammar (FSG / JSGF) input.
 *
 * Each entry is a brace-enclosed group of four fields: the option name, an
 * ARG_* type constant, a third field that is either a string or NULL
 * (presumably the default value -- confirm against <sphinxbase/cmd_ln.h>),
 * and a help string.
 *
 * Covers the grammar sources (-fsg, -jsgf), the JSGF start rule (-toprule)
 * and two ARG_BOOLEAN switches (-fsgusealtpron, -fsgusefiller) whose third
 * field is "yes".  The expansion has no trailing comma.
 */
00177 #define POCKETSPHINX_FSG_OPTIONS \
00178     { "-fsg",                                                   \
00179             ARG_STRING,                                         \
00180             NULL,                                               \
00181             "Sphinx format finite state grammar file"},         \
00182 { "-jsgf",                                                      \
00183         ARG_STRING,                                             \
00184         NULL,                                                   \
00185         "JSGF grammar file" },                                  \
00186 { "-toprule",                                                   \
00187         ARG_STRING,                                             \
00188         NULL,                                                   \
00189         "Start rule for JSGF (first public rule is default)" }, \
00190 { "-fsgusealtpron",                                             \
00191         ARG_BOOLEAN,                                            \
00192         "yes",                                                  \
00193         "Add alternate pronunciations to FSG"},                 \
00194 { "-fsgusefiller",                                              \
00195         ARG_BOOLEAN,                                            \
00196         "yes",                                                  \
00197         "Insert filler words at each state."}
00198 
/**
 * Option entries for N-gram language model decoding.
 *
 * Each entry is a brace-enclosed group of four fields: the option name, an
 * ARG_* type constant, a third field that is either a string or NULL
 * (presumably the default value -- confirm against <sphinxbase/cmd_ln.h>),
 * and a help string.
 *
 * Covers the language model sources (-lm, -lmctl, -lmname), the ARG_FLOAT32
 * weights, penalties and probabilities, plus -bghist and -lextreedump.
 * Help strings carry no embedded newline, so that each option stays on a
 * single line wherever the help text is printed.  The expansion has no
 * trailing comma.
 */
00200 #define POCKETSPHINX_NGRAM_OPTIONS \
00201 { "-lm",                                                                                \
00202       ARG_STRING,                                                                       \
00203       NULL,                                                                             \
00204       "Word trigram language model input file" },                                       \
00205 { "-lmctl",                                                                             \
00206       ARG_STRING,                                                                       \
00207       NULL,                                                                             \
00208       "Specify a set of language model"},                                               \
00209 { "-lmname",                                                                            \
00210       ARG_STRING,                                                                       \
00211       "default",                                                                        \
00212       "Which language model in -lmctl to use by default"},                              \
00213 { "-lw",                                                                                \
00214       ARG_FLOAT32,                                                                      \
00215       "6.5",                                                                            \
00216       "Language model probability weight" },                                            \
00217 { "-fwdflatlw",                                                                         \
00218       ARG_FLOAT32,                                                                      \
00219       "8.5",                                                                            \
00220       "Language model probability weight for flat lexicon (2nd pass) decoding" },       \
00221 { "-bestpathlw",                                                                        \
00222       ARG_FLOAT32,                                                                      \
00223       "9.5",                                                                            \
00224       "Language model probability weight for bestpath search" },                        \
00225 { "-ascale",                                                                            \
00226       ARG_FLOAT32,                                                                      \
00227       "20.0",                                                                           \
00228       "Inverse of acoustic model scale for confidence score calculation" },             \
00229 { "-wip",                                                                               \
00230       ARG_FLOAT32,                                                                      \
00231       "0.65",                                                                           \
00232       "Word insertion penalty" },                                                       \
00233 { "-nwpen",                                                                             \
00234       ARG_FLOAT32,                                                                      \
00235       "1.0",                                                                            \
00236       "New word transition penalty" },                                                  \
00237 { "-pip",                                                                               \
00238       ARG_FLOAT32,                                                                      \
00239       "1.0",                                                                            \
00240       "Phone insertion penalty" },                                                      \
00241 { "-uw",                                                                                \
00242       ARG_FLOAT32,                                                                      \
00243       "1.0",                                                                            \
00244       "Unigram weight" },                                                               \
00245 { "-silprob",                                                                           \
00246       ARG_FLOAT32,                                                                      \
00247       "0.005",                                                                          \
00248       "Silence word transition probability" },                                          \
00249 { "-fillprob",                                                                          \
00250       ARG_FLOAT32,                                                                      \
00251       "1e-8",                                                                           \
00252         "Filler word transition probability" }, \
00253 { "-bghist",   \
00254       ARG_BOOLEAN, \
00255       "no", \
00256       "Bigram-mode: If TRUE only one BP entry/frame; else one per LM state" }, \
00257 { "-lextreedump", \
00258       ARG_INT32, \
00259       "0", \
00260       "Whether to dump the lextree structure to stderr (for debugging), 1 for Ravi's format, 2 for Dot format, Larger than 2 will be treated as Ravi's format" }
00261 
/**
 * Option entries for pronunciation dictionary handling.
 *
 * Each entry is a brace-enclosed group of four fields: the option name, a
 * type constant, a third field that is either a string or NULL (presumably
 * the default value -- confirm against <sphinxbase/cmd_ln.h>), and a help
 * string.
 *
 * -dict is the only entry in this file typed REQARG_STRING rather than
 * ARG_STRING.
 * NOTE(review): presumably this marks -dict as a mandatory argument --
 * confirm against the REQARG_* definitions in <sphinxbase/cmd_ln.h>.
 *
 * The expansion has no trailing comma.
 */
00263 #define POCKETSPHINX_DICT_OPTIONS \
00264     { "-dict",                                                  \
00265       REQARG_STRING,                                            \
00266       NULL,                                                     \
00267       "Main pronunciation dictionary (lexicon) input file" },   \
00268     { "-fdict",                                                 \
00269       ARG_STRING,                                               \
00270       NULL,                                                     \
00271       "Noise word pronunciation dictionary input file" },       \
00272     { "-dictcase",                                              \
00273       ARG_BOOLEAN,                                              \
00274       "no",                                                     \
00275       "Dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only)" },     \
00276     { "-maxnewoov",                                             \
00277       ARG_INT32,                                                \
00278       "20",                                                     \
00279       "Maximum new OOVs that can be added at run time" },       \
00280     { "-usewdphones",                                           \
00281       ARG_BOOLEAN,                                              \
00282       "no",                                                     \
00283       "Use within-word phones only" }
00284 
/**
 * Option entries for acoustic model loading and Gaussian scoring.
 *
 * Each entry is a brace-enclosed group of four fields: the option name, an
 * ARG_* type constant, a third field that is either a string or NULL
 * (presumably the default value -- confirm against <sphinxbase/cmd_ln.h>),
 * and a help string.
 *
 * Contents, in order:
 *  - model location and parameter files: -hmm, -featparams, -mdef,
 *    -senmgau, -tmat, -mean, -var, -mixw, -sendump, -mllr;
 *  - floors applied to loaded data: -tmatfloor, -varfloor, -mixwfloor;
 *  - scoring controls: -aw, -mmap, -ds, -topn, -topn_beam, -logbase;
 *  - kd-tree Gaussian selection: -kdtree, -kdmaxdepth, -kdmaxbbi.
 *
 * -topn_beam is typed ARG_STRING although its third field is "0"; its help
 * text says the value may also be a per-feature list.
 *
 * The expansion has no trailing comma.
 */
00286 #define POCKETSPHINX_ACMOD_OPTIONS \
00287 { "-hmm",                                                                       \
00288       ARG_STRING,                                                               \
00289       NULL,                                                                     \
00290       "Directory containing acoustic model files."},                            \
00291 { "-featparams",                                                                \
00292       ARG_STRING,                                                               \
00293       NULL,                                                                     \
00294       "File containing feature extraction parameters."},                        \
00295 { "-mdef",                                                                      \
00296       ARG_STRING,                                                               \
00297       NULL,                                                                     \
00298       "Model definition input file" },                                          \
00299 { "-senmgau", \
00300       ARG_STRING,                                                               \
00301       NULL,                                                                     \
00302       "Senone to codebook mapping input file (usually not needed)" }, \
00303 { "-tmat",                                                                      \
00304       ARG_STRING,                                                               \
00305       NULL,                                                                     \
00306       "HMM state transition matrix input file" },                               \
00307 { "-tmatfloor",                                                                 \
00308       ARG_FLOAT32,                                                              \
00309       "0.0001",                                                                 \
00310       "HMM state transition probability floor (applied to -tmat file)" },       \
00311 { "-mean",                                                                      \
00312       ARG_STRING,                                                               \
00313       NULL,                                                                     \
00314       "Mixture gaussian means input file" },                                    \
00315 { "-var",                                                                       \
00316       ARG_STRING,                                                               \
00317       NULL,                                                                     \
00318       "Mixture gaussian variances input file" },                                \
00319 { "-varfloor",                                                                  \
00320       ARG_FLOAT32,                                                              \
00321       "0.0001",                                                                 \
00322       "Mixture gaussian variance floor (applied to data from -var file)" },     \
00323 { "-mixw",                                                                      \
00324       ARG_STRING,                                                               \
00325       NULL,                                                                     \
00326       "Senone mixture weights input file (uncompressed)" },                     \
00327 { "-mixwfloor",                                                                 \
00328       ARG_FLOAT32,                                                              \
00329       "0.0000001",                                                              \
00330       "Senone mixture weights floor (applied to data from -mixw file)" },       \
00331 { "-aw",                                                                \
00332     ARG_INT32,                                                          \
00333     "1", \
00334         "Inverse weight applied to acoustic scores." },                 \
00335 { "-sendump",                                                                   \
00336       ARG_STRING,                                                               \
00337       NULL,                                                                     \
00338       "Senone dump (compressed mixture weights) input file" },                  \
00339 { "-mllr",                                                                      \
00340       ARG_STRING,                                                               \
00341       NULL,                                                                     \
00342       "MLLR transformation to apply to means and variances" },                  \
00343 { "-mmap",                                                                      \
00344       ARG_BOOLEAN,                                                              \
00345       "yes",                                                                    \
00346       "Use memory-mapped I/O (if possible) for model files" },                  \
00347 { "-ds",                                                                        \
00348       ARG_INT32,                                                                \
00349       "1",                                                                      \
00350       "Frame GMM computation downsampling ratio" },                             \
00351 { "-topn",                                                                      \
00352       ARG_INT32,                                                                \
00353       "4",                                                                      \
00354       "Maximum number of top Gaussians to use in scoring." },                   \
00355 { "-topn_beam",                                                                 \
00356       ARG_STRING,                                                               \
00357       "0",                                                                     \
00358       "Beam width used to determine top-N Gaussians (or a list, per-feature)" },\
00359 { "-kdtree",                                                                    \
00360       ARG_STRING,                                                               \
00361       NULL,                                                                     \
00362       "kd-Tree file for Gaussian selection" },                                  \
00363 { "-kdmaxdepth",                                                                \
00364       ARG_INT32,                                                                \
00365       "0",                                                                      \
00366       "Maximum depth of kd-Trees to use" },                                     \
00367 { "-kdmaxbbi",                                                                  \
00368       ARG_INT32,                                                                \
00369       "-1",                                                                     \
00370       "Maximum number of Gaussians per leaf node in kd-Trees" },                \
00371 { "-logbase",                                                                   \
00372       ARG_FLOAT32,                                                              \
00373       "1.0001",                                                                 \
00374       "Base in which all log-likelihoods calculated" }
00375 
/**
 * An option entry whose name, third field and help string are all NULL and
 * whose type field is 0.
 *
 * NOTE(review): presumably the sentinel that terminates an option array
 * built from the option-group macros in this header, none of which ends in a
 * terminator -- confirm against the array-walking code in
 * <sphinxbase/cmd_ln.h>.
 */
00376 #define CMDLN_EMPTY_OPTION { NULL, 0, NULL, NULL }
00377 
00378 #endif /* __PS_CMDLN_MACRO_H__ */