SphinxBase
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * feat.h -- Cepstral features computation. 00039 * 00040 * ********************************************** 00041 * CMU ARPA Speech Project 00042 * 00043 * Copyright (c) 1999 Carnegie Mellon University. 00044 * ALL RIGHTS RESERVED. 00045 * ********************************************** 00046 * 00047 * HISTORY 00048 * $Log$ 00049 * Revision 1.1 2006/04/05 20:27:30 dhdfu 00050 * A Great Reorganzation of header files and executables 00051 * 00052 * Revision 1.17 2006/02/23 03:59:40 arthchan2003 00053 * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: a, Free buffers correctly. b, Fixed dox-doc. 00054 * 00055 * Revision 1.16.4.1 2005/07/05 06:25:08 arthchan2003 00056 * Fixed dox-doc. 00057 * 00058 * Revision 1.16 2005/06/22 03:29:35 arthchan2003 00059 * Makefile.am s for all subdirectory of libs3decoder/ 00060 * 00061 * Revision 1.5 2005/06/13 04:02:56 archan 00062 * Fixed most doxygen-style documentation under libs3decoder. 00063 * 00064 * Revision 1.4 2005/04/21 23:50:26 archan 00065 * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used. 00066 * 00067 * Revision 1.3 2005/03/30 01:22:46 archan 00068 * Fixed mistakes in last updates. Add 00069 * 00070 * 00071 * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu) 00072 * Adding feat_free() to free allocated memory 00073 * 00074 * 04-Jan-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00075 * Started. 00076 */ 00077 00078 00079 #ifndef _S3_FEAT_H_ 00080 #define _S3_FEAT_H_ 00081 00082 #include <stdio.h> 00083 00084 /* Win32/WinCE DLL gunk */ 00085 #include <sphinxbase/sphinxbase_export.h> 00086 #include <sphinxbase/prim_type.h> 00087 #include <sphinxbase/fe.h> 00088 #include <sphinxbase/cmn.h> 00089 #include <sphinxbase/agc.h> 00090 00091 #ifdef __cplusplus 00092 extern "C" { 00093 #endif 00094 #if 0 00095 /* Fool Emacs. */ 00096 } 00097 #endif 00098 00102 #define LIVEBUFBLOCKSIZE 256 00104 #define S3_MAX_FRAMES 15000 /* RAH, I believe this is still too large, but better than before */ 00105 00106 #define cepstral_to_feature_command_line_macro() \ 00107 { "-feat", \ 00108 ARG_STRING, \ 00109 "1s_c_d_dd", \ 00110 "Feature stream type, depends on the acoustic model" }, \ 00111 { "-ceplen", \ 00112 ARG_INT32, \ 00113 "13", \ 00114 "Number of components in the input feature vector" }, \ 00115 { "-cmn", \ 00116 ARG_STRING, \ 00117 "current", \ 00118 "Cepstral mean normalization scheme ('current', 'prior', or 'none')" }, \ 00119 { "-cmninit", \ 00120 ARG_STRING, \ 00121 "8.0", \ 00122 "Initial values (comma-separated) for cepstral mean when 'prior' is used" }, \ 00123 { "-varnorm", \ 00124 ARG_BOOLEAN, \ 00125 "no", \ 00126 "Variance normalize each utterance (only if CMN == current)" }, \ 00127 { "-agc", \ 00128 ARG_STRING, \ 00129 "none", \ 00130 "Automatic gain control for c0 ('max', 'emax', 'noise', or 'none')" }, \ 00131 { "-agcthresh", \ 00132 ARG_FLOAT32, \ 00133 "2.0", \ 00134 "Initial threshold for automatic gain control" }, \ 00135 { "-lda", \ 00136 ARG_STRING, \ 00137 NULL, \ 00138 "File containing transformation matrix to be applied to features (single-stream features only)" }, \ 00139 { "-ldadim", \ 00140 ARG_INT32, \ 00141 "0", \ 00142 "Dimensionality of output of feature transformation (0 to use entire matrix)" }, \ 00143 {"-svspec", \ 00144 ARG_STRING, \ 00145 NULL, \ 00146 "Subvector specification (e.g., 24,0-11/25,12-23/26-38 or 0-12/13-25/26-38)"} 00147 00155 typedef struct feat_s { 00156 int refcount; 00157 char *name; 00158 int32 cepsize; 00159 int32 n_stream; 00160 int32 *stream_len; 00161 int32 window_size; 00163 int32 n_sv; 00164 int32 *sv_len; 00165 int32 **subvecs; 00166 mfcc_t *sv_buf; 00167 int32 sv_dim; 00169 cmn_type_t cmn; 00170 int32 varnorm; 00172 agc_type_t agc; 00186 void (*compute_feat)(struct feat_s *fcb, mfcc_t **input, mfcc_t **feat); 00187 cmn_t *cmn_struct; 00189 agc_t *agc_struct; 00192 mfcc_t **cepbuf; 00193 mfcc_t **tmpcepbuf; 00194 int32 bufpos; 00195 int32 curpos; 00197 mfcc_t ***lda; 00198 uint32 n_lda; 00199 uint32 out_dim; 00200 } feat_t; 00201 00205 #define feat_name(f) ((f)->name) 00206 00209 #define feat_cepsize(f) ((f)->cepsize) 00210 00213 #define feat_window_size(f) ((f)->window_size) 00214 00219 #define feat_n_stream(f) ((f)->n_stream) 00220 00225 #define feat_stream_len(f,i) ((f)->stream_len[i]) 00226 00229 #define feat_dimension1(f) ((f)->n_sv ? (f)->n_sv : f->n_stream) 00230 00233 #define feat_dimension2(f,i) ((f)->sv_len ? (f)->sv_len[i] : f->stream_len[i]) 00234 00237 #define feat_dimension(f) ((f)->out_dim) 00238 00241 #define feat_stream_lengths(f) ((f)->sv_len ? (f)->sv_len : f->stream_len) 00242 00265 SPHINXBASE_EXPORT 00266 int32 **parse_subvecs(char const *str); 00267 00271 SPHINXBASE_EXPORT 00272 void subvecs_free(int32 **subvecs); 00273 00274 00287 SPHINXBASE_EXPORT 00288 mfcc_t ***feat_array_alloc(feat_t *fcb, 00290 int32 nfr 00291 ); 00292 00296 SPHINXBASE_EXPORT 00297 void feat_array_free(mfcc_t ***feat); 00298 00299 00315 SPHINXBASE_EXPORT 00316 feat_t *feat_init(char const *type, 00317 cmn_type_t cmn, 00320 int32 varnorm, 00323 agc_type_t agc, 00325 int32 breport, 00326 int32 cepsize 00329 ); 00330 00335 SPHINXBASE_EXPORT 00336 int32 feat_read_lda(feat_t *feat, 00337 const char *ldafile, 00338 int32 dim 00339 ); 00340 00344 SPHINXBASE_EXPORT 00345 void feat_lda_transform(feat_t *fcb, 00346 mfcc_t ***inout_feat, 00347 uint32 nfr 00348 ); 00349 00368 SPHINXBASE_EXPORT 00369 int feat_set_subvecs(feat_t *fcb, int32 **subvecs); 00370 00374 SPHINXBASE_EXPORT 00375 void feat_print(feat_t *fcb, 00376 mfcc_t ***feat, 00377 int32 nfr, 00378 FILE *fp 00379 ); 00380 00381 00398 SPHINXBASE_EXPORT 00399 int32 feat_s2mfc2feat(feat_t *fcb, 00400 const char *file, 00401 const char *dir, 00403 const char *cepext, 00406 int32 sf, int32 ef, /* Start/End frames 00407 within file to be read. Use 00408 0,-1 to process entire 00409 file */ 00410 mfcc_t ***feat, 00412 int32 maxfr 00416 ); 00417 00418 00447 SPHINXBASE_EXPORT 00448 int32 feat_s2mfc2feat_live(feat_t *fcb, 00449 mfcc_t **uttcep, 00450 int32 *inout_ncep, 00452 int32 beginutt, 00453 int32 endutt, 00454 mfcc_t ***ofeat 00457 ); 00458 00459 00465 SPHINXBASE_EXPORT 00466 feat_t *feat_retain(feat_t *f); 00467 00473 SPHINXBASE_EXPORT 00474 int feat_free(feat_t *f 00475 ); 00476 00480 SPHINXBASE_EXPORT 00481 void feat_report(feat_t *f 00482 ); 00483 #ifdef __cplusplus 00484 } 00485 #endif 00486 00487 00488 #endif