SphinxBase
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2001 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * cont_ad.h -- Continuous A/D listening and silence filtering module. 00039 * 00040 * ********************************************** 00041 * CMU ARPA Speech Project 00042 * 00043 * Copyright (c) 1996 Carnegie Mellon University. 00044 * ALL RIGHTS RESERVED. 00045 * ********************************************** 00046 * 00047 * HISTORY 00048 * 00049 * 13-Jul-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00050 * Added spf and adbufsize to cont_ad_t in order to support variable 00051 * frame sizes depending on audio sampling rate. 00052 * 00053 * 30-Jun-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00054 * Added FILE* argument to cont_ad_powhist_dump(). 00055 * 00056 * 16-Jan-98 Paul Placeway (pwp@cs.cmu.edu) at Carnegie Mellon University 00057 * Changed to use dB instead of the weird power measure. 00058 * Added most system parameters to cont_ad_t instead of hardwiring 00059 * them in cont_ad.c. 00060 * Added cont_ad_set_params() and cont_ad_get_params(). 00061 * 00062 * 28-Jul-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00063 * Added cont_ad_t.siglvl. 00064 * 00065 * 27-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00066 * Added the option for cont_ad_read to return -1 on EOF. 00067 * 00068 * 21-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00069 * Added cont_ad_set_thresh(). 00070 * 00071 * 20-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00072 * Separated thresholds for speech and silence. 00073 * 00074 * 17-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00075 * Created, based loosely on Steve Reed's original implementation. 00076 */ 00077 00078 00079 #ifndef _CONT_AD_H_ 00080 #define _CONT_AD_H_ 00081 00082 /* Win32/WinCE DLL gunk */ 00083 #include <sphinxbase/sphinxbase_export.h> 00084 #include <sphinxbase/prim_type.h> 00085 #include <sphinxbase/ad.h> 00086 00114 #include <stdio.h> 00115 00116 00117 #ifdef __cplusplus 00118 extern "C" { 00119 #endif 00120 #if 0 00121 /* Fool Emacs. */ 00122 } 00123 #endif 00124 00125 /* States of continuous listening module */ 00126 #define CONT_AD_STATE_SIL 0 00127 #define CONT_AD_STATE_SPEECH 1 00128 00129 00135 typedef struct spseg_s { 00136 int32 startfrm; 00137 int32 nfrm; 00138 struct spseg_s *next; 00139 } spseg_t; 00140 00141 00151 typedef struct { 00152 /* Function to be called for obtaining A/D data (see prototype for ad_read in ad.h) */ 00153 int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max); 00154 ad_rec_t *ad; 00156 int32 rawmode; 00158 int16 *adbuf; 00160 /* ************************************************************************** 00161 * state, read_ts, and siglvl are provided for READ-ONLY use by client 00162 * applications, and are updated by calls to cont_ad_read() (see below). All 00163 * other variables should be left alone. 00164 */ 00165 int32 state; 00167 int32 read_ts; 00171 int32 seglen; 00175 int32 siglvl; 00178 /* ************************************************************************ */ 00179 00180 int32 sps; 00183 int32 eof; 00185 int32 spf; 00186 int32 adbufsize; 00187 int32 prev_sample; 00188 int32 headfrm; 00189 int32 n_frm; 00190 int32 n_sample; 00191 int32 tot_frm; 00192 int32 noise_level; 00194 int32 *pow_hist; 00195 char *frm_pow; 00197 int32 auto_thresh; 00198 int32 delta_sil; 00199 int32 delta_speech; 00200 int32 min_noise; 00201 int32 max_noise; 00202 int32 winsize; 00203 int32 speech_onset; 00204 int32 sil_onset; 00205 int32 leader; 00206 int32 trailer; 00208 int32 thresh_speech; 00210 int32 thresh_sil; 00212 int32 thresh_update; 00213 float32 adapt_rate; 00217 int32 tail_state; 00220 int32 win_startfrm; 00221 int32 win_validfrm; 00222 int32 n_other; 00224 spseg_t *spseg_head; 00225 spseg_t *spseg_tail; 00227 FILE *rawfp; 00231 FILE *logfp; 00236 int32 n_calib_frame; 00237 } cont_ad_t; 00238 00239 00255 SPHINXBASE_EXPORT 00256 cont_ad_t *cont_ad_init (ad_rec_t *ad, 00257 int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max) 00261 ); 00262 00269 SPHINXBASE_EXPORT 00270 cont_ad_t *cont_ad_init_rawmode (ad_rec_t *ad, 00271 int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max)); 00272 00273 00302 SPHINXBASE_EXPORT 00303 int32 cont_ad_read (cont_ad_t *r, 00304 int16 *buf, 00307 int32 max 00310 ); 00311 00315 SPHINXBASE_EXPORT 00316 int32 cont_ad_buffer_space(cont_ad_t *r); 00317 00330 SPHINXBASE_EXPORT 00331 int32 cont_ad_calib (cont_ad_t *cont 00332 ); 00333 00345 SPHINXBASE_EXPORT 00346 int32 cont_ad_calib_loop (cont_ad_t *r, int16 *buf, int32 max); 00347 00359 SPHINXBASE_EXPORT 00360 int32 cont_ad_calib_size(cont_ad_t *r); 00361 00374 SPHINXBASE_EXPORT 00375 int32 cont_ad_set_thresh (cont_ad_t *cont, 00376 int32 sil, 00377 int32 sp 00378 ); 00379 00380 00388 SPHINXBASE_EXPORT 00389 int32 cont_ad_set_params (cont_ad_t *r, int32 delta_sil, int32 delta_speech, 00390 int32 min_noise, int32 max_noise, 00391 int32 winsize, int32 speech_onset, int32 sil_onset, 00392 int32 leader, int32 trailer, 00393 float32 adapt_rate); 00394 00402 SPHINXBASE_EXPORT 00403 int32 cont_ad_get_params (cont_ad_t *r, int32 *delta_sil, int32 *delta_speech, 00404 int32 *min_noise, int32 *max_noise, 00405 int32 *winsize, int32 *speech_onset, int32 *sil_onset, 00406 int32 *leader, int32 *trailer, 00407 float32 *adapt_rate); 00408 00413 SPHINXBASE_EXPORT 00414 int32 cont_ad_reset (cont_ad_t *cont); /* In: Object pointer from cont_ad_init */ 00415 00416 00420 SPHINXBASE_EXPORT 00421 int32 cont_ad_close (cont_ad_t *cont); /* In: Object pointer from cont_ad_init */ 00422 00423 00427 SPHINXBASE_EXPORT 00428 void cont_ad_powhist_dump (FILE *fp, cont_ad_t *cont); 00429 00430 00435 SPHINXBASE_EXPORT 00436 int32 cont_ad_detach (cont_ad_t *c); 00437 00438 00444 SPHINXBASE_EXPORT 00445 int32 cont_ad_attach (cont_ad_t *c, ad_rec_t *a, int32 (*func)(ad_rec_t *, int16 *, int32)); 00446 00447 00459 SPHINXBASE_EXPORT 00460 int32 cont_ad_set_rawfp (cont_ad_t *c, /* The cont_ad object being addressed */ 00461 FILE *fp); /* File to which raw audio data is to 00462 be dumped; NULL to stop dumping. */ 00463 00471 SPHINXBASE_EXPORT 00472 int32 cont_ad_set_logfp (cont_ad_t *c, /* The cont_ad object being addressed */ 00473 FILE *fp); /* File to which logs are written; 00474 NULL to stop logging. */ 00475 00484 SPHINXBASE_EXPORT 00485 int32 cont_set_thresh(cont_ad_t *r, int32 silence, int32 speech); 00486 00487 #ifdef __cplusplus 00488 } 00489 #endif 00490 00491 00492 #endif