00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #ifndef __FE_INTERNAL_H__
00039 #define __FE_INTERNAL_H__
00040
00041 #ifdef HAVE_CONFIG_H
00042 #include <config.h>
00043 #endif
00044
00045 #include <fe.h>
00046 #include <fixpoint.h>
00047
/* Give every declaration below C linkage when this header is included from
 * C++.  The matching closing brace is emitted at the end of the header. */
00048 #ifdef __cplusplus
00049 extern "C" {
00050 #endif
/* Never compiled (#if 0).  NOTE(review): the lone closing brace presumably
 * exists only to balance the one opened above for brace-matching editors
 * and auto-indenters -- confirm before removing. */
00051 #if 0
00052
00053 }
00054 #endif
00055
/*
 * Numeric types used by the front end, chosen at compile time:
 *
 *   FIXED16 defined      : frame_t/window_t are int16,   powspec_t is int32
 *   FIXED_POINT defined  : frame_t/window_t are fixed32, powspec_t is int32
 *   neither defined      : frame_t/window_t/powspec_t are all float64
 *
 * FIXED16 takes precedence when both macros are defined (it is tested first).
 * `complex` is a real/imaginary pair (r, i) whose members have the same type
 * as frame_t in each configuration.
 *
 * NOTE(review): the typedef name `complex` clashes with the `complex` macro
 * defined by the standard <complex.h> header; a translation unit that
 * includes <complex.h> before this header will fail to compile.
 */
00056 #ifdef FIXED16
00057
00058 typedef int16 frame_t;
00059 typedef int16 window_t;
00060 typedef int32 powspec_t;
00061 typedef struct { int16 r, i; } complex;
00062 #elif defined(FIXED_POINT)
00063 typedef fixed32 frame_t;
00064 typedef int32 powspec_t;
00065 typedef fixed32 window_t;
00066 typedef struct { fixed32 r, i; } complex;
00067 #else
00068 typedef float64 frame_t;
00069 typedef float64 powspec_t;
00070 typedef float64 window_t;
00071 typedef struct { float64 r, i; } complex;
00072 #endif
00073
00074
/*
 * Log-spectrum output selectors (non-zero values 1 and 2).
 * NOTE(review): presumably these are the values stored in the `log_spec`
 * member of struct fe_s, with 0 meaning "no log-spectrum output" -- confirm
 * against the code that reads that member.
 */
00075 enum {
00076 RAW_LOG_SPEC = 1,
00077 SMOOTH_LOG_SPEC = 2
00078 };
00079
00080
/*
 * Identifiers for the available DCT variants; LEGACY_DCT is 0 and is
 * therefore what a zero-initialised selector denotes.
 * NOTE(review): presumably stored in the `transform` member of struct fe_s
 * -- confirm against the code that reads that member.
 */
00081 enum {
00082 LEGACY_DCT = 0,
00083 DCT_II = 1,
00084 DCT_HTK = 2
00085 };
00086
/** Shorthand type name for struct melfb_s. */
00087 typedef struct melfb_s melfb_t;
/**
 * Mel filterbank description: configuration scalars plus the tables that
 * are derived from them.  A pointer to one of these is held in the `mel_fb`
 * member of struct fe_s.
 *
 * All pointer members are plain pointers; who allocates and frees them is
 * not visible in this header.
 */
00089 struct melfb_s {
/* Configuration scalars.  NOTE(review): frequencies are presumably in Hz,
 * matching the DEFAULT_*_FILT_FREQ macros in this header -- confirm. */
00090 float32 sampling_rate;
00091 int32 num_cepstra;
00092 int32 num_filters;
00093 int32 fft_size;
00094 float32 lower_filt_freq;
00095 float32 upper_filt_freq;
00096
/* Two-level table of mfcc_t values.  NOTE(review): presumably the cosine
 * basis filled in by fe_compute_melcosine(); dimensions not shown here. */
00097 mfcc_t **mel_cosine;
00098
/* Filter tables.  NOTE(review): presumably built by fe_build_melfilters();
 * their lengths and indexing scheme are not visible in this header. */
00099 mfcc_t *filt_coeffs;
00100 int16 *spec_start;
00101 int16 *filt_start;
00102 int16 *filt_width;
00103
/* Filter-shape and frequency-warping options.  The two strings are
 * pointers to const char; this struct does not show who owns them. */
00104 int32 doublewide;
00105 char const *warp_type;
00106 char const *warp_params;
00107 uint32 warp_id;
00108
/* NOTE(review): by name, sqrt(1/N) and sqrt(1/(2N)) scaling constants for
 * the DCT; which N is meant must be confirmed in the implementation. */
00109 mfcc_t sqrt_inv_n, sqrt_inv_2n;
00110
/* Liftering parameter and an associated table of mfcc_t weights. */
00111 int32 lifter_val;
00112 mfcc_t *lifter;
00113
/* Integer option; exact semantics defined by the filterbank builder. */
00114 int32 unit_area;
00115
00116
/* Integer option; exact semantics defined by the filterbank builder. */
00117 int32 round_filters;
00118 };
00119
00120
/* sqrt(1/2) (0.707106781186548), passed through FLOAT2MFCC so the constant
 * has the representation that macro produces.  FLOAT2MFCC is defined
 * outside this header. */
00121 #define SQRT_HALF FLOAT2MFCC(0.707106781186548)
00122
/**
 * Front-end object: configuration, derived parameters, lookup tables and
 * per-utterance working buffers.
 *
 * Several members are deliberately narrow (int16 / uint8), so values such
 * as fft_size and num_cepstra have a smaller range here than the int32
 * members of the same name in struct melfb_s.
 */
00124 struct fe_s {
/* Configuration handle and a reference count.  NOTE(review): the
 * retain/release protocol is implemented elsewhere. */
00125 cmd_ln_t *config;
00126 int refcount;
00127
/* Sampling and framing parameters.  NOTE(review): sampling_rate is
 * presumably in Hz and frame_shift in samples (cf. DEFAULT_BB_FRAME_SHIFT)
 * -- confirm. */
00128 float32 sampling_rate;
00129 int16 frame_rate;
00130 int16 frame_shift;
00131
/* Analysis window.  NOTE(review): window_length is a float32 and so
 * presumably a duration, while frame_size/fft_size are counts -- confirm. */
00132 float32 window_length;
00133 int16 frame_size;
00134 int16 fft_size;
00135
/* Small options packed as bytes.  `log_spec` and `transform` presumably
 * hold values from the two anonymous enums declared earlier in this
 * header; `fft_order` is presumably log2(fft_size) -- confirm. */
00136 uint8 fft_order;
00137 uint8 feature_dimension;
00138 uint8 num_cepstra;
00139 uint8 remove_dc;
00140 uint8 log_spec;
00141 uint8 swap;
00142 uint8 dither;
00143 uint8 transform;
00144
/* Pre-emphasis coefficient and a seed (presumably for dithering; see
 * fe_init_dither()). */
00145 float32 pre_emphasis_alpha;
00146 int32 seed;
00147
/* Processing state; `reserved` has no documented use in this header. */
00148 int16 frame_counter;
00149 uint8 start_flag;
00150 uint8 reserved;
00151
00152
/* Two frame_t tables.  NOTE(review): by name, cosine and sine tables,
 * presumably filled by fe_create_twiddle() for the FFT -- confirm. */
00153 frame_t *ccc, *sss;
00154
/* Mel filterbank description (see struct melfb_s). */
00155 melfb_t *mel_fb;
00156
/* Window coefficients; presumably filled by fe_create_hamming(). */
00157 window_t *hamming_window;
00158
00159
00160
/* Working buffers.  Sizes are not stated here; `num_overflow_samps` is
 * presumably the number of valid entries in `overflow_samps`, and `prior`
 * presumably the last sample carried between calls for pre-emphasis --
 * confirm in the implementation. */
00161 int16 *spch;
00162 frame_t *frame;
00163 powspec_t *spec, *mfspec;
00164 int16 *overflow_samps;
00165 int16 num_overflow_samps;
00166 int16 prior;
00167 };
00168
/*
 * Default analysis parameters for a 16000 sampling rate ("BB").
 * NOTE(review): "BB" presumably stands for broadband and the rates and
 * filter frequencies are presumably in Hz -- confirm.
 */
00169 #define BB_SAMPLING_RATE 16000
00170 #define DEFAULT_BB_FFT_SIZE 512
00171 #define DEFAULT_BB_FRAME_SHIFT 160
00172 #define DEFAULT_BB_NUM_FILTERS 40
00173 #define DEFAULT_BB_LOWER_FILT_FREQ 133.33334
00174 #define DEFAULT_BB_UPPER_FILT_FREQ 6855.4976
00175
/*
 * Default analysis parameters for an 8000 sampling rate ("NB", presumably
 * narrowband).  FFT size and frame shift are exactly half the BB values.
 * Note that the two NB filter frequencies are integer constants whereas
 * the BB ones are floating-point constants.
 */
00176 #define NB_SAMPLING_RATE 8000
00177 #define DEFAULT_NB_FFT_SIZE 256
00178 #define DEFAULT_NB_FRAME_SHIFT 80
00179 #define DEFAULT_NB_NUM_FILTERS 31
00180 #define DEFAULT_NB_LOWER_FILT_FREQ 200
00181 #define DEFAULT_NB_UPPER_FILT_FREQ 3500
00182
/**
 * Initialise the dithering machinery from @p seed.
 *
 * Takes no fe_t, so any state it sets up is not per-object.
 * NOTE(review): whether particular seed values have special meaning is
 * decided by the implementation, which is not visible here.
 */
00183 void fe_init_dither(int32 seed);
00184
00185
/**
 * Apply dither to a block of 16-bit samples.
 *
 * @param buffer  Samples; non-const, so presumably modified in place.
 * @param nsamps  Number of samples in @p buffer.
 * @return int32 status/count -- meaning not visible in this header.
 */
00186 int32 fe_dither(int16 *buffer, int32 nsamps);
00187
00188
/**
 * Load samples from @p in into the front end's frame buffer.
 *
 * @param fe   Front-end object.
 * @param in   Input samples (read-only).
 * @param len  Number of samples available at @p in.
 * @return int -- meaning not visible in this header.
 * NOTE(review): presumably used for the first frame, with
 * fe_shift_frame() handling subsequent ones -- confirm.
 */
00189 int fe_read_frame(fe_t *fe, int16 const *in, int32 len);
00190
00191
/**
 * Advance the front end's frame buffer using new samples from @p in.
 *
 * @param fe   Front-end object.
 * @param in   Input samples (read-only).
 * @param len  Number of samples available at @p in.
 * @return int -- meaning not visible in this header.
 * NOTE(review): presumably shifts by fe->frame_shift samples -- confirm.
 */
00192 int fe_shift_frame(fe_t *fe, int16 const *in, int32 len);
00193
00194
/**
 * Compute the feature vector for the frame currently held in @p fe and
 * store it in @p fea.
 *
 * @param fe   Front-end object.
 * @param fea  Output buffer of mfcc_t; required length is not stated here
 *             (presumably fe->feature_dimension entries -- confirm).
 * @return int32 -- meaning not visible in this header.
 */
00195 int32 fe_write_frame(fe_t *fe, mfcc_t *fea);
00196
00197
/*
 * Table-construction helpers.
 *
 * fe_build_melfilters / fe_compute_melcosine operate on a melfb_t and
 * return an int32 whose meaning is not visible here; presumably they fill
 * the filter tables and the mel_cosine table of that struct respectively.
 * fe_create_hamming writes a window of in_len coefficients into `in`
 * (despite its name, `in` is the non-const output buffer).
 * fe_create_twiddle takes the whole front-end object; presumably it fills
 * the ccc/sss tables of struct fe_s -- confirm.
 */
00198 int32 fe_build_melfilters(melfb_t *MEL_FB);
00199 int32 fe_compute_melcosine(melfb_t *MEL_FB);
00200 void fe_create_hamming(window_t *in, int32 in_len);
00201 void fe_create_twiddle(fe_t *fe);
00202
00203
/*
 * Conversions between mel log-spectrum (powspec_t) and cepstrum (mfcc_t).
 *
 * fe_spec2cep and fe_dct2 read `mflogspec` (const) and write `mfcep`;
 * fe_dct3 goes the other way, reading `mfcep` (const) and writing
 * `mflogspec`.  Buffer lengths are not stated in this header.
 * NOTE(review): the `htk` flag of fe_dct2 presumably selects HTK-style
 * scaling (cf. DCT_HTK) and fe_dct3 is presumably the inverse of fe_dct2
 * -- confirm in the implementation.
 */
00204 void fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep);
00205 void fe_dct2(fe_t *fe, const powspec_t *mflogspec, mfcc_t *mfcep, int htk);
00206 void fe_dct3(fe_t *fe, const mfcc_t *mfcep, powspec_t *mflogspec);
00207
00208 #ifdef __cplusplus
00209 }
00210 #endif
00211
00212 #endif