SphinxBase
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 2006 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * lda.c -- Read and apply LDA matrices to features. 00039 * 00040 * Author: David Huggins-Daines <dhuggins@cs.cmu.edu> 00041 */ 00042 00043 #include <assert.h> 00044 #include <string.h> 00045 #ifdef HAVE_CONFIG_H 00046 #include <config.h> 00047 #endif 00048 00049 #ifdef _MSC_VER 00050 #pragma warning (disable: 4018) 00051 #endif 00052 00053 #include "sphinxbase/feat.h" 00054 #include "sphinxbase/ckd_alloc.h" 00055 #include "sphinxbase/bio.h" 00056 #include "sphinxbase/err.h" 00057 00058 #define MATRIX_FILE_VERSION "0.1" 00059 00060 int32 00061 feat_read_lda(feat_t *feat, const char *ldafile, int32 dim) 00062 { 00063 FILE *fh; 00064 int32 byteswap, chksum_present; 00065 uint32 chksum, i, m, n; 00066 char **argname, **argval; 00067 00068 assert(feat); 00069 if (feat->n_stream != 1) { 00070 E_ERROR("LDA incompatible with multi-stream features (n_stream = %d)\n", 00071 feat->n_stream); 00072 return -1; 00073 } 00074 00075 if ((fh = fopen(ldafile, "rb")) == NULL) { 00076 E_ERROR_SYSTEM("Failed to open transform file '%s' for reading: %s\n", ldafile, strerror(errno)); 00077 return -1; 00078 } 00079 00080 if (bio_readhdr(fh, &argname, &argval, &byteswap) < 0) { 00081 E_ERROR("Failed to read header from transform file '%s'\n", ldafile); 00082 fclose(fh); 00083 return -1; 00084 } 00085 00086 chksum_present = 0; 00087 for (i = 0; argname[i]; i++) { 00088 if (strcmp(argname[i], "version") == 0) { 00089 if (strcmp(argval[i], MATRIX_FILE_VERSION) != 0) 00090 E_WARN("%s: Version mismatch: %s, expecting %s\n", 00091 ldafile, argval[i], MATRIX_FILE_VERSION); 00092 } 00093 else if (strcmp(argname[i], "chksum0") == 0) { 00094 chksum_present = 1; /* Ignore the associated value */ 00095 } 00096 } 00097 00098 bio_hdrarg_free(argname, argval); 00099 argname = argval = NULL; 00100 00101 chksum = 0; 00102 00103 if (feat->lda) 00104 ckd_free_3d((void ***)feat->lda); 00105 00106 { 00107 /* Use a temporary variable to avoid strict-aliasing problems. */ 00108 void ***outlda; 00109 00110 if (bio_fread_3d(&outlda, sizeof(float32), 00111 &feat->n_lda, &m, &n, 00112 fh, byteswap, &chksum) < 0) { 00113 E_ERROR_SYSTEM("%s: bio_fread_3d(lda) failed\n", ldafile); 00114 fclose(fh); 00115 return -1; 00116 } 00117 feat->lda = (void *)outlda; 00118 } 00119 fclose(fh); 00120 00121 #ifdef FIXED_POINT 00122 /* FIXME: This is a fragile hack that depends on mfcc_t and 00123 * float32 being the same size (which they are, but...) */ 00124 for (i = 0; i < feat->n_lda * m * n; ++i) { 00125 feat->lda[0][0][i] = FLOAT2MFCC(((float *)feat->lda[0][0])[i]); 00126 } 00127 #endif 00128 00129 /* Note that SphinxTrain stores the eigenvectors as row vectors. */ 00130 if (n != feat->stream_len[0]) 00131 E_FATAL("LDA matrix dimension %d doesn't match feature stream size %d\n", n, feat->stream_len[0]); 00132 00133 /* Override dim from file if it is 0 or greater than m. */ 00134 if (dim > m || dim <= 0) { 00135 dim = m; 00136 } 00137 feat->out_dim = dim; 00138 00139 return 0; 00140 } 00141 00142 void 00143 feat_lda_transform(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr) 00144 { 00145 mfcc_t *tmp; 00146 uint32 i, j, k; 00147 00148 tmp = ckd_calloc(fcb->stream_len[0], sizeof(mfcc_t)); 00149 for (i = 0; i < nfr; ++i) { 00150 /* Do the matrix multiplication inline here since fcb->lda 00151 * is transposed (eigenvectors in rows not columns). */ 00152 /* FIXME: In the future we ought to use the BLAS. */ 00153 memset(tmp, 0, sizeof(mfcc_t) * fcb->stream_len[0]); 00154 for (j = 0; j < feat_dimension(fcb); ++j) { 00155 for (k = 0; k < fcb->stream_len[0]; ++k) { 00156 tmp[j] += MFCCMUL(inout_feat[i][0][k], fcb->lda[0][j][k]); 00157 } 00158 } 00159 memcpy(inout_feat[i][0], tmp, fcb->stream_len[0] * sizeof(mfcc_t)); 00160 } 00161 ckd_free(tmp); 00162 }