/* PocketSphinx 0.6 */
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 2010 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 00042 /* System headers. */ 00043 00044 /* SphinxBase headers. */ 00045 #include <sphinxbase/ckd_alloc.h> 00046 00047 /* Local headers. */ 00048 #include "ps_alignment.h" 00049 00050 ps_alignment_t * 00051 ps_alignment_init(dict2pid_t *d2p) 00052 { 00053 ps_alignment_t *al = ckd_calloc(1, sizeof(*al)); 00054 al->d2p = dict2pid_retain(d2p); 00055 return al; 00056 } 00057 00058 int 00059 ps_alignment_free(ps_alignment_t *al) 00060 { 00061 if (al == NULL) 00062 return 0; 00063 dict2pid_free(al->d2p); 00064 ckd_free(al->word.seq); 00065 ckd_free(al->sseq.seq); 00066 ckd_free(al->state.seq); 00067 ckd_free(al); 00068 return 0; 00069 } 00070 00071 #define VECTOR_GROW 10 00072 static void * 00073 vector_grow_one(void *ptr, uint16 *n_alloc, uint16 *n, size_t item_size) 00074 { 00075 int newsize = *n + 1; 00076 if (newsize < *n_alloc) { 00077 *n += 1; 00078 return ptr; 00079 } 00080 newsize += VECTOR_GROW; 00081 if (newsize > 0xffff) 00082 return NULL; 00083 ptr = ckd_realloc(ptr, newsize * item_size); 00084 *n += 1; 00085 *n_alloc = newsize; 00086 return ptr; 00087 } 00088 00089 static ps_alignment_entry_t * 00090 ps_alignment_vector_grow_one(ps_alignment_vector_t *vec) 00091 { 00092 void *ptr; 00093 ptr = vector_grow_one(vec->seq, &vec->n_alloc, 00094 &vec->n_ent, sizeof(*vec->seq)); 00095 if (ptr == NULL) 00096 return NULL; 00097 vec->seq = ptr; 00098 return vec->seq + vec->n_ent - 1; 00099 } 00100 00101 static void 00102 ps_alignment_vector_empty(ps_alignment_vector_t *vec) 00103 { 00104 vec->n_ent = 0; 00105 } 00106 00107 int 00108 ps_alignment_add_word(ps_alignment_t *al, 00109 int32 wid, int duration) 00110 { 00111 ps_alignment_entry_t *ent; 00112 00113 if ((ent = ps_alignment_vector_grow_one(&al->word)) == NULL) 00114 return 0; 00115 ent->id.wid = wid; 00116 if (al->word.n_ent > 1) 00117 ent->start = ent[-1].start + ent[-1].duration; 00118 else 00119 
ent->start = 0; 00120 ent->duration = duration; 00121 ent->parent = PS_ALIGNMENT_NONE; 00122 ent->child = PS_ALIGNMENT_NONE; 00123 00124 return al->word.n_ent; 00125 } 00126 00127 int 00128 ps_alignment_populate(ps_alignment_t *al) 00129 { 00130 dict2pid_t *d2p; 00131 dict_t *dict; 00132 bin_mdef_t *mdef; 00133 int i, lc; 00134 00135 /* Clear phone and state sequences. */ 00136 ps_alignment_vector_empty(&al->sseq); 00137 ps_alignment_vector_empty(&al->state); 00138 00139 /* For each word, expand to phones/senone sequences. */ 00140 d2p = al->d2p; 00141 dict = d2p->dict; 00142 mdef = d2p->mdef; 00143 lc = bin_mdef_silphone(mdef); 00144 for (i = 0; i < al->word.n_ent; ++i) { 00145 ps_alignment_entry_t *went = al->word.seq + i; 00146 ps_alignment_entry_t *sent; 00147 int wid = went->id.wid; 00148 int len = dict_pronlen(dict, wid); 00149 int j, rc; 00150 00151 if (i < al->word.n_ent - 1) 00152 rc = dict_first_phone(dict, al->word.seq[i+1].id.wid); 00153 else 00154 rc = bin_mdef_silphone(mdef); 00155 00156 /* First phone. */ 00157 if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) { 00158 E_ERROR("Failed to add phone entry!\n"); 00159 return -1; 00160 } 00161 sent->id.pid.cipid = dict_first_phone(dict, wid); 00162 sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid); 00163 sent->start = went->start; 00164 sent->duration = went->duration; 00165 sent->parent = i; 00166 went->child = (uint16)(sent - al->sseq.seq); 00167 if (len == 1) 00168 sent->id.pid.ssid 00169 = dict2pid_lrdiph_rc(d2p, sent->id.pid.cipid, lc, rc); 00170 else 00171 sent->id.pid.ssid 00172 = dict2pid_ldiph_lc(d2p, sent->id.pid.cipid, 00173 dict_second_phone(dict, wid), lc); 00174 assert(sent->id.pid.ssid != BAD_SSID); 00175 00176 /* Internal phones. 
*/ 00177 for (j = 1; j < len - 1; ++j) { 00178 if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) { 00179 E_ERROR("Failed to add phone entry!\n"); 00180 return -1; 00181 } 00182 sent->id.pid.cipid = dict_pron(dict, wid, j); 00183 sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid); 00184 sent->id.pid.ssid = dict2pid_internal(d2p, wid, j); 00185 assert(sent->id.pid.ssid != BAD_SSID); 00186 sent->start = went->start; 00187 sent->duration = went->duration; 00188 sent->parent = i; 00189 } 00190 00191 /* Last phone. */ 00192 if (j < len) { 00193 xwdssid_t *rssid; 00194 assert(j == len - 1); 00195 if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) { 00196 E_ERROR("Failed to add phone entry!\n"); 00197 return -1; 00198 } 00199 sent->id.pid.cipid = dict_last_phone(dict, wid); 00200 sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid); 00201 rssid = dict2pid_rssid(d2p, sent->id.pid.cipid, 00202 dict_second_last_phone(dict, wid)); 00203 sent->id.pid.ssid = rssid->ssid[rssid->cimap[rc]]; 00204 assert(sent->id.pid.ssid != BAD_SSID); 00205 sent->start = went->start; 00206 sent->duration = went->duration; 00207 sent->parent = i; 00208 } 00209 /* Update lc. Could just use sent->id.pid.cipid here but that 00210 * seems needlessly obscure. */ 00211 lc = dict_last_phone(dict, wid); 00212 } 00213 00214 /* For each senone sequence, expand to senones. 
(we could do this 00215 * nested above but this makes it more clear and easier to 00216 * refactor) */ 00217 for (i = 0; i < al->sseq.n_ent; ++i) { 00218 ps_alignment_entry_t *pent = al->sseq.seq + i; 00219 ps_alignment_entry_t *sent; 00220 int j; 00221 00222 for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) { 00223 if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) { 00224 E_ERROR("Failed to add state entry!\n"); 00225 return -1; 00226 } 00227 sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j); 00228 assert(sent->id.senid != BAD_SENID); 00229 sent->start = pent->start; 00230 sent->duration = pent->duration; 00231 sent->parent = i; 00232 if (j == 0) 00233 pent->child = (uint16)(sent - al->state.seq); 00234 } 00235 } 00236 00237 return 0; 00238 } 00239 00240 /* FIXME: Somewhat the same as the above function, needs refactoring */ 00241 int 00242 ps_alignment_populate_ci(ps_alignment_t *al) 00243 { 00244 dict2pid_t *d2p; 00245 dict_t *dict; 00246 bin_mdef_t *mdef; 00247 int i; 00248 00249 /* Clear phone and state sequences. */ 00250 ps_alignment_vector_empty(&al->sseq); 00251 ps_alignment_vector_empty(&al->state); 00252 00253 /* For each word, expand to phones/senone sequences. 
*/ 00254 d2p = al->d2p; 00255 dict = d2p->dict; 00256 mdef = d2p->mdef; 00257 for (i = 0; i < al->word.n_ent; ++i) { 00258 ps_alignment_entry_t *went = al->word.seq + i; 00259 ps_alignment_entry_t *sent; 00260 int wid = went->id.wid; 00261 int len = dict_pronlen(dict, wid); 00262 int j; 00263 00264 for (j = 0; j < len; ++j) { 00265 if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) { 00266 E_ERROR("Failed to add phone entry!\n"); 00267 return -1; 00268 } 00269 sent->id.pid.cipid = dict_pron(dict, wid, j); 00270 sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid); 00271 sent->id.pid.ssid = bin_mdef_pid2ssid(mdef, sent->id.pid.cipid); 00272 assert(sent->id.pid.ssid != BAD_SSID); 00273 sent->start = went->start; 00274 sent->duration = went->duration; 00275 sent->parent = i; 00276 } 00277 } 00278 00279 /* For each senone sequence, expand to senones. (we could do this 00280 * nested above but this makes it more clear and easier to 00281 * refactor) */ 00282 for (i = 0; i < al->sseq.n_ent; ++i) { 00283 ps_alignment_entry_t *pent = al->sseq.seq + i; 00284 ps_alignment_entry_t *sent; 00285 int j; 00286 00287 for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) { 00288 if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) { 00289 E_ERROR("Failed to add state entry!\n"); 00290 return -1; 00291 } 00292 sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j); 00293 assert(sent->id.senid != BAD_SENID); 00294 sent->start = pent->start; 00295 sent->duration = pent->duration; 00296 sent->parent = i; 00297 if (j == 0) 00298 pent->child = (uint16)(sent - al->state.seq); 00299 } 00300 } 00301 00302 return 0; 00303 } 00304 00305 int 00306 ps_alignment_propagate(ps_alignment_t *al) 00307 { 00308 ps_alignment_entry_t *last_ent = NULL; 00309 int i; 00310 00311 /* Propagate duration up from states to phones. 
*/ 00312 for (i = 0; i < al->state.n_ent; ++i) { 00313 ps_alignment_entry_t *sent = al->state.seq + i; 00314 ps_alignment_entry_t *pent = al->sseq.seq + sent->parent; 00315 if (pent != last_ent) { 00316 pent->start = sent->start; 00317 pent->duration = 0; 00318 } 00319 pent->duration += sent->duration; 00320 last_ent = pent; 00321 } 00322 00323 /* Propagate duration up from phones to words. */ 00324 last_ent = NULL; 00325 for (i = 0; i < al->sseq.n_ent; ++i) { 00326 ps_alignment_entry_t *pent = al->sseq.seq + i; 00327 ps_alignment_entry_t *went = al->word.seq + pent->parent; 00328 if (went != last_ent) { 00329 went->start = pent->start; 00330 went->duration = 0; 00331 } 00332 went->duration += pent->duration; 00333 last_ent = went; 00334 } 00335 00336 return 0; 00337 } 00338 00339 int 00340 ps_alignment_n_words(ps_alignment_t *al) 00341 { 00342 return (int)al->word.n_ent; 00343 } 00344 00345 int 00346 ps_alignment_n_phones(ps_alignment_t *al) 00347 { 00348 return (int)al->sseq.n_ent; 00349 } 00350 00351 int 00352 ps_alignment_n_states(ps_alignment_t *al) 00353 { 00354 return (int)al->state.n_ent; 00355 } 00356 00357 ps_alignment_iter_t * 00358 ps_alignment_words(ps_alignment_t *al) 00359 { 00360 ps_alignment_iter_t *itor; 00361 00362 if (al->word.n_ent == 0) 00363 return NULL; 00364 itor = ckd_calloc(1, sizeof(*itor)); 00365 itor->al = al; 00366 itor->vec = &al->word; 00367 itor->pos = 0; 00368 return itor; 00369 } 00370 00371 ps_alignment_iter_t * 00372 ps_alignment_phones(ps_alignment_t *al) 00373 { 00374 ps_alignment_iter_t *itor; 00375 00376 if (al->sseq.n_ent == 0) 00377 return NULL; 00378 itor = ckd_calloc(1, sizeof(*itor)); 00379 itor->al = al; 00380 itor->vec = &al->sseq; 00381 itor->pos = 0; 00382 return itor; 00383 } 00384 00385 ps_alignment_iter_t * 00386 ps_alignment_states(ps_alignment_t *al) 00387 { 00388 ps_alignment_iter_t *itor; 00389 00390 if (al->state.n_ent == 0) 00391 return NULL; 00392 itor = ckd_calloc(1, sizeof(*itor)); 00393 itor->al = al; 
00394 itor->vec = &al->state; 00395 itor->pos = 0; 00396 return itor; 00397 } 00398 00399 ps_alignment_entry_t * 00400 ps_alignment_iter_get(ps_alignment_iter_t *itor) 00401 { 00402 return itor->vec->seq + itor->pos; 00403 } 00404 00405 int 00406 ps_alignment_iter_free(ps_alignment_iter_t *itor) 00407 { 00408 ckd_free(itor); 00409 return 0; 00410 } 00411 00412 ps_alignment_iter_t * 00413 ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos) 00414 { 00415 if (itor == NULL) 00416 return NULL; 00417 if (pos >= itor->vec->n_ent) { 00418 ps_alignment_iter_free(itor); 00419 return NULL; 00420 } 00421 itor->pos = pos; 00422 return itor; 00423 } 00424 00425 ps_alignment_iter_t * 00426 ps_alignment_iter_next(ps_alignment_iter_t *itor) 00427 { 00428 if (itor == NULL) 00429 return NULL; 00430 if (++itor->pos >= itor->vec->n_ent) { 00431 ps_alignment_iter_free(itor); 00432 return NULL; 00433 } 00434 return itor; 00435 } 00436 00437 ps_alignment_iter_t * 00438 ps_alignment_iter_prev(ps_alignment_iter_t *itor) 00439 { 00440 if (itor == NULL) 00441 return NULL; 00442 if (--itor->pos < 0) { 00443 ps_alignment_iter_free(itor); 00444 return NULL; 00445 } 00446 return itor; 00447 } 00448 00449 ps_alignment_iter_t * 00450 ps_alignment_iter_up(ps_alignment_iter_t *itor) 00451 { 00452 ps_alignment_iter_t *itor2; 00453 if (itor == NULL) 00454 return NULL; 00455 if (itor->vec == &itor->al->word) 00456 return NULL; 00457 if (itor->vec->seq[itor->pos].parent == PS_ALIGNMENT_NONE) 00458 return NULL; 00459 itor2 = ckd_calloc(1, sizeof(*itor2)); 00460 itor2->al = itor->al; 00461 itor2->pos = itor->vec->seq[itor->pos].parent; 00462 if (itor->vec == &itor->al->sseq) 00463 itor2->vec = &itor->al->word; 00464 else 00465 itor2->vec = &itor->al->sseq; 00466 return itor2; 00467 } 00468 00469 ps_alignment_iter_t * 00470 ps_alignment_iter_down(ps_alignment_iter_t *itor) 00471 { 00472 ps_alignment_iter_t *itor2; 00473 if (itor == NULL) 00474 return NULL; 00475 if (itor->vec == &itor->al->state) 
00476 return NULL; 00477 if (itor->vec->seq[itor->pos].child == PS_ALIGNMENT_NONE) 00478 return NULL; 00479 itor2 = ckd_calloc(1, sizeof(*itor2)); 00480 itor2->al = itor->al; 00481 itor2->pos = itor->vec->seq[itor->pos].child; 00482 if (itor->vec == &itor->al->word) 00483 itor2->vec = &itor->al->sseq; 00484 else 00485 itor2->vec = &itor->al->state; 00486 return itor2; 00487 }