PocketSphinx  0.6
src/libpocketsphinx/ps_alignment.c
Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2010 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00042 /* System headers. */
00043 
00044 /* SphinxBase headers. */
00045 #include <sphinxbase/ckd_alloc.h>
00046 
00047 /* Local headers. */
00048 #include "ps_alignment.h"
00049 
00050 ps_alignment_t *
00051 ps_alignment_init(dict2pid_t *d2p)
00052 {
00053     ps_alignment_t *al = ckd_calloc(1, sizeof(*al));
00054     al->d2p = dict2pid_retain(d2p);
00055     return al;
00056 }
00057 
00058 int
00059 ps_alignment_free(ps_alignment_t *al)
00060 {
00061     if (al == NULL)
00062         return 0;
00063     dict2pid_free(al->d2p);
00064     ckd_free(al->word.seq);
00065     ckd_free(al->sseq.seq);
00066     ckd_free(al->state.seq);
00067     ckd_free(al);
00068     return 0;
00069 }
00070 
00071 #define VECTOR_GROW 10
00072 static void *
00073 vector_grow_one(void *ptr, uint16 *n_alloc, uint16 *n, size_t item_size)
00074 {
00075     int newsize = *n + 1;
00076     if (newsize < *n_alloc) {
00077         *n += 1;
00078         return ptr;
00079     }
00080     newsize += VECTOR_GROW;
00081     if (newsize > 0xffff)
00082         return NULL;
00083     ptr = ckd_realloc(ptr, newsize * item_size);
00084     *n += 1;
00085     *n_alloc = newsize;
00086     return ptr;
00087 }
00088 
00089 static ps_alignment_entry_t *
00090 ps_alignment_vector_grow_one(ps_alignment_vector_t *vec)
00091 {
00092     void *ptr;
00093     ptr = vector_grow_one(vec->seq, &vec->n_alloc,
00094                           &vec->n_ent, sizeof(*vec->seq));
00095     if (ptr == NULL)
00096         return NULL;
00097     vec->seq = ptr;
00098     return vec->seq + vec->n_ent - 1;
00099 }
00100 
00101 static void
00102 ps_alignment_vector_empty(ps_alignment_vector_t *vec)
00103 {
00104     vec->n_ent = 0;
00105 }
00106 
00107 int
00108 ps_alignment_add_word(ps_alignment_t *al,
00109                       int32 wid, int duration)
00110 {
00111     ps_alignment_entry_t *ent;
00112 
00113     if ((ent = ps_alignment_vector_grow_one(&al->word)) == NULL)
00114         return 0;
00115     ent->id.wid = wid;
00116     if (al->word.n_ent > 1)
00117         ent->start = ent[-1].start + ent[-1].duration;
00118     else
00119         ent->start = 0;
00120     ent->duration = duration;
00121     ent->parent = PS_ALIGNMENT_NONE;
00122     ent->child = PS_ALIGNMENT_NONE;
00123 
00124     return al->word.n_ent;
00125 }
00126 
00127 int
00128 ps_alignment_populate(ps_alignment_t *al)
00129 {
00130     dict2pid_t *d2p;
00131     dict_t *dict;
00132     bin_mdef_t *mdef;
00133     int i, lc;
00134 
00135     /* Clear phone and state sequences. */
00136     ps_alignment_vector_empty(&al->sseq);
00137     ps_alignment_vector_empty(&al->state);
00138 
00139     /* For each word, expand to phones/senone sequences. */
00140     d2p = al->d2p;
00141     dict = d2p->dict;
00142     mdef = d2p->mdef;
00143     lc = bin_mdef_silphone(mdef);
00144     for (i = 0; i < al->word.n_ent; ++i) {
00145         ps_alignment_entry_t *went = al->word.seq + i;
00146         ps_alignment_entry_t *sent;
00147         int wid = went->id.wid;
00148         int len = dict_pronlen(dict, wid);
00149         int j, rc;
00150 
00151         if (i < al->word.n_ent - 1)
00152             rc = dict_first_phone(dict, al->word.seq[i+1].id.wid);
00153         else
00154             rc = bin_mdef_silphone(mdef);
00155 
00156         /* First phone. */
00157         if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
00158             E_ERROR("Failed to add phone entry!\n");
00159             return -1;
00160         }
00161         sent->id.pid.cipid = dict_first_phone(dict, wid);
00162         sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
00163         sent->start = went->start;
00164         sent->duration = went->duration;
00165         sent->parent = i;
00166         went->child = (uint16)(sent - al->sseq.seq);
00167         if (len == 1)
00168             sent->id.pid.ssid
00169                 = dict2pid_lrdiph_rc(d2p, sent->id.pid.cipid, lc, rc);
00170         else
00171             sent->id.pid.ssid
00172                 = dict2pid_ldiph_lc(d2p, sent->id.pid.cipid,
00173                                     dict_second_phone(dict, wid), lc);
00174         assert(sent->id.pid.ssid != BAD_SSID);
00175 
00176         /* Internal phones. */
00177         for (j = 1; j < len - 1; ++j) {
00178             if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
00179                 E_ERROR("Failed to add phone entry!\n");
00180                 return -1;
00181             }
00182             sent->id.pid.cipid = dict_pron(dict, wid, j);
00183             sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
00184             sent->id.pid.ssid = dict2pid_internal(d2p, wid, j);
00185             assert(sent->id.pid.ssid != BAD_SSID);
00186             sent->start = went->start;
00187             sent->duration = went->duration;
00188             sent->parent = i;
00189         }
00190 
00191         /* Last phone. */
00192         if (j < len) {
00193             xwdssid_t *rssid;
00194             assert(j == len - 1);
00195             if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
00196                 E_ERROR("Failed to add phone entry!\n");
00197                 return -1;
00198             }
00199             sent->id.pid.cipid = dict_last_phone(dict, wid);
00200             sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
00201             rssid = dict2pid_rssid(d2p, sent->id.pid.cipid,
00202                                    dict_second_last_phone(dict, wid));
00203             sent->id.pid.ssid = rssid->ssid[rssid->cimap[rc]];
00204             assert(sent->id.pid.ssid != BAD_SSID);
00205             sent->start = went->start;
00206             sent->duration = went->duration;
00207             sent->parent = i;
00208         }
00209         /* Update lc.  Could just use sent->id.pid.cipid here but that
00210          * seems needlessly obscure. */
00211         lc = dict_last_phone(dict, wid);
00212     }
00213 
00214     /* For each senone sequence, expand to senones.  (we could do this
00215      * nested above but this makes it more clear and easier to
00216      * refactor) */
00217     for (i = 0; i < al->sseq.n_ent; ++i) {
00218         ps_alignment_entry_t *pent = al->sseq.seq + i;
00219         ps_alignment_entry_t *sent;
00220         int j;
00221 
00222         for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
00223             if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
00224                 E_ERROR("Failed to add state entry!\n");
00225                 return -1;
00226             }
00227             sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
00228             assert(sent->id.senid != BAD_SENID);
00229             sent->start = pent->start;
00230             sent->duration = pent->duration;
00231             sent->parent = i;
00232             if (j == 0)
00233                 pent->child = (uint16)(sent - al->state.seq);
00234         }
00235     }
00236 
00237     return 0;
00238 }
00239 
00240 /* FIXME: Somewhat the same as the above function, needs refactoring */
00241 int
00242 ps_alignment_populate_ci(ps_alignment_t *al)
00243 {
00244     dict2pid_t *d2p;
00245     dict_t *dict;
00246     bin_mdef_t *mdef;
00247     int i;
00248 
00249     /* Clear phone and state sequences. */
00250     ps_alignment_vector_empty(&al->sseq);
00251     ps_alignment_vector_empty(&al->state);
00252 
00253     /* For each word, expand to phones/senone sequences. */
00254     d2p = al->d2p;
00255     dict = d2p->dict;
00256     mdef = d2p->mdef;
00257     for (i = 0; i < al->word.n_ent; ++i) {
00258         ps_alignment_entry_t *went = al->word.seq + i;
00259         ps_alignment_entry_t *sent;
00260         int wid = went->id.wid;
00261         int len = dict_pronlen(dict, wid);
00262         int j;
00263 
00264         for (j = 0; j < len; ++j) {
00265             if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
00266                 E_ERROR("Failed to add phone entry!\n");
00267                 return -1;
00268             }
00269             sent->id.pid.cipid = dict_pron(dict, wid, j);
00270             sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
00271             sent->id.pid.ssid = bin_mdef_pid2ssid(mdef, sent->id.pid.cipid);
00272             assert(sent->id.pid.ssid != BAD_SSID);
00273             sent->start = went->start;
00274             sent->duration = went->duration;
00275             sent->parent = i;
00276         }
00277     }
00278 
00279     /* For each senone sequence, expand to senones.  (we could do this
00280      * nested above but this makes it more clear and easier to
00281      * refactor) */
00282     for (i = 0; i < al->sseq.n_ent; ++i) {
00283         ps_alignment_entry_t *pent = al->sseq.seq + i;
00284         ps_alignment_entry_t *sent;
00285         int j;
00286 
00287         for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
00288             if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
00289                 E_ERROR("Failed to add state entry!\n");
00290                 return -1;
00291             }
00292             sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
00293             assert(sent->id.senid != BAD_SENID);
00294             sent->start = pent->start;
00295             sent->duration = pent->duration;
00296             sent->parent = i;
00297             if (j == 0)
00298                 pent->child = (uint16)(sent - al->state.seq);
00299         }
00300     }
00301 
00302     return 0;
00303 }
00304 
00305 int
00306 ps_alignment_propagate(ps_alignment_t *al)
00307 {
00308     ps_alignment_entry_t *last_ent = NULL;
00309     int i;
00310 
00311     /* Propagate duration up from states to phones. */
00312     for (i = 0; i < al->state.n_ent; ++i) {
00313         ps_alignment_entry_t *sent = al->state.seq + i;
00314         ps_alignment_entry_t *pent = al->sseq.seq + sent->parent;
00315         if (pent != last_ent) {
00316             pent->start = sent->start;
00317             pent->duration = 0;
00318         }
00319         pent->duration += sent->duration;
00320         last_ent = pent;
00321     }
00322 
00323     /* Propagate duration up from phones to words. */
00324     last_ent = NULL;
00325     for (i = 0; i < al->sseq.n_ent; ++i) {
00326         ps_alignment_entry_t *pent = al->sseq.seq + i;
00327         ps_alignment_entry_t *went = al->word.seq + pent->parent;
00328         if (went != last_ent) {
00329             went->start = pent->start;
00330             went->duration = 0;
00331         }
00332         went->duration += pent->duration;
00333         last_ent = went;
00334     }
00335 
00336     return 0;
00337 }
00338 
00339 int
00340 ps_alignment_n_words(ps_alignment_t *al)
00341 {
00342     return (int)al->word.n_ent;
00343 }
00344 
00345 int
00346 ps_alignment_n_phones(ps_alignment_t *al)
00347 {
00348     return (int)al->sseq.n_ent;
00349 }
00350 
00351 int
00352 ps_alignment_n_states(ps_alignment_t *al)
00353 {
00354     return (int)al->state.n_ent;
00355 }
00356 
00357 ps_alignment_iter_t *
00358 ps_alignment_words(ps_alignment_t *al)
00359 {
00360     ps_alignment_iter_t *itor;
00361 
00362     if (al->word.n_ent == 0)
00363         return NULL;
00364     itor = ckd_calloc(1, sizeof(*itor));
00365     itor->al = al;
00366     itor->vec = &al->word;
00367     itor->pos = 0;
00368     return itor;
00369 }
00370 
00371 ps_alignment_iter_t *
00372 ps_alignment_phones(ps_alignment_t *al)
00373 {
00374     ps_alignment_iter_t *itor;
00375 
00376     if (al->sseq.n_ent == 0)
00377         return NULL;
00378     itor = ckd_calloc(1, sizeof(*itor));
00379     itor->al = al;
00380     itor->vec = &al->sseq;
00381     itor->pos = 0;
00382     return itor;
00383 }
00384 
00385 ps_alignment_iter_t *
00386 ps_alignment_states(ps_alignment_t *al)
00387 {
00388     ps_alignment_iter_t *itor;
00389 
00390     if (al->state.n_ent == 0)
00391         return NULL;
00392     itor = ckd_calloc(1, sizeof(*itor));
00393     itor->al = al;
00394     itor->vec = &al->state;
00395     itor->pos = 0;
00396     return itor;
00397 }
00398 
00399 ps_alignment_entry_t *
00400 ps_alignment_iter_get(ps_alignment_iter_t *itor)
00401 {
00402     return itor->vec->seq + itor->pos;
00403 }
00404 
00405 int
00406 ps_alignment_iter_free(ps_alignment_iter_t *itor)
00407 {
00408     ckd_free(itor);
00409     return 0;
00410 }
00411 
00412 ps_alignment_iter_t *
00413 ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos)
00414 {
00415     if (itor == NULL)
00416         return NULL;
00417     if (pos >= itor->vec->n_ent) {
00418         ps_alignment_iter_free(itor);
00419         return NULL;
00420     }
00421     itor->pos = pos;
00422     return itor;
00423 }
00424 
00425 ps_alignment_iter_t *
00426 ps_alignment_iter_next(ps_alignment_iter_t *itor)
00427 {
00428     if (itor == NULL)
00429         return NULL;
00430     if (++itor->pos >= itor->vec->n_ent) {
00431         ps_alignment_iter_free(itor);
00432         return NULL;
00433     }
00434     return itor;
00435 }
00436 
00437 ps_alignment_iter_t *
00438 ps_alignment_iter_prev(ps_alignment_iter_t *itor)
00439 {
00440     if (itor == NULL)
00441         return NULL;
00442     if (--itor->pos < 0) {
00443         ps_alignment_iter_free(itor);
00444         return NULL;
00445     }
00446     return itor;
00447 }
00448 
00449 ps_alignment_iter_t *
00450 ps_alignment_iter_up(ps_alignment_iter_t *itor)
00451 {
00452     ps_alignment_iter_t *itor2;
00453     if (itor == NULL)
00454         return NULL;
00455     if (itor->vec == &itor->al->word)
00456         return NULL;
00457     if (itor->vec->seq[itor->pos].parent == PS_ALIGNMENT_NONE)
00458         return NULL;
00459     itor2 = ckd_calloc(1, sizeof(*itor2));
00460     itor2->al = itor->al;
00461     itor2->pos = itor->vec->seq[itor->pos].parent;
00462     if (itor->vec == &itor->al->sseq)
00463         itor2->vec = &itor->al->word;
00464     else
00465         itor2->vec = &itor->al->sseq;
00466     return itor2;
00467 }
00468 
00469 ps_alignment_iter_t *
00470 ps_alignment_iter_down(ps_alignment_iter_t *itor)
00471 {
00472     ps_alignment_iter_t *itor2;
00473     if (itor == NULL)
00474         return NULL;
00475     if (itor->vec == &itor->al->state)
00476         return NULL;
00477     if (itor->vec->seq[itor->pos].child == PS_ALIGNMENT_NONE)
00478         return NULL;
00479     itor2 = ckd_calloc(1, sizeof(*itor2));
00480     itor2->al = itor->al;
00481     itor2->pos = itor->vec->seq[itor->pos].child;
00482     if (itor->vec == &itor->al->word)
00483         itor2->vec = &itor->al->sseq;
00484     else
00485         itor2->vec = &itor->al->state;
00486     return itor2;
00487 }