Classes | Macros | Functions
dict.h File Reference

Operations on dictionary. More...

#include <hash_table.h>
#include <s3types.h>
#include "mdef.h"
#include "lts.h"

Go to the source code of this file.

Classes

struct  dictword_t
 a structure for one dictionary word. More...
 
struct  dict_t
 a structure for a dictionary. More...
 

Macros

#define DICT_INC_SZ   4096
 
#define dict_size(d)   ((d)->n_word)
 
#define dict_basewid(d, w)   ((d)->word[w].basewid)
 
#define dict_wordstr(d, w)   ((d)->word[w].word)
 
#define dict_nextalt(d, w)   ((d)->word[w].alt)
 
#define dict_pronlen(d, w)   ((d)->word[w].pronlen)
 
#define dict_pron(d, w, p)   ((d)->word[w].ciphone[p])
 
#define dict_filler_start(d)   ((d)->filler_start)
 
#define dict_filler_end(d)   ((d)->filler_end)
 
#define dict_startwid(d)   ((d)->startwid)
 
#define dict_finishwid(d)   ((d)->finishwid)
 
#define dict_silwid(d)   ((d)->silwid)
 
#define dict_first_phone(d, w)   ((d)->word[w].ciphone[0])
 
#define dict_second_last_phone(d, w)   ((d)->word[w].ciphone[(d)->word[w].pronlen - 2])
 
#define dict_last_phone(d, w)   ((d)->word[w].ciphone[(d)->word[w].pronlen - 1])
 
#define S3_START_WORD   "<s>"
 
#define S3_FINISH_WORD   "</s>"
 
#define S3_SILENCE_WORD   "<sil>"
 
#define S3_UNKNOWN_WORD   "<UNK>"
 

Functions

S3DECODER_EXPORT dict_t * dict_init (mdef_t *mdef, const char *dictfile, const char *fillerfile, const char comp_sep, int useLTS, int breport)
 
S3DECODER_EXPORT s3wid_t dict_wordid (dict_t *d, const char *word)
 
S3DECODER_EXPORT int32 dict_filler_word (dict_t *d, s3wid_t w)
 
s3wid_t dict_add_word (dict_t *d, char *word, s3cipid_t *p, int32 np)
 
s3wid_t dict_wids2compwid (dict_t *d, s3wid_t *wid, int32 len)
 
const char * dict_ciphone_str (dict_t *d, s3wid_t wid, int32 pos)
 
s3wid_t _dict_basewid (dict_t *d, s3wid_t w)
 
char * _dict_wordstr (dict_t *d, s3wid_t wid)
 
s3wid_t _dict_nextalt (dict_t *d, s3wid_t wid)
 
int32 dict_word2basestr (char *word)
 
S3DECODER_EXPORT void dict_free (dict_t *d)
 
void dict_report (dict_t *d)
 

Detailed Description

Operations on dictionary.

Macro Definition Documentation

◆ dict_basewid

#define dict_basewid (   d,
  w
)    ((d)->word[w].basewid)

◆ dict_filler_end

#define dict_filler_end (   d)    ((d)->filler_end)

◆ dict_filler_start

#define dict_filler_start (   d)    ((d)->filler_start)

◆ dict_finishwid

#define dict_finishwid (   d)    ((d)->finishwid)

◆ dict_first_phone

#define dict_first_phone (   d,
  w
)    ((d)->word[w].ciphone[0])

◆ DICT_INC_SZ

#define DICT_INC_SZ   4096

◆ dict_last_phone

#define dict_last_phone (   d,
  w
)    ((d)->word[w].ciphone[(d)->word[w].pronlen - 1])

◆ dict_nextalt

#define dict_nextalt (   d,
  w
)    ((d)->word[w].alt)

◆ dict_pron

#define dict_pron (   d,
  w,
  p
)    ((d)->word[w].ciphone[p])

The CI phone of word w at position p.

◆ dict_pronlen

#define dict_pronlen (   d,
  w
)    ((d)->word[w].pronlen)

◆ dict_second_last_phone

#define dict_second_last_phone (   d,
  w
)    ((d)->word[w].ciphone[(d)->word[w].pronlen - 2])

◆ dict_silwid

#define dict_silwid (   d)    ((d)->silwid)

◆ dict_size

#define dict_size (   d)    ((d)->n_word)

Packaged macro access to dictionary members

◆ dict_startwid

#define dict_startwid (   d)    ((d)->startwid)

◆ dict_wordstr

#define dict_wordstr (   d,
  w
)    ((d)->word[w].word)

◆ S3_FINISH_WORD

#define S3_FINISH_WORD   "</s>"

◆ S3_SILENCE_WORD

#define S3_SILENCE_WORD   "<sil>"

◆ S3_START_WORD

#define S3_START_WORD   "<s>"

◆ S3_UNKNOWN_WORD

#define S3_UNKNOWN_WORD   "<UNK>"

Function Documentation

◆ _dict_basewid()

s3wid_t _dict_basewid ( dict_t *  d,
s3wid_t  w 
)

Return base word id for given word id w (which may be itself). w must be valid.

◆ _dict_nextalt()

s3wid_t _dict_nextalt ( dict_t *  d,
s3wid_t  wid 
)

Return the next alternative word id for the given word id, which must be valid. The returned id may be BAD_S3WID if there is none.

◆ _dict_wordstr()

char* _dict_wordstr ( dict_t *  d,
s3wid_t  wid 
)

Return word string for given word id, which must be valid.

◆ dict_add_word()

s3wid_t dict_add_word ( dict_t *  d,
char *  word,
s3cipid_t *  p,
int32  np 
)

Add a word with the given ciphone pronunciation list to the dictionary. Return value: Result word id if successful, BAD_S3WID otherwise

Parameters
d     The dictionary structure
word  The word

◆ dict_ciphone_str()

const char* dict_ciphone_str ( dict_t *  d,
s3wid_t  wid,
int32  pos 
)

Return value: CI phone string for the given word, phone position.

Parameters
d    In: Dictionary to look up
wid  In: Component word being looked up
pos  In: Pronunciation phone position

◆ dict_filler_word()

S3DECODER_EXPORT int32 dict_filler_word ( dict_t *  d,
s3wid_t  w 
)

Return 1 if w is a filler word, 0 if not. A filler word is one that was read in from the filler dictionary; however, sentence START and FINISH words are not filler words.

Parameters
d  The dictionary structure
w  The word

◆ dict_free()

S3DECODER_EXPORT void dict_free ( dict_t *  d)

Free memory allocated for the dictionary

◆ dict_init()

S3DECODER_EXPORT dict_t* dict_init ( mdef_t *  mdef,
const char *  dictfile,
const char *  fillerfile,
const char  comp_sep,
int  useLTS,
int  breport 
)

Initialize with given main and filler dictionary files. fillerfile can be NULL (but external modules might impose their own requirements). Return ptr to dict_t if successful, NULL otherwise.

Parameters
mdef        For looking up CI phone IDs; NULL if none, in which case CI phones kept internally
dictfile    Main dictionary file
fillerfile  Filler dictionary file
comp_sep    Compound word separator character, or 0 if no compound words
useLTS      Whether to use letter-to-sound rules
breport     Whether we should report the progress

◆ dict_report()

void dict_report ( dict_t *  d)

Report a dictionary structure

Parameters
d  A dictionary structure

◆ dict_wids2compwid()

s3wid_t dict_wids2compwid ( dict_t *  d,
s3wid_t *  wid,
int32  len 
)

Look for a compound word that matches the given word-id sequence. Return value: Base ID of compound word if found, else BAD_S3WID.

Parameters
d    In: Dictionary to look up
wid  In: Component words to look for
len  In: No. of component words

◆ dict_word2basestr()

int32 dict_word2basestr ( char *  word)

If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative pronunciation specification), strip that trailing portion from it. Note that the given string is modified. Return value: If string was modified, the character position at which the original string was truncated; otherwise -1.

◆ dict_wordid()

S3DECODER_EXPORT s3wid_t dict_wordid ( dict_t *  d,
const char *  word 
)

Return word id for given word string if present. Otherwise return BAD_S3WID