PocketSphinx 5prealpha
ps_alignment.c
Go to the documentation of this file.
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 2010 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
42/* System headers. */
43
44/* SphinxBase headers. */
45#include <sphinxbase/ckd_alloc.h>
46
47/* Local headers. */
48#include "ps_alignment.h"
49
52{
53 ps_alignment_t *al = ckd_calloc(1, sizeof(*al));
54 al->d2p = dict2pid_retain(d2p);
55 return al;
56}
57
58int
60{
61 if (al == NULL)
62 return 0;
63 dict2pid_free(al->d2p);
64 ckd_free(al->word.seq);
65 ckd_free(al->sseq.seq);
66 ckd_free(al->state.seq);
67 ckd_free(al);
68 return 0;
69}
70
71#define VECTOR_GROW 10
72static void *
73vector_grow_one(void *ptr, uint16 *n_alloc, uint16 *n, size_t item_size)
74{
75 int newsize = *n + 1;
76 if (newsize < *n_alloc) {
77 *n += 1;
78 return ptr;
79 }
80 newsize += VECTOR_GROW;
81 if (newsize > 0xffff)
82 return NULL;
83 ptr = ckd_realloc(ptr, newsize * item_size);
84 *n += 1;
85 *n_alloc = newsize;
86 return ptr;
87}
88
90ps_alignment_vector_grow_one(ps_alignment_vector_t *vec)
91{
92 void *ptr;
93 ptr = vector_grow_one(vec->seq, &vec->n_alloc,
94 &vec->n_ent, sizeof(*vec->seq));
95 if (ptr == NULL)
96 return NULL;
97 vec->seq = ptr;
98 return vec->seq + vec->n_ent - 1;
99}
100
101static void
102ps_alignment_vector_empty(ps_alignment_vector_t *vec)
103{
104 vec->n_ent = 0;
105}
106
107int
109 int32 wid, int duration)
110{
112
113 if ((ent = ps_alignment_vector_grow_one(&al->word)) == NULL)
114 return 0;
115 ent->id.wid = wid;
116 if (al->word.n_ent > 1)
117 ent->start = ent[-1].start + ent[-1].duration;
118 else
119 ent->start = 0;
120 ent->duration = duration;
121 ent->score = 0;
122 ent->parent = PS_ALIGNMENT_NONE;
123 ent->child = PS_ALIGNMENT_NONE;
124
125 return al->word.n_ent;
126}
127
128int
130{
131 dict2pid_t *d2p;
132 dict_t *dict;
133 bin_mdef_t *mdef;
134 int i, lc;
135
136 /* Clear phone and state sequences. */
137 ps_alignment_vector_empty(&al->sseq);
138 ps_alignment_vector_empty(&al->state);
139
140 /* For each word, expand to phones/senone sequences. */
141 d2p = al->d2p;
142 dict = d2p->dict;
143 mdef = d2p->mdef;
144 lc = bin_mdef_silphone(mdef);
145 for (i = 0; i < al->word.n_ent; ++i) {
146 ps_alignment_entry_t *went = al->word.seq + i;
148 int wid = went->id.wid;
149 int len = dict_pronlen(dict, wid);
150 int j, rc;
151
152 if (i < al->word.n_ent - 1)
153 rc = dict_first_phone(dict, al->word.seq[i+1].id.wid);
154 else
155 rc = bin_mdef_silphone(mdef);
156
157 /* First phone. */
158 if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
159 E_ERROR("Failed to add phone entry!\n");
160 return -1;
161 }
162 sent->id.pid.cipid = dict_first_phone(dict, wid);
163 sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
164 sent->start = went->start;
165 sent->duration = went->duration;
166 sent->score = 0;
167 sent->parent = i;
168 went->child = (uint16)(sent - al->sseq.seq);
169 if (len == 1)
170 sent->id.pid.ssid
171 = dict2pid_lrdiph_rc(d2p, sent->id.pid.cipid, lc, rc);
172 else
173 sent->id.pid.ssid
174 = dict2pid_ldiph_lc(d2p, sent->id.pid.cipid,
175 dict_second_phone(dict, wid), lc);
176 assert(sent->id.pid.ssid != BAD_SSID);
177
178 /* Internal phones. */
179 for (j = 1; j < len - 1; ++j) {
180 if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
181 E_ERROR("Failed to add phone entry!\n");
182 return -1;
183 }
184 sent->id.pid.cipid = dict_pron(dict, wid, j);
185 sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
186 sent->id.pid.ssid = dict2pid_internal(d2p, wid, j);
187 assert(sent->id.pid.ssid != BAD_SSID);
188 sent->start = went->start;
189 sent->duration = went->duration;
190 sent->score = 0;
191 sent->parent = i;
192 }
193
194 /* Last phone. */
195 if (j < len) {
196 xwdssid_t *rssid;
197 assert(j == len - 1);
198 if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
199 E_ERROR("Failed to add phone entry!\n");
200 return -1;
201 }
202 sent->id.pid.cipid = dict_last_phone(dict, wid);
203 sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
204 rssid = dict2pid_rssid(d2p, sent->id.pid.cipid,
205 dict_second_last_phone(dict, wid));
206 sent->id.pid.ssid = rssid->ssid[rssid->cimap[rc]];
207 assert(sent->id.pid.ssid != BAD_SSID);
208 sent->start = went->start;
209 sent->duration = went->duration;
210 sent->score = 0;
211 sent->parent = i;
212 }
213 /* Update lc. Could just use sent->id.pid.cipid here but that
214 * seems needlessly obscure. */
215 lc = dict_last_phone(dict, wid);
216 }
217
218 /* For each senone sequence, expand to senones. (we could do this
219 * nested above but this makes it more clear and easier to
220 * refactor) */
221 for (i = 0; i < al->sseq.n_ent; ++i) {
222 ps_alignment_entry_t *pent = al->sseq.seq + i;
224 int j;
225
226 for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
227 if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
228 E_ERROR("Failed to add state entry!\n");
229 return -1;
230 }
231 sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
232 assert(sent->id.senid != BAD_SENID);
233 sent->start = pent->start;
234 sent->duration = pent->duration;
235 sent->score = 0;
236 sent->parent = i;
237 if (j == 0)
238 pent->child = (uint16)(sent - al->state.seq);
239 }
240 }
241
242 return 0;
243}
244
245/* FIXME: Somewhat the same as the above function, needs refactoring */
246int
248{
249 dict2pid_t *d2p;
250 dict_t *dict;
251 bin_mdef_t *mdef;
252 int i;
253
254 /* Clear phone and state sequences. */
255 ps_alignment_vector_empty(&al->sseq);
256 ps_alignment_vector_empty(&al->state);
257
258 /* For each word, expand to phones/senone sequences. */
259 d2p = al->d2p;
260 dict = d2p->dict;
261 mdef = d2p->mdef;
262 for (i = 0; i < al->word.n_ent; ++i) {
263 ps_alignment_entry_t *went = al->word.seq + i;
265 int wid = went->id.wid;
266 int len = dict_pronlen(dict, wid);
267 int j;
268
269 for (j = 0; j < len; ++j) {
270 if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
271 E_ERROR("Failed to add phone entry!\n");
272 return -1;
273 }
274 sent->id.pid.cipid = dict_pron(dict, wid, j);
275 sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
276 sent->id.pid.ssid = bin_mdef_pid2ssid(mdef, sent->id.pid.cipid);
277 assert(sent->id.pid.ssid != BAD_SSID);
278 sent->start = went->start;
279 sent->duration = went->duration;
280 sent->score = 0;
281 sent->parent = i;
282 }
283 }
284
285 /* For each senone sequence, expand to senones. (we could do this
286 * nested above but this makes it more clear and easier to
287 * refactor) */
288 for (i = 0; i < al->sseq.n_ent; ++i) {
289 ps_alignment_entry_t *pent = al->sseq.seq + i;
291 int j;
292
293 for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
294 if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
295 E_ERROR("Failed to add state entry!\n");
296 return -1;
297 }
298 sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
299 assert(sent->id.senid != BAD_SENID);
300 sent->start = pent->start;
301 sent->duration = pent->duration;
302 sent->score = 0;
303 sent->parent = i;
304 if (j == 0)
305 pent->child = (uint16)(sent - al->state.seq);
306 }
307 }
308
309 return 0;
310}
311
312int
314{
315 ps_alignment_entry_t *last_ent = NULL;
316 int i;
317
318 /* Propagate duration up from states to phones. */
319 for (i = 0; i < al->state.n_ent; ++i) {
320 ps_alignment_entry_t *sent = al->state.seq + i;
321 ps_alignment_entry_t *pent = al->sseq.seq + sent->parent;
322 if (pent != last_ent) {
323 pent->start = sent->start;
324 pent->duration = 0;
325 pent->score = 0;
326 }
327 pent->duration += sent->duration;
328 pent->score += sent->score;
329 last_ent = pent;
330 }
331
332 /* Propagate duration up from phones to words. */
333 last_ent = NULL;
334 for (i = 0; i < al->sseq.n_ent; ++i) {
335 ps_alignment_entry_t *pent = al->sseq.seq + i;
336 ps_alignment_entry_t *went = al->word.seq + pent->parent;
337 if (went != last_ent) {
338 went->start = pent->start;
339 went->duration = 0;
340 went->score = 0;
341 }
342 went->duration += pent->duration;
343 went->score += pent->score;
344 last_ent = went;
345 }
346
347 return 0;
348}
349
350int
352{
353 return (int)al->word.n_ent;
354}
355
356int
358{
359 return (int)al->sseq.n_ent;
360}
361
362int
364{
365 return (int)al->state.n_ent;
366}
367
370{
372
373 if (al->word.n_ent == 0)
374 return NULL;
375 itor = ckd_calloc(1, sizeof(*itor));
376 itor->al = al;
377 itor->vec = &al->word;
378 itor->pos = 0;
379 return itor;
380}
381
384{
386
387 if (al->sseq.n_ent == 0)
388 return NULL;
389 itor = ckd_calloc(1, sizeof(*itor));
390 itor->al = al;
391 itor->vec = &al->sseq;
392 itor->pos = 0;
393 return itor;
394}
395
398{
400
401 if (al->state.n_ent == 0)
402 return NULL;
403 itor = ckd_calloc(1, sizeof(*itor));
404 itor->al = al;
405 itor->vec = &al->state;
406 itor->pos = 0;
407 return itor;
408}
409
412{
413 return itor->vec->seq + itor->pos;
414}
415
416int
418{
419 ckd_free(itor);
420 return 0;
421}
422
425{
426 if (itor == NULL)
427 return NULL;
428 if (pos >= itor->vec->n_ent) {
430 return NULL;
431 }
432 itor->pos = pos;
433 return itor;
434}
435
438{
439 if (itor == NULL)
440 return NULL;
441 if (++itor->pos >= itor->vec->n_ent) {
443 return NULL;
444 }
445 return itor;
446}
447
450{
451 if (itor == NULL)
452 return NULL;
453 if (--itor->pos < 0) {
455 return NULL;
456 }
457 return itor;
458}
459
462{
463 ps_alignment_iter_t *itor2;
464 if (itor == NULL)
465 return NULL;
466 if (itor->vec == &itor->al->word)
467 return NULL;
468 if (itor->vec->seq[itor->pos].parent == PS_ALIGNMENT_NONE)
469 return NULL;
470 itor2 = ckd_calloc(1, sizeof(*itor2));
471 itor2->al = itor->al;
472 itor2->pos = itor->vec->seq[itor->pos].parent;
473 if (itor->vec == &itor->al->sseq)
474 itor2->vec = &itor->al->word;
475 else
476 itor2->vec = &itor->al->sseq;
477 return itor2;
478}
479
482{
483 ps_alignment_iter_t *itor2;
484 if (itor == NULL)
485 return NULL;
486 if (itor->vec == &itor->al->state)
487 return NULL;
488 if (itor->vec->seq[itor->pos].child == PS_ALIGNMENT_NONE)
489 return NULL;
490 itor2 = ckd_calloc(1, sizeof(*itor2));
491 itor2->al = itor->al;
492 itor2->pos = itor->vec->seq[itor->pos].child;
493 if (itor->vec == &itor->al->word)
494 itor2->vec = &itor->al->sseq;
495 else
496 itor2->vec = &itor->al->state;
497 return itor2;
498}
#define BAD_SSID
Invalid senone sequence ID (limited to 16 bits for PocketSphinx).
Definition: bin_mdef.h:94
#define BAD_SENID
Invalid senone ID (limited to 16 bits for PocketSphinx).
Definition: bin_mdef.h:98
dict2pid_t * dict2pid_retain(dict2pid_t *d2p)
Retain a pointer to dict2pid.
Definition: dict2pid.c:500
int dict2pid_free(dict2pid_t *d2p)
Free the memory dict2pid structure.
Definition: dict2pid.c:507
s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos)
Return the senone sequence ID for the given word position.
Definition: dict2pid.c:367
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition: dict2pid.h:115
#define dict_pron(d, w, p)
The CI phones of the word w at position p.
Definition: dict.h:165
ps_alignment_iter_t * ps_alignment_states(ps_alignment_t *al)
Iterate over the alignment starting at the first state.
Definition: ps_alignment.c:397
ps_alignment_iter_t * ps_alignment_iter_next(ps_alignment_iter_t *itor)
Move an alignment iterator forward.
Definition: ps_alignment.c:437
ps_alignment_iter_t * ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos)
Move alignment iterator to given index.
Definition: ps_alignment.c:424
ps_alignment_iter_t * ps_alignment_words(ps_alignment_t *al)
Iterate over the alignment starting at the first word.
Definition: ps_alignment.c:369
ps_alignment_t * ps_alignment_init(dict2pid_t *d2p)
Create a new, empty alignment.
Definition: ps_alignment.c:51
ps_alignment_iter_t * ps_alignment_iter_prev(ps_alignment_iter_t *itor)
Move an alignment iterator back.
Definition: ps_alignment.c:449
int ps_alignment_populate(ps_alignment_t *al)
Populate lower layers using available word information.
Definition: ps_alignment.c:129
ps_alignment_entry_t * ps_alignment_iter_get(ps_alignment_iter_t *itor)
Get the alignment entry pointed to by an iterator.
Definition: ps_alignment.c:411
ps_alignment_iter_t * ps_alignment_iter_up(ps_alignment_iter_t *itor)
Get a new iterator starting at the parent of the current node.
Definition: ps_alignment.c:461
int ps_alignment_iter_free(ps_alignment_iter_t *itor)
Release an iterator before completing all iterations.
Definition: ps_alignment.c:417
int ps_alignment_n_words(ps_alignment_t *al)
Number of words.
Definition: ps_alignment.c:351
int ps_alignment_n_phones(ps_alignment_t *al)
Number of phones.
Definition: ps_alignment.c:357
int ps_alignment_add_word(ps_alignment_t *al, int32 wid, int duration)
Append a word.
Definition: ps_alignment.c:108
int ps_alignment_free(ps_alignment_t *al)
Release an alignment.
Definition: ps_alignment.c:59
int ps_alignment_n_states(ps_alignment_t *al)
Number of states.
Definition: ps_alignment.c:363
ps_alignment_iter_t * ps_alignment_phones(ps_alignment_t *al)
Iterate over the alignment starting at the first phone.
Definition: ps_alignment.c:383
int ps_alignment_propagate(ps_alignment_t *al)
Propagate timing information up from state sequence.
Definition: ps_alignment.c:313
int ps_alignment_populate_ci(ps_alignment_t *al)
Populate lower layers using context-independent phones.
Definition: ps_alignment.c:247
ps_alignment_iter_t * ps_alignment_iter_down(ps_alignment_iter_t *itor)
Get a new iterator starting at the first child of the current node.
Definition: ps_alignment.c:481
Multi-level alignment structure.
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
bin_mdef_t * mdef
Model definition, used to generate internal ssids on the fly.
Definition: dict2pid.h:87
dict_t * dict
Dictionary this table refers to.
Definition: dict2pid.h:89
A structure for a dictionary.
Definition: dict.h:76
Definition: ps_alignment.h:56
Cross-word triphone model structure.
Definition: dict2pid.h:73
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition: dict2pid.h:75
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
Definition: dict2pid.h:74