SphinxBase  0.6
pio.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 #include <config.h>
39 
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #ifdef HAVE_SYS_STAT_H
47 #include <sys/stat.h>
48 #endif
49 #ifdef HAVE_SYS_TYPES_H
50 #include <sys/types.h>
51 #endif
52 #include <assert.h>
53 
54 #include "sphinxbase/pio.h"
55 #include "sphinxbase/filename.h"
56 #include "sphinxbase/err.h"
57 #include "sphinxbase/strfuncs.h"
58 #include "sphinxbase/ckd_alloc.h"
59 
60 #ifndef EXEEXT
61 #define EXEEXT ""
62 #endif
63 
/* Compression schemes that guess_comptype() can report for a file name. */
enum {
    COMP_NONE = 0,      /* no recognised compression suffix */
    COMP_COMPRESS = 1,  /* ".Z" / ".z" */
    COMP_GZIP = 2,      /* ".gz" / ".GZ" */
    COMP_BZIP2 = 3      /* ".bz2" / ".BZ2" */
};
70 
71 static void
72 guess_comptype(char const *file, int32 *ispipe, int32 *isgz)
73 {
74  int k;
75 
76  k = strlen(file);
77  *ispipe = 0;
78  *isgz = COMP_NONE;
79  if ((k > 2)
80  && ((strcmp(file + k - 2, ".Z") == 0)
81  || (strcmp(file + k - 2, ".z") == 0))) {
82  *ispipe = 1;
83  *isgz = COMP_COMPRESS;
84  }
85  else if ((k > 3) && ((strcmp(file + k - 3, ".gz") == 0)
86  || (strcmp(file + k - 3, ".GZ") == 0))) {
87  *ispipe = 1;
88  *isgz = COMP_GZIP;
89  }
90  else if ((k > 4) && ((strcmp(file + k - 4, ".bz2") == 0)
91  || (strcmp(file + k - 4, ".BZ2") == 0))) {
92  *ispipe = 1;
93  *isgz = COMP_BZIP2;
94  }
95 }
96 
97 FILE *
98 fopen_comp(const char *file, const char *mode, int32 * ispipe)
99 {
100  FILE *fp;
101 
102 #ifndef HAVE_POPEN
103  *ispipe = 0; /* No popen() on WinCE */
104 #else /* HAVE_POPEN */
105  int32 isgz;
106  guess_comptype(file, ispipe, &isgz);
107 #endif /* HAVE_POPEN */
108 
109  if (*ispipe) {
110 #ifndef HAVE_POPEN
111  /* Shouldn't get here, anyway */
112  E_FATAL("No popen() on WinCE\n");
113 #else
114  if (strcmp(mode, "r") == 0) {
115  char *command;
116  switch (isgz) {
117  case COMP_GZIP:
118  command = string_join("gunzip" EXEEXT, " -c ", file, NULL);
119  break;
120  case COMP_COMPRESS:
121  command = string_join("zcat" EXEEXT, " ", file, NULL);
122  break;
123  case COMP_BZIP2:
124  command = string_join("bunzip2" EXEEXT, " -c ", file, NULL);
125  break;
126  default:
127  command = NULL; /* Make compiler happy. */
128  E_FATAL("Unknown compression type %d\n", isgz);
129  }
130  if ((fp = popen(command, mode)) == NULL) {
131  E_ERROR_SYSTEM("popen (%s,%s) failed\n", command, mode);
132  ckd_free(command);
133  return NULL;
134  }
135  ckd_free(command);
136  }
137  else if (strcmp(mode, "w") == 0) {
138  char *command;
139  switch (isgz) {
140  case COMP_GZIP:
141  command = string_join("gzip" EXEEXT, " > ", file, NULL);
142  break;
143  case COMP_COMPRESS:
144  command = string_join("compress" EXEEXT, " -c > ", file, NULL);
145  break;
146  case COMP_BZIP2:
147  command = string_join("bzip2" EXEEXT, " > ", file, NULL);
148  break;
149  default:
150  command = NULL; /* Make compiler happy. */
151  E_FATAL("Unknown compression type %d\n", isgz);
152  }
153  if ((fp = popen(command, mode)) == NULL) {
154  E_ERROR_SYSTEM("popen (%s,%s) failed\n", command, mode);
155  ckd_free(command);
156  return NULL;
157  }
158  ckd_free(command);
159  }
160  else {
161  E_ERROR("fopen_comp not implemented for mode = %s\n", mode);
162  return NULL;
163  }
164 #endif /* HAVE_POPEN */
165  }
166  else {
167  fp = fopen(file, mode);
168  }
169 
170  return (fp);
171 }
172 
173 
174 void
175 fclose_comp(FILE * fp, int32 ispipe)
176 {
177  if (ispipe) {
178 #ifdef HAVE_POPEN
179 #if defined(_WIN32) && (!defined(__SYMBIAN32__))
180  _pclose(fp);
181 #else
182  pclose(fp);
183 #endif
184 #endif
185  }
186  else
187  fclose(fp);
188 }
189 
190 
191 FILE *
192 fopen_compchk(const char *file, int32 * ispipe)
193 {
194 #ifndef HAVE_POPEN
195  *ispipe = 0; /* No popen() on WinCE */
196  /* And therefore the rest of this function is useless. */
197  return (fopen_comp(file, "r", ispipe));
198 #else /* HAVE_POPEN */
199  int32 isgz;
200  FILE *fh;
201 
202  /* First just try to fopen_comp() it */
203  if ((fh = fopen_comp(file, "r", ispipe)) != NULL)
204  return fh;
205  else {
206  char *tmpfile;
207  int k;
208 
209  /* File doesn't exist; try other compressed/uncompressed form, as appropriate */
210  guess_comptype(file, ispipe, &isgz);
211  k = strlen(file);
212  tmpfile = ckd_calloc(k+5, 1);
213  strcpy(tmpfile, file);
214  switch (isgz) {
215  case COMP_GZIP:
216  tmpfile[k - 3] = '\0';
217  break;
218  case COMP_BZIP2:
219  tmpfile[k - 4] = '\0';
220  break;
221  case COMP_COMPRESS:
222  tmpfile[k - 2] = '\0';
223  break;
224  case COMP_NONE:
225  strcpy(tmpfile + k, ".gz");
226  if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
227  E_WARN("Using %s instead of %s\n", tmpfile, file);
228  ckd_free(tmpfile);
229  return fh;
230  }
231  strcpy(tmpfile + k, ".bz2");
232  if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
233  E_WARN("Using %s instead of %s\n", tmpfile, file);
234  ckd_free(tmpfile);
235  return fh;
236  }
237  strcpy(tmpfile + k, ".Z");
238  if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
239  E_WARN("Using %s instead of %s\n", tmpfile, file);
240  ckd_free(tmpfile);
241  return fh;
242  }
243  ckd_free(tmpfile);
244  return NULL;
245  }
246  E_WARN("Using %s instead of %s\n", tmpfile, file);
247  fh = fopen_comp(tmpfile, "r", ispipe);
248  ckd_free(tmpfile);
249  return NULL;
250  }
251 #endif /* HAVE_POPEN */
252 }
253 
254 lineiter_t *
255 lineiter_start(FILE *fh)
256 {
257  lineiter_t *li;
258 
259  li = ckd_calloc(1, sizeof(*li));
260  li->buf = ckd_malloc(128);
261  li->buf[0] = '\0';
262  li->bsiz = 128;
263  li->len = 0;
264  li->fh = fh;
265 
266  return lineiter_next(li);
267 }
268 
269 lineiter_t *
271 {
272  /* Read a line and check for EOF. */
273  if (fgets(li->buf, li->bsiz, li->fh) == NULL) {
274  lineiter_free(li);
275  return NULL;
276  }
277  /* If we managed to read the whole thing, then we are done
278  * (this will be by far the most common result). */
279  li->len = strlen(li->buf);
280  if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
281  return li;
282 
283  /* Otherwise we have to reallocate and keep going. */
284  while (1) {
285  li->bsiz *= 2;
286  li->buf = ckd_realloc(li->buf, li->bsiz);
287  /* If we get an EOF, we are obviously done. */
288  if (fgets(li->buf + li->len, li->bsiz - li->len, li->fh) == NULL) {
289  li->len += strlen(li->buf + li->len);
290  return li;
291  }
292  li->len += strlen(li->buf + li->len);
293  /* If we managed to read the whole thing, then we are done. */
294  if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
295  return li;
296  }
297 
298  /* Shouldn't get here. */
299  return li;
300 }
301 
302 void
304 {
305  if (li == NULL)
306  return;
307  ckd_free(li->buf);
308  ckd_free(li);
309 }
310 
/*
 * Read one line of arbitrary length from stream.
 *
 * stream   stream to read from.
 * out_len  if non-NULL, receives the number of characters stored
 *          (0 when nothing was read).
 *
 * Returns a newly allocated, NUL-terminated string holding the line
 * (including its newline, if present), or NULL when nothing could be
 * read.  The caller owns the returned buffer.
 */
char *
fread_line(FILE *stream, size_t *out_len)
{
    char chunk[128];
    char *line = NULL;
    size_t total = 0;

    while (fgets(chunk, sizeof(chunk), stream)) {
        size_t n = strlen(chunk);

        /* Make room for this chunk plus the terminator. */
        if (line == NULL)
            line = ckd_malloc(n + 1);
        else
            line = ckd_realloc(line, total + n + 1);

        memcpy(line + total, chunk, n + 1);
        total += n;

        /* Stop on a short read or end of line. */
        if (n < sizeof(chunk) - 1 || chunk[n - 1] == '\n')
            break;
    }

    if (out_len)
        *out_len = total;
    return line;
}
339 
340 
341 #define FREAD_RETRY_COUNT 60
342 
343 int32
344 fread_retry(void *pointer, int32 size, int32 num_items, FILE * stream)
345 {
346  char *data;
347  uint32 n_items_read;
348  uint32 n_items_rem;
349  uint32 n_retry_rem;
350  int32 loc;
351 
352  n_retry_rem = FREAD_RETRY_COUNT;
353 
354  data = pointer;
355  loc = 0;
356  n_items_rem = num_items;
357 
358  do {
359  n_items_read = fread(&data[loc], size, n_items_rem, stream);
360 
361  n_items_rem -= n_items_read;
362 
363  if (n_items_rem > 0) {
364  /* an incomplete read occurred */
365 
366  if (n_retry_rem == 0)
367  return -1;
368 
369  if (n_retry_rem == FREAD_RETRY_COUNT) {
370  E_ERROR_SYSTEM("fread() failed; retrying...\n");
371  }
372 
373  --n_retry_rem;
374 
375  loc += n_items_read * size;
376 #ifdef HAVE_UNISTD_H
377  sleep(1);
378 #endif
379  }
380  } while (n_items_rem > 0);
381 
382  return num_items;
383 }
384 
385 
386 /* Silvio Moioli: updated to use Unicode */
387 #ifdef _WIN32_WCE /* No stat() on WinCE */
388 int32
389 stat_retry(const char *file, struct stat * statbuf)
390 {
391  WIN32_FIND_DATAW file_data;
392  HANDLE *h;
393  wchar_t *wfile;
394  size_t len;
395 
396  len = mbstowcs(NULL, file, 0) + 1;
397  wfile = ckd_calloc(len, sizeof(*wfile));
398  mbstowcs(wfile, file, len);
399  if ((h = FindFirstFileW(wfile, &file_data)) == INVALID_HANDLE_VALUE) {
400  ckd_free(wfile);
401  return -1;
402  }
403  ckd_free(wfile);
404  memset(statbuf, 0, sizeof(statbuf));
405  statbuf->st_mtime = file_data.ftLastWriteTime.dwLowDateTime;
406  statbuf->st_size = file_data.nFileSizeLow;
407  FindClose(h);
408 
409  return 0;
410 }
411 
412 
413 int32
414 stat_mtime(const char *file)
415 {
416  struct stat statbuf;
417 
418  if (stat_retry(file, &statbuf) != 0)
419  return -1;
420 
421  return ((int32) statbuf.st_mtime);
422 }
423 #else
424 #define STAT_RETRY_COUNT 10
425 int32
426 stat_retry(const char *file, struct stat * statbuf)
427 {
428  int32 i;
429 
430 
431 
432  for (i = 0; i < STAT_RETRY_COUNT; i++) {
433 
434 #ifndef HAVE_SYS_STAT_H
435  FILE *fp;
436 
437  if ((fp=(FILE *)fopen(file, "r"))!= 0)
438  {
439  fseek( fp, 0, SEEK_END);
440  statbuf->st_size = ftell( fp );
441  fclose(fp);
442  return 0;
443  }
444 
445 #else /* HAVE_SYS_STAT_H */
446  if (stat(file, statbuf) == 0)
447  return 0;
448 #endif
449  if (i == 0) {
450  E_ERROR_SYSTEM("stat(%s) failed; retrying...\n", file);
451  }
452 #ifdef HAVE_UNISTD_H
453  sleep(1);
454 #endif
455  }
456 
457  return -1;
458 }
459 
460 int32
461 stat_mtime(const char *file)
462 {
463  struct stat statbuf;
464 
465 #ifdef HAVE_SYS_STAT_H
466  if (stat(file, &statbuf) != 0)
467  return -1;
468 #else /* HAVE_SYS_STAT_H */
469  if (stat_retry(file, &statbuf) != 0)
470  return -1;
471 #endif /* HAVE_SYS_STAT_H */
472 
473  return ((int32) statbuf.st_mtime);
474 }
475 #endif /* !_WIN32_WCE */
476 
477 struct bit_encode_s {
478  FILE *fh;
479  unsigned char buf, bbits;
480  int16 refcount;
481 };
482 
483 bit_encode_t *
484 bit_encode_attach(FILE *outfh)
485 {
486  bit_encode_t *be;
487 
488  be = ckd_calloc(1, sizeof(*be));
489  be->refcount = 1;
490  be->fh = outfh;
491  return be;
492 }
493 
494 bit_encode_t *
496 {
497  ++be->refcount;
498  return be;
499 }
500 
501 int
503 {
504  if (be == NULL)
505  return 0;
506  if (--be->refcount > 0)
507  return be->refcount;
508  ckd_free(be);
509 
510  return 0;
511 }
512 
513 int
514 bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits)
515 {
516  int tbits;
517 
518  tbits = nbits + be->bbits;
519  if (tbits < 8) {
520  /* Append to buffer. */
521  be->buf |= ((bits[0] >> (8 - nbits)) << (8 - tbits));
522  }
523  else {
524  int i = 0;
525  while (tbits >= 8) {
526  /* Shift bits out of the buffer and splice with high-order bits */
527  fputc(be->buf | ((bits[i]) >> be->bbits), be->fh);
528  /* Put low-order bits back into buffer */
529  be->buf = (bits[i] << (8 - be->bbits)) & 0xff;
530  tbits -= 8;
531  ++i;
532  }
533  }
534  /* tbits contains remaining number of bits. */
535  be->bbits = tbits;
536 
537  return nbits;
538 }
539 
540 int
541 bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits)
542 {
543  unsigned char bits[4];
544  codeword <<= (32 - nbits);
545  bits[0] = (codeword >> 24) & 0xff;
546  bits[1] = (codeword >> 16) & 0xff;
547  bits[2] = (codeword >> 8) & 0xff;
548  bits[3] = codeword & 0xff;
549  return bit_encode_write(be, bits, nbits);
550 }
551 
552 int
554 {
555  if (be->bbits) {
556  fputc(be->buf, be->fh);
557  be->bbits = 0;
558  }
559  return 0;
560 }
561 
562 #ifdef HAVE_SYS_STAT_H /* Unix, Cygwin */
/*
 * Create a directory, creating missing parent directories as needed.
 *
 * path  directory to create.
 *
 * Returns 0 if the directory was created or already exists, and a
 * nonzero value (-1) on failure, including an empty path.
 */
int
build_directory(const char *path)
{
    char *dirname;

    /* Utterly failed... */
    if (strlen(path) == 0)
        return -1;
    /* Utterly succeeded... */
    if (mkdir(path, 0777) == 0)
        return 0;
    /* Or, it already exists... */
    if (errno == EEXIST)
        return 0;
    /* Anything other than a missing parent is a real error. */
    if (errno != ENOENT) {
        E_ERROR_SYSTEM("Failed to create %s", path);
        return -1;
    }

    /* A parent is missing: build it first, then try again. */
    dirname = ckd_salloc(path);
    path2dirname(path, dirname);
    build_directory(dirname);
    ckd_free(dirname);
    return mkdir(path, 0777);
}
589 #elif defined(_WIN32)
590 /* FIXME: Implement this. */
/*
 * Placeholder for platforms without an implementation: logs an error
 * and reports failure (-1) without touching the file system.
 */
int
build_directory(const char *path)
{
    (void) path; /* unused on this platform */

    E_ERROR("build_directory() unimplemented on your platform!\n");
    return -1;
}
597 #else
/*
 * Placeholder for platforms without an implementation: logs an error
 * and reports failure (-1) without touching the file system.
 */
int
build_directory(const char *path)
{
    (void) path; /* unused on this platform */

    E_ERROR("build_directory() unimplemented on your platform!\n");
    return -1;
}
604 #endif