From 5c43a1fdc9dbdf141200c1b2353d5c3502426433 Mon Sep 17 00:00:00 2001
From: Rob Davies
Date: Fri, 3 May 2013 11:38:48 +0100
Subject: [PATCH] Removed more dependencies on utils.h

bamlite.c now includes some wrappers for gzopen/gzread/gzclose that print
messages when errors occur. They do not attempt to quit the program but pass
on the return code.

bwaseqio.c now checks the return codes from bam_open, bam_close and
bam_read1.

Code in bwt_gen.c now checks for IO errors itself instead of using the
wrappers. A benefit of this is it can now say which file had a problem.

Removed call to err_fatal_simple in is_bwt and unnecessary inclusion of
malloc_wrap.h in ksw.h.
---
 Makefile   | 10 +++---
 bamlite.c  | 46 ++++++++++++++++++++++++++++
 bamlite.h  | 25 ++++++++++++---
 bwaseqio.c | 10 ++++--
 bwt_gen.c  | 90 +++++++++++++++++++++++++++++++++++++++++++-----------
 is.c       |  3 +-
 ksw.h      |  4 ---
 7 files changed, 152 insertions(+), 36 deletions(-)

diff --git a/Makefile b/Makefile
index 7d78889..d39a787 100644
--- a/Makefile
+++ b/Makefile
@@ -38,9 +38,9 @@ depend:
 # DO NOT DELETE THIS LINE -- make depend depends on it.
 
QSufSort.o: QSufSort.h -bamlite.o: bamlite.h utils.h malloc_wrap.h +bamlite.o: bamlite.h malloc_wrap.h bntseq.o: bntseq.h utils.h kseq.h malloc_wrap.h -bwa.o: bntseq.h bwa.h bwt.h ksw.h malloc_wrap.h utils.h kseq.h +bwa.o: bntseq.h bwa.h bwt.h ksw.h utils.h malloc_wrap.h kseq.h bwamem.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h ksw.h kvec.h bwamem.o: ksort.h utils.h kbtree.h bwamem_pair.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h kvec.h @@ -51,7 +51,7 @@ bwase.o: bwase.h bntseq.h bwt.h bwtaln.h utils.h kstring.h malloc_wrap.h bwase.o: bwa.h ksw.h bwaseqio.o: bwtaln.h bwt.h utils.h bamlite.h malloc_wrap.h kseq.h bwt.o: utils.h bwt.h kvec.h malloc_wrap.h -bwt_gen.o: QSufSort.h utils.h malloc_wrap.h +bwt_gen.o: QSufSort.h malloc_wrap.h bwt_lite.o: bwt_lite.h malloc_wrap.h bwtaln.o: bwtaln.h bwt.h bwtgap.h utils.h bwa.h bntseq.h malloc_wrap.h bwtgap.o: bwtgap.h bwt.h bwtaln.h malloc_wrap.h @@ -66,11 +66,11 @@ bwtsw2_pair.o: utils.h bwt.h bntseq.h bwtsw2.h bwt_lite.h kstring.h bwtsw2_pair.o: malloc_wrap.h ksw.h example.o: bwamem.h bwt.h bntseq.h bwa.h kseq.h malloc_wrap.h fastmap.o: bwa.h bntseq.h bwt.h bwamem.h kvec.h malloc_wrap.h utils.h kseq.h -is.o: utils.h malloc_wrap.h +is.o: malloc_wrap.h kopen.o: malloc_wrap.h kstring.o: kstring.h malloc_wrap.h ksw.o: ksw.h malloc_wrap.h main.o: utils.h malloc_wrap.o: malloc_wrap.h -pemerge.o: ksw.h malloc_wrap.h kseq.h kstring.h bwa.h bntseq.h bwt.h utils.h +pemerge.o: ksw.h kseq.h malloc_wrap.h kstring.h bwa.h bntseq.h bwt.h utils.h utils.o: utils.h ksort.h malloc_wrap.h kseq.h diff --git a/bamlite.c b/bamlite.c index 851cb6f..3704beb 100644 --- a/bamlite.c +++ b/bamlite.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "bamlite.h" #ifdef USE_MALLOC_WRAPPERS @@ -162,3 +163,48 @@ int bam_read1(bamFile fp, bam1_t *b) if (bam_is_be) swap_endian_data(c, b->data_len, b->data); return 4 + block_len; } + + +#ifdef USE_VERBOSE_ZLIB_WRAPPERS +// Versions of gzopen, gzread and gzclose that print 
up error messages + +gzFile bamlite_gzopen(const char *fn, const char *mode) { + gzFile fp; + if (strcmp(fn, "-") == 0) { + fp = gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode); + if (!fp) { + fprintf(stderr, "Couldn't open %s : %s", + (strstr(mode, "r"))? "stdin" : "stdout", + strerror(errno)); + } + return fp; + } + if ((fp = gzopen(fn, mode)) == 0) { + fprintf(stderr, "Couldn't open %s : %s\n", fn, + errno ? strerror(errno) : "Out of memory"); + } + return fp; +} + +int bamlite_gzread(gzFile file, void *ptr, unsigned int len) { + int ret = gzread(file, ptr, len); + + if (ret < 0) { + int errnum = 0; + const char *msg = gzerror(file, &errnum); + fprintf(stderr, "gzread error: %s\n", + Z_ERRNO == errnum ? strerror(errno) : msg); + } + return ret; +} + +int bamlite_gzclose(gzFile file) { + int ret = gzclose(file); + if (Z_OK != ret) { + fprintf(stderr, "gzclose error: %s\n", + Z_ERRNO == ret ? strerror(errno) : zError(ret)); + } + + return ret; +} +#endif /* USE_VERBOSE_ZLIB_WRAPPERS */ diff --git a/bamlite.h b/bamlite.h index 640e863..efab7ac 100644 --- a/bamlite.h +++ b/bamlite.h @@ -3,17 +3,26 @@ #include #include -#include "utils.h" #ifdef USE_MALLOC_WRAPPERS # include "malloc_wrap.h" #endif +#define USE_VERBOSE_ZLIB_WRAPPERS + typedef gzFile bamFile; -#define bam_open(fn, mode) xzopen(fn, mode) -#define bam_dopen(fd, mode) gzdopen(fd, mode) -#define bam_close(fp) err_gzclose(fp) -#define bam_read(fp, buf, size) err_gzread(fp, buf, size) +#ifdef USE_VERBOSE_ZLIB_WRAPPERS +/* These print error messages on failure */ +# define bam_open(fn, mode) bamlite_gzopen(fn, mode) +# define bam_dopen(fd, mode) gzdopen(fd, mode) +# define bam_close(fp) bamlite_gzclose(fp) +# define bam_read(fp, buf, size) bamlite_gzread(fp, buf, size) +#else +# define bam_open(fn, mode) gzopen(fn, mode) +# define bam_dopen(fd, mode) gzdopen(fd, mode) +# define bam_close(fp) gzclose(fp) +# define bam_read(fp, buf, size) gzread(fp, buf, size) +#endif /* USE_VERBOSE_ZLIB_WRAPPERS */ 
typedef struct { int32_t n_targets; @@ -92,6 +101,12 @@ extern "C" { bam_header_t *bam_header_read(bamFile fp); int bam_read1(bamFile fp, bam1_t *b); +#ifdef USE_VERBOSE_ZLIB_WRAPPERS + gzFile bamlite_gzopen(const char *fn, const char *mode); + int bamlite_gzread(gzFile file, void *ptr, unsigned int len); + int bamlite_gzclose(gzFile file); +#endif /* USE_VERBOSE_ZLIB_WRAPPERS */ + #ifdef __cplusplus } #endif diff --git a/bwaseqio.c b/bwaseqio.c index d157945..d850307 100644 --- a/bwaseqio.c +++ b/bwaseqio.c @@ -30,6 +30,7 @@ bwa_seqio_t *bwa_bam_open(const char *fn, int which) bs->is_bam = 1; bs->which = which; bs->fp = bam_open(fn, "r"); + if (0 == bs->fp) err_fatal_simple("Couldn't open bam file"); h = bam_header_read(bs->fp); bam_header_destroy(h); return bs; @@ -48,8 +49,9 @@ bwa_seqio_t *bwa_seq_open(const char *fn) void bwa_seq_close(bwa_seqio_t *bs) { if (bs == 0) return; - if (bs->is_bam) bam_close(bs->fp); - else { + if (bs->is_bam) { + if (0 != bam_close(bs->fp)) err_fatal_simple("Error closing bam file"); + } else { err_gzclose(bs->ks->f->f); kseq_destroy(bs->ks); } @@ -94,11 +96,12 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com int n_seqs, l, i; long n_trimmed = 0, n_tot = 0; bam1_t *b; + int res; b = bam_init1(); n_seqs = 0; seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t)); - while (bam_read1(bs->fp, b) >= 0) { + while ((res = bam_read1(bs->fp, b)) >= 0) { uint8_t *s, *q; int go = 0; if ((bs->which & 1) && (b->core.flag & BAM_FREAD1)) go = 1; @@ -130,6 +133,7 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com p->name = strdup((const char*)bam1_qname(b)); if (n_seqs == n_needed) break; } + if (res < 0 && res != -1) err_fatal_simple("Error reading bam file"); *n = n_seqs; if (n_seqs && trim_qual >= 1) fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot); diff --git a/bwt_gen.c b/bwt_gen.c index d68b30f..6139d80 100644 --- a/bwt_gen.c +++ 
b/bwt_gen.c @@ -27,8 +27,8 @@ #include #include #include +#include #include "QSufSort.h" -#include "utils.h" #ifdef USE_MALLOC_WRAPPERS # include "malloc_wrap.h" @@ -1448,13 +1448,29 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB packedFile = (FILE*)fopen(inputFileName, "rb"); if (packedFile == NULL) { - fprintf(stderr, "BWTIncConstructFromPacked() : Cannot open inputFileName!\n"); + fprintf(stderr, "BWTIncConstructFromPacked() : Cannot open %s : %s\n", + inputFileName, strerror(errno)); exit(1); } - err_fseek(packedFile, -1, SEEK_END); - packedFileLen = err_ftell(packedFile); - err_fread_noeof(&lastByteLength, sizeof(unsigned char), 1, packedFile); + if (fseek(packedFile, -1, SEEK_END) != 0) { + fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n", + inputFileName, strerror(errno)); + exit(1); + } + packedFileLen = ftell(packedFile); + if (packedFileLen == -1) { + fprintf(stderr, "BWTIncConstructFromPacked() : Can't ftell on %s : %s\n", + inputFileName, strerror(errno)); + exit(1); + } + if (fread(&lastByteLength, sizeof(unsigned char), 1, packedFile) != 1) { + fprintf(stderr, + "BWTIncConstructFromPacked() : Can't read from %s : %s\n", + inputFileName, + ferror(packedFile)? 
strerror(errno) : "Unexpected end of file"); + exit(1); + } totalTextLength = TextLengthFromBytePacked(packedFileLen, BIT_PER_CHAR, lastByteLength); bwtInc = BWTIncCreate(totalTextLength, initialMaxBuildSize, incMaxBuildSize); @@ -1468,10 +1484,23 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB } textSizeInByte = textToLoad / CHAR_PER_BYTE; // excluded the odd byte - err_fseek(packedFile, -2, SEEK_CUR); - err_fseek(packedFile, -((long)textSizeInByte), SEEK_CUR); - err_fread_noeof(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte + 1, packedFile); - err_fseek(packedFile, -((long)textSizeInByte + 1), SEEK_CUR); + if (fseek(packedFile, -((long)textSizeInByte + 2), SEEK_CUR) != 0) { + fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n", + inputFileName, strerror(errno)); + exit(1); + } + if (fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte + 1, packedFile) != textSizeInByte + 1) { + fprintf(stderr, + "BWTIncConstructFromPacked() : Can't read from %s : %s\n", + inputFileName, + ferror(packedFile)? 
strerror(errno) : "Unexpected end of file"); + exit(1); + } + if (fseek(packedFile, -((long)textSizeInByte + 1), SEEK_CUR) != 0) { + fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n", + inputFileName, strerror(errno)); + exit(1); + } ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad); BWTIncConstruct(bwtInc, textToLoad); @@ -1484,9 +1513,23 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB textToLoad = totalTextLength - processedTextLength; } textSizeInByte = textToLoad / CHAR_PER_BYTE; - err_fseek(packedFile, -((long)textSizeInByte), SEEK_CUR); - err_fread_noeof(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte, packedFile); - err_fseek(packedFile, -((long)textSizeInByte), SEEK_CUR); + if (fseek(packedFile, -((long)textSizeInByte), SEEK_CUR) != 0) { + fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n", + inputFileName, strerror(errno)); + exit(1); + } + if (fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte, packedFile) != textSizeInByte) { + fprintf(stderr, + "BWTIncConstructFromPacked() : Can't read from %s : %s\n", + inputFileName, + ferror(packedFile)? 
strerror(errno) : "Unexpected end of file"); + exit(1); + } + if (fseek(packedFile, -((long)textSizeInByte), SEEK_CUR) != 0) { + fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n", + inputFileName, strerror(errno)); + exit(1); + } ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad); BWTIncConstruct(bwtInc, textToLoad); processedTextLength += textToLoad; @@ -1531,15 +1574,28 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o bwtFile = (FILE*)fopen(bwtFileName, "wb"); if (bwtFile == NULL) { - fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Cannot open BWT code file!\n"); + fprintf(stderr, + "BWTSaveBwtCodeAndOcc(): Cannot open %s for writing: %s\n", + bwtFileName, strerror(errno)); exit(1); } - err_fwrite(&bwt->inverseSa0, sizeof(bgint_t), 1, bwtFile); - err_fwrite(bwt->cumulativeFreq + 1, sizeof(bgint_t), ALPHABET_SIZE, bwtFile); bwtLength = BWTFileSizeInWord(bwt->textLength); - err_fwrite(bwt->bwtCode, sizeof(unsigned int), bwtLength, bwtFile); - err_fclose(bwtFile); + + if (fwrite(&bwt->inverseSa0, sizeof(bgint_t), 1, bwtFile) != 1 + || fwrite(bwt->cumulativeFreq + 1, + sizeof(bgint_t), ALPHABET_SIZE, bwtFile) != ALPHABET_SIZE + || fwrite(bwt->bwtCode, + sizeof(unsigned int), bwtLength, bwtFile) != bwtLength) { + fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Error writing to %s : %s\n", + bwtFileName, strerror(errno)); + exit(1); + } + if (fclose(bwtFile) != 0) { + fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Error on closing %s : %s\n", + bwtFileName, strerror(errno)); + exit(1); + } } void bwt_bwtgen(const char *fn_pac, const char *fn_bwt) diff --git a/is.c b/is.c index 1891668..46f1772 100644 --- a/is.c +++ b/is.c @@ -25,7 +25,6 @@ */ #include -#include "utils.h" #ifdef USE_MALLOC_WRAPPERS # include "malloc_wrap.h" @@ -211,7 +210,7 @@ int is_bwt(ubyte_t *T, int n) int *SA, i, primary = 0; SA = (int*)calloc(n+1, sizeof(int)); - if (is_sa(T, SA, n)) err_fatal_simple("is_sa 
failed"); + if (is_sa(T, SA, n)) return -1; for (i = 0; i <= n; ++i) { if (SA[i] == 0) primary = i; diff --git a/ksw.h b/ksw.h index f9d22c6..97559fd 100644 --- a/ksw.h +++ b/ksw.h @@ -3,10 +3,6 @@ #include -#ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" -#endif - #define KSW_XBYTE 0x10000 #define KSW_XSTOP 0x20000 #define KSW_XSUBO 0x40000