diff --git a/.gitignore b/.gitignore index 57cb318..fba548e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ bwa test test64 .*.swp +Makefile.bak +bwamem-lite diff --git a/Makefile b/Makefile index abbe42f..d39a787 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,13 @@ CC= gcc CFLAGS= -g -Wall -O2 +WRAP_MALLOC= -DUSE_MALLOC_WRAPPERS AR= ar -DFLAGS= -DHAVE_PTHREAD +DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) LOBJS= utils.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o AOBJS= QSufSort.o bwt_gen.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \ is.o bwtindex.o bwape.o kopen.o pemerge.o \ bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \ - bwtsw2_chain.o fastmap.o bwtsw2_pair.o + bwtsw2_chain.o fastmap.o bwtsw2_pair.o malloc_wrap.o PROG= bwa INCLUDES= LIBS= -lm -lz -lpthread @@ -28,26 +29,48 @@ bwamem-lite:libbwa.a example.o libbwa.a:$(LOBJS) $(AR) -csru $@ $(LOBJS) -ksw.o:ksw.h -kstring.o:kstring.h -utils.o:utils.h ksort.h kseq.h -bntseq.o:bntseq.h -bwt.o:bwt.h utils.h -bwa.o:bwa.h bwt.h bntseq.h -bwamem.o:ksw.h kbtree.h ksort.h kvec.h kstring.h utils.h bwamem.h -bwamem_pair.o:ksw.h kvec.h kstring.h utils.h bwamem.h - -QSufSort.o:QSufSort.h -bwt_gen.o:QSufSort.h - -fastmap.o:bwt.h bwamem.h - -bwtaln.o:bwt.h bwtaln.h kseq.h -bwtgap.o:bwtgap.h bwtaln.h bwt.h - -bwtsw2_core.o:bwtsw2.h bwt.h bwt_lite.h -bwtsw2_aux.o:bwtsw2.h bwt.h bwt_lite.h -bwtsw2_main.o:bwtsw2.h - clean: rm -f gmon.out *.o a.out $(PROG) *~ *.a + +depend: + ( LC_ALL=C ; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c ) + +# DO NOT DELETE THIS LINE -- make depend depends on it. + +QSufSort.o: QSufSort.h +bamlite.o: bamlite.h malloc_wrap.h +bntseq.o: bntseq.h utils.h kseq.h malloc_wrap.h +bwa.o: bntseq.h bwa.h bwt.h ksw.h utils.h malloc_wrap.h kseq.h +bwamem.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h ksw.h kvec.h +bwamem.o: ksort.h utils.h kbtree.h +bwamem_pair.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h kvec.h +bwamem_pair.o: utils.h ksw.h +bwape.o: bwtaln.h bwt.h kvec.h malloc_wrap.h bntseq.h utils.h bwase.h bwa.h +bwape.o: ksw.h khash.h +bwase.o: bwase.h bntseq.h bwt.h bwtaln.h utils.h kstring.h malloc_wrap.h +bwase.o: bwa.h ksw.h +bwaseqio.o: bwtaln.h bwt.h utils.h bamlite.h malloc_wrap.h kseq.h +bwt.o: utils.h bwt.h kvec.h malloc_wrap.h +bwt_gen.o: QSufSort.h malloc_wrap.h +bwt_lite.o: bwt_lite.h malloc_wrap.h +bwtaln.o: bwtaln.h bwt.h bwtgap.h utils.h bwa.h bntseq.h malloc_wrap.h +bwtgap.o: bwtgap.h bwt.h bwtaln.h malloc_wrap.h +bwtindex.o: bntseq.h bwt.h utils.h malloc_wrap.h +bwtsw2_aux.o: bntseq.h bwt_lite.h utils.h bwtsw2.h bwt.h kstring.h +bwtsw2_aux.o: malloc_wrap.h bwa.h ksw.h kseq.h ksort.h +bwtsw2_chain.o: bwtsw2.h bntseq.h bwt_lite.h bwt.h malloc_wrap.h ksort.h +bwtsw2_core.o: bwt_lite.h bwtsw2.h bntseq.h bwt.h kvec.h malloc_wrap.h +bwtsw2_core.o: khash.h ksort.h +bwtsw2_main.o: bwt.h bwtsw2.h bntseq.h bwt_lite.h utils.h bwa.h +bwtsw2_pair.o: utils.h bwt.h bntseq.h bwtsw2.h bwt_lite.h kstring.h +bwtsw2_pair.o: malloc_wrap.h ksw.h +example.o: bwamem.h bwt.h bntseq.h bwa.h kseq.h malloc_wrap.h +fastmap.o: bwa.h bntseq.h bwt.h bwamem.h kvec.h malloc_wrap.h utils.h kseq.h +is.o: malloc_wrap.h +kopen.o: malloc_wrap.h +kstring.o: kstring.h malloc_wrap.h +ksw.o: ksw.h malloc_wrap.h +main.o: utils.h +malloc_wrap.o: malloc_wrap.h +pemerge.o: ksw.h kseq.h malloc_wrap.h kstring.h bwa.h bntseq.h bwt.h utils.h +utils.o: utils.h ksort.h malloc_wrap.h kseq.h diff --git a/bamlite.c b/bamlite.c index 5aad392..3704beb 100644 --- a/bamlite.c +++ b/bamlite.c @@ -2,8 +2,13 @@ #include #include #include +#include #include "bamlite.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + /********************* * from bam_endian.c * *********************/ @@ -62,11 +67,11 @@ void bam_header_destroy(bam_header_t *header) if (header == 0) return; if (header->target_name) { for (i = 0; i < header->n_targets; ++i) - free(header->target_name[i]); + if (header->target_name[i]) free(header->target_name[i]); + if (header->target_len) free(header->target_len); free(header->target_name); - free(header->target_len); } - free(header->text); + if (header->text) free(header->text); free(header); } @@ -80,28 +85,33 @@ bam_header_t *bam_header_read(bamFile fp) magic_len = bam_read(fp, buf, 4); if (magic_len != 4 || strncmp(buf, "BAM\001", 4) != 0) { fprintf(stderr, "[bam_header_read] invalid BAM binary header (this is not a BAM file).\n"); - return 0; + return NULL; } header = bam_header_init(); // read plain text and the number of reference sequences - bam_read(fp, &header->l_text, 4); + if (bam_read(fp, &header->l_text, 4) != 4) goto fail; if (bam_is_be) bam_swap_endian_4p(&header->l_text); header->text = (char*)calloc(header->l_text + 1, 1); - bam_read(fp, header->text, header->l_text); - bam_read(fp, &header->n_targets, 4); + if (bam_read(fp, header->text, header->l_text) != header->l_text) goto fail; + if (bam_read(fp, &header->n_targets, 4) != 4) goto fail; if (bam_is_be) bam_swap_endian_4p(&header->n_targets); // read reference sequence names and lengths header->target_name = (char**)calloc(header->n_targets, sizeof(char*)); header->target_len = (uint32_t*)calloc(header->n_targets, 4); for (i = 0; i != header->n_targets; ++i) { - bam_read(fp, &name_len, 4); + if (bam_read(fp, &name_len, 4) != 4) goto fail; if (bam_is_be) bam_swap_endian_4p(&name_len); header->target_name[i] = (char*)calloc(name_len, 1); - bam_read(fp, header->target_name[i], name_len); - bam_read(fp, &header->target_len[i], 4); + if (bam_read(fp, header->target_name[i], name_len) != name_len) { + goto fail; + } + if (bam_read(fp, &header->target_len[i], 4) != 4) goto fail; if (bam_is_be) bam_swap_endian_4p(&header->target_len[i]); } return header; + fail: + bam_header_destroy(header); + return NULL; } static void swap_endian_data(const bam1_core_t *c, int data_len, uint8_t *data) @@ -153,3 +163,48 @@ int bam_read1(bamFile fp, bam1_t *b) if (bam_is_be) swap_endian_data(c, b->data_len, b->data); return 4 + block_len; } + + +#ifdef USE_VERBOSE_ZLIB_WRAPPERS +// Versions of gzopen, gzread and gzclose that print up error messages + +gzFile bamlite_gzopen(const char *fn, const char *mode) { + gzFile fp; + if (strcmp(fn, "-") == 0) { + fp = gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode); + if (!fp) { + fprintf(stderr, "Couldn't open %s : %s", + (strstr(mode, "r"))? "stdin" : "stdout", + strerror(errno)); + } + return fp; + } + if ((fp = gzopen(fn, mode)) == 0) { + fprintf(stderr, "Couldn't open %s : %s\n", fn, + errno ? strerror(errno) : "Out of memory"); + } + return fp; +} + +int bamlite_gzread(gzFile file, void *ptr, unsigned int len) { + int ret = gzread(file, ptr, len); + + if (ret < 0) { + int errnum = 0; + const char *msg = gzerror(file, &errnum); + fprintf(stderr, "gzread error: %s\n", + Z_ERRNO == errnum ? strerror(errno) : msg); + } + return ret; +} + +int bamlite_gzclose(gzFile file) { + int ret = gzclose(file); + if (Z_OK != ret) { + fprintf(stderr, "gzclose error: %s\n", + Z_ERRNO == ret ? strerror(errno) : zError(ret)); + } + + return ret; +} +#endif /* USE_VERBOSE_ZLIB_WRAPPERS */ diff --git a/bamlite.h b/bamlite.h index 167fa44..efab7ac 100644 --- a/bamlite.h +++ b/bamlite.h @@ -4,11 +4,25 @@ #include #include +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + +#define USE_VERBOSE_ZLIB_WRAPPERS + typedef gzFile bamFile; -#define bam_open(fn, mode) gzopen(fn, mode) -#define bam_dopen(fd, mode) gzdopen(fd, mode) -#define bam_close(fp) gzclose(fp) -#define bam_read(fp, buf, size) gzread(fp, buf, size) +#ifdef USE_VERBOSE_ZLIB_WRAPPERS +/* These print error messages on failure */ +# define bam_open(fn, mode) bamlite_gzopen(fn, mode) +# define bam_dopen(fd, mode) gzdopen(fd, mode) +# define bam_close(fp) bamlite_gzclose(fp) +# define bam_read(fp, buf, size) bamlite_gzread(fp, buf, size) +#else +# define bam_open(fn, mode) gzopen(fn, mode) +# define bam_dopen(fd, mode) gzdopen(fd, mode) +# define bam_close(fp) gzclose(fp) +# define bam_read(fp, buf, size) gzread(fp, buf, size) +#endif /* USE_VERBOSE_ZLIB_WRAPPERS */ typedef struct { int32_t n_targets; @@ -87,6 +101,12 @@ extern "C" { bam_header_t *bam_header_read(bamFile fp); int bam_read1(bamFile fp, bam1_t *b); +#ifdef USE_VERBOSE_ZLIB_WRAPPERS + gzFile bamlite_gzopen(const char *fn, const char *mode); + int bamlite_gzread(gzFile file, void *ptr, unsigned int len); + int bamlite_gzclose(gzFile file); +#endif /* USE_VERBOSE_ZLIB_WRAPPERS */ + #ifdef __cplusplus } #endif diff --git a/bntseq.c b/bntseq.c index 29b3b12..e1cd323 100644 --- a/bntseq.c +++ b/bntseq.c @@ -30,12 +30,17 @@ #include #include #include +#include #include "bntseq.h" #include "utils.h" #include "kseq.h" KSEQ_DECLARE(gzFile) +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + unsigned char nst_nt4_table[256] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, @@ -63,25 +68,27 @@ void bns_dump(const bntseq_t *bns, const char *prefix) { // dump .ann strcpy(str, prefix); strcat(str, ".ann"); fp = xopen(str, "w"); - fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->seed); + err_fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->seed); for (i = 0; i != bns->n_seqs; ++i) { bntann1_t *p = bns->anns + i; - fprintf(fp, "%d %s", p->gi, p->name); - if (p->anno[0]) fprintf(fp, " %s\n", p->anno); - else fprintf(fp, "\n"); - fprintf(fp, "%lld %d %d\n", (long long)p->offset, p->len, p->n_ambs); + err_fprintf(fp, "%d %s", p->gi, p->name); + if (p->anno[0]) err_fprintf(fp, " %s\n", p->anno); + else err_fprintf(fp, "\n"); + err_fprintf(fp, "%lld %d %d\n", (long long)p->offset, p->len, p->n_ambs); } - fclose(fp); + err_fflush(fp); + err_fclose(fp); } { // dump .amb strcpy(str, prefix); strcat(str, ".amb"); fp = xopen(str, "w"); - fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->n_holes); + err_fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->n_holes); for (i = 0; i != bns->n_holes; ++i) { bntamb1_t *p = bns->ambs + i; - fprintf(fp, "%lld %d %c\n", (long long)p->offset, p->len, p->amb); + err_fprintf(fp, "%lld %d %c\n", (long long)p->offset, p->len, p->amb); } - fclose(fp); + err_fflush(fp); + err_fclose(fp); } } @@ -89,13 +96,16 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c { char str[1024]; FILE *fp; + const char *fname; bntseq_t *bns; long long xx; int i; + int scanres; bns = (bntseq_t*)calloc(1, sizeof(bntseq_t)); { // read .ann - fp = xopen(ann_filename, "r"); - fscanf(fp, "%lld%d%u", &xx, &bns->n_seqs, &bns->seed); + fp = xopen(fname = ann_filename, "r"); + scanres = fscanf(fp, "%lld%d%u", &xx, &bns->n_seqs, &bns->seed); + if (scanres != 3) goto badread; bns->l_pac = xx; bns->anns = (bntann1_t*)calloc(bns->n_seqs, sizeof(bntann1_t)); for (i = 0; i < bns->n_seqs; ++i) { @@ -103,39 +113,54 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c char *q = str; int c; // read gi and sequence name - fscanf(fp, "%u%s", &p->gi, str); + scanres = fscanf(fp, "%u%s", &p->gi, str); + if (scanres != 2) goto badread; p->name = strdup(str); // read fasta comments - while ((c = fgetc(fp)) != '\n' && c != EOF) *q++ = c; + while (str - q < sizeof(str) - 1 && (c = fgetc(fp)) != '\n' && c != EOF) *q++ = c; + while (c != '\n' && c != EOF) c = fgetc(fp); + if (c == EOF) { + scanres = EOF; + goto badread; + } *q = 0; if (q - str > 1) p->anno = strdup(str + 1); // skip leading space else p->anno = strdup(""); // read the rest - fscanf(fp, "%lld%d%d", &xx, &p->len, &p->n_ambs); + scanres = fscanf(fp, "%lld%d%d", &xx, &p->len, &p->n_ambs); + if (scanres != 3) goto badread; p->offset = xx; } - fclose(fp); + err_fclose(fp); } { // read .amb int64_t l_pac; int32_t n_seqs; - fp = xopen(amb_filename, "r"); - fscanf(fp, "%lld%d%d", &xx, &n_seqs, &bns->n_holes); + fp = xopen(fname = amb_filename, "r"); + scanres = fscanf(fp, "%lld%d%d", &xx, &n_seqs, &bns->n_holes); + if (scanres != 3) goto badread; l_pac = xx; xassert(l_pac == bns->l_pac && n_seqs == bns->n_seqs, "inconsistent .ann and .amb files."); bns->ambs = bns->n_holes? (bntamb1_t*)calloc(bns->n_holes, sizeof(bntamb1_t)) : 0; for (i = 0; i < bns->n_holes; ++i) { bntamb1_t *p = bns->ambs + i; - fscanf(fp, "%lld%d%s", &xx, &p->len, str); + scanres = fscanf(fp, "%lld%d%s", &xx, &p->len, str); + if (scanres != 3) goto badread; p->offset = xx; p->amb = str[0]; } - fclose(fp); + err_fclose(fp); } { // open .pac bns->fp_pac = xopen(pac_filename, "rb"); } return bns; + + badread: + if (EOF == scanres) { + err_fatal(__func__, "Error reading %s : %s\n", fname, ferror(fp) ? strerror(errno) : "Unexpected end of file"); + } + err_fatal(__func__, "Parse error reading %s\n", fname); } bntseq_t *bns_restore(const char *prefix) @@ -152,7 +177,7 @@ void bns_destroy(bntseq_t *bns) if (bns == 0) return; else { int i; - if (bns->fp_pac) fclose(bns->fp_pac); + if (bns->fp_pac) err_fclose(bns->fp_pac); free(bns->ambs); for (i = 0; i < bns->n_seqs; ++i) { free(bns->anns[i].name); @@ -250,16 +275,17 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only) ret = bns->l_pac; { // finalize .pac file ubyte_t ct; - fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp); + err_fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp); // the following codes make the pac file size always (l_pac/4+1+1) if (bns->l_pac % 4 == 0) { ct = 0; - fwrite(&ct, 1, 1, fp); + err_fwrite(&ct, 1, 1, fp); } ct = bns->l_pac % 4; - fwrite(&ct, 1, 1, fp); + err_fwrite(&ct, 1, 1, fp); // close .pac file - fclose(fp); + err_fflush(fp); + err_fclose(fp); } bns_dump(bns, prefix); bns_destroy(bns); @@ -283,7 +309,7 @@ int bwa_fa2pac(int argc, char *argv[]) } fp = xzopen(argv[optind], "r"); bns_fasta2bntseq(fp, (optind+1 < argc)? argv[optind+1] : argv[optind], for_only); - gzclose(fp); + err_gzclose(fp); return 0; } diff --git a/bwa.c b/bwa.c index e86a87e..a20c027 100644 --- a/bwa.c +++ b/bwa.c @@ -7,6 +7,10 @@ #include "ksw.h" #include "utils.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + int bwa_verbose = 3; char bwa_rg_id[256]; @@ -258,8 +262,8 @@ bwaidx_t *bwa_idx_load(const char *hint, int which) idx->bns = bns_restore(prefix); if (which & BWA_IDX_PAC) { idx->pac = calloc(idx->bns->l_pac/4+1, 1); - fread(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence - fclose(idx->bns->fp_pac); + err_fread_noeof(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence + err_fclose(idx->bns->fp_pac); idx->bns->fp_pac = 0; } } diff --git a/bwamem.c b/bwamem.c index c3a08cd..779a221 100644 --- a/bwamem.c +++ b/bwamem.c @@ -15,6 +15,10 @@ #include "ksort.h" #include "utils.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + /* Theory on probability and scoring *ungapped* alignment * * s'(a,b) = log[P(b|a)/P(b)] = log[4P(b|a)], assuming uniform base distribution @@ -229,16 +233,16 @@ void mem_print_chain(const bntseq_t *bns, mem_chain_v *chn) int i, j; for (i = 0; i < chn->n; ++i) { mem_chain_t *p = &chn->a[i]; - printf("%d", p->n); + err_printf("%d", p->n); for (j = 0; j < p->n; ++j) { bwtint_t pos; int is_rev, ref_id; pos = bns_depos(bns, p->seeds[j].rbeg, &is_rev); if (is_rev) pos -= p->seeds[j].len - 1; bns_cnt_ambi(bns, pos, p->seeds[j].len, &ref_id); - printf("\t%d,%d,%ld(%s:%c%ld)", p->seeds[j].len, p->seeds[j].qbeg, (long)p->seeds[j].rbeg, bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1); + err_printf("\t%d,%d,%ld(%s:%c%ld)", p->seeds[j].len, p->seeds[j].qbeg, (long)p->seeds[j].rbeg, bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1); } - putchar('\n'); + err_putchar('\n'); } } diff --git a/bwamem_pair.c b/bwamem_pair.c index 0f6ff08..06aacff 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -8,6 +8,11 @@ #include "utils.h" #include "ksw.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + + #define MIN_RATIO 0.8 #define MIN_DIR_CNT 10 #define MIN_DIR_RATIO 0.05 diff --git a/bwape.c b/bwape.c index 6752a18..2a7f46e 100644 --- a/bwape.c +++ b/bwape.c @@ -12,6 +12,10 @@ #include "bwa.h" #include "ksw.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + typedef struct { int n; bwtint_t *a; @@ -280,11 +284,11 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw p[j] = seqs[j] + i; p[j]->n_multi = 0; p[j]->extra_flag |= SAM_FPD | (j == 0? SAM_FR1 : SAM_FR2); - fread(&n_aln, 4, 1, fp_sa[j]); + err_fread_noeof(&n_aln, 4, 1, fp_sa[j]); if (n_aln > kv_max(d->aln[j])) kv_resize(bwt_aln1_t, d->aln[j], n_aln); d->aln[j].n = n_aln; - fread(d->aln[j].a, sizeof(bwt_aln1_t), n_aln, fp_sa[j]); + err_fread_noeof(d->aln[j].a, sizeof(bwt_aln1_t), n_aln, fp_sa[j]); kv_copy(bwt_aln1_t, buf[j][i].aln, d->aln[j]); // backup d->aln[j] // generate SE alignment and mapping quality bwa_aln2seq(n_aln, d->aln[j].a, p[j]); @@ -498,8 +502,8 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs, // load reference sequence if (_pacseq == 0) { pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1); - rewind(bns->fp_pac); - fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); + err_rewind(bns->fp_pac); + err_fread_noeof(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); } else pacseq = (ubyte_t*)_pacseq; if (!popt->is_sw || ii->avg < 0.0) return pacseq; @@ -644,18 +648,18 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f g_hash = kh_init(b128); last_ii.avg = -1.0; - fread(&opt, sizeof(gap_opt_t), 1, fp_sa[0]); + err_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa[0]); ks[0] = bwa_open_reads(opt.mode, fn_fa[0]); opt0 = opt; - fread(&opt, sizeof(gap_opt_t), 1, fp_sa[1]); // overwritten! + err_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa[1]); // overwritten! ks[1] = bwa_open_reads(opt.mode, fn_fa[1]); { // for Illumina alignment only if (popt->is_preload) { strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str); strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt); pac = (ubyte_t*)calloc(bns->l_pac/4+1, 1); - rewind(bns->fp_pac); - fread(pac, 1, bns->l_pac/4+1, bns->fp_pac); + err_rewind(bns->fp_pac); + err_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac); } } @@ -709,7 +713,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f bns_destroy(bns); for (i = 0; i < 2; ++i) { bwa_seq_close(ks[i]); - fclose(fp_sa[i]); + err_fclose(fp_sa[i]); } for (iter = kh_begin(g_hash); iter != kh_end(g_hash); ++iter) if (kh_exist(g_hash, iter)) free(kh_val(g_hash, iter).a); @@ -765,7 +769,7 @@ int bwa_sai2sam_pe(int argc, char *argv[]) } if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) { fprintf(stderr, "[%s] fail to locate the index\n", __func__); - return 0; + return 1; } bwa_sai2sam_pe_core(prefix, argv + optind + 1, argv + optind+3, popt, rg_line); free(prefix); free(popt); diff --git a/bwase.c b/bwase.c index b2ea700..85165d6 100644 --- a/bwase.c +++ b/bwase.c @@ -12,6 +12,10 @@ #include "bwa.h" #include "ksw.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + int g_log_n[256]; void bwa_print_sam_PG(); @@ -312,8 +316,8 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t if (!_pacseq) { pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1); - rewind(bns->fp_pac); - fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); + err_rewind(bns->fp_pac); + err_fread_noeof(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); } else pacseq = _pacseq; for (i = 0; i != n_seqs; ++i) { bwa_seq_t *s = seqs + i; @@ -381,6 +385,26 @@ static int64_t pos_5(const bwa_seq_t *p) return -1; } +void bwa_print_seq(FILE *stream, bwa_seq_t *seq) { + char buffer[4096]; + const int bsz = sizeof(buffer); + int i, j, l; + + if (seq->strand == 0) { + for (i = 0; i < seq->full_len; i += bsz) { + l = seq->full_len - i > bsz ? bsz : seq->full_len - i; + for (j = 0; j < l; j++) buffer[j] = "ACGTN"[seq->seq[i + j]]; + err_fwrite(buffer, 1, l, stream); + } + } else { + for (i = seq->full_len - 1; i >= 0; i -= bsz) { + l = i + 1 > bsz ? bsz : i + 1; + for (j = 0; j < l; j++) buffer[j] = "TGCAN"[seq->seq[i - j]]; + err_fwrite(buffer, 1, l, stream); + } + } +} + void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2) { int j; @@ -432,10 +456,8 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in else err_printf("\t*\t0\t0\t"); // print sequence and quality - if (p->strand == 0) - for (j = 0; j != p->full_len; ++j) putchar("ACGTN"[(int)p->seq[j]]); - else for (j = 0; j != p->full_len; ++j) putchar("TGCAN"[p->seq[p->full_len - 1 - j]]); - putchar('\t'); + bwa_print_seq(stdout, p); + err_putchar('\t'); if (p->qual) { if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality err_printf("%s", p->qual); @@ -478,14 +500,16 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in } } } - putchar('\n'); + err_putchar('\n'); } else { // this read has no match - ubyte_t *s = p->strand? p->rseq : p->seq; + //ubyte_t *s = p->strand? p->rseq : p->seq; int flag = p->extra_flag | SAM_FSU; if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU; err_printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag); - for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]); - putchar('\t'); + //Why did this work differently to the version above?? + //for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]); + bwa_print_seq(stdout, p); + err_putchar('\t'); if (p->qual) { if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality err_printf("%s", p->qual); @@ -493,7 +517,7 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in if (bwa_rg_id[0]) err_printf("\tRG:Z:%s", bwa_rg_id); if (p->bc[0]) err_printf("\tBC:Z:%s", p->bc); if (p->clip_len < p->full_len) err_printf("\tXC:i:%d", p->clip_len); - putchar('\n'); + err_putchar('\n'); } } @@ -522,7 +546,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f fp_sa = xopen(fn_sa, "r"); m_aln = 0; - fread(&opt, sizeof(gap_opt_t), 1, fp_sa); + err_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa); bwa_print_sam_hdr(bns, rg_line); //bwa_print_sam_PG(); // set ks @@ -536,12 +560,12 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f for (i = 0; i < n_seqs; ++i) { bwa_seq_t *p = seqs + i; int n_aln; - fread(&n_aln, 4, 1, fp_sa); + err_fread_noeof(&n_aln, 4, 1, fp_sa); if (n_aln > m_aln) { m_aln = n_aln; aln = (bwt_aln1_t*)realloc(aln, sizeof(bwt_aln1_t) * m_aln); } - fread(aln, sizeof(bwt_aln1_t), n_aln, fp_sa); + err_fread_noeof(aln, sizeof(bwt_aln1_t), n_aln, fp_sa); bwa_aln2seq_core(n_aln, aln, p, 1, n_occ); } @@ -565,7 +589,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f // destroy bwa_seq_close(ks); bns_destroy(bns); - fclose(fp_sa); + err_fclose(fp_sa); free(aln); } @@ -591,7 +615,7 @@ int bwa_sai2sam_se(int argc, char *argv[]) } if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) { fprintf(stderr, "[%s] fail to locate the index\n", __func__); - return 0; + return 1; } bwa_sai2sam_se_core(prefix, argv[optind+1], argv[optind+2], n_occ, rg_line); free(prefix); diff --git a/bwaseqio.c b/bwaseqio.c index c1e9f97..d850307 100644 --- a/bwaseqio.c +++ b/bwaseqio.c @@ -7,6 +7,10 @@ #include "kseq.h" KSEQ_DECLARE(gzFile) +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + extern unsigned char nst_nt4_table[256]; static char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 }; @@ -26,6 +30,7 @@ bwa_seqio_t *bwa_bam_open(const char *fn, int which) bs->is_bam = 1; bs->which = which; bs->fp = bam_open(fn, "r"); + if (0 == bs->fp) err_fatal_simple("Couldn't open bam file"); h = bam_header_read(bs->fp); bam_header_destroy(h); return bs; @@ -44,9 +49,10 @@ bwa_seqio_t *bwa_seq_open(const char *fn) void bwa_seq_close(bwa_seqio_t *bs) { if (bs == 0) return; - if (bs->is_bam) bam_close(bs->fp); - else { - gzclose(bs->ks->f->f); + if (bs->is_bam) { + if (0 != bam_close(bs->fp)) err_fatal_simple("Error closing bam file"); + } else { + err_gzclose(bs->ks->f->f); kseq_destroy(bs->ks); } free(bs); @@ -90,11 +96,12 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com int n_seqs, l, i; long n_trimmed = 0, n_tot = 0; bam1_t *b; + int res; b = bam_init1(); n_seqs = 0; seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t)); - while (bam_read1(bs->fp, b) >= 0) { + while ((res = bam_read1(bs->fp, b)) >= 0) { uint8_t *s, *q; int go = 0; if ((bs->which & 1) && (b->core.flag & BAM_FREAD1)) go = 1; @@ -126,6 +133,7 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com p->name = strdup((const char*)bam1_qname(b)); if (n_seqs == n_needed) break; } + if (res < 0 && res != -1) err_fatal_simple("Error reading bam file"); *n = n_seqs; if (n_seqs && trim_qual >= 1) fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot); @@ -184,7 +192,7 @@ bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int tri p->qual = 0; p->full_len = p->clip_len = p->len = l; n_tot += p->full_len; - p->seq = (ubyte_t*)calloc(p->len, 1); + p->seq = (ubyte_t*)calloc(p->full_len, 1); for (i = 0; i != p->full_len; ++i) p->seq[i] = nst_nt4_table[(int)seq->seq.s[i]]; if (seq->qual.l) { // copy quality diff --git a/bwt.c b/bwt.c index 8656b0e..c9bf6a3 100644 --- a/bwt.c +++ b/bwt.c @@ -34,6 +34,10 @@ #include "bwt.h" #include "kvec.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + void bwt_gen_cnt_table(bwt_t *bwt) { int i, j; @@ -67,10 +71,6 @@ void bwt_cal_sa(bwt_t *bwt, int intv) bwt->sa_intv = intv; bwt->n_sa = (bwt->seq_len + intv) / intv; bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t)); - if (bwt->sa == 0) { - fprintf(stderr, "[%s] Fail to allocate %.3fMB memory. Abort!\n", __func__, bwt->n_sa * sizeof(bwtint_t) / 1024.0/1024.0); - abort(); - } // calculate SA value isa = 0; sa = bwt->seq_len; for (i = 0; i < bwt->seq_len; ++i) { @@ -354,22 +354,24 @@ void bwt_dump_bwt(const char *fn, const bwt_t *bwt) { FILE *fp; fp = xopen(fn, "wb"); - fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); - fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); - fwrite(bwt->bwt, 4, bwt->bwt_size, fp); - fclose(fp); + err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); + err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); + err_fwrite(bwt->bwt, 4, bwt->bwt_size, fp); + err_fflush(fp); + err_fclose(fp); } void bwt_dump_sa(const char *fn, const bwt_t *bwt) { FILE *fp; fp = xopen(fn, "wb"); - fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); - fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); - fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); - fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp); - fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp); - fclose(fp); + err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); + err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); + err_fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); + err_fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp); + err_fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp); + err_fflush(fp); + err_fclose(fp); } static bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a) @@ -378,7 +380,7 @@ static bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a) bwtint_t offset = 0; while (size) { int x = bufsize < size? bufsize : size; - if ((x = fread(a + offset, 1, x, fp)) == 0) break; + if ((x = err_fread_noeof(a + offset, 1, x, fp)) == 0) break; size -= x; offset += x; } return offset; @@ -391,11 +393,11 @@ void bwt_restore_sa(const char *fn, bwt_t *bwt) bwtint_t primary; fp = xopen(fn, "rb"); - fread(&primary, sizeof(bwtint_t), 1, fp); + err_fread_noeof(&primary, sizeof(bwtint_t), 1, fp); xassert(primary == bwt->primary, "SA-BWT inconsistency: primary is not the same."); - fread(skipped, sizeof(bwtint_t), 4, fp); // skip - fread(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); - fread(&primary, sizeof(bwtint_t), 1, fp); + err_fread_noeof(skipped, sizeof(bwtint_t), 4, fp); // skip + err_fread_noeof(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); + err_fread_noeof(&primary, sizeof(bwtint_t), 1, fp); xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same."); bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv; @@ -403,7 +405,7 @@ void bwt_restore_sa(const char *fn, bwt_t *bwt) bwt->sa[0] = -1; fread_fix(fp, sizeof(bwtint_t) * (bwt->n_sa - 1), bwt->sa + 1); - fclose(fp); + err_fclose(fp); } bwt_t *bwt_restore_bwt(const char *fn) @@ -413,15 +415,15 @@ bwt_t *bwt_restore_bwt(const char *fn) bwt = (bwt_t*)calloc(1, sizeof(bwt_t)); fp = xopen(fn, "rb"); - fseek(fp, 0, SEEK_END); - bwt->bwt_size = (ftell(fp) - sizeof(bwtint_t) * 5) >> 2; + err_fseek(fp, 0, SEEK_END); + bwt->bwt_size = (err_ftell(fp) - sizeof(bwtint_t) * 5) >> 2; bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4); - fseek(fp, 0, SEEK_SET); - fread(&bwt->primary, sizeof(bwtint_t), 1, fp); - fread(bwt->L2+1, sizeof(bwtint_t), 4, fp); + err_fseek(fp, 0, SEEK_SET); + err_fread_noeof(&bwt->primary, sizeof(bwtint_t), 1, fp); + err_fread_noeof(bwt->L2+1, sizeof(bwtint_t), 4, fp); fread_fix(fp, bwt->bwt_size<<2, bwt->bwt); bwt->seq_len = bwt->L2[4]; - fclose(fp); + err_fclose(fp); bwt_gen_cnt_table(bwt); return bwt; diff --git a/bwt_gen.c b/bwt_gen.c index cac6a5f..6139d80 100644 --- a/bwt_gen.c +++ b/bwt_gen.c @@ -27,8 +27,13 @@ #include #include #include +#include #include "QSufSort.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + typedef uint64_t bgint_t; typedef int64_t sbgint_t; @@ -1443,13 +1448,29 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB packedFile = (FILE*)fopen(inputFileName, "rb"); if (packedFile == NULL) { - fprintf(stderr, "BWTIncConstructFromPacked() : Cannot open inputFileName!\n"); + fprintf(stderr, "BWTIncConstructFromPacked() : Cannot open %s : %s\n", + inputFileName, strerror(errno)); exit(1); } - fseek(packedFile, -1, SEEK_END); + if (fseek(packedFile, -1, SEEK_END) != 0) { + fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n", + inputFileName, strerror(errno)); + exit(1); + } packedFileLen = ftell(packedFile); - fread(&lastByteLength, sizeof(unsigned char), 1, packedFile); + if (packedFileLen == -1) { + fprintf(stderr, "BWTIncConstructFromPacked() : Can't ftell on %s : %s\n", + inputFileName, strerror(errno)); + exit(1); + } + if (fread(&lastByteLength, sizeof(unsigned char), 1, packedFile) != 1) { + fprintf(stderr, + "BWTIncConstructFromPacked() : Can't read from %s : %s\n", + inputFileName, + ferror(packedFile)? strerror(errno) : "Unexpected end of file"); + exit(1); + } totalTextLength = TextLengthFromBytePacked(packedFileLen, BIT_PER_CHAR, lastByteLength); bwtInc = BWTIncCreate(totalTextLength, initialMaxBuildSize, incMaxBuildSize); @@ -1463,10 +1484,23 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB } textSizeInByte = textToLoad / CHAR_PER_BYTE; // excluded the odd byte - fseek(packedFile, -2, SEEK_CUR); - fseek(packedFile, -((long)textSizeInByte), SEEK_CUR); - fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte + 1, packedFile); - fseek(packedFile, -((long)textSizeInByte + 1), SEEK_CUR); + if (fseek(packedFile, -((long)textSizeInByte + 2), SEEK_CUR) != 0) { + fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n", + inputFileName, strerror(errno)); + exit(1); + } + if (fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte + 1, packedFile) != textSizeInByte + 1) { + fprintf(stderr, + "BWTIncConstructFromPacked() : Can't read from %s : %s\n", + inputFileName, + ferror(packedFile)? strerror(errno) : "Unexpected end of file"); + exit(1); + } + if (fseek(packedFile, -((long)textSizeInByte + 1), SEEK_CUR) != 0) { + fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n", + inputFileName, strerror(errno)); + exit(1); + } ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad); BWTIncConstruct(bwtInc, textToLoad); @@ -1479,9 +1513,23 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB textToLoad = totalTextLength - processedTextLength; } textSizeInByte = textToLoad / CHAR_PER_BYTE; - fseek(packedFile, -((long)textSizeInByte), SEEK_CUR); - fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte, packedFile); - fseek(packedFile, -((long)textSizeInByte), SEEK_CUR); + if (fseek(packedFile, -((long)textSizeInByte), SEEK_CUR) != 0) { + fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n", + inputFileName, strerror(errno)); + exit(1); + } + if (fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte, packedFile) != textSizeInByte) { + fprintf(stderr, + "BWTIncConstructFromPacked() : Can't read from %s : %s\n", + inputFileName, + ferror(packedFile)? strerror(errno) : "Unexpected end of file"); + exit(1); + } + if (fseek(packedFile, -((long)textSizeInByte), SEEK_CUR) != 0) { + fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n", + inputFileName, strerror(errno)); + exit(1); + } ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad); BWTIncConstruct(bwtInc, textToLoad); processedTextLength += textToLoad; @@ -1526,15 +1574,28 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o bwtFile = (FILE*)fopen(bwtFileName, "wb"); if (bwtFile == NULL) { - fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Cannot open BWT code file!\n"); + fprintf(stderr, + "BWTSaveBwtCodeAndOcc(): Cannot open %s for writing: %s\n", + bwtFileName, strerror(errno)); exit(1); } - fwrite(&bwt->inverseSa0, sizeof(bgint_t), 1, bwtFile); - fwrite(bwt->cumulativeFreq + 1, sizeof(bgint_t), ALPHABET_SIZE, bwtFile); bwtLength = BWTFileSizeInWord(bwt->textLength); - fwrite(bwt->bwtCode, sizeof(unsigned int), bwtLength, bwtFile); - fclose(bwtFile); + + if (fwrite(&bwt->inverseSa0, sizeof(bgint_t), 1, bwtFile) != 1 + || fwrite(bwt->cumulativeFreq + 1, + sizeof(bgint_t), ALPHABET_SIZE, bwtFile) != ALPHABET_SIZE + || fwrite(bwt->bwtCode, + sizeof(unsigned int), bwtLength, bwtFile) != bwtLength) { + fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Error writing to %s : %s\n", + bwtFileName, strerror(errno)); + exit(1); + } + if (fclose(bwtFile) != 0) { + fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Error on closing %s : %s\n", + bwtFileName, strerror(errno)); + exit(1); + } } void bwt_bwtgen(const char *fn_pac, const char *fn_bwt) diff --git a/bwt_lite.c b/bwt_lite.c index 902e0fc..6cd3b1d 100644 --- a/bwt_lite.c +++ b/bwt_lite.c @@ -3,6 +3,10 @@ #include #include "bwt_lite.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + int is_sa(const uint8_t *T, uint32_t *SA, int n); int is_bwt(uint8_t *T, int n); diff --git a/bwtaln.c b/bwtaln.c index d132157..e772792 100644 --- a/bwtaln.c +++ b/bwtaln.c @@ -17,6 +17,10 @@ #include #endif +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + gap_opt_t *gap_init_opt() { gap_opt_t *o; @@ -306,7 +310,7 @@ int bwa_aln(int argc, char *argv[]) if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) { fprintf(stderr, "[%s] fail to locate the index\n", __func__); free(opt); - return 0; + return 1; } bwa_aln_core(prefix, argv[optind+1], opt); free(opt); free(prefix); diff --git a/bwtgap.c b/bwtgap.c index 364717c..16d9025 100644 --- a/bwtgap.c +++ b/bwtgap.c @@ -4,6 +4,10 @@ #include "bwtgap.h" #include "bwtaln.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + #define STATE_M 0 #define STATE_I 1 #define STATE_D 2 diff --git a/bwtindex.c b/bwtindex.c index 934b382..9e3ec15 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -39,6 +39,11 @@ #include "divsufsort.h" #endif +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + + int is_bwt(ubyte_t *T, int n); int64_t bwa_seq_len(const char *fn_pac) @@ -47,10 +52,10 @@ int64_t bwa_seq_len(const char *fn_pac) int64_t pac_len; ubyte_t c; fp = xopen(fn_pac, "rb"); - fseek(fp, -1, SEEK_END); - pac_len = ftell(fp); - fread(&c, 1, 1, fp); - fclose(fp); + err_fseek(fp, -1, SEEK_END); + pac_len = err_ftell(fp); + err_fread_noeof(&c, 1, 1, fp); + err_fclose(fp); return (pac_len - 1) * 4 + (int)c; } @@ -70,8 +75,8 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is) // prepare sequence pac_size = (bwt->seq_len>>2) + ((bwt->seq_len&3) == 0? 0 : 1); buf2 = (ubyte_t*)calloc(pac_size, 1); - fread(buf2, 1, pac_size, fp); - fclose(fp); + err_fread_noeof(buf2, 1, pac_size, fp); + err_fclose(fp); memset(bwt->L2, 0, 5 * 4); buf = (ubyte_t*)calloc(bwt->seq_len + 1, 1); for (i = 0; i < bwt->seq_len; ++i) { @@ -229,7 +234,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command fprintf(stderr, "[bwa_index] Pack FASTA... "); l_pac = bns_fasta2bntseq(fp, prefix, 0); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); - gzclose(fp); + err_gzclose(fp); } if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT { @@ -263,7 +268,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command fprintf(stderr, "[bwa_index] Pack forward-only FASTA... "); l_pac = bns_fasta2bntseq(fp, prefix, 1); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); - gzclose(fp); + err_gzclose(fp); } { bwt_t *bwt; diff --git a/bwtsw2_aux.c b/bwtsw2_aux.c index d26d3fa..d225187 100644 --- a/bwtsw2_aux.c +++ b/bwtsw2_aux.c @@ -22,6 +22,11 @@ KSEQ_DECLARE(gzFile) #define __left_lt(a, b) ((a).end > (b).end) KSORT_INIT(hit, bsw2hit_t, __left_lt) +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + + extern unsigned char nst_nt4_table[256]; unsigned char nt_comp_table[256] = { @@ -710,12 +715,12 @@ static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t * // print and reset for (i = 0; i < _seq->n; ++i) { bsw2seq1_t *p = _seq->seq + i; - if (p->sam) printf("%s", p->sam); + if (p->sam) err_printf("%s", p->sam); free(p->name); free(p->seq); free(p->qual); free(p->sam); p->tid = -1; p->l = 0; p->name = p->seq = p->qual = p->sam = 0; } - fflush(stdout); + err_fflush(stdout); _seq->n = 0; } @@ -729,13 +734,9 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, c bseq1_t *bseq; pac = calloc(bns->l_pac/4+1, 1); - if (pac == 0) { - fprintf(stderr, "[bsw2_aln] insufficient memory!\n"); - return; - } for (l = 0; l < bns->n_seqs; ++l) - printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[l].name, bns->anns[l].len); - fread(pac, 1, bns->l_pac/4+1, bns->fp_pac); + err_printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[l].name, bns->anns[l].len); + err_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac); fp = xzopen(fn, "r"); ks = kseq_init(fp); _seq = calloc(1, sizeof(bsw2seq_t)); @@ -767,9 +768,9 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, c free(pac); free(_seq->seq); free(_seq); kseq_destroy(ks); - gzclose(fp); + err_gzclose(fp); if (fn2) { kseq_destroy(ks2); - gzclose(fp2); + err_gzclose(fp2); } } diff --git a/bwtsw2_chain.c b/bwtsw2_chain.c index 381d0b7..ade77e7 100644 --- a/bwtsw2_chain.c +++ b/bwtsw2_chain.c @@ -1,6 +1,10 @@ #include #include "bwtsw2.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + typedef struct { uint32_t tbeg, tend; int qbeg, qend; diff --git a/bwtsw2_core.c b/bwtsw2_core.c index 67f126c..1119601 100644 --- a/bwtsw2_core.c +++ b/bwtsw2_core.c @@ -8,6 +8,10 @@ #include "bwt.h" #include "kvec.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + typedef struct { bwtint_t k, l; } qintv_t; diff --git a/bwtsw2_main.c b/bwtsw2_main.c index ab126f2..40a9e0a 100644 --- a/bwtsw2_main.c +++ b/bwtsw2_main.c @@ -37,6 +37,7 @@ int bwa_bwtsw2(int argc, char *argv[]) case 'S': opt->skip_sw = 1; break; case 'C': opt->cpy_cmt = 1; break; case 'G': opt->max_chain_gap = atoi(optarg); break; + default: return 1; } } opt->qr = opt->q + opt->r; @@ -79,7 +80,7 @@ int bwa_bwtsw2(int argc, char *argv[]) opt->t *= opt->a; opt->coef *= opt->a; - if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 0; + if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 1; bsw2_aln(opt, idx->bns, idx->bwt, argv[optind+1], optind+2 < argc? argv[optind+2] : 0); bwa_idx_destroy(idx); free(opt); diff --git a/bwtsw2_pair.c b/bwtsw2_pair.c index cdd822f..24905df 100644 --- a/bwtsw2_pair.c +++ b/bwtsw2_pair.c @@ -2,13 +2,17 @@ #include #include #include +#include "utils.h" #include "bwt.h" #include "bntseq.h" #include "bwtsw2.h" #include "kstring.h" -#include "utils.h" #include "ksw.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + #define MIN_RATIO 0.8 #define OUTLIER_BOUND 2.0 #define MAX_STDDEV 4.0 diff --git a/example.c b/example.c index 7c25674..a6c9bdd 100644 --- a/example.c +++ b/example.c @@ -1,11 +1,16 @@ #include #include #include +#include #include #include "bwamem.h" #include "kseq.h" // for the FASTA/Q parser KSEQ_DECLARE(gzFile) +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + int main(int argc, char *argv[]) { bwaidx_t *idx; @@ -19,9 +24,17 @@ int main(int argc, char *argv[]) } idx = bwa_idx_load(argv[1], BWA_IDX_ALL); // load the BWA index - assert(idx); + if (NULL == idx) { + fprintf(stderr, "Index load failed.\n"); + exit(EXIT_FAILURE); + } fp = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r"); - assert(fp); + if (NULL == fp) { + fprintf(stderr, "Couldn't open %s : %s\n", + strcmp(argv[2], "-") ? argv[2] : "stdin", + errno ? strerror(errno) : "Out of memory"); + exit(EXIT_FAILURE); + } ks = kseq_init(fp); // initialize the FASTA/Q parser opt = mem_opt_init(); // initialize the BWA-MEM parameters to the default values @@ -34,10 +47,10 @@ int main(int argc, char *argv[]) if (ar.a[i].secondary >= 0) continue; // skip secondary alignments a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR // print alignment - printf("%s\t%c\t%s\t%ld\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, (long)a.pos, a.mapq); + err_printf("%s\t%c\t%s\t%ld\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, (long)a.pos, a.mapq); for (k = 0; k < a.n_cigar; ++k) // print CIGAR - printf("%d%c", a.cigar[k]>>4, "MIDSH"[a.cigar[k]&0xf]); - printf("\t%d\n", a.NM); // print edit distance + err_printf("%d%c", a.cigar[k]>>4, "MIDSH"[a.cigar[k]&0xf]); + err_printf("\t%d\n", a.NM); // print edit distance free(a.cigar); // don't forget to deallocate CIGAR } free(ar.a); // and deallocate the hit list @@ -45,7 +58,7 @@ int main(int argc, char *argv[]) free(opt); kseq_destroy(ks); - gzclose(fp); + err_gzclose(fp); bwa_idx_destroy(idx); return 0; } diff --git a/fastmap.c b/fastmap.c index 69dbcbe..592df02 100644 --- a/fastmap.c +++ b/fastmap.c @@ -7,6 +7,7 @@ #include "kvec.h" #include "utils.h" #include "kseq.h" +#include "utils.h" KSEQ_DECLARE(gzFile) extern unsigned char nst_nt4_table[256]; @@ -51,6 +52,7 @@ int main_mem(int argc, char *argv[]) else if (c == 'R') { if ((rg_line = bwa_set_rg(optarg)) == 0) return 1; // FIXME: memory leak } else if (c == 's') opt->split_width = atoi(optarg); + else return 1; } if (opt->n_threads < 1) opt->n_threads = 1; if (optind + 1 >= argc) { @@ -130,7 +132,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size); mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs, 0); for (i = 0; i < n; ++i) { - fputs(seqs[i].sam, stdout); + err_fputs(seqs[i].sam, stdout); free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam); } free(seqs); @@ -139,10 +141,10 @@ int main_mem(int argc, char *argv[]) free(opt); bwa_idx_destroy(idx); kseq_destroy(ks); - gzclose(fp); kclose(ko); + err_gzclose(fp); kclose(ko); if (ks2) { kseq_destroy(ks2); - gzclose(fp2); kclose(ko2); + err_gzclose(fp2); kclose(ko2); } return 0; } @@ -163,6 +165,7 @@ int main_fastmap(int argc, char *argv[]) case 'p': print_seq = 1; break; case 'w': min_iwidth = atoi(optarg); break; case 'l': min_len = atoi(optarg); break; + default: return 1; } } if (optind + 1 >= argc) { @@ -170,16 +173,16 @@ int main_fastmap(int argc, char *argv[]) return 1; } - fp = gzopen(argv[optind + 1], "r"); + fp = xzopen(argv[optind + 1], "r"); seq = kseq_init(fp); - idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS); + if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 1; itr = smem_itr_init(idx->bwt); while (kseq_read(seq) >= 0) { - printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l); + err_printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l); if (print_seq) { - putchar('\t'); - puts(seq->seq.s); - } else putchar('\n'); + err_putchar('\t'); + err_puts(seq->seq.s); + } else err_putchar('\n'); for (i = 0; i < seq->seq.l; ++i) seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]]; smem_set_query(itr, seq->seq.l, (uint8_t*)seq->seq.s); @@ -187,7 +190,7 @@ int main_fastmap(int argc, char *argv[]) for (i = 0; i < a->n; ++i) { bwtintv_t *p = &a->a[i]; if ((uint32_t)p->info - (p->info>>32) < min_len) continue; - printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]); + err_printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]); if (p->x[2] <= min_iwidth) { for (k = 0; k < p->x[2]; ++k) { bwtint_t pos; @@ -196,18 +199,18 @@ int main_fastmap(int argc, char *argv[]) pos = bns_depos(idx->bns, bwt_sa(idx->bwt, p->x[0] + k), &is_rev); if (is_rev) pos -= len - 1; bns_cnt_ambi(idx->bns, pos, len, &ref_id); - printf("\t%s:%c%ld", idx->bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - idx->bns->anns[ref_id].offset) + 1); + err_printf("\t%s:%c%ld", idx->bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - idx->bns->anns[ref_id].offset) + 1); } - } else fputs("\t*", stdout); - putchar('\n'); + } else err_puts("\t*"); + err_putchar('\n'); } } - puts("//"); + err_puts("//"); } smem_itr_destroy(itr); bwa_idx_destroy(idx); kseq_destroy(seq); - gzclose(fp); + err_gzclose(fp); return 0; } diff --git a/is.c b/is.c index 9e50faf..46f1772 100644 --- a/is.c +++ b/is.c @@ -26,6 +26,10 @@ #include +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + typedef unsigned char ubyte_t; #define chr(i) (cs == sizeof(int) ? ((const int *)T)[i]:((const unsigned char *)T)[i]) @@ -205,7 +209,8 @@ int is_bwt(ubyte_t *T, int n) { int *SA, i, primary = 0; SA = (int*)calloc(n+1, sizeof(int)); - is_sa(T, SA, n); + + if (is_sa(T, SA, n)) return -1; for (i = 0; i <= n; ++i) { if (SA[i] == 0) primary = i; diff --git a/kbtree.h b/kbtree.h index 5ed5330..2b76953 100644 --- a/kbtree.h +++ b/kbtree.h @@ -32,6 +32,10 @@ #include #include +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + typedef struct { int32_t is_internal:1, n:31; } kbnode_t; diff --git a/khash.h b/khash.h index 2422044..12e5542 100644 --- a/khash.h +++ b/khash.h @@ -116,6 +116,10 @@ int main() { #include #include +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + /* compipler specific configuration */ #if UINT_MAX == 0xffffffffu diff --git a/kopen.c b/kopen.c index c0bc975..d238226 100644 --- a/kopen.c +++ b/kopen.c @@ -14,6 +14,10 @@ #include #endif +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + #ifdef _WIN32 #define _KO_NO_NET #endif @@ -54,10 +58,26 @@ static int socket_connect(const char *host, const char *port) #undef __err_connect } +static int write_bytes(int fd, const char *buf, size_t len) +{ + ssize_t bytes; + do { + bytes = write(fd, buf, len); + if (bytes >= 0) { + len -= bytes; + } else if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR) { + return -1; + } + } while (len > 0); + + return 0; +} + static int http_open(const char *fn) { char *p, *proxy, *q, *http_host, *host, *port, *path, *buf; int fd, ret, l; + ssize_t bytes = 0, bufsz = 0x10000; /* parse URL; adapted from khttp_parse_url() in knetfile.c */ if (strstr(fn, "http://") != fn) return 0; @@ -87,26 +107,35 @@ static int http_open(const char *fn) /* connect; adapted from khttp_connect() in knetfile.c */ l = 0; fd = socket_connect(host, port); - buf = calloc(0x10000, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough. - l += sprintf(buf + l, "GET %s HTTP/1.0\r\nHost: %s\r\n", path, http_host); - l += sprintf(buf + l, "\r\n"); - write(fd, buf, l); + buf = calloc(bufsz, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough. + l += snprintf(buf + l, bufsz, "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n", + path, http_host); + if (write_bytes(fd, buf, l) != 0) { + close(fd); + fd = -1; + goto out; + } l = 0; - while (read(fd, buf + l, 1)) { // read HTTP header; FIXME: bad efficiency + retry: + while (l < bufsz && (bytes = read(fd, buf + l, 1)) > 0) { // read HTTP header; FIXME: bad efficiency if (buf[l] == '\n' && l >= 3) if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break; ++l; } + if (bytes < 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) goto retry; + buf[l] = 0; - if (l < 14) { // prematured header + if (bytes < 0 || l < 14) { // prematured header close(fd); fd = -1; + goto out; } ret = strtol(buf + 8, &p, 0); // HTTP return code if (ret != 200) { close(fd); fd = -1; } + out: free(buf); free(http_host); free(host); free(port); free(path); return fd; } @@ -143,7 +172,7 @@ static int kftp_get_response(ftpaux_t *aux) static int kftp_send_cmd(ftpaux_t *aux, const char *cmd, int is_get) { if (socket_wait(aux->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing - write(aux->ctrl_fd, cmd, strlen(cmd)); + if (write_bytes(aux->ctrl_fd, cmd, strlen(cmd)) != 0) return -1; return is_get? kftp_get_response(aux) : 0; } @@ -262,7 +291,7 @@ void *kopen(const char *fn, int *_fd) if (ispunct(*q) && *q != '.' && *q != '_' && *q != '-' && *q != ':') break; need_shell = (*q != 0); - pipe(pfd); + if (pipe(pfd) != 0) return 0; pid = vfork(); if (pid == -1) { /* vfork() error */ close(pfd[0]); close(pfd[1]); diff --git a/kseq.h b/kseq.h index a5cec7c..642cd33 100644 --- a/kseq.h +++ b/kseq.h @@ -32,6 +32,10 @@ #include #include +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + #define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r #define KS_SEP_TAB 1 // isspace() && !' ' #define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows) @@ -50,9 +54,9 @@ #define __KS_BASIC(type_t, __bufsize) \ static inline kstream_t *ks_init(type_t f) \ { \ - kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \ + kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \ ks->f = f; \ - ks->buf = (unsigned char*)malloc(__bufsize); \ + ks->buf = (unsigned char*)malloc(__bufsize); \ return ks; \ } \ static inline void ks_destroy(kstream_t *ks) \ @@ -120,7 +124,7 @@ typedef struct __kstring_t { if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \ str->m = str->l + (i - ks->begin) + 1; \ kroundup32(str->m); \ - str->s = (char*)realloc(str->s, str->m); \ + str->s = (char*)realloc(str->s, str->m); \ } \ memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \ str->l = str->l + (i - ks->begin); \ @@ -151,7 +155,7 @@ typedef struct __kstring_t { #define __KSEQ_BASIC(SCOPE, type_t) \ SCOPE kseq_t *kseq_init(type_t fd) \ { \ - kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \ + kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \ s->f = ks_init(fd); \ return s; \ } \ diff --git a/ksort.h b/ksort.h index ad66a17..5851b0d 100644 --- a/ksort.h +++ b/ksort.h @@ -58,6 +58,10 @@ #include #include +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + typedef struct { void *left, *right; int depth; @@ -72,7 +76,7 @@ typedef struct { int curr, shift; \ \ a2[0] = array; \ - a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n); \ + a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n); \ for (curr = 0, shift = 0; (1ul< #include "kstring.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + int ksprintf(kstring_t *s, const char *fmt, ...) { va_list ap; diff --git a/kstring.h b/kstring.h index 04f1c42..fe7fa95 100644 --- a/kstring.h +++ b/kstring.h @@ -4,6 +4,10 @@ #include #include +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + #ifndef kroundup32 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #endif diff --git a/ksw.c b/ksw.c index 7d9dfb3..454c49d 100644 --- a/ksw.c +++ b/ksw.c @@ -28,6 +28,10 @@ #include #include "ksw.h" +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + #ifdef __GNUC__ #define LIKELY(x) __builtin_expect((x),1) #define UNLIKELY(x) __builtin_expect((x),0) @@ -553,7 +557,7 @@ int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target, #include #include #include "kseq.h" -KSEQ_INIT(gzFile, gzread) +KSEQ_INIT(gzFile, err_gzread) unsigned char seq_nt4_table[256] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, @@ -609,8 +613,8 @@ int main(int argc, char *argv[]) } for (j = 0; j < 5; ++j) mat[k++] = 0; // open file - fpt = gzopen(argv[optind], "r"); kst = kseq_init(fpt); - fpq = gzopen(argv[optind+1], "r"); ksq = kseq_init(fpq); + fpt = xzopen(argv[optind], "r"); kst = kseq_init(fpt); + fpq = xzopen(argv[optind+1], "r"); ksq = kseq_init(fpq); // all-pair alignment while (kseq_read(ksq) > 0) { kswq_t *q[2] = {0, 0}; @@ -629,18 +633,18 @@ int main(int argc, char *argv[]) for (i = 0; i < (int)kst->seq.l; ++i) kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]]; r = ksw_align(ksq->seq.l, (uint8_t*)ksq->seq.s, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[0]); if (r.score >= minsc) - printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, r.qb, r.qe+1, r.score, r.score2, r.te2); + err_printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, r.qb, r.qe+1, r.score, r.score2, r.te2); if (rseq) { r = ksw_align(ksq->seq.l, rseq, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[1]); if (r.score >= minsc) - printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, (int)ksq->seq.l - r.qb, (int)ksq->seq.l - 1 - r.qe, r.score, r.score2, r.te2); + err_printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, (int)ksq->seq.l - r.qb, (int)ksq->seq.l - 1 - r.qe, r.score, r.score2, r.te2); } } free(q[0]); free(q[1]); } free(rseq); - kseq_destroy(kst); gzclose(fpt); - kseq_destroy(ksq); gzclose(fpq); + kseq_destroy(kst); err_gzclose(fpt); + kseq_destroy(ksq); err_gzclose(fpq); return 0; } #endif diff --git a/kvec.h b/kvec.h index 9c9ca6e..83ad483 100644 --- a/kvec.h +++ b/kvec.h @@ -50,6 +50,10 @@ int main() { #include +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + #define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #define kvec_t(type) struct { size_t n, m; type *a; } @@ -71,7 +75,7 @@ int main() { #define kv_push(type, v, x) do { \ if ((v).n == (v).m) { \ (v).m = (v).m? (v).m<<1 : 2; \ - (v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \ + (v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \ } \ (v).a[(v).n++] = (x); \ } while (0) diff --git a/main.c b/main.c index d40d996..cc1a4b4 100644 --- a/main.c +++ b/main.c @@ -55,7 +55,7 @@ static int usage() void bwa_print_sam_PG() { - printf("@PG\tID:bwa\tPN:bwa\tVN:%s\n", PACKAGE_VERSION); + err_printf("@PG\tID:bwa\tPN:bwa\tVN:%s\n", PACKAGE_VERSION); } int main(int argc, char *argv[]) diff --git a/malloc_wrap.c b/malloc_wrap.c new file mode 100644 index 0000000..100b8cb --- /dev/null +++ b/malloc_wrap.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#ifdef USE_MALLOC_WRAPPERS +/* Don't wrap ourselves */ +# undef USE_MALLOC_WRAPPERS +#endif +#include "malloc_wrap.h" + +void *wrap_calloc(size_t nmemb, size_t size, + const char *file, unsigned int line, const char *func) { + void *p = calloc(nmemb, size); + if (NULL == p) { + fprintf(stderr, + "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", + func, nmemb * size, file, line, strerror(errno)); + exit(EXIT_FAILURE); + } + return p; +} + +void *wrap_malloc(size_t size, + const char *file, unsigned int line, const char *func) { + void *p = malloc(size); + if (NULL == p) { + fprintf(stderr, + "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", + func, size, file, line, strerror(errno)); + exit(EXIT_FAILURE); + } + return p; +} + +void *wrap_realloc(void *ptr, size_t size, + const char *file, unsigned int line, const char *func) { + void *p = realloc(ptr, size); + if (NULL == p) { + fprintf(stderr, + "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", + func, size, file, line, strerror(errno)); + exit(EXIT_FAILURE); + } + return p; +} + +char *wrap_strdup(const char *s, + const char *file, unsigned int line, const char *func) { + char *p = strdup(s); + if (NULL == p) { + fprintf(stderr, + "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", + func, strlen(s), file, line, strerror(errno)); + exit(EXIT_FAILURE); + } + return p; +} diff --git a/malloc_wrap.h b/malloc_wrap.h new file mode 100644 index 0000000..a55876a --- /dev/null +++ b/malloc_wrap.h @@ -0,0 +1,47 @@ +#ifndef MALLOC_WRAP_H +#define MALLOC_WRAP_H + +#include /* Avoid breaking the usual definitions */ +#include + +#ifdef __cplusplus +extern "C" { +#endif + + void *wrap_calloc(size_t nmemb, size_t size, + const char *file, unsigned int line, const char *func); + void *wrap_malloc(size_t size, + const char *file, unsigned int line, const char *func); + void *wrap_realloc(void *ptr, size_t size, + const char *file, unsigned int line, const char *func); + char *wrap_strdup(const char *s, + const char *file, unsigned int line, const char *func); + +#ifdef __cplusplus +} +#endif + +#ifdef USE_MALLOC_WRAPPERS +# ifdef calloc +# undef calloc +# endif +# define calloc(n, s) wrap_calloc( (n), (s), __FILE__, __LINE__, __func__) + +# ifdef malloc +# undef malloc +# endif +# define malloc(s) wrap_malloc( (s), __FILE__, __LINE__, __func__) + +# ifdef realloc +# undef realloc +# endif +# define realloc(p, s) wrap_realloc((p), (s), __FILE__, __LINE__, __func__) + +# ifdef strdup +# undef strdup +# endif +# define strdup(s) wrap_strdup( (s), __FILE__, __LINE__, __func__) + +#endif /* USE_MALLOC_WRAPPERS */ + +#endif /* MALLOC_WRAP_H */ diff --git a/pemerge.c b/pemerge.c index e37567a..725885f 100644 --- a/pemerge.c +++ b/pemerge.c @@ -4,12 +4,18 @@ #include #include #include +#include #include "ksw.h" #include "kseq.h" #include "kstring.h" #include "bwa.h" +#include "utils.h" KSEQ_DECLARE(gzFile) +#ifdef USE_MALLOC_WRAPPERS +# include "malloc_wrap.h" +#endif + #define MAX_SCORE_RATIO 0.9f #define MAX_ERR 8 @@ -140,14 +146,14 @@ pem_ret: static inline void print_bseq(const bseq1_t *s, int rn) { - putchar(s->qual? '@' : '>'); - fputs(s->name, stdout); + err_putchar(s->qual? '@' : '>'); + err_fputs(s->name, stdout); if (rn == 1 || rn == 2) { - putchar('/'); putchar('0' + rn); putchar('\n'); - } else puts(" merged"); - puts(s->seq); + err_putchar('/'); err_putchar('0' + rn); err_putchar('\n'); + } else err_puts(" merged"); + err_puts(s->seq); if (s->qual) { - puts("+"); puts(s->qual); + err_puts("+"); err_puts(s->qual); } } @@ -224,6 +230,7 @@ int main_pemerge(int argc, char *argv[]) else if (c == 'Q') opt->q_thres = atoi(optarg); else if (c == 't') opt->n_threads = atoi(optarg); else if (c == 'T') min_ovlp = atoi(optarg); + else return 1; } if (flag == 0) flag = 3; opt->flag = flag; @@ -243,9 +250,21 @@ int main_pemerge(int argc, char *argv[]) } fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r"); + if (NULL == fp) { + fprintf(stderr, "Couldn't open %s : %s\n", + strcmp(argv[optind], "-") ? argv[optind] : "stdin", + errno ? strerror(errno) : "Out of memory"); + exit(EXIT_FAILURE); + } ks = kseq_init(fp); if (optind + 1 < argc) { fp2 = strcmp(argv[optind+1], "-")? gzopen(argv[optind+1], "r") : gzdopen(fileno(stdin), "r"); + if (NULL == fp) { + fprintf(stderr, "Couldn't open %s : %s\n", + strcmp(argv[optind+1], "-") ? argv[optind+1] : "stdin", + errno ? strerror(errno) : "Out of memory"); + exit(EXIT_FAILURE); + } ks2 = kseq_init(fp2); } @@ -259,11 +278,14 @@ int main_pemerge(int argc, char *argv[]) for (i = 1; i <= MAX_ERR; ++i) fprintf(stderr, "%12ld %s\n", (long)cnt[i], err_msg[i]); kseq_destroy(ks); - gzclose(fp); + err_gzclose(fp); if (ks2) { kseq_destroy(ks2); - gzclose(fp2); + err_gzclose(fp2); } free(opt); + + err_fflush(stdout); + return 0; } diff --git a/utils.c b/utils.c index 20b09ee..00be7f0 100644 --- a/utils.c +++ b/utils.c @@ -24,6 +24,7 @@ */ /* Contact: Heng Li */ +#define FSYNC_ON_FLUSH #include #include @@ -31,6 +32,11 @@ #include #include #include +#ifdef FSYNC_ON_FLUSH +#include +#include +#include +#endif #include #include #include "utils.h" @@ -41,7 +47,7 @@ KSORT_INIT(128, pair64_t, pair64_lt) KSORT_INIT(64, uint64_t, ks_lt_generic) #include "kseq.h" -KSEQ_INIT2(, gzFile, gzread) +KSEQ_INIT2(, gzFile, err_gzread) /******************** * System utilities * @@ -53,8 +59,7 @@ FILE *err_xopen_core(const char *func, const char *fn, const char *mode) if (strcmp(fn, "-") == 0) return (strstr(mode, "r"))? stdin : stdout; if ((fp = fopen(fn, mode)) == 0) { - fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn); - abort(); + err_fatal(func, "fail to open file '%s' : %s", fn, strerror(errno)); } return fp; } @@ -62,10 +67,7 @@ FILE *err_xopen_core(const char *func, const char *fn, const char *mode) FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp) { if (freopen(fn, mode, fp) == 0) { - fprintf(stderr, "[%s] fail to open file '%s': ", func, fn); - perror(NULL); - fprintf(stderr, "Abort!\n"); - abort(); + err_fatal(func, "fail to open file '%s' : %s", fn, strerror(errno)); } return fp; } @@ -73,16 +75,30 @@ FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE gzFile err_xzopen_core(const char *func, const char *fn, const char *mode) { gzFile fp; - if (strcmp(fn, "-") == 0) - return gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode); + if (strcmp(fn, "-") == 0) { + fp = gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode); + /* According to zlib.h, this is the only reason gzdopen can fail */ + if (!fp) err_fatal(func, "Out of memory"); + return fp; + } if ((fp = gzopen(fn, mode)) == 0) { - fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn); - abort(); + err_fatal(func, "fail to open file '%s' : %s", fn, errno ? strerror(errno) : "Out of memory"); } return fp; } void err_fatal(const char *header, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + fprintf(stderr, "[%s] ", header); + vfprintf(stderr, fmt, args); + fprintf(stderr, "\n"); + va_end(args); + exit(EXIT_FAILURE); +} + +void err_fatal_core(const char *header, const char *fmt, ...) { va_list args; va_start(args, fmt); @@ -93,7 +109,13 @@ void err_fatal(const char *header, const char *fmt, ...) abort(); } -void err_fatal_simple_core(const char *func, const char *msg) +void _err_fatal_simple(const char *func, const char *msg) +{ + fprintf(stderr, "[%s] %s\n", func, msg); + exit(EXIT_FAILURE); +} + +void _err_fatal_simple_core(const char *func, const char *msg) { fprintf(stderr, "[%s] %s Abort!\n", func, msg); abort(); @@ -103,7 +125,51 @@ size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) { size_t ret = fwrite(ptr, size, nmemb, stream); if (ret != nmemb) - err_fatal_simple_core("fwrite", strerror(errno)); + _err_fatal_simple("fwrite", strerror(errno)); + return ret; +} + +size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + size_t ret = fread(ptr, size, nmemb, stream); + if (ret != nmemb) + { + _err_fatal_simple("fread", ferror(stream) ? strerror(errno) : "Unexpected end of file"); + } + return ret; +} + +int err_gzread(gzFile file, void *ptr, unsigned int len) +{ + int ret = gzread(file, ptr, len); + + if (ret < 0) + { + int errnum = 0; + const char *msg = gzerror(file, &errnum); + _err_fatal_simple("gzread", Z_ERRNO == errnum ? strerror(errno) : msg); + } + + return ret; +} + +int err_fseek(FILE *stream, long offset, int whence) +{ + int ret = fseek(stream, offset, whence); + if (0 != ret) + { + _err_fatal_simple("fseek", strerror(errno)); + } + return ret; +} + +long err_ftell(FILE *stream) +{ + long ret = ftell(stream); + if (-1 == ret) + { + _err_fatal_simple("ftell", strerror(errno)); + } return ret; } @@ -115,7 +181,7 @@ int err_printf(const char *format, ...) done = vfprintf(stdout, format, arg); int saveErrno = errno; va_end(arg); - if (done < 0) err_fatal_simple_core("vfprintf(stdout)", strerror(saveErrno)); + if (done < 0) _err_fatal_simple("vfprintf(stdout)", strerror(saveErrno)); return done; } @@ -127,21 +193,74 @@ int err_fprintf(FILE *stream, const char *format, ...) done = vfprintf(stream, format, arg); int saveErrno = errno; va_end(arg); - if (done < 0) err_fatal_simple_core("vfprintf", strerror(saveErrno)); + if (done < 0) _err_fatal_simple("vfprintf", strerror(saveErrno)); return done; } +int err_fputc(int c, FILE *stream) +{ + int ret = putc(c, stream); + if (EOF == ret) + { + _err_fatal_simple("fputc", strerror(errno)); + } + + return ret; +} + +int err_fputs(const char *s, FILE *stream) +{ + int ret = fputs(s, stream); + if (EOF == ret) + { + _err_fatal_simple("fputs", strerror(errno)); + } + + return ret; +} + int err_fflush(FILE *stream) { - int ret = fflush(stream); - if (ret != 0) err_fatal_simple_core("fflush", strerror(errno)); - return ret; + int ret = fflush(stream); + if (ret != 0) _err_fatal_simple("fflush", strerror(errno)); + +#ifdef FSYNC_ON_FLUSH + /* Calling fflush() ensures that all the data has made it to the + kernel buffers, but this may not be sufficient for remote filesystems + (e.g. NFS, lustre) as an error may still occur while the kernel + is copying the buffered data to the file server. To be sure of + catching these errors, we need to call fsync() on the file + descriptor, but only if it is a regular file. */ + { + struct stat sbuf; + if (0 != fstat(fileno(stream), &sbuf)) + _err_fatal_simple("fstat", strerror(errno)); + + if (S_ISREG(sbuf.st_mode)) + { + if (0 != fsync(fileno(stream))) + _err_fatal_simple("fsync", strerror(errno)); + } + } +#endif + return ret; } int err_fclose(FILE *stream) { int ret = fclose(stream); - if (ret != 0) err_fatal_simple_core("fclose", strerror(errno)); + if (ret != 0) _err_fatal_simple("fclose", strerror(errno)); + return ret; +} + +int err_gzclose(gzFile file) +{ + int ret = gzclose(file); + if (Z_OK != ret) + { + _err_fatal_simple("gzclose", Z_ERRNO == ret ? strerror(errno) : zError(ret)); + } + return ret; } diff --git a/utils.h b/utils.h index a3db251..5ef6ac4 100644 --- a/utils.h +++ b/utils.h @@ -39,11 +39,14 @@ #define ATTRIBUTE(list) #endif -#define err_fatal_simple(msg) err_fatal_simple_core(__func__, msg) +#define err_fatal_simple(msg) _err_fatal_simple(__func__, msg) +#define err_fatal_simple_core(msg) _err_fatal_simple_core(__func__, msg) + #define xopen(fn, mode) err_xopen_core(__func__, fn, mode) #define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp) #define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode) -#define xassert(cond, msg) if ((cond) == 0) err_fatal_simple_core(__func__, msg) + +#define xassert(cond, msg) if ((cond) == 0) _err_fatal_simple_core(__func__, msg) typedef struct { uint64_t x, y; @@ -56,18 +59,31 @@ typedef struct { size_t n, m; pair64_t *a; } pair64_v; extern "C" { #endif - void err_fatal(const char *header, const char *fmt, ...); - void err_fatal_simple_core(const char *func, const char *msg); + void err_fatal(const char *header, const char *fmt, ...) ATTRIBUTE((noreturn)); + void err_fatal_core(const char *header, const char *fmt, ...) ATTRIBUTE((noreturn)); + void _err_fatal_simple(const char *func, const char *msg) ATTRIBUTE((noreturn)); + void _err_fatal_simple_core(const char *func, const char *msg) ATTRIBUTE((noreturn)); FILE *err_xopen_core(const char *func, const char *fn, const char *mode); FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp); gzFile err_xzopen_core(const char *func, const char *fn, const char *mode); size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); + size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream); + + int err_gzread(gzFile file, void *ptr, unsigned int len); + int err_fseek(FILE *stream, long offset, int whence); +#define err_rewind(FP) err_fseek((FP), 0, SEEK_SET) + long err_ftell(FILE *stream); int err_fprintf(FILE *stream, const char *format, ...) ATTRIBUTE((format(printf, 2, 3))); int err_printf(const char *format, ...) ATTRIBUTE((format(printf, 1, 2))); + int err_fputc(int c, FILE *stream); +#define err_putchar(C) err_fputc((C), stdout) + int err_fputs(const char *s, FILE *stream); +#define err_puts(S) err_fputs((S), stdout) int err_fflush(FILE *stream); int err_fclose(FILE *stream); + int err_gzclose(gzFile file); double cputime(); double realtime();