Merge pull request #13 from daviesrob/master_fixes

Updates to master to add IO error checking
This commit is contained in:
Heng Li 2013-05-03 05:07:47 -07:00
commit 338ebfaae0
40 changed files with 847 additions and 235 deletions

2
.gitignore vendored
View File

@ -3,3 +3,5 @@ bwa
test test
test64 test64
.*.swp .*.swp
Makefile.bak
bwamem-lite

View File

@ -1,12 +1,13 @@
CC= gcc CC= gcc
CFLAGS= -g -Wall -O2 CFLAGS= -g -Wall -O2
WRAP_MALLOC= -DUSE_MALLOC_WRAPPERS
AR= ar AR= ar
DFLAGS= -DHAVE_PTHREAD DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
LOBJS= utils.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o LOBJS= utils.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o
AOBJS= QSufSort.o bwt_gen.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \ AOBJS= QSufSort.o bwt_gen.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
is.o bwtindex.o bwape.o kopen.o pemerge.o \ is.o bwtindex.o bwape.o kopen.o pemerge.o \
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \ bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
bwtsw2_chain.o fastmap.o bwtsw2_pair.o bwtsw2_chain.o fastmap.o bwtsw2_pair.o malloc_wrap.o
PROG= bwa PROG= bwa
INCLUDES= INCLUDES=
LIBS= -lm -lz -lpthread LIBS= -lm -lz -lpthread
@ -28,26 +29,48 @@ bwamem-lite:libbwa.a example.o
libbwa.a:$(LOBJS) libbwa.a:$(LOBJS)
$(AR) -csru $@ $(LOBJS) $(AR) -csru $@ $(LOBJS)
ksw.o:ksw.h
kstring.o:kstring.h
utils.o:utils.h ksort.h kseq.h
bntseq.o:bntseq.h
bwt.o:bwt.h utils.h
bwa.o:bwa.h bwt.h bntseq.h
bwamem.o:ksw.h kbtree.h ksort.h kvec.h kstring.h utils.h bwamem.h
bwamem_pair.o:ksw.h kvec.h kstring.h utils.h bwamem.h
QSufSort.o:QSufSort.h
bwt_gen.o:QSufSort.h
fastmap.o:bwt.h bwamem.h
bwtaln.o:bwt.h bwtaln.h kseq.h
bwtgap.o:bwtgap.h bwtaln.h bwt.h
bwtsw2_core.o:bwtsw2.h bwt.h bwt_lite.h
bwtsw2_aux.o:bwtsw2.h bwt.h bwt_lite.h
bwtsw2_main.o:bwtsw2.h
clean: clean:
rm -f gmon.out *.o a.out $(PROG) *~ *.a rm -f gmon.out *.o a.out $(PROG) *~ *.a
depend:
( LC_ALL=C ; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c )
# DO NOT DELETE THIS LINE -- make depend depends on it.
QSufSort.o: QSufSort.h
bamlite.o: bamlite.h malloc_wrap.h
bntseq.o: bntseq.h utils.h kseq.h malloc_wrap.h
bwa.o: bntseq.h bwa.h bwt.h ksw.h utils.h malloc_wrap.h kseq.h
bwamem.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h ksw.h kvec.h
bwamem.o: ksort.h utils.h kbtree.h
bwamem_pair.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h kvec.h
bwamem_pair.o: utils.h ksw.h
bwape.o: bwtaln.h bwt.h kvec.h malloc_wrap.h bntseq.h utils.h bwase.h bwa.h
bwape.o: ksw.h khash.h
bwase.o: bwase.h bntseq.h bwt.h bwtaln.h utils.h kstring.h malloc_wrap.h
bwase.o: bwa.h ksw.h
bwaseqio.o: bwtaln.h bwt.h utils.h bamlite.h malloc_wrap.h kseq.h
bwt.o: utils.h bwt.h kvec.h malloc_wrap.h
bwt_gen.o: QSufSort.h malloc_wrap.h
bwt_lite.o: bwt_lite.h malloc_wrap.h
bwtaln.o: bwtaln.h bwt.h bwtgap.h utils.h bwa.h bntseq.h malloc_wrap.h
bwtgap.o: bwtgap.h bwt.h bwtaln.h malloc_wrap.h
bwtindex.o: bntseq.h bwt.h utils.h malloc_wrap.h
bwtsw2_aux.o: bntseq.h bwt_lite.h utils.h bwtsw2.h bwt.h kstring.h
bwtsw2_aux.o: malloc_wrap.h bwa.h ksw.h kseq.h ksort.h
bwtsw2_chain.o: bwtsw2.h bntseq.h bwt_lite.h bwt.h malloc_wrap.h ksort.h
bwtsw2_core.o: bwt_lite.h bwtsw2.h bntseq.h bwt.h kvec.h malloc_wrap.h
bwtsw2_core.o: khash.h ksort.h
bwtsw2_main.o: bwt.h bwtsw2.h bntseq.h bwt_lite.h utils.h bwa.h
bwtsw2_pair.o: utils.h bwt.h bntseq.h bwtsw2.h bwt_lite.h kstring.h
bwtsw2_pair.o: malloc_wrap.h ksw.h
example.o: bwamem.h bwt.h bntseq.h bwa.h kseq.h malloc_wrap.h
fastmap.o: bwa.h bntseq.h bwt.h bwamem.h kvec.h malloc_wrap.h utils.h kseq.h
is.o: malloc_wrap.h
kopen.o: malloc_wrap.h
kstring.o: kstring.h malloc_wrap.h
ksw.o: ksw.h malloc_wrap.h
main.o: utils.h
malloc_wrap.o: malloc_wrap.h
pemerge.o: ksw.h kseq.h malloc_wrap.h kstring.h bwa.h bntseq.h bwt.h utils.h
utils.o: utils.h ksort.h malloc_wrap.h kseq.h

View File

@ -2,8 +2,13 @@
#include <ctype.h> #include <ctype.h>
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include <errno.h>
#include "bamlite.h" #include "bamlite.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
/********************* /*********************
* from bam_endian.c * * from bam_endian.c *
*********************/ *********************/
@ -62,11 +67,11 @@ void bam_header_destroy(bam_header_t *header)
if (header == 0) return; if (header == 0) return;
if (header->target_name) { if (header->target_name) {
for (i = 0; i < header->n_targets; ++i) for (i = 0; i < header->n_targets; ++i)
free(header->target_name[i]); if (header->target_name[i]) free(header->target_name[i]);
if (header->target_len) free(header->target_len);
free(header->target_name); free(header->target_name);
free(header->target_len);
} }
free(header->text); if (header->text) free(header->text);
free(header); free(header);
} }
@ -80,28 +85,33 @@ bam_header_t *bam_header_read(bamFile fp)
magic_len = bam_read(fp, buf, 4); magic_len = bam_read(fp, buf, 4);
if (magic_len != 4 || strncmp(buf, "BAM\001", 4) != 0) { if (magic_len != 4 || strncmp(buf, "BAM\001", 4) != 0) {
fprintf(stderr, "[bam_header_read] invalid BAM binary header (this is not a BAM file).\n"); fprintf(stderr, "[bam_header_read] invalid BAM binary header (this is not a BAM file).\n");
return 0; return NULL;
} }
header = bam_header_init(); header = bam_header_init();
// read plain text and the number of reference sequences // read plain text and the number of reference sequences
bam_read(fp, &header->l_text, 4); if (bam_read(fp, &header->l_text, 4) != 4) goto fail;
if (bam_is_be) bam_swap_endian_4p(&header->l_text); if (bam_is_be) bam_swap_endian_4p(&header->l_text);
header->text = (char*)calloc(header->l_text + 1, 1); header->text = (char*)calloc(header->l_text + 1, 1);
bam_read(fp, header->text, header->l_text); if (bam_read(fp, header->text, header->l_text) != header->l_text) goto fail;
bam_read(fp, &header->n_targets, 4); if (bam_read(fp, &header->n_targets, 4) != 4) goto fail;
if (bam_is_be) bam_swap_endian_4p(&header->n_targets); if (bam_is_be) bam_swap_endian_4p(&header->n_targets);
// read reference sequence names and lengths // read reference sequence names and lengths
header->target_name = (char**)calloc(header->n_targets, sizeof(char*)); header->target_name = (char**)calloc(header->n_targets, sizeof(char*));
header->target_len = (uint32_t*)calloc(header->n_targets, 4); header->target_len = (uint32_t*)calloc(header->n_targets, 4);
for (i = 0; i != header->n_targets; ++i) { for (i = 0; i != header->n_targets; ++i) {
bam_read(fp, &name_len, 4); if (bam_read(fp, &name_len, 4) != 4) goto fail;
if (bam_is_be) bam_swap_endian_4p(&name_len); if (bam_is_be) bam_swap_endian_4p(&name_len);
header->target_name[i] = (char*)calloc(name_len, 1); header->target_name[i] = (char*)calloc(name_len, 1);
bam_read(fp, header->target_name[i], name_len); if (bam_read(fp, header->target_name[i], name_len) != name_len) {
bam_read(fp, &header->target_len[i], 4); goto fail;
}
if (bam_read(fp, &header->target_len[i], 4) != 4) goto fail;
if (bam_is_be) bam_swap_endian_4p(&header->target_len[i]); if (bam_is_be) bam_swap_endian_4p(&header->target_len[i]);
} }
return header; return header;
fail:
bam_header_destroy(header);
return NULL;
} }
static void swap_endian_data(const bam1_core_t *c, int data_len, uint8_t *data) static void swap_endian_data(const bam1_core_t *c, int data_len, uint8_t *data)
@ -153,3 +163,48 @@ int bam_read1(bamFile fp, bam1_t *b)
if (bam_is_be) swap_endian_data(c, b->data_len, b->data); if (bam_is_be) swap_endian_data(c, b->data_len, b->data);
return 4 + block_len; return 4 + block_len;
} }
#ifdef USE_VERBOSE_ZLIB_WRAPPERS
// Versions of gzopen, gzread and gzclose that print up error messages
gzFile bamlite_gzopen(const char *fn, const char *mode) {
gzFile fp;
if (strcmp(fn, "-") == 0) {
fp = gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode);
if (!fp) {
fprintf(stderr, "Couldn't open %s : %s",
(strstr(mode, "r"))? "stdin" : "stdout",
strerror(errno));
}
return fp;
}
if ((fp = gzopen(fn, mode)) == 0) {
fprintf(stderr, "Couldn't open %s : %s\n", fn,
errno ? strerror(errno) : "Out of memory");
}
return fp;
}
int bamlite_gzread(gzFile file, void *ptr, unsigned int len) {
int ret = gzread(file, ptr, len);
if (ret < 0) {
int errnum = 0;
const char *msg = gzerror(file, &errnum);
fprintf(stderr, "gzread error: %s\n",
Z_ERRNO == errnum ? strerror(errno) : msg);
}
return ret;
}
int bamlite_gzclose(gzFile file) {
int ret = gzclose(file);
if (Z_OK != ret) {
fprintf(stderr, "gzclose error: %s\n",
Z_ERRNO == ret ? strerror(errno) : zError(ret));
}
return ret;
}
#endif /* USE_VERBOSE_ZLIB_WRAPPERS */

View File

@ -4,11 +4,25 @@
#include <stdint.h> #include <stdint.h>
#include <zlib.h> #include <zlib.h>
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
#define USE_VERBOSE_ZLIB_WRAPPERS
typedef gzFile bamFile; typedef gzFile bamFile;
#ifdef USE_VERBOSE_ZLIB_WRAPPERS
/* These print error messages on failure */
# define bam_open(fn, mode) bamlite_gzopen(fn, mode)
# define bam_dopen(fd, mode) gzdopen(fd, mode)
# define bam_close(fp) bamlite_gzclose(fp)
# define bam_read(fp, buf, size) bamlite_gzread(fp, buf, size)
#else
# define bam_open(fn, mode) gzopen(fn, mode) # define bam_open(fn, mode) gzopen(fn, mode)
# define bam_dopen(fd, mode) gzdopen(fd, mode) # define bam_dopen(fd, mode) gzdopen(fd, mode)
# define bam_close(fp) gzclose(fp) # define bam_close(fp) gzclose(fp)
# define bam_read(fp, buf, size) gzread(fp, buf, size) # define bam_read(fp, buf, size) gzread(fp, buf, size)
#endif /* USE_VERBOSE_ZLIB_WRAPPERS */
typedef struct { typedef struct {
int32_t n_targets; int32_t n_targets;
@ -87,6 +101,12 @@ extern "C" {
bam_header_t *bam_header_read(bamFile fp); bam_header_t *bam_header_read(bamFile fp);
int bam_read1(bamFile fp, bam1_t *b); int bam_read1(bamFile fp, bam1_t *b);
#ifdef USE_VERBOSE_ZLIB_WRAPPERS
gzFile bamlite_gzopen(const char *fn, const char *mode);
int bamlite_gzread(gzFile file, void *ptr, unsigned int len);
int bamlite_gzclose(gzFile file);
#endif /* USE_VERBOSE_ZLIB_WRAPPERS */
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -30,12 +30,17 @@
#include <string.h> #include <string.h>
#include <zlib.h> #include <zlib.h>
#include <unistd.h> #include <unistd.h>
#include <errno.h>
#include "bntseq.h" #include "bntseq.h"
#include "utils.h" #include "utils.h"
#include "kseq.h" #include "kseq.h"
KSEQ_DECLARE(gzFile) KSEQ_DECLARE(gzFile)
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
unsigned char nst_nt4_table[256] = { unsigned char nst_nt4_table[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
@ -63,25 +68,27 @@ void bns_dump(const bntseq_t *bns, const char *prefix)
{ // dump .ann { // dump .ann
strcpy(str, prefix); strcat(str, ".ann"); strcpy(str, prefix); strcat(str, ".ann");
fp = xopen(str, "w"); fp = xopen(str, "w");
fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->seed); err_fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->seed);
for (i = 0; i != bns->n_seqs; ++i) { for (i = 0; i != bns->n_seqs; ++i) {
bntann1_t *p = bns->anns + i; bntann1_t *p = bns->anns + i;
fprintf(fp, "%d %s", p->gi, p->name); err_fprintf(fp, "%d %s", p->gi, p->name);
if (p->anno[0]) fprintf(fp, " %s\n", p->anno); if (p->anno[0]) err_fprintf(fp, " %s\n", p->anno);
else fprintf(fp, "\n"); else err_fprintf(fp, "\n");
fprintf(fp, "%lld %d %d\n", (long long)p->offset, p->len, p->n_ambs); err_fprintf(fp, "%lld %d %d\n", (long long)p->offset, p->len, p->n_ambs);
} }
fclose(fp); err_fflush(fp);
err_fclose(fp);
} }
{ // dump .amb { // dump .amb
strcpy(str, prefix); strcat(str, ".amb"); strcpy(str, prefix); strcat(str, ".amb");
fp = xopen(str, "w"); fp = xopen(str, "w");
fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->n_holes); err_fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->n_holes);
for (i = 0; i != bns->n_holes; ++i) { for (i = 0; i != bns->n_holes; ++i) {
bntamb1_t *p = bns->ambs + i; bntamb1_t *p = bns->ambs + i;
fprintf(fp, "%lld %d %c\n", (long long)p->offset, p->len, p->amb); err_fprintf(fp, "%lld %d %c\n", (long long)p->offset, p->len, p->amb);
} }
fclose(fp); err_fflush(fp);
err_fclose(fp);
} }
} }
@ -89,13 +96,16 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c
{ {
char str[1024]; char str[1024];
FILE *fp; FILE *fp;
const char *fname;
bntseq_t *bns; bntseq_t *bns;
long long xx; long long xx;
int i; int i;
int scanres;
bns = (bntseq_t*)calloc(1, sizeof(bntseq_t)); bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
{ // read .ann { // read .ann
fp = xopen(ann_filename, "r"); fp = xopen(fname = ann_filename, "r");
fscanf(fp, "%lld%d%u", &xx, &bns->n_seqs, &bns->seed); scanres = fscanf(fp, "%lld%d%u", &xx, &bns->n_seqs, &bns->seed);
if (scanres != 3) goto badread;
bns->l_pac = xx; bns->l_pac = xx;
bns->anns = (bntann1_t*)calloc(bns->n_seqs, sizeof(bntann1_t)); bns->anns = (bntann1_t*)calloc(bns->n_seqs, sizeof(bntann1_t));
for (i = 0; i < bns->n_seqs; ++i) { for (i = 0; i < bns->n_seqs; ++i) {
@ -103,39 +113,54 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c
char *q = str; char *q = str;
int c; int c;
// read gi and sequence name // read gi and sequence name
fscanf(fp, "%u%s", &p->gi, str); scanres = fscanf(fp, "%u%s", &p->gi, str);
if (scanres != 2) goto badread;
p->name = strdup(str); p->name = strdup(str);
// read fasta comments // read fasta comments
while ((c = fgetc(fp)) != '\n' && c != EOF) *q++ = c; while (str - q < sizeof(str) - 1 && (c = fgetc(fp)) != '\n' && c != EOF) *q++ = c;
while (c != '\n' && c != EOF) c = fgetc(fp);
if (c == EOF) {
scanres = EOF;
goto badread;
}
*q = 0; *q = 0;
if (q - str > 1) p->anno = strdup(str + 1); // skip leading space if (q - str > 1) p->anno = strdup(str + 1); // skip leading space
else p->anno = strdup(""); else p->anno = strdup("");
// read the rest // read the rest
fscanf(fp, "%lld%d%d", &xx, &p->len, &p->n_ambs); scanres = fscanf(fp, "%lld%d%d", &xx, &p->len, &p->n_ambs);
if (scanres != 3) goto badread;
p->offset = xx; p->offset = xx;
} }
fclose(fp); err_fclose(fp);
} }
{ // read .amb { // read .amb
int64_t l_pac; int64_t l_pac;
int32_t n_seqs; int32_t n_seqs;
fp = xopen(amb_filename, "r"); fp = xopen(fname = amb_filename, "r");
fscanf(fp, "%lld%d%d", &xx, &n_seqs, &bns->n_holes); scanres = fscanf(fp, "%lld%d%d", &xx, &n_seqs, &bns->n_holes);
if (scanres != 3) goto badread;
l_pac = xx; l_pac = xx;
xassert(l_pac == bns->l_pac && n_seqs == bns->n_seqs, "inconsistent .ann and .amb files."); xassert(l_pac == bns->l_pac && n_seqs == bns->n_seqs, "inconsistent .ann and .amb files.");
bns->ambs = bns->n_holes? (bntamb1_t*)calloc(bns->n_holes, sizeof(bntamb1_t)) : 0; bns->ambs = bns->n_holes? (bntamb1_t*)calloc(bns->n_holes, sizeof(bntamb1_t)) : 0;
for (i = 0; i < bns->n_holes; ++i) { for (i = 0; i < bns->n_holes; ++i) {
bntamb1_t *p = bns->ambs + i; bntamb1_t *p = bns->ambs + i;
fscanf(fp, "%lld%d%s", &xx, &p->len, str); scanres = fscanf(fp, "%lld%d%s", &xx, &p->len, str);
if (scanres != 3) goto badread;
p->offset = xx; p->offset = xx;
p->amb = str[0]; p->amb = str[0];
} }
fclose(fp); err_fclose(fp);
} }
{ // open .pac { // open .pac
bns->fp_pac = xopen(pac_filename, "rb"); bns->fp_pac = xopen(pac_filename, "rb");
} }
return bns; return bns;
badread:
if (EOF == scanres) {
err_fatal(__func__, "Error reading %s : %s\n", fname, ferror(fp) ? strerror(errno) : "Unexpected end of file");
}
err_fatal(__func__, "Parse error reading %s\n", fname);
} }
bntseq_t *bns_restore(const char *prefix) bntseq_t *bns_restore(const char *prefix)
@ -152,7 +177,7 @@ void bns_destroy(bntseq_t *bns)
if (bns == 0) return; if (bns == 0) return;
else { else {
int i; int i;
if (bns->fp_pac) fclose(bns->fp_pac); if (bns->fp_pac) err_fclose(bns->fp_pac);
free(bns->ambs); free(bns->ambs);
for (i = 0; i < bns->n_seqs; ++i) { for (i = 0; i < bns->n_seqs; ++i) {
free(bns->anns[i].name); free(bns->anns[i].name);
@ -250,16 +275,17 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)
ret = bns->l_pac; ret = bns->l_pac;
{ // finalize .pac file { // finalize .pac file
ubyte_t ct; ubyte_t ct;
fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp); err_fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp);
// the following codes make the pac file size always (l_pac/4+1+1) // the following codes make the pac file size always (l_pac/4+1+1)
if (bns->l_pac % 4 == 0) { if (bns->l_pac % 4 == 0) {
ct = 0; ct = 0;
fwrite(&ct, 1, 1, fp); err_fwrite(&ct, 1, 1, fp);
} }
ct = bns->l_pac % 4; ct = bns->l_pac % 4;
fwrite(&ct, 1, 1, fp); err_fwrite(&ct, 1, 1, fp);
// close .pac file // close .pac file
fclose(fp); err_fflush(fp);
err_fclose(fp);
} }
bns_dump(bns, prefix); bns_dump(bns, prefix);
bns_destroy(bns); bns_destroy(bns);
@ -283,7 +309,7 @@ int bwa_fa2pac(int argc, char *argv[])
} }
fp = xzopen(argv[optind], "r"); fp = xzopen(argv[optind], "r");
bns_fasta2bntseq(fp, (optind+1 < argc)? argv[optind+1] : argv[optind], for_only); bns_fasta2bntseq(fp, (optind+1 < argc)? argv[optind+1] : argv[optind], for_only);
gzclose(fp); err_gzclose(fp);
return 0; return 0;
} }

8
bwa.c
View File

@ -7,6 +7,10 @@
#include "ksw.h" #include "ksw.h"
#include "utils.h" #include "utils.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
int bwa_verbose = 3; int bwa_verbose = 3;
char bwa_rg_id[256]; char bwa_rg_id[256];
@ -258,8 +262,8 @@ bwaidx_t *bwa_idx_load(const char *hint, int which)
idx->bns = bns_restore(prefix); idx->bns = bns_restore(prefix);
if (which & BWA_IDX_PAC) { if (which & BWA_IDX_PAC) {
idx->pac = calloc(idx->bns->l_pac/4+1, 1); idx->pac = calloc(idx->bns->l_pac/4+1, 1);
fread(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence err_fread_noeof(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence
fclose(idx->bns->fp_pac); err_fclose(idx->bns->fp_pac);
idx->bns->fp_pac = 0; idx->bns->fp_pac = 0;
} }
} }

View File

@ -15,6 +15,10 @@
#include "ksort.h" #include "ksort.h"
#include "utils.h" #include "utils.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
/* Theory on probability and scoring *ungapped* alignment /* Theory on probability and scoring *ungapped* alignment
* *
* s'(a,b) = log[P(b|a)/P(b)] = log[4P(b|a)], assuming uniform base distribution * s'(a,b) = log[P(b|a)/P(b)] = log[4P(b|a)], assuming uniform base distribution
@ -229,16 +233,16 @@ void mem_print_chain(const bntseq_t *bns, mem_chain_v *chn)
int i, j; int i, j;
for (i = 0; i < chn->n; ++i) { for (i = 0; i < chn->n; ++i) {
mem_chain_t *p = &chn->a[i]; mem_chain_t *p = &chn->a[i];
printf("%d", p->n); err_printf("%d", p->n);
for (j = 0; j < p->n; ++j) { for (j = 0; j < p->n; ++j) {
bwtint_t pos; bwtint_t pos;
int is_rev, ref_id; int is_rev, ref_id;
pos = bns_depos(bns, p->seeds[j].rbeg, &is_rev); pos = bns_depos(bns, p->seeds[j].rbeg, &is_rev);
if (is_rev) pos -= p->seeds[j].len - 1; if (is_rev) pos -= p->seeds[j].len - 1;
bns_cnt_ambi(bns, pos, p->seeds[j].len, &ref_id); bns_cnt_ambi(bns, pos, p->seeds[j].len, &ref_id);
printf("\t%d,%d,%ld(%s:%c%ld)", p->seeds[j].len, p->seeds[j].qbeg, (long)p->seeds[j].rbeg, bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1); err_printf("\t%d,%d,%ld(%s:%c%ld)", p->seeds[j].len, p->seeds[j].qbeg, (long)p->seeds[j].rbeg, bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1);
} }
putchar('\n'); err_putchar('\n');
} }
} }

View File

@ -8,6 +8,11 @@
#include "utils.h" #include "utils.h"
#include "ksw.h" #include "ksw.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
#define MIN_RATIO 0.8 #define MIN_RATIO 0.8
#define MIN_DIR_CNT 10 #define MIN_DIR_CNT 10
#define MIN_DIR_RATIO 0.05 #define MIN_DIR_RATIO 0.05

24
bwape.c
View File

@ -12,6 +12,10 @@
#include "bwa.h" #include "bwa.h"
#include "ksw.h" #include "ksw.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
typedef struct { typedef struct {
int n; int n;
bwtint_t *a; bwtint_t *a;
@ -280,11 +284,11 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw
p[j] = seqs[j] + i; p[j] = seqs[j] + i;
p[j]->n_multi = 0; p[j]->n_multi = 0;
p[j]->extra_flag |= SAM_FPD | (j == 0? SAM_FR1 : SAM_FR2); p[j]->extra_flag |= SAM_FPD | (j == 0? SAM_FR1 : SAM_FR2);
fread(&n_aln, 4, 1, fp_sa[j]); err_fread_noeof(&n_aln, 4, 1, fp_sa[j]);
if (n_aln > kv_max(d->aln[j])) if (n_aln > kv_max(d->aln[j]))
kv_resize(bwt_aln1_t, d->aln[j], n_aln); kv_resize(bwt_aln1_t, d->aln[j], n_aln);
d->aln[j].n = n_aln; d->aln[j].n = n_aln;
fread(d->aln[j].a, sizeof(bwt_aln1_t), n_aln, fp_sa[j]); err_fread_noeof(d->aln[j].a, sizeof(bwt_aln1_t), n_aln, fp_sa[j]);
kv_copy(bwt_aln1_t, buf[j][i].aln, d->aln[j]); // backup d->aln[j] kv_copy(bwt_aln1_t, buf[j][i].aln, d->aln[j]); // backup d->aln[j]
// generate SE alignment and mapping quality // generate SE alignment and mapping quality
bwa_aln2seq(n_aln, d->aln[j].a, p[j]); bwa_aln2seq(n_aln, d->aln[j].a, p[j]);
@ -498,8 +502,8 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs,
// load reference sequence // load reference sequence
if (_pacseq == 0) { if (_pacseq == 0) {
pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1); pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
rewind(bns->fp_pac); err_rewind(bns->fp_pac);
fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); err_fread_noeof(pacseq, 1, bns->l_pac/4+1, bns->fp_pac);
} else pacseq = (ubyte_t*)_pacseq; } else pacseq = (ubyte_t*)_pacseq;
if (!popt->is_sw || ii->avg < 0.0) return pacseq; if (!popt->is_sw || ii->avg < 0.0) return pacseq;
@ -644,18 +648,18 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
g_hash = kh_init(b128); g_hash = kh_init(b128);
last_ii.avg = -1.0; last_ii.avg = -1.0;
fread(&opt, sizeof(gap_opt_t), 1, fp_sa[0]); err_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa[0]);
ks[0] = bwa_open_reads(opt.mode, fn_fa[0]); ks[0] = bwa_open_reads(opt.mode, fn_fa[0]);
opt0 = opt; opt0 = opt;
fread(&opt, sizeof(gap_opt_t), 1, fp_sa[1]); // overwritten! err_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa[1]); // overwritten!
ks[1] = bwa_open_reads(opt.mode, fn_fa[1]); ks[1] = bwa_open_reads(opt.mode, fn_fa[1]);
{ // for Illumina alignment only { // for Illumina alignment only
if (popt->is_preload) { if (popt->is_preload) {
strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str); strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str);
strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt); strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt);
pac = (ubyte_t*)calloc(bns->l_pac/4+1, 1); pac = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
rewind(bns->fp_pac); err_rewind(bns->fp_pac);
fread(pac, 1, bns->l_pac/4+1, bns->fp_pac); err_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac);
} }
} }
@ -709,7 +713,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
bns_destroy(bns); bns_destroy(bns);
for (i = 0; i < 2; ++i) { for (i = 0; i < 2; ++i) {
bwa_seq_close(ks[i]); bwa_seq_close(ks[i]);
fclose(fp_sa[i]); err_fclose(fp_sa[i]);
} }
for (iter = kh_begin(g_hash); iter != kh_end(g_hash); ++iter) for (iter = kh_begin(g_hash); iter != kh_end(g_hash); ++iter)
if (kh_exist(g_hash, iter)) free(kh_val(g_hash, iter).a); if (kh_exist(g_hash, iter)) free(kh_val(g_hash, iter).a);
@ -765,7 +769,7 @@ int bwa_sai2sam_pe(int argc, char *argv[])
} }
if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) { if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) {
fprintf(stderr, "[%s] fail to locate the index\n", __func__); fprintf(stderr, "[%s] fail to locate the index\n", __func__);
return 0; return 1;
} }
bwa_sai2sam_pe_core(prefix, argv + optind + 1, argv + optind+3, popt, rg_line); bwa_sai2sam_pe_core(prefix, argv + optind + 1, argv + optind+3, popt, rg_line);
free(prefix); free(popt); free(prefix); free(popt);

56
bwase.c
View File

@ -12,6 +12,10 @@
#include "bwa.h" #include "bwa.h"
#include "ksw.h" #include "ksw.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
int g_log_n[256]; int g_log_n[256];
void bwa_print_sam_PG(); void bwa_print_sam_PG();
@ -312,8 +316,8 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t
if (!_pacseq) { if (!_pacseq) {
pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1); pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
rewind(bns->fp_pac); err_rewind(bns->fp_pac);
fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); err_fread_noeof(pacseq, 1, bns->l_pac/4+1, bns->fp_pac);
} else pacseq = _pacseq; } else pacseq = _pacseq;
for (i = 0; i != n_seqs; ++i) { for (i = 0; i != n_seqs; ++i) {
bwa_seq_t *s = seqs + i; bwa_seq_t *s = seqs + i;
@ -381,6 +385,26 @@ static int64_t pos_5(const bwa_seq_t *p)
return -1; return -1;
} }
void bwa_print_seq(FILE *stream, bwa_seq_t *seq) {
char buffer[4096];
const int bsz = sizeof(buffer);
int i, j, l;
if (seq->strand == 0) {
for (i = 0; i < seq->full_len; i += bsz) {
l = seq->full_len - i > bsz ? bsz : seq->full_len - i;
for (j = 0; j < l; j++) buffer[j] = "ACGTN"[seq->seq[i + j]];
err_fwrite(buffer, 1, l, stream);
}
} else {
for (i = seq->full_len - 1; i >= 0; i -= bsz) {
l = i + 1 > bsz ? bsz : i + 1;
for (j = 0; j < l; j++) buffer[j] = "TGCAN"[seq->seq[i - j]];
err_fwrite(buffer, 1, l, stream);
}
}
}
void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2) void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2)
{ {
int j; int j;
@ -432,10 +456,8 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
else err_printf("\t*\t0\t0\t"); else err_printf("\t*\t0\t0\t");
// print sequence and quality // print sequence and quality
if (p->strand == 0) bwa_print_seq(stdout, p);
for (j = 0; j != p->full_len; ++j) putchar("ACGTN"[(int)p->seq[j]]); err_putchar('\t');
else for (j = 0; j != p->full_len; ++j) putchar("TGCAN"[p->seq[p->full_len - 1 - j]]);
putchar('\t');
if (p->qual) { if (p->qual) {
if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality
err_printf("%s", p->qual); err_printf("%s", p->qual);
@ -478,14 +500,16 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
} }
} }
} }
putchar('\n'); err_putchar('\n');
} else { // this read has no match } else { // this read has no match
ubyte_t *s = p->strand? p->rseq : p->seq; //ubyte_t *s = p->strand? p->rseq : p->seq;
int flag = p->extra_flag | SAM_FSU; int flag = p->extra_flag | SAM_FSU;
if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU; if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU;
err_printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag); err_printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag);
for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]); //Why did this work differently to the version above??
putchar('\t'); //for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]);
bwa_print_seq(stdout, p);
err_putchar('\t');
if (p->qual) { if (p->qual) {
if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality
err_printf("%s", p->qual); err_printf("%s", p->qual);
@ -493,7 +517,7 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
if (bwa_rg_id[0]) err_printf("\tRG:Z:%s", bwa_rg_id); if (bwa_rg_id[0]) err_printf("\tRG:Z:%s", bwa_rg_id);
if (p->bc[0]) err_printf("\tBC:Z:%s", p->bc); if (p->bc[0]) err_printf("\tBC:Z:%s", p->bc);
if (p->clip_len < p->full_len) err_printf("\tXC:i:%d", p->clip_len); if (p->clip_len < p->full_len) err_printf("\tXC:i:%d", p->clip_len);
putchar('\n'); err_putchar('\n');
} }
} }
@ -522,7 +546,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
fp_sa = xopen(fn_sa, "r"); fp_sa = xopen(fn_sa, "r");
m_aln = 0; m_aln = 0;
fread(&opt, sizeof(gap_opt_t), 1, fp_sa); err_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa);
bwa_print_sam_hdr(bns, rg_line); bwa_print_sam_hdr(bns, rg_line);
//bwa_print_sam_PG(); //bwa_print_sam_PG();
// set ks // set ks
@ -536,12 +560,12 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
for (i = 0; i < n_seqs; ++i) { for (i = 0; i < n_seqs; ++i) {
bwa_seq_t *p = seqs + i; bwa_seq_t *p = seqs + i;
int n_aln; int n_aln;
fread(&n_aln, 4, 1, fp_sa); err_fread_noeof(&n_aln, 4, 1, fp_sa);
if (n_aln > m_aln) { if (n_aln > m_aln) {
m_aln = n_aln; m_aln = n_aln;
aln = (bwt_aln1_t*)realloc(aln, sizeof(bwt_aln1_t) * m_aln); aln = (bwt_aln1_t*)realloc(aln, sizeof(bwt_aln1_t) * m_aln);
} }
fread(aln, sizeof(bwt_aln1_t), n_aln, fp_sa); err_fread_noeof(aln, sizeof(bwt_aln1_t), n_aln, fp_sa);
bwa_aln2seq_core(n_aln, aln, p, 1, n_occ); bwa_aln2seq_core(n_aln, aln, p, 1, n_occ);
} }
@ -565,7 +589,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
// destroy // destroy
bwa_seq_close(ks); bwa_seq_close(ks);
bns_destroy(bns); bns_destroy(bns);
fclose(fp_sa); err_fclose(fp_sa);
free(aln); free(aln);
} }
@ -591,7 +615,7 @@ int bwa_sai2sam_se(int argc, char *argv[])
} }
if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) { if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) {
fprintf(stderr, "[%s] fail to locate the index\n", __func__); fprintf(stderr, "[%s] fail to locate the index\n", __func__);
return 0; return 1;
} }
bwa_sai2sam_se_core(prefix, argv[optind+1], argv[optind+2], n_occ, rg_line); bwa_sai2sam_se_core(prefix, argv[optind+1], argv[optind+2], n_occ, rg_line);
free(prefix); free(prefix);

View File

@ -7,6 +7,10 @@
#include "kseq.h" #include "kseq.h"
KSEQ_DECLARE(gzFile) KSEQ_DECLARE(gzFile)
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
extern unsigned char nst_nt4_table[256]; extern unsigned char nst_nt4_table[256];
static char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 }; static char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 };
@ -26,6 +30,7 @@ bwa_seqio_t *bwa_bam_open(const char *fn, int which)
bs->is_bam = 1; bs->is_bam = 1;
bs->which = which; bs->which = which;
bs->fp = bam_open(fn, "r"); bs->fp = bam_open(fn, "r");
if (0 == bs->fp) err_fatal_simple("Couldn't open bam file");
h = bam_header_read(bs->fp); h = bam_header_read(bs->fp);
bam_header_destroy(h); bam_header_destroy(h);
return bs; return bs;
@ -44,9 +49,10 @@ bwa_seqio_t *bwa_seq_open(const char *fn)
void bwa_seq_close(bwa_seqio_t *bs) void bwa_seq_close(bwa_seqio_t *bs)
{ {
if (bs == 0) return; if (bs == 0) return;
if (bs->is_bam) bam_close(bs->fp); if (bs->is_bam) {
else { if (0 != bam_close(bs->fp)) err_fatal_simple("Error closing bam file");
gzclose(bs->ks->f->f); } else {
err_gzclose(bs->ks->f->f);
kseq_destroy(bs->ks); kseq_destroy(bs->ks);
} }
free(bs); free(bs);
@ -90,11 +96,12 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com
int n_seqs, l, i; int n_seqs, l, i;
long n_trimmed = 0, n_tot = 0; long n_trimmed = 0, n_tot = 0;
bam1_t *b; bam1_t *b;
int res;
b = bam_init1(); b = bam_init1();
n_seqs = 0; n_seqs = 0;
seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t)); seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
while (bam_read1(bs->fp, b) >= 0) { while ((res = bam_read1(bs->fp, b)) >= 0) {
uint8_t *s, *q; uint8_t *s, *q;
int go = 0; int go = 0;
if ((bs->which & 1) && (b->core.flag & BAM_FREAD1)) go = 1; if ((bs->which & 1) && (b->core.flag & BAM_FREAD1)) go = 1;
@ -126,6 +133,7 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com
p->name = strdup((const char*)bam1_qname(b)); p->name = strdup((const char*)bam1_qname(b));
if (n_seqs == n_needed) break; if (n_seqs == n_needed) break;
} }
if (res < 0 && res != -1) err_fatal_simple("Error reading bam file");
*n = n_seqs; *n = n_seqs;
if (n_seqs && trim_qual >= 1) if (n_seqs && trim_qual >= 1)
fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot); fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot);
@ -184,7 +192,7 @@ bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int tri
p->qual = 0; p->qual = 0;
p->full_len = p->clip_len = p->len = l; p->full_len = p->clip_len = p->len = l;
n_tot += p->full_len; n_tot += p->full_len;
p->seq = (ubyte_t*)calloc(p->len, 1); p->seq = (ubyte_t*)calloc(p->full_len, 1);
for (i = 0; i != p->full_len; ++i) for (i = 0; i != p->full_len; ++i)
p->seq[i] = nst_nt4_table[(int)seq->seq.s[i]]; p->seq[i] = nst_nt4_table[(int)seq->seq.s[i]];
if (seq->qual.l) { // copy quality if (seq->qual.l) { // copy quality

54
bwt.c
View File

@ -34,6 +34,10 @@
#include "bwt.h" #include "bwt.h"
#include "kvec.h" #include "kvec.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
void bwt_gen_cnt_table(bwt_t *bwt) void bwt_gen_cnt_table(bwt_t *bwt)
{ {
int i, j; int i, j;
@ -67,10 +71,6 @@ void bwt_cal_sa(bwt_t *bwt, int intv)
bwt->sa_intv = intv; bwt->sa_intv = intv;
bwt->n_sa = (bwt->seq_len + intv) / intv; bwt->n_sa = (bwt->seq_len + intv) / intv;
bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t)); bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t));
if (bwt->sa == 0) {
fprintf(stderr, "[%s] Fail to allocate %.3fMB memory. Abort!\n", __func__, bwt->n_sa * sizeof(bwtint_t) / 1024.0/1024.0);
abort();
}
// calculate SA value // calculate SA value
isa = 0; sa = bwt->seq_len; isa = 0; sa = bwt->seq_len;
for (i = 0; i < bwt->seq_len; ++i) { for (i = 0; i < bwt->seq_len; ++i) {
@ -354,22 +354,24 @@ void bwt_dump_bwt(const char *fn, const bwt_t *bwt)
{ {
FILE *fp; FILE *fp;
fp = xopen(fn, "wb"); fp = xopen(fn, "wb");
fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
fwrite(bwt->bwt, 4, bwt->bwt_size, fp); err_fwrite(bwt->bwt, 4, bwt->bwt_size, fp);
fclose(fp); err_fflush(fp);
err_fclose(fp);
} }
void bwt_dump_sa(const char *fn, const bwt_t *bwt) void bwt_dump_sa(const char *fn, const bwt_t *bwt)
{ {
FILE *fp; FILE *fp;
fp = xopen(fn, "wb"); fp = xopen(fn, "wb");
fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); err_fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp);
fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp); err_fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp);
fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp); err_fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp);
fclose(fp); err_fflush(fp);
err_fclose(fp);
} }
static bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a) static bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a)
@ -378,7 +380,7 @@ static bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a)
bwtint_t offset = 0; bwtint_t offset = 0;
while (size) { while (size) {
int x = bufsize < size? bufsize : size; int x = bufsize < size? bufsize : size;
if ((x = fread(a + offset, 1, x, fp)) == 0) break; if ((x = err_fread_noeof(a + offset, 1, x, fp)) == 0) break;
size -= x; offset += x; size -= x; offset += x;
} }
return offset; return offset;
@ -391,11 +393,11 @@ void bwt_restore_sa(const char *fn, bwt_t *bwt)
bwtint_t primary; bwtint_t primary;
fp = xopen(fn, "rb"); fp = xopen(fn, "rb");
fread(&primary, sizeof(bwtint_t), 1, fp); err_fread_noeof(&primary, sizeof(bwtint_t), 1, fp);
xassert(primary == bwt->primary, "SA-BWT inconsistency: primary is not the same."); xassert(primary == bwt->primary, "SA-BWT inconsistency: primary is not the same.");
fread(skipped, sizeof(bwtint_t), 4, fp); // skip err_fread_noeof(skipped, sizeof(bwtint_t), 4, fp); // skip
fread(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); err_fread_noeof(&bwt->sa_intv, sizeof(bwtint_t), 1, fp);
fread(&primary, sizeof(bwtint_t), 1, fp); err_fread_noeof(&primary, sizeof(bwtint_t), 1, fp);
xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same."); xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same.");
bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv; bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv;
@ -403,7 +405,7 @@ void bwt_restore_sa(const char *fn, bwt_t *bwt)
bwt->sa[0] = -1; bwt->sa[0] = -1;
fread_fix(fp, sizeof(bwtint_t) * (bwt->n_sa - 1), bwt->sa + 1); fread_fix(fp, sizeof(bwtint_t) * (bwt->n_sa - 1), bwt->sa + 1);
fclose(fp); err_fclose(fp);
} }
bwt_t *bwt_restore_bwt(const char *fn) bwt_t *bwt_restore_bwt(const char *fn)
@ -413,15 +415,15 @@ bwt_t *bwt_restore_bwt(const char *fn)
bwt = (bwt_t*)calloc(1, sizeof(bwt_t)); bwt = (bwt_t*)calloc(1, sizeof(bwt_t));
fp = xopen(fn, "rb"); fp = xopen(fn, "rb");
fseek(fp, 0, SEEK_END); err_fseek(fp, 0, SEEK_END);
bwt->bwt_size = (ftell(fp) - sizeof(bwtint_t) * 5) >> 2; bwt->bwt_size = (err_ftell(fp) - sizeof(bwtint_t) * 5) >> 2;
bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4); bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4);
fseek(fp, 0, SEEK_SET); err_fseek(fp, 0, SEEK_SET);
fread(&bwt->primary, sizeof(bwtint_t), 1, fp); err_fread_noeof(&bwt->primary, sizeof(bwtint_t), 1, fp);
fread(bwt->L2+1, sizeof(bwtint_t), 4, fp); err_fread_noeof(bwt->L2+1, sizeof(bwtint_t), 4, fp);
fread_fix(fp, bwt->bwt_size<<2, bwt->bwt); fread_fix(fp, bwt->bwt_size<<2, bwt->bwt);
bwt->seq_len = bwt->L2[4]; bwt->seq_len = bwt->L2[4];
fclose(fp); err_fclose(fp);
bwt_gen_cnt_table(bwt); bwt_gen_cnt_table(bwt);
return bwt; return bwt;

View File

@ -27,8 +27,13 @@
#include <string.h> #include <string.h>
#include <assert.h> #include <assert.h>
#include <stdint.h> #include <stdint.h>
#include <errno.h>
#include "QSufSort.h" #include "QSufSort.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
typedef uint64_t bgint_t; typedef uint64_t bgint_t;
typedef int64_t sbgint_t; typedef int64_t sbgint_t;
@ -1443,13 +1448,29 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB
packedFile = (FILE*)fopen(inputFileName, "rb"); packedFile = (FILE*)fopen(inputFileName, "rb");
if (packedFile == NULL) { if (packedFile == NULL) {
fprintf(stderr, "BWTIncConstructFromPacked() : Cannot open inputFileName!\n"); fprintf(stderr, "BWTIncConstructFromPacked() : Cannot open %s : %s\n",
inputFileName, strerror(errno));
exit(1); exit(1);
} }
fseek(packedFile, -1, SEEK_END); if (fseek(packedFile, -1, SEEK_END) != 0) {
fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n",
inputFileName, strerror(errno));
exit(1);
}
packedFileLen = ftell(packedFile); packedFileLen = ftell(packedFile);
fread(&lastByteLength, sizeof(unsigned char), 1, packedFile); if (packedFileLen == -1) {
fprintf(stderr, "BWTIncConstructFromPacked() : Can't ftell on %s : %s\n",
inputFileName, strerror(errno));
exit(1);
}
if (fread(&lastByteLength, sizeof(unsigned char), 1, packedFile) != 1) {
fprintf(stderr,
"BWTIncConstructFromPacked() : Can't read from %s : %s\n",
inputFileName,
ferror(packedFile)? strerror(errno) : "Unexpected end of file");
exit(1);
}
totalTextLength = TextLengthFromBytePacked(packedFileLen, BIT_PER_CHAR, lastByteLength); totalTextLength = TextLengthFromBytePacked(packedFileLen, BIT_PER_CHAR, lastByteLength);
bwtInc = BWTIncCreate(totalTextLength, initialMaxBuildSize, incMaxBuildSize); bwtInc = BWTIncCreate(totalTextLength, initialMaxBuildSize, incMaxBuildSize);
@ -1463,10 +1484,23 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB
} }
textSizeInByte = textToLoad / CHAR_PER_BYTE; // excluded the odd byte textSizeInByte = textToLoad / CHAR_PER_BYTE; // excluded the odd byte
fseek(packedFile, -2, SEEK_CUR); if (fseek(packedFile, -((long)textSizeInByte + 2), SEEK_CUR) != 0) {
fseek(packedFile, -((long)textSizeInByte), SEEK_CUR); fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n",
fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte + 1, packedFile); inputFileName, strerror(errno));
fseek(packedFile, -((long)textSizeInByte + 1), SEEK_CUR); exit(1);
}
if (fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte + 1, packedFile) != textSizeInByte + 1) {
fprintf(stderr,
"BWTIncConstructFromPacked() : Can't read from %s : %s\n",
inputFileName,
ferror(packedFile)? strerror(errno) : "Unexpected end of file");
exit(1);
}
if (fseek(packedFile, -((long)textSizeInByte + 1), SEEK_CUR) != 0) {
fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n",
inputFileName, strerror(errno));
exit(1);
}
ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad); ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad);
BWTIncConstruct(bwtInc, textToLoad); BWTIncConstruct(bwtInc, textToLoad);
@ -1479,9 +1513,23 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB
textToLoad = totalTextLength - processedTextLength; textToLoad = totalTextLength - processedTextLength;
} }
textSizeInByte = textToLoad / CHAR_PER_BYTE; textSizeInByte = textToLoad / CHAR_PER_BYTE;
fseek(packedFile, -((long)textSizeInByte), SEEK_CUR); if (fseek(packedFile, -((long)textSizeInByte), SEEK_CUR) != 0) {
fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte, packedFile); fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n",
fseek(packedFile, -((long)textSizeInByte), SEEK_CUR); inputFileName, strerror(errno));
exit(1);
}
if (fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte, packedFile) != textSizeInByte) {
fprintf(stderr,
"BWTIncConstructFromPacked() : Can't read from %s : %s\n",
inputFileName,
ferror(packedFile)? strerror(errno) : "Unexpected end of file");
exit(1);
}
if (fseek(packedFile, -((long)textSizeInByte), SEEK_CUR) != 0) {
fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n",
inputFileName, strerror(errno));
exit(1);
}
ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad); ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad);
BWTIncConstruct(bwtInc, textToLoad); BWTIncConstruct(bwtInc, textToLoad);
processedTextLength += textToLoad; processedTextLength += textToLoad;
@ -1526,15 +1574,28 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o
bwtFile = (FILE*)fopen(bwtFileName, "wb"); bwtFile = (FILE*)fopen(bwtFileName, "wb");
if (bwtFile == NULL) { if (bwtFile == NULL) {
fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Cannot open BWT code file!\n"); fprintf(stderr,
"BWTSaveBwtCodeAndOcc(): Cannot open %s for writing: %s\n",
bwtFileName, strerror(errno));
exit(1); exit(1);
} }
fwrite(&bwt->inverseSa0, sizeof(bgint_t), 1, bwtFile);
fwrite(bwt->cumulativeFreq + 1, sizeof(bgint_t), ALPHABET_SIZE, bwtFile);
bwtLength = BWTFileSizeInWord(bwt->textLength); bwtLength = BWTFileSizeInWord(bwt->textLength);
fwrite(bwt->bwtCode, sizeof(unsigned int), bwtLength, bwtFile);
fclose(bwtFile); if (fwrite(&bwt->inverseSa0, sizeof(bgint_t), 1, bwtFile) != 1
|| fwrite(bwt->cumulativeFreq + 1,
sizeof(bgint_t), ALPHABET_SIZE, bwtFile) != ALPHABET_SIZE
|| fwrite(bwt->bwtCode,
sizeof(unsigned int), bwtLength, bwtFile) != bwtLength) {
fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Error writing to %s : %s\n",
bwtFileName, strerror(errno));
exit(1);
}
if (fclose(bwtFile) != 0) {
fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Error on closing %s : %s\n",
bwtFileName, strerror(errno));
exit(1);
}
} }
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt) void bwt_bwtgen(const char *fn_pac, const char *fn_bwt)

View File

@ -3,6 +3,10 @@
#include <stdio.h> #include <stdio.h>
#include "bwt_lite.h" #include "bwt_lite.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
int is_sa(const uint8_t *T, uint32_t *SA, int n); int is_sa(const uint8_t *T, uint32_t *SA, int n);
int is_bwt(uint8_t *T, int n); int is_bwt(uint8_t *T, int n);

View File

@ -17,6 +17,10 @@
#include <pthread.h> #include <pthread.h>
#endif #endif
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
gap_opt_t *gap_init_opt() gap_opt_t *gap_init_opt()
{ {
gap_opt_t *o; gap_opt_t *o;
@ -306,7 +310,7 @@ int bwa_aln(int argc, char *argv[])
if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) { if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) {
fprintf(stderr, "[%s] fail to locate the index\n", __func__); fprintf(stderr, "[%s] fail to locate the index\n", __func__);
free(opt); free(opt);
return 0; return 1;
} }
bwa_aln_core(prefix, argv[optind+1], opt); bwa_aln_core(prefix, argv[optind+1], opt);
free(opt); free(prefix); free(opt); free(prefix);

View File

@ -4,6 +4,10 @@
#include "bwtgap.h" #include "bwtgap.h"
#include "bwtaln.h" #include "bwtaln.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
#define STATE_M 0 #define STATE_M 0
#define STATE_I 1 #define STATE_I 1
#define STATE_D 2 #define STATE_D 2

View File

@ -39,6 +39,11 @@
#include "divsufsort.h" #include "divsufsort.h"
#endif #endif
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
int is_bwt(ubyte_t *T, int n); int is_bwt(ubyte_t *T, int n);
int64_t bwa_seq_len(const char *fn_pac) int64_t bwa_seq_len(const char *fn_pac)
@ -47,10 +52,10 @@ int64_t bwa_seq_len(const char *fn_pac)
int64_t pac_len; int64_t pac_len;
ubyte_t c; ubyte_t c;
fp = xopen(fn_pac, "rb"); fp = xopen(fn_pac, "rb");
fseek(fp, -1, SEEK_END); err_fseek(fp, -1, SEEK_END);
pac_len = ftell(fp); pac_len = err_ftell(fp);
fread(&c, 1, 1, fp); err_fread_noeof(&c, 1, 1, fp);
fclose(fp); err_fclose(fp);
return (pac_len - 1) * 4 + (int)c; return (pac_len - 1) * 4 + (int)c;
} }
@ -70,8 +75,8 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is)
// prepare sequence // prepare sequence
pac_size = (bwt->seq_len>>2) + ((bwt->seq_len&3) == 0? 0 : 1); pac_size = (bwt->seq_len>>2) + ((bwt->seq_len&3) == 0? 0 : 1);
buf2 = (ubyte_t*)calloc(pac_size, 1); buf2 = (ubyte_t*)calloc(pac_size, 1);
fread(buf2, 1, pac_size, fp); err_fread_noeof(buf2, 1, pac_size, fp);
fclose(fp); err_fclose(fp);
memset(bwt->L2, 0, 5 * 4); memset(bwt->L2, 0, 5 * 4);
buf = (ubyte_t*)calloc(bwt->seq_len + 1, 1); buf = (ubyte_t*)calloc(bwt->seq_len + 1, 1);
for (i = 0; i < bwt->seq_len; ++i) { for (i = 0; i < bwt->seq_len; ++i) {
@ -229,7 +234,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command
fprintf(stderr, "[bwa_index] Pack FASTA... "); fprintf(stderr, "[bwa_index] Pack FASTA... ");
l_pac = bns_fasta2bntseq(fp, prefix, 0); l_pac = bns_fasta2bntseq(fp, prefix, 0);
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
gzclose(fp); err_gzclose(fp);
} }
if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
{ {
@ -263,7 +268,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command
fprintf(stderr, "[bwa_index] Pack forward-only FASTA... "); fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
l_pac = bns_fasta2bntseq(fp, prefix, 1); l_pac = bns_fasta2bntseq(fp, prefix, 1);
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
gzclose(fp); err_gzclose(fp);
} }
{ {
bwt_t *bwt; bwt_t *bwt;

View File

@ -22,6 +22,11 @@ KSEQ_DECLARE(gzFile)
#define __left_lt(a, b) ((a).end > (b).end) #define __left_lt(a, b) ((a).end > (b).end)
KSORT_INIT(hit, bsw2hit_t, __left_lt) KSORT_INIT(hit, bsw2hit_t, __left_lt)
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
extern unsigned char nst_nt4_table[256]; extern unsigned char nst_nt4_table[256];
unsigned char nt_comp_table[256] = { unsigned char nt_comp_table[256] = {
@ -710,12 +715,12 @@ static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t *
// print and reset // print and reset
for (i = 0; i < _seq->n; ++i) { for (i = 0; i < _seq->n; ++i) {
bsw2seq1_t *p = _seq->seq + i; bsw2seq1_t *p = _seq->seq + i;
if (p->sam) printf("%s", p->sam); if (p->sam) err_printf("%s", p->sam);
free(p->name); free(p->seq); free(p->qual); free(p->sam); free(p->name); free(p->seq); free(p->qual); free(p->sam);
p->tid = -1; p->l = 0; p->tid = -1; p->l = 0;
p->name = p->seq = p->qual = p->sam = 0; p->name = p->seq = p->qual = p->sam = 0;
} }
fflush(stdout); err_fflush(stdout);
_seq->n = 0; _seq->n = 0;
} }
@ -729,13 +734,9 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, c
bseq1_t *bseq; bseq1_t *bseq;
pac = calloc(bns->l_pac/4+1, 1); pac = calloc(bns->l_pac/4+1, 1);
if (pac == 0) {
fprintf(stderr, "[bsw2_aln] insufficient memory!\n");
return;
}
for (l = 0; l < bns->n_seqs; ++l) for (l = 0; l < bns->n_seqs; ++l)
printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[l].name, bns->anns[l].len); err_printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[l].name, bns->anns[l].len);
fread(pac, 1, bns->l_pac/4+1, bns->fp_pac); err_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac);
fp = xzopen(fn, "r"); fp = xzopen(fn, "r");
ks = kseq_init(fp); ks = kseq_init(fp);
_seq = calloc(1, sizeof(bsw2seq_t)); _seq = calloc(1, sizeof(bsw2seq_t));
@ -767,9 +768,9 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, c
free(pac); free(pac);
free(_seq->seq); free(_seq); free(_seq->seq); free(_seq);
kseq_destroy(ks); kseq_destroy(ks);
gzclose(fp); err_gzclose(fp);
if (fn2) { if (fn2) {
kseq_destroy(ks2); kseq_destroy(ks2);
gzclose(fp2); err_gzclose(fp2);
} }
} }

View File

@ -1,6 +1,10 @@
#include <stdio.h> #include <stdio.h>
#include "bwtsw2.h" #include "bwtsw2.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
typedef struct { typedef struct {
uint32_t tbeg, tend; uint32_t tbeg, tend;
int qbeg, qend; int qbeg, qend;

View File

@ -8,6 +8,10 @@
#include "bwt.h" #include "bwt.h"
#include "kvec.h" #include "kvec.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
typedef struct { typedef struct {
bwtint_t k, l; bwtint_t k, l;
} qintv_t; } qintv_t;

View File

@ -37,6 +37,7 @@ int bwa_bwtsw2(int argc, char *argv[])
case 'S': opt->skip_sw = 1; break; case 'S': opt->skip_sw = 1; break;
case 'C': opt->cpy_cmt = 1; break; case 'C': opt->cpy_cmt = 1; break;
case 'G': opt->max_chain_gap = atoi(optarg); break; case 'G': opt->max_chain_gap = atoi(optarg); break;
default: return 1;
} }
} }
opt->qr = opt->q + opt->r; opt->qr = opt->q + opt->r;
@ -79,7 +80,7 @@ int bwa_bwtsw2(int argc, char *argv[])
opt->t *= opt->a; opt->t *= opt->a;
opt->coef *= opt->a; opt->coef *= opt->a;
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 0; if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 1;
bsw2_aln(opt, idx->bns, idx->bwt, argv[optind+1], optind+2 < argc? argv[optind+2] : 0); bsw2_aln(opt, idx->bns, idx->bwt, argv[optind+1], optind+2 < argc? argv[optind+2] : 0);
bwa_idx_destroy(idx); bwa_idx_destroy(idx);
free(opt); free(opt);

View File

@ -2,13 +2,17 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "utils.h"
#include "bwt.h" #include "bwt.h"
#include "bntseq.h" #include "bntseq.h"
#include "bwtsw2.h" #include "bwtsw2.h"
#include "kstring.h" #include "kstring.h"
#include "utils.h"
#include "ksw.h" #include "ksw.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
#define MIN_RATIO 0.8 #define MIN_RATIO 0.8
#define OUTLIER_BOUND 2.0 #define OUTLIER_BOUND 2.0
#define MAX_STDDEV 4.0 #define MAX_STDDEV 4.0

View File

@ -1,11 +1,16 @@
#include <stdio.h> #include <stdio.h>
#include <zlib.h> #include <zlib.h>
#include <string.h> #include <string.h>
#include <errno.h>
#include <assert.h> #include <assert.h>
#include "bwamem.h" #include "bwamem.h"
#include "kseq.h" // for the FASTA/Q parser #include "kseq.h" // for the FASTA/Q parser
KSEQ_DECLARE(gzFile) KSEQ_DECLARE(gzFile)
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
bwaidx_t *idx; bwaidx_t *idx;
@ -19,9 +24,17 @@ int main(int argc, char *argv[])
} }
idx = bwa_idx_load(argv[1], BWA_IDX_ALL); // load the BWA index idx = bwa_idx_load(argv[1], BWA_IDX_ALL); // load the BWA index
assert(idx); if (NULL == idx) {
fprintf(stderr, "Index load failed.\n");
exit(EXIT_FAILURE);
}
fp = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r"); fp = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r");
assert(fp); if (NULL == fp) {
fprintf(stderr, "Couldn't open %s : %s\n",
strcmp(argv[2], "-") ? argv[2] : "stdin",
errno ? strerror(errno) : "Out of memory");
exit(EXIT_FAILURE);
}
ks = kseq_init(fp); // initialize the FASTA/Q parser ks = kseq_init(fp); // initialize the FASTA/Q parser
opt = mem_opt_init(); // initialize the BWA-MEM parameters to the default values opt = mem_opt_init(); // initialize the BWA-MEM parameters to the default values
@ -34,10 +47,10 @@ int main(int argc, char *argv[])
if (ar.a[i].secondary >= 0) continue; // skip secondary alignments if (ar.a[i].secondary >= 0) continue; // skip secondary alignments
a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR
// print alignment // print alignment
printf("%s\t%c\t%s\t%ld\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, (long)a.pos, a.mapq); err_printf("%s\t%c\t%s\t%ld\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, (long)a.pos, a.mapq);
for (k = 0; k < a.n_cigar; ++k) // print CIGAR for (k = 0; k < a.n_cigar; ++k) // print CIGAR
printf("%d%c", a.cigar[k]>>4, "MIDSH"[a.cigar[k]&0xf]); err_printf("%d%c", a.cigar[k]>>4, "MIDSH"[a.cigar[k]&0xf]);
printf("\t%d\n", a.NM); // print edit distance err_printf("\t%d\n", a.NM); // print edit distance
free(a.cigar); // don't forget to deallocate CIGAR free(a.cigar); // don't forget to deallocate CIGAR
} }
free(ar.a); // and deallocate the hit list free(ar.a); // and deallocate the hit list
@ -45,7 +58,7 @@ int main(int argc, char *argv[])
free(opt); free(opt);
kseq_destroy(ks); kseq_destroy(ks);
gzclose(fp); err_gzclose(fp);
bwa_idx_destroy(idx); bwa_idx_destroy(idx);
return 0; return 0;
} }

View File

@ -7,6 +7,7 @@
#include "kvec.h" #include "kvec.h"
#include "utils.h" #include "utils.h"
#include "kseq.h" #include "kseq.h"
#include "utils.h"
KSEQ_DECLARE(gzFile) KSEQ_DECLARE(gzFile)
extern unsigned char nst_nt4_table[256]; extern unsigned char nst_nt4_table[256];
@ -51,6 +52,7 @@ int main_mem(int argc, char *argv[])
else if (c == 'R') { else if (c == 'R') {
if ((rg_line = bwa_set_rg(optarg)) == 0) return 1; // FIXME: memory leak if ((rg_line = bwa_set_rg(optarg)) == 0) return 1; // FIXME: memory leak
} else if (c == 's') opt->split_width = atoi(optarg); } else if (c == 's') opt->split_width = atoi(optarg);
else return 1;
} }
if (opt->n_threads < 1) opt->n_threads = 1; if (opt->n_threads < 1) opt->n_threads = 1;
if (optind + 1 >= argc) { if (optind + 1 >= argc) {
@ -130,7 +132,7 @@ int main_mem(int argc, char *argv[])
fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size); fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size);
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs, 0); mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs, 0);
for (i = 0; i < n; ++i) { for (i = 0; i < n; ++i) {
fputs(seqs[i].sam, stdout); err_fputs(seqs[i].sam, stdout);
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam); free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
} }
free(seqs); free(seqs);
@ -139,10 +141,10 @@ int main_mem(int argc, char *argv[])
free(opt); free(opt);
bwa_idx_destroy(idx); bwa_idx_destroy(idx);
kseq_destroy(ks); kseq_destroy(ks);
gzclose(fp); kclose(ko); err_gzclose(fp); kclose(ko);
if (ks2) { if (ks2) {
kseq_destroy(ks2); kseq_destroy(ks2);
gzclose(fp2); kclose(ko2); err_gzclose(fp2); kclose(ko2);
} }
return 0; return 0;
} }
@ -163,6 +165,7 @@ int main_fastmap(int argc, char *argv[])
case 'p': print_seq = 1; break; case 'p': print_seq = 1; break;
case 'w': min_iwidth = atoi(optarg); break; case 'w': min_iwidth = atoi(optarg); break;
case 'l': min_len = atoi(optarg); break; case 'l': min_len = atoi(optarg); break;
default: return 1;
} }
} }
if (optind + 1 >= argc) { if (optind + 1 >= argc) {
@ -170,16 +173,16 @@ int main_fastmap(int argc, char *argv[])
return 1; return 1;
} }
fp = gzopen(argv[optind + 1], "r"); fp = xzopen(argv[optind + 1], "r");
seq = kseq_init(fp); seq = kseq_init(fp);
idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS); if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 1;
itr = smem_itr_init(idx->bwt); itr = smem_itr_init(idx->bwt);
while (kseq_read(seq) >= 0) { while (kseq_read(seq) >= 0) {
printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l); err_printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l);
if (print_seq) { if (print_seq) {
putchar('\t'); err_putchar('\t');
puts(seq->seq.s); err_puts(seq->seq.s);
} else putchar('\n'); } else err_putchar('\n');
for (i = 0; i < seq->seq.l; ++i) for (i = 0; i < seq->seq.l; ++i)
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]]; seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
smem_set_query(itr, seq->seq.l, (uint8_t*)seq->seq.s); smem_set_query(itr, seq->seq.l, (uint8_t*)seq->seq.s);
@ -187,7 +190,7 @@ int main_fastmap(int argc, char *argv[])
for (i = 0; i < a->n; ++i) { for (i = 0; i < a->n; ++i) {
bwtintv_t *p = &a->a[i]; bwtintv_t *p = &a->a[i];
if ((uint32_t)p->info - (p->info>>32) < min_len) continue; if ((uint32_t)p->info - (p->info>>32) < min_len) continue;
printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]); err_printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]);
if (p->x[2] <= min_iwidth) { if (p->x[2] <= min_iwidth) {
for (k = 0; k < p->x[2]; ++k) { for (k = 0; k < p->x[2]; ++k) {
bwtint_t pos; bwtint_t pos;
@ -196,18 +199,18 @@ int main_fastmap(int argc, char *argv[])
pos = bns_depos(idx->bns, bwt_sa(idx->bwt, p->x[0] + k), &is_rev); pos = bns_depos(idx->bns, bwt_sa(idx->bwt, p->x[0] + k), &is_rev);
if (is_rev) pos -= len - 1; if (is_rev) pos -= len - 1;
bns_cnt_ambi(idx->bns, pos, len, &ref_id); bns_cnt_ambi(idx->bns, pos, len, &ref_id);
printf("\t%s:%c%ld", idx->bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - idx->bns->anns[ref_id].offset) + 1); err_printf("\t%s:%c%ld", idx->bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - idx->bns->anns[ref_id].offset) + 1);
} }
} else fputs("\t*", stdout); } else err_puts("\t*");
putchar('\n'); err_putchar('\n');
} }
} }
puts("//"); err_puts("//");
} }
smem_itr_destroy(itr); smem_itr_destroy(itr);
bwa_idx_destroy(idx); bwa_idx_destroy(idx);
kseq_destroy(seq); kseq_destroy(seq);
gzclose(fp); err_gzclose(fp);
return 0; return 0;
} }

7
is.c
View File

@ -26,6 +26,10 @@
#include <stdlib.h> #include <stdlib.h>
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
typedef unsigned char ubyte_t; typedef unsigned char ubyte_t;
#define chr(i) (cs == sizeof(int) ? ((const int *)T)[i]:((const unsigned char *)T)[i]) #define chr(i) (cs == sizeof(int) ? ((const int *)T)[i]:((const unsigned char *)T)[i])
@ -205,7 +209,8 @@ int is_bwt(ubyte_t *T, int n)
{ {
int *SA, i, primary = 0; int *SA, i, primary = 0;
SA = (int*)calloc(n+1, sizeof(int)); SA = (int*)calloc(n+1, sizeof(int));
is_sa(T, SA, n);
if (is_sa(T, SA, n)) return -1;
for (i = 0; i <= n; ++i) { for (i = 0; i <= n; ++i) {
if (SA[i] == 0) primary = i; if (SA[i] == 0) primary = i;

View File

@ -32,6 +32,10 @@
#include <string.h> #include <string.h>
#include <stdint.h> #include <stdint.h>
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
typedef struct { typedef struct {
int32_t is_internal:1, n:31; int32_t is_internal:1, n:31;
} kbnode_t; } kbnode_t;

View File

@ -116,6 +116,10 @@ int main() {
#include <string.h> #include <string.h>
#include <limits.h> #include <limits.h>
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
/* compipler specific configuration */ /* compipler specific configuration */
#if UINT_MAX == 0xffffffffu #if UINT_MAX == 0xffffffffu

45
kopen.c
View File

@ -14,6 +14,10 @@
#include <sys/socket.h> #include <sys/socket.h>
#endif #endif
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
#ifdef _WIN32 #ifdef _WIN32
#define _KO_NO_NET #define _KO_NO_NET
#endif #endif
@ -54,10 +58,26 @@ static int socket_connect(const char *host, const char *port)
#undef __err_connect #undef __err_connect
} }
static int write_bytes(int fd, const char *buf, size_t len)
{
ssize_t bytes;
do {
bytes = write(fd, buf, len);
if (bytes >= 0) {
len -= bytes;
} else if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR) {
return -1;
}
} while (len > 0);
return 0;
}
static int http_open(const char *fn) static int http_open(const char *fn)
{ {
char *p, *proxy, *q, *http_host, *host, *port, *path, *buf; char *p, *proxy, *q, *http_host, *host, *port, *path, *buf;
int fd, ret, l; int fd, ret, l;
ssize_t bytes = 0, bufsz = 0x10000;
/* parse URL; adapted from khttp_parse_url() in knetfile.c */ /* parse URL; adapted from khttp_parse_url() in knetfile.c */
if (strstr(fn, "http://") != fn) return 0; if (strstr(fn, "http://") != fn) return 0;
@ -87,26 +107,35 @@ static int http_open(const char *fn)
/* connect; adapted from khttp_connect() in knetfile.c */ /* connect; adapted from khttp_connect() in knetfile.c */
l = 0; l = 0;
fd = socket_connect(host, port); fd = socket_connect(host, port);
buf = calloc(0x10000, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough. buf = calloc(bufsz, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough.
l += sprintf(buf + l, "GET %s HTTP/1.0\r\nHost: %s\r\n", path, http_host); l += snprintf(buf + l, bufsz, "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n",
l += sprintf(buf + l, "\r\n"); path, http_host);
write(fd, buf, l); if (write_bytes(fd, buf, l) != 0) {
close(fd);
fd = -1;
goto out;
}
l = 0; l = 0;
while (read(fd, buf + l, 1)) { // read HTTP header; FIXME: bad efficiency retry:
while (l < bufsz && (bytes = read(fd, buf + l, 1)) > 0) { // read HTTP header; FIXME: bad efficiency
if (buf[l] == '\n' && l >= 3) if (buf[l] == '\n' && l >= 3)
if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break; if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break;
++l; ++l;
} }
if (bytes < 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) goto retry;
buf[l] = 0; buf[l] = 0;
if (l < 14) { // prematured header if (bytes < 0 || l < 14) { // prematured header
close(fd); close(fd);
fd = -1; fd = -1;
goto out;
} }
ret = strtol(buf + 8, &p, 0); // HTTP return code ret = strtol(buf + 8, &p, 0); // HTTP return code
if (ret != 200) { if (ret != 200) {
close(fd); close(fd);
fd = -1; fd = -1;
} }
out:
free(buf); free(http_host); free(host); free(port); free(path); free(buf); free(http_host); free(host); free(port); free(path);
return fd; return fd;
} }
@ -143,7 +172,7 @@ static int kftp_get_response(ftpaux_t *aux)
static int kftp_send_cmd(ftpaux_t *aux, const char *cmd, int is_get) static int kftp_send_cmd(ftpaux_t *aux, const char *cmd, int is_get)
{ {
if (socket_wait(aux->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing if (socket_wait(aux->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing
write(aux->ctrl_fd, cmd, strlen(cmd)); if (write_bytes(aux->ctrl_fd, cmd, strlen(cmd)) != 0) return -1;
return is_get? kftp_get_response(aux) : 0; return is_get? kftp_get_response(aux) : 0;
} }
@ -262,7 +291,7 @@ void *kopen(const char *fn, int *_fd)
if (ispunct(*q) && *q != '.' && *q != '_' && *q != '-' && *q != ':') if (ispunct(*q) && *q != '.' && *q != '_' && *q != '-' && *q != ':')
break; break;
need_shell = (*q != 0); need_shell = (*q != 0);
pipe(pfd); if (pipe(pfd) != 0) return 0;
pid = vfork(); pid = vfork();
if (pid == -1) { /* vfork() error */ if (pid == -1) { /* vfork() error */
close(pfd[0]); close(pfd[1]); close(pfd[0]); close(pfd[1]);

4
kseq.h
View File

@ -32,6 +32,10 @@
#include <string.h> #include <string.h>
#include <stdlib.h> #include <stdlib.h>
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r #define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
#define KS_SEP_TAB 1 // isspace() && !' ' #define KS_SEP_TAB 1 // isspace() && !' '
#define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows) #define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows)

View File

@ -58,6 +58,10 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
typedef struct { typedef struct {
void *left, *right; void *left, *right;
int depth; int depth;

View File

@ -2,6 +2,10 @@
#include <stdio.h> #include <stdio.h>
#include "kstring.h" #include "kstring.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
int ksprintf(kstring_t *s, const char *fmt, ...) int ksprintf(kstring_t *s, const char *fmt, ...)
{ {
va_list ap; va_list ap;

View File

@ -4,6 +4,10 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
#ifndef kroundup32 #ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif #endif

18
ksw.c
View File

@ -28,6 +28,10 @@
#include <emmintrin.h> #include <emmintrin.h>
#include "ksw.h" #include "ksw.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
#ifdef __GNUC__ #ifdef __GNUC__
#define LIKELY(x) __builtin_expect((x),1) #define LIKELY(x) __builtin_expect((x),1)
#define UNLIKELY(x) __builtin_expect((x),0) #define UNLIKELY(x) __builtin_expect((x),0)
@ -553,7 +557,7 @@ int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
#include <stdio.h> #include <stdio.h>
#include <zlib.h> #include <zlib.h>
#include "kseq.h" #include "kseq.h"
KSEQ_INIT(gzFile, gzread) KSEQ_INIT(gzFile, err_gzread)
unsigned char seq_nt4_table[256] = { unsigned char seq_nt4_table[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
@ -609,8 +613,8 @@ int main(int argc, char *argv[])
} }
for (j = 0; j < 5; ++j) mat[k++] = 0; for (j = 0; j < 5; ++j) mat[k++] = 0;
// open file // open file
fpt = gzopen(argv[optind], "r"); kst = kseq_init(fpt); fpt = xzopen(argv[optind], "r"); kst = kseq_init(fpt);
fpq = gzopen(argv[optind+1], "r"); ksq = kseq_init(fpq); fpq = xzopen(argv[optind+1], "r"); ksq = kseq_init(fpq);
// all-pair alignment // all-pair alignment
while (kseq_read(ksq) > 0) { while (kseq_read(ksq) > 0) {
kswq_t *q[2] = {0, 0}; kswq_t *q[2] = {0, 0};
@ -629,18 +633,18 @@ int main(int argc, char *argv[])
for (i = 0; i < (int)kst->seq.l; ++i) kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]]; for (i = 0; i < (int)kst->seq.l; ++i) kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]];
r = ksw_align(ksq->seq.l, (uint8_t*)ksq->seq.s, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[0]); r = ksw_align(ksq->seq.l, (uint8_t*)ksq->seq.s, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[0]);
if (r.score >= minsc) if (r.score >= minsc)
printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, r.qb, r.qe+1, r.score, r.score2, r.te2); err_printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, r.qb, r.qe+1, r.score, r.score2, r.te2);
if (rseq) { if (rseq) {
r = ksw_align(ksq->seq.l, rseq, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[1]); r = ksw_align(ksq->seq.l, rseq, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[1]);
if (r.score >= minsc) if (r.score >= minsc)
printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, (int)ksq->seq.l - r.qb, (int)ksq->seq.l - 1 - r.qe, r.score, r.score2, r.te2); err_printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, (int)ksq->seq.l - r.qb, (int)ksq->seq.l - 1 - r.qe, r.score, r.score2, r.te2);
} }
} }
free(q[0]); free(q[1]); free(q[0]); free(q[1]);
} }
free(rseq); free(rseq);
kseq_destroy(kst); gzclose(fpt); kseq_destroy(kst); err_gzclose(fpt);
kseq_destroy(ksq); gzclose(fpq); kseq_destroy(ksq); err_gzclose(fpq);
return 0; return 0;
} }
#endif #endif

4
kvec.h
View File

@ -50,6 +50,10 @@ int main() {
#include <stdlib.h> #include <stdlib.h>
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#define kvec_t(type) struct { size_t n, m; type *a; } #define kvec_t(type) struct { size_t n, m; type *a; }

2
main.c
View File

@ -55,7 +55,7 @@ static int usage()
void bwa_print_sam_PG() void bwa_print_sam_PG()
{ {
printf("@PG\tID:bwa\tPN:bwa\tVN:%s\n", PACKAGE_VERSION); err_printf("@PG\tID:bwa\tPN:bwa\tVN:%s\n", PACKAGE_VERSION);
} }
int main(int argc, char *argv[]) int main(int argc, char *argv[])

57
malloc_wrap.c 100644
View File

@ -0,0 +1,57 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#ifdef USE_MALLOC_WRAPPERS
/* Don't wrap ourselves */
# undef USE_MALLOC_WRAPPERS
#endif
#include "malloc_wrap.h"
void *wrap_calloc(size_t nmemb, size_t size,
const char *file, unsigned int line, const char *func) {
void *p = calloc(nmemb, size);
if (NULL == p) {
fprintf(stderr,
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
func, nmemb * size, file, line, strerror(errno));
exit(EXIT_FAILURE);
}
return p;
}
void *wrap_malloc(size_t size,
const char *file, unsigned int line, const char *func) {
void *p = malloc(size);
if (NULL == p) {
fprintf(stderr,
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
func, size, file, line, strerror(errno));
exit(EXIT_FAILURE);
}
return p;
}
void *wrap_realloc(void *ptr, size_t size,
const char *file, unsigned int line, const char *func) {
void *p = realloc(ptr, size);
if (NULL == p) {
fprintf(stderr,
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
func, size, file, line, strerror(errno));
exit(EXIT_FAILURE);
}
return p;
}
char *wrap_strdup(const char *s,
const char *file, unsigned int line, const char *func) {
char *p = strdup(s);
if (NULL == p) {
fprintf(stderr,
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
func, strlen(s), file, line, strerror(errno));
exit(EXIT_FAILURE);
}
return p;
}

47
malloc_wrap.h 100644
View File

@ -0,0 +1,47 @@
#ifndef MALLOC_WRAP_H
#define MALLOC_WRAP_H
#include <stdlib.h> /* Avoid breaking the usual definitions */
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
void *wrap_calloc(size_t nmemb, size_t size,
const char *file, unsigned int line, const char *func);
void *wrap_malloc(size_t size,
const char *file, unsigned int line, const char *func);
void *wrap_realloc(void *ptr, size_t size,
const char *file, unsigned int line, const char *func);
char *wrap_strdup(const char *s,
const char *file, unsigned int line, const char *func);
#ifdef __cplusplus
}
#endif
#ifdef USE_MALLOC_WRAPPERS
# ifdef calloc
# undef calloc
# endif
# define calloc(n, s) wrap_calloc( (n), (s), __FILE__, __LINE__, __func__)
# ifdef malloc
# undef malloc
# endif
# define malloc(s) wrap_malloc( (s), __FILE__, __LINE__, __func__)
# ifdef realloc
# undef realloc
# endif
# define realloc(p, s) wrap_realloc((p), (s), __FILE__, __LINE__, __func__)
# ifdef strdup
# undef strdup
# endif
# define strdup(s) wrap_strdup( (s), __FILE__, __LINE__, __func__)
#endif /* USE_MALLOC_WRAPPERS */
#endif /* MALLOC_WRAP_H */

View File

@ -4,12 +4,18 @@
#include <string.h> #include <string.h>
#include <zlib.h> #include <zlib.h>
#include <pthread.h> #include <pthread.h>
#include <errno.h>
#include "ksw.h" #include "ksw.h"
#include "kseq.h" #include "kseq.h"
#include "kstring.h" #include "kstring.h"
#include "bwa.h" #include "bwa.h"
#include "utils.h"
KSEQ_DECLARE(gzFile) KSEQ_DECLARE(gzFile)
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
#define MAX_SCORE_RATIO 0.9f #define MAX_SCORE_RATIO 0.9f
#define MAX_ERR 8 #define MAX_ERR 8
@ -140,14 +146,14 @@ pem_ret:
static inline void print_bseq(const bseq1_t *s, int rn) static inline void print_bseq(const bseq1_t *s, int rn)
{ {
putchar(s->qual? '@' : '>'); err_putchar(s->qual? '@' : '>');
fputs(s->name, stdout); err_fputs(s->name, stdout);
if (rn == 1 || rn == 2) { if (rn == 1 || rn == 2) {
putchar('/'); putchar('0' + rn); putchar('\n'); err_putchar('/'); err_putchar('0' + rn); err_putchar('\n');
} else puts(" merged"); } else err_puts(" merged");
puts(s->seq); err_puts(s->seq);
if (s->qual) { if (s->qual) {
puts("+"); puts(s->qual); err_puts("+"); err_puts(s->qual);
} }
} }
@ -224,6 +230,7 @@ int main_pemerge(int argc, char *argv[])
else if (c == 'Q') opt->q_thres = atoi(optarg); else if (c == 'Q') opt->q_thres = atoi(optarg);
else if (c == 't') opt->n_threads = atoi(optarg); else if (c == 't') opt->n_threads = atoi(optarg);
else if (c == 'T') min_ovlp = atoi(optarg); else if (c == 'T') min_ovlp = atoi(optarg);
else return 1;
} }
if (flag == 0) flag = 3; if (flag == 0) flag = 3;
opt->flag = flag; opt->flag = flag;
@ -243,9 +250,21 @@ int main_pemerge(int argc, char *argv[])
} }
fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r"); fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
if (NULL == fp) {
fprintf(stderr, "Couldn't open %s : %s\n",
strcmp(argv[optind], "-") ? argv[optind] : "stdin",
errno ? strerror(errno) : "Out of memory");
exit(EXIT_FAILURE);
}
ks = kseq_init(fp); ks = kseq_init(fp);
if (optind + 1 < argc) { if (optind + 1 < argc) {
fp2 = strcmp(argv[optind+1], "-")? gzopen(argv[optind+1], "r") : gzdopen(fileno(stdin), "r"); fp2 = strcmp(argv[optind+1], "-")? gzopen(argv[optind+1], "r") : gzdopen(fileno(stdin), "r");
if (NULL == fp) {
fprintf(stderr, "Couldn't open %s : %s\n",
strcmp(argv[optind+1], "-") ? argv[optind+1] : "stdin",
errno ? strerror(errno) : "Out of memory");
exit(EXIT_FAILURE);
}
ks2 = kseq_init(fp2); ks2 = kseq_init(fp2);
} }
@ -259,11 +278,14 @@ int main_pemerge(int argc, char *argv[])
for (i = 1; i <= MAX_ERR; ++i) for (i = 1; i <= MAX_ERR; ++i)
fprintf(stderr, "%12ld %s\n", (long)cnt[i], err_msg[i]); fprintf(stderr, "%12ld %s\n", (long)cnt[i], err_msg[i]);
kseq_destroy(ks); kseq_destroy(ks);
gzclose(fp); err_gzclose(fp);
if (ks2) { if (ks2) {
kseq_destroy(ks2); kseq_destroy(ks2);
gzclose(fp2); err_gzclose(fp2);
} }
free(opt); free(opt);
err_fflush(stdout);
return 0; return 0;
} }

153
utils.c
View File

@ -24,6 +24,7 @@
*/ */
/* Contact: Heng Li <lh3@sanger.ac.uk> */ /* Contact: Heng Li <lh3@sanger.ac.uk> */
#define FSYNC_ON_FLUSH
#include <stdio.h> #include <stdio.h>
#include <stdarg.h> #include <stdarg.h>
@ -31,6 +32,11 @@
#include <string.h> #include <string.h>
#include <zlib.h> #include <zlib.h>
#include <errno.h> #include <errno.h>
#ifdef FSYNC_ON_FLUSH
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
#include <sys/resource.h> #include <sys/resource.h>
#include <sys/time.h> #include <sys/time.h>
#include "utils.h" #include "utils.h"
@ -41,7 +47,7 @@ KSORT_INIT(128, pair64_t, pair64_lt)
KSORT_INIT(64, uint64_t, ks_lt_generic) KSORT_INIT(64, uint64_t, ks_lt_generic)
#include "kseq.h" #include "kseq.h"
KSEQ_INIT2(, gzFile, gzread) KSEQ_INIT2(, gzFile, err_gzread)
/******************** /********************
* System utilities * * System utilities *
@ -53,8 +59,7 @@ FILE *err_xopen_core(const char *func, const char *fn, const char *mode)
if (strcmp(fn, "-") == 0) if (strcmp(fn, "-") == 0)
return (strstr(mode, "r"))? stdin : stdout; return (strstr(mode, "r"))? stdin : stdout;
if ((fp = fopen(fn, mode)) == 0) { if ((fp = fopen(fn, mode)) == 0) {
fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn); err_fatal(func, "fail to open file '%s' : %s", fn, strerror(errno));
abort();
} }
return fp; return fp;
} }
@ -62,10 +67,7 @@ FILE *err_xopen_core(const char *func, const char *fn, const char *mode)
FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp) FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp)
{ {
if (freopen(fn, mode, fp) == 0) { if (freopen(fn, mode, fp) == 0) {
fprintf(stderr, "[%s] fail to open file '%s': ", func, fn); err_fatal(func, "fail to open file '%s' : %s", fn, strerror(errno));
perror(NULL);
fprintf(stderr, "Abort!\n");
abort();
} }
return fp; return fp;
} }
@ -73,16 +75,30 @@ FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE
gzFile err_xzopen_core(const char *func, const char *fn, const char *mode) gzFile err_xzopen_core(const char *func, const char *fn, const char *mode)
{ {
gzFile fp; gzFile fp;
if (strcmp(fn, "-") == 0) if (strcmp(fn, "-") == 0) {
return gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode); fp = gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode);
/* According to zlib.h, this is the only reason gzdopen can fail */
if (!fp) err_fatal(func, "Out of memory");
return fp;
}
if ((fp = gzopen(fn, mode)) == 0) { if ((fp = gzopen(fn, mode)) == 0) {
fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn); err_fatal(func, "fail to open file '%s' : %s", fn, errno ? strerror(errno) : "Out of memory");
abort();
} }
return fp; return fp;
} }
void err_fatal(const char *header, const char *fmt, ...) void err_fatal(const char *header, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
fprintf(stderr, "[%s] ", header);
vfprintf(stderr, fmt, args);
fprintf(stderr, "\n");
va_end(args);
exit(EXIT_FAILURE);
}
void err_fatal_core(const char *header, const char *fmt, ...)
{ {
va_list args; va_list args;
va_start(args, fmt); va_start(args, fmt);
@ -93,7 +109,13 @@ void err_fatal(const char *header, const char *fmt, ...)
abort(); abort();
} }
void err_fatal_simple_core(const char *func, const char *msg) void _err_fatal_simple(const char *func, const char *msg)
{
fprintf(stderr, "[%s] %s\n", func, msg);
exit(EXIT_FAILURE);
}
void _err_fatal_simple_core(const char *func, const char *msg)
{ {
fprintf(stderr, "[%s] %s Abort!\n", func, msg); fprintf(stderr, "[%s] %s Abort!\n", func, msg);
abort(); abort();
@ -103,7 +125,51 @@ size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
{ {
size_t ret = fwrite(ptr, size, nmemb, stream); size_t ret = fwrite(ptr, size, nmemb, stream);
if (ret != nmemb) if (ret != nmemb)
err_fatal_simple_core("fwrite", strerror(errno)); _err_fatal_simple("fwrite", strerror(errno));
return ret;
}
size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
size_t ret = fread(ptr, size, nmemb, stream);
if (ret != nmemb)
{
_err_fatal_simple("fread", ferror(stream) ? strerror(errno) : "Unexpected end of file");
}
return ret;
}
int err_gzread(gzFile file, void *ptr, unsigned int len)
{
int ret = gzread(file, ptr, len);
if (ret < 0)
{
int errnum = 0;
const char *msg = gzerror(file, &errnum);
_err_fatal_simple("gzread", Z_ERRNO == errnum ? strerror(errno) : msg);
}
return ret;
}
int err_fseek(FILE *stream, long offset, int whence)
{
int ret = fseek(stream, offset, whence);
if (0 != ret)
{
_err_fatal_simple("fseek", strerror(errno));
}
return ret;
}
long err_ftell(FILE *stream)
{
long ret = ftell(stream);
if (-1 == ret)
{
_err_fatal_simple("ftell", strerror(errno));
}
return ret; return ret;
} }
@ -115,7 +181,7 @@ int err_printf(const char *format, ...)
done = vfprintf(stdout, format, arg); done = vfprintf(stdout, format, arg);
int saveErrno = errno; int saveErrno = errno;
va_end(arg); va_end(arg);
if (done < 0) err_fatal_simple_core("vfprintf(stdout)", strerror(saveErrno)); if (done < 0) _err_fatal_simple("vfprintf(stdout)", strerror(saveErrno));
return done; return done;
} }
@ -127,21 +193,74 @@ int err_fprintf(FILE *stream, const char *format, ...)
done = vfprintf(stream, format, arg); done = vfprintf(stream, format, arg);
int saveErrno = errno; int saveErrno = errno;
va_end(arg); va_end(arg);
if (done < 0) err_fatal_simple_core("vfprintf", strerror(saveErrno)); if (done < 0) _err_fatal_simple("vfprintf", strerror(saveErrno));
return done; return done;
} }
int err_fputc(int c, FILE *stream)
{
int ret = putc(c, stream);
if (EOF == ret)
{
_err_fatal_simple("fputc", strerror(errno));
}
return ret;
}
int err_fputs(const char *s, FILE *stream)
{
int ret = fputs(s, stream);
if (EOF == ret)
{
_err_fatal_simple("fputs", strerror(errno));
}
return ret;
}
int err_fflush(FILE *stream) int err_fflush(FILE *stream)
{ {
int ret = fflush(stream); int ret = fflush(stream);
if (ret != 0) err_fatal_simple_core("fflush", strerror(errno)); if (ret != 0) _err_fatal_simple("fflush", strerror(errno));
#ifdef FSYNC_ON_FLUSH
/* Calling fflush() ensures that all the data has made it to the
kernel buffers, but this may not be sufficient for remote filesystems
(e.g. NFS, lustre) as an error may still occur while the kernel
is copying the buffered data to the file server. To be sure of
catching these errors, we need to call fsync() on the file
descriptor, but only if it is a regular file. */
{
struct stat sbuf;
if (0 != fstat(fileno(stream), &sbuf))
_err_fatal_simple("fstat", strerror(errno));
if (S_ISREG(sbuf.st_mode))
{
if (0 != fsync(fileno(stream)))
_err_fatal_simple("fsync", strerror(errno));
}
}
#endif
return ret; return ret;
} }
int err_fclose(FILE *stream) int err_fclose(FILE *stream)
{ {
int ret = fclose(stream); int ret = fclose(stream);
if (ret != 0) err_fatal_simple_core("fclose", strerror(errno)); if (ret != 0) _err_fatal_simple("fclose", strerror(errno));
return ret;
}
int err_gzclose(gzFile file)
{
int ret = gzclose(file);
if (Z_OK != ret)
{
_err_fatal_simple("gzclose", Z_ERRNO == ret ? strerror(errno) : zError(ret));
}
return ret; return ret;
} }

24
utils.h
View File

@ -39,11 +39,14 @@
#define ATTRIBUTE(list) #define ATTRIBUTE(list)
#endif #endif
#define err_fatal_simple(msg) err_fatal_simple_core(__func__, msg) #define err_fatal_simple(msg) _err_fatal_simple(__func__, msg)
#define err_fatal_simple_core(msg) _err_fatal_simple_core(__func__, msg)
#define xopen(fn, mode) err_xopen_core(__func__, fn, mode) #define xopen(fn, mode) err_xopen_core(__func__, fn, mode)
#define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp) #define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp)
#define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode) #define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode)
#define xassert(cond, msg) if ((cond) == 0) err_fatal_simple_core(__func__, msg)
#define xassert(cond, msg) if ((cond) == 0) _err_fatal_simple_core(__func__, msg)
typedef struct { typedef struct {
uint64_t x, y; uint64_t x, y;
@ -56,18 +59,31 @@ typedef struct { size_t n, m; pair64_t *a; } pair64_v;
extern "C" { extern "C" {
#endif #endif
void err_fatal(const char *header, const char *fmt, ...); void err_fatal(const char *header, const char *fmt, ...) ATTRIBUTE((noreturn));
void err_fatal_simple_core(const char *func, const char *msg); void err_fatal_core(const char *header, const char *fmt, ...) ATTRIBUTE((noreturn));
void _err_fatal_simple(const char *func, const char *msg) ATTRIBUTE((noreturn));
void _err_fatal_simple_core(const char *func, const char *msg) ATTRIBUTE((noreturn));
FILE *err_xopen_core(const char *func, const char *fn, const char *mode); FILE *err_xopen_core(const char *func, const char *fn, const char *mode);
FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp); FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp);
gzFile err_xzopen_core(const char *func, const char *fn, const char *mode); gzFile err_xzopen_core(const char *func, const char *fn, const char *mode);
size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream);
int err_gzread(gzFile file, void *ptr, unsigned int len);
int err_fseek(FILE *stream, long offset, int whence);
#define err_rewind(FP) err_fseek((FP), 0, SEEK_SET)
long err_ftell(FILE *stream);
int err_fprintf(FILE *stream, const char *format, ...) int err_fprintf(FILE *stream, const char *format, ...)
ATTRIBUTE((format(printf, 2, 3))); ATTRIBUTE((format(printf, 2, 3)));
int err_printf(const char *format, ...) int err_printf(const char *format, ...)
ATTRIBUTE((format(printf, 1, 2))); ATTRIBUTE((format(printf, 1, 2)));
int err_fputc(int c, FILE *stream);
#define err_putchar(C) err_fputc((C), stdout)
int err_fputs(const char *s, FILE *stream);
#define err_puts(S) err_fputs((S), stdout)
int err_fflush(FILE *stream); int err_fflush(FILE *stream);
int err_fclose(FILE *stream); int err_fclose(FILE *stream);
int err_gzclose(gzFile file);
double cputime(); double cputime();
double realtime(); double realtime();