Merge pull request #13 from daviesrob/master_fixes
Updates to master to add IO error checking
This commit is contained in:
commit
338ebfaae0
|
|
@ -3,3 +3,5 @@ bwa
|
||||||
test
|
test
|
||||||
test64
|
test64
|
||||||
.*.swp
|
.*.swp
|
||||||
|
Makefile.bak
|
||||||
|
bwamem-lite
|
||||||
|
|
|
||||||
69
Makefile
69
Makefile
|
|
@ -1,12 +1,13 @@
|
||||||
CC= gcc
|
CC= gcc
|
||||||
CFLAGS= -g -Wall -O2
|
CFLAGS= -g -Wall -O2
|
||||||
|
WRAP_MALLOC= -DUSE_MALLOC_WRAPPERS
|
||||||
AR= ar
|
AR= ar
|
||||||
DFLAGS= -DHAVE_PTHREAD
|
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
|
||||||
LOBJS= utils.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o
|
LOBJS= utils.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o
|
||||||
AOBJS= QSufSort.o bwt_gen.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
|
AOBJS= QSufSort.o bwt_gen.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
|
||||||
is.o bwtindex.o bwape.o kopen.o pemerge.o \
|
is.o bwtindex.o bwape.o kopen.o pemerge.o \
|
||||||
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
|
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
|
||||||
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
|
bwtsw2_chain.o fastmap.o bwtsw2_pair.o malloc_wrap.o
|
||||||
PROG= bwa
|
PROG= bwa
|
||||||
INCLUDES=
|
INCLUDES=
|
||||||
LIBS= -lm -lz -lpthread
|
LIBS= -lm -lz -lpthread
|
||||||
|
|
@ -28,26 +29,48 @@ bwamem-lite:libbwa.a example.o
|
||||||
libbwa.a:$(LOBJS)
|
libbwa.a:$(LOBJS)
|
||||||
$(AR) -csru $@ $(LOBJS)
|
$(AR) -csru $@ $(LOBJS)
|
||||||
|
|
||||||
ksw.o:ksw.h
|
|
||||||
kstring.o:kstring.h
|
|
||||||
utils.o:utils.h ksort.h kseq.h
|
|
||||||
bntseq.o:bntseq.h
|
|
||||||
bwt.o:bwt.h utils.h
|
|
||||||
bwa.o:bwa.h bwt.h bntseq.h
|
|
||||||
bwamem.o:ksw.h kbtree.h ksort.h kvec.h kstring.h utils.h bwamem.h
|
|
||||||
bwamem_pair.o:ksw.h kvec.h kstring.h utils.h bwamem.h
|
|
||||||
|
|
||||||
QSufSort.o:QSufSort.h
|
|
||||||
bwt_gen.o:QSufSort.h
|
|
||||||
|
|
||||||
fastmap.o:bwt.h bwamem.h
|
|
||||||
|
|
||||||
bwtaln.o:bwt.h bwtaln.h kseq.h
|
|
||||||
bwtgap.o:bwtgap.h bwtaln.h bwt.h
|
|
||||||
|
|
||||||
bwtsw2_core.o:bwtsw2.h bwt.h bwt_lite.h
|
|
||||||
bwtsw2_aux.o:bwtsw2.h bwt.h bwt_lite.h
|
|
||||||
bwtsw2_main.o:bwtsw2.h
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f gmon.out *.o a.out $(PROG) *~ *.a
|
rm -f gmon.out *.o a.out $(PROG) *~ *.a
|
||||||
|
|
||||||
|
depend:
|
||||||
|
( LC_ALL=C ; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c )
|
||||||
|
|
||||||
|
# DO NOT DELETE THIS LINE -- make depend depends on it.
|
||||||
|
|
||||||
|
QSufSort.o: QSufSort.h
|
||||||
|
bamlite.o: bamlite.h malloc_wrap.h
|
||||||
|
bntseq.o: bntseq.h utils.h kseq.h malloc_wrap.h
|
||||||
|
bwa.o: bntseq.h bwa.h bwt.h ksw.h utils.h malloc_wrap.h kseq.h
|
||||||
|
bwamem.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h ksw.h kvec.h
|
||||||
|
bwamem.o: ksort.h utils.h kbtree.h
|
||||||
|
bwamem_pair.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h kvec.h
|
||||||
|
bwamem_pair.o: utils.h ksw.h
|
||||||
|
bwape.o: bwtaln.h bwt.h kvec.h malloc_wrap.h bntseq.h utils.h bwase.h bwa.h
|
||||||
|
bwape.o: ksw.h khash.h
|
||||||
|
bwase.o: bwase.h bntseq.h bwt.h bwtaln.h utils.h kstring.h malloc_wrap.h
|
||||||
|
bwase.o: bwa.h ksw.h
|
||||||
|
bwaseqio.o: bwtaln.h bwt.h utils.h bamlite.h malloc_wrap.h kseq.h
|
||||||
|
bwt.o: utils.h bwt.h kvec.h malloc_wrap.h
|
||||||
|
bwt_gen.o: QSufSort.h malloc_wrap.h
|
||||||
|
bwt_lite.o: bwt_lite.h malloc_wrap.h
|
||||||
|
bwtaln.o: bwtaln.h bwt.h bwtgap.h utils.h bwa.h bntseq.h malloc_wrap.h
|
||||||
|
bwtgap.o: bwtgap.h bwt.h bwtaln.h malloc_wrap.h
|
||||||
|
bwtindex.o: bntseq.h bwt.h utils.h malloc_wrap.h
|
||||||
|
bwtsw2_aux.o: bntseq.h bwt_lite.h utils.h bwtsw2.h bwt.h kstring.h
|
||||||
|
bwtsw2_aux.o: malloc_wrap.h bwa.h ksw.h kseq.h ksort.h
|
||||||
|
bwtsw2_chain.o: bwtsw2.h bntseq.h bwt_lite.h bwt.h malloc_wrap.h ksort.h
|
||||||
|
bwtsw2_core.o: bwt_lite.h bwtsw2.h bntseq.h bwt.h kvec.h malloc_wrap.h
|
||||||
|
bwtsw2_core.o: khash.h ksort.h
|
||||||
|
bwtsw2_main.o: bwt.h bwtsw2.h bntseq.h bwt_lite.h utils.h bwa.h
|
||||||
|
bwtsw2_pair.o: utils.h bwt.h bntseq.h bwtsw2.h bwt_lite.h kstring.h
|
||||||
|
bwtsw2_pair.o: malloc_wrap.h ksw.h
|
||||||
|
example.o: bwamem.h bwt.h bntseq.h bwa.h kseq.h malloc_wrap.h
|
||||||
|
fastmap.o: bwa.h bntseq.h bwt.h bwamem.h kvec.h malloc_wrap.h utils.h kseq.h
|
||||||
|
is.o: malloc_wrap.h
|
||||||
|
kopen.o: malloc_wrap.h
|
||||||
|
kstring.o: kstring.h malloc_wrap.h
|
||||||
|
ksw.o: ksw.h malloc_wrap.h
|
||||||
|
main.o: utils.h
|
||||||
|
malloc_wrap.o: malloc_wrap.h
|
||||||
|
pemerge.o: ksw.h kseq.h malloc_wrap.h kstring.h bwa.h bntseq.h bwt.h utils.h
|
||||||
|
utils.o: utils.h ksort.h malloc_wrap.h kseq.h
|
||||||
|
|
|
||||||
75
bamlite.c
75
bamlite.c
|
|
@ -2,8 +2,13 @@
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <errno.h>
|
||||||
#include "bamlite.h"
|
#include "bamlite.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
/*********************
|
/*********************
|
||||||
* from bam_endian.c *
|
* from bam_endian.c *
|
||||||
*********************/
|
*********************/
|
||||||
|
|
@ -62,11 +67,11 @@ void bam_header_destroy(bam_header_t *header)
|
||||||
if (header == 0) return;
|
if (header == 0) return;
|
||||||
if (header->target_name) {
|
if (header->target_name) {
|
||||||
for (i = 0; i < header->n_targets; ++i)
|
for (i = 0; i < header->n_targets; ++i)
|
||||||
free(header->target_name[i]);
|
if (header->target_name[i]) free(header->target_name[i]);
|
||||||
|
if (header->target_len) free(header->target_len);
|
||||||
free(header->target_name);
|
free(header->target_name);
|
||||||
free(header->target_len);
|
|
||||||
}
|
}
|
||||||
free(header->text);
|
if (header->text) free(header->text);
|
||||||
free(header);
|
free(header);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -80,28 +85,33 @@ bam_header_t *bam_header_read(bamFile fp)
|
||||||
magic_len = bam_read(fp, buf, 4);
|
magic_len = bam_read(fp, buf, 4);
|
||||||
if (magic_len != 4 || strncmp(buf, "BAM\001", 4) != 0) {
|
if (magic_len != 4 || strncmp(buf, "BAM\001", 4) != 0) {
|
||||||
fprintf(stderr, "[bam_header_read] invalid BAM binary header (this is not a BAM file).\n");
|
fprintf(stderr, "[bam_header_read] invalid BAM binary header (this is not a BAM file).\n");
|
||||||
return 0;
|
return NULL;
|
||||||
}
|
}
|
||||||
header = bam_header_init();
|
header = bam_header_init();
|
||||||
// read plain text and the number of reference sequences
|
// read plain text and the number of reference sequences
|
||||||
bam_read(fp, &header->l_text, 4);
|
if (bam_read(fp, &header->l_text, 4) != 4) goto fail;
|
||||||
if (bam_is_be) bam_swap_endian_4p(&header->l_text);
|
if (bam_is_be) bam_swap_endian_4p(&header->l_text);
|
||||||
header->text = (char*)calloc(header->l_text + 1, 1);
|
header->text = (char*)calloc(header->l_text + 1, 1);
|
||||||
bam_read(fp, header->text, header->l_text);
|
if (bam_read(fp, header->text, header->l_text) != header->l_text) goto fail;
|
||||||
bam_read(fp, &header->n_targets, 4);
|
if (bam_read(fp, &header->n_targets, 4) != 4) goto fail;
|
||||||
if (bam_is_be) bam_swap_endian_4p(&header->n_targets);
|
if (bam_is_be) bam_swap_endian_4p(&header->n_targets);
|
||||||
// read reference sequence names and lengths
|
// read reference sequence names and lengths
|
||||||
header->target_name = (char**)calloc(header->n_targets, sizeof(char*));
|
header->target_name = (char**)calloc(header->n_targets, sizeof(char*));
|
||||||
header->target_len = (uint32_t*)calloc(header->n_targets, 4);
|
header->target_len = (uint32_t*)calloc(header->n_targets, 4);
|
||||||
for (i = 0; i != header->n_targets; ++i) {
|
for (i = 0; i != header->n_targets; ++i) {
|
||||||
bam_read(fp, &name_len, 4);
|
if (bam_read(fp, &name_len, 4) != 4) goto fail;
|
||||||
if (bam_is_be) bam_swap_endian_4p(&name_len);
|
if (bam_is_be) bam_swap_endian_4p(&name_len);
|
||||||
header->target_name[i] = (char*)calloc(name_len, 1);
|
header->target_name[i] = (char*)calloc(name_len, 1);
|
||||||
bam_read(fp, header->target_name[i], name_len);
|
if (bam_read(fp, header->target_name[i], name_len) != name_len) {
|
||||||
bam_read(fp, &header->target_len[i], 4);
|
goto fail;
|
||||||
|
}
|
||||||
|
if (bam_read(fp, &header->target_len[i], 4) != 4) goto fail;
|
||||||
if (bam_is_be) bam_swap_endian_4p(&header->target_len[i]);
|
if (bam_is_be) bam_swap_endian_4p(&header->target_len[i]);
|
||||||
}
|
}
|
||||||
return header;
|
return header;
|
||||||
|
fail:
|
||||||
|
bam_header_destroy(header);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void swap_endian_data(const bam1_core_t *c, int data_len, uint8_t *data)
|
static void swap_endian_data(const bam1_core_t *c, int data_len, uint8_t *data)
|
||||||
|
|
@ -153,3 +163,48 @@ int bam_read1(bamFile fp, bam1_t *b)
|
||||||
if (bam_is_be) swap_endian_data(c, b->data_len, b->data);
|
if (bam_is_be) swap_endian_data(c, b->data_len, b->data);
|
||||||
return 4 + block_len;
|
return 4 + block_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef USE_VERBOSE_ZLIB_WRAPPERS
|
||||||
|
// Versions of gzopen, gzread and gzclose that print up error messages
|
||||||
|
|
||||||
|
gzFile bamlite_gzopen(const char *fn, const char *mode) {
|
||||||
|
gzFile fp;
|
||||||
|
if (strcmp(fn, "-") == 0) {
|
||||||
|
fp = gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode);
|
||||||
|
if (!fp) {
|
||||||
|
fprintf(stderr, "Couldn't open %s : %s",
|
||||||
|
(strstr(mode, "r"))? "stdin" : "stdout",
|
||||||
|
strerror(errno));
|
||||||
|
}
|
||||||
|
return fp;
|
||||||
|
}
|
||||||
|
if ((fp = gzopen(fn, mode)) == 0) {
|
||||||
|
fprintf(stderr, "Couldn't open %s : %s\n", fn,
|
||||||
|
errno ? strerror(errno) : "Out of memory");
|
||||||
|
}
|
||||||
|
return fp;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bamlite_gzread(gzFile file, void *ptr, unsigned int len) {
|
||||||
|
int ret = gzread(file, ptr, len);
|
||||||
|
|
||||||
|
if (ret < 0) {
|
||||||
|
int errnum = 0;
|
||||||
|
const char *msg = gzerror(file, &errnum);
|
||||||
|
fprintf(stderr, "gzread error: %s\n",
|
||||||
|
Z_ERRNO == errnum ? strerror(errno) : msg);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bamlite_gzclose(gzFile file) {
|
||||||
|
int ret = gzclose(file);
|
||||||
|
if (Z_OK != ret) {
|
||||||
|
fprintf(stderr, "gzclose error: %s\n",
|
||||||
|
Z_ERRNO == ret ? strerror(errno) : zError(ret));
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
#endif /* USE_VERBOSE_ZLIB_WRAPPERS */
|
||||||
|
|
|
||||||
28
bamlite.h
28
bamlite.h
|
|
@ -4,11 +4,25 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define USE_VERBOSE_ZLIB_WRAPPERS
|
||||||
|
|
||||||
typedef gzFile bamFile;
|
typedef gzFile bamFile;
|
||||||
#define bam_open(fn, mode) gzopen(fn, mode)
|
#ifdef USE_VERBOSE_ZLIB_WRAPPERS
|
||||||
#define bam_dopen(fd, mode) gzdopen(fd, mode)
|
/* These print error messages on failure */
|
||||||
#define bam_close(fp) gzclose(fp)
|
# define bam_open(fn, mode) bamlite_gzopen(fn, mode)
|
||||||
#define bam_read(fp, buf, size) gzread(fp, buf, size)
|
# define bam_dopen(fd, mode) gzdopen(fd, mode)
|
||||||
|
# define bam_close(fp) bamlite_gzclose(fp)
|
||||||
|
# define bam_read(fp, buf, size) bamlite_gzread(fp, buf, size)
|
||||||
|
#else
|
||||||
|
# define bam_open(fn, mode) gzopen(fn, mode)
|
||||||
|
# define bam_dopen(fd, mode) gzdopen(fd, mode)
|
||||||
|
# define bam_close(fp) gzclose(fp)
|
||||||
|
# define bam_read(fp, buf, size) gzread(fp, buf, size)
|
||||||
|
#endif /* USE_VERBOSE_ZLIB_WRAPPERS */
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int32_t n_targets;
|
int32_t n_targets;
|
||||||
|
|
@ -87,6 +101,12 @@ extern "C" {
|
||||||
bam_header_t *bam_header_read(bamFile fp);
|
bam_header_t *bam_header_read(bamFile fp);
|
||||||
int bam_read1(bamFile fp, bam1_t *b);
|
int bam_read1(bamFile fp, bam1_t *b);
|
||||||
|
|
||||||
|
#ifdef USE_VERBOSE_ZLIB_WRAPPERS
|
||||||
|
gzFile bamlite_gzopen(const char *fn, const char *mode);
|
||||||
|
int bamlite_gzread(gzFile file, void *ptr, unsigned int len);
|
||||||
|
int bamlite_gzclose(gzFile file);
|
||||||
|
#endif /* USE_VERBOSE_ZLIB_WRAPPERS */
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
76
bntseq.c
76
bntseq.c
|
|
@ -30,12 +30,17 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <errno.h>
|
||||||
#include "bntseq.h"
|
#include "bntseq.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
#include "kseq.h"
|
#include "kseq.h"
|
||||||
KSEQ_DECLARE(gzFile)
|
KSEQ_DECLARE(gzFile)
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
unsigned char nst_nt4_table[256] = {
|
unsigned char nst_nt4_table[256] = {
|
||||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||||
|
|
@ -63,25 +68,27 @@ void bns_dump(const bntseq_t *bns, const char *prefix)
|
||||||
{ // dump .ann
|
{ // dump .ann
|
||||||
strcpy(str, prefix); strcat(str, ".ann");
|
strcpy(str, prefix); strcat(str, ".ann");
|
||||||
fp = xopen(str, "w");
|
fp = xopen(str, "w");
|
||||||
fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->seed);
|
err_fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->seed);
|
||||||
for (i = 0; i != bns->n_seqs; ++i) {
|
for (i = 0; i != bns->n_seqs; ++i) {
|
||||||
bntann1_t *p = bns->anns + i;
|
bntann1_t *p = bns->anns + i;
|
||||||
fprintf(fp, "%d %s", p->gi, p->name);
|
err_fprintf(fp, "%d %s", p->gi, p->name);
|
||||||
if (p->anno[0]) fprintf(fp, " %s\n", p->anno);
|
if (p->anno[0]) err_fprintf(fp, " %s\n", p->anno);
|
||||||
else fprintf(fp, "\n");
|
else err_fprintf(fp, "\n");
|
||||||
fprintf(fp, "%lld %d %d\n", (long long)p->offset, p->len, p->n_ambs);
|
err_fprintf(fp, "%lld %d %d\n", (long long)p->offset, p->len, p->n_ambs);
|
||||||
}
|
}
|
||||||
fclose(fp);
|
err_fflush(fp);
|
||||||
|
err_fclose(fp);
|
||||||
}
|
}
|
||||||
{ // dump .amb
|
{ // dump .amb
|
||||||
strcpy(str, prefix); strcat(str, ".amb");
|
strcpy(str, prefix); strcat(str, ".amb");
|
||||||
fp = xopen(str, "w");
|
fp = xopen(str, "w");
|
||||||
fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->n_holes);
|
err_fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->n_holes);
|
||||||
for (i = 0; i != bns->n_holes; ++i) {
|
for (i = 0; i != bns->n_holes; ++i) {
|
||||||
bntamb1_t *p = bns->ambs + i;
|
bntamb1_t *p = bns->ambs + i;
|
||||||
fprintf(fp, "%lld %d %c\n", (long long)p->offset, p->len, p->amb);
|
err_fprintf(fp, "%lld %d %c\n", (long long)p->offset, p->len, p->amb);
|
||||||
}
|
}
|
||||||
fclose(fp);
|
err_fflush(fp);
|
||||||
|
err_fclose(fp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -89,13 +96,16 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c
|
||||||
{
|
{
|
||||||
char str[1024];
|
char str[1024];
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
|
const char *fname;
|
||||||
bntseq_t *bns;
|
bntseq_t *bns;
|
||||||
long long xx;
|
long long xx;
|
||||||
int i;
|
int i;
|
||||||
|
int scanres;
|
||||||
bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
|
bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
|
||||||
{ // read .ann
|
{ // read .ann
|
||||||
fp = xopen(ann_filename, "r");
|
fp = xopen(fname = ann_filename, "r");
|
||||||
fscanf(fp, "%lld%d%u", &xx, &bns->n_seqs, &bns->seed);
|
scanres = fscanf(fp, "%lld%d%u", &xx, &bns->n_seqs, &bns->seed);
|
||||||
|
if (scanres != 3) goto badread;
|
||||||
bns->l_pac = xx;
|
bns->l_pac = xx;
|
||||||
bns->anns = (bntann1_t*)calloc(bns->n_seqs, sizeof(bntann1_t));
|
bns->anns = (bntann1_t*)calloc(bns->n_seqs, sizeof(bntann1_t));
|
||||||
for (i = 0; i < bns->n_seqs; ++i) {
|
for (i = 0; i < bns->n_seqs; ++i) {
|
||||||
|
|
@ -103,39 +113,54 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c
|
||||||
char *q = str;
|
char *q = str;
|
||||||
int c;
|
int c;
|
||||||
// read gi and sequence name
|
// read gi and sequence name
|
||||||
fscanf(fp, "%u%s", &p->gi, str);
|
scanres = fscanf(fp, "%u%s", &p->gi, str);
|
||||||
|
if (scanres != 2) goto badread;
|
||||||
p->name = strdup(str);
|
p->name = strdup(str);
|
||||||
// read fasta comments
|
// read fasta comments
|
||||||
while ((c = fgetc(fp)) != '\n' && c != EOF) *q++ = c;
|
while (str - q < sizeof(str) - 1 && (c = fgetc(fp)) != '\n' && c != EOF) *q++ = c;
|
||||||
|
while (c != '\n' && c != EOF) c = fgetc(fp);
|
||||||
|
if (c == EOF) {
|
||||||
|
scanres = EOF;
|
||||||
|
goto badread;
|
||||||
|
}
|
||||||
*q = 0;
|
*q = 0;
|
||||||
if (q - str > 1) p->anno = strdup(str + 1); // skip leading space
|
if (q - str > 1) p->anno = strdup(str + 1); // skip leading space
|
||||||
else p->anno = strdup("");
|
else p->anno = strdup("");
|
||||||
// read the rest
|
// read the rest
|
||||||
fscanf(fp, "%lld%d%d", &xx, &p->len, &p->n_ambs);
|
scanres = fscanf(fp, "%lld%d%d", &xx, &p->len, &p->n_ambs);
|
||||||
|
if (scanres != 3) goto badread;
|
||||||
p->offset = xx;
|
p->offset = xx;
|
||||||
}
|
}
|
||||||
fclose(fp);
|
err_fclose(fp);
|
||||||
}
|
}
|
||||||
{ // read .amb
|
{ // read .amb
|
||||||
int64_t l_pac;
|
int64_t l_pac;
|
||||||
int32_t n_seqs;
|
int32_t n_seqs;
|
||||||
fp = xopen(amb_filename, "r");
|
fp = xopen(fname = amb_filename, "r");
|
||||||
fscanf(fp, "%lld%d%d", &xx, &n_seqs, &bns->n_holes);
|
scanres = fscanf(fp, "%lld%d%d", &xx, &n_seqs, &bns->n_holes);
|
||||||
|
if (scanres != 3) goto badread;
|
||||||
l_pac = xx;
|
l_pac = xx;
|
||||||
xassert(l_pac == bns->l_pac && n_seqs == bns->n_seqs, "inconsistent .ann and .amb files.");
|
xassert(l_pac == bns->l_pac && n_seqs == bns->n_seqs, "inconsistent .ann and .amb files.");
|
||||||
bns->ambs = bns->n_holes? (bntamb1_t*)calloc(bns->n_holes, sizeof(bntamb1_t)) : 0;
|
bns->ambs = bns->n_holes? (bntamb1_t*)calloc(bns->n_holes, sizeof(bntamb1_t)) : 0;
|
||||||
for (i = 0; i < bns->n_holes; ++i) {
|
for (i = 0; i < bns->n_holes; ++i) {
|
||||||
bntamb1_t *p = bns->ambs + i;
|
bntamb1_t *p = bns->ambs + i;
|
||||||
fscanf(fp, "%lld%d%s", &xx, &p->len, str);
|
scanres = fscanf(fp, "%lld%d%s", &xx, &p->len, str);
|
||||||
|
if (scanres != 3) goto badread;
|
||||||
p->offset = xx;
|
p->offset = xx;
|
||||||
p->amb = str[0];
|
p->amb = str[0];
|
||||||
}
|
}
|
||||||
fclose(fp);
|
err_fclose(fp);
|
||||||
}
|
}
|
||||||
{ // open .pac
|
{ // open .pac
|
||||||
bns->fp_pac = xopen(pac_filename, "rb");
|
bns->fp_pac = xopen(pac_filename, "rb");
|
||||||
}
|
}
|
||||||
return bns;
|
return bns;
|
||||||
|
|
||||||
|
badread:
|
||||||
|
if (EOF == scanres) {
|
||||||
|
err_fatal(__func__, "Error reading %s : %s\n", fname, ferror(fp) ? strerror(errno) : "Unexpected end of file");
|
||||||
|
}
|
||||||
|
err_fatal(__func__, "Parse error reading %s\n", fname);
|
||||||
}
|
}
|
||||||
|
|
||||||
bntseq_t *bns_restore(const char *prefix)
|
bntseq_t *bns_restore(const char *prefix)
|
||||||
|
|
@ -152,7 +177,7 @@ void bns_destroy(bntseq_t *bns)
|
||||||
if (bns == 0) return;
|
if (bns == 0) return;
|
||||||
else {
|
else {
|
||||||
int i;
|
int i;
|
||||||
if (bns->fp_pac) fclose(bns->fp_pac);
|
if (bns->fp_pac) err_fclose(bns->fp_pac);
|
||||||
free(bns->ambs);
|
free(bns->ambs);
|
||||||
for (i = 0; i < bns->n_seqs; ++i) {
|
for (i = 0; i < bns->n_seqs; ++i) {
|
||||||
free(bns->anns[i].name);
|
free(bns->anns[i].name);
|
||||||
|
|
@ -250,16 +275,17 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)
|
||||||
ret = bns->l_pac;
|
ret = bns->l_pac;
|
||||||
{ // finalize .pac file
|
{ // finalize .pac file
|
||||||
ubyte_t ct;
|
ubyte_t ct;
|
||||||
fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp);
|
err_fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp);
|
||||||
// the following codes make the pac file size always (l_pac/4+1+1)
|
// the following codes make the pac file size always (l_pac/4+1+1)
|
||||||
if (bns->l_pac % 4 == 0) {
|
if (bns->l_pac % 4 == 0) {
|
||||||
ct = 0;
|
ct = 0;
|
||||||
fwrite(&ct, 1, 1, fp);
|
err_fwrite(&ct, 1, 1, fp);
|
||||||
}
|
}
|
||||||
ct = bns->l_pac % 4;
|
ct = bns->l_pac % 4;
|
||||||
fwrite(&ct, 1, 1, fp);
|
err_fwrite(&ct, 1, 1, fp);
|
||||||
// close .pac file
|
// close .pac file
|
||||||
fclose(fp);
|
err_fflush(fp);
|
||||||
|
err_fclose(fp);
|
||||||
}
|
}
|
||||||
bns_dump(bns, prefix);
|
bns_dump(bns, prefix);
|
||||||
bns_destroy(bns);
|
bns_destroy(bns);
|
||||||
|
|
@ -283,7 +309,7 @@ int bwa_fa2pac(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
fp = xzopen(argv[optind], "r");
|
fp = xzopen(argv[optind], "r");
|
||||||
bns_fasta2bntseq(fp, (optind+1 < argc)? argv[optind+1] : argv[optind], for_only);
|
bns_fasta2bntseq(fp, (optind+1 < argc)? argv[optind+1] : argv[optind], for_only);
|
||||||
gzclose(fp);
|
err_gzclose(fp);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
8
bwa.c
8
bwa.c
|
|
@ -7,6 +7,10 @@
|
||||||
#include "ksw.h"
|
#include "ksw.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
int bwa_verbose = 3;
|
int bwa_verbose = 3;
|
||||||
char bwa_rg_id[256];
|
char bwa_rg_id[256];
|
||||||
|
|
||||||
|
|
@ -258,8 +262,8 @@ bwaidx_t *bwa_idx_load(const char *hint, int which)
|
||||||
idx->bns = bns_restore(prefix);
|
idx->bns = bns_restore(prefix);
|
||||||
if (which & BWA_IDX_PAC) {
|
if (which & BWA_IDX_PAC) {
|
||||||
idx->pac = calloc(idx->bns->l_pac/4+1, 1);
|
idx->pac = calloc(idx->bns->l_pac/4+1, 1);
|
||||||
fread(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence
|
err_fread_noeof(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence
|
||||||
fclose(idx->bns->fp_pac);
|
err_fclose(idx->bns->fp_pac);
|
||||||
idx->bns->fp_pac = 0;
|
idx->bns->fp_pac = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
10
bwamem.c
10
bwamem.c
|
|
@ -15,6 +15,10 @@
|
||||||
#include "ksort.h"
|
#include "ksort.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Theory on probability and scoring *ungapped* alignment
|
/* Theory on probability and scoring *ungapped* alignment
|
||||||
*
|
*
|
||||||
* s'(a,b) = log[P(b|a)/P(b)] = log[4P(b|a)], assuming uniform base distribution
|
* s'(a,b) = log[P(b|a)/P(b)] = log[4P(b|a)], assuming uniform base distribution
|
||||||
|
|
@ -229,16 +233,16 @@ void mem_print_chain(const bntseq_t *bns, mem_chain_v *chn)
|
||||||
int i, j;
|
int i, j;
|
||||||
for (i = 0; i < chn->n; ++i) {
|
for (i = 0; i < chn->n; ++i) {
|
||||||
mem_chain_t *p = &chn->a[i];
|
mem_chain_t *p = &chn->a[i];
|
||||||
printf("%d", p->n);
|
err_printf("%d", p->n);
|
||||||
for (j = 0; j < p->n; ++j) {
|
for (j = 0; j < p->n; ++j) {
|
||||||
bwtint_t pos;
|
bwtint_t pos;
|
||||||
int is_rev, ref_id;
|
int is_rev, ref_id;
|
||||||
pos = bns_depos(bns, p->seeds[j].rbeg, &is_rev);
|
pos = bns_depos(bns, p->seeds[j].rbeg, &is_rev);
|
||||||
if (is_rev) pos -= p->seeds[j].len - 1;
|
if (is_rev) pos -= p->seeds[j].len - 1;
|
||||||
bns_cnt_ambi(bns, pos, p->seeds[j].len, &ref_id);
|
bns_cnt_ambi(bns, pos, p->seeds[j].len, &ref_id);
|
||||||
printf("\t%d,%d,%ld(%s:%c%ld)", p->seeds[j].len, p->seeds[j].qbeg, (long)p->seeds[j].rbeg, bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1);
|
err_printf("\t%d,%d,%ld(%s:%c%ld)", p->seeds[j].len, p->seeds[j].qbeg, (long)p->seeds[j].rbeg, bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1);
|
||||||
}
|
}
|
||||||
putchar('\n');
|
err_putchar('\n');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,11 @@
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "ksw.h"
|
#include "ksw.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define MIN_RATIO 0.8
|
#define MIN_RATIO 0.8
|
||||||
#define MIN_DIR_CNT 10
|
#define MIN_DIR_CNT 10
|
||||||
#define MIN_DIR_RATIO 0.05
|
#define MIN_DIR_RATIO 0.05
|
||||||
|
|
|
||||||
24
bwape.c
24
bwape.c
|
|
@ -12,6 +12,10 @@
|
||||||
#include "bwa.h"
|
#include "bwa.h"
|
||||||
#include "ksw.h"
|
#include "ksw.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int n;
|
int n;
|
||||||
bwtint_t *a;
|
bwtint_t *a;
|
||||||
|
|
@ -280,11 +284,11 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw
|
||||||
p[j] = seqs[j] + i;
|
p[j] = seqs[j] + i;
|
||||||
p[j]->n_multi = 0;
|
p[j]->n_multi = 0;
|
||||||
p[j]->extra_flag |= SAM_FPD | (j == 0? SAM_FR1 : SAM_FR2);
|
p[j]->extra_flag |= SAM_FPD | (j == 0? SAM_FR1 : SAM_FR2);
|
||||||
fread(&n_aln, 4, 1, fp_sa[j]);
|
err_fread_noeof(&n_aln, 4, 1, fp_sa[j]);
|
||||||
if (n_aln > kv_max(d->aln[j]))
|
if (n_aln > kv_max(d->aln[j]))
|
||||||
kv_resize(bwt_aln1_t, d->aln[j], n_aln);
|
kv_resize(bwt_aln1_t, d->aln[j], n_aln);
|
||||||
d->aln[j].n = n_aln;
|
d->aln[j].n = n_aln;
|
||||||
fread(d->aln[j].a, sizeof(bwt_aln1_t), n_aln, fp_sa[j]);
|
err_fread_noeof(d->aln[j].a, sizeof(bwt_aln1_t), n_aln, fp_sa[j]);
|
||||||
kv_copy(bwt_aln1_t, buf[j][i].aln, d->aln[j]); // backup d->aln[j]
|
kv_copy(bwt_aln1_t, buf[j][i].aln, d->aln[j]); // backup d->aln[j]
|
||||||
// generate SE alignment and mapping quality
|
// generate SE alignment and mapping quality
|
||||||
bwa_aln2seq(n_aln, d->aln[j].a, p[j]);
|
bwa_aln2seq(n_aln, d->aln[j].a, p[j]);
|
||||||
|
|
@ -498,8 +502,8 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs,
|
||||||
// load reference sequence
|
// load reference sequence
|
||||||
if (_pacseq == 0) {
|
if (_pacseq == 0) {
|
||||||
pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
|
pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
|
||||||
rewind(bns->fp_pac);
|
err_rewind(bns->fp_pac);
|
||||||
fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac);
|
err_fread_noeof(pacseq, 1, bns->l_pac/4+1, bns->fp_pac);
|
||||||
} else pacseq = (ubyte_t*)_pacseq;
|
} else pacseq = (ubyte_t*)_pacseq;
|
||||||
if (!popt->is_sw || ii->avg < 0.0) return pacseq;
|
if (!popt->is_sw || ii->avg < 0.0) return pacseq;
|
||||||
|
|
||||||
|
|
@ -644,18 +648,18 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
|
||||||
g_hash = kh_init(b128);
|
g_hash = kh_init(b128);
|
||||||
last_ii.avg = -1.0;
|
last_ii.avg = -1.0;
|
||||||
|
|
||||||
fread(&opt, sizeof(gap_opt_t), 1, fp_sa[0]);
|
err_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa[0]);
|
||||||
ks[0] = bwa_open_reads(opt.mode, fn_fa[0]);
|
ks[0] = bwa_open_reads(opt.mode, fn_fa[0]);
|
||||||
opt0 = opt;
|
opt0 = opt;
|
||||||
fread(&opt, sizeof(gap_opt_t), 1, fp_sa[1]); // overwritten!
|
err_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa[1]); // overwritten!
|
||||||
ks[1] = bwa_open_reads(opt.mode, fn_fa[1]);
|
ks[1] = bwa_open_reads(opt.mode, fn_fa[1]);
|
||||||
{ // for Illumina alignment only
|
{ // for Illumina alignment only
|
||||||
if (popt->is_preload) {
|
if (popt->is_preload) {
|
||||||
strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str);
|
strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str);
|
||||||
strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt);
|
strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt);
|
||||||
pac = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
|
pac = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
|
||||||
rewind(bns->fp_pac);
|
err_rewind(bns->fp_pac);
|
||||||
fread(pac, 1, bns->l_pac/4+1, bns->fp_pac);
|
err_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -709,7 +713,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
|
||||||
bns_destroy(bns);
|
bns_destroy(bns);
|
||||||
for (i = 0; i < 2; ++i) {
|
for (i = 0; i < 2; ++i) {
|
||||||
bwa_seq_close(ks[i]);
|
bwa_seq_close(ks[i]);
|
||||||
fclose(fp_sa[i]);
|
err_fclose(fp_sa[i]);
|
||||||
}
|
}
|
||||||
for (iter = kh_begin(g_hash); iter != kh_end(g_hash); ++iter)
|
for (iter = kh_begin(g_hash); iter != kh_end(g_hash); ++iter)
|
||||||
if (kh_exist(g_hash, iter)) free(kh_val(g_hash, iter).a);
|
if (kh_exist(g_hash, iter)) free(kh_val(g_hash, iter).a);
|
||||||
|
|
@ -765,7 +769,7 @@ int bwa_sai2sam_pe(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) {
|
if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) {
|
||||||
fprintf(stderr, "[%s] fail to locate the index\n", __func__);
|
fprintf(stderr, "[%s] fail to locate the index\n", __func__);
|
||||||
return 0;
|
return 1;
|
||||||
}
|
}
|
||||||
bwa_sai2sam_pe_core(prefix, argv + optind + 1, argv + optind+3, popt, rg_line);
|
bwa_sai2sam_pe_core(prefix, argv + optind + 1, argv + optind+3, popt, rg_line);
|
||||||
free(prefix); free(popt);
|
free(prefix); free(popt);
|
||||||
|
|
|
||||||
56
bwase.c
56
bwase.c
|
|
@ -12,6 +12,10 @@
|
||||||
#include "bwa.h"
|
#include "bwa.h"
|
||||||
#include "ksw.h"
|
#include "ksw.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
int g_log_n[256];
|
int g_log_n[256];
|
||||||
|
|
||||||
void bwa_print_sam_PG();
|
void bwa_print_sam_PG();
|
||||||
|
|
@ -312,8 +316,8 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t
|
||||||
|
|
||||||
if (!_pacseq) {
|
if (!_pacseq) {
|
||||||
pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
|
pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
|
||||||
rewind(bns->fp_pac);
|
err_rewind(bns->fp_pac);
|
||||||
fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac);
|
err_fread_noeof(pacseq, 1, bns->l_pac/4+1, bns->fp_pac);
|
||||||
} else pacseq = _pacseq;
|
} else pacseq = _pacseq;
|
||||||
for (i = 0; i != n_seqs; ++i) {
|
for (i = 0; i != n_seqs; ++i) {
|
||||||
bwa_seq_t *s = seqs + i;
|
bwa_seq_t *s = seqs + i;
|
||||||
|
|
@ -381,6 +385,26 @@ static int64_t pos_5(const bwa_seq_t *p)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void bwa_print_seq(FILE *stream, bwa_seq_t *seq) {
|
||||||
|
char buffer[4096];
|
||||||
|
const int bsz = sizeof(buffer);
|
||||||
|
int i, j, l;
|
||||||
|
|
||||||
|
if (seq->strand == 0) {
|
||||||
|
for (i = 0; i < seq->full_len; i += bsz) {
|
||||||
|
l = seq->full_len - i > bsz ? bsz : seq->full_len - i;
|
||||||
|
for (j = 0; j < l; j++) buffer[j] = "ACGTN"[seq->seq[i + j]];
|
||||||
|
err_fwrite(buffer, 1, l, stream);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (i = seq->full_len - 1; i >= 0; i -= bsz) {
|
||||||
|
l = i + 1 > bsz ? bsz : i + 1;
|
||||||
|
for (j = 0; j < l; j++) buffer[j] = "TGCAN"[seq->seq[i - j]];
|
||||||
|
err_fwrite(buffer, 1, l, stream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2)
|
void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2)
|
||||||
{
|
{
|
||||||
int j;
|
int j;
|
||||||
|
|
@ -432,10 +456,8 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
|
||||||
else err_printf("\t*\t0\t0\t");
|
else err_printf("\t*\t0\t0\t");
|
||||||
|
|
||||||
// print sequence and quality
|
// print sequence and quality
|
||||||
if (p->strand == 0)
|
bwa_print_seq(stdout, p);
|
||||||
for (j = 0; j != p->full_len; ++j) putchar("ACGTN"[(int)p->seq[j]]);
|
err_putchar('\t');
|
||||||
else for (j = 0; j != p->full_len; ++j) putchar("TGCAN"[p->seq[p->full_len - 1 - j]]);
|
|
||||||
putchar('\t');
|
|
||||||
if (p->qual) {
|
if (p->qual) {
|
||||||
if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality
|
if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality
|
||||||
err_printf("%s", p->qual);
|
err_printf("%s", p->qual);
|
||||||
|
|
@ -478,14 +500,16 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
putchar('\n');
|
err_putchar('\n');
|
||||||
} else { // this read has no match
|
} else { // this read has no match
|
||||||
ubyte_t *s = p->strand? p->rseq : p->seq;
|
//ubyte_t *s = p->strand? p->rseq : p->seq;
|
||||||
int flag = p->extra_flag | SAM_FSU;
|
int flag = p->extra_flag | SAM_FSU;
|
||||||
if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU;
|
if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU;
|
||||||
err_printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag);
|
err_printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag);
|
||||||
for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]);
|
//Why did this work differently to the version above??
|
||||||
putchar('\t');
|
//for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]);
|
||||||
|
bwa_print_seq(stdout, p);
|
||||||
|
err_putchar('\t');
|
||||||
if (p->qual) {
|
if (p->qual) {
|
||||||
if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality
|
if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality
|
||||||
err_printf("%s", p->qual);
|
err_printf("%s", p->qual);
|
||||||
|
|
@ -493,7 +517,7 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
|
||||||
if (bwa_rg_id[0]) err_printf("\tRG:Z:%s", bwa_rg_id);
|
if (bwa_rg_id[0]) err_printf("\tRG:Z:%s", bwa_rg_id);
|
||||||
if (p->bc[0]) err_printf("\tBC:Z:%s", p->bc);
|
if (p->bc[0]) err_printf("\tBC:Z:%s", p->bc);
|
||||||
if (p->clip_len < p->full_len) err_printf("\tXC:i:%d", p->clip_len);
|
if (p->clip_len < p->full_len) err_printf("\tXC:i:%d", p->clip_len);
|
||||||
putchar('\n');
|
err_putchar('\n');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -522,7 +546,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
|
||||||
fp_sa = xopen(fn_sa, "r");
|
fp_sa = xopen(fn_sa, "r");
|
||||||
|
|
||||||
m_aln = 0;
|
m_aln = 0;
|
||||||
fread(&opt, sizeof(gap_opt_t), 1, fp_sa);
|
err_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa);
|
||||||
bwa_print_sam_hdr(bns, rg_line);
|
bwa_print_sam_hdr(bns, rg_line);
|
||||||
//bwa_print_sam_PG();
|
//bwa_print_sam_PG();
|
||||||
// set ks
|
// set ks
|
||||||
|
|
@ -536,12 +560,12 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
|
||||||
for (i = 0; i < n_seqs; ++i) {
|
for (i = 0; i < n_seqs; ++i) {
|
||||||
bwa_seq_t *p = seqs + i;
|
bwa_seq_t *p = seqs + i;
|
||||||
int n_aln;
|
int n_aln;
|
||||||
fread(&n_aln, 4, 1, fp_sa);
|
err_fread_noeof(&n_aln, 4, 1, fp_sa);
|
||||||
if (n_aln > m_aln) {
|
if (n_aln > m_aln) {
|
||||||
m_aln = n_aln;
|
m_aln = n_aln;
|
||||||
aln = (bwt_aln1_t*)realloc(aln, sizeof(bwt_aln1_t) * m_aln);
|
aln = (bwt_aln1_t*)realloc(aln, sizeof(bwt_aln1_t) * m_aln);
|
||||||
}
|
}
|
||||||
fread(aln, sizeof(bwt_aln1_t), n_aln, fp_sa);
|
err_fread_noeof(aln, sizeof(bwt_aln1_t), n_aln, fp_sa);
|
||||||
bwa_aln2seq_core(n_aln, aln, p, 1, n_occ);
|
bwa_aln2seq_core(n_aln, aln, p, 1, n_occ);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -565,7 +589,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
|
||||||
// destroy
|
// destroy
|
||||||
bwa_seq_close(ks);
|
bwa_seq_close(ks);
|
||||||
bns_destroy(bns);
|
bns_destroy(bns);
|
||||||
fclose(fp_sa);
|
err_fclose(fp_sa);
|
||||||
free(aln);
|
free(aln);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -591,7 +615,7 @@ int bwa_sai2sam_se(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) {
|
if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) {
|
||||||
fprintf(stderr, "[%s] fail to locate the index\n", __func__);
|
fprintf(stderr, "[%s] fail to locate the index\n", __func__);
|
||||||
return 0;
|
return 1;
|
||||||
}
|
}
|
||||||
bwa_sai2sam_se_core(prefix, argv[optind+1], argv[optind+2], n_occ, rg_line);
|
bwa_sai2sam_se_core(prefix, argv[optind+1], argv[optind+2], n_occ, rg_line);
|
||||||
free(prefix);
|
free(prefix);
|
||||||
|
|
|
||||||
18
bwaseqio.c
18
bwaseqio.c
|
|
@ -7,6 +7,10 @@
|
||||||
#include "kseq.h"
|
#include "kseq.h"
|
||||||
KSEQ_DECLARE(gzFile)
|
KSEQ_DECLARE(gzFile)
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
extern unsigned char nst_nt4_table[256];
|
extern unsigned char nst_nt4_table[256];
|
||||||
static char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 };
|
static char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 };
|
||||||
|
|
||||||
|
|
@ -26,6 +30,7 @@ bwa_seqio_t *bwa_bam_open(const char *fn, int which)
|
||||||
bs->is_bam = 1;
|
bs->is_bam = 1;
|
||||||
bs->which = which;
|
bs->which = which;
|
||||||
bs->fp = bam_open(fn, "r");
|
bs->fp = bam_open(fn, "r");
|
||||||
|
if (0 == bs->fp) err_fatal_simple("Couldn't open bam file");
|
||||||
h = bam_header_read(bs->fp);
|
h = bam_header_read(bs->fp);
|
||||||
bam_header_destroy(h);
|
bam_header_destroy(h);
|
||||||
return bs;
|
return bs;
|
||||||
|
|
@ -44,9 +49,10 @@ bwa_seqio_t *bwa_seq_open(const char *fn)
|
||||||
void bwa_seq_close(bwa_seqio_t *bs)
|
void bwa_seq_close(bwa_seqio_t *bs)
|
||||||
{
|
{
|
||||||
if (bs == 0) return;
|
if (bs == 0) return;
|
||||||
if (bs->is_bam) bam_close(bs->fp);
|
if (bs->is_bam) {
|
||||||
else {
|
if (0 != bam_close(bs->fp)) err_fatal_simple("Error closing bam file");
|
||||||
gzclose(bs->ks->f->f);
|
} else {
|
||||||
|
err_gzclose(bs->ks->f->f);
|
||||||
kseq_destroy(bs->ks);
|
kseq_destroy(bs->ks);
|
||||||
}
|
}
|
||||||
free(bs);
|
free(bs);
|
||||||
|
|
@ -90,11 +96,12 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com
|
||||||
int n_seqs, l, i;
|
int n_seqs, l, i;
|
||||||
long n_trimmed = 0, n_tot = 0;
|
long n_trimmed = 0, n_tot = 0;
|
||||||
bam1_t *b;
|
bam1_t *b;
|
||||||
|
int res;
|
||||||
|
|
||||||
b = bam_init1();
|
b = bam_init1();
|
||||||
n_seqs = 0;
|
n_seqs = 0;
|
||||||
seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
|
seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
|
||||||
while (bam_read1(bs->fp, b) >= 0) {
|
while ((res = bam_read1(bs->fp, b)) >= 0) {
|
||||||
uint8_t *s, *q;
|
uint8_t *s, *q;
|
||||||
int go = 0;
|
int go = 0;
|
||||||
if ((bs->which & 1) && (b->core.flag & BAM_FREAD1)) go = 1;
|
if ((bs->which & 1) && (b->core.flag & BAM_FREAD1)) go = 1;
|
||||||
|
|
@ -126,6 +133,7 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com
|
||||||
p->name = strdup((const char*)bam1_qname(b));
|
p->name = strdup((const char*)bam1_qname(b));
|
||||||
if (n_seqs == n_needed) break;
|
if (n_seqs == n_needed) break;
|
||||||
}
|
}
|
||||||
|
if (res < 0 && res != -1) err_fatal_simple("Error reading bam file");
|
||||||
*n = n_seqs;
|
*n = n_seqs;
|
||||||
if (n_seqs && trim_qual >= 1)
|
if (n_seqs && trim_qual >= 1)
|
||||||
fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot);
|
fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot);
|
||||||
|
|
@ -184,7 +192,7 @@ bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int tri
|
||||||
p->qual = 0;
|
p->qual = 0;
|
||||||
p->full_len = p->clip_len = p->len = l;
|
p->full_len = p->clip_len = p->len = l;
|
||||||
n_tot += p->full_len;
|
n_tot += p->full_len;
|
||||||
p->seq = (ubyte_t*)calloc(p->len, 1);
|
p->seq = (ubyte_t*)calloc(p->full_len, 1);
|
||||||
for (i = 0; i != p->full_len; ++i)
|
for (i = 0; i != p->full_len; ++i)
|
||||||
p->seq[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
p->seq[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
||||||
if (seq->qual.l) { // copy quality
|
if (seq->qual.l) { // copy quality
|
||||||
|
|
|
||||||
54
bwt.c
54
bwt.c
|
|
@ -34,6 +34,10 @@
|
||||||
#include "bwt.h"
|
#include "bwt.h"
|
||||||
#include "kvec.h"
|
#include "kvec.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
void bwt_gen_cnt_table(bwt_t *bwt)
|
void bwt_gen_cnt_table(bwt_t *bwt)
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
@ -67,10 +71,6 @@ void bwt_cal_sa(bwt_t *bwt, int intv)
|
||||||
bwt->sa_intv = intv;
|
bwt->sa_intv = intv;
|
||||||
bwt->n_sa = (bwt->seq_len + intv) / intv;
|
bwt->n_sa = (bwt->seq_len + intv) / intv;
|
||||||
bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t));
|
bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t));
|
||||||
if (bwt->sa == 0) {
|
|
||||||
fprintf(stderr, "[%s] Fail to allocate %.3fMB memory. Abort!\n", __func__, bwt->n_sa * sizeof(bwtint_t) / 1024.0/1024.0);
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
// calculate SA value
|
// calculate SA value
|
||||||
isa = 0; sa = bwt->seq_len;
|
isa = 0; sa = bwt->seq_len;
|
||||||
for (i = 0; i < bwt->seq_len; ++i) {
|
for (i = 0; i < bwt->seq_len; ++i) {
|
||||||
|
|
@ -354,22 +354,24 @@ void bwt_dump_bwt(const char *fn, const bwt_t *bwt)
|
||||||
{
|
{
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
fp = xopen(fn, "wb");
|
fp = xopen(fn, "wb");
|
||||||
fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
|
err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
|
||||||
fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
|
err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
|
||||||
fwrite(bwt->bwt, 4, bwt->bwt_size, fp);
|
err_fwrite(bwt->bwt, 4, bwt->bwt_size, fp);
|
||||||
fclose(fp);
|
err_fflush(fp);
|
||||||
|
err_fclose(fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bwt_dump_sa(const char *fn, const bwt_t *bwt)
|
void bwt_dump_sa(const char *fn, const bwt_t *bwt)
|
||||||
{
|
{
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
fp = xopen(fn, "wb");
|
fp = xopen(fn, "wb");
|
||||||
fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
|
err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
|
||||||
fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
|
err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
|
||||||
fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp);
|
err_fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp);
|
||||||
fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp);
|
err_fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp);
|
||||||
fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp);
|
err_fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp);
|
||||||
fclose(fp);
|
err_fflush(fp);
|
||||||
|
err_fclose(fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a)
|
static bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a)
|
||||||
|
|
@ -378,7 +380,7 @@ static bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a)
|
||||||
bwtint_t offset = 0;
|
bwtint_t offset = 0;
|
||||||
while (size) {
|
while (size) {
|
||||||
int x = bufsize < size? bufsize : size;
|
int x = bufsize < size? bufsize : size;
|
||||||
if ((x = fread(a + offset, 1, x, fp)) == 0) break;
|
if ((x = err_fread_noeof(a + offset, 1, x, fp)) == 0) break;
|
||||||
size -= x; offset += x;
|
size -= x; offset += x;
|
||||||
}
|
}
|
||||||
return offset;
|
return offset;
|
||||||
|
|
@ -391,11 +393,11 @@ void bwt_restore_sa(const char *fn, bwt_t *bwt)
|
||||||
bwtint_t primary;
|
bwtint_t primary;
|
||||||
|
|
||||||
fp = xopen(fn, "rb");
|
fp = xopen(fn, "rb");
|
||||||
fread(&primary, sizeof(bwtint_t), 1, fp);
|
err_fread_noeof(&primary, sizeof(bwtint_t), 1, fp);
|
||||||
xassert(primary == bwt->primary, "SA-BWT inconsistency: primary is not the same.");
|
xassert(primary == bwt->primary, "SA-BWT inconsistency: primary is not the same.");
|
||||||
fread(skipped, sizeof(bwtint_t), 4, fp); // skip
|
err_fread_noeof(skipped, sizeof(bwtint_t), 4, fp); // skip
|
||||||
fread(&bwt->sa_intv, sizeof(bwtint_t), 1, fp);
|
err_fread_noeof(&bwt->sa_intv, sizeof(bwtint_t), 1, fp);
|
||||||
fread(&primary, sizeof(bwtint_t), 1, fp);
|
err_fread_noeof(&primary, sizeof(bwtint_t), 1, fp);
|
||||||
xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same.");
|
xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same.");
|
||||||
|
|
||||||
bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv;
|
bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv;
|
||||||
|
|
@ -403,7 +405,7 @@ void bwt_restore_sa(const char *fn, bwt_t *bwt)
|
||||||
bwt->sa[0] = -1;
|
bwt->sa[0] = -1;
|
||||||
|
|
||||||
fread_fix(fp, sizeof(bwtint_t) * (bwt->n_sa - 1), bwt->sa + 1);
|
fread_fix(fp, sizeof(bwtint_t) * (bwt->n_sa - 1), bwt->sa + 1);
|
||||||
fclose(fp);
|
err_fclose(fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
bwt_t *bwt_restore_bwt(const char *fn)
|
bwt_t *bwt_restore_bwt(const char *fn)
|
||||||
|
|
@ -413,15 +415,15 @@ bwt_t *bwt_restore_bwt(const char *fn)
|
||||||
|
|
||||||
bwt = (bwt_t*)calloc(1, sizeof(bwt_t));
|
bwt = (bwt_t*)calloc(1, sizeof(bwt_t));
|
||||||
fp = xopen(fn, "rb");
|
fp = xopen(fn, "rb");
|
||||||
fseek(fp, 0, SEEK_END);
|
err_fseek(fp, 0, SEEK_END);
|
||||||
bwt->bwt_size = (ftell(fp) - sizeof(bwtint_t) * 5) >> 2;
|
bwt->bwt_size = (err_ftell(fp) - sizeof(bwtint_t) * 5) >> 2;
|
||||||
bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4);
|
bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4);
|
||||||
fseek(fp, 0, SEEK_SET);
|
err_fseek(fp, 0, SEEK_SET);
|
||||||
fread(&bwt->primary, sizeof(bwtint_t), 1, fp);
|
err_fread_noeof(&bwt->primary, sizeof(bwtint_t), 1, fp);
|
||||||
fread(bwt->L2+1, sizeof(bwtint_t), 4, fp);
|
err_fread_noeof(bwt->L2+1, sizeof(bwtint_t), 4, fp);
|
||||||
fread_fix(fp, bwt->bwt_size<<2, bwt->bwt);
|
fread_fix(fp, bwt->bwt_size<<2, bwt->bwt);
|
||||||
bwt->seq_len = bwt->L2[4];
|
bwt->seq_len = bwt->L2[4];
|
||||||
fclose(fp);
|
err_fclose(fp);
|
||||||
bwt_gen_cnt_table(bwt);
|
bwt_gen_cnt_table(bwt);
|
||||||
|
|
||||||
return bwt;
|
return bwt;
|
||||||
|
|
|
||||||
91
bwt_gen.c
91
bwt_gen.c
|
|
@ -27,8 +27,13 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <errno.h>
|
||||||
#include "QSufSort.h"
|
#include "QSufSort.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef uint64_t bgint_t;
|
typedef uint64_t bgint_t;
|
||||||
typedef int64_t sbgint_t;
|
typedef int64_t sbgint_t;
|
||||||
|
|
||||||
|
|
@ -1443,13 +1448,29 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB
|
||||||
packedFile = (FILE*)fopen(inputFileName, "rb");
|
packedFile = (FILE*)fopen(inputFileName, "rb");
|
||||||
|
|
||||||
if (packedFile == NULL) {
|
if (packedFile == NULL) {
|
||||||
fprintf(stderr, "BWTIncConstructFromPacked() : Cannot open inputFileName!\n");
|
fprintf(stderr, "BWTIncConstructFromPacked() : Cannot open %s : %s\n",
|
||||||
|
inputFileName, strerror(errno));
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
fseek(packedFile, -1, SEEK_END);
|
if (fseek(packedFile, -1, SEEK_END) != 0) {
|
||||||
|
fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n",
|
||||||
|
inputFileName, strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
packedFileLen = ftell(packedFile);
|
packedFileLen = ftell(packedFile);
|
||||||
fread(&lastByteLength, sizeof(unsigned char), 1, packedFile);
|
if (packedFileLen == -1) {
|
||||||
|
fprintf(stderr, "BWTIncConstructFromPacked() : Can't ftell on %s : %s\n",
|
||||||
|
inputFileName, strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (fread(&lastByteLength, sizeof(unsigned char), 1, packedFile) != 1) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"BWTIncConstructFromPacked() : Can't read from %s : %s\n",
|
||||||
|
inputFileName,
|
||||||
|
ferror(packedFile)? strerror(errno) : "Unexpected end of file");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
totalTextLength = TextLengthFromBytePacked(packedFileLen, BIT_PER_CHAR, lastByteLength);
|
totalTextLength = TextLengthFromBytePacked(packedFileLen, BIT_PER_CHAR, lastByteLength);
|
||||||
|
|
||||||
bwtInc = BWTIncCreate(totalTextLength, initialMaxBuildSize, incMaxBuildSize);
|
bwtInc = BWTIncCreate(totalTextLength, initialMaxBuildSize, incMaxBuildSize);
|
||||||
|
|
@ -1463,10 +1484,23 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB
|
||||||
}
|
}
|
||||||
textSizeInByte = textToLoad / CHAR_PER_BYTE; // excluded the odd byte
|
textSizeInByte = textToLoad / CHAR_PER_BYTE; // excluded the odd byte
|
||||||
|
|
||||||
fseek(packedFile, -2, SEEK_CUR);
|
if (fseek(packedFile, -((long)textSizeInByte + 2), SEEK_CUR) != 0) {
|
||||||
fseek(packedFile, -((long)textSizeInByte), SEEK_CUR);
|
fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n",
|
||||||
fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte + 1, packedFile);
|
inputFileName, strerror(errno));
|
||||||
fseek(packedFile, -((long)textSizeInByte + 1), SEEK_CUR);
|
exit(1);
|
||||||
|
}
|
||||||
|
if (fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte + 1, packedFile) != textSizeInByte + 1) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"BWTIncConstructFromPacked() : Can't read from %s : %s\n",
|
||||||
|
inputFileName,
|
||||||
|
ferror(packedFile)? strerror(errno) : "Unexpected end of file");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (fseek(packedFile, -((long)textSizeInByte + 1), SEEK_CUR) != 0) {
|
||||||
|
fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n",
|
||||||
|
inputFileName, strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad);
|
ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad);
|
||||||
BWTIncConstruct(bwtInc, textToLoad);
|
BWTIncConstruct(bwtInc, textToLoad);
|
||||||
|
|
@ -1479,9 +1513,23 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB
|
||||||
textToLoad = totalTextLength - processedTextLength;
|
textToLoad = totalTextLength - processedTextLength;
|
||||||
}
|
}
|
||||||
textSizeInByte = textToLoad / CHAR_PER_BYTE;
|
textSizeInByte = textToLoad / CHAR_PER_BYTE;
|
||||||
fseek(packedFile, -((long)textSizeInByte), SEEK_CUR);
|
if (fseek(packedFile, -((long)textSizeInByte), SEEK_CUR) != 0) {
|
||||||
fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte, packedFile);
|
fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n",
|
||||||
fseek(packedFile, -((long)textSizeInByte), SEEK_CUR);
|
inputFileName, strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte, packedFile) != textSizeInByte) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"BWTIncConstructFromPacked() : Can't read from %s : %s\n",
|
||||||
|
inputFileName,
|
||||||
|
ferror(packedFile)? strerror(errno) : "Unexpected end of file");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (fseek(packedFile, -((long)textSizeInByte), SEEK_CUR) != 0) {
|
||||||
|
fprintf(stderr, "BWTIncConstructFromPacked() : Can't seek on %s : %s\n",
|
||||||
|
inputFileName, strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad);
|
ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad);
|
||||||
BWTIncConstruct(bwtInc, textToLoad);
|
BWTIncConstruct(bwtInc, textToLoad);
|
||||||
processedTextLength += textToLoad;
|
processedTextLength += textToLoad;
|
||||||
|
|
@ -1526,15 +1574,28 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o
|
||||||
|
|
||||||
bwtFile = (FILE*)fopen(bwtFileName, "wb");
|
bwtFile = (FILE*)fopen(bwtFileName, "wb");
|
||||||
if (bwtFile == NULL) {
|
if (bwtFile == NULL) {
|
||||||
fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Cannot open BWT code file!\n");
|
fprintf(stderr,
|
||||||
|
"BWTSaveBwtCodeAndOcc(): Cannot open %s for writing: %s\n",
|
||||||
|
bwtFileName, strerror(errno));
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
fwrite(&bwt->inverseSa0, sizeof(bgint_t), 1, bwtFile);
|
|
||||||
fwrite(bwt->cumulativeFreq + 1, sizeof(bgint_t), ALPHABET_SIZE, bwtFile);
|
|
||||||
bwtLength = BWTFileSizeInWord(bwt->textLength);
|
bwtLength = BWTFileSizeInWord(bwt->textLength);
|
||||||
fwrite(bwt->bwtCode, sizeof(unsigned int), bwtLength, bwtFile);
|
|
||||||
fclose(bwtFile);
|
if (fwrite(&bwt->inverseSa0, sizeof(bgint_t), 1, bwtFile) != 1
|
||||||
|
|| fwrite(bwt->cumulativeFreq + 1,
|
||||||
|
sizeof(bgint_t), ALPHABET_SIZE, bwtFile) != ALPHABET_SIZE
|
||||||
|
|| fwrite(bwt->bwtCode,
|
||||||
|
sizeof(unsigned int), bwtLength, bwtFile) != bwtLength) {
|
||||||
|
fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Error writing to %s : %s\n",
|
||||||
|
bwtFileName, strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (fclose(bwtFile) != 0) {
|
||||||
|
fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Error on closing %s : %s\n",
|
||||||
|
bwtFileName, strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt)
|
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt)
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,10 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "bwt_lite.h"
|
#include "bwt_lite.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
int is_sa(const uint8_t *T, uint32_t *SA, int n);
|
int is_sa(const uint8_t *T, uint32_t *SA, int n);
|
||||||
int is_bwt(uint8_t *T, int n);
|
int is_bwt(uint8_t *T, int n);
|
||||||
|
|
||||||
|
|
|
||||||
6
bwtaln.c
6
bwtaln.c
|
|
@ -17,6 +17,10 @@
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
gap_opt_t *gap_init_opt()
|
gap_opt_t *gap_init_opt()
|
||||||
{
|
{
|
||||||
gap_opt_t *o;
|
gap_opt_t *o;
|
||||||
|
|
@ -306,7 +310,7 @@ int bwa_aln(int argc, char *argv[])
|
||||||
if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) {
|
if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) {
|
||||||
fprintf(stderr, "[%s] fail to locate the index\n", __func__);
|
fprintf(stderr, "[%s] fail to locate the index\n", __func__);
|
||||||
free(opt);
|
free(opt);
|
||||||
return 0;
|
return 1;
|
||||||
}
|
}
|
||||||
bwa_aln_core(prefix, argv[optind+1], opt);
|
bwa_aln_core(prefix, argv[optind+1], opt);
|
||||||
free(opt); free(prefix);
|
free(opt); free(prefix);
|
||||||
|
|
|
||||||
4
bwtgap.c
4
bwtgap.c
|
|
@ -4,6 +4,10 @@
|
||||||
#include "bwtgap.h"
|
#include "bwtgap.h"
|
||||||
#include "bwtaln.h"
|
#include "bwtaln.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#define STATE_M 0
|
#define STATE_M 0
|
||||||
#define STATE_I 1
|
#define STATE_I 1
|
||||||
#define STATE_D 2
|
#define STATE_D 2
|
||||||
|
|
|
||||||
21
bwtindex.c
21
bwtindex.c
|
|
@ -39,6 +39,11 @@
|
||||||
#include "divsufsort.h"
|
#include "divsufsort.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
int is_bwt(ubyte_t *T, int n);
|
int is_bwt(ubyte_t *T, int n);
|
||||||
|
|
||||||
int64_t bwa_seq_len(const char *fn_pac)
|
int64_t bwa_seq_len(const char *fn_pac)
|
||||||
|
|
@ -47,10 +52,10 @@ int64_t bwa_seq_len(const char *fn_pac)
|
||||||
int64_t pac_len;
|
int64_t pac_len;
|
||||||
ubyte_t c;
|
ubyte_t c;
|
||||||
fp = xopen(fn_pac, "rb");
|
fp = xopen(fn_pac, "rb");
|
||||||
fseek(fp, -1, SEEK_END);
|
err_fseek(fp, -1, SEEK_END);
|
||||||
pac_len = ftell(fp);
|
pac_len = err_ftell(fp);
|
||||||
fread(&c, 1, 1, fp);
|
err_fread_noeof(&c, 1, 1, fp);
|
||||||
fclose(fp);
|
err_fclose(fp);
|
||||||
return (pac_len - 1) * 4 + (int)c;
|
return (pac_len - 1) * 4 + (int)c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -70,8 +75,8 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is)
|
||||||
// prepare sequence
|
// prepare sequence
|
||||||
pac_size = (bwt->seq_len>>2) + ((bwt->seq_len&3) == 0? 0 : 1);
|
pac_size = (bwt->seq_len>>2) + ((bwt->seq_len&3) == 0? 0 : 1);
|
||||||
buf2 = (ubyte_t*)calloc(pac_size, 1);
|
buf2 = (ubyte_t*)calloc(pac_size, 1);
|
||||||
fread(buf2, 1, pac_size, fp);
|
err_fread_noeof(buf2, 1, pac_size, fp);
|
||||||
fclose(fp);
|
err_fclose(fp);
|
||||||
memset(bwt->L2, 0, 5 * 4);
|
memset(bwt->L2, 0, 5 * 4);
|
||||||
buf = (ubyte_t*)calloc(bwt->seq_len + 1, 1);
|
buf = (ubyte_t*)calloc(bwt->seq_len + 1, 1);
|
||||||
for (i = 0; i < bwt->seq_len; ++i) {
|
for (i = 0; i < bwt->seq_len; ++i) {
|
||||||
|
|
@ -229,7 +234,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
||||||
fprintf(stderr, "[bwa_index] Pack FASTA... ");
|
fprintf(stderr, "[bwa_index] Pack FASTA... ");
|
||||||
l_pac = bns_fasta2bntseq(fp, prefix, 0);
|
l_pac = bns_fasta2bntseq(fp, prefix, 0);
|
||||||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
gzclose(fp);
|
err_gzclose(fp);
|
||||||
}
|
}
|
||||||
if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
|
if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
|
||||||
{
|
{
|
||||||
|
|
@ -263,7 +268,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
||||||
fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
|
fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
|
||||||
l_pac = bns_fasta2bntseq(fp, prefix, 1);
|
l_pac = bns_fasta2bntseq(fp, prefix, 1);
|
||||||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
gzclose(fp);
|
err_gzclose(fp);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
bwt_t *bwt;
|
bwt_t *bwt;
|
||||||
|
|
|
||||||
21
bwtsw2_aux.c
21
bwtsw2_aux.c
|
|
@ -22,6 +22,11 @@ KSEQ_DECLARE(gzFile)
|
||||||
#define __left_lt(a, b) ((a).end > (b).end)
|
#define __left_lt(a, b) ((a).end > (b).end)
|
||||||
KSORT_INIT(hit, bsw2hit_t, __left_lt)
|
KSORT_INIT(hit, bsw2hit_t, __left_lt)
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
extern unsigned char nst_nt4_table[256];
|
extern unsigned char nst_nt4_table[256];
|
||||||
|
|
||||||
unsigned char nt_comp_table[256] = {
|
unsigned char nt_comp_table[256] = {
|
||||||
|
|
@ -710,12 +715,12 @@ static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t *
|
||||||
// print and reset
|
// print and reset
|
||||||
for (i = 0; i < _seq->n; ++i) {
|
for (i = 0; i < _seq->n; ++i) {
|
||||||
bsw2seq1_t *p = _seq->seq + i;
|
bsw2seq1_t *p = _seq->seq + i;
|
||||||
if (p->sam) printf("%s", p->sam);
|
if (p->sam) err_printf("%s", p->sam);
|
||||||
free(p->name); free(p->seq); free(p->qual); free(p->sam);
|
free(p->name); free(p->seq); free(p->qual); free(p->sam);
|
||||||
p->tid = -1; p->l = 0;
|
p->tid = -1; p->l = 0;
|
||||||
p->name = p->seq = p->qual = p->sam = 0;
|
p->name = p->seq = p->qual = p->sam = 0;
|
||||||
}
|
}
|
||||||
fflush(stdout);
|
err_fflush(stdout);
|
||||||
_seq->n = 0;
|
_seq->n = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -729,13 +734,9 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, c
|
||||||
bseq1_t *bseq;
|
bseq1_t *bseq;
|
||||||
|
|
||||||
pac = calloc(bns->l_pac/4+1, 1);
|
pac = calloc(bns->l_pac/4+1, 1);
|
||||||
if (pac == 0) {
|
|
||||||
fprintf(stderr, "[bsw2_aln] insufficient memory!\n");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
for (l = 0; l < bns->n_seqs; ++l)
|
for (l = 0; l < bns->n_seqs; ++l)
|
||||||
printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[l].name, bns->anns[l].len);
|
err_printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[l].name, bns->anns[l].len);
|
||||||
fread(pac, 1, bns->l_pac/4+1, bns->fp_pac);
|
err_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac);
|
||||||
fp = xzopen(fn, "r");
|
fp = xzopen(fn, "r");
|
||||||
ks = kseq_init(fp);
|
ks = kseq_init(fp);
|
||||||
_seq = calloc(1, sizeof(bsw2seq_t));
|
_seq = calloc(1, sizeof(bsw2seq_t));
|
||||||
|
|
@ -767,9 +768,9 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, c
|
||||||
free(pac);
|
free(pac);
|
||||||
free(_seq->seq); free(_seq);
|
free(_seq->seq); free(_seq);
|
||||||
kseq_destroy(ks);
|
kseq_destroy(ks);
|
||||||
gzclose(fp);
|
err_gzclose(fp);
|
||||||
if (fn2) {
|
if (fn2) {
|
||||||
kseq_destroy(ks2);
|
kseq_destroy(ks2);
|
||||||
gzclose(fp2);
|
err_gzclose(fp2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,10 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "bwtsw2.h"
|
#include "bwtsw2.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t tbeg, tend;
|
uint32_t tbeg, tend;
|
||||||
int qbeg, qend;
|
int qbeg, qend;
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,10 @@
|
||||||
#include "bwt.h"
|
#include "bwt.h"
|
||||||
#include "kvec.h"
|
#include "kvec.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
bwtint_t k, l;
|
bwtint_t k, l;
|
||||||
} qintv_t;
|
} qintv_t;
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,7 @@ int bwa_bwtsw2(int argc, char *argv[])
|
||||||
case 'S': opt->skip_sw = 1; break;
|
case 'S': opt->skip_sw = 1; break;
|
||||||
case 'C': opt->cpy_cmt = 1; break;
|
case 'C': opt->cpy_cmt = 1; break;
|
||||||
case 'G': opt->max_chain_gap = atoi(optarg); break;
|
case 'G': opt->max_chain_gap = atoi(optarg); break;
|
||||||
|
default: return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
opt->qr = opt->q + opt->r;
|
opt->qr = opt->q + opt->r;
|
||||||
|
|
@ -79,7 +80,7 @@ int bwa_bwtsw2(int argc, char *argv[])
|
||||||
opt->t *= opt->a;
|
opt->t *= opt->a;
|
||||||
opt->coef *= opt->a;
|
opt->coef *= opt->a;
|
||||||
|
|
||||||
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 0;
|
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 1;
|
||||||
bsw2_aln(opt, idx->bns, idx->bwt, argv[optind+1], optind+2 < argc? argv[optind+2] : 0);
|
bsw2_aln(opt, idx->bns, idx->bwt, argv[optind+1], optind+2 < argc? argv[optind+2] : 0);
|
||||||
bwa_idx_destroy(idx);
|
bwa_idx_destroy(idx);
|
||||||
free(opt);
|
free(opt);
|
||||||
|
|
|
||||||
|
|
@ -2,13 +2,17 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include "utils.h"
|
||||||
#include "bwt.h"
|
#include "bwt.h"
|
||||||
#include "bntseq.h"
|
#include "bntseq.h"
|
||||||
#include "bwtsw2.h"
|
#include "bwtsw2.h"
|
||||||
#include "kstring.h"
|
#include "kstring.h"
|
||||||
#include "utils.h"
|
|
||||||
#include "ksw.h"
|
#include "ksw.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#define MIN_RATIO 0.8
|
#define MIN_RATIO 0.8
|
||||||
#define OUTLIER_BOUND 2.0
|
#define OUTLIER_BOUND 2.0
|
||||||
#define MAX_STDDEV 4.0
|
#define MAX_STDDEV 4.0
|
||||||
|
|
|
||||||
25
example.c
25
example.c
|
|
@ -1,11 +1,16 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <errno.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "bwamem.h"
|
#include "bwamem.h"
|
||||||
#include "kseq.h" // for the FASTA/Q parser
|
#include "kseq.h" // for the FASTA/Q parser
|
||||||
KSEQ_DECLARE(gzFile)
|
KSEQ_DECLARE(gzFile)
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
bwaidx_t *idx;
|
bwaidx_t *idx;
|
||||||
|
|
@ -19,9 +24,17 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
idx = bwa_idx_load(argv[1], BWA_IDX_ALL); // load the BWA index
|
idx = bwa_idx_load(argv[1], BWA_IDX_ALL); // load the BWA index
|
||||||
assert(idx);
|
if (NULL == idx) {
|
||||||
|
fprintf(stderr, "Index load failed.\n");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
fp = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r");
|
fp = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r");
|
||||||
assert(fp);
|
if (NULL == fp) {
|
||||||
|
fprintf(stderr, "Couldn't open %s : %s\n",
|
||||||
|
strcmp(argv[2], "-") ? argv[2] : "stdin",
|
||||||
|
errno ? strerror(errno) : "Out of memory");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
ks = kseq_init(fp); // initialize the FASTA/Q parser
|
ks = kseq_init(fp); // initialize the FASTA/Q parser
|
||||||
opt = mem_opt_init(); // initialize the BWA-MEM parameters to the default values
|
opt = mem_opt_init(); // initialize the BWA-MEM parameters to the default values
|
||||||
|
|
||||||
|
|
@ -34,10 +47,10 @@ int main(int argc, char *argv[])
|
||||||
if (ar.a[i].secondary >= 0) continue; // skip secondary alignments
|
if (ar.a[i].secondary >= 0) continue; // skip secondary alignments
|
||||||
a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR
|
a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR
|
||||||
// print alignment
|
// print alignment
|
||||||
printf("%s\t%c\t%s\t%ld\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, (long)a.pos, a.mapq);
|
err_printf("%s\t%c\t%s\t%ld\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, (long)a.pos, a.mapq);
|
||||||
for (k = 0; k < a.n_cigar; ++k) // print CIGAR
|
for (k = 0; k < a.n_cigar; ++k) // print CIGAR
|
||||||
printf("%d%c", a.cigar[k]>>4, "MIDSH"[a.cigar[k]&0xf]);
|
err_printf("%d%c", a.cigar[k]>>4, "MIDSH"[a.cigar[k]&0xf]);
|
||||||
printf("\t%d\n", a.NM); // print edit distance
|
err_printf("\t%d\n", a.NM); // print edit distance
|
||||||
free(a.cigar); // don't forget to deallocate CIGAR
|
free(a.cigar); // don't forget to deallocate CIGAR
|
||||||
}
|
}
|
||||||
free(ar.a); // and deallocate the hit list
|
free(ar.a); // and deallocate the hit list
|
||||||
|
|
@ -45,7 +58,7 @@ int main(int argc, char *argv[])
|
||||||
|
|
||||||
free(opt);
|
free(opt);
|
||||||
kseq_destroy(ks);
|
kseq_destroy(ks);
|
||||||
gzclose(fp);
|
err_gzclose(fp);
|
||||||
bwa_idx_destroy(idx);
|
bwa_idx_destroy(idx);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
33
fastmap.c
33
fastmap.c
|
|
@ -7,6 +7,7 @@
|
||||||
#include "kvec.h"
|
#include "kvec.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "kseq.h"
|
#include "kseq.h"
|
||||||
|
#include "utils.h"
|
||||||
KSEQ_DECLARE(gzFile)
|
KSEQ_DECLARE(gzFile)
|
||||||
|
|
||||||
extern unsigned char nst_nt4_table[256];
|
extern unsigned char nst_nt4_table[256];
|
||||||
|
|
@ -51,6 +52,7 @@ int main_mem(int argc, char *argv[])
|
||||||
else if (c == 'R') {
|
else if (c == 'R') {
|
||||||
if ((rg_line = bwa_set_rg(optarg)) == 0) return 1; // FIXME: memory leak
|
if ((rg_line = bwa_set_rg(optarg)) == 0) return 1; // FIXME: memory leak
|
||||||
} else if (c == 's') opt->split_width = atoi(optarg);
|
} else if (c == 's') opt->split_width = atoi(optarg);
|
||||||
|
else return 1;
|
||||||
}
|
}
|
||||||
if (opt->n_threads < 1) opt->n_threads = 1;
|
if (opt->n_threads < 1) opt->n_threads = 1;
|
||||||
if (optind + 1 >= argc) {
|
if (optind + 1 >= argc) {
|
||||||
|
|
@ -130,7 +132,7 @@ int main_mem(int argc, char *argv[])
|
||||||
fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size);
|
fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size);
|
||||||
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs, 0);
|
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs, 0);
|
||||||
for (i = 0; i < n; ++i) {
|
for (i = 0; i < n; ++i) {
|
||||||
fputs(seqs[i].sam, stdout);
|
err_fputs(seqs[i].sam, stdout);
|
||||||
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
|
free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
|
||||||
}
|
}
|
||||||
free(seqs);
|
free(seqs);
|
||||||
|
|
@ -139,10 +141,10 @@ int main_mem(int argc, char *argv[])
|
||||||
free(opt);
|
free(opt);
|
||||||
bwa_idx_destroy(idx);
|
bwa_idx_destroy(idx);
|
||||||
kseq_destroy(ks);
|
kseq_destroy(ks);
|
||||||
gzclose(fp); kclose(ko);
|
err_gzclose(fp); kclose(ko);
|
||||||
if (ks2) {
|
if (ks2) {
|
||||||
kseq_destroy(ks2);
|
kseq_destroy(ks2);
|
||||||
gzclose(fp2); kclose(ko2);
|
err_gzclose(fp2); kclose(ko2);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -163,6 +165,7 @@ int main_fastmap(int argc, char *argv[])
|
||||||
case 'p': print_seq = 1; break;
|
case 'p': print_seq = 1; break;
|
||||||
case 'w': min_iwidth = atoi(optarg); break;
|
case 'w': min_iwidth = atoi(optarg); break;
|
||||||
case 'l': min_len = atoi(optarg); break;
|
case 'l': min_len = atoi(optarg); break;
|
||||||
|
default: return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (optind + 1 >= argc) {
|
if (optind + 1 >= argc) {
|
||||||
|
|
@ -170,16 +173,16 @@ int main_fastmap(int argc, char *argv[])
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
fp = gzopen(argv[optind + 1], "r");
|
fp = xzopen(argv[optind + 1], "r");
|
||||||
seq = kseq_init(fp);
|
seq = kseq_init(fp);
|
||||||
idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS);
|
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 1;
|
||||||
itr = smem_itr_init(idx->bwt);
|
itr = smem_itr_init(idx->bwt);
|
||||||
while (kseq_read(seq) >= 0) {
|
while (kseq_read(seq) >= 0) {
|
||||||
printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l);
|
err_printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l);
|
||||||
if (print_seq) {
|
if (print_seq) {
|
||||||
putchar('\t');
|
err_putchar('\t');
|
||||||
puts(seq->seq.s);
|
err_puts(seq->seq.s);
|
||||||
} else putchar('\n');
|
} else err_putchar('\n');
|
||||||
for (i = 0; i < seq->seq.l; ++i)
|
for (i = 0; i < seq->seq.l; ++i)
|
||||||
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
||||||
smem_set_query(itr, seq->seq.l, (uint8_t*)seq->seq.s);
|
smem_set_query(itr, seq->seq.l, (uint8_t*)seq->seq.s);
|
||||||
|
|
@ -187,7 +190,7 @@ int main_fastmap(int argc, char *argv[])
|
||||||
for (i = 0; i < a->n; ++i) {
|
for (i = 0; i < a->n; ++i) {
|
||||||
bwtintv_t *p = &a->a[i];
|
bwtintv_t *p = &a->a[i];
|
||||||
if ((uint32_t)p->info - (p->info>>32) < min_len) continue;
|
if ((uint32_t)p->info - (p->info>>32) < min_len) continue;
|
||||||
printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]);
|
err_printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]);
|
||||||
if (p->x[2] <= min_iwidth) {
|
if (p->x[2] <= min_iwidth) {
|
||||||
for (k = 0; k < p->x[2]; ++k) {
|
for (k = 0; k < p->x[2]; ++k) {
|
||||||
bwtint_t pos;
|
bwtint_t pos;
|
||||||
|
|
@ -196,18 +199,18 @@ int main_fastmap(int argc, char *argv[])
|
||||||
pos = bns_depos(idx->bns, bwt_sa(idx->bwt, p->x[0] + k), &is_rev);
|
pos = bns_depos(idx->bns, bwt_sa(idx->bwt, p->x[0] + k), &is_rev);
|
||||||
if (is_rev) pos -= len - 1;
|
if (is_rev) pos -= len - 1;
|
||||||
bns_cnt_ambi(idx->bns, pos, len, &ref_id);
|
bns_cnt_ambi(idx->bns, pos, len, &ref_id);
|
||||||
printf("\t%s:%c%ld", idx->bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - idx->bns->anns[ref_id].offset) + 1);
|
err_printf("\t%s:%c%ld", idx->bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - idx->bns->anns[ref_id].offset) + 1);
|
||||||
}
|
}
|
||||||
} else fputs("\t*", stdout);
|
} else err_puts("\t*");
|
||||||
putchar('\n');
|
err_putchar('\n');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
puts("//");
|
err_puts("//");
|
||||||
}
|
}
|
||||||
|
|
||||||
smem_itr_destroy(itr);
|
smem_itr_destroy(itr);
|
||||||
bwa_idx_destroy(idx);
|
bwa_idx_destroy(idx);
|
||||||
kseq_destroy(seq);
|
kseq_destroy(seq);
|
||||||
gzclose(fp);
|
err_gzclose(fp);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
7
is.c
7
is.c
|
|
@ -26,6 +26,10 @@
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef unsigned char ubyte_t;
|
typedef unsigned char ubyte_t;
|
||||||
#define chr(i) (cs == sizeof(int) ? ((const int *)T)[i]:((const unsigned char *)T)[i])
|
#define chr(i) (cs == sizeof(int) ? ((const int *)T)[i]:((const unsigned char *)T)[i])
|
||||||
|
|
||||||
|
|
@ -205,7 +209,8 @@ int is_bwt(ubyte_t *T, int n)
|
||||||
{
|
{
|
||||||
int *SA, i, primary = 0;
|
int *SA, i, primary = 0;
|
||||||
SA = (int*)calloc(n+1, sizeof(int));
|
SA = (int*)calloc(n+1, sizeof(int));
|
||||||
is_sa(T, SA, n);
|
|
||||||
|
if (is_sa(T, SA, n)) return -1;
|
||||||
|
|
||||||
for (i = 0; i <= n; ++i) {
|
for (i = 0; i <= n; ++i) {
|
||||||
if (SA[i] == 0) primary = i;
|
if (SA[i] == 0) primary = i;
|
||||||
|
|
|
||||||
4
kbtree.h
4
kbtree.h
|
|
@ -32,6 +32,10 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int32_t is_internal:1, n:31;
|
int32_t is_internal:1, n:31;
|
||||||
} kbnode_t;
|
} kbnode_t;
|
||||||
|
|
|
||||||
4
khash.h
4
khash.h
|
|
@ -116,6 +116,10 @@ int main() {
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
/* compipler specific configuration */
|
/* compipler specific configuration */
|
||||||
|
|
||||||
#if UINT_MAX == 0xffffffffu
|
#if UINT_MAX == 0xffffffffu
|
||||||
|
|
|
||||||
45
kopen.c
45
kopen.c
|
|
@ -14,6 +14,10 @@
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#define _KO_NO_NET
|
#define _KO_NO_NET
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -54,10 +58,26 @@ static int socket_connect(const char *host, const char *port)
|
||||||
#undef __err_connect
|
#undef __err_connect
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int write_bytes(int fd, const char *buf, size_t len)
|
||||||
|
{
|
||||||
|
ssize_t bytes;
|
||||||
|
do {
|
||||||
|
bytes = write(fd, buf, len);
|
||||||
|
if (bytes >= 0) {
|
||||||
|
len -= bytes;
|
||||||
|
} else if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
} while (len > 0);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int http_open(const char *fn)
|
static int http_open(const char *fn)
|
||||||
{
|
{
|
||||||
char *p, *proxy, *q, *http_host, *host, *port, *path, *buf;
|
char *p, *proxy, *q, *http_host, *host, *port, *path, *buf;
|
||||||
int fd, ret, l;
|
int fd, ret, l;
|
||||||
|
ssize_t bytes = 0, bufsz = 0x10000;
|
||||||
|
|
||||||
/* parse URL; adapted from khttp_parse_url() in knetfile.c */
|
/* parse URL; adapted from khttp_parse_url() in knetfile.c */
|
||||||
if (strstr(fn, "http://") != fn) return 0;
|
if (strstr(fn, "http://") != fn) return 0;
|
||||||
|
|
@ -87,26 +107,35 @@ static int http_open(const char *fn)
|
||||||
/* connect; adapted from khttp_connect() in knetfile.c */
|
/* connect; adapted from khttp_connect() in knetfile.c */
|
||||||
l = 0;
|
l = 0;
|
||||||
fd = socket_connect(host, port);
|
fd = socket_connect(host, port);
|
||||||
buf = calloc(0x10000, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough.
|
buf = calloc(bufsz, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough.
|
||||||
l += sprintf(buf + l, "GET %s HTTP/1.0\r\nHost: %s\r\n", path, http_host);
|
l += snprintf(buf + l, bufsz, "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n",
|
||||||
l += sprintf(buf + l, "\r\n");
|
path, http_host);
|
||||||
write(fd, buf, l);
|
if (write_bytes(fd, buf, l) != 0) {
|
||||||
|
close(fd);
|
||||||
|
fd = -1;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
l = 0;
|
l = 0;
|
||||||
while (read(fd, buf + l, 1)) { // read HTTP header; FIXME: bad efficiency
|
retry:
|
||||||
|
while (l < bufsz && (bytes = read(fd, buf + l, 1)) > 0) { // read HTTP header; FIXME: bad efficiency
|
||||||
if (buf[l] == '\n' && l >= 3)
|
if (buf[l] == '\n' && l >= 3)
|
||||||
if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break;
|
if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break;
|
||||||
++l;
|
++l;
|
||||||
}
|
}
|
||||||
|
if (bytes < 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) goto retry;
|
||||||
|
|
||||||
buf[l] = 0;
|
buf[l] = 0;
|
||||||
if (l < 14) { // prematured header
|
if (bytes < 0 || l < 14) { // prematured header
|
||||||
close(fd);
|
close(fd);
|
||||||
fd = -1;
|
fd = -1;
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
ret = strtol(buf + 8, &p, 0); // HTTP return code
|
ret = strtol(buf + 8, &p, 0); // HTTP return code
|
||||||
if (ret != 200) {
|
if (ret != 200) {
|
||||||
close(fd);
|
close(fd);
|
||||||
fd = -1;
|
fd = -1;
|
||||||
}
|
}
|
||||||
|
out:
|
||||||
free(buf); free(http_host); free(host); free(port); free(path);
|
free(buf); free(http_host); free(host); free(port); free(path);
|
||||||
return fd;
|
return fd;
|
||||||
}
|
}
|
||||||
|
|
@ -143,7 +172,7 @@ static int kftp_get_response(ftpaux_t *aux)
|
||||||
static int kftp_send_cmd(ftpaux_t *aux, const char *cmd, int is_get)
|
static int kftp_send_cmd(ftpaux_t *aux, const char *cmd, int is_get)
|
||||||
{
|
{
|
||||||
if (socket_wait(aux->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing
|
if (socket_wait(aux->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing
|
||||||
write(aux->ctrl_fd, cmd, strlen(cmd));
|
if (write_bytes(aux->ctrl_fd, cmd, strlen(cmd)) != 0) return -1;
|
||||||
return is_get? kftp_get_response(aux) : 0;
|
return is_get? kftp_get_response(aux) : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -262,7 +291,7 @@ void *kopen(const char *fn, int *_fd)
|
||||||
if (ispunct(*q) && *q != '.' && *q != '_' && *q != '-' && *q != ':')
|
if (ispunct(*q) && *q != '.' && *q != '_' && *q != '-' && *q != ':')
|
||||||
break;
|
break;
|
||||||
need_shell = (*q != 0);
|
need_shell = (*q != 0);
|
||||||
pipe(pfd);
|
if (pipe(pfd) != 0) return 0;
|
||||||
pid = vfork();
|
pid = vfork();
|
||||||
if (pid == -1) { /* vfork() error */
|
if (pid == -1) { /* vfork() error */
|
||||||
close(pfd[0]); close(pfd[1]);
|
close(pfd[0]); close(pfd[1]);
|
||||||
|
|
|
||||||
12
kseq.h
12
kseq.h
|
|
@ -32,6 +32,10 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
|
#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
|
||||||
#define KS_SEP_TAB 1 // isspace() && !' '
|
#define KS_SEP_TAB 1 // isspace() && !' '
|
||||||
#define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows)
|
#define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows)
|
||||||
|
|
@ -50,9 +54,9 @@
|
||||||
#define __KS_BASIC(type_t, __bufsize) \
|
#define __KS_BASIC(type_t, __bufsize) \
|
||||||
static inline kstream_t *ks_init(type_t f) \
|
static inline kstream_t *ks_init(type_t f) \
|
||||||
{ \
|
{ \
|
||||||
kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
|
kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
|
||||||
ks->f = f; \
|
ks->f = f; \
|
||||||
ks->buf = (unsigned char*)malloc(__bufsize); \
|
ks->buf = (unsigned char*)malloc(__bufsize); \
|
||||||
return ks; \
|
return ks; \
|
||||||
} \
|
} \
|
||||||
static inline void ks_destroy(kstream_t *ks) \
|
static inline void ks_destroy(kstream_t *ks) \
|
||||||
|
|
@ -120,7 +124,7 @@ typedef struct __kstring_t {
|
||||||
if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
|
if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
|
||||||
str->m = str->l + (i - ks->begin) + 1; \
|
str->m = str->l + (i - ks->begin) + 1; \
|
||||||
kroundup32(str->m); \
|
kroundup32(str->m); \
|
||||||
str->s = (char*)realloc(str->s, str->m); \
|
str->s = (char*)realloc(str->s, str->m); \
|
||||||
} \
|
} \
|
||||||
memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
|
memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
|
||||||
str->l = str->l + (i - ks->begin); \
|
str->l = str->l + (i - ks->begin); \
|
||||||
|
|
@ -151,7 +155,7 @@ typedef struct __kstring_t {
|
||||||
#define __KSEQ_BASIC(SCOPE, type_t) \
|
#define __KSEQ_BASIC(SCOPE, type_t) \
|
||||||
SCOPE kseq_t *kseq_init(type_t fd) \
|
SCOPE kseq_t *kseq_init(type_t fd) \
|
||||||
{ \
|
{ \
|
||||||
kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
|
kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
|
||||||
s->f = ks_init(fd); \
|
s->f = ks_init(fd); \
|
||||||
return s; \
|
return s; \
|
||||||
} \
|
} \
|
||||||
|
|
|
||||||
6
ksort.h
6
ksort.h
|
|
@ -58,6 +58,10 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
void *left, *right;
|
void *left, *right;
|
||||||
int depth;
|
int depth;
|
||||||
|
|
@ -72,7 +76,7 @@ typedef struct {
|
||||||
int curr, shift; \
|
int curr, shift; \
|
||||||
\
|
\
|
||||||
a2[0] = array; \
|
a2[0] = array; \
|
||||||
a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n); \
|
a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n); \
|
||||||
for (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) { \
|
for (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) { \
|
||||||
a = a2[curr]; b = a2[1-curr]; \
|
a = a2[curr]; b = a2[1-curr]; \
|
||||||
if (shift == 0) { \
|
if (shift == 0) { \
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,10 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "kstring.h"
|
#include "kstring.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
int ksprintf(kstring_t *s, const char *fmt, ...)
|
int ksprintf(kstring_t *s, const char *fmt, ...)
|
||||||
{
|
{
|
||||||
va_list ap;
|
va_list ap;
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,10 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef kroundup32
|
#ifndef kroundup32
|
||||||
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
18
ksw.c
18
ksw.c
|
|
@ -28,6 +28,10 @@
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#include "ksw.h"
|
#include "ksw.h"
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
#define LIKELY(x) __builtin_expect((x),1)
|
#define LIKELY(x) __builtin_expect((x),1)
|
||||||
#define UNLIKELY(x) __builtin_expect((x),0)
|
#define UNLIKELY(x) __builtin_expect((x),0)
|
||||||
|
|
@ -553,7 +557,7 @@ int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
#include "kseq.h"
|
#include "kseq.h"
|
||||||
KSEQ_INIT(gzFile, gzread)
|
KSEQ_INIT(gzFile, err_gzread)
|
||||||
|
|
||||||
unsigned char seq_nt4_table[256] = {
|
unsigned char seq_nt4_table[256] = {
|
||||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||||
|
|
@ -609,8 +613,8 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
for (j = 0; j < 5; ++j) mat[k++] = 0;
|
for (j = 0; j < 5; ++j) mat[k++] = 0;
|
||||||
// open file
|
// open file
|
||||||
fpt = gzopen(argv[optind], "r"); kst = kseq_init(fpt);
|
fpt = xzopen(argv[optind], "r"); kst = kseq_init(fpt);
|
||||||
fpq = gzopen(argv[optind+1], "r"); ksq = kseq_init(fpq);
|
fpq = xzopen(argv[optind+1], "r"); ksq = kseq_init(fpq);
|
||||||
// all-pair alignment
|
// all-pair alignment
|
||||||
while (kseq_read(ksq) > 0) {
|
while (kseq_read(ksq) > 0) {
|
||||||
kswq_t *q[2] = {0, 0};
|
kswq_t *q[2] = {0, 0};
|
||||||
|
|
@ -629,18 +633,18 @@ int main(int argc, char *argv[])
|
||||||
for (i = 0; i < (int)kst->seq.l; ++i) kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]];
|
for (i = 0; i < (int)kst->seq.l; ++i) kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]];
|
||||||
r = ksw_align(ksq->seq.l, (uint8_t*)ksq->seq.s, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[0]);
|
r = ksw_align(ksq->seq.l, (uint8_t*)ksq->seq.s, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[0]);
|
||||||
if (r.score >= minsc)
|
if (r.score >= minsc)
|
||||||
printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, r.qb, r.qe+1, r.score, r.score2, r.te2);
|
err_printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, r.qb, r.qe+1, r.score, r.score2, r.te2);
|
||||||
if (rseq) {
|
if (rseq) {
|
||||||
r = ksw_align(ksq->seq.l, rseq, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[1]);
|
r = ksw_align(ksq->seq.l, rseq, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[1]);
|
||||||
if (r.score >= minsc)
|
if (r.score >= minsc)
|
||||||
printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, (int)ksq->seq.l - r.qb, (int)ksq->seq.l - 1 - r.qe, r.score, r.score2, r.te2);
|
err_printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, (int)ksq->seq.l - r.qb, (int)ksq->seq.l - 1 - r.qe, r.score, r.score2, r.te2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(q[0]); free(q[1]);
|
free(q[0]); free(q[1]);
|
||||||
}
|
}
|
||||||
free(rseq);
|
free(rseq);
|
||||||
kseq_destroy(kst); gzclose(fpt);
|
kseq_destroy(kst); err_gzclose(fpt);
|
||||||
kseq_destroy(ksq); gzclose(fpq);
|
kseq_destroy(ksq); err_gzclose(fpq);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
6
kvec.h
6
kvec.h
|
|
@ -50,6 +50,10 @@ int main() {
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
||||||
|
|
||||||
#define kvec_t(type) struct { size_t n, m; type *a; }
|
#define kvec_t(type) struct { size_t n, m; type *a; }
|
||||||
|
|
@ -71,7 +75,7 @@ int main() {
|
||||||
#define kv_push(type, v, x) do { \
|
#define kv_push(type, v, x) do { \
|
||||||
if ((v).n == (v).m) { \
|
if ((v).n == (v).m) { \
|
||||||
(v).m = (v).m? (v).m<<1 : 2; \
|
(v).m = (v).m? (v).m<<1 : 2; \
|
||||||
(v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \
|
(v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \
|
||||||
} \
|
} \
|
||||||
(v).a[(v).n++] = (x); \
|
(v).a[(v).n++] = (x); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
|
||||||
2
main.c
2
main.c
|
|
@ -55,7 +55,7 @@ static int usage()
|
||||||
|
|
||||||
void bwa_print_sam_PG()
|
void bwa_print_sam_PG()
|
||||||
{
|
{
|
||||||
printf("@PG\tID:bwa\tPN:bwa\tVN:%s\n", PACKAGE_VERSION);
|
err_printf("@PG\tID:bwa\tPN:bwa\tVN:%s\n", PACKAGE_VERSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,57 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
/* Don't wrap ourselves */
|
||||||
|
# undef USE_MALLOC_WRAPPERS
|
||||||
|
#endif
|
||||||
|
#include "malloc_wrap.h"
|
||||||
|
|
||||||
|
void *wrap_calloc(size_t nmemb, size_t size,
|
||||||
|
const char *file, unsigned int line, const char *func) {
|
||||||
|
void *p = calloc(nmemb, size);
|
||||||
|
if (NULL == p) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
|
||||||
|
func, nmemb * size, file, line, strerror(errno));
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *wrap_malloc(size_t size,
|
||||||
|
const char *file, unsigned int line, const char *func) {
|
||||||
|
void *p = malloc(size);
|
||||||
|
if (NULL == p) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
|
||||||
|
func, size, file, line, strerror(errno));
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *wrap_realloc(void *ptr, size_t size,
|
||||||
|
const char *file, unsigned int line, const char *func) {
|
||||||
|
void *p = realloc(ptr, size);
|
||||||
|
if (NULL == p) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
|
||||||
|
func, size, file, line, strerror(errno));
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *wrap_strdup(const char *s,
|
||||||
|
const char *file, unsigned int line, const char *func) {
|
||||||
|
char *p = strdup(s);
|
||||||
|
if (NULL == p) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
|
||||||
|
func, strlen(s), file, line, strerror(errno));
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,47 @@
|
||||||
|
#ifndef MALLOC_WRAP_H
|
||||||
|
#define MALLOC_WRAP_H
|
||||||
|
|
||||||
|
#include <stdlib.h> /* Avoid breaking the usual definitions */
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void *wrap_calloc(size_t nmemb, size_t size,
|
||||||
|
const char *file, unsigned int line, const char *func);
|
||||||
|
void *wrap_malloc(size_t size,
|
||||||
|
const char *file, unsigned int line, const char *func);
|
||||||
|
void *wrap_realloc(void *ptr, size_t size,
|
||||||
|
const char *file, unsigned int line, const char *func);
|
||||||
|
char *wrap_strdup(const char *s,
|
||||||
|
const char *file, unsigned int line, const char *func);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# ifdef calloc
|
||||||
|
# undef calloc
|
||||||
|
# endif
|
||||||
|
# define calloc(n, s) wrap_calloc( (n), (s), __FILE__, __LINE__, __func__)
|
||||||
|
|
||||||
|
# ifdef malloc
|
||||||
|
# undef malloc
|
||||||
|
# endif
|
||||||
|
# define malloc(s) wrap_malloc( (s), __FILE__, __LINE__, __func__)
|
||||||
|
|
||||||
|
# ifdef realloc
|
||||||
|
# undef realloc
|
||||||
|
# endif
|
||||||
|
# define realloc(p, s) wrap_realloc((p), (s), __FILE__, __LINE__, __func__)
|
||||||
|
|
||||||
|
# ifdef strdup
|
||||||
|
# undef strdup
|
||||||
|
# endif
|
||||||
|
# define strdup(s) wrap_strdup( (s), __FILE__, __LINE__, __func__)
|
||||||
|
|
||||||
|
#endif /* USE_MALLOC_WRAPPERS */
|
||||||
|
|
||||||
|
#endif /* MALLOC_WRAP_H */
|
||||||
38
pemerge.c
38
pemerge.c
|
|
@ -4,12 +4,18 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
|
#include <errno.h>
|
||||||
#include "ksw.h"
|
#include "ksw.h"
|
||||||
#include "kseq.h"
|
#include "kseq.h"
|
||||||
#include "kstring.h"
|
#include "kstring.h"
|
||||||
#include "bwa.h"
|
#include "bwa.h"
|
||||||
|
#include "utils.h"
|
||||||
KSEQ_DECLARE(gzFile)
|
KSEQ_DECLARE(gzFile)
|
||||||
|
|
||||||
|
#ifdef USE_MALLOC_WRAPPERS
|
||||||
|
# include "malloc_wrap.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#define MAX_SCORE_RATIO 0.9f
|
#define MAX_SCORE_RATIO 0.9f
|
||||||
#define MAX_ERR 8
|
#define MAX_ERR 8
|
||||||
|
|
||||||
|
|
@ -140,14 +146,14 @@ pem_ret:
|
||||||
|
|
||||||
static inline void print_bseq(const bseq1_t *s, int rn)
|
static inline void print_bseq(const bseq1_t *s, int rn)
|
||||||
{
|
{
|
||||||
putchar(s->qual? '@' : '>');
|
err_putchar(s->qual? '@' : '>');
|
||||||
fputs(s->name, stdout);
|
err_fputs(s->name, stdout);
|
||||||
if (rn == 1 || rn == 2) {
|
if (rn == 1 || rn == 2) {
|
||||||
putchar('/'); putchar('0' + rn); putchar('\n');
|
err_putchar('/'); err_putchar('0' + rn); err_putchar('\n');
|
||||||
} else puts(" merged");
|
} else err_puts(" merged");
|
||||||
puts(s->seq);
|
err_puts(s->seq);
|
||||||
if (s->qual) {
|
if (s->qual) {
|
||||||
puts("+"); puts(s->qual);
|
err_puts("+"); err_puts(s->qual);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -224,6 +230,7 @@ int main_pemerge(int argc, char *argv[])
|
||||||
else if (c == 'Q') opt->q_thres = atoi(optarg);
|
else if (c == 'Q') opt->q_thres = atoi(optarg);
|
||||||
else if (c == 't') opt->n_threads = atoi(optarg);
|
else if (c == 't') opt->n_threads = atoi(optarg);
|
||||||
else if (c == 'T') min_ovlp = atoi(optarg);
|
else if (c == 'T') min_ovlp = atoi(optarg);
|
||||||
|
else return 1;
|
||||||
}
|
}
|
||||||
if (flag == 0) flag = 3;
|
if (flag == 0) flag = 3;
|
||||||
opt->flag = flag;
|
opt->flag = flag;
|
||||||
|
|
@ -243,9 +250,21 @@ int main_pemerge(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
|
fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
|
||||||
|
if (NULL == fp) {
|
||||||
|
fprintf(stderr, "Couldn't open %s : %s\n",
|
||||||
|
strcmp(argv[optind], "-") ? argv[optind] : "stdin",
|
||||||
|
errno ? strerror(errno) : "Out of memory");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
ks = kseq_init(fp);
|
ks = kseq_init(fp);
|
||||||
if (optind + 1 < argc) {
|
if (optind + 1 < argc) {
|
||||||
fp2 = strcmp(argv[optind+1], "-")? gzopen(argv[optind+1], "r") : gzdopen(fileno(stdin), "r");
|
fp2 = strcmp(argv[optind+1], "-")? gzopen(argv[optind+1], "r") : gzdopen(fileno(stdin), "r");
|
||||||
|
if (NULL == fp) {
|
||||||
|
fprintf(stderr, "Couldn't open %s : %s\n",
|
||||||
|
strcmp(argv[optind+1], "-") ? argv[optind+1] : "stdin",
|
||||||
|
errno ? strerror(errno) : "Out of memory");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
ks2 = kseq_init(fp2);
|
ks2 = kseq_init(fp2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -259,11 +278,14 @@ int main_pemerge(int argc, char *argv[])
|
||||||
for (i = 1; i <= MAX_ERR; ++i)
|
for (i = 1; i <= MAX_ERR; ++i)
|
||||||
fprintf(stderr, "%12ld %s\n", (long)cnt[i], err_msg[i]);
|
fprintf(stderr, "%12ld %s\n", (long)cnt[i], err_msg[i]);
|
||||||
kseq_destroy(ks);
|
kseq_destroy(ks);
|
||||||
gzclose(fp);
|
err_gzclose(fp);
|
||||||
if (ks2) {
|
if (ks2) {
|
||||||
kseq_destroy(ks2);
|
kseq_destroy(ks2);
|
||||||
gzclose(fp2);
|
err_gzclose(fp2);
|
||||||
}
|
}
|
||||||
free(opt);
|
free(opt);
|
||||||
|
|
||||||
|
err_fflush(stdout);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
157
utils.c
157
utils.c
|
|
@ -24,6 +24,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Contact: Heng Li <lh3@sanger.ac.uk> */
|
/* Contact: Heng Li <lh3@sanger.ac.uk> */
|
||||||
|
#define FSYNC_ON_FLUSH
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
|
|
@ -31,6 +32,11 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
#ifdef FSYNC_ON_FLUSH
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#endif
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
@ -41,7 +47,7 @@ KSORT_INIT(128, pair64_t, pair64_lt)
|
||||||
KSORT_INIT(64, uint64_t, ks_lt_generic)
|
KSORT_INIT(64, uint64_t, ks_lt_generic)
|
||||||
|
|
||||||
#include "kseq.h"
|
#include "kseq.h"
|
||||||
KSEQ_INIT2(, gzFile, gzread)
|
KSEQ_INIT2(, gzFile, err_gzread)
|
||||||
|
|
||||||
/********************
|
/********************
|
||||||
* System utilities *
|
* System utilities *
|
||||||
|
|
@ -53,8 +59,7 @@ FILE *err_xopen_core(const char *func, const char *fn, const char *mode)
|
||||||
if (strcmp(fn, "-") == 0)
|
if (strcmp(fn, "-") == 0)
|
||||||
return (strstr(mode, "r"))? stdin : stdout;
|
return (strstr(mode, "r"))? stdin : stdout;
|
||||||
if ((fp = fopen(fn, mode)) == 0) {
|
if ((fp = fopen(fn, mode)) == 0) {
|
||||||
fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn);
|
err_fatal(func, "fail to open file '%s' : %s", fn, strerror(errno));
|
||||||
abort();
|
|
||||||
}
|
}
|
||||||
return fp;
|
return fp;
|
||||||
}
|
}
|
||||||
|
|
@ -62,10 +67,7 @@ FILE *err_xopen_core(const char *func, const char *fn, const char *mode)
|
||||||
FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp)
|
FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp)
|
||||||
{
|
{
|
||||||
if (freopen(fn, mode, fp) == 0) {
|
if (freopen(fn, mode, fp) == 0) {
|
||||||
fprintf(stderr, "[%s] fail to open file '%s': ", func, fn);
|
err_fatal(func, "fail to open file '%s' : %s", fn, strerror(errno));
|
||||||
perror(NULL);
|
|
||||||
fprintf(stderr, "Abort!\n");
|
|
||||||
abort();
|
|
||||||
}
|
}
|
||||||
return fp;
|
return fp;
|
||||||
}
|
}
|
||||||
|
|
@ -73,16 +75,30 @@ FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE
|
||||||
gzFile err_xzopen_core(const char *func, const char *fn, const char *mode)
|
gzFile err_xzopen_core(const char *func, const char *fn, const char *mode)
|
||||||
{
|
{
|
||||||
gzFile fp;
|
gzFile fp;
|
||||||
if (strcmp(fn, "-") == 0)
|
if (strcmp(fn, "-") == 0) {
|
||||||
return gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode);
|
fp = gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode);
|
||||||
|
/* According to zlib.h, this is the only reason gzdopen can fail */
|
||||||
|
if (!fp) err_fatal(func, "Out of memory");
|
||||||
|
return fp;
|
||||||
|
}
|
||||||
if ((fp = gzopen(fn, mode)) == 0) {
|
if ((fp = gzopen(fn, mode)) == 0) {
|
||||||
fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn);
|
err_fatal(func, "fail to open file '%s' : %s", fn, errno ? strerror(errno) : "Out of memory");
|
||||||
abort();
|
|
||||||
}
|
}
|
||||||
return fp;
|
return fp;
|
||||||
}
|
}
|
||||||
|
|
||||||
void err_fatal(const char *header, const char *fmt, ...)
|
void err_fatal(const char *header, const char *fmt, ...)
|
||||||
|
{
|
||||||
|
va_list args;
|
||||||
|
va_start(args, fmt);
|
||||||
|
fprintf(stderr, "[%s] ", header);
|
||||||
|
vfprintf(stderr, fmt, args);
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
va_end(args);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
void err_fatal_core(const char *header, const char *fmt, ...)
|
||||||
{
|
{
|
||||||
va_list args;
|
va_list args;
|
||||||
va_start(args, fmt);
|
va_start(args, fmt);
|
||||||
|
|
@ -93,7 +109,13 @@ void err_fatal(const char *header, const char *fmt, ...)
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
void err_fatal_simple_core(const char *func, const char *msg)
|
void _err_fatal_simple(const char *func, const char *msg)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "[%s] %s\n", func, msg);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
void _err_fatal_simple_core(const char *func, const char *msg)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "[%s] %s Abort!\n", func, msg);
|
fprintf(stderr, "[%s] %s Abort!\n", func, msg);
|
||||||
abort();
|
abort();
|
||||||
|
|
@ -103,7 +125,51 @@ size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
|
||||||
{
|
{
|
||||||
size_t ret = fwrite(ptr, size, nmemb, stream);
|
size_t ret = fwrite(ptr, size, nmemb, stream);
|
||||||
if (ret != nmemb)
|
if (ret != nmemb)
|
||||||
err_fatal_simple_core("fwrite", strerror(errno));
|
_err_fatal_simple("fwrite", strerror(errno));
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream)
|
||||||
|
{
|
||||||
|
size_t ret = fread(ptr, size, nmemb, stream);
|
||||||
|
if (ret != nmemb)
|
||||||
|
{
|
||||||
|
_err_fatal_simple("fread", ferror(stream) ? strerror(errno) : "Unexpected end of file");
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int err_gzread(gzFile file, void *ptr, unsigned int len)
|
||||||
|
{
|
||||||
|
int ret = gzread(file, ptr, len);
|
||||||
|
|
||||||
|
if (ret < 0)
|
||||||
|
{
|
||||||
|
int errnum = 0;
|
||||||
|
const char *msg = gzerror(file, &errnum);
|
||||||
|
_err_fatal_simple("gzread", Z_ERRNO == errnum ? strerror(errno) : msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int err_fseek(FILE *stream, long offset, int whence)
|
||||||
|
{
|
||||||
|
int ret = fseek(stream, offset, whence);
|
||||||
|
if (0 != ret)
|
||||||
|
{
|
||||||
|
_err_fatal_simple("fseek", strerror(errno));
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
long err_ftell(FILE *stream)
|
||||||
|
{
|
||||||
|
long ret = ftell(stream);
|
||||||
|
if (-1 == ret)
|
||||||
|
{
|
||||||
|
_err_fatal_simple("ftell", strerror(errno));
|
||||||
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -115,7 +181,7 @@ int err_printf(const char *format, ...)
|
||||||
done = vfprintf(stdout, format, arg);
|
done = vfprintf(stdout, format, arg);
|
||||||
int saveErrno = errno;
|
int saveErrno = errno;
|
||||||
va_end(arg);
|
va_end(arg);
|
||||||
if (done < 0) err_fatal_simple_core("vfprintf(stdout)", strerror(saveErrno));
|
if (done < 0) _err_fatal_simple("vfprintf(stdout)", strerror(saveErrno));
|
||||||
return done;
|
return done;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -127,21 +193,74 @@ int err_fprintf(FILE *stream, const char *format, ...)
|
||||||
done = vfprintf(stream, format, arg);
|
done = vfprintf(stream, format, arg);
|
||||||
int saveErrno = errno;
|
int saveErrno = errno;
|
||||||
va_end(arg);
|
va_end(arg);
|
||||||
if (done < 0) err_fatal_simple_core("vfprintf", strerror(saveErrno));
|
if (done < 0) _err_fatal_simple("vfprintf", strerror(saveErrno));
|
||||||
return done;
|
return done;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int err_fputc(int c, FILE *stream)
|
||||||
|
{
|
||||||
|
int ret = putc(c, stream);
|
||||||
|
if (EOF == ret)
|
||||||
|
{
|
||||||
|
_err_fatal_simple("fputc", strerror(errno));
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int err_fputs(const char *s, FILE *stream)
|
||||||
|
{
|
||||||
|
int ret = fputs(s, stream);
|
||||||
|
if (EOF == ret)
|
||||||
|
{
|
||||||
|
_err_fatal_simple("fputs", strerror(errno));
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
int err_fflush(FILE *stream)
|
int err_fflush(FILE *stream)
|
||||||
{
|
{
|
||||||
int ret = fflush(stream);
|
int ret = fflush(stream);
|
||||||
if (ret != 0) err_fatal_simple_core("fflush", strerror(errno));
|
if (ret != 0) _err_fatal_simple("fflush", strerror(errno));
|
||||||
return ret;
|
|
||||||
|
#ifdef FSYNC_ON_FLUSH
|
||||||
|
/* Calling fflush() ensures that all the data has made it to the
|
||||||
|
kernel buffers, but this may not be sufficient for remote filesystems
|
||||||
|
(e.g. NFS, lustre) as an error may still occur while the kernel
|
||||||
|
is copying the buffered data to the file server. To be sure of
|
||||||
|
catching these errors, we need to call fsync() on the file
|
||||||
|
descriptor, but only if it is a regular file. */
|
||||||
|
{
|
||||||
|
struct stat sbuf;
|
||||||
|
if (0 != fstat(fileno(stream), &sbuf))
|
||||||
|
_err_fatal_simple("fstat", strerror(errno));
|
||||||
|
|
||||||
|
if (S_ISREG(sbuf.st_mode))
|
||||||
|
{
|
||||||
|
if (0 != fsync(fileno(stream)))
|
||||||
|
_err_fatal_simple("fsync", strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int err_fclose(FILE *stream)
|
int err_fclose(FILE *stream)
|
||||||
{
|
{
|
||||||
int ret = fclose(stream);
|
int ret = fclose(stream);
|
||||||
if (ret != 0) err_fatal_simple_core("fclose", strerror(errno));
|
if (ret != 0) _err_fatal_simple("fclose", strerror(errno));
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int err_gzclose(gzFile file)
|
||||||
|
{
|
||||||
|
int ret = gzclose(file);
|
||||||
|
if (Z_OK != ret)
|
||||||
|
{
|
||||||
|
_err_fatal_simple("gzclose", Z_ERRNO == ret ? strerror(errno) : zError(ret));
|
||||||
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
24
utils.h
24
utils.h
|
|
@ -39,11 +39,14 @@
|
||||||
#define ATTRIBUTE(list)
|
#define ATTRIBUTE(list)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define err_fatal_simple(msg) err_fatal_simple_core(__func__, msg)
|
#define err_fatal_simple(msg) _err_fatal_simple(__func__, msg)
|
||||||
|
#define err_fatal_simple_core(msg) _err_fatal_simple_core(__func__, msg)
|
||||||
|
|
||||||
#define xopen(fn, mode) err_xopen_core(__func__, fn, mode)
|
#define xopen(fn, mode) err_xopen_core(__func__, fn, mode)
|
||||||
#define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp)
|
#define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp)
|
||||||
#define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode)
|
#define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode)
|
||||||
#define xassert(cond, msg) if ((cond) == 0) err_fatal_simple_core(__func__, msg)
|
|
||||||
|
#define xassert(cond, msg) if ((cond) == 0) _err_fatal_simple_core(__func__, msg)
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint64_t x, y;
|
uint64_t x, y;
|
||||||
|
|
@ -56,18 +59,31 @@ typedef struct { size_t n, m; pair64_t *a; } pair64_v;
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void err_fatal(const char *header, const char *fmt, ...);
|
void err_fatal(const char *header, const char *fmt, ...) ATTRIBUTE((noreturn));
|
||||||
void err_fatal_simple_core(const char *func, const char *msg);
|
void err_fatal_core(const char *header, const char *fmt, ...) ATTRIBUTE((noreturn));
|
||||||
|
void _err_fatal_simple(const char *func, const char *msg) ATTRIBUTE((noreturn));
|
||||||
|
void _err_fatal_simple_core(const char *func, const char *msg) ATTRIBUTE((noreturn));
|
||||||
FILE *err_xopen_core(const char *func, const char *fn, const char *mode);
|
FILE *err_xopen_core(const char *func, const char *fn, const char *mode);
|
||||||
FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp);
|
FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp);
|
||||||
gzFile err_xzopen_core(const char *func, const char *fn, const char *mode);
|
gzFile err_xzopen_core(const char *func, const char *fn, const char *mode);
|
||||||
size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
|
size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
|
||||||
|
size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream);
|
||||||
|
|
||||||
|
int err_gzread(gzFile file, void *ptr, unsigned int len);
|
||||||
|
int err_fseek(FILE *stream, long offset, int whence);
|
||||||
|
#define err_rewind(FP) err_fseek((FP), 0, SEEK_SET)
|
||||||
|
long err_ftell(FILE *stream);
|
||||||
int err_fprintf(FILE *stream, const char *format, ...)
|
int err_fprintf(FILE *stream, const char *format, ...)
|
||||||
ATTRIBUTE((format(printf, 2, 3)));
|
ATTRIBUTE((format(printf, 2, 3)));
|
||||||
int err_printf(const char *format, ...)
|
int err_printf(const char *format, ...)
|
||||||
ATTRIBUTE((format(printf, 1, 2)));
|
ATTRIBUTE((format(printf, 1, 2)));
|
||||||
|
int err_fputc(int c, FILE *stream);
|
||||||
|
#define err_putchar(C) err_fputc((C), stdout)
|
||||||
|
int err_fputs(const char *s, FILE *stream);
|
||||||
|
#define err_puts(S) err_fputs((S), stdout)
|
||||||
int err_fflush(FILE *stream);
|
int err_fflush(FILE *stream);
|
||||||
int err_fclose(FILE *stream);
|
int err_fclose(FILE *stream);
|
||||||
|
int err_gzclose(gzFile file);
|
||||||
|
|
||||||
double cputime();
|
double cputime();
|
||||||
double realtime();
|
double realtime();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue