diff --git a/Makefile b/Makefile index 8cf767a..334616c 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ CXXFLAGS= $(CFLAGS) AR= ar DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64 LOBJS= bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o bwamem.o bwamem_pair.o stdaln.o \ - bseq.o bwaseqio.o bwase.o kstring.o + bwaseqio.o bwase.o kstring.o AOBJS= QSufSort.o bwt_gen.o \ is.o bwtmisc.o bwtindex.o ksw.o bwape.o \ bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \ diff --git a/bseq.c b/bseq.c deleted file mode 100644 index d20b983..0000000 --- a/bseq.c +++ /dev/null @@ -1,55 +0,0 @@ -#include -#include -#include -#include -#include "bseq.h" -#include "kseq.h" -KSEQ_INIT2(, gzFile, gzread) - -static inline void trim_readno(kstring_t *s) -{ - if (s->l > 2 && s->s[s->l-2] == '/' && isdigit(s->s[s->l-1])) - s->l -= 2, s->s[s->l] = 0; -} - -static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s) -{ // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice - s->name = strdup(ks->name.s); - s->comment = ks->comment.l? strdup(s->comment) : 0; - s->seq = strdup(ks->seq.s); - s->qual = ks->qual.l? strdup(ks->qual.s) : 0; - s->l_seq = strlen(s->seq); -} - -bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_) -{ - kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_; - int size = 0, m, n; - bseq1_t *seqs; - m = n = 0; seqs = 0; - while (kseq_read(ks) >= 0) { - if (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads - fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__); - break; - } - if (n >= m) { - m = m? m<<1 : 256; - seqs = realloc(seqs, m * sizeof(bseq1_t)); - } - trim_readno(&ks->name); - kseq2bseq1(ks, &seqs[n]); - size += seqs[n++].l_seq; - if (ks2) { - trim_readno(&ks2->name); - kseq2bseq1(ks2, &seqs[n]); - size += seqs[n++].l_seq; - } - if (size >= chunk_size) break; - } - if (size == 0) { // test if the 2nd file is finished - if (ks2 && kseq_read(ks2) >= 0) - fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__); - } - *n_ = n; - return seqs; -} diff --git a/bseq.h b/bseq.h deleted file mode 100644 index 978312a..0000000 --- a/bseq.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef BATCHSEQ_H_ -#define BATCHSEQ_H_ - -typedef struct { - int l_seq; - char *name, *comment, *seq, *qual, *sam; -} bseq1_t; - -bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_); - -#endif diff --git a/bwamem.c b/bwamem.c index 7557af6..8d0494d 100644 --- a/bwamem.c +++ b/bwamem.c @@ -10,6 +10,7 @@ #include "bwamem.h" #include "bntseq.h" #include "ksw.h" +#include "kvec.h" #include "ksort.h" #define MAPQ_COEF 40. diff --git a/bwamem.h b/bwamem.h index b95c96d..4e2e5ce 100644 --- a/bwamem.h +++ b/bwamem.h @@ -3,8 +3,7 @@ #include "bwt.h" #include "bntseq.h" -#include "bseq.h" -#include "kvec.h" +#include "utils.h" struct __smem_i; typedef struct __smem_i smem_i; @@ -51,8 +50,8 @@ typedef struct { int64_t mb, me; // mb: mate start; -1 if single-end; -2 if mate unmapped } bwahit_t; -typedef kvec_t(mem_chain_t) mem_chain_v; -typedef kvec_t(mem_alnreg_t) mem_alnreg_v; +typedef struct { size_t n, m; mem_chain_t *a; } mem_chain_v; +typedef struct { size_t n, m; mem_alnreg_t *a; } mem_alnreg_v; extern int mem_verbose; diff --git a/bwtsw2_aux.c b/bwtsw2_aux.c index a18ffc8..55c7c64 100644 --- a/bwtsw2_aux.c +++ b/bwtsw2_aux.c @@ -13,7 +13,6 @@ #include "bwtsw2.h" #include "stdaln.h" #include "kstring.h" -#include "bseq.h" #include "kseq.h" KSEQ_DECLARE(gzFile) diff --git a/fastmap.c b/fastmap.c index 56674f9..f2677eb 100644 --- a/fastmap.c +++ b/fastmap.c @@ -6,7 +6,7 @@ #include "bwt.h" #include "bwamem.h" #include "kvec.h" -#include "bseq.h" +#include "utils.h" #include "kseq.h" KSEQ_DECLARE(gzFile) diff --git a/utils.c b/utils.c index 41594c3..127c8fe 100644 --- a/utils.c +++ b/utils.c @@ -35,9 +35,8 @@ #include #include "utils.h" -#define pair64_lt(a, b) ((a).x < (b).x || ((a).x == (b).x && (a).y < (b).y)) - #include "ksort.h" +#define pair64_lt(a, b) ((a).x < (b).x || ((a).x == (b).x && (a).y < (b).y)) KSORT_INIT(128, pair64_t, pair64_lt) KSORT_INIT(64, uint64_t, ks_lt_generic) @@ -139,6 +138,10 @@ int err_fclose(FILE *stream) return ret; } +/********* + * Timer * + *********/ + double cputime() { struct rusage r; @@ -153,3 +156,58 @@ double realtime() gettimeofday(&tp, &tzp); return tp.tv_sec + tp.tv_usec * 1e-6; } + +/************************ + * Batch FASTA/Q reader * + ************************/ + +#include "kseq.h" +KSEQ_INIT2(, gzFile, gzread) + +static inline void trim_readno(kstring_t *s) +{ + if (s->l > 2 && s->s[s->l-2] == '/' && isdigit(s->s[s->l-1])) + s->l -= 2, s->s[s->l] = 0; +} + +static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s) +{ // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice + s->name = strdup(ks->name.s); + s->comment = ks->comment.l? strdup(s->comment) : 0; + s->seq = strdup(ks->seq.s); + s->qual = ks->qual.l? strdup(ks->qual.s) : 0; + s->l_seq = strlen(s->seq); +} + +bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_) +{ + kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_; + int size = 0, m, n; + bseq1_t *seqs; + m = n = 0; seqs = 0; + while (kseq_read(ks) >= 0) { + if (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads + fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__); + break; + } + if (n >= m) { + m = m? m<<1 : 256; + seqs = realloc(seqs, m * sizeof(bseq1_t)); + } + trim_readno(&ks->name); + kseq2bseq1(ks, &seqs[n]); + size += seqs[n++].l_seq; + if (ks2) { + trim_readno(&ks2->name); + kseq2bseq1(ks2, &seqs[n]); + size += seqs[n++].l_seq; + } + if (size >= chunk_size) break; + } + if (size == 0) { // test if the 2nd file is finished + if (ks2 && kseq_read(ks2) >= 0) + fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__); + } + *n_ = n; + return seqs; +} diff --git a/utils.h b/utils.h index 5abab41..6c065c1 100644 --- a/utils.h +++ b/utils.h @@ -52,6 +52,11 @@ typedef struct { typedef struct { size_t n, m; uint64_t *a; } uint64_v; typedef struct { size_t n, m; pair64_t *a; } pair64_v; +typedef struct { + int l_seq; + char *name, *comment, *seq, *qual, *sam; +} bseq1_t; + #ifdef __cplusplus extern "C" { #endif @@ -75,6 +80,8 @@ extern "C" { void ks_introsort_64 (size_t n, uint64_t *a); void ks_introsort_128(size_t n, pair64_t *a); + bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_); + #ifdef __cplusplus } #endif