From 91debf412b59135bdf5d45c2772e72ce969ca9f1 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 31 Jan 2013 13:59:48 -0500 Subject: [PATCH] move smem iterators to bwamem.{c,h} --- Makefile | 5 ++++- bwamem.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ bwamem.h | 33 +++++++++++++++++++++++++++++++++ bwt.c | 41 ----------------------------------------- bwt.h | 12 ------------ fastmap.c | 23 +++++++++++++++++++++++ main.c | 2 ++ main.h | 1 + 8 files changed, 117 insertions(+), 54 deletions(-) create mode 100644 bwamem.c create mode 100644 bwamem.h diff --git a/Makefile b/Makefile index 6f388f2..04fd7a0 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ CFLAGS= -g -Wall -O2 CXXFLAGS= $(CFLAGS) AR= ar DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64 -LOBJS= bwa.o bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o stdaln.o \ +LOBJS= bwa.o bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o bwamem.o stdaln.o \ bwaseqio.o bwase.o kstring.o AOBJS= QSufSort.o bwt_gen.o \ is.o bwtmisc.o bwtindex.o ksw.o simple_dp.o \ @@ -45,5 +45,8 @@ bwtsw2_core.o:bwtsw2.h bwt.h bwt_lite.h stdaln.h bwtsw2_aux.o:bwtsw2.h bwt.h bwt_lite.h stdaln.h bwtsw2_main.o:bwtsw2.h +bwamem.o:bwamem.h +fastmap.o:bwt.h bwamem.h + clean: rm -f gmon.out *.o a.out $(PROG) *~ *.a diff --git a/bwamem.c b/bwamem.c new file mode 100644 index 0000000..91931bb --- /dev/null +++ b/bwamem.c @@ -0,0 +1,54 @@ +#include +#include "bwamem.h" + +memopt_t *mem_opt_init() +{ + memopt_t *o; + o = calloc(1, sizeof(memopt_t)); + o->a = 1; o->b = 9; o->q = 16; o->r = 1; o->w = 100; + o->min_seed_len = 17; + o->max_occ = 10; + return o; +} + +/*************************** + * SMEM iterator interface * + ***************************/ + +smem_i *smem_itr_init(const bwt_t *bwt) +{ + smem_i *itr; + itr = calloc(1, sizeof(smem_i)); + itr->bwt = bwt; + itr->tmpvec[0] = calloc(1, sizeof(bwtintv_v)); + itr->tmpvec[1] = calloc(1, sizeof(bwtintv_v)); + itr->matches = calloc(1, sizeof(bwtintv_v)); + return itr; +} + +void smem_itr_destroy(smem_i *itr) +{ + free(itr->tmpvec[0]->a); + free(itr->tmpvec[1]->a); + free(itr->matches->a); + free(itr); +} + +void smem_set_query(smem_i *itr, int min_intv, int len, const uint8_t *query) +{ + itr->query = query; + itr->start = 0; + itr->len = len; + itr->min_intv = min_intv; +} + +int smem_next(smem_i *itr) +{ + itr->tmpvec[0]->n = itr->tmpvec[1]->n = itr->matches->n = 0; + if (itr->start >= itr->len || itr->start < 0) return -1; + while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases + if (itr->start == itr->len) return -1; + itr->start = bwt_smem1(itr->bwt, itr->len, itr->query, itr->start, itr->min_intv, itr->matches, itr->tmpvec); + return itr->start; +} + diff --git a/bwamem.h b/bwamem.h new file mode 100644 index 0000000..da636a0 --- /dev/null +++ b/bwamem.h @@ -0,0 +1,33 @@ +#ifndef BWAMEM_H_ +#define BWAMEM_H_ + +#include "bwt.h" + +typedef struct { + const bwt_t *bwt; + const uint8_t *query; + int start, len, min_intv; + bwtintv_v *tmpvec[2], *matches; +} smem_i; + +typedef struct { + int a, b, q, r, w; + int min_seed_len, max_occ; +} memopt_t; + +#ifdef __cplusplus +extern "C" { +#endif + +smem_i *smem_itr_init(const bwt_t *bwt); +void smem_itr_destroy(smem_i *itr); +void smem_set_query(smem_i *itr, int min_intv, int len, const uint8_t *query); +int smem_next(smem_i *itr); + +memopt_t *mem_opt_init(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/bwt.c b/bwt.c index fe8007f..689d8f8 100644 --- a/bwt.c +++ b/bwt.c @@ -337,44 +337,3 @@ int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, if (tmpvec == 0 || tmpvec[1] == 0) free(a[1].a); return ret; } - -/*************************** - * SMEM iterator interface * - ***************************/ - -smem_i *smem_itr_init(const bwt_t *bwt) -{ - smem_i *itr; - itr = calloc(1, sizeof(smem_i)); - itr->bwt = bwt; - itr->tmpvec[0] = calloc(1, sizeof(bwtintv_v)); - itr->tmpvec[1] = calloc(1, sizeof(bwtintv_v)); - itr->matches = calloc(1, sizeof(bwtintv_v)); - return itr; -} - -void smem_itr_destroy(smem_i *itr) -{ - free(itr->tmpvec[0]->a); - free(itr->tmpvec[1]->a); - free(itr->matches->a); - free(itr); -} - -void smem_set_query(smem_i *itr, int min_intv, int len, const uint8_t *query) -{ - itr->query = query; - itr->start = 0; - itr->len = len; - itr->min_intv = min_intv; -} - -int smem_next(smem_i *itr) -{ - itr->tmpvec[0]->n = itr->tmpvec[1]->n = itr->matches->n = 0; - if (itr->start >= itr->len || itr->start < 0) return -1; - while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases - if (itr->start == itr->len) return -1; - itr->start = bwt_smem1(itr->bwt, itr->len, itr->query, itr->start, itr->min_intv, itr->matches, itr->tmpvec); - return itr->start; -} diff --git a/bwt.h b/bwt.h index 67a256d..2aab9d1 100644 --- a/bwt.h +++ b/bwt.h @@ -60,13 +60,6 @@ typedef struct { typedef struct { size_t n, m; bwtintv_t *a; } bwtintv_v; -typedef struct { - const bwt_t *bwt; - const uint8_t *query; - int start, len, min_intv; - bwtintv_v *tmpvec[2], *matches; -} smem_i; - /* For general OCC_INTERVAL, the following is correct: #define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) + sizeof(bwtint_t)/4*4 + (k)%OCC_INTERVAL/16]) #define bwt_occ_intv(b, k) ((b)->bwt + (k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) @@ -132,11 +125,6 @@ extern "C" { // SMEM iterator interface - smem_i *smem_itr_init(const bwt_t *bwt); - void smem_itr_destroy(smem_i *itr); - void smem_set_query(smem_i *itr, int min_intv, int len, const uint8_t *query); - int smem_next(smem_i *itr); - #ifdef __cplusplus } #endif diff --git a/fastmap.c b/fastmap.c index 17db06d..6a41aeb 100644 --- a/fastmap.c +++ b/fastmap.c @@ -4,12 +4,35 @@ #include #include "bntseq.h" #include "bwt.h" +#include "bwamem.h" #include "kvec.h" #include "kseq.h" KSEQ_INIT(gzFile, gzread) extern unsigned char nst_nt4_table[256]; +int main_mem(int argc, char *argv[]) +{ + memopt_t *opt; + bwt_t *bwt; + bntseq_t *bns; + int c; + + opt = mem_opt_init(); + while ((c = getopt(argc, argv, "")) >= 0) { + } + if (optind + 1 >= argc) { + fprintf(stderr, "\n"); + fprintf(stderr, "Usage: bwa mem [options] [in2.fq]\n"); + fprintf(stderr, "\n"); + free(opt); + return 1; + } + + free(opt); + return 0; +} + int main_fastmap(int argc, char *argv[]) { int c, i, min_iwidth = 20, min_len = 17, print_seq = 0, min_intv = 1; diff --git a/main.c b/main.c index 73cbcd9..2718732 100644 --- a/main.c +++ b/main.c @@ -20,6 +20,7 @@ static int usage() fprintf(stderr, " sampe generate alignment (paired ended)\n"); fprintf(stderr, " bwasw BWA-SW for long queries\n"); fprintf(stderr, " fastmap identify super-maximal exact matches\n"); + fprintf(stderr, " mem BWA-MEM algorithm\n"); fprintf(stderr, "\n"); fprintf(stderr, " fa2pac convert FASTA to PAC format\n"); fprintf(stderr, " pac2bwt generate BWT from PAC\n"); @@ -59,6 +60,7 @@ int main(int argc, char *argv[]) else if (strcmp(argv[1], "dbwtsw") == 0) ret = bwa_bwtsw2(argc-1, argv+1); else if (strcmp(argv[1], "bwasw") == 0) ret = bwa_bwtsw2(argc-1, argv+1); else if (strcmp(argv[1], "fastmap") == 0) ret = main_fastmap(argc-1, argv+1); + else if (strcmp(argv[1], "mem") == 0) ret = main_mem(argc-1, argv+1); else { fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]); return 1; diff --git a/main.h b/main.h index 026a80b..1a0292a 100644 --- a/main.h +++ b/main.h @@ -22,6 +22,7 @@ extern "C" { int bwa_bwtsw2(int argc, char *argv[]); int main_fastmap(int argc, char *argv[]); + int main_mem(int argc, char *argv[]); #ifdef __cplusplus }