move smem iterators to bwamem.{c,h}

This commit is contained in:
Heng Li 2013-01-31 13:59:48 -05:00
parent 5a4a0c4173
commit 91debf412b
8 changed files with 117 additions and 54 deletions

View File

@ -4,7 +4,7 @@ CFLAGS= -g -Wall -O2
CXXFLAGS= $(CFLAGS)
AR= ar
DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64
LOBJS= bwa.o bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o stdaln.o \
LOBJS= bwa.o bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o bwamem.o stdaln.o \
bwaseqio.o bwase.o kstring.o
AOBJS= QSufSort.o bwt_gen.o \
is.o bwtmisc.o bwtindex.o ksw.o simple_dp.o \
@ -45,5 +45,8 @@ bwtsw2_core.o:bwtsw2.h bwt.h bwt_lite.h stdaln.h
bwtsw2_aux.o:bwtsw2.h bwt.h bwt_lite.h stdaln.h
bwtsw2_main.o:bwtsw2.h
bwamem.o:bwamem.h
fastmap.o:bwt.h bwamem.h
clean:
rm -f gmon.out *.o a.out $(PROG) *~ *.a

54
bwamem.c 100644
View File

@ -0,0 +1,54 @@
#include <stdlib.h>
#include "bwamem.h"
memopt_t *mem_opt_init()
{
memopt_t *o;
o = calloc(1, sizeof(memopt_t));
o->a = 1; o->b = 9; o->q = 16; o->r = 1; o->w = 100;
o->min_seed_len = 17;
o->max_occ = 10;
return o;
}
/***************************
* SMEM iterator interface *
***************************/
smem_i *smem_itr_init(const bwt_t *bwt)
{
smem_i *itr;
itr = calloc(1, sizeof(smem_i));
itr->bwt = bwt;
itr->tmpvec[0] = calloc(1, sizeof(bwtintv_v));
itr->tmpvec[1] = calloc(1, sizeof(bwtintv_v));
itr->matches = calloc(1, sizeof(bwtintv_v));
return itr;
}
void smem_itr_destroy(smem_i *itr)
{
free(itr->tmpvec[0]->a);
free(itr->tmpvec[1]->a);
free(itr->matches->a);
free(itr);
}
void smem_set_query(smem_i *itr, int min_intv, int len, const uint8_t *query)
{
itr->query = query;
itr->start = 0;
itr->len = len;
itr->min_intv = min_intv;
}
int smem_next(smem_i *itr)
{
itr->tmpvec[0]->n = itr->tmpvec[1]->n = itr->matches->n = 0;
if (itr->start >= itr->len || itr->start < 0) return -1;
while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases
if (itr->start == itr->len) return -1;
itr->start = bwt_smem1(itr->bwt, itr->len, itr->query, itr->start, itr->min_intv, itr->matches, itr->tmpvec);
return itr->start;
}

33
bwamem.h 100644
View File

@ -0,0 +1,33 @@
#ifndef BWAMEM_H_
#define BWAMEM_H_
#include "bwt.h"
typedef struct {
const bwt_t *bwt;
const uint8_t *query;
int start, len, min_intv;
bwtintv_v *tmpvec[2], *matches;
} smem_i;
typedef struct {
int a, b, q, r, w;
int min_seed_len, max_occ;
} memopt_t;
#ifdef __cplusplus
extern "C" {
#endif
smem_i *smem_itr_init(const bwt_t *bwt);
void smem_itr_destroy(smem_i *itr);
void smem_set_query(smem_i *itr, int min_intv, int len, const uint8_t *query);
int smem_next(smem_i *itr);
memopt_t *mem_opt_init(void);
#ifdef __cplusplus
}
#endif
#endif

41
bwt.c
View File

@ -337,44 +337,3 @@ int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv,
if (tmpvec == 0 || tmpvec[1] == 0) free(a[1].a);
return ret;
}
/***************************
* SMEM iterator interface *
***************************/
smem_i *smem_itr_init(const bwt_t *bwt)
{
smem_i *itr;
itr = calloc(1, sizeof(smem_i));
itr->bwt = bwt;
itr->tmpvec[0] = calloc(1, sizeof(bwtintv_v));
itr->tmpvec[1] = calloc(1, sizeof(bwtintv_v));
itr->matches = calloc(1, sizeof(bwtintv_v));
return itr;
}
void smem_itr_destroy(smem_i *itr)
{
free(itr->tmpvec[0]->a);
free(itr->tmpvec[1]->a);
free(itr->matches->a);
free(itr);
}
void smem_set_query(smem_i *itr, int min_intv, int len, const uint8_t *query)
{
itr->query = query;
itr->start = 0;
itr->len = len;
itr->min_intv = min_intv;
}
int smem_next(smem_i *itr)
{
itr->tmpvec[0]->n = itr->tmpvec[1]->n = itr->matches->n = 0;
if (itr->start >= itr->len || itr->start < 0) return -1;
while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases
if (itr->start == itr->len) return -1;
itr->start = bwt_smem1(itr->bwt, itr->len, itr->query, itr->start, itr->min_intv, itr->matches, itr->tmpvec);
return itr->start;
}

12
bwt.h
View File

@ -60,13 +60,6 @@ typedef struct {
typedef struct { size_t n, m; bwtintv_t *a; } bwtintv_v;
typedef struct {
const bwt_t *bwt;
const uint8_t *query;
int start, len, min_intv;
bwtintv_v *tmpvec[2], *matches;
} smem_i;
/* For general OCC_INTERVAL, the following is correct:
#define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) + sizeof(bwtint_t)/4*4 + (k)%OCC_INTERVAL/16])
#define bwt_occ_intv(b, k) ((b)->bwt + (k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4)
@ -132,11 +125,6 @@ extern "C" {
// SMEM iterator interface
smem_i *smem_itr_init(const bwt_t *bwt);
void smem_itr_destroy(smem_i *itr);
void smem_set_query(smem_i *itr, int min_intv, int len, const uint8_t *query);
int smem_next(smem_i *itr);
#ifdef __cplusplus
}
#endif

View File

@ -4,12 +4,35 @@
#include <stdlib.h>
#include "bntseq.h"
#include "bwt.h"
#include "bwamem.h"
#include "kvec.h"
#include "kseq.h"
KSEQ_INIT(gzFile, gzread)
extern unsigned char nst_nt4_table[256];
int main_mem(int argc, char *argv[])
{
memopt_t *opt;
bwt_t *bwt;
bntseq_t *bns;
int c;
opt = mem_opt_init();
while ((c = getopt(argc, argv, "")) >= 0) {
}
if (optind + 1 >= argc) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: bwa mem [options] <idxbase> <in1.fq> [in2.fq]\n");
fprintf(stderr, "\n");
free(opt);
return 1;
}
free(opt);
return 0;
}
int main_fastmap(int argc, char *argv[])
{
int c, i, min_iwidth = 20, min_len = 17, print_seq = 0, min_intv = 1;

2
main.c
View File

@ -20,6 +20,7 @@ static int usage()
fprintf(stderr, " sampe generate alignment (paired ended)\n");
fprintf(stderr, " bwasw BWA-SW for long queries\n");
fprintf(stderr, " fastmap identify super-maximal exact matches\n");
fprintf(stderr, " mem BWA-MEM algorithm\n");
fprintf(stderr, "\n");
fprintf(stderr, " fa2pac convert FASTA to PAC format\n");
fprintf(stderr, " pac2bwt generate BWT from PAC\n");
@ -59,6 +60,7 @@ int main(int argc, char *argv[])
else if (strcmp(argv[1], "dbwtsw") == 0) ret = bwa_bwtsw2(argc-1, argv+1);
else if (strcmp(argv[1], "bwasw") == 0) ret = bwa_bwtsw2(argc-1, argv+1);
else if (strcmp(argv[1], "fastmap") == 0) ret = main_fastmap(argc-1, argv+1);
else if (strcmp(argv[1], "mem") == 0) ret = main_mem(argc-1, argv+1);
else {
fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]);
return 1;

1
main.h
View File

@ -22,6 +22,7 @@ extern "C" {
int bwa_bwtsw2(int argc, char *argv[]);
int main_fastmap(int argc, char *argv[]);
int main_mem(int argc, char *argv[]);
#ifdef __cplusplus
}