开始改成sbwa那种batch模式

This commit is contained in:
zzh 2024-03-07 18:23:21 +08:00
parent 6e1dd08fb6
commit 7d085962a2
23 changed files with 1313 additions and 627 deletions

View File

@ -3,9 +3,9 @@ CC= gcc
# CFLAGS= -g -Wall -Wno-unused-function -O2 # CFLAGS= -g -Wall -Wno-unused-function -O2
CFLAGS= -g -Wall -Wno-unused-function -mavx2 -O2 CFLAGS= -g -Wall -Wno-unused-function -mavx2 -O2
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
SHOW_PERF= -DSHOW_PERF SHOW_PERF= #-DSHOW_PERF
AR= ar AR= ar
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) $(SHOW_PERF) DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) $(SHOW_PERF) -DUSE_AVX2 -DKSW_EQUAL #-DUSE_MT_READ #-DFILTER_FULL_MATCH
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \ LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \
QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o
AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \ AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
@ -17,6 +17,7 @@ PROG= bwa
INCLUDES= INCLUDES=
LIBS= -lm -lz -lpthread -ldl LIBS= -lm -lz -lpthread -ldl
SUBDIRS= . SUBDIRS= .
JE_MALLOC=/home/zzh/work/jemalloc/lib/libjemalloc.a # -ljemalloc -L/home/zzh/work/jemalloc/lib/
ifeq ($(shell uname -s),Linux) ifeq ($(shell uname -s),Linux)
LIBS += -lrt LIBS += -lrt
@ -29,11 +30,11 @@ endif
all:$(PROG) all:$(PROG)
bwa:libbwa.a $(AOBJS) main.o
$(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS)
#bwa:libbwa.a $(AOBJS) main.o #bwa:libbwa.a $(AOBJS) main.o
# $(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) ../sbwa/jemalloc/lib/libjemalloc.a main.o -o $@ -L. -lbwa $(LIBS) # $(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS)
bwa:libbwa.a $(AOBJS) main.o
$(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) $(JE_MALLOC) main.o -o $@ -L. -lbwa $(LIBS)
bwamem-lite:libbwa.a example.o bwamem-lite:libbwa.a example.o
$(CC) $(CFLAGS) $(LDFLAGS) example.o -o $@ -L. -lbwa $(LIBS) $(CC) $(CFLAGS) $(LDFLAGS) example.o -o $@ -L. -lbwa $(LIBS)

View File

@ -423,6 +423,30 @@ uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end
return seq; return seq;
} }
void bns_get_seq_no_alloc(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end, int64_t *len, size_t *m_seq, uint8_t **seqp)
{
if (end < beg) end ^= beg, beg ^= end, end ^= beg; // if end is smaller, swap
if (end > l_pac<<1) end = l_pac<<1;
if (beg < 0) beg = 0;
if (beg >= l_pac || end <= l_pac) {
int64_t k, l = 0;
*len = end - beg;
if (*len > *m_seq) {
*m_seq = *len;
*seqp = realloc(*seqp, end - beg);
}
if (beg >= l_pac) { // reverse strand
int64_t beg_f = (l_pac<<1) - 1 - end;
int64_t end_f = (l_pac<<1) - 1 - beg;
for (k = end_f; k > beg_f; --k)
(*seqp)[l++] = 3 - _get_pac(pac, k);
} else { // forward strand
for (k = beg; k < end; ++k)
(*seqp)[l++] = _get_pac(pac, k);
}
} else *len = 0; // if bridging the forward-reverse boundary, return nothing
}
uint8_t *bns_fetch_seq(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid) uint8_t *bns_fetch_seq(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid)
{ {
int64_t far_beg, far_end, len; int64_t far_beg, far_end, len;
@ -449,3 +473,28 @@ uint8_t *bns_fetch_seq(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, in
assert(seq && *end - *beg == len); // assertion failure should never happen assert(seq && *end - *beg == len); // assertion failure should never happen
return seq; return seq;
} }
void bns_fetch_seq_no_alloc(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid, size_t *m_seq, uint8_t **seqp)
{
int64_t far_beg, far_end, len;
int is_rev;
if (*end < *beg) *end ^= *beg, *beg ^= *end, *end ^= *beg; // if end is smaller, swap
assert(*beg <= mid && mid < *end);
*rid = bns_pos2rid(bns, bns_depos(bns, mid, &is_rev));
far_beg = bns->anns[*rid].offset;
far_end = far_beg + bns->anns[*rid].len;
if (is_rev) { // flip to the reverse strand
int64_t tmp = far_beg;
far_beg = (bns->l_pac<<1) - far_end;
far_end = (bns->l_pac<<1) - tmp;
}
*beg = *beg > far_beg? *beg : far_beg;
*end = *end < far_end? *end : far_end;
bns_get_seq_no_alloc(bns->l_pac, pac, *beg, *end, &len, m_seq, seqp);
if (*seqp == 0 || *end - *beg != len) {
fprintf(stderr, "[E::%s] begin=%ld, mid=%ld, end=%ld, len=%ld, seq=%p, rid=%d, far_beg=%ld, far_end=%ld\n",
__func__, (long)*beg, (long)mid, (long)*end, (long)len, *seqp, *rid, (long)far_beg, (long)far_end);
}
assert(*seqp && *end - *beg == len); // assertion failure should never happen
}

View File

@ -79,7 +79,7 @@ extern "C" {
uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end, int64_t *len); uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end, int64_t *len);
uint8_t *bns_fetch_seq(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid); uint8_t *bns_fetch_seq(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid);
int bns_intv2rid(const bntseq_t *bns, int64_t rb, int64_t re); int bns_intv2rid(const bntseq_t *bns, int64_t rb, int64_t re);
void bns_fetch_seq_no_alloc(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid, size_t *m_seq, uint8_t **seqp);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

173
bwa.c
View File

@ -29,6 +29,7 @@
#include <zlib.h> #include <zlib.h>
#include <assert.h> #include <assert.h>
#include <unistd.h> #include <unistd.h>
#include <pthread.h>
#include "bntseq.h" #include "bntseq.h"
#include "bwa.h" #include "bwa.h"
#include "ksw.h" #include "ksw.h"
@ -58,31 +59,160 @@ static inline void trim_readno(kstring_t *s)
s->l -= 2, s->s[s->l] = 0; s->l -= 2, s->s[s->l] = 0;
} }
static inline char *dupkstring(const kstring_t *str, int dupempty) static inline void dupkstring(const kstring_t *str, int dupempty, char **dstp, int *sm)
{ {
char *s = (str->l > 0 || dupempty)? malloc(str->l + 1) : NULL; if (!dupempty && str->l == 0) {
if (!s) return NULL; if (*dstp) free(*dstp);
*dstp = 0; *sm = 0;
} else if (*dstp == 0 || *sm < str->l) {
*sm = str->l;
*dstp = realloc(*dstp, str->l + 1);
}
char *s = *dstp;
if (!s) return;
memcpy(s, str->s, str->l); memcpy(s, str->s, str->l);
s[str->l] = '\0'; s[str->l] = '\0';
return s;
} }
static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s) static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s, int copy_comment)
{ // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice { // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice
s->name = dupkstring(&ks->name, 1); dupkstring(&ks->name, 1, &s->name, &s->m_name);
s->comment = dupkstring(&ks->comment, 0); if (copy_comment) dupkstring(&ks->comment, 0, &s->comment, &s->m_comment);
s->seq = dupkstring(&ks->seq, 1); dupkstring(&ks->seq, 1, &s->seq, &s->m_seq);
s->qual = dupkstring(&ks->qual, 0); dupkstring(&ks->qual, 0, &s->qual, &s->m_qual);
s->l_seq = ks->seq.l; s->l_seq = ks->seq.l;
} }
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_) typedef struct {
kseq_t *ks;
bseq1_t *seq;
int start_pos;
int n_bound;
int copy_comment;
int ret_n;
int ret_size;
int ret_status;
int chunk_size;
} read_data_t;
static void *thread_bseq_read(void *data) {
read_data_t *d = (read_data_t*) data;
kseq_t *ks = d->ks;
bseq1_t *seqs = d->seq;
int copy_comment = d->copy_comment;
int chunk_size = d->chunk_size;
int cur_n = 0, cur_pos = d->start_pos, size = 0;
int ret_status = 1;
while (cur_n < d->n_bound && (ret_status = kseq_read(ks)) >= 0) {
trim_readno(&ks->name);
kseq2bseq1(ks, seqs + cur_pos, copy_comment);
seqs[cur_pos].id = cur_pos;
size += seqs[cur_pos].l_seq;
cur_pos += 2; cur_n += 1;
if (size >= chunk_size) break;
}
d->ret_n = cur_n; d->ret_size = size; d->ret_status = ret_status;
return 0;
}
#define READ_ONE_SEQ(ks) \
trim_readno(&ks->name); \
kseq2bseq1(ks, &seqs[n], copy_comment); \
seqs[n].id = n; \
size += seqs[n++].l_seq;
// multi thread reading input seqs
void bseq_read_pe_mt(int chunk_size, int *n_, void *ks1_, void *ks2_, int copy_comment, int64_t *size_, int *m_, bseq1_t **seqs_ptr)
{ {
kseq_t *ks = (kseq_t *)ks1_, *ks2 = (kseq_t *)ks2_;
int size = 0, m = *m_, n = 0;
bseq1_t *seqs = *seqs_ptr;
read_data_t d[2];
pthread_t tid[2];
if (m == 0) { // 还没开辟空间,要初始化
seqs = calloc(20, sizeof(bseq1_t)); // 先读取20个reads根据reads的长度和chunk size决定要读取多少条reads
int ks1_ret = 0, ks2_ret = 0;
int i = 10;
while (i-- > 0) {
ks1_ret = kseq_read(ks);
if (ks1_ret < 0) break;
ks2_ret = kseq_read(ks2);
if (ks2_ret < 0) {
fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__);
break;
}
READ_ONE_SEQ(ks);
READ_ONE_SEQ(ks2);
}
if (ks1_ret < 0 || ks2_ret < 0) {
if (size == 0 && kseq_read(ks2) >= 0) { // test if the 2nd file is finished
fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
}
*n_ = n; *seqs_ptr = seqs;
return;
}
// 重新开辟空间
m = (chunk_size + size / 10 - 1) / (size / 10) * 2;
*m_ = m;
seqs = realloc(seqs, m * sizeof(bseq1_t));
memset(seqs + n, 0, sizeof(bseq1_t));
*seqs_ptr = seqs;
}
d[0].copy_comment = copy_comment; d[1].copy_comment = copy_comment;
d[0].ks = ks ; d[0].seq = &seqs[0]; d[0].n_bound = (m >> 1) - (n>>1); d[0].start_pos = n;
d[1].ks = ks2; d[1].seq = &seqs[0]; d[1].n_bound = (m >> 1) - (n>>1); d[1].start_pos = n+1;
d[0].chunk_size = d[1].chunk_size = (chunk_size + 1) >> 1;
pthread_create(&tid[0], 0, thread_bseq_read, &d[0]);
pthread_create(&tid[1], 0, thread_bseq_read, &d[1]);
pthread_join(tid[0], 0); pthread_join(tid[1], 0);
size += d[0].ret_size + d[1].ret_size;
n += d[0].ret_n + d[1].ret_n;
if (size == 0 && kseq_read(ks2) >= 0) { // test if the 2nd file is finished
fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
} else if (size < chunk_size && d[0].ret_status > 0 && d[1].ret_status > 0) {
while (kseq_read(ks) >= 0) {
if (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads
fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__);
break;
}
if (n >= m) {
m = m? m<<1 : 256;
seqs = realloc(seqs, m * sizeof(bseq1_t));
memset(seqs + n, 0, (m-n) * sizeof(bseq1_t));
}
READ_ONE_SEQ(ks);
if (ks2) {
READ_ONE_SEQ(ks2);
}
if (size >= chunk_size && (n&1) == 0) break;
}
if (size == 0) { // test if the 2nd file is finished
if (ks2 && kseq_read(ks2) >= 0) fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
}
}
*n_ = n; *size_ = size;
if (m > *m_) *m_ = m;
*seqs_ptr = seqs;
}
void bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_, int copy_comment, int64_t *size_, int *m_, bseq1_t **seqs_ptr)
{
#ifdef USE_MT_READ
if (ks2_) return bseq_read_pe_mt(chunk_size, n_, ks1_, ks2_, copy_comment, size_, m_, seqs_ptr);
#endif
kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_; kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_;
int size = 0, m, n; int size = 0, m, n;
bseq1_t *seqs; bseq1_t *seqs = *seqs_ptr;
m = n = 0; seqs = 0; n = 0; m = *m_;
while (kseq_read(ks) >= 0) { while (kseq_read(ks) >= 0) {
if (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads if (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads
fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__); fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__);
@ -91,16 +221,11 @@ bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
if (n >= m) { if (n >= m) {
m = m? m<<1 : 256; m = m? m<<1 : 256;
seqs = realloc(seqs, m * sizeof(bseq1_t)); seqs = realloc(seqs, m * sizeof(bseq1_t));
memset(seqs + n, 0, (m-n) * sizeof(bseq1_t));
} }
trim_readno(&ks->name); READ_ONE_SEQ(ks);
kseq2bseq1(ks, &seqs[n]);
seqs[n].id = n;
size += seqs[n++].l_seq;
if (ks2) { if (ks2) {
trim_readno(&ks2->name); READ_ONE_SEQ(ks2);
kseq2bseq1(ks2, &seqs[n]);
seqs[n].id = n;
size += seqs[n++].l_seq;
} }
if (size >= chunk_size && (n&1) == 0) break; if (size >= chunk_size && (n&1) == 0) break;
} }
@ -108,8 +233,9 @@ bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
if (ks2 && kseq_read(ks2) >= 0) if (ks2 && kseq_read(ks2) >= 0)
fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__); fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
} }
*n_ = n; *n_ = n; *size_ = size;
return seqs; if (m > *m_) *m_ = m;
*seqs_ptr = seqs;
} }
void bseq_classify(int n, bseq1_t *seqs, int m[2], bseq1_t *sep[2]) void bseq_classify(int n, bseq1_t *seqs, int m[2], bseq1_t *sep[2])
@ -303,7 +429,7 @@ FMTIndex *bwa_idx_load_fmt(const char *hint)
sprintf(suffix, ".256.%d.fmt", FMT_MID_INTERVAL); sprintf(suffix, ".256.%d.fmt", FMT_MID_INTERVAL);
strcpy(fmt_idx_fn, hint); strcpy(fmt_idx_fn, hint);
strcpy(fmt_idx_fn + l_hint, suffix); strcpy(fmt_idx_fn + l_hint, suffix);
sprintf(suffix, ".14.xmer"); sprintf(suffix, ".%d.kmer", HASH_KMER_LEN);
strcpy(kmer_idx_fn, hint); strcpy(kmer_idx_fn, hint);
strcpy(kmer_idx_fn + l_hint, suffix); strcpy(kmer_idx_fn + l_hint, suffix);
@ -339,6 +465,7 @@ bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which)
return 0; return 0;
} }
idx = calloc(1, sizeof(bwaidx_t)); idx = calloc(1, sizeof(bwaidx_t));
if (which & BWA_IDX_BWT) idx->fmt = bwa_idx_load_fmt(hint); if (which & BWA_IDX_BWT) idx->fmt = bwa_idx_load_fmt(hint);
//if (which & BWA_IDX_BWT) idx->bwt = bwa_idx_load_bwt(hint); //if (which & BWA_IDX_BWT) idx->bwt = bwa_idx_load_bwt(hint);
//idx->bwt->kmer_hash = idx->fmt->kmer_hash; //idx->bwt->kmer_hash = idx->fmt->kmer_hash;

8
bwa.h
View File

@ -29,6 +29,7 @@
#include <stdint.h> #include <stdint.h>
#include "bntseq.h" #include "bntseq.h"
#include "kstring.h"
#include "bwt.h" #include "bwt.h"
#include "fmt_idx.h" #include "fmt_idx.h"
@ -59,7 +60,9 @@ typedef struct {
typedef struct { typedef struct {
int l_seq, id; int l_seq, id;
char *name, *comment, *seq, *qual, *sam; int m_name, m_comment, m_seq, m_qual;
char *name, *comment, *seq, *qual;
kstring_t sam;
} bseq1_t; } bseq1_t;
extern int bwa_verbose, bwa_dbg; extern int bwa_verbose, bwa_dbg;
@ -69,7 +72,8 @@ extern char bwa_rg_id[256];
extern "C" { extern "C" {
#endif #endif
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_); //bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_);
void bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_, int copy_comment, int64_t *size_, int *m_, bseq1_t **seqs_ptr);
void bseq_classify(int n, bseq1_t *seqs, int m[2], bseq1_t *sep[2]); void bseq_classify(int n, bseq1_t *seqs, int m[2], bseq1_t *sep[2]);
void bwa_fill_scmat(int a, int b, int8_t mat[25]); void bwa_fill_scmat(int a, int b, int8_t mat[25]);

607
bwamem.c
View File

@ -117,170 +117,96 @@ mem_opt_t *mem_opt_init()
#define intv_lt(a, b) ((a).info < (b).info) #define intv_lt(a, b) ((a).info < (b).info)
KSORT_INIT(mem_intv, bwtintv_t, intv_lt) KSORT_INIT(mem_intv, bwtintv_t, intv_lt)
typedef struct { static void mem_collect_intv(const mem_opt_t *opt, const FMTIndex *fmt, int len, const uint8_t *seq, smem_aux_t *a)
int *full_match;
bwtintv_v *mem, *mem1, *tmpv[2];
} smem_aux_t;
static smem_aux_t *smem_aux_init(int batch_size)
{ {
smem_aux_t *a; int i, k, x = 0, old_n;
a = calloc(1, sizeof(smem_aux_t));
a->mem = calloc(batch_size, sizeof(bwtintv_v));
a->mem1 = calloc(batch_size, sizeof(bwtintv_v));
a->tmpv[0] = calloc(1, sizeof(bwtintv_v));
a->tmpv[1] = calloc(1, sizeof(bwtintv_v));
a->full_match = calloc(batch_size, sizeof(int));
return a;
}
static void smem_aux_destroy(smem_aux_t *a, int batch_size)
{
int i;
free(a->tmpv[0]->a); free(a->tmpv[0]);
free(a->tmpv[1]->a); free(a->tmpv[1]);
for (i = 0; i < batch_size; ++i) {
free(a->mem[i].a); free(a->mem1[i].a);
}
free(a->mem); free(a->mem1);
free(a->full_match);
free(a);
}
#define USE_FMT 1
static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, const FMTIndex *fmt, bseq1_t *seq_arr, int nseq, smem_aux_t *a)
{
int si, i, k, x, old_n, len, slen, start, end;
int start_width = 1; int start_width = 1;
int split_len = (int)(opt->min_seed_len * opt->split_factor + .499); int split_len = (int)(opt->min_seed_len * opt->split_factor + .499);
int max_seed_len, start_N_num, start_flag; int max_seed_len = 0;
uint8_t *seq; int start_N_num = 0, start_flag = 1;
bwtintv_v *mem1, *mem; a->mem.n = 0;
bwtintv_t *p;
// 1. first pass: find all SMEMs
#ifdef SHOW_PERF #ifdef SHOW_PERF
int64_t tmp_time = realtime_msec(); int64_t tmp_time = realtime_msec();
#endif #endif
for (si = 0; si < nseq; ++si) { // 1. first pass: find all SMEMs
x = 0; max_seed_len = 0; start_N_num = 0; start_flag = 1; while (x < len) {
len = seq_arr[si].l_seq; seq = (uint8_t*) seq_arr[si].seq; if (seq[x] < 4) {
mem1 = &a->mem1[si]; mem = &a->mem[si]; mem->n = 0; start_flag = 0;
while (x < len) { x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, &a->mem1, a->tmpv[0]);
if (seq[x] < 4) { for (i = 0; i < a->mem1.n; ++i) {
start_flag = 0; bwtintv_t *p = &a->mem1.a[i];
#if USE_FMT int slen = (uint32_t)p->info - (p->info >> 32); // seed length
x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, mem1, a->tmpv[0]); max_seed_len = MAX(max_seed_len, slen);
#else if (slen >= opt->min_seed_len) {
x = bwt_smem1(bwt, len, seq, x, start_width, mem1, a->tmpv); kv_push(bwtintv_t, a->mem, *p);
#endif
for (i = 0; i < mem1->n; ++i) {
p = &mem1->a[i];
slen = (uint32_t)p->info - (p->info >> 32); // seed length
max_seed_len = MAX(max_seed_len, slen);
if (slen >= opt->min_seed_len) {
kv_push(bwtintv_t, *mem, *p);
}
} }
} else {
++x;
if (start_flag) ++start_N_num;
} }
//break; // for test full match time
} else {
++x;
if (start_flag) ++start_N_num;
} }
if (max_seed_len == len - start_N_num) a->full_match[si] = 1;
} }
#ifdef SHOW_PERF #ifdef SHOW_PERF
tmp_time = realtime_msec() - tmp_time; tmp_time = realtime_msec() - tmp_time;
__sync_fetch_and_add(&time_seed_1, tmp_time); __sync_fetch_and_add(&time_seed_1, tmp_time);
#endif
#ifdef SHOW_PERF
tmp_time = realtime_msec(); tmp_time = realtime_msec();
#endif #endif
// 2. second pass: find MEMs inside a long SMEM
for (si = 0; si < nseq; ++si) { #ifdef FILTER_FULL_MATCH
len = seq_arr[si].l_seq; seq = (uint8_t*) seq_arr[si].seq; if (max_seed_len == len - start_N_num) goto collect_intv_end;
mem1 = &a->mem1[si]; mem = &a->mem[si];
old_n = mem->n;
// if (a->full_match[si]) continue;
for (k = 0; k < old_n; ++k) {
p = &mem->a[k];
start = p->info >> 32; end = (int32_t)p->info;
if (end - start < split_len || p->x[2] > opt->split_width) continue;
#if USE_FMT
fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, mem1, a->tmpv[0]);
#else
bwt_smem1(bwt, len, seq, (start + end) >> 1, p->x[2] + 1, mem1, a->tmpv);
#endif #endif
for (i = 0; i < mem1->n; ++i) {
p = &mem1->a[i]; // 2. second pass: find MEMs inside a long SMEM
slen = (uint32_t)p->info - (p->info >> 32); old_n = a->mem.n;
if (slen >= opt->min_seed_len) { for (k = 0; k < old_n; ++k) {
kv_push(bwtintv_t, *mem, *p); bwtintv_t *p = &a->mem.a[k];
} int start = p->info>>32, end = (int32_t)p->info;
if (end - start < split_len || p->x[2] > opt->split_width) continue;
fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, &a->mem1, a->tmpv[0]);
for (i = 0; i < a->mem1.n; ++i) {
bwtintv_t *p = &a->mem1.a[i];
int slen = (uint32_t)p->info - (p->info >> 32);
if (slen >= opt->min_seed_len) {
kv_push(bwtintv_t, a->mem, a->mem1.a[i]);
} }
} }
} }
#ifdef SHOW_PERF #ifdef SHOW_PERF
tmp_time = realtime_msec() - tmp_time; tmp_time = realtime_msec() - tmp_time;
__sync_fetch_and_add(&time_seed_2, tmp_time); __sync_fetch_and_add(&time_seed_2, tmp_time);
#endif
#ifdef SHOW_PERF
tmp_time = realtime_msec(); tmp_time = realtime_msec();
#endif #endif
// 3. third pass: LAST-like // 3. third pass: LAST-like
if (opt->max_mem_intv > 0) { if (opt->max_mem_intv > 0) {
for (si = 0; si < nseq; ++si) { x = 0;
// if (a->full_match[si]) continue; while (x < len) {
len = seq_arr[si].l_seq; seq = (uint8_t*) seq_arr[si].seq; if (seq[x] < 4) {
x = 0; mem = &a->mem[si]; bwtintv_t m;
while (x < len) { x = fmt_seed_strategy1(fmt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m);
if (seq[x] < 4) { if (m.x[2] > 0) {
bwtintv_t m; kv_push(bwtintv_t, a->mem, m);
#if USE_FMT }
x = fmt_seed_strategy1(fmt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m); } else ++x;
#else
x = bwt_seed_strategy1(bwt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m);
#endif
if (m.x[2] > 0) {
kv_push(bwtintv_t, *mem, m);
}
} else ++x;
}
} }
} }
#ifdef SHOW_PERF #ifdef SHOW_PERF
tmp_time = realtime_msec() - tmp_time; tmp_time = realtime_msec() - tmp_time;
__sync_fetch_and_add(&time_seed_3, tmp_time); __sync_fetch_and_add(&time_seed_3, tmp_time);
#endif #endif
// 4. sort
for (si = 0; si < nseq; ++si) { #ifdef FILTER_FULL_MATCH
ks_introsort(mem_intv, a->mem[si].n, a->mem[si].a); collect_intv_end:
} #endif
// sort
ks_introsort(mem_intv, a->mem.n, a->mem.a);
} }
/************ /************
* Chaining * * Chaining *
************/ ************/
typedef struct {
int64_t rbeg;
int32_t qbeg, len;
int score;
} mem_seed_t; // unaligned memory
typedef struct {
int n, m, first, rid;
uint32_t w:29, kept:2, is_alt:1;
float frac_rep;
int64_t pos;
mem_seed_t *seeds;
} mem_chain_t;
typedef struct { size_t n, m; mem_chain_t *a; } mem_chain_v;
#include "kbtree.h" #include "kbtree.h"
#define chain_cmp(a, b) (((b).pos < (a).pos) - ((a).pos < (b).pos)) #define chain_cmp(a, b) (((b).pos < (a).pos) - ((a).pos < (b).pos))
@ -289,6 +215,7 @@ KBTREE_INIT(chn, mem_chain_t, chain_cmp)
// return 1 if the seed is merged into the chain // return 1 if the seed is merged into the chain
static int test_and_merge(const mem_opt_t *opt, int64_t l_pac, mem_chain_t *c, const mem_seed_t *p, int seed_rid) static int test_and_merge(const mem_opt_t *opt, int64_t l_pac, mem_chain_t *c, const mem_seed_t *p, int seed_rid)
{ {
//return 0;
int64_t qend, rend, x, y; int64_t qend, rend, x, y;
const mem_seed_t *last = &c->seeds[c->n-1]; const mem_seed_t *last = &c->seeds[c->n-1];
qend = last->qbeg + last->len; qend = last->qbeg + last->len;
@ -297,8 +224,12 @@ static int test_and_merge(const mem_opt_t *opt, int64_t l_pac, mem_chain_t *c, c
if (p->qbeg >= c->seeds[0].qbeg && p->qbeg + p->len <= qend && p->rbeg >= c->seeds[0].rbeg && p->rbeg + p->len <= rend) if (p->qbeg >= c->seeds[0].qbeg && p->qbeg + p->len <= qend && p->rbeg >= c->seeds[0].rbeg && p->rbeg + p->len <= rend)
return 1; // contained seed; do nothing return 1; // contained seed; do nothing
if ((last->rbeg < l_pac || c->seeds[0].rbeg < l_pac) && p->rbeg >= l_pac) return 0; // don't chain if on different strand if ((last->rbeg < l_pac || c->seeds[0].rbeg < l_pac) && p->rbeg >= l_pac) return 0; // don't chain if on different strand
//return 0; //到这里都一样的
x = p->qbeg - last->qbeg; // always non-negtive x = p->qbeg - last->qbeg; // always non-negtive
y = p->rbeg - last->rbeg; y = p->rbeg - last->rbeg;
//fprintf(stderr, "x: %ld, y: %ld\n", x, y);
// 这里的代码是不稳定的因为他只用当前的seed和chain里的last做比较而chain里所有的seed都是无序的他们的顺序跟处理seed的先后有关
if (y >= 0 && x - y <= opt->w && y - x <= opt->w && x - last->len < opt->max_chain_gap && y - last->len < opt->max_chain_gap) { // grow the chain if (y >= 0 && x - y <= opt->w && y - x <= opt->w && x - last->len < opt->max_chain_gap && y - last->len < opt->max_chain_gap) { // grow the chain
if (c->n == c->m) { if (c->n == c->m) {
c->m <<= 1; c->m <<= 1;
@ -348,21 +279,28 @@ void mem_print_chain(const bntseq_t *bns, mem_chain_v *chn)
} }
} }
void mem_chain(const mem_opt_t *opt, const bwt_t *bwt, const FMTIndex *fmt, const bntseq_t *bns, bseq1_t *seq_arr, int nseq, mem_chain_v *chns, void *buf) #define uint64_lt(a, b) ((a) > (b))
KSORT_INIT(uint64_sort, uint64_t, uint64_lt)
mem_chain_v mem_chain(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t *bns, int len, const uint8_t *seq, void *buf)
{ {
int si, i, b, e, l_rep, len; int i, b, e, l_rep;
int64_t l_pac = bns->l_pac, k; int64_t l_pac = bns->l_pac;
mem_chain_v *chain; mem_chain_v chain;
kbtree_t(chn) *tree; kbtree_t(chn) *tree;
smem_aux_t *aux; smem_aux_t *aux;
bwtintv_v *mem;
uint64_v v1, v2;
kv_init(v1);
kv_init(v2);
kv_init(chain);
if (len < opt->min_seed_len) return chain; // if the query is shorter than the seed length, no match
tree = kb_init(chn, KB_DEFAULT_SIZE); tree = kb_init(chn, KB_DEFAULT_SIZE);
aux = (smem_aux_t*)buf;
// 1. find smem
mem_collect_intv(opt, bwt, fmt, seq_arr, nseq, aux);
// 2. chain aux = (smem_aux_t*)buf;
mem_collect_intv(opt, fmt, len, seq, aux);
#define CHECK_ADD_CHAIN(tmp, lower, upper) \ #define CHECK_ADD_CHAIN(tmp, lower, upper) \
int rid, to_add = 0; \ int rid, to_add = 0; \
mem_chain_t tmp, *lower, *upper; \ mem_chain_t tmp, *lower, *upper; \
@ -391,64 +329,92 @@ void mem_chain(const mem_opt_t *opt, const bwt_t *bwt, const FMTIndex *fmt, cons
kb_putp(chn, tree, &tmp); \ kb_putp(chn, tree, &tmp); \
} }
for (si = 0; si < nseq; ++si) { for (i = 0, b = e = l_rep = 0; i < aux->mem.n; ++i) { // compute frac_rep
tree->n_keys = 0; bwtintv_t *p = &aux->mem.a[i];
tree->n_nodes = 1; int sb = (p->info>>32), se = (uint32_t)p->info;
tree->root->n = 0; if (p->x[2] <= opt->max_occ) continue;
tree->root->is_internal = 0; if (sb > e) l_rep += e - b, b = sb, e = se;
//tree = kb_init(chn, KB_DEFAULT_SIZE); else e = e > se? e : se;
len = seq_arr[si].l_seq;
mem = &aux->mem[si];
for (i = 0, b = e = l_rep = 0; i < mem->n; ++i) { // compute frac_rep
bwtintv_t *p = &mem->a[i];
int sb = (p->info>>32), se = (uint32_t)p->info;
if (p->x[2] <= opt->max_occ) continue;
if (sb > e) l_rep += e - b, b = sb, e = se;
else e = e > se? e : se;
}
l_rep += e - b;
for (i = 0; i < mem->n; ++i) {
bwtintv_t *p = &mem->a[i];
int step, count, slen = (uint32_t)p->info - (p->info >> 32); // seed length
if (p->num_match > 0) {
mem_seed_t s;
s.rbeg = p->rm[0].rs;
if (p->rm[0].reverse) s.rbeg = (fmt->l_pac << 1) - 1 - s.rbeg;
CHECK_ADD_CHAIN(tmp, lower, upper);
} else {
step = p->x[2] > opt->max_occ ? p->x[2] / opt->max_occ : 1;
for (k = count = 0; k < p->x[2] && count < opt->max_occ; k += step, ++count) {
mem_seed_t s;
#ifdef SHOW_PERF
int64_t tmp_time = realtime_msec();
#endif
#if USE_FMT
s.rbeg = fmt_sa(fmt, p->x[0] + k);
#else
s.rbeg = bwt_sa(bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference
#endif
#ifdef SHOW_PERF
tmp_time = realtime_msec() - tmp_time;
__sync_fetch_and_add(&time_bwt_sa, tmp_time);
#endif
CHECK_ADD_CHAIN(tmp, lower, upper);
}
}
}
chain = &chns[si];
kv_resize(mem_chain_t, *chain, kb_size(tree));
#define traverse_func(p_) (chain->a[chain->n++] = *(p_))
__kb_traverse(mem_chain_t, tree, traverse_func);
#undef traverse_func
for (i = 0; i < chain->n; ++i) chain->a[i].frac_rep = (float)l_rep / len;
if (bwa_verbose >= 4) printf("* fraction of repetitive seeds: %.3f\n", (float)l_rep / len);
// kb_destroy(chn, tree);
} }
l_rep += e - b;
for (i = 0; i < aux->mem.n; ++i) {
bwtintv_t *p = &aux->mem.a[i];
int step, count, slen = (uint32_t)p->info - (p->info>>32); // seed length
int64_t k;
if (p->num_match > 0) {
mem_seed_t s;
s.rbeg = p->rm[0].rs;
if (p->rm[0].reverse) s.rbeg = (fmt->l_pac << 1) - 1 - s.rbeg;
CHECK_ADD_CHAIN(tmp, lower, upper);
} else {
step = p->x[2] > opt->max_occ? p->x[2] / opt->max_occ : 1;
//step = 1;
for (k = count = 0; k < p->x[2] && count < opt->max_occ; k += step, ++count) {
mem_seed_t s;
#ifdef SHOW_PERF
int64_t tmp_time = realtime_msec();
#endif
s.rbeg = fmt_sa(fmt, p->x[0] + k);
//kv_push(uint64_t, v1, s.rbeg);
//fprintf(stderr, "%ld\t", s.rbeg);
//if (p->x[0] == 0)
// s.rbeg = (fmt->l_pac << 1) - slen - fmt_sa(fmt, p->x[1] + p->x[2] - 1 - k);
//else
// s.rbeg = fmt_sa(fmt, p->x[0] + k);
//kv_push(uint64_t, v2, s.rbeg);
//fprintf(stderr, "%ld\t", s.rbeg);
//s.rbeg = (fmt->l_pac << 1) - slen - fmt_sa(fmt, p->x[1] + k);
//fprintf(stderr, "%ld\n", s.rbeg);
//if (s.rbeg < fmt->l_pac) { // 在正链
// s.rbeg = (fmt->l_pac << 1) - slen - s.rbeg;
//} else { // 在反向互补链
// s.rbeg = (fmt->l_pac << 1) - slen - s.rbeg;
//}
//s.rbeg = fmt_sa(fmt, p->x[0] + k);
#ifdef SHOW_PERF
tmp_time = realtime_msec() - tmp_time;
__sync_fetch_and_add(&time_bwt_sa, tmp_time);
#endif
CHECK_ADD_CHAIN(tmp, lower, upper);
}
//fprintf(stderr, "\n");
}
}
//fprintf(stderr, "split\n");
kv_resize(mem_chain_t, chain, kb_size(tree));
#define traverse_func(p_) (chain.a[chain.n++] = *(p_))
__kb_traverse(mem_chain_t, tree, traverse_func);
#undef traverse_func
for (i = 0; i < chain.n; ++i) chain.a[i].frac_rep = (float)l_rep / len;
if (bwa_verbose >= 4) printf("* fraction of repetitive seeds: %.3f\n", (float)l_rep / len);
kb_destroy(chn, tree); kb_destroy(chn, tree);
#if 0
for (i = 0; i < chain.n; ++i)
{
fprintf(gfp1, "chain-begin: %d\n", i);
int j;
fprintf(gfp1, "chain-%ld %d\n", chain.a[i].pos, chain.a[i].rid);
for (j = 0; j < chain.a[i].n; ++j) {
fprintf(gfp1, "chain-%d %ld %d\n", chain.a[i].seeds[j].qbeg, chain.a[i].seeds[j].rbeg, chain.a[i].seeds[j].len);
}
fprintf(gfp1, "chain-end\n");
}
fprintf(gfp1, "fun-end\n");
#endif
// ks_introsort(uint64_sort, v1.n, v1.a);
// ks_introsort(uint64_sort, v2.n, v2.a);
// for (i = 0; i < v1.n; ++i) {
// //fprintf(stderr, "%ld\t%ld\n", v1.a[i], v2.a[i]);
// if (v1.a[i] != v2.a[i]) {
// fprintf(stderr, "diff: %ld\t%ld\n", v1.a[i], v2.a[i]);
// }
// }
return chain;
} }
/******************** /********************
@ -705,7 +671,7 @@ int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id
#define MEM_MINSC_COEF 5.5f #define MEM_MINSC_COEF 5.5f
#define MEM_SEEDSW_COEF 0.05f #define MEM_SEEDSW_COEF 0.05f
int mem_seed_sw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_seed_t *s) int mem_seed_sw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_seed_t *s, buf_t *buf)
{ {
int qb, qe, rid; int qb, qe, rid;
int64_t rb, re, mid, l_pac = bns->l_pac; int64_t rb, re, mid, l_pac = bns->l_pac;
@ -727,12 +693,14 @@ int mem_seed_sw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, i
if (qe - qb >= MEM_SHORT_LEN || re - rb >= MEM_SHORT_LEN) return -1; // the seed seems good enough; no need to do SW if (qe - qb >= MEM_SHORT_LEN || re - rb >= MEM_SHORT_LEN) return -1; // the seed seems good enough; no need to do SW
rseq = bns_fetch_seq(bns, pac, &rb, mid, &re, &rid); rseq = bns_fetch_seq(bns, pac, &rb, mid, &re, &rid);
//bns_fetch_seq_no_alloc(bns, pac, &rb, mid, &re, &rid, &buf->m, &buf->addr); rseq = buf->addr;
x = ksw_align2(qe - qb, (uint8_t*)query + qb, re - rb, rseq, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, KSW_XSTART, 0); x = ksw_align2(qe - qb, (uint8_t*)query + qb, re - rb, rseq, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, KSW_XSTART, 0);
free(rseq); free(rseq);
return x.score; return x.score;
} }
void mem_flt_chained_seeds(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, int n_chn, mem_chain_t *a) void mem_flt_chained_seeds(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, int n_chn, mem_chain_t *a, buf_t *buf)
{ {
double min_l = opt->min_chain_weight? MEM_HSP_COEF * opt->min_chain_weight : MEM_MINSC_COEF * log(l_query); double min_l = opt->min_chain_weight? MEM_HSP_COEF * opt->min_chain_weight : MEM_MINSC_COEF * log(l_query);
int i, j, k, min_HSP_score = (int)(opt->a * min_l + .499); int i, j, k, min_HSP_score = (int)(opt->a * min_l + .499);
@ -741,7 +709,7 @@ void mem_flt_chained_seeds(const mem_opt_t *opt, const bntseq_t *bns, const uint
mem_chain_t *c = &a[i]; mem_chain_t *c = &a[i];
for (j = k = 0; j < c->n; ++j) { for (j = k = 0; j < c->n; ++j) {
mem_seed_t *s = &c->seeds[j]; mem_seed_t *s = &c->seeds[j];
s->score = mem_seed_sw(opt, bns, pac, l_query, query, s); s->score = mem_seed_sw(opt, bns, pac, l_query, query, s, buf);
if (s->score < 0 || s->score >= min_HSP_score) { if (s->score < 0 || s->score >= min_HSP_score) {
s->score = s->score < 0? s->len * opt->a : s->score; s->score = s->score < 0? s->len * opt->a : s->score;
c->seeds[k++] = *s; c->seeds[k++] = *s;
@ -766,13 +734,14 @@ static inline int cal_max_gap(const mem_opt_t *opt, int qlen)
#define MAX_BAND_TRY 2 #define MAX_BAND_TRY 2
void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av) void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av, void *buf)
{ {
int i, k, rid, max_off[2], aw[2]; // aw: actual bandwidth used in extension int i, k, rid, max_off[2], aw[2]; // aw: actual bandwidth used in extension
int64_t l_pac = bns->l_pac, rmax[2], tmp, max = 0; int64_t l_pac = bns->l_pac, rmax[2], tmp, max = 0;
const mem_seed_t *s; const mem_seed_t *s;
uint8_t *rseq = 0; uint8_t *rseq = 0;
uint64_t *srt; uint64_t *srt;
smem_aux_t *aux = (smem_aux_t*)buf;
if (c->n == 0) return; if (c->n == 0) return;
// get the max possible span // get the max possible span
@ -794,6 +763,8 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
} }
// retrieve the reference sequence // retrieve the reference sequence
rseq = bns_fetch_seq(bns, pac, &rmax[0], c->seeds[0].rbeg, &rmax[1], &rid); rseq = bns_fetch_seq(bns, pac, &rmax[0], c->seeds[0].rbeg, &rmax[1], &rid);
//bns_fetch_seq_no_alloc(bns, pac, &rmax[0], c->seeds[0].rbeg, &rmax[1], &rid, &aux->seq_buf->m, &aux->seq_buf->addr); rseq = aux->seq_buf->addr;
assert(c->rid == rid); assert(c->rid == rid);
srt = malloc(c->n * 8); srt = malloc(c->n * 8);
@ -850,26 +821,31 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
if (bwa_verbose >= 4) err_printf("** ---> Extending from seed(%d) [%ld;%ld,%ld] @ %s <---\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg, bns->anns[c->rid].name); if (bwa_verbose >= 4) err_printf("** ---> Extending from seed(%d) [%ld;%ld,%ld] @ %s <---\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg, bns->anns[c->rid].name);
if (s->qbeg) { // left extension if (s->qbeg) { // left extension
uint8_t *rs, *qs;
int qle, tle, gtle, gscore; int qle, tle, gtle, gscore;
tmp = s->rbeg - rmax[0];
#ifndef USE_AVX2
uint8_t *rs, *qs;
qs = malloc(s->qbeg); qs = malloc(s->qbeg);
for (i = 0; i < s->qbeg; ++i) qs[i] = query[s->qbeg - 1 - i]; for (i = 0; i < s->qbeg; ++i) qs[i] = query[s->qbeg - 1 - i];
tmp = s->rbeg - rmax[0];
rs = malloc(tmp); rs = malloc(tmp);
for (i = 0; i < tmp; ++i) rs[i] = rseq[tmp - 1 - i]; for (i = 0; i < tmp; ++i) rs[i] = rseq[tmp - 1 - i];
#endif
for (i = 0; i < MAX_BAND_TRY; ++i) { for (i = 0; i < MAX_BAND_TRY; ++i) {
int prev = a->score; int prev = a->score;
aw[0] = opt->w << i; aw[0] = opt->w << i;
if (bwa_verbose >= 4) { if (bwa_verbose >= 4) {
int j; int j;
printf("*** Left ref: "); for (j = 0; j < tmp; ++j) putchar("ACGTN"[(int)rs[j]]); putchar('\n'); printf("*** Left ref: "); for (j = 0; j < tmp; ++j) putchar("ACGTN"[(int)rseq[tmp - 1 - j]]); putchar('\n');
printf("*** Left query: "); for (j = 0; j < s->qbeg; ++j) putchar("ACGTN"[(int)qs[j]]); putchar('\n'); printf("*** Left query: "); for (j = 0; j < s->qbeg; ++j) putchar("ACGTN"[(int)query[s->qbeg - 1 - j]]); putchar('\n');
} }
#ifdef SHOW_PERF #ifdef SHOW_PERF
int64_t tmp_time = realtime_msec(); int64_t tmp_time = realtime_msec();
#endif #endif
// a->score = ksw_extend2(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0]); #ifndef USE_AVX2
a->score = ksw_extend2_avx2(s->qbeg, query, tmp, rseq, 1, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0]); a->score = ksw_extend2(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0], aux->sw_buf);
#else
a->score = ksw_extend2_avx2(s->qbeg, query, tmp, rseq, 1, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0], aux->sw_buf);
#endif
#ifdef SHOW_PERF #ifdef SHOW_PERF
tmp_time = realtime_msec() - tmp_time; tmp_time = realtime_msec() - tmp_time;
__sync_fetch_and_add(&time_ksw_extend2, tmp_time); __sync_fetch_and_add(&time_ksw_extend2, tmp_time);
@ -885,7 +861,9 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
a->qb = 0, a->rb = s->rbeg - gtle; a->qb = 0, a->rb = s->rbeg - gtle;
a->truesc = gscore; a->truesc = gscore;
} }
#ifndef USE_AVX2
free(qs); free(rs); free(qs); free(rs);
#endif
} else a->score = a->truesc = s->len * opt->a, a->qb = 0, a->rb = s->rbeg; } else a->score = a->truesc = s->len * opt->a, a->qb = 0, a->rb = s->rbeg;
if (s->qbeg + s->len != l_query) { // right extension if (s->qbeg + s->len != l_query) { // right extension
@ -904,8 +882,11 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
#ifdef SHOW_PERF #ifdef SHOW_PERF
int64_t tmp_time = realtime_msec(); int64_t tmp_time = realtime_msec();
#endif #endif
//a->score = ksw_extend2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1]); #ifndef USE_AVX2
a->score = ksw_extend2_avx2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 0, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1]); a->score = ksw_extend2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1], aux->sw_buf);
#else
a->score = ksw_extend2_avx2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 0, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1], aux->sw_buf);
#endif
#ifdef SHOW_PERF #ifdef SHOW_PERF
tmp_time = realtime_msec() - tmp_time; tmp_time = realtime_msec() - tmp_time;
__sync_fetch_and_add(&time_ksw_extend2, tmp_time); __sync_fetch_and_add(&time_ksw_extend2, tmp_time);
@ -935,7 +916,8 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
a->frac_rep = c->frac_rep; a->frac_rep = c->frac_rep;
} }
free(srt); free(rseq); free(srt);
free(rseq);
} }
/***************************** /*****************************
@ -1160,7 +1142,7 @@ void mem_reorder_primary5(int T, mem_alnreg_v *a)
void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m) void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m)
{ {
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, mem_alnreg_v *a, int l_query, const char *query); extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, mem_alnreg_v *a, int l_query, const char *query);
kstring_t str; // kstring_t str;
kvec_t(mem_aln_t) aa; kvec_t(mem_aln_t) aa;
int k, l; int k, l;
char **XA = 0; char **XA = 0;
@ -1168,7 +1150,8 @@ void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac,
if (!(opt->flag & MEM_F_ALL)) if (!(opt->flag & MEM_F_ALL))
XA = mem_gen_alt(opt, bns, pac, a, s->l_seq, s->seq); XA = mem_gen_alt(opt, bns, pac, a, s->l_seq, s->seq);
kv_init(aa); kv_init(aa);
str.l = str.m = 0; str.s = 0; // str.l = str.m = 0; str.s = 0;
s->sam.l = 0;
for (k = l = 0; k < a->n; ++k) { for (k = l = 0; k < a->n; ++k) {
mem_alnreg_t *p = &a->a[k]; mem_alnreg_t *p = &a->a[k];
mem_aln_t *q; mem_aln_t *q;
@ -1191,65 +1174,56 @@ void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac,
mem_aln_t t; mem_aln_t t;
t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0); t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0);
t.flag |= extra_flag; t.flag |= extra_flag;
mem_aln2sam(opt, bns, &str, s, 1, &t, 0, m); mem_aln2sam(opt, bns, &s->sam, s, 1, &t, 0, m);
} else { } else {
for (k = 0; k < aa.n; ++k) for (k = 0; k < aa.n; ++k)
mem_aln2sam(opt, bns, &str, s, aa.n, aa.a, k, m); mem_aln2sam(opt, bns, &s->sam, s, aa.n, aa.a, k, m);
for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar); for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar);
free(aa.a); free(aa.a);
} }
s->sam = str.s; // s->sam = str.s;
if (XA) { if (XA) {
for (k = 0; k < a->n; ++k) free(XA[k]); for (k = 0; k < a->n; ++k) free(XA[k]);
free(XA); free(XA);
} }
} }
void mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const FMTIndex *fmt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *seq_arr, int nseq, mem_chain_v *chns, mem_alnreg_v *regs, void *buf) mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq, void *buf)
{ {
int si, i, j; int i;
// 1. 将seq都转成0,1,2,3 mem_chain_v chn;
for (si = 0; si < nseq; ++si) { // convert to 2-bit encoding if we have not done so mem_alnreg_v regs;
const int l_seq = seq_arr[si].l_seq;
char *seq = seq_arr[si].seq; for (i = 0; i < l_seq; ++i) // convert to 2-bit encoding if we have not done so
for (j = 0; j < l_seq; ++j) seq[j] = seq[j] < 4 ? seq[j] : nst_nt4_table[(int)seq[j]]; seq[i] = seq[i] < 4? seq[i] : nst_nt4_table[(int)seq[i]];
chn = mem_chain(opt, fmt, bns, l_seq, (uint8_t*)seq, buf);
chn.n = mem_chain_flt(opt, chn.n, chn.a);
mem_flt_chained_seeds(opt, bns, pac, l_seq, (uint8_t *)seq, chn.n, chn.a, ((smem_aux_t *)buf)->seq_buf);
if (bwa_verbose >= 4) mem_print_chain(bns, &chn);
kv_init(regs);
for (i = 0; i < chn.n; ++i) {
mem_chain_t *p = &chn.a[i];
if (bwa_verbose >= 4) err_printf("* ---> Processing chain(%d) <---\n", i);
mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, &regs, buf);
free(chn.a[i].seeds);
} }
free(chn.a);
// 2. find smem and chain regs.n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t*)seq, regs.n, regs.a);
mem_chain(opt, bwt, fmt, bns, seq_arr, nseq, chns, buf); if (bwa_verbose >= 4) {
err_printf("* %ld chains remain after removing duplicated chains\n", regs.n);
// 3. filter chain for (i = 0; i < regs.n; ++i) {
for (si = 0; si < nseq; ++si) { mem_alnreg_t *p = &regs.a[i];
chns[si].n = mem_chain_flt(opt, chns[si].n, chns[si].a); printf("** %d, [%d,%d) <=> [%ld,%ld)\n", p->score, p->qb, p->qe, (long)p->rb, (long)p->re);
mem_flt_chained_seeds(opt, bns, pac, seq_arr[si].l_seq, (uint8_t *)seq_arr[si].seq, chns[si].n, chns[si].a);
if (bwa_verbose >= 4) mem_print_chain(bns, &chns[si]);
}
// 4. extend
for (si = 0; si < nseq; ++si) {
mem_chain_v *chn = &chns[si];
for (i = 0; i < chn->n; ++i) {
mem_chain_t *p = &chn->a[i];
if (bwa_verbose >= 4) err_printf("* ---> Processing chain(%d) <---\n", i);
mem_chain2aln(opt, bns, pac, seq_arr[si].l_seq, (uint8_t *)seq_arr[si].seq, p, &regs[si]);
free(chn->a[i].seeds);
}
free(chn->a);
regs[si].n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t *)seq_arr[si].seq, regs[si].n, regs[si].a);
if (bwa_verbose >= 4) {
err_printf("* %ld chains remain after removing duplicated chains\n", regs[si].n);
for (i = 0; i < regs[si].n; ++i) {
mem_alnreg_t *p = &regs[si].a[i];
printf("** %d, [%d,%d) <=> [%ld,%ld)\n", p->score, p->qb, p->qe, (long)p->rb, (long)p->re);
}
}
for (i = 0; i < regs[si].n; ++i) {
mem_alnreg_t *p = &regs[si].a[i];
if (p->rid >= 0 && bns->anns[p->rid].is_alt)
p->is_alt = 1;
} }
} }
for (i = 0; i < regs.n; ++i) {
mem_alnreg_t *p = &regs.a[i];
if (p->rid >= 0 && bns->anns[p->rid].is_alt)
p->is_alt = 1;
}
return regs;
} }
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar) mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar)
@ -1324,44 +1298,25 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
return a; return a;
} }
typedef struct {
int num_reads;
const mem_opt_t *opt;
const bwt_t *bwt;
const FMTIndex *fmt;
const bntseq_t *bns;
const uint8_t *pac;
const mem_pestat_t *pes;
smem_aux_t **aux;
bseq1_t *seqs;
mem_chain_v *chns;
mem_alnreg_v *regs;
int64_t n_processed;
} worker_t;
static void worker1(void *data, int i, int tid) static void worker1(void *data, int i, int tid)
{ {
worker_t *w = (worker_t*)data; mem_worker_t *w = (mem_worker_t*)data;
int start = i * w->opt->batch_size; if (!(w->opt->flag&MEM_F_PE)) {
int end = MIN(start + w->opt->batch_size, w->num_reads); if (bwa_verbose >= 4) printf("=====> Processing read '%s' <=====\n", w->seqs[i].name);
mem_align1_core(w->opt, w->bwt, w->fmt, w->bns, w->pac, w->seqs + start, end - start, w->chns + start, w->regs + start, w->aux[tid]); w->regs[i] = mem_align1_core(w->opt, w->fmt, w->bns, w->pac, w->seqs[i].l_seq, w->seqs[i].seq, w->aux[tid]);
//if (!(w->opt->flag&MEM_F_PE)) { } else {
// if (bwa_verbose >= 4) printf("=====> Processing read '%s' <=====\n", w->seqs[i].name); if (bwa_verbose >= 4) printf("=====> Processing read '%s'/1 <=====\n", w->seqs[i<<1|0].name);
// w->regs[i] = mem_align1_core(w->opt, w->bwt, w->fmt, w->bns, w->pac, w->seqs[i].l_seq, w->seqs[i].seq, w->aux[tid]); w->regs[i<<1|0] = mem_align1_core(w->opt, w->fmt, w->bns, w->pac, w->seqs[i<<1|0].l_seq, w->seqs[i<<1|0].seq, w->aux[tid]);
// if (bwa_verbose >= 4) printf("=====> Processing read '%s'/2 <=====\n", w->seqs[i<<1|1].name);
//} else { w->regs[i<<1|1] = mem_align1_core(w->opt, w->fmt, w->bns, w->pac, w->seqs[i<<1|1].l_seq, w->seqs[i<<1|1].seq, w->aux[tid]);
// if (bwa_verbose >= 4) printf("=====> Processing read '%s'/1 <=====\n", w->seqs[i<<1|0].name); }
// w->regs[i<<1|0] = mem_align1_core(w->opt, w->bwt, w->fmt, w->bns, w->pac, w->seqs[i<<1|0].l_seq, w->seqs[i<<1|0].seq, w->aux[tid]);
// if (bwa_verbose >= 4) printf("=====> Processing read '%s'/2 <=====\n", w->seqs[i<<1|1].name);
// w->regs[i<<1|1] = mem_align1_core(w->opt, w->bwt, w->fmt, w->bns, w->pac, w->seqs[i<<1|1].l_seq, w->seqs[i<<1|1].seq, w->aux[tid]);
//}
} }
static void worker2(void *data, int i, int tid) static void worker2(void *data, int i, int tid)
{ {
extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]); extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]);
extern void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a); extern void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a);
worker_t *w = (worker_t*)data; mem_worker_t *w = (mem_worker_t*)data;
if (!(w->opt->flag&MEM_F_PE)) { if (!(w->opt->flag&MEM_F_PE)) {
if (bwa_verbose >= 4) printf("=====> Finalizing read '%s' <=====\n", w->seqs[i].name); if (bwa_verbose >= 4) printf("=====> Finalizing read '%s' <=====\n", w->seqs[i].name);
mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i); mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i);
@ -1375,51 +1330,73 @@ static void worker2(void *data, int i, int tid)
} }
} }
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const FMTIndex *fmt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0) static smem_aux_t *smem_aux_init()
{ {
smem_aux_t *a;
a = calloc(1, sizeof(smem_aux_t));
a->tmpv[0] = calloc(1, sizeof(bwtintv_v));
a->tmpv[1] = calloc(1, sizeof(bwtintv_v));
a->sw_buf = calloc(1, sizeof(buf_t));
a->seq_buf = calloc(1, sizeof(buf_t));
return a;
}
static void smem_aux_destroy(smem_aux_t *a)
{
free(a->tmpv[0]->a); free(a->tmpv[0]);
free(a->tmpv[1]->a); free(a->tmpv[1]);
free(a->mem.a); free(a->mem1.a);
free(a);
}
// 初始化线程需要的数据
mem_worker_t *init_mem_worker(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t *bns, const uint8_t *pac)
{
int i;
mem_worker_t *w = calloc(1, sizeof(mem_worker_t));
w->opt = opt; w->bns = bns; w->pac = pac; w->fmt = fmt;
w->aux = malloc(opt->n_threads * sizeof(smem_aux_t));
for (i = 0; i < opt->n_threads; ++i)
w->aux[i] = smem_aux_init();
return w;
}
void mem_process_seqs(const mem_opt_t *opt, mem_worker_t *w, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0)
{
extern void kt_for(int n_threads, void (*func)(void*,int,int), void *data, int n);
mem_pestat_t pes[4];
double ctime, rtime;
ctime = cputime(); rtime = realtime();
global_bns = w->bns;
if (w->n < n) {
w->n = n;
w->regs = realloc(w->regs, n * sizeof(mem_alnreg_v));
}
w->seqs = seqs; w->n_processed = n_processed;
w->pes = &pes[0];
#ifdef SHOW_PERF #ifdef SHOW_PERF
int64_t tmp_time = realtime_msec(); int64_t tmp_time = realtime_msec();
#endif #endif
extern void kt_for(int n_threads, void (*func)(void*,int,int), void *data, int n); kt_for(opt->n_threads, worker1, w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions
worker_t w; #ifdef SHOW_PERF
mem_pestat_t pes[4]; tmp_time = realtime_msec() - tmp_time;
double ctime, rtime; __sync_fetch_and_add(&time_work1, tmp_time);
int i; tmp_time = realtime_msec();
int n_batch = (n + opt->batch_size - 1) / opt->batch_size; #endif
w.num_reads = n;
ctime = cputime(); rtime = realtime();
global_bns = bns;
w.regs = calloc(n, sizeof(mem_alnreg_v));
w.chns = calloc(n, sizeof(mem_chain_v));
w.opt = opt; w.bwt = bwt; w.bns = bns; w.pac = pac;
w.seqs = seqs; w.n_processed = n_processed;
w.pes = &pes[0]; w.fmt = fmt;
w.aux = malloc(opt->n_threads * sizeof(smem_aux_t));
for (i = 0; i < opt->n_threads; ++i)
w.aux[i] = smem_aux_init(opt->batch_size);
//kt_for(opt->n_threads, worker1, &w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions
kt_for(opt->n_threads, worker1, &w, n_batch); // find mapping positions
for (i = 0; i < opt->n_threads; ++i)
smem_aux_destroy(w.aux[i], opt->batch_size);
free(w.aux);
free(w.chns);
if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0 if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0
else mem_pestat(opt, bns->l_pac, n, w.regs, pes); // otherwise, infer the insert size distribution from data else mem_pestat(opt, w->bns->l_pac, n, w->regs, pes); // otherwise, infer the insert size distribution from data
} }
kt_for(opt->n_threads, worker2, w, (opt->flag&MEM_F_PE)? n>>1 : n); // generate alignment
kt_for(opt->n_threads, worker2, &w, (opt->flag&MEM_F_PE)? n>>1 : n); // generate alignment
free(w.regs);
if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime);
#ifdef SHOW_PERF #ifdef SHOW_PERF
tmp_time = realtime_msec() - tmp_time; tmp_time = realtime_msec() - tmp_time;
__sync_fetch_and_add(&time_core_process, tmp_time); __sync_fetch_and_add(&time_work2, tmp_time);
#endif #endif
//free(w.regs);
if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime);
} }

View File

@ -31,6 +31,7 @@
#include "bntseq.h" #include "bntseq.h"
#include "bwa.h" #include "bwa.h"
#include "fmt_idx.h" #include "fmt_idx.h"
#include "utils.h"
#define MEM_MAPQ_COEF 30.0 #define MEM_MAPQ_COEF 30.0
#define MEM_MAPQ_MAX 60 #define MEM_MAPQ_MAX 60
@ -71,7 +72,6 @@ typedef struct {
int max_occ; // skip a seed if its occurence is larger than this value int max_occ; // skip a seed if its occurence is larger than this value
int max_chain_gap; // do not chain seed if it is max_chain_gap-bp away from the closest seed int max_chain_gap; // do not chain seed if it is max_chain_gap-bp away from the closest seed
int n_threads; // number of threads int n_threads; // number of threads
int batch_size; // batch size of seqs to process at one time
int chunk_size; // process chunk_size-bp sequences in a batch int chunk_size; // process chunk_size-bp sequences in a batch
float mask_level; // regard a hit as redundant if the overlap with another better hit is over mask_level times the min length of the two hits float mask_level; // regard a hit as redundant if the overlap with another better hit is over mask_level times the min length of the two hits
float drop_ratio; // drop a chain if its seed coverage is below drop_ratio times the seed coverage of a better chain overlapping with the small chain float drop_ratio; // drop a chain if its seed coverage is below drop_ratio times the seed coverage of a better chain overlapping with the small chain
@ -125,9 +125,46 @@ typedef struct { // This struct is only used for the convenience of API.
int score, sub, alt_sc; int score, sub, alt_sc;
} mem_aln_t; } mem_aln_t;
typedef struct {
int64_t rbeg;
int32_t qbeg, len;
int score;
} mem_seed_t; // unaligned memory
typedef struct {
int n, m, first, rid;
uint32_t w:29, kept:2, is_alt:1;
float frac_rep;
int64_t pos;
mem_seed_t *seeds;
} mem_chain_t;
typedef struct { size_t n, m; mem_chain_t *a; } mem_chain_v;
typedef struct {
bwtintv_v mem, mem1, *tmpv[2];
buf_t *sw_buf, *seq_buf;
} smem_aux_t;
typedef struct {
const mem_opt_t *opt;
const bwt_t *bwt;
const FMTIndex *fmt;
const bntseq_t *bns;
const uint8_t *pac;
const mem_pestat_t *pes;
smem_aux_t **aux;
bseq1_t *seqs;
mem_alnreg_v *regs;
int64_t n_processed;
int64_t n;
} mem_worker_t;
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
mem_worker_t *init_mem_worker(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t *bns, const uint8_t *pac);
smem_i *smem_itr_init(const bwt_t *bwt); smem_i *smem_itr_init(const bwt_t *bwt);
void smem_itr_destroy(smem_i *itr); void smem_itr_destroy(smem_i *itr);
@ -160,7 +197,7 @@ extern "C" {
* @param pes0 insert-size info; if NULL, infer from data; if not NULL, it should be an array with 4 elements, * @param pes0 insert-size info; if NULL, infer from data; if not NULL, it should be an array with 4 elements,
* corresponding to each FF, FR, RF and RR orientation. See mem_pestat() for more info. * corresponding to each FF, FR, RF and RR orientation. See mem_pestat() for more info.
*/ */
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const FMTIndex *fmt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0); void mem_process_seqs(const mem_opt_t *opt, mem_worker_t *w, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0);
/** /**
* Find the aligned regions for one query sequence * Find the aligned regions for one query sequence

View File

@ -377,12 +377,12 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
aa[i][n_aa[i]++] = g[i]; aa[i][n_aa[i]++] = g[i];
} }
} }
s[0].sam.l = 0;
for (i = 0; i < n_aa[0]; ++i) for (i = 0; i < n_aa[0]; ++i)
mem_aln2sam(opt, bns, &str, &s[0], n_aa[0], aa[0], i, &h[1]); // write read1 hits mem_aln2sam(opt, bns, &s[0].sam, &s[0], n_aa[0], aa[0], i, &h[1]); // write read1 hits
s[0].sam = strdup(str.s); str.l = 0; s[1].sam.l = 0;
for (i = 0; i < n_aa[1]; ++i) for (i = 0; i < n_aa[1]; ++i)
mem_aln2sam(opt, bns, &str, &s[1], n_aa[1], aa[1], i, &h[0]); // write read2 hits mem_aln2sam(opt, bns, &s[1].sam, &s[1], n_aa[1], aa[1], i, &h[0]); // write read2 hits
s[1].sam = str.s;
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name); if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
// free // free
for (i = 0; i < 2; ++i) { for (i = 0; i < 2; ++i) {

18
bwt.c
View File

@ -116,6 +116,7 @@ inline void kmer_setval_at(uint8_t *mem_addr, bwtintv_t ik, int pos)
// 获取kmer对应的fmt匹配信息, pos should be [0, 13] // 获取kmer对应的fmt匹配信息, pos should be [0, 13]
inline void bwt_kmer_get(const KmerHash *kmer_hash, bwtintv_t *ok, uint32_t qbit, int pos) inline void bwt_kmer_get(const KmerHash *kmer_hash, bwtintv_t *ok, uint32_t qbit, int pos)
{ {
#if HASH_KMER_LEN == 14
if (pos == 13) if (pos == 13)
kmer_getval_at(kmer_hash->ke14[qbit].intv_arr, ok, 0); kmer_getval_at(kmer_hash->ke14[qbit].intv_arr, ok, 0);
else if (pos == 12) else if (pos == 12)
@ -126,6 +127,23 @@ inline void bwt_kmer_get(const KmerHash *kmer_hash, bwtintv_t *ok, uint32_t qbit
kmer_getval_at(kmer_hash->ke11[qbit >> 6].intv_arr, ok, 0); kmer_getval_at(kmer_hash->ke11[qbit >> 6].intv_arr, ok, 0);
else else
kmer_getval_at(kmer_hash->ke10[qbit >> 8].intv_arr, ok, pos); kmer_getval_at(kmer_hash->ke10[qbit >> 8].intv_arr, ok, pos);
#elif HASH_KMER_LEN == 13
if (pos == 12)
kmer_getval_at(kmer_hash->ke13[qbit].intv_arr, ok, 0);
else if (pos == 11)
kmer_getval_at(kmer_hash->ke12[qbit >> 2].intv_arr, ok, 0);
else if (pos == 10)
kmer_getval_at(kmer_hash->ke11[qbit >> 4].intv_arr, ok, 0);
else
kmer_getval_at(kmer_hash->ke10[qbit >> 6].intv_arr, ok, pos);
#else
if (pos == 11)
kmer_getval_at(kmer_hash->ke12[qbit].intv_arr, ok, 0);
else if (pos == 10)
kmer_getval_at(kmer_hash->ke11[qbit >> 2].intv_arr, ok, 0);
else
kmer_getval_at(kmer_hash->ke10[qbit >> 4].intv_arr, ok, pos);
#endif
} }
// bwt->bwt and bwt->occ must be precalculated // bwt->bwt and bwt->occ must be precalculated

2
bwt.h
View File

@ -45,6 +45,8 @@ typedef unsigned char ubyte_t;
typedef uint64_t bwtint_t; typedef uint64_t bwtint_t;
//#define HASH_KMER_LEN 12
//#define HASH_KMER_LEN 13
#define HASH_KMER_LEN 14 #define HASH_KMER_LEN 14
// 用来保存kmer对应的fmt的位置信息 // 用来保存kmer对应的fmt的位置信息

View File

@ -130,7 +130,7 @@ void bsw2_extend_left(const bsw2opt_t *opt, bwtsw2_t *b, uint8_t *_query, int lq
for (k = p->k - 1, j = 0; k > 0 && j < lt; --k) // FIXME: k=0 not considered! for (k = p->k - 1, j = 0; k > 0 && j < lt; --k) // FIXME: k=0 not considered!
target[j++] = pac[k>>2] >> (~k&3)*2 & 0x3; target[j++] = pac[k>>2] >> (~k&3)*2 & 0x3;
lt = j; lt = j;
score = ksw_extend(p->beg, &query[lq - p->beg], lt, target, 5, mat, opt->q, opt->r, opt->bw, 0, -1, p->G, &qle, &tle, 0, 0, 0); score = ksw_extend(p->beg, &query[lq - p->beg], lt, target, 5, mat, opt->q, opt->r, opt->bw, 0, -1, p->G, &qle, &tle, 0, 0, 0, 0);
if (score > p->G) { // extensible if (score > p->G) { // extensible
p->G = score; p->G = score;
p->k -= tle; p->k -= tle;
@ -158,7 +158,7 @@ void bsw2_extend_rght(const bsw2opt_t *opt, bwtsw2_t *b, uint8_t *query, int lq,
for (k = p->k, j = 0; k < p->k + lt && k < l_pac; ++k) for (k = p->k, j = 0; k < p->k + lt && k < l_pac; ++k)
target[j++] = pac[k>>2] >> (~k&3)*2 & 0x3; target[j++] = pac[k>>2] >> (~k&3)*2 & 0x3;
lt = j; lt = j;
score = ksw_extend(lq - p->beg, &query[p->beg], lt, target, 5, mat, opt->q, opt->r, opt->bw, 0, -1, 1, &qle, &tle, 0, 0, 0) - 1; score = ksw_extend(lq - p->beg, &query[p->beg], lt, target, 5, mat, opt->q, opt->r, opt->bw, 0, -1, 1, &qle, &tle, 0, 0, 0, 0) - 1;
// if (score < p->G) fprintf(stderr, "[bsw2_extend_hits] %d < %d\n", score, p->G); // if (score < p->G) fprintf(stderr, "[bsw2_extend_hits] %d < %d\n", score, p->G);
if (score >= p->G) { if (score >= p->G) {
p->G = score; p->G = score;
@ -728,10 +728,11 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, c
{ {
gzFile fp, fp2; gzFile fp, fp2;
kseq_t *ks, *ks2; kseq_t *ks, *ks2;
int l, is_pe = 0, i, n; int l, is_pe = 0, i, n, m = 0;
int64_t seq_size = 0;
uint8_t *pac; uint8_t *pac;
bsw2seq_t *_seq; bsw2seq_t *_seq;
bseq1_t *bseq; bseq1_t *bseq = 0;
pac = calloc(bns->l_pac/4+1, 1); pac = calloc(bns->l_pac/4+1, 1);
for (l = 0; l < bns->n_seqs; ++l) for (l = 0; l < bns->n_seqs; ++l)
@ -745,7 +746,8 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, c
ks2 = kseq_init(fp2); ks2 = kseq_init(fp2);
is_pe = 1; is_pe = 1;
} else fp2 = 0, ks2 = 0, is_pe = 0; } else fp2 = 0, ks2 = 0, is_pe = 0;
while ((bseq = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) { bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2, 1, &seq_size, &m, &bseq);
while (n > 0) {
int size = 0; int size = 0;
if (n > _seq->max) { if (n > _seq->max) {
_seq->max = n; _seq->max = n;
@ -761,10 +763,11 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, c
size += p->l; size += p->l;
} }
fprintf(stderr, "[bsw2_aln] read %d sequences/pairs (%d bp) ...\n", n, size); fprintf(stderr, "[bsw2_aln] read %d sequences/pairs (%d bp) ...\n", n, size);
free(bseq);
process_seqs(_seq, opt, bns, pac, target, is_pe); process_seqs(_seq, opt, bns, pac, target, is_pe);
bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2, 1, &seq_size, &m, &bseq);
} }
// free // free
if (bseq) free(bseq);
free(pac); free(pac);
free(_seq->seq); free(_seq); free(_seq->seq); free(_seq);
kseq_destroy(ks); kseq_destroy(ks);

View File

@ -12,6 +12,8 @@ thread=1
#n_r2=~/fastq/tiny_n_r2.fq #n_r2=~/fastq/tiny_n_r2.fq
n_r1=~/fastq/diff_r1.fq n_r1=~/fastq/diff_r1.fq
n_r2=~/fastq/diff_r2.fq n_r2=~/fastq/diff_r2.fq
#n_r1=~/fastq/diff_all_r1.fq
#n_r2=~/fastq/diff_all_r2.fq
#n_r1=~/fastq/d_r1.fq #n_r1=~/fastq/d_r1.fq
#n_r2=~/fastq/d_r2.fq #n_r2=~/fastq/d_r2.fq
reference=~/reference/human_g1k_v37_decoy.fasta reference=~/reference/human_g1k_v37_decoy.fasta

View File

@ -57,12 +57,14 @@ int64_t time_ksw_extend2 = 0,
time_bwt_sa = 0, time_bwt_sa = 0,
time_bwt_sa_read = 0, time_bwt_sa_read = 0,
time_bns = 0, time_bns = 0,
time_core_process = 0; time_work1 = 0,
time_work2 = 0,
time_flt_chain = 0;
int64_t dn = 0, n16 = 0, n17 = 0, n18 = 0, n19 = 0, nall = 0, num_sa = 0; int64_t dn = 0, n16 = 0, n17 = 0, n18 = 0, n19 = 0, nall = 0, num_sa = 0;
int64_t s1n = 0, s2n = 0, s3n = 0, s1l = 0, s2l = 0, s3l = 0; int64_t s1n = 0, s2n = 0, s3n = 0, s1l = 0, s2l = 0, s3l = 0;
int64_t g_num_smem2 = 0; int64_t g_num_smem2 = 0;
FILE *fp1; FILE *gfp1, *gfp2, *gfp3;
#endif #endif
@ -72,6 +74,12 @@ void *kopen(const char *fn, int *_fd);
int kclose(void *a); int kclose(void *a);
void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps); void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps);
typedef struct {
int n_seqs;
int m_seqs;
bseq1_t *seqs;
} ktp_data_t;
typedef struct { typedef struct {
kseq_t *ks, *ks2; kseq_t *ks, *ks2;
mem_opt_t *opt; mem_opt_t *opt;
@ -79,40 +87,30 @@ typedef struct {
int64_t n_processed; int64_t n_processed;
int copy_comment, actual_chunk_size; int copy_comment, actual_chunk_size;
bwaidx_t *idx; bwaidx_t *idx;
mem_worker_t *w;
int data_idx; // pingpong buffer index
ktp_data_t *data;
} ktp_aux_t; } ktp_aux_t;
typedef struct {
ktp_aux_t *aux;
int n_seqs;
bseq1_t *seqs;
} ktp_data_t;
static void *process(void *shared, int step, void *_data) static void *process(void *shared, int step, void *_data)
{ {
ktp_aux_t *aux = (ktp_aux_t*)shared; ktp_aux_t *aux = (ktp_aux_t*)shared;
ktp_data_t *data = (ktp_data_t*)_data; ktp_data_t *data = (ktp_data_t*)_data;
int i; int i;
if (step == 0) { if (step == 0) {
ktp_data_t *ret; ktp_data_t *ret = aux->data + aux->data_idx;
aux->data_idx = !aux->data_idx;
int64_t size = 0; int64_t size = 0;
ret = calloc(1, sizeof(ktp_data_t)); bseq_read(aux->actual_chunk_size, &ret->n_seqs, aux->ks, aux->ks2, aux->copy_comment, &size, &ret->m_seqs, &ret->seqs);
ret->seqs = bseq_read(aux->actual_chunk_size, &ret->n_seqs, aux->ks, aux->ks2); if (ret->n_seqs == 0) {
if (ret->seqs == 0) {
free(ret);
return 0; return 0;
} }
if (!aux->copy_comment)
for (i = 0; i < ret->n_seqs; ++i) {
free(ret->seqs[i].comment);
ret->seqs[i].comment = 0;
}
for (i = 0; i < ret->n_seqs; ++i) size += ret->seqs[i].l_seq;
if (bwa_verbose >= 3) if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, ret->n_seqs, (long)size); fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, ret->n_seqs, (long)size);
return ret; return ret;
} else if (step == 1) { } else if (step == 1) {
const mem_opt_t *opt = aux->opt; const mem_opt_t *opt = aux->opt;
const bwaidx_t *idx = aux->idx; // const bwaidx_t *idx = aux->idx;
if (opt->flag & MEM_F_SMARTPE) { if (opt->flag & MEM_F_SMARTPE) {
bseq1_t *sep[2]; bseq1_t *sep[2];
int n_sep[2]; int n_sep[2];
@ -122,28 +120,25 @@ static void *process(void *shared, int step, void *_data)
fprintf(stderr, "[M::%s] %d single-end sequences; %d paired-end sequences\n", __func__, n_sep[0], n_sep[1]); fprintf(stderr, "[M::%s] %d single-end sequences; %d paired-end sequences\n", __func__, n_sep[0], n_sep[1]);
if (n_sep[0]) { if (n_sep[0]) {
tmp_opt.flag &= ~MEM_F_PE; tmp_opt.flag &= ~MEM_F_PE;
mem_process_seqs(&tmp_opt, idx->bwt, idx->fmt, idx->bns, idx->pac, aux->n_processed, n_sep[0], sep[0], 0); mem_process_seqs(&tmp_opt, aux->w, aux->n_processed, n_sep[0], sep[0], 0);
for (i = 0; i < n_sep[0]; ++i) for (i = 0; i < n_sep[0]; ++i)
data->seqs[sep[0][i].id].sam = sep[0][i].sam; data->seqs[sep[0][i].id].sam = sep[0][i].sam;
} }
if (n_sep[1]) { if (n_sep[1]) {
tmp_opt.flag |= MEM_F_PE; tmp_opt.flag |= MEM_F_PE;
mem_process_seqs(&tmp_opt, idx->bwt, idx->fmt, idx->bns, idx->pac, aux->n_processed + n_sep[0], n_sep[1], sep[1], aux->pes0); mem_process_seqs(&tmp_opt, aux->w, aux->n_processed + n_sep[0], n_sep[1], sep[1], aux->pes0);
for (i = 0; i < n_sep[1]; ++i) for (i = 0; i < n_sep[1]; ++i)
data->seqs[sep[1][i].id].sam = sep[1][i].sam; data->seqs[sep[1][i].id].sam = sep[1][i].sam;
} }
free(sep[0]); free(sep[1]); free(sep[0]); free(sep[1]);
} else } else
mem_process_seqs(opt, idx->bwt, idx->fmt, idx->bns, idx->pac, aux->n_processed, data->n_seqs, data->seqs, aux->pes0); mem_process_seqs(opt, aux->w, aux->n_processed, data->n_seqs, data->seqs, aux->pes0);
aux->n_processed += data->n_seqs; aux->n_processed += data->n_seqs;
return data; return data;
} else if (step == 2) { } else if (step == 2) {
for (i = 0; i < data->n_seqs; ++i) { for (i = 0; i < data->n_seqs; ++i) {
if (data->seqs[i].sam) err_fputs(data->seqs[i].sam, stdout); if (data->seqs[i].sam.l) err_fputs(data->seqs[i].sam.s, stdout);
free(data->seqs[i].name); free(data->seqs[i].comment);
free(data->seqs[i].seq); free(data->seqs[i].qual); free(data->seqs[i].sam);
} }
free(data->seqs); free(data);
return 0; return 0;
} }
return 0; return 0;
@ -167,8 +162,11 @@ static void update_a(mem_opt_t *opt, const mem_opt_t *opt0)
int main_mem(int argc, char *argv[]) int main_mem(int argc, char *argv[])
{ {
fp1 = fopen("./test_out.txt", "w"); #ifdef SHOW_PERF
gfp1 = fopen("./fp1.txt", "w");
gfp2 = fopen("./fp2.txt", "w");
gfp3 = fopen("./fp3.txt", "w");
#endif
mem_opt_t *opt, opt0; mem_opt_t *opt, opt0;
int fd, fd2, i, c, ignore_alt = 0, no_mt_io = 0; int fd, fd2, i, c, ignore_alt = 0, no_mt_io = 0;
int fixed_chunk_size = -1; int fixed_chunk_size = -1;
@ -185,7 +183,7 @@ int main_mem(int argc, char *argv[])
aux.opt = opt = mem_opt_init(); aux.opt = opt = mem_opt_init();
memset(&opt0, 0, sizeof(mem_opt_t)); memset(&opt0, 0, sizeof(mem_opt_t));
while ((c = getopt(argc, argv, "51qpaMCSPVYjuk:c:v:s:r:t:b:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:F:z:")) >= 0) { while ((c = getopt(argc, argv, "51qpaMCSPVYjuk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:F:z:")) >= 0) {
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
else if (c == '1') no_mt_io = 1; else if (c == '1') no_mt_io = 1;
else if (c == 'x') mode = optarg; else if (c == 'x') mode = optarg;
@ -195,7 +193,6 @@ int main_mem(int argc, char *argv[])
else if (c == 'T') opt->T = atoi(optarg), opt0.T = 1; else if (c == 'T') opt->T = atoi(optarg), opt0.T = 1;
else if (c == 'U') opt->pen_unpaired = atoi(optarg), opt0.pen_unpaired = 1; else if (c == 'U') opt->pen_unpaired = atoi(optarg), opt0.pen_unpaired = 1;
else if (c == 't') opt->n_threads = atoi(optarg), opt->n_threads = opt->n_threads > 1? opt->n_threads : 1; else if (c == 't') opt->n_threads = atoi(optarg), opt->n_threads = opt->n_threads > 1? opt->n_threads : 1;
else if (c == 'b') opt->batch_size = atoi(optarg) >> 1 << 1, opt->batch_size = opt->batch_size > 1? opt->batch_size : 512;
else if (c == 'P') opt->flag |= MEM_F_NOPAIRING; else if (c == 'P') opt->flag |= MEM_F_NOPAIRING;
else if (c == 'a') opt->flag |= MEM_F_ALL; else if (c == 'a') opt->flag |= MEM_F_ALL;
else if (c == 'p') opt->flag |= MEM_F_PE | MEM_F_SMARTPE; else if (c == 'p') opt->flag |= MEM_F_PE | MEM_F_SMARTPE;
@ -294,13 +291,11 @@ int main_mem(int argc, char *argv[])
} }
if (opt->n_threads < 1) opt->n_threads = 1; if (opt->n_threads < 1) opt->n_threads = 1;
if (opt->batch_size < 1) opt->batch_size = 512;
if (optind + 1 >= argc || optind + 3 < argc) { if (optind + 1 >= argc || optind + 3 < argc) {
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, "Usage: bwa mem [options] <idxbase> <in1.fq> [in2.fq]\n\n"); fprintf(stderr, "Usage: bwa mem [options] <idxbase> <in1.fq> [in2.fq]\n\n");
fprintf(stderr, "Algorithm options:\n\n"); fprintf(stderr, "Algorithm options:\n\n");
fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
fprintf(stderr, " -b INT batch size of reads to process at one time [%d]\n", opt->batch_size);
fprintf(stderr, " -k INT minimum seed length [%d]\n", opt->min_seed_len); fprintf(stderr, " -k INT minimum seed length [%d]\n", opt->min_seed_len);
fprintf(stderr, " -w INT band width for banded alignment [%d]\n", opt->w); fprintf(stderr, " -w INT band width for banded alignment [%d]\n", opt->w);
fprintf(stderr, " -d INT off-diagonal X-dropoff [%d]\n", opt->zdrop); fprintf(stderr, " -d INT off-diagonal X-dropoff [%d]\n", opt->zdrop);
@ -421,7 +416,11 @@ int main_mem(int argc, char *argv[])
opt->flag |= MEM_F_PE; opt->flag |= MEM_F_PE;
} }
} }
//fprintf(stderr, "%ld %ld %ld %ld %ld\n", aux.idx->fmt->L2[0], aux.idx->fmt->L2[1], aux.idx->fmt->L2[2], aux.idx->fmt->L2[3], aux.idx->fmt->L2[4]);
//exit(0);
aux.w = init_mem_worker(opt, aux.idx->fmt, aux.idx->bns, aux.idx->pac);
aux.data = calloc(2, sizeof(ktp_data_t));
bwa_print_sam_hdr(aux.idx->bns, hdr_line); bwa_print_sam_hdr(aux.idx->bns, hdr_line);
aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads; aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads;
@ -438,24 +437,19 @@ int main_mem(int argc, char *argv[])
#ifdef SHOW_PERF #ifdef SHOW_PERF
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, "time_work1: %f s\n", time_work1 / 1000.0 / 1);
fprintf(stderr, "time_work2: %f s\n", time_work2 / 1000.0 / 1);
fprintf(stderr, "time_flt_chain: %f s\n", time_flt_chain / 1000.0 / opt->n_threads);
fprintf(stderr, "time_seed_1: %f s\n", time_seed_1 / 1000.0 / opt->n_threads); fprintf(stderr, "time_seed_1: %f s\n", time_seed_1 / 1000.0 / opt->n_threads);
fprintf(stderr, "time_seed_2: %f s\n", time_seed_2 / 1000.0 / opt->n_threads); fprintf(stderr, "time_seed_2: %f s\n", time_seed_2 / 1000.0 / opt->n_threads);
fprintf(stderr, "time_seed_3: %f s\n", time_seed_3 / 1000.0 / opt->n_threads); fprintf(stderr, "time_seed_3: %f s\n", time_seed_3 / 1000.0 / opt->n_threads);
fprintf(stderr, "time_bwt_sa: %f s\n", time_bwt_sa / 1000.0 / opt->n_threads); fprintf(stderr, "time_bwt_sa: %f s\n", time_bwt_sa / 1000.0 / opt->n_threads);
fprintf(stderr, "time_ksw_extend2: %f s\n", time_ksw_extend2 / 1000.0 / opt->n_threads); fprintf(stderr, "time_ksw_extend2: %f s\n", time_ksw_extend2 / 1000.0 / opt->n_threads);
fprintf(stderr, "time_core_process: %f s\n", time_core_process / 1000.0 / opt->n_threads);
fprintf(stderr, "time_bns: %f s\n", time_bns / 1000.0 / opt->n_threads);
fprintf(stderr, "s1 num: %ld\n", s1n);
fprintf(stderr, "s2 num: %ld\n", s2n);
fprintf(stderr, "s3 num: %ld\n", s3n);
// fprintf(stderr, "s1 len: %ld\n", s1l / s1n);
// fprintf(stderr, "s2 len: %ld\n", s2l / s2n);
// fprintf(stderr, "s3 len: %ld\n", s3l / s3n);
fprintf(stderr, "get sa num: %ld\n", num_sa);
fprintf(stderr, "seed 2 num: %ld\n", g_num_smem2);
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fclose(fp1); fclose(gfp1);
fclose(gfp2);
fclose(gfp3);
#endif #endif
return 0; return 0;

329
fmt_idx.c
View File

@ -144,12 +144,16 @@ KmerHash fmt_restore_kmer_idx(const char *fn)
len = 1 << (12 << 1); len = 1 << (12 << 1);
kh->ke12 = (KmerEntry *)malloc(len * sizeof(KmerEntry)); kh->ke12 = (KmerEntry *)malloc(len * sizeof(KmerEntry));
fread_fix(fp, len * sizeof(KmerEntry), kh->ke12); fread_fix(fp, len * sizeof(KmerEntry), kh->ke12);
#if HASH_KMER_LEN > 12
len = 1 << (13 << 1); len = 1 << (13 << 1);
kh->ke13 = (KmerEntry *)malloc(len * sizeof(KmerEntry)); kh->ke13 = (KmerEntry *)malloc(len * sizeof(KmerEntry));
fread_fix(fp, len * sizeof(KmerEntry), kh->ke13); fread_fix(fp, len * sizeof(KmerEntry), kh->ke13);
#endif
#if HASH_KMER_LEN > 13
len = 1 << (14 << 1); len = 1 << (14 << 1);
kh->ke14 = (KmerEntry *)malloc(len * sizeof(KmerEntry)); kh->ke14 = (KmerEntry *)malloc(len * sizeof(KmerEntry));
fread_fix(fp, len * sizeof(KmerEntry), kh->ke14); fread_fix(fp, len * sizeof(KmerEntry), kh->ke14);
#endif
err_fclose(fp); err_fclose(fp);
return khash; return khash;
} }
@ -414,6 +418,109 @@ inline void fmt_e2_occ(const FMTIndex *fmt, bwtint_t k, int b1, int b2, bwtint_t
cnt[3] += x >> 24 & 0xff; cnt[3] += x >> 24 & 0xff;
} }
// 扩展两个个碱基计算bwt base为b的pre-bwt str中各个碱基的occ
inline void fmt_direct_e2_occ(const FMTIndex *fmt, bwtint_t k, int b1, int b2, bwtint_t cnt[4])
{
uint32_t x = 0;
uint32_t *p, *q, tmp;
bwtint_t str_line = k, cp_line = k & (~FMT_OCC_INTV_MASK); // cp = check point
int ti = b1 << 2 | b2;
if (k == (bwtint_t)(-1))
{
p = fmt->bwt + 4 + b1 * 4;
cnt[3] = p[b2];
return;
}
k -= (k >= fmt->primary); // k由bwt矩阵对应的行转换成bwt字符串对应的行去掉了$,所以大于$的行都减掉1
p = fmt_occ_intv(fmt, k);
q = p + 4 + b1 * 4;
cnt[3] = q[b2]; // b2的occ
p += 20;
// 使用mid interval信息
int mk = k % FMT_OCC_INTERVAL;
int n_mintv = mk >> FMT_MID_INTV_SHIFT;
if (n_mintv > 0) // 至少超过了第一个mid interval
{
p += n_mintv * (4 + (FMT_MID_INTERVAL >> 3)) - 4; // 对应的mid interval check point的首地址即A C G T的局部累积量
q = p + b1;
x = *q;
cnt[3] += x >> (b2 << 3) & 0xff; // b2的occ
x = 0;
p += 4;
}
uint32_t *end = p + ((k >> 3) - ((k & ~FMT_MID_INTV_MASK) >> 3));
for (; p < end; ++p)
{
x += __fmt_occ_e2_aux2(fmt, ti, *p);
}
tmp = *p & ~((1U << ((~k & 7) << 2)) - 1);
x += __fmt_occ_e2_aux2(fmt, ti, tmp);
if (b1 == 0)
{
x -= (~k & 7) << 8;
if (b2 == 0)
x -= (~k & 7) << 24;
}
// 如果跨过了second_primary,那么可能需要减掉一次累积值
if (b1 == fmt->first_base && cp_line < fmt->sec_primary && str_line >= fmt->sec_primary && b2 == fmt->last_base)
cnt[3] -= 1;
cnt[3] += x >> 24 & 0xff;
}
inline void fmt_direct2_e2_occ(const FMTIndex *fmt, bwtint_t k, int b1, int b2, bwtint_t cnt[4])
{
uint32_t x = 0;
uint32_t *p, *q, tmp;
bwtint_t str_line = k, cp_line = k & (~FMT_OCC_INTV_MASK); // cp = check point
int ti = b1 << 2 | b2;
cnt[1] = 0;
if (k == (bwtint_t)(-1))
{
p = fmt->bwt + 4 + b1 * 4;
cnt[3] = p[b2];
return;
}
k -= (k >= fmt->primary); // k由bwt矩阵对应的行转换成bwt字符串对应的行去掉了$,所以大于$的行都减掉1
p = fmt_occ_intv(fmt, k);
cnt[1] = p[b1]; // b1的occ
q = p + 4 + b1 * 4;
cnt[3] = q[b2]; // b2的occ
p += 20;
// 使用mid interval信息
int mk = k % FMT_OCC_INTERVAL;
int n_mintv = mk >> FMT_MID_INTV_SHIFT;
if (n_mintv > 0) // 至少超过了第一个mid interval
{
p += n_mintv * (4 + (FMT_MID_INTERVAL >> 3)) - 4; // 对应的mid interval check point的首地址即A C G T的局部累积量
q = p + b1;
x = *q;
cnt[1] += __fmt_mid_sum(x); // b1的occ
cnt[3] += x >> (b2 << 3) & 0xff; // b2的occ
x = 0;
p += 4;
}
uint32_t *end = p + ((k >> 3) - ((k & ~FMT_MID_INTV_MASK) >> 3));
for (; p < end; ++p)
{
x += __fmt_occ_e2_aux2(fmt, ti, *p);
}
tmp = *p & ~((1U << ((~k & 7) << 2)) - 1);
x += __fmt_occ_e2_aux2(fmt, ti, tmp);
if (b1 == 0)
{
x -= (~k & 7) << 8;
if (b2 == 0)
x -= (~k & 7) << 24;
}
// 如果跨过了second_primary,那么可能需要减掉一次累积值
if (b1 == fmt->first_base && cp_line < fmt->sec_primary && str_line >= fmt->sec_primary && b2 == fmt->last_base)
cnt[3] -= 1;
cnt[1] += x >> 8 & 0xff;
cnt[3] += x >> 24 & 0xff;
}
// 扩展两个碱基 // 扩展两个碱基
inline void fmt_extend2(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok1, bwtintv_t *ok2, int is_back, int b1, int b2) inline void fmt_extend2(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok1, bwtintv_t *ok2, int is_back, int b1, int b2)
{ {
@ -434,6 +541,33 @@ inline void fmt_extend2(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok1, bwti
*ok2 = intv; *ok2 = intv;
} }
// 扩展两个碱基
inline void fmt_direct_extend2(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok1, bwtintv_t *ok2, int is_back, int b1, int b2)
{
bwtint_t tk[4], tl[4];
// tk表示在k行之前所有各个碱基累积出现次数tl表示在l行之前的累积
fmt_direct_e2_occ(fmt, ik->x[!is_back] - 1, b1, b2, tk);
fmt_direct_e2_occ(fmt, ik->x[!is_back] - 1 + ik->x[2], b1, b2, tl);
ok2->x[!is_back] = fmt->L2[b2] + 1 + tk[3];
ok2->x[2] = tl[3] - tk[3];
ok2->x[is_back] = 0;
}
// 扩展两个碱基
inline void fmt_direct2_extend2(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok1, bwtintv_t *ok2, int is_back, int b1, int b2)
{
bwtint_t tk[4], tl[4];
// tk表示在k行之前所有各个碱基累积出现次数tl表示在l行之前的累积
fmt_direct2_e2_occ(fmt, ik->x[!is_back] - 1, b1, b2, tk);
fmt_direct2_e2_occ(fmt, ik->x[!is_back] - 1 + ik->x[2], b1, b2, tl);
ok1->x[!is_back] = fmt->L2[b1] + 1 + tk[1];
ok1->x[2] = tl[1] - tk[1];
ok1->x[is_back] = 0;
ok2->x[!is_back] = fmt->L2[b2] + 1 + tk[3];
ok2->x[2] = tl[3] - tk[3];
ok2->x[is_back] = 0;
}
// 扩展一个碱基 // 扩展一个碱基
inline void fmt_extend1(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok, int is_back, int b1) inline void fmt_extend1(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok, int is_back, int b1)
{ {
@ -448,6 +582,17 @@ inline void fmt_extend1(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok, int i
// 第一次正向扩展 // 第一次正向扩展
ok->x[is_back] = ik->x[is_back] + (ik->x[!is_back] <= fmt->primary && ik->x[!is_back] + ik->x[2] - 1 >= fmt->primary) + tl[0] - tk[0]; ok->x[is_back] = ik->x[is_back] + (ik->x[!is_back] <= fmt->primary && ik->x[!is_back] + ik->x[2] - 1 >= fmt->primary) + tl[0] - tk[0];
} }
inline void fmt_direct_extend1(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok, int is_back, int b1)
{
bwtint_t tk[4], tl[4];
int b2 = 3; // 如果只扩展一次那么第二个碱基设置成T可以减小一些计算量如计算大于b2的累积数量
// tk表示在k行之前所有各个碱基累积出现次数tl表示在l行之前的累积
fmt_direct2_e2_occ(fmt, ik->x[!is_back] - 1, b1, b2, tk);
fmt_direct2_e2_occ(fmt, ik->x[!is_back] - 1 + ik->x[2], b1, b2, tl);
// 这里是反向扩展
ok->x[!is_back] = fmt->L2[b1] + 1 + tk[1];
ok->x[2] = tl[1] - tk[1];
}
// 序列和参考基因直接对比 // 序列和参考基因直接对比
static void direct_extend(const FMTIndex *fmt, int len, const uint8_t *q, int left_pos, int right_pos, bwtint_t mtx_line, bwtintv_t *mt) static void direct_extend(const FMTIndex *fmt, int len, const uint8_t *q, int left_pos, int right_pos, bwtint_t mtx_line, bwtintv_t *mt)
@ -457,7 +602,8 @@ static void direct_extend(const FMTIndex *fmt, int len, const uint8_t *q, int le
while (k != qcond && r != rcond) \ while (k != qcond && r != rcond) \
{ \ { \
const int base = PAC_BASE(fmt->pac, r); \ const int base = PAC_BASE(fmt->pac, r); \
if (q[k] != base) break; \ if (q[k] != base) \
break; \
k += qstep; \ k += qstep; \
r += rstep; \ r += rstep; \
} }
@ -465,7 +611,8 @@ static void direct_extend(const FMTIndex *fmt, int len, const uint8_t *q, int le
while (k != qcond && r != rcond) \ while (k != qcond && r != rcond) \
{ \ { \
const int base = 3 - PAC_BASE(fmt->pac, r); \ const int base = 3 - PAC_BASE(fmt->pac, r); \
if (q[k] != base) break; \ if (q[k] != base) \
break; \
k += qstep; \ k += qstep; \
r += rstep; \ r += rstep; \
} }
@ -510,7 +657,8 @@ static void direct_extend(const FMTIndex *fmt, int len, const uint8_t *q, int le
static inline void fmt_reverse_intvs(bwtintv_v *p) static inline void fmt_reverse_intvs(bwtintv_v *p)
{ {
if (p->n > 1) { if (p->n > 1)
{
int j; int j;
for (j = 0; j < p->n >> 1; ++j) for (j = 0; j < p->n >> 1; ++j)
{ {
@ -531,10 +679,12 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv
uint32_t qbit = 0; uint32_t qbit = 0;
mem->n = 0; mem->n = 0;
int only_forward = x == 0 || q[x - 1] > 3; int only_forward = x == 0 || q[x - 1] > 3;
if (q[x] > 3) return x + 1; if (q[x] > 3)
return x + 1;
if (min_intv < 1) min_intv = 1; // the interval size should be at least 1 if (min_intv < 1)
curr = tmpvec; // use the temporary vector if provided min_intv = 1; // the interval size should be at least 1
curr = tmpvec; // use the temporary vector if provided
qbit = build_forward_kmer(&q[x], len - x, HASH_KMER_LEN, &kmer_len); qbit = build_forward_kmer(&q[x], len - x, HASH_KMER_LEN, &kmer_len);
bwt_kmer_get(&fmt->kmer_hash, &ik, qbit, 0); // 初始碱基位置 bwt_kmer_get(&fmt->kmer_hash, &ik, qbit, 0); // 初始碱基位置
@ -542,13 +692,26 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv
// check change of the interval size and whether the interval size is too small to be extended further // check change of the interval size and whether the interval size is too small to be extended further
#define CHECK_INTV_CHANGE(iv, ov, end_pos) \ #define CHECK_INTV_CHANGE(iv, ov, end_pos) \
if (ov.x[2] != iv.x[2]) { kv_push(bwtintv_t, *curr, iv); if (ov.x[2] < min_intv) break; } iv = ov; iv.info = end_pos if (ov.x[2] != iv.x[2]) \
#define PUSH_VAL_AND_SKIP(iv) \ { \
do { kv_push(bwtintv_t, *curr, iv); goto backward_search; } while(0) kv_push(bwtintv_t, *curr, iv); \
if (ov.x[2] < min_intv) \
break; \
} \
iv = ov; \
iv.info = end_pos
#define PUSH_VAL_AND_SKIP(iv) \
do \
{ \
kv_push(bwtintv_t, *curr, iv); \
goto backward_search; \
} while (0)
// 处理kmer对应的匹配信息 // 处理kmer对应的匹配信息
if (only_forward) j = kmer_len - 1; if (only_forward)
else j = 1; j = kmer_len - 1;
else
j = 1;
for (curr->n = 0; j < kmer_len; ++j) for (curr->n = 0; j < kmer_len; ++j)
{ {
bwt_kmer_get(&fmt->kmer_hash, &ok1, qbit, j); bwt_kmer_get(&fmt->kmer_hash, &ok1, qbit, j);
@ -558,8 +721,8 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv
PUSH_VAL_AND_SKIP(ik); PUSH_VAL_AND_SKIP(ik);
// 扩展kmer之后的碱基 // 扩展kmer之后的碱基
// __builtin_prefetch(fmt_occ_intv(fmt, ik.x[1] - 1), 0, 2); // __builtin_prefetch(fmt_occ_intv(fmt, ik.x[1] - 1), 0, 2);
// __builtin_prefetch(fmt_occ_intv(fmt, ik.x[1] - 1 + ik.x[2]), 0, 2); // __builtin_prefetch(fmt_occ_intv(fmt, ik.x[1] - 1 + ik.x[2]), 0, 2);
for (i = (int)ik.info; i + 1 < len; i += 2) for (i = (int)ik.info; i + 1 < len; i += 2)
{ // forward search { // forward search
if (q[i] < 4 && q[i + 1] < 4) if (q[i] < 4 && q[i + 1] < 4)
@ -569,18 +732,20 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1 + ok2.x[2]), 0, 2); __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1 + ok2.x[2]), 0, 2);
CHECK_INTV_CHANGE(ik, ok1, i + 1); CHECK_INTV_CHANGE(ik, ok1, i + 1);
CHECK_INTV_CHANGE(ik, ok2, i + 2); CHECK_INTV_CHANGE(ik, ok2, i + 2);
// 在这里进行判断是否只有一个候选了
#if 1 // 间隔为1的时候直接与reference比对 #if 1
if (min_intv == 1 && ok2.x[2] == min_intv) // 在这里进行判断是否只有一个候选了 if (min_intv == 1 && ok2.x[2] == min_intv)
{ {
direct_extend(fmt, len, q, x, i + 2, ok2.x[0], &mt); direct_extend(fmt, len, q, x, i + 2, ok2.x[0], &mt);
kv_push(bwtintv_t, *mem, mt); kv_push(bwtintv_t, *mem, mt);
ret = (uint32_t)mt.info; ret = (uint32_t)mt.info;
if (only_forward || mt.rm[0].qs == 0 || q[mt.rm[0].qs - 1] > 3) goto fmt_smem_end; if (only_forward || mt.rm[0].qs == 0 || q[mt.rm[0].qs - 1] > 3)
goto fmt_smem_end;
goto backward_search; goto backward_search;
} }
#endif #endif
} else if (q[i] < 4) // q[i+1] >= 4 }
else if (q[i] < 4) // q[i+1] >= 4
{ {
fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]); fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]);
CHECK_INTV_CHANGE(ik, ok1, i + 1); CHECK_INTV_CHANGE(ik, ok1, i + 1);
@ -605,56 +770,72 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv
kv_push(bwtintv_t, *curr, ik); // push the last interval if we reach the end kv_push(bwtintv_t, *curr, ik); // push the last interval if we reach the end
backward_search: backward_search:
fmt_reverse_intvs(curr); // s.t. smaller intervals (i.e. longer matches) visited first fmt_reverse_intvs(curr); // s.t. smaller intervals (i.e. longer matches) visited first
if (mt.num_match == 0) ret = curr->a[0].info; // this will be the returned value扩展到的最远的位置 if (mt.num_match == 0)
ret = curr->a[0].info; // this will be the returned value扩展到的最远的位置
// 按照种子进行遍历,反向扩展 // 按照种子进行遍历,反向扩展
#define CHECK_ADD_MEM(pos, intv, mem) \ #define CHECK_ADD_MEM(pos, intv, mem) \
if (((int)((intv).info) - (pos) >= min_seed_len) && (mem->n == 0 || (pos) < mem->a[mem->n - 1].info >> 32)) { \ if (((int)((intv).info) - (pos) >= min_seed_len) && (mem->n == 0 || (pos) < mem->a[mem->n - 1].info >> 32)) \
(intv).info |= (uint64_t)(pos) << 32; kv_push(bwtintv_t, *mem, (intv)); \ { \
(intv).info |= (uint64_t)(pos) << 32; \
kv_push(bwtintv_t, *mem, (intv)); \
} }
#define CHECK_INTV_ADD_MEM(ok, pos, intv, mem) \ #define CHECK_INTV_ADD_MEM(ok, pos, intv, mem) \
if (ok.x[2] < min_intv) { CHECK_ADD_MEM(pos, intv, mem); break; } if (ok.x[2] < min_intv) \
{ \
CHECK_ADD_MEM(pos, intv, mem); \
break; \
}
int last_kmer_start = 0; int last_kmer_start = 0;
for (j = 0; j < curr->n; ++j) for (j = 0; j < curr->n; ++j)
{ {
bwtintv_t *p = &curr->a[j]; // 前向扩展的种子 bwtintv_t *p = &curr->a[j]; // 前向扩展的种子
// __builtin_prefetch(fmt_occ_intv(fmt, p->x[0] - 1), 0, 2); // __builtin_prefetch(fmt_occ_intv(fmt, p->x[0] - 1), 0, 2);
// __builtin_prefetch(fmt_occ_intv(fmt, p->x[0] - 1 + p->x[2]), 0, 2); // __builtin_prefetch(fmt_occ_intv(fmt, p->x[0] - 1 + p->x[2]), 0, 2);
#if 1 if (!only_forward && p->info - x < HASH_KMER_LEN)
if (!only_forward && p->info - x < HASH_KMER_LEN) { {
if (last_kmer_start && kmer_len == HASH_KMER_LEN && p->info == last_kmer_start && p->info - kmer_len > 0 && q[p->info - kmer_len] < 4) if (last_kmer_start && kmer_len == HASH_KMER_LEN && p->info == last_kmer_start && p->info - kmer_len > 0 && q[p->info - kmer_len] < 4)
qbit = ((qbit << 2) | (3 - q[p->info - kmer_len])) & ((1L << (kmer_len << 1)) - 1); // 创建反向kmer qbit = ((qbit << 2) | (3 - q[p->info - kmer_len])) & ((1L << (kmer_len << 1)) - 1); // 创建反向kmer
else qbit = build_backward_kmer(q, p->info - 1, HASH_KMER_LEN, &kmer_len); // 创建反向kmer else
qbit = build_backward_kmer(q, p->info - 1, HASH_KMER_LEN, &kmer_len); // 创建反向kmer
last_kmer_start = p->info - 1; last_kmer_start = p->info - 1;
i = 1; i = 1;
do { bwt_kmer_get(&fmt->kmer_hash, &ik, qbit, kmer_len - i++); } while (ik.x[2] < min_intv); do
if (i > 2) continue; {
p->x[0] = ik.x[1]; p->x[1] = ik.x[0]; p->x[2] = ik.x[2]; bwt_kmer_get(&fmt->kmer_hash, &ik, qbit, kmer_len - i++);
} while (ik.x[2] < min_intv);
if (i > 2)
continue;
p->x[0] = ik.x[1];
p->x[1] = ik.x[0];
p->x[2] = ik.x[2];
i = p->info - (kmer_len - i + 3); i = p->info - (kmer_len - i + 3);
} else { }
else
{
i = x - 1; i = x - 1;
} }
#else for (; i > 0; i -= 2)
i = x - 1;
#endif
for (; i > 0; i -= 2)
{ {
if (q[i] < 4 && q[i - 1] < 4) // 两个都可以扩展 if (q[i] < 4 && q[i - 1] < 4) // 两个都可以扩展
{ {
fmt_extend2(fmt, p, &ok1, &ok2, 1, q[i], q[i - 1]); // fmt_extend2(fmt, p, &ok1, &ok2, 1, q[i], q[i - 1]);
fmt_direct2_extend2(fmt, p, &ok1, &ok2, 1, q[i], q[i - 1]);
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[0] - 1), 0, 2); __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[0] - 1), 0, 2);
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[0] - 1 + ok2.x[2]), 0, 2); __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[0] - 1 + ok2.x[2]), 0, 2);
CHECK_INTV_ADD_MEM(ok1, i + 1, *p, mem); CHECK_INTV_ADD_MEM(ok1, i + 1, *p, mem);
ok1.info = p->info; ok1.info = p->info;
CHECK_INTV_ADD_MEM(ok2, i, ok1, mem); CHECK_INTV_ADD_MEM(ok2, i, ok1, mem);
ok2.info = p->info; *p = ok2; ok2.info = p->info;
*p = ok2;
} }
else if (q[i] < 4) // 只能扩展一个 else if (q[i] < 4) // 只能扩展一个
{ {
fmt_extend1(fmt, p, &ok1, 1, q[i]); // fmt_extend1(fmt, p, &ok1, 1, q[i]);
fmt_direct_extend1(fmt, p, &ok1, 1, q[i]);
CHECK_INTV_ADD_MEM(ok1, i + 1, *p, mem); CHECK_INTV_ADD_MEM(ok1, i + 1, *p, mem);
ok1.info = p->info; ok1.info = p->info;
CHECK_ADD_MEM(i, ok1, mem); CHECK_ADD_MEM(i, ok1, mem);
@ -668,16 +849,22 @@ backward_search:
} }
for (; i == 0; --i) for (; i == 0; --i)
{ // 扩展到了第一个碱基 { // 扩展到了第一个碱基
if (q[i] < 4) { if (q[i] < 4)
fmt_extend1(fmt, p, &ok1, 1, q[i]); {
// fmt_extend1(fmt, p, &ok1, 1, q[i]);
fmt_direct_extend1(fmt, p, &ok1, 1, q[i]);
CHECK_INTV_ADD_MEM(ok1, i + 1, *p, mem); CHECK_INTV_ADD_MEM(ok1, i + 1, *p, mem);
ok1.info = p->info; *p = ok1; ok1.info = p->info;
} else { *p = ok1;
}
else
{
CHECK_ADD_MEM(i + 1, *p, mem); CHECK_ADD_MEM(i + 1, *p, mem);
goto fmt_smem_end; goto fmt_smem_end;
} }
} }
if (i == -1) { if (i == -1)
{
CHECK_ADD_MEM(i + 1, *p, mem); CHECK_ADD_MEM(i + 1, *p, mem);
goto fmt_smem_end; goto fmt_smem_end;
} }
@ -690,21 +877,21 @@ fmt_smem_end:
int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_len, int max_intv, bwtintv_t *mem) int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_len, int max_intv, bwtintv_t *mem)
{ {
int i, kmer_len; int i, kmer_len, first_extend_len;
bwtintv_t ik = {0}, ok1={0}, ok2={0}; bwtintv_t ik = {0}, ok1 = {0}, ok2 = {0};
uint64_t qbit; uint64_t qbit;
memset(mem, 0, sizeof(bwtintv_t)); memset(mem, 0, sizeof(bwtintv_t));
if (q[x] > 3) return x + 1; if (q[x] > 3)
return x + 1;
if (len - x <= min_len)
return len;
qbit = build_forward_kmer(&q[x], len - x, HASH_KMER_LEN, &kmer_len); qbit = build_forward_kmer(&q[x], len - x, HASH_KMER_LEN, &kmer_len);
bwt_kmer_get(&fmt->kmer_hash, &ik, qbit, kmer_len-1); // 初始碱基位置 bwt_kmer_get(&fmt->kmer_hash, &ik, qbit, kmer_len - 1); // 初始碱基位置
ik.info = x + kmer_len; ik.info = x + kmer_len;
//fmt_set_intv(fmt, q[x], ik); // __builtin_prefetch(fmt_occ_intv(fmt, ik.x[1] - 1), 0, 2);
//ik.info = x + 1; // __builtin_prefetch(fmt_occ_intv(fmt, ik.x[1] - 1 + ik.x[2]), 0, 2);
// __builtin_prefetch(fmt_occ_intv(fmt, ik.x[1] - 1), 0, 2);
// __builtin_prefetch(fmt_occ_intv(fmt, ik.x[1] - 1 + ik.x[2]), 0, 2);
#define COND_SET_RETURN(iv, ov, start_pos, end_pos, max_intv, min_len) \ #define COND_SET_RETURN(iv, ov, start_pos, end_pos, max_intv, min_len) \
if (iv.x[2] < max_intv && end_pos - start_pos >= min_len) \ if (iv.x[2] < max_intv && end_pos - start_pos >= min_len) \
@ -713,12 +900,35 @@ int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, in
(ov).info = (uint64_t)start_pos << 32 | (end_pos + 1); \ (ov).info = (uint64_t)start_pos << 32 | (end_pos + 1); \
return end_pos + 1; \ return end_pos + 1; \
} }
#if 1
first_extend_len = x + min_len + 1;
first_extend_len = MIN(len, first_extend_len);
for (i = (int)ik.info; i + 1 < first_extend_len; i += 2)
{
if (q[i] < 4 && q[i + 1] < 4)
{
// fmt_direct_extend2(fmt, &ik, &ok1, &ok2, 0, 3 - q[i], 3 - q[i + 1]);
fmt_extend2(fmt, &ik, &ok1, &ok2, 0, 3 - q[i], 3 - q[i + 1]);
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1), 0, 2);
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1 + ok2.x[2]), 0, 2);
ik = ok2;
}
else if (q[i] < 4)
return i + 2;
else
return i + 1;
}
COND_SET_RETURN(ik, *mem, x, i - 1, max_intv, min_len);
for (; i + 1 < len; i += 2)
#else
for (i = (int)ik.info; i + 1 < len; i += 2) for (i = (int)ik.info; i + 1 < len; i += 2)
#endif
{ // forward search { // forward search
if (q[i] < 4 && q[i + 1] < 4) if (q[i] < 4 && q[i + 1] < 4)
{ {
fmt_extend2(fmt, &ik, &ok1, &ok2, 0, 3 - q[i], 3 - q[i + 1]); fmt_extend2(fmt, &ik, &ok1, &ok2, 0, 3 - q[i], 3 - q[i + 1]);
// fmt_direct2_extend2(fmt, &ik, &ok1, &ok2, 0, 3 - q[i], 3 - q[i + 1]);
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1), 0, 2); __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1), 0, 2);
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1 + ok2.x[2]), 0, 2); __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1 + ok2.x[2]), 0, 2);
COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len); COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len);
@ -736,7 +946,8 @@ int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, in
return i + 1; return i + 1;
} }
} }
if (i == len - 1) { if (i == len - 1)
{
fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]); fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]);
COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len); COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len);
} }
@ -750,7 +961,7 @@ inline static void fmt_get_previous_base(const FMTIndex *fmt, bwtint_t k, uint8_
uint8_t base2; uint8_t base2;
// 第一步找到check point位置 // 第一步找到check point位置
p = fmt_occ_intv(fmt, k); // check point起始位置 p = fmt_occ_intv(fmt, k); // check point起始位置
p += 20; // bwt碱基起始位置 p += 20; // bwt碱基起始位置
// 第二步找到mid check point位置 // 第二步找到mid check point位置
int mk = k & FMT_OCC_INTV_MASK; int mk = k & FMT_OCC_INTV_MASK;
int n_mintv = mk >> FMT_MID_INTV_SHIFT; int n_mintv = mk >> FMT_MID_INTV_SHIFT;
@ -783,7 +994,9 @@ bwtint_t fmt_sa(const FMTIndex *fmt, bwtint_t k)
{ {
++sa; ++sa;
fmt_previous_line(fmt, k, &k1, &k2); fmt_previous_line(fmt, k, &k1, &k2);
if (!(k1 & mask)) { __builtin_prefetch(fmt_occ_intv(fmt, k2), 0, 2);
if (!(k1 & mask))
{
k = k1; k = k1;
break; break;
} }

View File

@ -21,8 +21,6 @@ Date : 2023/12/24
#define FMT_MID_INTERVAL (1 << FMT_MID_INTV_SHIFT) #define FMT_MID_INTERVAL (1 << FMT_MID_INTV_SHIFT)
#define FMT_MID_INTV_MASK (FMT_MID_INTERVAL - 1) #define FMT_MID_INTV_MASK (FMT_MID_INTERVAL - 1)
// #undef FMT_MID_INTERVAL
// 获取碱基c待查找序列的首个碱基和对应的互补碱基对应的行以及间隔 // 获取碱基c待查找序列的首个碱基和对应的互补碱基对应的行以及间隔
#define fmt_set_intv(fmt, c, ik) ((ik).x[0] = (fmt)->L2[(int)(c)] + 1, (ik).x[2] = (fmt)->L2[(int)(c) + 1] - (fmt)->L2[(int)(c)], (ik).x[1] = (fmt)->L2[3 - (c)] + 1, (ik).info = 0) #define fmt_set_intv(fmt, c, ik) ((ik).x[0] = (fmt)->L2[(int)(c)] + 1, (ik).x[2] = (fmt)->L2[(int)(c) + 1] - (fmt)->L2[(int)(c)], (ik).x[1] = (fmt)->L2[3 - (c)] + 1, (ik).info = 0)
// k行bwt str行不包含$对应的check point occ数据起始地址小于k且是OCC_INTERVAL的整数倍 // k行bwt str行不包含$对应的check point occ数据起始地址小于k且是OCC_INTERVAL的整数倍
@ -32,9 +30,11 @@ Date : 2023/12/24
#elif FMT_MID_INTERVAL == 16 #elif FMT_MID_INTERVAL == 16
#define fmt_occ_intv(b, k) ((b)->bwt + (k) / FMT_OCC_INTERVAL * (FMT_OCC_INTERVAL / 8 + 80)) #define fmt_occ_intv(b, k) ((b)->bwt + (k) / FMT_OCC_INTERVAL * (FMT_OCC_INTERVAL / 8 + 80))
#elif FMT_MID_INTERVAL == 32 #elif FMT_MID_INTERVAL == 32
#define fmt_occ_intv(b, k) ((b)->bwt + (k) / FMT_OCC_INTERVAL * (FMT_OCC_INTERVAL / 8 + 48)) //#define fmt_occ_intv(b, k) ((b)->bwt + (k) / FMT_OCC_INTERVAL * (FMT_OCC_INTERVAL / 8 + 48))
#define fmt_occ_intv(b, k) ((b)->bwt + ((k) >> 8) * 80)
#elif FMT_MID_INTERVAL == 64 #elif FMT_MID_INTERVAL == 64
#define fmt_occ_intv(b, k) ((b)->bwt + (k) / FMT_OCC_INTERVAL * (FMT_OCC_INTERVAL / 8 + 32)) //#define fmt_occ_intv(b, k) ((b)->bwt + (k) / FMT_OCC_INTERVAL * (FMT_OCC_INTERVAL / 8 + 32))
#define fmt_occ_intv(b, k) ((b)->bwt + ((k) >> 8 << 6))
#elif FMT_MID_INTERVAL == 128 #elif FMT_MID_INTERVAL == 128
#define fmt_occ_intv(b, k) ((b)->bwt + (k) / FMT_OCC_INTERVAL * (FMT_OCC_INTERVAL / 8 + 24)) #define fmt_occ_intv(b, k) ((b)->bwt + (k) / FMT_OCC_INTERVAL * (FMT_OCC_INTERVAL / 8 + 24))
#else #else
@ -89,10 +89,15 @@ void fmt_restore_sa(const char *fn, FMTIndex *fmt);
// 根据interval-bwt创建fmt-index // 根据interval-bwt创建fmt-index
FMTIndex *create_fmt_from_bwt(bwt_t *bwt); FMTIndex *create_fmt_from_bwt(bwt_t *bwt);
// 扩展两个个碱基计算bwt base为b的pre-bwt str中各个碱基的occ // 扩展两个个碱基计算bwt base为b的pre-bwt str中各个碱基的occ
void fmt_direct_e2_occ(const FMTIndex *fmt, bwtint_t k, int b1, int b2, bwtint_t cnt[4]);
void fmt_direct2_e2_occ(const FMTIndex *fmt, bwtint_t k, int b1, int b2, bwtint_t cnt[4]);
void fmt_e2_occ(const FMTIndex *fmt, bwtint_t k, int b1, int b2, bwtint_t cnt[4]); void fmt_e2_occ(const FMTIndex *fmt, bwtint_t k, int b1, int b2, bwtint_t cnt[4]);
// 扩展两个碱基 // 扩展两个碱基
void fmt_direct2_extend2(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok1, bwtintv_t *ok2, int is_back, int b1, int b2);
void fmt_direct_extend2(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok1, bwtintv_t *ok2, int is_back, int b1, int b2);
void fmt_extend2(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok1, bwtintv_t *ok2, int is_back, int b1, int b2); void fmt_extend2(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok1, bwtintv_t *ok2, int is_back, int b1, int b2);
// 扩展一个碱基 // 扩展一个碱基
void fmt_direct_extend1(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok, int is_back, int b1);
void fmt_extend1(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok, int is_back, int b1); void fmt_extend1(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t *ok, int is_back, int b1);
// 生成所有KMER_LEN长度的序列字符串表示 // 生成所有KMER_LEN长度的序列字符串表示
void gen_all_seq(char **seq_arr, int kmer_len); void gen_all_seq(char **seq_arr, int kmer_len);

24
ksw.c
View File

@ -413,15 +413,24 @@ typedef struct {
int32_t h, e; int32_t h, e;
} eh_t; } eh_t;
int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off) int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off, buf_t *buf)
{ {
eh_t *eh; // score array eh_t *eh; // score array
int8_t *qp; // query profile int8_t *qp; // query profile
int i, j, k, oe_del = o_del + e_del, oe_ins = o_ins + e_ins, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off; int i, j, k, oe_del = o_del + e_del, oe_ins = o_ins + e_ins, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
int mem_size = qlen * m + (qlen + 1) * 8;
assert(h0 > 0); assert(h0 > 0);
// allocate memory // allocate memory
qp = malloc(qlen * m); if (mem_size > buf->m) {
eh = calloc(qlen + 1, 8); buf->m = mem_size;
buf->addr = realloc(buf->addr, mem_size);
}
//qp = malloc(qlen * m);
//eh = calloc(qlen + 1, 8);
qp = (int8_t*)buf->addr;
eh = (eh_t*)(buf->addr + qlen * m);
memset(eh, 0, (qlen + 1) * 8);
// generate the query profile // generate the query profile
for (k = i = 0; k < m; ++k) { for (k = i = 0; k < m; ++k) {
const int8_t *p = &mat[k * m]; const int8_t *p = &mat[k * m];
@ -505,7 +514,7 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
end = j + 2 < qlen? j + 2 : qlen; end = j + 2 < qlen? j + 2 : qlen;
//beg = 0; end = qlen; // uncomment this line for debugging //beg = 0; end = qlen; // uncomment this line for debugging
} }
free(eh); free(qp); // free(eh); free(qp);
if (_qle) *_qle = max_j + 1; if (_qle) *_qle = max_j + 1;
if (_tle) *_tle = max_i + 1; if (_tle) *_tle = max_i + 1;
if (_gtle) *_gtle = max_ie + 1; if (_gtle) *_gtle = max_ie + 1;
@ -514,9 +523,12 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
return max; return max;
} }
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off) int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off, buf_t *buf)
{ {
return ksw_extend2(qlen, query, tlen, target, m, mat, gapo, gape, gapo, gape, w, end_bonus, zdrop, h0, qle, tle, gtle, gscore, max_off); if (buf == 0) buf = calloc(1, sizeof(buf_t));
return ksw_extend2(qlen, query, tlen, target, m, mat, gapo, gape, gapo, gape, w, end_bonus, zdrop, h0, qle, tle, gtle, gscore, max_off, buf);
free(buf->addr);
free(buf);
} }
/******************** /********************

7
ksw.h
View File

@ -2,6 +2,7 @@
#define __AC_KSW_H #define __AC_KSW_H
#include <stdint.h> #include <stdint.h>
#include "utils.h"
#define KSW_XBYTE 0x10000 #define KSW_XBYTE 0x10000
#define KSW_XSTOP 0x20000 #define KSW_XSTOP 0x20000
@ -104,10 +105,10 @@ extern "C" {
* *
* @return best semi-local alignment score * @return best semi-local alignment score
*/ */
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off); int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off, buf_t *buf);
int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off); int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off, buf_t *buf);
int ksw_extend2_avx2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del, int ksw_extend2_avx2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del,
int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off); int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off, buf_t *buf);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -1,11 +1,16 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h> #include <stdint.h>
#include <assert.h> #include <assert.h>
#include <emmintrin.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include <immintrin.h> #include <immintrin.h>
#include <emmintrin.h> #include <emmintrin.h>
extern FILE *gfp1, *gfp2, *gfp3;
//#define DEBUG_SW_EXTEND 1
#define NO_VAL -1
#ifdef __GNUC__ #ifdef __GNUC__
#define LIKELY(x) __builtin_expect((x),1) #define LIKELY(x) __builtin_expect((x),1)
#define UNLIKELY(x) __builtin_expect((x),0) #define UNLIKELY(x) __builtin_expect((x),0)
@ -20,12 +25,13 @@
#define MIN(x, y) ((x) < (y) ? (x) : (y)) #define MIN(x, y) ((x) < (y) ? (x) : (y))
#define SIMD_WIDTH 16 #define SIMD_WIDTH 16
typedef struct { size_t m; uint8_t *addr; } buf_t;
extern int ksw_extend2_avx2_u8(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del, extern int ksw_extend2_avx2_u8(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del,
int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off); int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off, buf_t *buf);
int ksw_extend2_origin(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del,
int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off);
int ksw_extend2_origin(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del,
int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off);
static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = { static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
{0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, {0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
@ -152,6 +158,7 @@ static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
int pos = SIMD_WIDTH - 1 - (( __builtin_clz(mask)) >> 1); \ int pos = SIMD_WIDTH - 1 - (( __builtin_clz(mask)) >> 1); \
mj = j - 1 + pos; \ mj = j - 1 + pos; \
mi = i - 1 - pos; \ mi = i - 1 - pos; \
/*if (m >= max) fprintf(stderr, "%d %d %d %d %d %d %d\n", iend, beg, mi, mj, mask, pos, m);*/ \
} \ } \
} \ } \
} }
@ -186,19 +193,18 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
int *_tle, // 匹配得到全局最大得分的碱基在reference的位置 int *_tle, // 匹配得到全局最大得分的碱基在reference的位置
int *_gtle, // query全部匹配上的target的长度 int *_gtle, // query全部匹配上的target的长度
int *_gscore, // query的端到端匹配得分 int *_gscore, // query的端到端匹配得分
int *_max_off) // 取得最大得分时在query和reference上位置差的 最大值 int *_max_off, // 取得最大得分时在query和reference上位置差的 最大值
buf_t *buf) // 之前已经开辟过的缓存
{ {
//return ksw_extend2_origin(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off); //return ksw_extend2_origin(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off);
// fprintf(stderr, "qlen: %d, tlen: %d\n", qlen, tlen);
if (qlen * a + h0 < 255) return ksw_extend2_avx2_u8(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, a, b, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off); if (qlen * a + h0 < 255) return ksw_extend2_avx2_u8(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, a, b, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off, buf);
int16_t *mA,*hA, *eA, *fA, *mA1, *mA2, *hA0, *hA1, *eA1, *fA1, *hA2, *eA2, *fA2; // hA0保存上上个col的H其他的保存上个H E F M int16_t *mA,*hA, *eA, *fA, *mA1, *mA2, *hA0, *hA1, *eA1, *fA1, *hA2, *eA2, *fA2; // hA0保存上上个col的H其他的保存上个H E F M
int16_t *seq, *ref; int16_t *seq, *ref;
uint8_t *mem; uint8_t *mem;
int16_t *qtmem, *vmem; int16_t *qtmem, *vmem;
int seq_size = qlen + SIMD_WIDTH, ref_size = tlen + SIMD_WIDTH; int seq_size = qlen + SIMD_WIDTH, ref_size = tlen + SIMD_WIDTH;
int i, iStart, D, j, k, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off; int i, ibeg, D, j, k, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
int Dloop = tlen + qlen; // 循环跳出条件 int Dloop = tlen + qlen; // 循环跳出条件
int span, beg1, end1; // 边界条件计算 int span, beg1, end1; // 边界条件计算
int col_size = qlen + 2 + SIMD_WIDTH; int col_size = qlen + 2 + SIMD_WIDTH;
@ -210,8 +216,15 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
assert(h0 > 0); assert(h0 > 0);
// allocate memory // allocate memory
mem = malloc(mem_size); //mem = malloc(mem_size);
qtmem = (int16_t*)&mem[0];
if (buf->m < mem_size) {
buf->m = mem_size;
buf->addr = realloc(buf->addr, mem_size);
}
mem = buf->addr;
qtmem = (int16_t *)&mem[0];
seq=&qtmem[0]; ref=&qtmem[seq_size]; seq=&qtmem[0]; ref=&qtmem[seq_size];
if (is_left) { if (is_left) {
for (i=0; i<qlen; ++i) seq[i] = query[qlen - 1 - i]; for (i=0; i<qlen; ++i) seq[i] = query[qlen - 1 - i];
@ -259,6 +272,40 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
int m_last=0; int m_last=0;
int iend; int iend;
//#undef KSW_EQUAL
#ifdef KSW_EQUAL
int midx = 1, icheck = 0, checkspecial = 1;
int m3 = 0, m2 = 0, m1 = 0;
//int marr[10] = {0};
//int marr[b];
//memset(marr, 0, 4 * b);
#endif
//int print_flag = 0; //(qlen == 64 && tlen == 123);
#ifdef DEBUG_SW_EXTEND
int dii, djj;
int16_t ins[tlen + 1][qlen + 2];
int16_t del[tlen + 1][qlen + 2];
int16_t score[tlen + 1][qlen + 2];
for (dii = 0; dii <= tlen; ++dii)
{
for (djj = 0; djj <= qlen; ++djj)
{
ins[dii][djj] = del[dii][djj] = score[dii][djj] = NO_VAL;
}
}
for (dii = 1; dii <= tlen; ++dii)
{
del[dii][0] = MAX(0, h0 - o_del - e_del * dii);
score[dii][0] = del[dii][0];
}
for (djj = 1; djj <= qlen; ++djj)
{
ins[0][djj] = MAX(0, h0 - o_ins - e_ins * djj);
score[0][djj] = ins[0][djj];
}
ins[0][0] = del[0][0] = score[0][0] = h0;
#endif
for (D = 1; LIKELY(D < Dloop); ++D) { for (D = 1; LIKELY(D < Dloop); ++D) {
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况 // 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
@ -277,18 +324,24 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
iend = D - (beg - 1); // ref开始计算的位置倒序 iend = D - (beg - 1); // ref开始计算的位置倒序
span = end - beg; span = end - beg;
iStart = iend - span - 1; // 0开始的ref索引位置 ibeg = iend - span - 1; // 0开始的ref索引位置
// 每一轮需要记录的数据 // 每一轮需要记录的数据
int m = 0, mj = -1, mi = -1; int m = 0, mj = -1, mi = -1;
max_vec = zero_vec; max_vec = zero_vec;
//if (print_flag)
//{
//fprintf(stderr, "D: %d, iend: %d, jbeg: %d\n", D, iend, beg);
//}
// 要处理边界 // 要处理边界
// 左边界 处理f (insert) // 左边界 处理f (insert)
if (iStart == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); } if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); }
// 上边界 // 上边界
if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); } if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); }
else { hA1[beg-1] = 0; eA1[beg-1] = 0; } else if (D & 1) {
hA1[beg - 1] = 0;
hA2[beg - 1] = 0;
}
for (j=beg, i=iend; j<=end+1-SIMD_WIDTH; j+=SIMD_WIDTH, i-=SIMD_WIDTH) { for (j=beg, i=iend; j<=end+1-SIMD_WIDTH; j+=SIMD_WIDTH, i-=SIMD_WIDTH) {
// 取数据 // 取数据
@ -316,17 +369,89 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
SIMD_FIND_MAX; SIMD_FIND_MAX;
#ifdef KSW_EQUAL
#if 0
if (hA1[0] < b && checkspecial) {
int mi;
if (hA1[0] == b - 1) {
icheck = iend + 1;
}
for (mi = 0; mi < b - 1; ++mi) {
if (midx - mi > 0)
marr[mi] = MAX(marr[mi], hA2[midx - mi]);
}
midx += 1;
if (ibeg > icheck)
{
int stopCalc = 0;
for (mi = 0; mi < b - 1; ++mi)
{
stopCalc |= !marr[mi];
}
if (stopCalc)
break;
else
checkspecial = 0;
}
}
#else
if (hA1[0] < 4 && checkspecial) { // b == 4
if (hA1[0] == 3) {
icheck = iend + 1;
} else if (midx == 2) {
m2 = MAX(m2, hA2[midx - 1]);
} else {
m2 = MAX(m2, hA2[midx - 1]);
m1 = MAX(m1, hA2[midx - 2]);
}
m3 = MAX(m3, hA2[midx]);
midx += 1;
if (ibeg > icheck)
{
if (!m1 || !m2 || !m3)
break;
else
checkspecial = 0;
}
//if (print_flag) {
//fprintf(stderr, "jbeg: %d, ibeg: %d, iend: %d, icheck: %d, score: %d %d %d, j: %d\n", beg, ibeg, iend, icheck, hA2[midx + 1], hA2[midx + 2], hA2[midx + 3], midx);
//if (midx > 2) fprintf(stderr, "%d, %d, %d\n", hA2[midx-1], hA2[midx-2], hA2[midx-3]);
//fprintf(stderr, "jbeg: %d, ibeg: %d, iend: %d, icheck: %d, hA1: %d, score: %d %d %d, j: %d\n", beg, ibeg, iend, icheck, hA1[0], m1, m2, m3, midx);
//}
}
#endif
#endif
#ifdef DEBUG_SW_EXTEND
for (djj = beg; djj <= end; ++djj)
{
dii = D - djj + 1;
ins[dii][djj] = fA2[djj];
del[dii][djj] = eA2[djj];
score[dii][djj] = hA2[djj];
}
//if (print_flag)
//{
//fprintf(stderr, "score: %d %d %d\n", hA2[beg], hA2[beg+1], hA2[beg+2]);
//}
#endif
// 注意最后跳出循环j的值 // 注意最后跳出循环j的值
j = end + 1; j = end + 1;
if (j == qlen + 1) { if (j == qlen + 1) {
max_ie = gscore > hA2[qlen] ? max_ie : iStart; max_ie = gscore > hA2[qlen] ? max_ie : ibeg;
gscore = gscore > hA2[qlen] ? gscore : hA2[qlen]; gscore = gscore > hA2[qlen] ? gscore : hA2[qlen];
} }
if (m == 0 && m_last==0) break; // 一定要注意,斜对角遍历和按列遍历的不同点 if (m == 0 && m_last==0) break; // 一定要注意,斜对角遍历和按列遍历的不同点
//if (m == 0 && m_last < 2) break;
if (m > max) { if (m > max) {
max = m, max_i = mi, max_j = mj; max = m, max_i = mi, max_j = mj;
max_off = max_off > abs(mj - mi)? max_off : abs(mj - mi); max_off = max_off > abs(mj - mi) ? max_off : abs(mj - mi);
} else if (m == max && max_i >= mi && mj > max_j) {
max_i = mi, max_j = mj;
max_off = max_off > abs(mj - mi) ? max_off : abs(mj - mi);
} }
else if (zdrop > 0) { else if (zdrop > 0) {
if (mi - max_i > mj - max_j) { if (mi - max_i > mj - max_j) {
@ -347,7 +472,49 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
SWAP_DATA_POINTER; SWAP_DATA_POINTER;
} }
free(mem); #ifdef DEBUG_SW_EXTEND
fprintf(gfp1, "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max);
fprintf(gfp2, "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max);
fprintf(gfp3, "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max);
#endif
#ifdef DEBUG_SW_EXTEND
fprintf(gfp1, "%-4d", -1);
fprintf(gfp2, "%-4d", -1);
fprintf(gfp3, "%-4d", -1);
fprintf(gfp1, "%-4d", -1);
fprintf(gfp2, "%-4d", -1);
fprintf(gfp3, "%-4d", -1);
for (djj = 0; djj < qlen; ++djj) {
fprintf(gfp1, "%-4c", "ACGTN"[query[djj]]);
fprintf(gfp2, "%-4c", "ACGTN"[query[djj]]);
fprintf(gfp3, "%-4c", "ACGTN"[query[djj]]);
}
fprintf(gfp1, "\n");
fprintf(gfp2, "\n");
fprintf(gfp3, "\n");
for (dii = 0; dii <= tlen; ++dii)
{
if (dii > 0) {
fprintf(gfp1, "%-4c", "ACGTN"[target[dii - 1]]);
fprintf(gfp2, "%-4c", "ACGTN"[target[dii - 1]]);
fprintf(gfp3, "%-4c", "ACGTN"[target[dii - 1]]);
} else {
fprintf(gfp1, "%-4d", -1);
fprintf(gfp2, "%-4d", -1);
fprintf(gfp3, "%-4d", -1);
}
for (djj = 0; djj <= qlen; ++djj)
{
fprintf(gfp1, "%-4d", score[dii][djj]);
fprintf(gfp2, "%-4d", ins[dii][djj]);
fprintf(gfp3, "%-4d", del[dii][djj]);
}
fprintf(gfp1, "\n");
fprintf(gfp2, "\n");
fprintf(gfp3, "\n");
}
#endif
// free(mem);
if (_qle) *_qle = max_j + 1; if (_qle) *_qle = max_j + 1;
if (_tle) *_tle = max_i + 1; if (_tle) *_tle = max_i + 1;
if (_gtle) *_gtle = max_ie + 1; if (_gtle) *_gtle = max_ie + 1;
@ -417,12 +584,39 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长
max_del = (int)((double)(qlen * max + end_bonus - o_del) / e_del + 1.); max_del = (int)((double)(qlen * max + end_bonus - o_del) / e_del + 1.);
max_del = max_del > 1? max_del : 1; max_del = max_del > 1? max_del : 1;
w = w < max_del? w : max_del; // TODO: is this necessary? w = w < max_del? w : max_del; // TODO: is this necessary?
// printf("%d\n", w); //fprintf(stderr, "%d\n", w);
// DP loop // DP loop
max = h0, max_i = max_j = -1; max_ie = -1, gscore = -1; max = h0, max_i = max_j = -1; max_ie = -1, gscore = -1;
max_off = 0; max_off = 0;
beg = 0, end = qlen; beg = 0, end = qlen;
//int print_flag = 0; //(qlen == 116 && tlen == 241);
//fprintf(stderr, "%d %d %d\n", print_flag, qlen, tlen);
#ifdef DEBUG_SW_EXTEND
int dii, djj;
int16_t ins[tlen + 1][qlen + 2];
int16_t del[tlen + 1][qlen + 2];
int16_t score[tlen + 1][qlen + 2];
for (dii = 0; dii <= tlen; ++dii)
{
for (djj = 0; djj <= qlen; ++djj)
{
ins[dii][djj] = del[dii][djj] = score[dii][djj] = NO_VAL;
}
}
for (dii = 1; dii <= tlen; ++dii)
{
del[dii][0] = MAX(0, h0 - o_del - e_del * dii);
score[dii][0] = del[dii][0];
}
for (djj = 1; djj <= qlen; ++djj)
{
ins[0][djj] = MAX(0, h0 - o_ins - e_ins * djj);
score[0][djj] = ins[0][djj];
}
ins[0][0] = del[0][0] = score[0][0] = h0;
#endif
for (i = 0; LIKELY(i < tlen); ++i) { for (i = 0; LIKELY(i < tlen); ++i) {
int t, f = 0, h1, m = 0, mj = -1; int t, f = 0, h1, m = 0, mj = -1;
int8_t *q = &qp[ref[i] * qlen]; int8_t *q = &qp[ref[i] * qlen];
@ -435,7 +629,11 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长
h1 = h0 - (o_del + e_del * (i + 1)); h1 = h0 - (o_del + e_del * (i + 1));
if (h1 < 0) h1 = 0; if (h1 < 0) h1 = 0;
} else h1 = 0; } else h1 = 0;
//m = h1;
for (j = beg; LIKELY(j < end); ++j) { for (j = beg; LIKELY(j < end); ++j) {
#ifdef DEBUG_SW_EXTEND
ins[i+1][j+1] = f;
#endif
// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1) // At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
// Similar to SSE2-SW, cells are computed in the following order: // Similar to SSE2-SW, cells are computed in the following order:
// H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)} // H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
@ -444,16 +642,22 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长
eh_t *p = &eh[j]; eh_t *p = &eh[j];
int h, M = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j) int h, M = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j)
p->h = h1; // set H(i,j-1) for the next row p->h = h1; // set H(i,j-1) for the next row
M = M? M + q[j] : 0;// separating H and M to disallow a cigar like "100M3I3D20M" M = M? M + q[j] : 0;// separating H and M to disallow a cigar like "100M3I3D20M",保证分值不小于0sw和nw的区别
h = M > e? M : e; // e and f are guaranteed to be non-negative, so h>=0 even if M<0 h = M > e? M : e; // e and f are guaranteed to be non-negative, so h>=0 even if M<0
h = h > f? h : f; h = h > f? h : f;
h1 = h; // save H(i,j) to h1 for the next column h1 = h; // save H(i,j) to h1 for the next column
#ifdef DEBUG_SW_EXTEND
score[i+1][j+1] = h;
#endif
mj = m > h? mj : j; // record the position where max score is achieved mj = m > h? mj : j; // record the position where max score is achieved
m = m > h? m : h; // m is stored at eh[mj+1] m = m > h? m : h; // m is stored at eh[mj+1]
t = M - oe_del; t = M - oe_del;
t = t > 0? t : 0; t = t > 0? t : 0;
e -= e_del; e -= e_del;
e = e > t? e : t; // computed E(i+1,j) e = e > t? e : t; // computed E(i+1,j)
#ifdef DEBUG_SW_EXTEND
del[i+1][j+1] = e;
#endif
p->e = e; // save E(i+1,j) for the next row p->e = e; // save E(i+1,j) for the next row
t = M - oe_ins; t = M - oe_ins;
t = t > 0? t : 0; t = t > 0? t : 0;
@ -469,6 +673,7 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长
if (m > max) { if (m > max) {
max = m, max_i = i, max_j = mj; max = m, max_i = i, max_j = mj;
max_off = max_off > abs(mj - i)? max_off : abs(mj - i); max_off = max_off > abs(mj - i)? max_off : abs(mj - i);
//fprintf(stderr, "%d %d %d %d\n", i, mj, max_off, m);
} else if (zdrop > 0) { } else if (zdrop > 0) {
if (i - max_i > mj - max_j) { if (i - max_i > mj - max_j) {
if (max - m - ((i - max_i) - (mj - max_j)) * e_del > zdrop) break; if (max - m - ((i - max_i) - (mj - max_j)) * e_del > zdrop) break;
@ -477,12 +682,61 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长
} }
} }
// update beg and end for the next round // update beg and end for the next round
for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j); for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j); // 这里为什么不考虑finsert score
beg = j; beg = j;
for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j); for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j);
end = j + 2 < qlen? j + 2 : qlen; end = j + 2 < qlen? j + 2 : qlen;
//beg = 0; end = qlen; // uncomment this line for debugging //beg = 0; end = qlen; // uncomment this line for debugging
//if (print_flag) {
// fprintf(stderr, "beg: %d; end: %d\n", beg, end);
//}
} }
#ifdef DEBUG_SW_EXTEND
fprintf(gfp1, "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max);
fprintf(gfp2, "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max);
fprintf(gfp3, "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max);
#endif
#ifdef DEBUG_SW_EXTEND
fprintf(gfp1, "%-4d", -1);
fprintf(gfp2, "%-4d", -1);
fprintf(gfp3, "%-4d", -1);
fprintf(gfp1, "%-4d", -1);
fprintf(gfp2, "%-4d", -1);
fprintf(gfp3, "%-4d", -1);
for (djj = 0; djj < qlen; ++djj)
{
fprintf(gfp1, "%-4c", "ACGTN"[query[djj]]);
fprintf(gfp2, "%-4c", "ACGTN"[query[djj]]);
fprintf(gfp3, "%-4c", "ACGTN"[query[djj]]);
}
fprintf(gfp1, "\n");
fprintf(gfp2, "\n");
fprintf(gfp3, "\n");
for (dii = 0; dii <= tlen; ++dii)
{
if (dii > 0)
{
fprintf(gfp1, "%-4c", "ACGTN"[target[dii - 1]]);
fprintf(gfp2, "%-4c", "ACGTN"[target[dii - 1]]);
fprintf(gfp3, "%-4c", "ACGTN"[target[dii - 1]]);
}
else
{
fprintf(gfp1, "%-4d", -1);
fprintf(gfp2, "%-4d", -1);
fprintf(gfp3, "%-4d", -1);
}
for (djj = 0; djj <= qlen; ++djj)
{
fprintf(gfp1, "%-4d", score[dii][djj]);
fprintf(gfp2, "%-4d", ins[dii][djj]);
fprintf(gfp3, "%-4d", del[dii][djj]);
}
fprintf(gfp1, "\n");
fprintf(gfp2, "\n");
fprintf(gfp3, "\n");
}
#endif
free(eh); free(qp); free(qmem); free(eh); free(qp); free(qmem);
if (_qle) *_qle = max_j + 1; if (_qle) *_qle = max_j + 1;

View File

@ -20,6 +20,8 @@
#define MIN(x, y) ((x) < (y) ? (x) : (y)) #define MIN(x, y) ((x) < (y) ? (x) : (y))
#define SIMD_WIDTH 32 #define SIMD_WIDTH 32
typedef struct { size_t m; uint8_t *addr; } buf_t;
static const uint8_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = { static const uint8_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
{0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, {0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, {0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
@ -204,13 +206,14 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长
int *_tle, // 匹配得到全局最大得分的碱基在reference的位置 int *_tle, // 匹配得到全局最大得分的碱基在reference的位置
int *_gtle, // query全部匹配上的target的长度 int *_gtle, // query全部匹配上的target的长度
int *_gscore, // query的端到端匹配得分 int *_gscore, // query的端到端匹配得分
int *_max_off) // 取得最大得分时在query和reference上位置差的 最大值 int *_max_off, // 取得最大得分时在query和reference上位置差的 最大值
buf_t *buf) // 之前已经开辟过的缓存
{ {
uint8_t *mA,*hA, *eA, *fA, *mA1, *mA2, *hA0, *hA1, *eA1, *fA1, *hA2, *eA2, *fA2; // hA0保存上上个col的H其他的保存上个H E F M uint8_t *mA,*hA, *eA, *fA, *mA1, *mA2, *hA0, *hA1, *eA1, *fA1, *hA2, *eA2, *fA2; // hA0保存上上个col的H其他的保存上个H E F M
uint8_t *seq, *ref; uint8_t *seq, *ref;
uint8_t *mem, *qtmem, *vmem; uint8_t *mem, *qtmem, *vmem;
int seq_size = qlen + SIMD_WIDTH, ref_size = tlen + SIMD_WIDTH; int seq_size = qlen + SIMD_WIDTH, ref_size = tlen + SIMD_WIDTH;
int i, iStart, D, j, k, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off; int i, ibeg, D, j, k, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
int Dloop = tlen + qlen; // 循环跳出条件 int Dloop = tlen + qlen; // 循环跳出条件
int span, beg1, end1; // 边界条件计算 int span, beg1, end1; // 边界条件计算
int col_size = qlen + 2 + SIMD_WIDTH; int col_size = qlen + 2 + SIMD_WIDTH;
@ -222,7 +225,13 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长
assert(h0 > 0); assert(h0 > 0);
// allocate memory // allocate memory
mem = malloc(mem_size); //mem = malloc(mem_size);
if (buf->m < mem_size) {
buf->m = mem_size;
buf->addr = realloc(buf->addr, mem_size);
}
mem = buf->addr;
qtmem = &mem[0]; qtmem = &mem[0];
seq=(uint8_t*)&qtmem[0]; ref=(uint8_t*)&qtmem[seq_size]; seq=(uint8_t*)&qtmem[0]; ref=(uint8_t*)&qtmem[seq_size];
if (is_left) { if (is_left) {
@ -272,7 +281,13 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长
int m_last=0; int m_last=0;
int iend; int iend;
#ifdef KSW_EQUAL
int midx = 1, icheck = 0, checkspecial = 1;
int m3 = 0, m2 = 0, m1 = 0;
//int marr[10] = {0};
// int marr[b];
// memset(marr, 0, 4 * b);
#endif
for (D = 1; LIKELY(D < Dloop); ++D) { for (D = 1; LIKELY(D < Dloop); ++D) {
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况 // 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
if (D > tlen) { if (D > tlen) {
@ -290,7 +305,7 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长
iend = D - (beg - 1); // ref开始计算的位置倒序 iend = D - (beg - 1); // ref开始计算的位置倒序
span = end - beg; span = end - beg;
iStart = iend - span - 1; // 0开始的ref索引位置 ibeg = iend - span - 1; // 0开始的ref索引位置
// 每一轮需要记录的数据 // 每一轮需要记录的数据
int m = 0, mj = -1, mi = -1; int m = 0, mj = -1, mi = -1;
@ -298,10 +313,13 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长
// 要处理边界 // 要处理边界
// 左边界 处理f (insert) // 左边界 处理f (insert)
if (iStart == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); } if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); }
// 上边界 // 上边界
if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); } if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); }
else { hA1[beg-1] = 0; eA1[beg-1] = 0; } else if (D & 1) {
hA1[beg - 1] = 0;
hA2[beg - 1] = 0;
}
for (j=beg, i=iend; j<=end+1-SIMD_WIDTH; j+=SIMD_WIDTH, i-=SIMD_WIDTH) { for (j=beg, i=iend; j<=end+1-SIMD_WIDTH; j+=SIMD_WIDTH, i-=SIMD_WIDTH) {
// 取数据 // 取数据
@ -329,11 +347,65 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长
SIMD_FIND_MAX; SIMD_FIND_MAX;
#ifdef KSW_EQUAL
#if 0
if (hA1[0] < b && checkspecial) {
int mi;
if (hA1[0] == b - 1) {
icheck = iend + 1;
}
for (mi = 0; mi < b - 1; ++mi) {
if (midx - mi > 0)
marr[mi] = MAX(marr[mi], hA2[midx - mi]);
}
midx += 1;
if (ibeg > icheck)
{
int stopCalc = 0;
for (mi = 0; mi < b - 1; ++mi)
{
stopCalc |= !marr[mi];
}
if (stopCalc)
break;
else
checkspecial = 0;
}
}
#else
if (hA1[0] < 4 && checkspecial)
{ // b == 4
if (hA1[0] == 3)
{
icheck = iend + 1;
}
else if (midx == 2)
{
m2 = MAX(m2, hA2[midx - 1]);
}
else
{
m2 = MAX(m2, hA2[midx - 1]);
m1 = MAX(m1, hA2[midx - 2]);
}
m3 = MAX(m3, hA2[midx]);
midx += 1;
if (ibeg > icheck)
{
if (!m1 || !m2 || !m3)
break;
else
checkspecial = 0;
}
}
#endif
#endif
// 注意最后跳出循环j的值 // 注意最后跳出循环j的值
j = end + 1; j = end + 1;
if (j == qlen + 1) { if (j == qlen + 1) {
max_ie = gscore > hA2[qlen] ? max_ie : iStart; max_ie = gscore > hA2[qlen] ? max_ie : ibeg;
gscore = gscore > hA2[qlen] ? gscore : hA2[qlen]; gscore = gscore > hA2[qlen] ? gscore : hA2[qlen];
} }
if (m == 0 && m_last==0) break; // 一定要注意,斜对角遍历和按列遍历的不同点 if (m == 0 && m_last==0) break; // 一定要注意,斜对角遍历和按列遍历的不同点
@ -341,6 +413,10 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长
max = m, max_i = mi, max_j = mj; max = m, max_i = mi, max_j = mj;
max_off = max_off > abs(mj - mi)? max_off : abs(mj - mi); max_off = max_off > abs(mj - mi)? max_off : abs(mj - mi);
} }
else if (m == max && max_i >= mi && mj > max_j) {
max_i = mi, max_j = mj;
max_off = max_off > abs(mj - mi) ? max_off : abs(mj - mi);
}
else if (zdrop > 0) { else if (zdrop > 0) {
if (mi - max_i > mj - max_j) { if (mi - max_i > mj - max_j) {
if (max - m - ((mi - max_i) - (mj - max_j)) * e_del > zdrop) break; if (max - m - ((mi - max_i) - (mj - max_j)) * e_del > zdrop) break;
@ -360,7 +436,7 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长
SWAP_DATA_POINTER; SWAP_DATA_POINTER;
} }
free(mem); // free(mem);
if (_qle) *_qle = max_j + 1; if (_qle) *_qle = max_j + 1;
if (_tle) *_tle = max_i + 1; if (_tle) *_tle = max_i + 1;
if (_gtle) *_gtle = max_ie + 1; if (_gtle) *_gtle = max_ie + 1;

108
out.sam
View File

@ -1,108 +0,0 @@
@SQ SN:1 LN:249250621
@SQ SN:2 LN:243199373
@SQ SN:3 LN:198022430
@SQ SN:4 LN:191154276
@SQ SN:5 LN:180915260
@SQ SN:6 LN:171115067
@SQ SN:7 LN:159138663
@SQ SN:8 LN:146364022
@SQ SN:9 LN:141213431
@SQ SN:10 LN:135534747
@SQ SN:11 LN:135006516
@SQ SN:12 LN:133851895
@SQ SN:13 LN:115169878
@SQ SN:14 LN:107349540
@SQ SN:15 LN:102531392
@SQ SN:16 LN:90354753
@SQ SN:17 LN:81195210
@SQ SN:18 LN:78077248
@SQ SN:19 LN:59128983
@SQ SN:20 LN:63025520
@SQ SN:21 LN:48129895
@SQ SN:22 LN:51304566
@SQ SN:X LN:155270560
@SQ SN:Y LN:59373566
@SQ SN:MT LN:16569
@SQ SN:GL000207.1 LN:4262
@SQ SN:GL000226.1 LN:15008
@SQ SN:GL000229.1 LN:19913
@SQ SN:GL000231.1 LN:27386
@SQ SN:GL000210.1 LN:27682
@SQ SN:GL000239.1 LN:33824
@SQ SN:GL000235.1 LN:34474
@SQ SN:GL000201.1 LN:36148
@SQ SN:GL000247.1 LN:36422
@SQ SN:GL000245.1 LN:36651
@SQ SN:GL000197.1 LN:37175
@SQ SN:GL000203.1 LN:37498
@SQ SN:GL000246.1 LN:38154
@SQ SN:GL000249.1 LN:38502
@SQ SN:GL000196.1 LN:38914
@SQ SN:GL000248.1 LN:39786
@SQ SN:GL000244.1 LN:39929
@SQ SN:GL000238.1 LN:39939
@SQ SN:GL000202.1 LN:40103
@SQ SN:GL000234.1 LN:40531
@SQ SN:GL000232.1 LN:40652
@SQ SN:GL000206.1 LN:41001
@SQ SN:GL000240.1 LN:41933
@SQ SN:GL000236.1 LN:41934
@SQ SN:GL000241.1 LN:42152
@SQ SN:GL000243.1 LN:43341
@SQ SN:GL000242.1 LN:43523
@SQ SN:GL000230.1 LN:43691
@SQ SN:GL000237.1 LN:45867
@SQ SN:GL000233.1 LN:45941
@SQ SN:GL000204.1 LN:81310
@SQ SN:GL000198.1 LN:90085
@SQ SN:GL000208.1 LN:92689
@SQ SN:GL000191.1 LN:106433
@SQ SN:GL000227.1 LN:128374
@SQ SN:GL000228.1 LN:129120
@SQ SN:GL000214.1 LN:137718
@SQ SN:GL000221.1 LN:155397
@SQ SN:GL000209.1 LN:159169
@SQ SN:GL000218.1 LN:161147
@SQ SN:GL000220.1 LN:161802
@SQ SN:GL000213.1 LN:164239
@SQ SN:GL000211.1 LN:166566
@SQ SN:GL000199.1 LN:169874
@SQ SN:GL000217.1 LN:172149
@SQ SN:GL000216.1 LN:172294
@SQ SN:GL000215.1 LN:172545
@SQ SN:GL000205.1 LN:174588
@SQ SN:GL000219.1 LN:179198
@SQ SN:GL000224.1 LN:179693
@SQ SN:GL000223.1 LN:180455
@SQ SN:GL000195.1 LN:182896
@SQ SN:GL000212.1 LN:186858
@SQ SN:GL000222.1 LN:186861
@SQ SN:GL000200.1 LN:187035
@SQ SN:GL000193.1 LN:189789
@SQ SN:GL000194.1 LN:191469
@SQ SN:GL000225.1 LN:211173
@SQ SN:GL000192.1 LN:547496
@SQ SN:NC_007605 LN:171823
@SQ SN:hs37d5 LN:35477943
@HD VN:1.5 SO:unsorted GO:query
@RG ID:normal SM:normal PL:illumina LB:normal PG:bwa
@PG ID:bwa PN:bwa VN:0.7.17-r1198-dirty CL:./bwa mem -t 1 -M -R @RG\tID:normal\tSM:normal\tPL:illumina\tLB:normal\tPG:bwa /home/zzh/reference/human_g1k_v37_decoy.fasta /home/zzh/fastq/diff_r1.fq /home/zzh/fastq/diff_r2.fq -o ./out.sam
A00289:748:HLCH3DSX3:4:1102:24749:34929 97 16 75642166 60 150M 12 66451372 0 AAAGCCATTGGCAGTAACATCCAAAGGCTGCTCAGGAACTGCACTCCAGCTGATGGCTATGAAAAGATAAGATTCTAGGGTTAAAACTGCTCAATTTCTTGTGAAAGTCAGAAGTATTTTAATAAAAATATTTCTCTGACTCTACAGACC FFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFF: NM:i:0 MD:Z:150 MC:Z:17S91M42S AS:i:150 XS:i:0 RG:Z:normal
A00289:748:HLCH3DSX3:4:1102:24749:34929 145 12 66451372 0 17S91M42S 16 75642166 0 TTTTTTTTTTTTTTTTTATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATAAATAATTCTTTGTGAAACCCCT F,F,FFF,:FFF,,F,F,,F,F:,FFF:F:FFF:FFFFFFF:F,F,F,FFFFFF::,,FFFFFFF,:F,:,F,::,,F:FF:,FF:F::F::FFFFFFFFFF,F:F,FFFF,FFFFF,FF,F,,F,F,,,,,,:,,,,,,,,,,,,,,,F NM:i:0 MD:Z:91 MC:Z:150M AS:i:91 XS:i:90 RG:Z:normal SA:Z:12,49106363,+,117S33M,0,0; XA:Z:12,-66451373,35S90M25S,0;12,-66451373,28S90M32S,0;6,-160521757,23S83M44S,0;15,-77910871,23S79M48S,0;
A00289:748:HLCH3DSX3:4:1102:24749:34929 385 12 49106363 0 117H33M 16 75642166 0 AAAAAAAAAAAAAAATAAAAAAAAAAAAAAAAA FFF:F:FFF,:F,F,,F,F,,FFF:,FFF,F,F NM:i:0 MD:Z:33 MC:Z:150M AS:i:33 XS:i:32 RG:Z:normal SA:Z:12,66451372,-,17S91M42S,0,0; XA:Z:4,-19079503,32M118S,0;15,-55218248,32M118S,0;X,-149551159,32M118S,0;
A00289:748:HLCH3DSX3:4:1104:6234:8656 113 12 66451373 8 12S91M47S 22 18873826 0 TTTTTTTTTTTTTTTTTTTTTTTTTATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGTTTGTTTTTTTTTTTTTTGGGGGGGGGGGGGGGTGAGGTGGTCTGTG ,,:FFF:,:,::,,,,,,FF,FFF,,FF,F,FFF:,,:FFFFFF:FFF,:,:FF:F:::::FFF::,::F:FFFFFF:,FFFF,,,:FFFFFFFF:,,::,,,,,:,,FFF:,,,FF:F,:,FFFF:F::,,FF:,:FFFFFF:FFFFFF NM:i:1 MD:Z:13T77 MC:Z:150M AS:i:86 XS:i:81 RG:Z:normal SA:Z:3,121668661,-,89S32M29S,0,0; XA:Z:12,-66451372,25S91M34S,2;6,-160521757,19S84M47S,1;15,-77910874,26S77M47S,0;7,-107410599,25M1I3M15D74M47S,16;
A00289:748:HLCH3DSX3:4:1104:6234:8656 369 3 121668661 0 89H32M29H 22 18873826 0 TTTTTTTTTTTTTGTTTGTTTTTTTTTTTTTT FFFFFF:,,::,,,,,:,,FFF:,,,FF:F,: NM:i:0 MD:Z:32 MC:Z:150M AS:i:32 XS:i:32 RG:Z:normal SA:Z:12,66451373,-,12S91M47S,8,1;
A00289:748:HLCH3DSX3:4:1104:6234:8656 177 22 18873826 22 150M 12 66451373 0 CTATACCCCACATCCTCATTCCCACCAGATGTCTTGGATCATGGAGGCTCTCCAGACAAAAGCCAGCAGTTAAGCTCCAGATTTCCTATAGAATCCTTTTCTAACAACCAGTGAGTGATTCCAGAATACGTACCATTGAATGTGCTCCCT FFF:FFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF NM:i:0 MD:Z:150 MC:Z:12S91M47S AS:i:150 XS:i:140 RG:Z:normal
A00289:748:HLCH3DSX3:4:1104:1741:18129 97 11 113220686 60 150M 12 66451373 0 TATTTCACCCGGATGCTGACTCCTTTTTTGGTTTCCTTGCAAGGTTACCCTGTTGGGATGTTGTTAGTTGGAGGACAGCTACCTCTGAGGTTATTATGGTTTTTTGCAGATTATTGGAAGCGCTGGTGTCATTTCTTGATTTCTCGGATA FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFF:FFFFFFF:FFFF NM:i:0 MD:Z:150 MC:Z:90M60S AS:i:150 XS:i:19 RG:Z:normal
A00289:748:HLCH3DSX3:4:1104:1741:18129 145 12 66451373 7 90M60S 11 113220686 0 TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATTTTTTTTTTTTTTTTTTTCCCAC FF:FFFF,F,,FFFFFFFF:FFFF:,F,,:F:F::::,F::::FFF::F::,,:FFF,:::FFFFFFFFFFF:FFFFFFFF,FFFF:FFF,:,:,,::FFFFFFFFFFF,FF:FFF:F:F:F:,F,,::F,,F,,FFFFF:F:,,,,,,, NM:i:1 MD:Z:81T8 MC:Z:150M AS:i:85 XS:i:81 RG:Z:normal SA:Z:8,112168232,-,79S59M12S,0,1; XA:Z:6,-160521757,81M69S,0;4,-82515382,13S69M68S,0;
A00289:748:HLCH3DSX3:4:1104:1741:18129 401 8 112168232 0 79H59M12H 11 113220686 0 TTATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATTTTTTTTTTTT FF,FFFF:FFF,:,:,,::FFFFFFFFFFF,FF:FFF:F:F:F:,F,,::F,,F,,FFF NM:i:1 MD:Z:46T12 MC:Z:150M AS:i:54 XS:i:53 RG:Z:normal SA:Z:12,66451373,-,90M60S,7,1;
A00289:748:HLCH3DSX3:4:1105:27281:29700 113 X 117117442 60 150M 12 66451372 0 CCCTGAAGTTGATTTTTTGAATATATCCTTGAAATCGGCTTGTTTTAATGACAATTGCCTCCGTCTCCTGAAAGTGGAGTGCAAAAAGAAACCTGGTTAATTGTAGTTTTATTATATTTAGTTCAAATTTTTCAAAGCATACCCTGACAG FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF NM:i:0 MD:Z:150 MC:Z:12S89M49S AS:i:150 XS:i:0 RG:Z:normal
A00289:748:HLCH3DSX3:4:1105:27281:29700 177 12 66451372 0 12S89M49S X 117117442 0 TTTTTTAAAAATATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATTTATTTATTTTTTTATTTTTTTATTATTTTTTTTTTTTTTTTTTTTTTTTTGGTGCCGCCCCC ,,,,:,,F:F,,,,:FF,F:FF,::F:F:F,,F::FFF,FFFFFFFFF,FFFFFF::FFF:F:FFFFFFF,FFFFFFF::FFFF:,:FF:,,F,:,FFFFF,:,,:F,,,,:,FFFFF:F,,FFF:FFFFF:,F,F,,,:,,,,,:FFFF NM:i:3 MD:Z:73T3T3T7 MC:Z:150M AS:i:74 XS:i:72 RG:Z:normal SA:Z:15,85942734,+,20S48M82S,0,1;
A00289:748:HLCH3DSX3:4:1105:27281:29700 417 15 85942734 0 20H48M82H X 117117442 0 AAAAAAAAAAAAAAAAATAATAAAAAAATAAAAAAATAAATAAATAAA FFFF:FFF,,F:FFFFF,:,,,,F:,,:,FFFFF,:,F,,:FF:,:FF NM:i:1 MD:Z:17A30 MC:Z:150M AS:i:43 XS:i:36 RG:Z:normal SA:Z:12,66451372,-,12S89M49S,0,3; XA:Z:11,-118049564,69S35M7D34M12S,11;2,+204499190,12S31M1D21M86S,3;
A00289:748:HLCH3DSX3:4:1106:2880:19288 97 4 169630630 0 29M1I4M1I68M9D22M25S 12 66451373 0 CAGCCTGGGTGACAGAGTGAGACTCCAGCTAAGGCAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAATGAAGGAAGGAAGGAGGGAGGGAGGGAGGGAGGGAGAGATCGGAAGAGCACACGTCTGAAC FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFF:FFF:FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFF NM:i:14 MD:Z:18C8T59G13^AAGGAAGGA22 MC:Z:36S91M23S AS:i:73 XS:i:72 RG:Z:normal
A00289:748:HLCH3DSX3:4:1106:2880:19288 145 12 66451373 9 36S91M23S 4 169630630 0 TATATACTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGTGGGTGGGTGGGTGGGTTGGCAG :,,,,,,,:FF:FFFF:,:FF:,F,F,FFF,,,FFF:FFFFFFFFF,:,,F:F,FFFF,:FFFFFFFFFFFFFFFFFFFF:FF:FFFF:FFFFFFF:FFF:FFFFFFF:FF,FFF:,:::,:F::,,:,,:,:,,,,,:,::,,,:F,FF NM:i:1 MD:Z:10T80 MC:Z:29M1I4M1I68M9D22M25S AS:i:86 XS:i:80 RG:Z:normal SA:Z:15,55218228,-,9S52M89S,0,0; XA:Z:15,-77910871,47S80M23S,0;6,-160521761,47S80M23S,0;6,-160521757,47S79M24S,0;7,-107410599,25S28M15D74M23S,16;
A00289:748:HLCH3DSX3:4:1106:2880:19288 401 15 55218228 0 9H52M89H 4 169630630 0 TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATTTTTTTTTTTTTT FF:FFFF:,:FF:,F,F,FFF,,,FFF:FFFFFFFFF,:,,F:F,FFFF,:F NM:i:0 MD:Z:52 MC:Z:29M1I4M1I68M9D22M25H AS:i:52 XS:i:50 RG:Z:normal SA:Z:12,66451373,-,36S91M23S,9,1;
A00289:748:HLCH3DSX3:4:1107:2365:20462 97 22 18737670 0 150M 12 66451372 0 GAGGACACCATACAAGCCGTCGCTAACAAGGACACTGTACACAACATCGCTAATGACGGCACCGTACAAGACATCACCAATGAGGGCGCTTTATACGACATTGCTAATGATACCGACAAGGCACGCTAACGTGGACGCTGTACACGACAT FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFF NM:i:0 MD:Z:150 MC:Z:21S90M39S AS:i:150 XS:i:150 RG:Z:normal
A00289:748:HLCH3DSX3:4:1107:2365:20462 145 12 66451372 13 21S90M39S 22 18737670 0 TTTTATATATTTATATTTATTATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGTTTGTTTTTTGTGGTGGGGGGGGGGTTTGGCATAGCAT F:F,,F,F,F:FFF,,,:,F:,:,,:,,,,:,F:,,FFFFFFFFFFFFF,F:FFFFFFFFF:FFFF,,FFFFF:FFF::FFFFF,F:F,,F:F,FFFFFFFF,:F:F::::,,:F,F,,F,:,,,,,::,,,,:,,:,,,,,,F,F::FF NM:i:0 MD:Z:90 MC:Z:150M AS:i:90 XS:i:84 RG:Z:normal XA:Z:6,-160521757,28S84M38S,0;15,-77910871,32S80M38S,0;7,-107410642,38S74M38S,0;
A00289:748:HLCH3DSX3:4:1107:26377:28385 113 GL000220.1 122227 60 150M 6 160521757 0 TGCTCTGTTGCTCACGCTGGTCTCAAACTCCTGGCCTTGACGCTTCTCCCGTCACATCCGCCGTCTGGTTGTTGAAATGAGCATCTCTCGTAAAATGGAAAAGATGAAAGAAATAAACACGAAGACGGAAAGCACGGTGTGAACGTTTCT FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF NM:i:0 MD:Z:150 MC:Z:85M65S AS:i:150 XS:i:113 RG:Z:normal
A00289:748:HLCH3DSX3:4:1107:26377:28385 177 6 160521757 0 85M65S GL000220.1 122227 0 TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGGTGTTTTTTTTTTGTTGGGGGTTGGGTGTTGGGGGGCGGGGGGGGGGGCGCCCCCCCCCAAGGAGA F:FF,F,,F,F,FFFFFF,:FF,:FF:F,:FF,FFFF:FFF:FF,FFFFFFFFFFFFFFFFFFFFFFFF:FF,F,F:F,:FF,,,FFFF,F,:,,,,,:::,,:,,,,::,,,,,,,,,F,,FF,:F:,,F,,,,,FF:FF:F,FFFF:: NM:i:0 MD:Z:85 MC:Z:150M AS:i:85 XS:i:84 RG:Z:normal XA:Z:12,-66451380,84M66S,0;12,-66451373,83M67S,0;15,-77910871,4S80M66S,0;

View File

@ -216,9 +216,9 @@ static void process_seqs(const pem_opt_t *opt, int n_, bseq1_t *seqs, int64_t cn
int main_pemerge(int argc, char *argv[]) int main_pemerge(int argc, char *argv[])
{ {
int c, flag = 0, i, n, min_ovlp = 10; int c, flag = 0, i, n, m = 0, min_ovlp = 10;
int64_t cnt[MAX_ERR+1]; int64_t cnt[MAX_ERR + 1], seq_size = 0;
bseq1_t *bseq; bseq1_t *bseq = 0;
gzFile fp, fp2 = 0; gzFile fp, fp2 = 0;
kseq_t *ks, *ks2 = 0; kseq_t *ks, *ks2 = 0;
pem_opt_t *opt; pem_opt_t *opt;
@ -269,10 +269,12 @@ int main_pemerge(int argc, char *argv[])
} }
memset(cnt, 0, 8 * (MAX_ERR+1)); memset(cnt, 0, 8 * (MAX_ERR+1));
while ((bseq = bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2)) != 0) { bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2, 1, &seq_size, &m, &bseq);
while (n > 0) {
process_seqs(opt, n, bseq, cnt); process_seqs(opt, n, bseq, cnt);
free(bseq); bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2, 1, &seq_size, &m, &bseq);
} }
if (bseq != 0) free(bseq);
fprintf(stderr, "%12ld %s\n", (long)cnt[0], err_msg[0]); fprintf(stderr, "%12ld %s\n", (long)cnt[0], err_msg[0]);
for (i = 1; i <= MAX_ERR; ++i) for (i = 1; i <= MAX_ERR; ++i)

23
run.sh
View File

@ -1,6 +1,14 @@
thread=1 thread=32
n_r1=~/data/fastq/ZY2105177532213000/n_r1.fq n_r1=~/fastq/ZY2105177532213010_L4_1.fq
n_r2=~/data/fastq/ZY2105177532213000/n_r2.fq n_r2=~/fastq/ZY2105177532213010_L4_2.fq
#n_r1=~/fastq/na12878/nas_1.fq
#n_r2=~/fastq/na12878/nas_2.fq
#n_r1=~/fastq/na12878/na_1.fq
#n_r2=~/fastq/na12878/na_2.fq
#n_r1=~/fastq/n_r1.fq
#n_r2=~/fastq/n_r2.fq
#n_r1=~/data/fastq/ZY2105177532213000/n_r1.fq
#n_r2=~/data/fastq/ZY2105177532213000/n_r2.fq
#n_r1=~/data/fastq/ZY2105177532213000/sn_r1.fq #n_r1=~/data/fastq/ZY2105177532213000/sn_r1.fq
#n_r2=~/data/fastq/ZY2105177532213000/sn_r2.fq #n_r2=~/data/fastq/ZY2105177532213000/sn_r2.fq
#reference=~/data/reference/human_g1k_v37_decoy.fasta #reference=~/data/reference/human_g1k_v37_decoy.fasta
@ -8,6 +16,8 @@ n_r2=~/data/fastq/ZY2105177532213000/n_r2.fq
#n_r2=~/fastq/sn_r2.fq #n_r2=~/fastq/sn_r2.fq
#n_r1=~/fastq/ssn_r1.fq #n_r1=~/fastq/ssn_r1.fq
#n_r2=~/fastq/ssn_r2.fq #n_r2=~/fastq/ssn_r2.fq
#n_r1=~/fastq/ERR194147_1_120w.fastq
#n_r2=~/fastq/ERR194147_2_120w.fastq
#n_r1=~/fastq/tiny_n_r1.fq #n_r1=~/fastq/tiny_n_r1.fq
#n_r2=~/fastq/tiny_n_r2.fq #n_r2=~/fastq/tiny_n_r2.fq
#n_r1=~/fastq/diff_r1.fq #n_r1=~/fastq/diff_r1.fq
@ -15,9 +25,12 @@ n_r2=~/data/fastq/ZY2105177532213000/n_r2.fq
#n_r1=~/fastq/d_r1.fq #n_r1=~/fastq/d_r1.fq
#n_r2=~/fastq/d_r2.fq #n_r2=~/fastq/d_r2.fq
reference=~/reference/human_g1k_v37_decoy.fasta reference=~/reference/human_g1k_v37_decoy.fasta
#out=./ssn.sam #out=./all.sam
#out=./sn.sam
#out=./ssn-x1.sam
#out=./out.sam #out=./out.sam
out=/dev/null out=/dev/null
#out=./na12878.sam
#time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ #time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
# /home/zzh/data/reference/human_g1k_v37_decoy.fasta \ # /home/zzh/data/reference/human_g1k_v37_decoy.fasta \
# /home/zzh/data/fastq/nm1.fq \ # /home/zzh/data/fastq/nm1.fq \
@ -29,7 +42,7 @@ out=/dev/null
# /mnt/d/data/fastq/ZY2105177532213000/ZY2105177532213010_L4_2.fq.gz \ # /mnt/d/data/fastq/ZY2105177532213000/ZY2105177532213010_L4_2.fq.gz \
# -o /dev/null # -o /dev/null
time ./bwa mem -b 64 -t $thread -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ time ./bwa mem -t $thread -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
$reference \ $reference \
$n_r1 \ $n_r1 \
$n_r2 \ $n_r2 \

View File

@ -46,12 +46,14 @@ extern int64_t time_ksw_extend2,
time_bwt_sa, time_bwt_sa,
time_bwt_sa_read, time_bwt_sa_read,
time_bns, time_bns,
time_core_process; time_work1,
time_work2,
time_flt_chain;
extern int64_t dn, n16, n17, n18, n19, nall, num_sa; extern int64_t dn, n16, n17, n18, n19, nall, num_sa;
extern int64_t s1n, s2n, s3n, s1l, s2l, s3l; extern int64_t s1n, s2n, s3n, s1l, s2l, s3l;
extern int64_t g_num_smem2; extern int64_t g_num_smem2;
extern FILE *fp1; extern FILE *gfp1, *gfp2, *gfp3;
#endif #endif
@ -83,6 +85,8 @@ typedef struct {
typedef struct { size_t n, m; uint64_t *a; } uint64_v; typedef struct { size_t n, m; uint64_t *a; } uint64_v;
typedef struct { size_t n, m; pair64_t *a; } pair64_v; typedef struct { size_t n, m; pair64_t *a; } pair64_v;
typedef struct { size_t m; uint8_t *addr; } buf_t;
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif