seed1和seed2结果都正确
This commit is contained in:
parent
d41b8da061
commit
5d8ace386f
41
bwamem.c
41
bwamem.c
|
|
@ -154,7 +154,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, const FMTIn
|
|||
// fprintf(stderr, "\n");
|
||||
|
||||
// first pass: find all SMEMs
|
||||
fprintf(fp1, "seq: %ld\n", dn++);
|
||||
//fprintf(fp1, "seq: %ld\n", dn++);
|
||||
//fprintf(stderr, "seq: %ld\n", dn++);
|
||||
//dn ++;
|
||||
while (x < len) {
|
||||
|
|
@ -162,7 +162,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, const FMTIn
|
|||
#ifdef SHOW_PERF
|
||||
int64_t tmp_time = realtime_msec();
|
||||
#endif
|
||||
// x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv);
|
||||
//x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv);
|
||||
x = fmt_smem(fmt, len, seq, x, start_width, &a->mem1, a->tmpv);
|
||||
#ifdef SHOW_PERF
|
||||
tmp_time = realtime_msec() - tmp_time;
|
||||
|
|
@ -170,9 +170,14 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, const FMTIn
|
|||
#endif
|
||||
for (i = 0; i < a->mem1.n; ++i) {
|
||||
bwtintv_t *p = &a->mem1.a[i];
|
||||
fprintf(fp1, "%ld %ld %ld %ld %d\n", p->x[0], p->x[1], p->x[2], p->info >> 32, (uint32_t)p->info);
|
||||
//fprintf(fp1, "%ld %ld %ld %ld %d\n", p->x[0], p->x[1], p->x[2], p->info >> 32, (uint32_t)p->info);
|
||||
//fprintf(stderr, "%ld %ld %ld %ld %d\n", p->x[0], p->x[1], p->x[2], p->info >> 32, (uint32_t)p->info);
|
||||
int slen = (uint32_t)p->info - (p->info >> 32); // seed length
|
||||
// if (slen < 16) ++n16;
|
||||
// if (slen < 17) ++n17;
|
||||
// if (slen < 18) ++n18;
|
||||
// if (slen < 19) ++n19;
|
||||
// ++nall;
|
||||
max_seed_len = fmax(max_seed_len, slen);
|
||||
if (slen >= opt->min_seed_len)
|
||||
kv_push(bwtintv_t, a->mem, *p);
|
||||
|
|
@ -181,7 +186,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, const FMTIn
|
|||
}
|
||||
// second pass: find MEMs inside a long SMEM
|
||||
//if (max_seed_len == len)
|
||||
goto collect_intv_end;
|
||||
// goto collect_intv_end;
|
||||
|
||||
old_n = a->mem.n;
|
||||
for (k = 0; k < old_n; ++k) {
|
||||
|
|
@ -191,15 +196,29 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, const FMTIn
|
|||
#ifdef SHOW_PERF
|
||||
int64_t tmp_time = realtime_msec();
|
||||
#endif
|
||||
bwt_smem1(bwt, len, seq, (start + end)>>1, p->x[2]+1, &a->mem1, a->tmpv);
|
||||
// bwt_smem1(bwt, len, seq, (start + end)>>1, p->x[2]+1, &a->mem1, a->tmpv);
|
||||
fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, &a->mem1, a->tmpv);
|
||||
#ifdef SHOW_PERF
|
||||
tmp_time = realtime_msec() - tmp_time;
|
||||
__sync_fetch_and_add(&time_bwt_smem1a, tmp_time);
|
||||
#endif
|
||||
for (i = 0; i < a->mem1.n; ++i)
|
||||
if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info>>32) >= opt->min_seed_len)
|
||||
for (i = 0; i < a->mem1.n; ++i) {
|
||||
bwtintv_t *p = &a->mem1.a[i];
|
||||
//fprintf(fp1, "%ld %ld %ld %ld %d\n", p->x[0], p->x[1], p->x[2], p->info >> 32, (uint32_t)p->info);
|
||||
//fprintf(stderr, "%ld %ld %ld %ld %d\n", p->x[0], p->x[1], p->x[2], p->info >> 32, (uint32_t)p->info);
|
||||
int slen = (uint32_t)p->info - (p->info >> 32);
|
||||
if (slen < 16) ++n16;
|
||||
if (slen < 17) ++n17;
|
||||
if (slen < 18) ++n18;
|
||||
if (slen < 19) ++n19;
|
||||
++nall;
|
||||
if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info >> 32) >= opt->min_seed_len)
|
||||
kv_push(bwtintv_t, a->mem, a->mem1.a[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// goto collect_intv_end;
|
||||
|
||||
// third pass: LAST-like
|
||||
if (opt->max_mem_intv > 0) {
|
||||
x = 0;
|
||||
|
|
@ -207,17 +226,17 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, const FMTIn
|
|||
if (seq[x] < 4) {
|
||||
if (1) {
|
||||
bwtintv_t m;
|
||||
x = bwt_seed_strategy1(bwt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m);
|
||||
if (m.x[2] > 0) kv_push(bwtintv_t, a->mem, m);
|
||||
} else { // for now, we never come to this block which is slower
|
||||
#ifdef SHOW_PERF
|
||||
int64_t tmp_time = realtime_msec();
|
||||
#endif
|
||||
x = bwt_smem1a(bwt, len, seq, x, start_width, opt->max_mem_intv, &a->mem1, a->tmpv);
|
||||
x = bwt_seed_strategy1(bwt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m);
|
||||
#ifdef SHOW_PERF
|
||||
tmp_time = realtime_msec() - tmp_time;
|
||||
__sync_fetch_and_add(&time_bwt_smem1a, tmp_time);
|
||||
#endif
|
||||
if (m.x[2] > 0) kv_push(bwtintv_t, a->mem, m);
|
||||
} else { // for now, we never come to this block which is slower
|
||||
x = bwt_smem1a(bwt, len, seq, x, start_width, opt->max_mem_intv, &a->mem1, a->tmpv);
|
||||
for (i = 0; i < a->mem1.n; ++i)
|
||||
kv_push(bwtintv_t, a->mem, a->mem1.a[i]);
|
||||
}
|
||||
|
|
|
|||
4
bwt.c
4
bwt.c
|
|
@ -315,7 +315,7 @@ int bwt_match_exact_alt(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t
|
|||
void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_back)
|
||||
{
|
||||
#ifdef SHOW_PERF
|
||||
#if 1
|
||||
#if 0
|
||||
int64_t tmp_time = realtime_msec();
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -331,7 +331,7 @@ void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_b
|
|||
ok[1].x[is_back] = ok[2].x[is_back] + ok[2].x[2];
|
||||
ok[0].x[is_back] = ok[1].x[is_back] + ok[1].x[2];
|
||||
#ifdef SHOW_PERF
|
||||
#if 1
|
||||
#if 0
|
||||
tmp_time = realtime_msec() - tmp_time;
|
||||
__sync_fetch_and_add(&time_bwt_extend, tmp_time);
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ int64_t time_ksw_extend2 = 0,
|
|||
time_bwt_sa = 0,
|
||||
time_bwt_sa_read = 0;
|
||||
|
||||
int64_t dn = 0;
|
||||
int64_t dn = 0, n16 = 0, n17 = 0, n18 = 0, n19 = 0, nall = 0;
|
||||
|
||||
FILE *fp1;
|
||||
|
||||
|
|
@ -437,6 +437,11 @@ int main_mem(int argc, char *argv[])
|
|||
fprintf(stderr, "time_bwt_sa: %f s\n", time_bwt_sa / 1000.0 / opt->n_threads);
|
||||
fprintf(stderr, "time_ksw_extend2: %f s\n", time_ksw_extend2 / 1000.0 / opt->n_threads);
|
||||
fprintf(stderr, "time_bwt_sa_read: %f s\n", time_bwt_sa_read / 1000.0 / opt->n_threads);
|
||||
fprintf(stderr, "all seed num: %ld\n", nall);
|
||||
fprintf(stderr, "all seed num litter than 19: %ld\n", n19);
|
||||
fprintf(stderr, "all seed num litter than 18: %ld\n", n18);
|
||||
fprintf(stderr, "all seed num litter than 17: %ld\n", n17);
|
||||
fprintf(stderr, "all seed num litter than 16: %ld\n", n16);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
fclose(fp1);
|
||||
|
|
|
|||
83
fmt_idx.c
83
fmt_idx.c
|
|
@ -343,7 +343,7 @@ FMTIndex *create_fmt_from_bwt(bwt_t *bwt)
|
|||
}
|
||||
|
||||
// 扩展两个个碱基,计算bwt base为b的pre-bwt str中各个碱基的occ
|
||||
void fmt_e2_occ(const FMTIndex *fmt, bwtint_t k, int b1, int b2, bwtint_t cnt[4])
|
||||
inline void fmt_e2_occ(const FMTIndex *fmt, bwtint_t k, int b1, int b2, bwtint_t cnt[4])
|
||||
{
|
||||
uint32_t x = 0;
|
||||
uint32_t *p, *q, tmp;
|
||||
|
|
@ -616,7 +616,7 @@ inline static uint32_t build_backward_kmer(const uint8_t *q, int start_pos, int
|
|||
qbit |= q[i] << ((HASH_KMER_LEN - 1 - j) << 1);
|
||||
}
|
||||
*base_consumed = start_pos - i;
|
||||
return qbit;
|
||||
return (~qbit) & ((1 << (HASH_KMER_LEN << 1)) - 1);
|
||||
}
|
||||
|
||||
// 当x为0,或者q[x-1]为N时,只需要前向搜索即可
|
||||
|
|
@ -645,7 +645,7 @@ int fmt_smem_forward(const FMTIndex *fmt, int len, const uint8_t *q, int x, bwti
|
|||
|
||||
// 继续向前扩展
|
||||
for (i = x + kmer_len; i + 1 < len; i += 2) {
|
||||
//if (ik.x[2] < 5)
|
||||
//if (ik.x[2] < 2)
|
||||
// goto fmt_smem_forward_end;
|
||||
if (q[i] < 4 && q[i + 1] < 4) // 两个都可以扩展
|
||||
{
|
||||
|
|
@ -692,6 +692,7 @@ fmt_smem_forward_end:
|
|||
#endif
|
||||
#endif
|
||||
return ret;
|
||||
//return len;
|
||||
}
|
||||
|
||||
// 找smem(seed)
|
||||
|
|
@ -699,13 +700,12 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv
|
|||
{
|
||||
int i, j, ret, kmer_len;
|
||||
bwtintv_t ik, ok1, ok2;
|
||||
// bwtintv_t tik, tok1, tok2;
|
||||
bwtintv_v a[1], *curr;
|
||||
uint32_t qbit = 0;
|
||||
mem->n = 0;
|
||||
|
||||
if (q[x] > 3) return x + 1;
|
||||
//if (x == 0 || q[x-1] > 3) return fmt_smem_forward(fmt, len, q, x, mem); // 只用向前扩展
|
||||
// if (x == 0 || q[x-1] > 3) return fmt_smem_forward(fmt, len, q, x, mem); // 只用向前扩展
|
||||
|
||||
if (min_intv < 1) min_intv = 1; // the interval size should be at least 1
|
||||
kv_init(a[0]);
|
||||
|
|
@ -715,9 +715,6 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv
|
|||
fmt_kmer_get(fmt, &ik, qbit, 0); // 初始碱基位置
|
||||
ik.info = x + 1;
|
||||
|
||||
//fmt_set_intv(fmt, q[x], tik);
|
||||
//tik.info = x + 1;
|
||||
|
||||
// check change of the interval size and whether the interval size is too small to be extended further
|
||||
#define CHECK_INTV_CHANGE(iv, ov, end_pos) \
|
||||
if (ov.x[2] != iv.x[2]) { kv_push(bwtintv_t, *curr, iv); if (ov.x[2] < min_intv) break; } iv = ov; iv.info = end_pos
|
||||
|
|
@ -726,8 +723,6 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv
|
|||
|
||||
// 处理kmer对应的匹配信息
|
||||
for (j = 1, curr->n = 0; j < kmer_len; ++j) {
|
||||
//fmt_extend1(fmt, &tik, &tok1, 0, 3 - q[x + j]);
|
||||
//tik = tok1;
|
||||
fmt_kmer_get(fmt, &ok1, qbit, j);
|
||||
CHECK_INTV_CHANGE(ik, ok1, x + j + 1);
|
||||
}
|
||||
|
|
@ -771,69 +766,59 @@ backward_search:
|
|||
ret = curr->a[0].info; // this will be the returned value,扩展到的最远的位置
|
||||
|
||||
// 按照种子进行遍历,反向扩展
|
||||
#define CHECK_PUT_MEM(ok, pos, intv) \
|
||||
if (ok.x[2] < min_intv) { \
|
||||
if (mem->n == 0 || (pos) < mem->a[mem->n - 1].info >> 32) { \
|
||||
(intv).info |= (uint64_t)(pos) << 32; \
|
||||
kv_push(bwtintv_t, *mem, intv); \
|
||||
} \
|
||||
break; }
|
||||
#define CHECK_ADD_MEM(pos, intv, mem) \
|
||||
if (mem->n == 0 || (pos) < mem->a[mem->n - 1].info >> 32) { (intv).info |= (uint64_t)(pos) << 32; kv_push(bwtintv_t, *mem, (intv)); }
|
||||
|
||||
#define CHECK_INTV_ADD_MEM(ok, pos, intv, mem) \
|
||||
if (ok.x[2] < min_intv) { CHECK_ADD_MEM(pos, intv, mem); break; }
|
||||
|
||||
for (j = 0; j < curr->n; ++j)
|
||||
{
|
||||
bwtintv_t *p = &curr->a[j]; // 前向扩展的种子
|
||||
//if (p->info - x < HASH_KMER_LEN) {
|
||||
// // 创建反向kmer
|
||||
// uint32_t qbit = build_backward_kmer(q, p->info - 1, &kmer_len);
|
||||
// fmt_kmer_get(fmt, &ik, qbit, kmer_len - 1);
|
||||
//}
|
||||
// for (i = p->info - kmer_len; i > 0 i -= 2)
|
||||
|
||||
for (i = x - 1; i > 0; i -= 2)
|
||||
if (p->info - x < HASH_KMER_LEN) {
|
||||
uint32_t qbit = build_backward_kmer(q, p->info - 1, &kmer_len); // 创建反向kmer
|
||||
i = 1;
|
||||
do { fmt_kmer_get(fmt, &ik, qbit, kmer_len - i++); } while (ik.x[2] < min_intv);
|
||||
p->x[0] = ik.x[1]; p->x[1] = ik.x[0]; p->x[2] = ik.x[2];
|
||||
i = p->info - (kmer_len - i + 3);
|
||||
} else {
|
||||
i = x - 1;
|
||||
}
|
||||
for (; i > 0; i -= 2)
|
||||
{
|
||||
if (q[i] < 4 && q[i - 1] < 4) // 两个都可以扩展
|
||||
{
|
||||
fmt_extend2(fmt, p, &ok1, &ok2, 1, q[i], q[i - 1]);
|
||||
CHECK_PUT_MEM(ok1, i + 1, *p);
|
||||
CHECK_INTV_ADD_MEM(ok1, i + 1, *p, mem);
|
||||
ok1.info = p->info;
|
||||
CHECK_PUT_MEM(ok2, i, ok1);
|
||||
ok2.info = p->info;
|
||||
*p = ok2;
|
||||
CHECK_INTV_ADD_MEM(ok2, i, ok1, mem);
|
||||
ok2.info = p->info; *p = ok2;
|
||||
}
|
||||
else if (q[i] < 4) // 只能扩展一个
|
||||
{
|
||||
fmt_extend1(fmt, p, &ok1, 1, q[i]);
|
||||
CHECK_PUT_MEM(ok1, i + 1, *p);
|
||||
} else
|
||||
{ // 不能扩展
|
||||
if (mem->n == 0 || (i + 1) < mem->a[mem->n - 1].info >> 32)
|
||||
{
|
||||
p->info |= (uint64_t)(i + 1) << 32;
|
||||
kv_push(bwtintv_t, *mem, *p);
|
||||
CHECK_INTV_ADD_MEM(ok1, i + 1, *p, mem);
|
||||
ok1.info = p->info; *p = ok1;
|
||||
}
|
||||
else
|
||||
{ // 不能扩展
|
||||
CHECK_ADD_MEM(i + 1, *p, mem);
|
||||
goto fmt_smem_end;
|
||||
}
|
||||
}
|
||||
if (i == 0) { // 扩展到了第一个碱基
|
||||
for (; i == 0; --i)
|
||||
{ // 扩展到了第一个碱基
|
||||
if (q[i] < 4) {
|
||||
fmt_extend1(fmt, p, &ok1, 1, q[i]);
|
||||
CHECK_PUT_MEM(ok1, i + 1, *p);
|
||||
CHECK_INTV_ADD_MEM(ok1, i + 1, *p, mem);
|
||||
ok1.info = p->info; *p = ok1;
|
||||
} else {
|
||||
if (mem->n == 0 || (i + 1) < mem->a[mem->n - 1].info >> 32)
|
||||
{
|
||||
p->info |= (uint64_t)(i + 1) << 32;
|
||||
kv_push(bwtintv_t, *mem, *p);
|
||||
}
|
||||
CHECK_ADD_MEM(i + 1, *p, mem);
|
||||
goto fmt_smem_end;
|
||||
}
|
||||
--i;
|
||||
}
|
||||
if (i == -1) {
|
||||
if (mem->n == 0 || (i + 1) < mem->a[mem->n - 1].info >> 32)
|
||||
{
|
||||
p->info |= (uint64_t)(i + 1) << 32;
|
||||
kv_push(bwtintv_t, *mem, *p);
|
||||
}
|
||||
CHECK_ADD_MEM(i + 1, *p, mem);
|
||||
goto fmt_smem_end;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue