r929: added simplified LAST-like seeding
This commit is contained in:
parent
25d45512d9
commit
038af2a551
21
bwamem.c
21
bwamem.c
|
|
@ -121,7 +121,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co
|
|||
// first pass: find all SMEMs
|
||||
while (x < len) {
|
||||
if (seq[x] < 4) {
|
||||
x = bwt_smem1a(bwt, len, seq, x, start_width, split_len, opt->max_mem_intv, &a->mem1, a->tmpv);
|
||||
x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv);
|
||||
for (i = 0; i < a->mem1.n; ++i) {
|
||||
bwtintv_t *p = &a->mem1.a[i];
|
||||
int slen = (uint32_t)p->info - (p->info>>32); // seed length
|
||||
|
|
@ -131,7 +131,6 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co
|
|||
} else ++x;
|
||||
}
|
||||
// second pass: find MEMs inside a long SMEM
|
||||
if (opt->max_mem_intv > 0) goto sort_intv;
|
||||
old_n = a->mem.n;
|
||||
for (k = 0; k < old_n; ++k) {
|
||||
bwtintv_t *p = &a->mem.a[k];
|
||||
|
|
@ -142,8 +141,24 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co
|
|||
if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info>>32) >= opt->min_seed_len)
|
||||
kv_push(bwtintv_t, a->mem, a->mem1.a[i]);
|
||||
}
|
||||
// third pass: LAST-like
|
||||
if (opt->max_mem_intv > 0) {
|
||||
x = 0;
|
||||
while (x < len) {
|
||||
if (seq[x] < 4) {
|
||||
if (1) {
|
||||
bwtintv_t m;
|
||||
x = bwt_seed_strategy1(bwt, len, seq, x, opt->max_mem_intv, &m);
|
||||
if (m.x[2] > 0) kv_push(bwtintv_t, a->mem, m);
|
||||
} else { // for now, we never come to this block which is slower
|
||||
x = bwt_smem1a(bwt, len, seq, x, start_width, opt->max_mem_intv, &a->mem1, a->tmpv);
|
||||
for (i = 0; i < a->mem1.n; ++i)
|
||||
kv_push(bwtintv_t, a->mem, a->mem1.a[i]);
|
||||
}
|
||||
} else ++x;
|
||||
}
|
||||
}
|
||||
// sort
|
||||
sort_intv:
|
||||
ks_introsort(mem_intv, a->mem.n, a->mem.a);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ const bwtintv_v *smem_next(smem_i *itr)
|
|||
while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases
|
||||
if (itr->start == itr->len) return 0;
|
||||
ori_start = itr->start;
|
||||
itr->start = bwt_smem1a(itr->bwt, itr->len, itr->query, ori_start, itr->min_intv, itr->max_len, itr->max_intv, itr->matches, itr->tmpvec); // search for SMEM
|
||||
itr->start = bwt_smem1a(itr->bwt, itr->len, itr->query, ori_start, itr->min_intv, itr->max_intv, itr->matches, itr->tmpvec); // search for SMEM
|
||||
return itr->matches;
|
||||
}
|
||||
|
||||
|
|
|
|||
44
bwt.c
44
bwt.c
|
|
@ -285,8 +285,8 @@ static void bwt_reverse_intvs(bwtintv_v *p)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, int max_len, uint64_t max_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2])
|
||||
// NOTE: $max_intv is not currently used in BWA-MEM
|
||||
int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, uint64_t max_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2])
|
||||
{
|
||||
int i, j, c, ret;
|
||||
bwtintv_t ik, ok[4];
|
||||
|
|
@ -302,10 +302,12 @@ int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv,
|
|||
ik.info = x + 1;
|
||||
|
||||
for (i = x + 1, curr->n = 0; i < len; ++i) { // forward search
|
||||
if (q[i] < 4) { // an A/C/G/T base
|
||||
if (ik.x[2] < max_intv) { // an interval small enough
|
||||
kv_push(bwtintv_t, *curr, ik);
|
||||
break;
|
||||
} else if (q[i] < 4) { // an A/C/G/T base
|
||||
c = 3 - q[i]; // complement of q[i]
|
||||
bwt_extend(bwt, &ik, ok, 0);
|
||||
if (ik.x[2] < max_intv && (i - x > max_len || ik.x[2] == 1)) break;
|
||||
if (ok[c].x[2] != ik.x[2]) { // change of the interval size
|
||||
kv_push(bwtintv_t, *curr, ik);
|
||||
if (ok[c].x[2] < min_intv) break; // the interval size is too small to be extended further
|
||||
|
|
@ -325,13 +327,8 @@ int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv,
|
|||
c = i < 0? -1 : q[i] < 4? q[i] : -1; // c==-1 if i<0 or q[i] is an ambiguous base
|
||||
for (j = 0, curr->n = 0; j < prev->n; ++j) {
|
||||
bwtintv_t *p = &prev->a[j];
|
||||
int max_stop = 0;
|
||||
bwt_extend(bwt, p, ok, 1);
|
||||
if (ok[c].x[2] != p->x[2]) { // change of interval size
|
||||
if (p->info - i - 1 > max_len && p->x[2] < max_intv)
|
||||
max_stop = 1;
|
||||
}
|
||||
if (c < 0 || ok[c].x[2] < min_intv || max_stop) { // keep the hit if reaching the beginning or an ambiguous base or the intv is small enough
|
||||
if (c >= 0 && ik.x[2] >= max_intv) bwt_extend(bwt, p, ok, 1);
|
||||
if (c < 0 || ik.x[2] < max_intv || ok[c].x[2] < min_intv) { // keep the hit if reaching the beginning or an ambiguous base or the intv is small enough
|
||||
if (curr->n == 0) { // test curr->n>0 to make sure there are no longer matches
|
||||
if (mem->n == 0 || i + 1 < mem->a[mem->n-1].info>>32) { // skip contained matches
|
||||
ik = *p; ik.info |= (uint64_t)(i + 1)<<32;
|
||||
|
|
@ -355,7 +352,30 @@ int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv,
|
|||
|
||||
int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2])
|
||||
{
|
||||
return bwt_smem1a(bwt, len, q, x, min_intv, INT_MAX, 0, mem, tmpvec);
|
||||
return bwt_smem1a(bwt, len, q, x, min_intv, 0, mem, tmpvec);
|
||||
}
|
||||
|
||||
int bwt_seed_strategy1(const bwt_t *bwt, int len, const uint8_t *q, int x, int max_intv, bwtintv_t *mem)
|
||||
{
|
||||
int i, c;
|
||||
bwtintv_t ik, ok[4];
|
||||
|
||||
memset(mem, 0, sizeof(bwtintv_t));
|
||||
if (q[x] > 3) return x + 1;
|
||||
bwt_set_intv(bwt, q[x], ik); // the initial interval of a single base
|
||||
for (i = x + 1; i < len; ++i) { // forward search
|
||||
if (q[i] < 4) { // an A/C/G/T base
|
||||
c = 3 - q[i]; // complement of q[i]
|
||||
bwt_extend(bwt, &ik, ok, 0);
|
||||
if (ok[c].x[2] < max_intv) {
|
||||
*mem = ok[c];
|
||||
mem->info = (uint64_t)x<<32 | (i + 1);
|
||||
return i + 1;
|
||||
}
|
||||
ik = ok[c];
|
||||
} else return i + 1;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/*************************
|
||||
|
|
|
|||
4
bwt.h
4
bwt.h
|
|
@ -119,9 +119,9 @@ extern "C" {
|
|||
* Return the end of the longest exact match starting from _x_.
|
||||
*/
|
||||
int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]);
|
||||
int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, int max_len, uint64_t max_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]);
|
||||
int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, uint64_t max_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]);
|
||||
|
||||
// SMEM iterator interface
|
||||
int bwt_seed_strategy1(const bwt_t *bwt, int len, const uint8_t *q, int x, int max_intv, bwtintv_t *mem);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue