better treatment for micro-repeat
This commit is contained in:
parent
45b0d3423a
commit
d890c7997c
52
bwamem.c
52
bwamem.c
|
|
@ -344,12 +344,13 @@ static inline int cal_max_gap(const mem_opt_t *opt, int qlen)
|
||||||
|
|
||||||
void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_t *a)
|
void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_t *a)
|
||||||
{ // FIXME: in general, we SHOULD check funny seed patterns such as contained seeds. When that happens, we should use a SW or extend more seeds
|
{ // FIXME: in general, we SHOULD check funny seed patterns such as contained seeds. When that happens, we should use a SW or extend more seeds
|
||||||
int i, qbeg, max = 0, max_i = -1;
|
int i, k;
|
||||||
int64_t rlen, rbeg, rmax[2], tmp;
|
int64_t rlen, rmax[2], tmp, max = 0, max_i = 0;
|
||||||
const mem_seed_t *s;
|
const mem_seed_t *s;
|
||||||
uint8_t *rseq = 0;
|
uint8_t *rseq = 0;
|
||||||
|
mem_alnreg_t best;
|
||||||
|
|
||||||
memset(a, 0, sizeof(mem_alnreg_t));
|
memset(&best, 0, sizeof(mem_alnreg_t));
|
||||||
// get the max possible span
|
// get the max possible span
|
||||||
rmax[0] = l_pac<<1; rmax[1] = 0;
|
rmax[0] = l_pac<<1; rmax[1] = 0;
|
||||||
for (i = 0; i < c->n; ++i) {
|
for (i = 0; i < c->n; ++i) {
|
||||||
|
|
@ -359,31 +360,32 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
|
||||||
e = t->rbeg + t->len + ((l_query - t->qbeg - t->len) + cal_max_gap(opt, l_query - t->qbeg - t->len));
|
e = t->rbeg + t->len + ((l_query - t->qbeg - t->len) + cal_max_gap(opt, l_query - t->qbeg - t->len));
|
||||||
rmax[0] = rmax[0] < b? rmax[0] : b;
|
rmax[0] = rmax[0] < b? rmax[0] : b;
|
||||||
rmax[1] = rmax[1] > e? rmax[1] : e;
|
rmax[1] = rmax[1] > e? rmax[1] : e;
|
||||||
if (max < t->len) max = t->len, max_i = i;
|
if (t->len > max) max = t->len, max_i = i;
|
||||||
}
|
}
|
||||||
// get the start and end of the seeded region
|
|
||||||
rbeg = c->seeds[max_i].rbeg; qbeg = c->seeds[max_i].qbeg;
|
|
||||||
// retrieve the reference sequence
|
// retrieve the reference sequence
|
||||||
rseq = bns_get_seq(l_pac, pac, rmax[0], rmax[1], &rlen);
|
rseq = bns_get_seq(l_pac, pac, rmax[0], rmax[1], &rlen);
|
||||||
|
|
||||||
if (qbeg) { // left extension of the first seed
|
for (k = 0; k < c->n;) {
|
||||||
|
s = &c->seeds[k];
|
||||||
|
memset(a, 0, sizeof(mem_alnreg_t));
|
||||||
|
if (s->qbeg) { // left extension
|
||||||
uint8_t *rs, *qs;
|
uint8_t *rs, *qs;
|
||||||
int qle, tle;
|
int qle, tle;
|
||||||
qs = malloc(qbeg);
|
qs = malloc(s->qbeg);
|
||||||
for (i = 0; i < qbeg; ++i) qs[i] = query[qbeg - 1 - i];
|
for (i = 0; i < s->qbeg; ++i) qs[i] = query[s->qbeg - 1 - i];
|
||||||
tmp = rbeg - rmax[0];
|
tmp = s->rbeg - rmax[0];
|
||||||
rs = malloc(tmp);
|
rs = malloc(tmp);
|
||||||
for (i = 0; i < tmp; ++i) rs[i] = rseq[tmp - 1 - i];
|
for (i = 0; i < tmp; ++i) rs[i] = rseq[tmp - 1 - i];
|
||||||
a->score = ksw_extend(qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, opt->w, c->seeds[max_i].len * opt->a, 0, &qle, &tle);
|
a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, opt->w, s->len * opt->a, 0, &qle, &tle);
|
||||||
a->qb = qbeg - qle; a->rb = rbeg - tle;
|
a->qb = s->qbeg - qle; a->rb = s->rbeg - tle;
|
||||||
free(qs); free(rs);
|
free(qs); free(rs);
|
||||||
} else a->score = c->seeds[max_i].len * opt->a, a->qb = 0, a->rb = rbeg;
|
} else a->score = s->len * opt->a, a->qb = 0, a->rb = s->rbeg;
|
||||||
|
|
||||||
s = &c->seeds[max_i];
|
|
||||||
if (s->qbeg + s->len != l_query) { // right extension of the first seed
|
if (s->qbeg + s->len != l_query) { // right extension of the first seed
|
||||||
int qle, tle, qe, re;
|
int qle, tle, qe, re;
|
||||||
int16_t *qw = 0;
|
int16_t *qw = 0;
|
||||||
qe = s->qbeg + s->len; re = s->rbeg + s->len - rmax[0];
|
qe = s->qbeg + s->len;
|
||||||
|
re = s->rbeg + s->len - rmax[0];
|
||||||
#if 0 // FIXME: I am not sure if the following block works. Comment it out if SW extension gives unexpected result.
|
#if 0 // FIXME: I am not sure if the following block works. Comment it out if SW extension gives unexpected result.
|
||||||
if (c->n > 1) { // generate $qw
|
if (c->n > 1) { // generate $qw
|
||||||
int j, l = rmax[1] - (s->rbeg + s->len);
|
int j, l = rmax[1] - (s->rbeg + s->len);
|
||||||
|
|
@ -398,22 +400,24 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
|
||||||
else if (qw[x] >= 0) qw[x] = -2; // in a seed overlap, do not set any constraint
|
else if (qw[x] >= 0) qw[x] = -2; // in a seed overlap, do not set any constraint
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// for (i = 0; i < l; ++i) printf("%d:%d\t", i, qw[i]); putchar('\n');
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, opt->w, a->score, qw, &qle, &tle);
|
a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, opt->w, a->score, qw, &qle, &tle);
|
||||||
a->qe = qe + qle; a->re = rmax[0] + re + tle;
|
a->qe = qe + qle; a->re = rmax[0] + re + tle;
|
||||||
free(qw);
|
free(qw);
|
||||||
} else a->qe = l_query, a->re = s->rbeg + s->len;
|
} else a->qe = l_query, a->re = s->rbeg + s->len;
|
||||||
/*
|
if (mem_debug >= 2) printf("[%d] score=%d\t[%d,%d) <=> [%ld,%ld)\n", k, a->score, a->qb, a->qe, (long)a->rb, (long)a->re);
|
||||||
a->is_all = 1;
|
// check how many seeds have been covered
|
||||||
if (c->n > 1) { // check if all the seeds have been included
|
for (i = k + 1; i < c->n; ++i) {
|
||||||
s = &c->seeds[c->n - 1];
|
const mem_seed_t *t = &c->seeds[i];
|
||||||
if (s->qbeg + s->len > a->qe) a->is_all = 0;
|
if (t->rbeg + t->len > a->re || t->qbeg + t->len > a->qe)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
*/
|
if (i >= c->n) break; // all seeds are included; no need to proceed
|
||||||
if (mem_debug >= 2)
|
if (a->score > best.score) best = *a;
|
||||||
printf("[%d] score=%d\t[%d,%d) <=> [%lld,%lld)\n", c->n, a->score, a->qb, a->qe, a->rb, a->re);
|
k = i;
|
||||||
|
}
|
||||||
|
if (a->score < best.score) *a = best;
|
||||||
free(rseq);
|
free(rseq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue