r202: trim bad chain ends before extension
This fixes a few more FP long INDELs towards the end of alignments.
This commit is contained in:
parent
21ca564112
commit
f2ef48878a
57
align.c
57
align.c
|
|
@ -176,9 +176,39 @@ static void mm_filter_bad_seeds(int n, mm128_t *a, int max_space, int max_diff)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void mm_skip_ends(const mm_reg1_t *r, const mm128_t *a, int bw, int32_t *as, int32_t *cnt)
|
||||||
|
{
|
||||||
|
int32_t i, l;
|
||||||
|
*as = r->as, *cnt = r->cnt;
|
||||||
|
if (r->cnt < 3) return;
|
||||||
|
l = a[r->as].y >> 32 & 0xff;
|
||||||
|
for (i = r->as + 1; i < r->as + r->cnt - 1; ++i) {
|
||||||
|
int32_t lq, lr, min, max;
|
||||||
|
lr = (int32_t)a[i].x - (int32_t)a[i-1].x;
|
||||||
|
lq = (int32_t)a[i].y - (int32_t)a[i-1].y;
|
||||||
|
min = lr < lq? lr : lq;
|
||||||
|
max = lr > lq? lr : lq;
|
||||||
|
if (max - min > l >> 1) *as = i;
|
||||||
|
l += min;
|
||||||
|
if (l >= bw << 1) break;
|
||||||
|
}
|
||||||
|
*cnt = r->as + r->cnt - *as;
|
||||||
|
l = a[r->as + r->cnt - 1].y >> 32 & 0xff;
|
||||||
|
for (i = r->as + r->cnt - 2; i > *as; --i) {
|
||||||
|
int32_t lq, lr, min, max;
|
||||||
|
lr = (int32_t)a[i+1].x - (int32_t)a[i].x;
|
||||||
|
lq = (int32_t)a[i+1].y - (int32_t)a[i].y;
|
||||||
|
min = lr < lq? lr : lq;
|
||||||
|
max = lr > lq? lr : lq;
|
||||||
|
if (max - min > l >> 1) *cnt = i + 1 - *as;
|
||||||
|
l += min;
|
||||||
|
if (l >= bw) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, mm128_t *a, ksw_extz_t *ez)
|
static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, mm128_t *a, ksw_extz_t *ez)
|
||||||
{
|
{
|
||||||
int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63;
|
int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63, as1, cnt1;
|
||||||
uint8_t *tseq, *qseq;
|
uint8_t *tseq, *qseq;
|
||||||
int32_t i, l, bw, dropped = 0, rs0, re0, qs0, qe0;
|
int32_t i, l, bw, dropped = 0, rs0, re0, qs0, qe0;
|
||||||
int32_t rs, re, qs, qe;
|
int32_t rs, re, qs, qe;
|
||||||
|
|
@ -190,13 +220,14 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
||||||
bw = (int)(opt->bw * 1.5 + 1.);
|
bw = (int)(opt->bw * 1.5 + 1.);
|
||||||
|
|
||||||
r2->cnt = 0;
|
r2->cnt = 0;
|
||||||
mm_adjust_minier(mi, qseq0, &a[r->as], &rs, &qs);
|
mm_skip_ends(r, a, opt->bw, &as1, &cnt1);
|
||||||
mm_adjust_minier(mi, qseq0, &a[r->as + r->cnt - 1], &re, &qe);
|
mm_adjust_minier(mi, qseq0, &a[as1], &rs, &qs);
|
||||||
mm_filter_bad_seeds(r->cnt, &a[r->as], opt->max_gap, 100);
|
mm_adjust_minier(mi, qseq0, &a[as1 + cnt1 - 1], &re, &qe);
|
||||||
|
mm_filter_bad_seeds(cnt1, &a[as1], opt->max_gap, 100);
|
||||||
|
|
||||||
// compute rs0 and qs0
|
// compute rs0 and qs0
|
||||||
if (r->split && r->as > 0) {
|
if (r->split && as1 > 0) {
|
||||||
mm_adjust_minier(mi, qseq0, &a[r->as-1], &rs0, &qs0);
|
mm_adjust_minier(mi, qseq0, &a[as1-1], &rs0, &qs0);
|
||||||
} else {
|
} else {
|
||||||
if (qs > 0 && rs > 0) { // actually this is always true
|
if (qs > 0 && rs > 0) { // actually this is always true
|
||||||
l = qs < opt->max_gap? qs : opt->max_gap;
|
l = qs < opt->max_gap? qs : opt->max_gap;
|
||||||
|
|
@ -238,13 +269,13 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
||||||
re1 = rs, qe1 = qs;
|
re1 = rs, qe1 = qs;
|
||||||
assert(qs1 >= 0 && rs1 >= 0);
|
assert(qs1 >= 0 && rs1 >= 0);
|
||||||
|
|
||||||
for (i = 1; i < r->cnt; ++i) { // gap filling
|
for (i = 1; i < cnt1; ++i) { // gap filling
|
||||||
if (a[r->as+i].y>>41&1) continue;
|
if (a[as1+i].y>>41&1) continue;
|
||||||
mm_adjust_minier(mi, qseq0, &a[r->as + i], &re, &qe);
|
mm_adjust_minier(mi, qseq0, &a[as1 + i], &re, &qe);
|
||||||
re1 = re, qe1 = qe;
|
re1 = re, qe1 = qe;
|
||||||
if (i == r->cnt - 1 || (a[r->as+i].y>>40&1) || qe - qs >= opt->min_ksw_len || re - rs >= opt->min_ksw_len) {
|
if (i == cnt1 - 1 || (a[as1+i].y>>40&1) || qe - qs >= opt->min_ksw_len || re - rs >= opt->min_ksw_len) {
|
||||||
int bw1 = bw;
|
int bw1 = bw;
|
||||||
if (a[r->as+i].y>>40&1)
|
if (a[as1+i].y>>40&1)
|
||||||
bw1 = qe - qs > re - rs? qe - qs : re - rs;
|
bw1 = qe - qs > re - rs? qe - qs : re - rs;
|
||||||
qseq = &qseq0[rev][qs];
|
qseq = &qseq0[rev][qs];
|
||||||
mm_idx_getseq(mi, rid, rs, re, tseq);
|
mm_idx_getseq(mi, rid, rs, re, tseq);
|
||||||
|
|
@ -256,13 +287,13 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
||||||
if (ez->zdropped) { // truncated by Z-drop; TODO: sometimes Z-drop kicks in because the next seed placement is wrong. This can be fixed in principle.
|
if (ez->zdropped) { // truncated by Z-drop; TODO: sometimes Z-drop kicks in because the next seed placement is wrong. This can be fixed in principle.
|
||||||
int j;
|
int j;
|
||||||
for (j = i - 1; j >= 0; --j)
|
for (j = i - 1; j >= 0; --j)
|
||||||
if ((int32_t)a[r->as + j].x < re + ez->max_t)
|
if ((int32_t)a[as1 + j].x < re + ez->max_t)
|
||||||
break;
|
break;
|
||||||
dropped = 1;
|
dropped = 1;
|
||||||
r->p->dp_score += ez->max;
|
r->p->dp_score += ez->max;
|
||||||
re1 = rs + (ez->max_t + 1);
|
re1 = rs + (ez->max_t + 1);
|
||||||
qe1 = qs + (ez->max_q + 1);
|
qe1 = qs + (ez->max_q + 1);
|
||||||
if (r->cnt - (j + 1) >= opt->min_cnt)
|
if (cnt1 - (j + 1) >= opt->min_cnt)
|
||||||
mm_split_reg(r, r2, j + 1, qlen, a);
|
mm_split_reg(r, r2, j + 1, qlen, a);
|
||||||
break;
|
break;
|
||||||
} else r->p->dp_score += ez->score;
|
} else r->p->dp_score += ez->score;
|
||||||
|
|
|
||||||
6
chain.c
6
chain.c
|
|
@ -67,10 +67,8 @@ int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int
|
||||||
for (i = n_u = 0; i < n; ++i) {
|
for (i = n_u = 0; i < n; ++i) {
|
||||||
if (t[i] == 0 && v[i] >= min_sc) {
|
if (t[i] == 0 && v[i] >= min_sc) {
|
||||||
j = i;
|
j = i;
|
||||||
if (f[j] < v[j]) { // find the point that maximizes f[]
|
while (j >= 0 && f[j] < v[j]) j = p[j]; // find the point that maximizes f[]
|
||||||
while (j >= 0 && f[j] < v[j]) j = p[j];
|
if (j < 0) j = i; // TODO: this should really be assert(j>=0)
|
||||||
if (j < 0) j = i; // TODO: this should really be assert(j>=0)
|
|
||||||
}
|
|
||||||
u[n_u++] = (uint64_t)f[j] << 32 | j;
|
u[n_u++] = (uint64_t)f[j] << 32 | j;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue