r202: trim bad chain ends before extension

This fixes a few more FP long INDELs towards the end of alignments.
This commit is contained in:
Heng Li 2017-07-25 19:53:19 -04:00
parent 21ca564112
commit f2ef48878a
3 changed files with 47 additions and 18 deletions

57
align.c
View File

@ -176,9 +176,39 @@ static void mm_filter_bad_seeds(int n, mm128_t *a, int max_space, int max_diff)
}
}
static void mm_skip_ends(const mm_reg1_t *r, const mm128_t *a, int bw, int32_t *as, int32_t *cnt)
{
int32_t i, l;
*as = r->as, *cnt = r->cnt;
if (r->cnt < 3) return;
l = a[r->as].y >> 32 & 0xff;
for (i = r->as + 1; i < r->as + r->cnt - 1; ++i) {
int32_t lq, lr, min, max;
lr = (int32_t)a[i].x - (int32_t)a[i-1].x;
lq = (int32_t)a[i].y - (int32_t)a[i-1].y;
min = lr < lq? lr : lq;
max = lr > lq? lr : lq;
if (max - min > l >> 1) *as = i;
l += min;
if (l >= bw << 1) break;
}
*cnt = r->as + r->cnt - *as;
l = a[r->as + r->cnt - 1].y >> 32 & 0xff;
for (i = r->as + r->cnt - 2; i > *as; --i) {
int32_t lq, lr, min, max;
lr = (int32_t)a[i+1].x - (int32_t)a[i].x;
lq = (int32_t)a[i+1].y - (int32_t)a[i].y;
min = lr < lq? lr : lq;
max = lr > lq? lr : lq;
if (max - min > l >> 1) *cnt = i + 1 - *as;
l += min;
if (l >= bw) break;
}
}
static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, mm128_t *a, ksw_extz_t *ez)
{
int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63;
int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63, as1, cnt1;
uint8_t *tseq, *qseq;
int32_t i, l, bw, dropped = 0, rs0, re0, qs0, qe0;
int32_t rs, re, qs, qe;
@ -190,13 +220,14 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
bw = (int)(opt->bw * 1.5 + 1.);
r2->cnt = 0;
mm_adjust_minier(mi, qseq0, &a[r->as], &rs, &qs);
mm_adjust_minier(mi, qseq0, &a[r->as + r->cnt - 1], &re, &qe);
mm_filter_bad_seeds(r->cnt, &a[r->as], opt->max_gap, 100);
mm_skip_ends(r, a, opt->bw, &as1, &cnt1);
mm_adjust_minier(mi, qseq0, &a[as1], &rs, &qs);
mm_adjust_minier(mi, qseq0, &a[as1 + cnt1 - 1], &re, &qe);
mm_filter_bad_seeds(cnt1, &a[as1], opt->max_gap, 100);
// compute rs0 and qs0
if (r->split && r->as > 0) {
mm_adjust_minier(mi, qseq0, &a[r->as-1], &rs0, &qs0);
if (r->split && as1 > 0) {
mm_adjust_minier(mi, qseq0, &a[as1-1], &rs0, &qs0);
} else {
if (qs > 0 && rs > 0) { // actually this is always true
l = qs < opt->max_gap? qs : opt->max_gap;
@ -238,13 +269,13 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
re1 = rs, qe1 = qs;
assert(qs1 >= 0 && rs1 >= 0);
for (i = 1; i < r->cnt; ++i) { // gap filling
if (a[r->as+i].y>>41&1) continue;
mm_adjust_minier(mi, qseq0, &a[r->as + i], &re, &qe);
for (i = 1; i < cnt1; ++i) { // gap filling
if (a[as1+i].y>>41&1) continue;
mm_adjust_minier(mi, qseq0, &a[as1 + i], &re, &qe);
re1 = re, qe1 = qe;
if (i == r->cnt - 1 || (a[r->as+i].y>>40&1) || qe - qs >= opt->min_ksw_len || re - rs >= opt->min_ksw_len) {
if (i == cnt1 - 1 || (a[as1+i].y>>40&1) || qe - qs >= opt->min_ksw_len || re - rs >= opt->min_ksw_len) {
int bw1 = bw;
if (a[r->as+i].y>>40&1)
if (a[as1+i].y>>40&1)
bw1 = qe - qs > re - rs? qe - qs : re - rs;
qseq = &qseq0[rev][qs];
mm_idx_getseq(mi, rid, rs, re, tseq);
@ -256,13 +287,13 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
if (ez->zdropped) { // truncated by Z-drop; TODO: sometimes Z-drop kicks in because the next seed placement is wrong. This can be fixed in principle.
int j;
for (j = i - 1; j >= 0; --j)
if ((int32_t)a[r->as + j].x < re + ez->max_t)
if ((int32_t)a[as1 + j].x < re + ez->max_t)
break;
dropped = 1;
r->p->dp_score += ez->max;
re1 = rs + (ez->max_t + 1);
qe1 = qs + (ez->max_q + 1);
if (r->cnt - (j + 1) >= opt->min_cnt)
if (cnt1 - (j + 1) >= opt->min_cnt)
mm_split_reg(r, r2, j + 1, qlen, a);
break;
} else r->p->dp_score += ez->score;

View File

@ -67,10 +67,8 @@ int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int
for (i = n_u = 0; i < n; ++i) {
if (t[i] == 0 && v[i] >= min_sc) {
j = i;
if (f[j] < v[j]) { // find the point that maximizes f[]
while (j >= 0 && f[j] < v[j]) j = p[j];
if (j < 0) j = i; // TODO: this should really be assert(j>=0)
}
while (j >= 0 && f[j] < v[j]) j = p[j]; // find the point that maximizes f[]
if (j < 0) j = i; // TODO: this should really be assert(j>=0)
u[n_u++] = (uint64_t)f[j] << 32 | j;
}
}

2
main.c
View File

@ -8,7 +8,7 @@
#include "minimap.h"
#include "mmpriv.h"
#define MM_VERSION "2.0-r201-dirty"
#define MM_VERSION "2.0-r202-dirty"
void liftrlimit()
{