r769: filter out seeds breaking long gaps
This commit is contained in:
parent
aef7b0744c
commit
759f8e4ac9
52
align.c
52
align.c
|
|
@ -268,20 +268,30 @@ static inline void mm_adjust_minier(const mm_idx_t *mi, uint8_t *const qseq0[2],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mm_filter_bad_seeds(void *km, int as1, int cnt1, mm128_t *a, int min_gap, int diff_thres, int max_ext_len, int max_ext_cnt)
|
static int *collect_long_gaps(void *km, int as1, int cnt1, mm128_t *a, int min_gap, int *n_)
|
||||||
{
|
{
|
||||||
int max_st, max_en, n, i, k, max, *K;
|
int i, n, *K;
|
||||||
|
*n_ = 0;
|
||||||
for (i = 1, n = 0; i < cnt1; ++i) { // count the number of gaps longer than min_gap
|
for (i = 1, n = 0; i < cnt1; ++i) { // count the number of gaps longer than min_gap
|
||||||
int gap = ((int32_t)a[as1 + i].y - a[as1 + i - 1].y) - ((int32_t)a[as1 + i].x - a[as1 + i - 1].x);
|
int gap = ((int32_t)a[as1 + i].y - a[as1 + i - 1].y) - ((int32_t)a[as1 + i].x - a[as1 + i - 1].x);
|
||||||
if (gap < -min_gap || gap > min_gap) ++n;
|
if (gap < -min_gap || gap > min_gap) ++n;
|
||||||
}
|
}
|
||||||
if (n <= 1) return;
|
if (n <= 1) return 0;
|
||||||
K = (int*)kmalloc(km, n * sizeof(int));
|
K = (int*)kmalloc(km, n * sizeof(int));
|
||||||
for (i = 1, n = 0; i < cnt1; ++i) { // store the positions of long gaps
|
for (i = 1, n = 0; i < cnt1; ++i) { // store the positions of long gaps
|
||||||
int gap = ((int32_t)a[as1 + i].y - a[as1 + i - 1].y) - ((int32_t)a[as1 + i].x - a[as1 + i - 1].x);
|
int gap = ((int32_t)a[as1 + i].y - a[as1 + i - 1].y) - ((int32_t)a[as1 + i].x - a[as1 + i - 1].x);
|
||||||
if (gap < -min_gap || gap > min_gap)
|
if (gap < -min_gap || gap > min_gap)
|
||||||
K[n++] = i;
|
K[n++] = i;
|
||||||
}
|
}
|
||||||
|
*n_ = n;
|
||||||
|
return K;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mm_filter_bad_seeds(void *km, int as1, int cnt1, mm128_t *a, int min_gap, int diff_thres, int max_ext_len, int max_ext_cnt)
|
||||||
|
{
|
||||||
|
int max_st, max_en, n, i, k, max, *K;
|
||||||
|
K = collect_long_gaps(km, as1, cnt1, a, min_gap, &n);
|
||||||
|
if (K == 0) return;
|
||||||
max = 0, max_st = max_en = -1;
|
max = 0, max_st = max_en = -1;
|
||||||
for (k = 0;; ++k) { // traverse long gaps
|
for (k = 0;; ++k) { // traverse long gaps
|
||||||
int gap, l, n_ins = 0, n_del = 0, qs, rs, max_diff = 0, max_diff_l = -1;
|
int gap, l, n_ins = 0, n_del = 0, qs, rs, max_diff = 0, max_diff_l = -1;
|
||||||
|
|
@ -314,6 +324,41 @@ static void mm_filter_bad_seeds(void *km, int as1, int cnt1, mm128_t *a, int min
|
||||||
kfree(km, K);
|
kfree(km, K);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void mm_filter_bad_seeds_alt(void *km, int as1, int cnt1, mm128_t *a, int min_gap)
|
||||||
|
{
|
||||||
|
int n, k, *K;
|
||||||
|
K = collect_long_gaps(km, as1, cnt1, a, min_gap, &n);
|
||||||
|
if (K == 0) return;
|
||||||
|
for (k = 0; k < n;) {
|
||||||
|
int i = K[k], l;
|
||||||
|
int gap1 = ((int32_t)a[as1 + i].y - a[as1 + i - 1].y) - ((int32_t)a[as1 + i].x - a[as1 + i - 1].x);
|
||||||
|
int re1 = (int32_t)a[as1 + i].x;
|
||||||
|
int qe1 = (int32_t)a[as1 + i].y;
|
||||||
|
gap1 = gap1 > 0? gap1 : -gap1;
|
||||||
|
for (l = k + 1; l < n; ++l) {
|
||||||
|
int j = K[l];
|
||||||
|
int gap2 = ((int32_t)a[as1 + j].y - a[as1 + j - 1].y) - ((int32_t)a[as1 + j].x - a[as1 + j - 1].x);
|
||||||
|
int q_span_pre = a[as1 + j - 1].y >> 32 & 0xff;
|
||||||
|
int rs2 = (int32_t)a[as1 + j - 1].x + q_span_pre;
|
||||||
|
int qs2 = (int32_t)a[as1 + j - 1].x + q_span_pre;
|
||||||
|
int m = rs2 - re1 < qs2 - qe1? rs2 - re1 : qs2 - qe1;
|
||||||
|
gap2 = gap2 > 0? gap2 : -gap2;
|
||||||
|
if (m > gap1 + gap2) break;
|
||||||
|
re1 = (int32_t)a[as1 + j].x;
|
||||||
|
qe1 = (int32_t)a[as1 + j].y;
|
||||||
|
gap1 = gap2;
|
||||||
|
}
|
||||||
|
if (l > k + 1) {
|
||||||
|
int j, end = K[l - 1];
|
||||||
|
for (j = K[k]; j < end; ++j)
|
||||||
|
a[as1 + j].y |= MM_SEED_IGNORE;
|
||||||
|
a[as1 + end].y |= MM_SEED_LONG_JOIN;
|
||||||
|
}
|
||||||
|
k = l;
|
||||||
|
}
|
||||||
|
kfree(km, K);
|
||||||
|
}
|
||||||
|
|
||||||
static void mm_fix_bad_ends(const mm_reg1_t *r, const mm128_t *a, int bw, int min_match, int32_t *as, int32_t *cnt)
|
static void mm_fix_bad_ends(const mm_reg1_t *r, const mm128_t *a, int bw, int min_match, int32_t *as, int32_t *cnt)
|
||||||
{
|
{
|
||||||
int32_t i, l, m;
|
int32_t i, l, m;
|
||||||
|
|
@ -450,6 +495,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
||||||
mm_fix_bad_ends(r, a, opt->bw, opt->min_chain_score * 2, &as1, &cnt1);
|
mm_fix_bad_ends(r, a, opt->bw, opt->min_chain_score * 2, &as1, &cnt1);
|
||||||
}
|
}
|
||||||
mm_filter_bad_seeds(km, as1, cnt1, a, 10, 40, opt->max_gap>>1, 10);
|
mm_filter_bad_seeds(km, as1, cnt1, a, 10, 40, opt->max_gap>>1, 10);
|
||||||
|
mm_filter_bad_seeds_alt(km, as1, cnt1, a, 50);
|
||||||
mm_adjust_minier(mi, qseq0, &a[as1], &rs, &qs);
|
mm_adjust_minier(mi, qseq0, &a[as1], &rs, &qs);
|
||||||
mm_adjust_minier(mi, qseq0, &a[as1 + cnt1 - 1], &re, &qe);
|
mm_adjust_minier(mi, qseq0, &a[as1 + cnt1 - 1], &re, &qe);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue