diff --git a/align.c b/align.c
index b56bde2..8e62a4c 100644
--- a/align.c
+++ b/align.c
@@ -268,20 +268,39 @@ static inline void mm_adjust_minier(const mm_idx_t *mi, uint8_t *const qseq0[2],
 	}
 }
 
-static void mm_filter_bad_seeds(void *km, int as1, int cnt1, mm128_t *a, int min_gap, int diff_thres, int max_ext_len, int max_ext_cnt)
+/*
+ * Collect the positions of long gaps in the seed chain a[as1 .. as1+cnt1-1].
+ * Position i (1 <= i < cnt1) is a long gap when the step in y from seed i-1 to
+ * seed i differs from the step in x by more than min_gap in either direction.
+ *
+ * Returns a kmalloc'ed array of the positions and stores its length in *n_;
+ * the caller frees it with kfree(km, ...). If at most one long gap exists,
+ * nothing is allocated, *n_ is set to 0 and 0 is returned.
+ */
+static int *collect_long_gaps(void *km, int as1, int cnt1, mm128_t *a, int min_gap, int *n_)
 {
-	int max_st, max_en, n, i, k, max, *K;
+	int i, n, *K;
+	*n_ = 0;
 	for (i = 1, n = 0; i < cnt1; ++i) { // count the number of gaps longer than min_gap
 		int gap = ((int32_t)a[as1 + i].y - a[as1 + i - 1].y) - ((int32_t)a[as1 + i].x - a[as1 + i - 1].x);
 		if (gap < -min_gap || gap > min_gap) ++n;
 	}
-	if (n <= 1) return;
+	if (n <= 1) return 0;
 	K = (int*)kmalloc(km, n * sizeof(int));
 	for (i = 1, n = 0; i < cnt1; ++i) { // store the positions of long gaps
 		int gap = ((int32_t)a[as1 + i].y - a[as1 + i - 1].y) - ((int32_t)a[as1 + i].x - a[as1 + i - 1].x);
 		if (gap < -min_gap || gap > min_gap)
 			K[n++] = i;
 	}
+	*n_ = n;
+	return K;
+}
+
+static void mm_filter_bad_seeds(void *km, int as1, int cnt1, mm128_t *a, int min_gap, int diff_thres, int max_ext_len, int max_ext_cnt)
+{
+	int max_st, max_en, n, i, k, max, *K;
+	K = collect_long_gaps(km, as1, cnt1, a, min_gap, &n);
+	if (K == 0) return;
 	max = 0, max_st = max_en = -1;
 	for (k = 0;; ++k) { // traverse long gaps
 		int gap, l, n_ins = 0, n_del = 0, qs, rs, max_diff = 0, max_diff_l = -1;
@@ -314,6 +333,52 @@ static void mm_filter_bad_seeds(void *km, int as1, int cnt1, mm128_t *a, int min
 	kfree(km, K);
 }
 
+/*
+ * Join runs of neighbouring long gaps.
+ *
+ * Starting from long gap K[k], each following long gap K[l] is absorbed into
+ * the run while m, the smaller of the x and y distances between the seed that
+ * closes the previous gap and the seed before the next gap (advanced by the
+ * value in bits 32-39 of its y), does not exceed the sum of the two gap sizes.
+ * For a run of two or more gaps, seeds from the first gap position up to, but
+ * not including, the last one are flagged MM_SEED_IGNORE and the seed at the
+ * last gap position is flagged MM_SEED_LONG_JOIN.
+ */
+static void mm_filter_bad_seeds_alt(void *km, int as1, int cnt1, mm128_t *a, int min_gap)
+{
+	int n, k, *K;
+	K = collect_long_gaps(km, as1, cnt1, a, min_gap, &n);
+	if (K == 0) return; // fewer than two long gaps: nothing to join
+	for (k = 0; k < n;) {
+		int i = K[k], l;
+		int gap1 = ((int32_t)a[as1 + i].y - a[as1 + i - 1].y) - ((int32_t)a[as1 + i].x - a[as1 + i - 1].x);
+		int re1 = (int32_t)a[as1 + i].x;
+		int qe1 = (int32_t)a[as1 + i].y;
+		gap1 = gap1 > 0? gap1 : -gap1;
+		for (l = k + 1; l < n; ++l) { // try to extend the run with the following long gaps
+			int j = K[l];
+			int gap2 = ((int32_t)a[as1 + j].y - a[as1 + j - 1].y) - ((int32_t)a[as1 + j].x - a[as1 + j - 1].x);
+			int q_span_pre = a[as1 + j - 1].y >> 32 & 0xff;
+			int rs2 = (int32_t)a[as1 + j - 1].x + q_span_pre;
+			int qs2 = (int32_t)a[as1 + j - 1].y + q_span_pre; // y coordinate, compared against qe1; must not reuse .x
+			int m = rs2 - re1 < qs2 - qe1? rs2 - re1 : qs2 - qe1;
+			gap2 = gap2 > 0? gap2 : -gap2;
+			if (m > gap1 + gap2) break; // next gap is too far away to belong to this run
+			re1 = (int32_t)a[as1 + j].x;
+			qe1 = (int32_t)a[as1 + j].y;
+			gap1 = gap2;
+		}
+		if (l > k + 1) { // the run holds at least two long gaps
+			int j, end = K[l - 1];
+			for (j = K[k]; j < end; ++j)
+				a[as1 + j].y |= MM_SEED_IGNORE;
+			a[as1 + end].y |= MM_SEED_LONG_JOIN;
+		}
+		k = l; // resume from the first gap that was not absorbed
+	}
+	kfree(km, K);
+}
+
 static void mm_fix_bad_ends(const mm_reg1_t *r, const mm128_t *a, int bw, int min_match, int32_t *as, int32_t *cnt)
 {
 	int32_t i, l, m;
@@ -450,6 +515,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
 			mm_fix_bad_ends(r, a, opt->bw, opt->min_chain_score * 2, &as1, &cnt1);
 		}
 		mm_filter_bad_seeds(km, as1, cnt1, a, 10, 40, opt->max_gap>>1, 10);
+		mm_filter_bad_seeds_alt(km, as1, cnt1, a, 50);
 		mm_adjust_minier(mi, qseq0, &a[as1], &rs, &qs);
 		mm_adjust_minier(mi, qseq0, &a[as1 + cnt1 - 1], &re, &qe);
 	}
diff --git a/main.c b/main.c
index 4dc7205..c83371a 100644
--- a/main.c
+++ b/main.c
@@ -10,7 +10,7 @@
 #include "getopt.h"
 #endif
 
-#define MM_VERSION "2.10-r768-dirty"
+#define MM_VERSION "2.10-r769-dirty"
 
 #ifdef __linux__
 #include