diff --git a/hit.c b/hit.c index c2e8180..2cf7a63 100644 --- a/hit.c +++ b/hit.c @@ -252,7 +252,7 @@ void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs) // keep mm_reg1_t::{id, mm_set_sam_pri(n_regs, regs); } -void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r) +void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int check_strand, int min_strand_sc, int *n_, mm_reg1_t *r) { if (pri_ratio > 0.0f && *n_ > 0) { int i, k, n = *n_, n_2nd = 0; @@ -264,6 +264,9 @@ void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, if (!(r[i].qs == r[p].qs && r[i].qe == r[p].qe && r[i].rid == r[p].rid && r[i].rs == r[p].rs && r[i].re == r[p].re)) // not identical hits r[k++] = r[i], ++n_2nd; else if (r[i].p) free(r[i].p); + } else if (check_strand && n_2nd < best_n && r[i].score > min_strand_sc && r[i].rev != r[p].rev) { + r[i].strand_retained = 1; + r[k++] = r[i], ++n_2nd; } else if (r[i].p) free(r[i].p); } if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync() @@ -271,6 +274,19 @@ void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, } } +int mm_filter_strand_retained(int n_regs, mm_reg1_t *r) +{ + int i, k; + for (i = k = 0; i < n_regs; ++i) { + int p = r[i].parent; + if (!r[i].strand_retained || r[i].div < r[p].div * 5.0f) { + if (k < i) r[k++] = r[i]; + else ++k; + } + } + return k; +} + void mm_filter_regs(const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs) { // NB: after this call, mm_reg1_t::parent can be -1 if its parent filtered out int i, k; diff --git a/main.c b/main.c index e60a6a4..fb8e1bc 100644 --- a/main.c +++ b/main.c @@ -7,7 +7,7 @@ #include "mmpriv.h" #include "ketopt.h" -#define MM_VERSION "2.22-r1105-dirty" +#define MM_VERSION "2.22-r1108-dirty" #ifdef __linux__ #include diff --git a/map.c b/map.c index eb541ca..98d3d7f 100644 --- a/map.c +++ b/map.c @@ -212,7 +212,7 @@ static void chain_post(const mm_mapopt_t *opt, int max_chain_gap_ref, const mm_i { if (!(opt->flag & MM_F_ALL_CHAINS)) { // don't choose primary mapping(s) mm_set_parent(km, opt->mask_level, opt->mask_len, *n_regs, regs, opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL, opt->alt_drop); - if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs); + if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, 1, opt->max_gap, n_regs, regs); else mm_select_sub_multi(km, opt->pri_ratio, 0.2f, 0.7f, max_chain_gap_ref, mi->k*2, opt->best_n, n_segs, qlens, n_regs, regs); } } @@ -223,7 +223,7 @@ static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *k regs = mm_align_skeleton(km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs() if (!(opt->flag & MM_F_ALL_CHAINS)) { // don't choose primary mapping(s) mm_set_parent(km, opt->mask_level, opt->mask_len, *n_regs, regs, opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL, opt->alt_drop); - mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs); + mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, 0, opt->max_gap, n_regs, regs); mm_set_sam_pri(*n_regs, regs); } return regs; @@ -335,8 +335,10 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char ** i == regs0[j].as? 0 : ((int32_t)a[i].y - (int32_t)a[i-1].y) - ((int32_t)a[i].x - (int32_t)a[i-1].x)); chain_post(opt, max_chain_gap_ref, mi, b->km, qlen_sum, n_segs, qlens, &n_regs0, regs0, a); - if (!is_sr && !(opt->flag&MM_F_QSTRAND)) + if (!is_sr && !(opt->flag&MM_F_QSTRAND)) { mm_est_err(mi, qlen_sum, n_regs0, regs0, a, n_mini_pos, mini_pos); + n_regs0 = mm_filter_strand_retained(n_regs0, regs0); + } if (n_segs == 1) { // uni-segment regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], &n_regs0, regs0, a); @@ -509,7 +511,7 @@ static void merge_hits(step_t *s) mm_hit_sort(km, &s->n_reg[k], s->reg[k], opt->alt_drop); mm_set_parent(km, opt->mask_level, opt->mask_len, s->n_reg[k], s->reg[k], opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL, opt->alt_drop); if (!(opt->flag & MM_F_ALL_CHAINS)) { - mm_select_sub(km, opt->pri_ratio, s->p->mi->k*2, opt->best_n, &s->n_reg[k], s->reg[k]); + mm_select_sub(km, opt->pri_ratio, s->p->mi->k*2, opt->best_n, 0, opt->max_gap, &s->n_reg[k], s->reg[k]); mm_set_sam_pri(s->n_reg[k], s->reg[k]); } mm_set_mapq(km, s->n_reg[k], s->reg[k], opt->min_chain_score, opt->a, rep_len, !!(opt->flag & MM_F_SR)); diff --git a/minimap.h b/minimap.h index 3846c07..5a0ff2f 100644 --- a/minimap.h +++ b/minimap.h @@ -108,7 +108,7 @@ typedef struct { int32_t mlen, blen; // seeded exact match length; seeded alignment block length int32_t n_sub; // number of suboptimal mappings int32_t score0; // initial chaining score (before chain merging/spliting) - uint32_t mapq:8, split:2, rev:1, inv:1, sam_pri:1, proper_frag:1, pe_thru:1, seg_split:1, seg_id:8, split_inv:1, is_alt:1, dummy:6; + uint32_t mapq:8, split:2, rev:1, inv:1, sam_pri:1, proper_frag:1, pe_thru:1, seg_split:1, seg_id:8, split_inv:1, is_alt:1, strand_retained:1, dummy:5; uint32_t hash; float div; mm_extra_t *p; diff --git a/mmpriv.h b/mmpriv.h index a2b5a80..e765b47 100644 --- a/mmpriv.h +++ b/mmpriv.h @@ -91,8 +91,9 @@ void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs); int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a); int mm_set_sam_pri(int n, mm_reg1_t *r); void mm_set_parent(void *km, float mask_level, int mask_len, int n, mm_reg1_t *r, int sub_diff, int hard_mask_level, float alt_diff_frac); -void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r); +void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int check_strand, int min_strand_sc, int *n_, mm_reg1_t *r); void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r); +int mm_filter_strand_retained(int n_regs, mm_reg1_t *r); void mm_filter_regs(const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs); void mm_hit_sort(void *km, int *n_regs, mm_reg1_t *r, float alt_diff_frac); void mm_set_mapq(void *km, int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr);