r1108: fixed missing inversions for #816 and #806

This commit is contained in:
Heng Li 2021-10-04 16:34:30 -04:00
parent aefa2c0d86
commit 39bdd45875
5 changed files with 27 additions and 8 deletions

18
hit.c
View File

@ -252,7 +252,7 @@ void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs) // keep mm_reg1_t::{id,
mm_set_sam_pri(n_regs, regs);
}
void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r)
void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int check_strand, int min_strand_sc, int *n_, mm_reg1_t *r)
{
if (pri_ratio > 0.0f && *n_ > 0) {
int i, k, n = *n_, n_2nd = 0;
@ -264,6 +264,9 @@ void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_,
if (!(r[i].qs == r[p].qs && r[i].qe == r[p].qe && r[i].rid == r[p].rid && r[i].rs == r[p].rs && r[i].re == r[p].re)) // not identical hits
r[k++] = r[i], ++n_2nd;
else if (r[i].p) free(r[i].p);
} else if (check_strand && n_2nd < best_n && r[i].score > min_strand_sc && r[i].rev != r[p].rev) {
r[i].strand_retained = 1;
r[k++] = r[i], ++n_2nd;
} else if (r[i].p) free(r[i].p);
}
if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync()
@ -271,6 +274,19 @@ void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_,
}
}
/**
 * Compact the hit array in place, discarding hits that were flagged
 * `strand_retained` but whose divergence is not below five times the
 * divergence of the hit they name as parent.
 *
 * Hits without the `strand_retained` flag are always kept. Relative order of
 * the surviving hits is preserved.
 *
 * @param n_regs  number of entries in r[]
 * @param r       hit array; surviving entries are moved to the front
 * @return        number of entries kept (the new logical length of r[])
 *
 * NOTE(review): `parent` is used as an index into r[] while r[] is being
 * compacted by this same loop, so after an earlier removal the slot at that
 * index may already hold a different hit -- confirm this is acceptable.
 * NOTE(review): dropped entries are simply overwritten; nothing is freed and
 * the parent indices of the survivors are not renumbered here.
 */
int mm_filter_strand_retained(int n_regs, mm_reg1_t *r)
{
	int src, n_kept = 0;
	for (src = 0; src < n_regs; ++src) {
		int par = r[src].parent;
		// Drop only flagged hits that fail the divergence test against the parent.
		if (r[src].strand_retained && !(r[src].div < r[par].div * 5.0f))
			continue;
		// Skip the self-assignment while nothing has been removed yet.
		if (n_kept != src)
			r[n_kept] = r[src];
		++n_kept;
	}
	return n_kept;
}
void mm_filter_regs(const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs)
{ // NB: after this call, mm_reg1_t::parent can be -1 if its parent filtered out
int i, k;

2
main.c
View File

@ -7,7 +7,7 @@
#include "mmpriv.h"
#include "ketopt.h"
#define MM_VERSION "2.22-r1105-dirty"
#define MM_VERSION "2.22-r1108-dirty"
#ifdef __linux__
#include <sys/resource.h>

10
map.c
View File

@ -212,7 +212,7 @@ static void chain_post(const mm_mapopt_t *opt, int max_chain_gap_ref, const mm_i
{
if (!(opt->flag & MM_F_ALL_CHAINS)) { // don't choose primary mapping(s)
mm_set_parent(km, opt->mask_level, opt->mask_len, *n_regs, regs, opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL, opt->alt_drop);
if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, 1, opt->max_gap, n_regs, regs);
else mm_select_sub_multi(km, opt->pri_ratio, 0.2f, 0.7f, max_chain_gap_ref, mi->k*2, opt->best_n, n_segs, qlens, n_regs, regs);
}
}
@ -223,7 +223,7 @@ static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *k
regs = mm_align_skeleton(km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs()
if (!(opt->flag & MM_F_ALL_CHAINS)) { // don't choose primary mapping(s)
mm_set_parent(km, opt->mask_level, opt->mask_len, *n_regs, regs, opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL, opt->alt_drop);
mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, 0, opt->max_gap, n_regs, regs);
mm_set_sam_pri(*n_regs, regs);
}
return regs;
@ -335,8 +335,10 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
i == regs0[j].as? 0 : ((int32_t)a[i].y - (int32_t)a[i-1].y) - ((int32_t)a[i].x - (int32_t)a[i-1].x));
chain_post(opt, max_chain_gap_ref, mi, b->km, qlen_sum, n_segs, qlens, &n_regs0, regs0, a);
if (!is_sr && !(opt->flag&MM_F_QSTRAND))
if (!is_sr && !(opt->flag&MM_F_QSTRAND)) {
mm_est_err(mi, qlen_sum, n_regs0, regs0, a, n_mini_pos, mini_pos);
n_regs0 = mm_filter_strand_retained(n_regs0, regs0);
}
if (n_segs == 1) { // uni-segment
regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], &n_regs0, regs0, a);
@ -509,7 +511,7 @@ static void merge_hits(step_t *s)
mm_hit_sort(km, &s->n_reg[k], s->reg[k], opt->alt_drop);
mm_set_parent(km, opt->mask_level, opt->mask_len, s->n_reg[k], s->reg[k], opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL, opt->alt_drop);
if (!(opt->flag & MM_F_ALL_CHAINS)) {
mm_select_sub(km, opt->pri_ratio, s->p->mi->k*2, opt->best_n, &s->n_reg[k], s->reg[k]);
mm_select_sub(km, opt->pri_ratio, s->p->mi->k*2, opt->best_n, 0, opt->max_gap, &s->n_reg[k], s->reg[k]);
mm_set_sam_pri(s->n_reg[k], s->reg[k]);
}
mm_set_mapq(km, s->n_reg[k], s->reg[k], opt->min_chain_score, opt->a, rep_len, !!(opt->flag & MM_F_SR));

View File

@ -108,7 +108,7 @@ typedef struct {
int32_t mlen, blen; // seeded exact match length; seeded alignment block length
int32_t n_sub; // number of suboptimal mappings
int32_t score0; // initial chaining score (before chain merging/spliting)
uint32_t mapq:8, split:2, rev:1, inv:1, sam_pri:1, proper_frag:1, pe_thru:1, seg_split:1, seg_id:8, split_inv:1, is_alt:1, dummy:6;
uint32_t mapq:8, split:2, rev:1, inv:1, sam_pri:1, proper_frag:1, pe_thru:1, seg_split:1, seg_id:8, split_inv:1, is_alt:1, strand_retained:1, dummy:5;
uint32_t hash;
float div;
mm_extra_t *p;

View File

@ -91,8 +91,9 @@ void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs);
int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a);
int mm_set_sam_pri(int n, mm_reg1_t *r);
void mm_set_parent(void *km, float mask_level, int mask_len, int n, mm_reg1_t *r, int sub_diff, int hard_mask_level, float alt_diff_frac);
void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r);
void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int check_strand, int min_strand_sc, int *n_, mm_reg1_t *r);
void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r);
int mm_filter_strand_retained(int n_regs, mm_reg1_t *r);
void mm_filter_regs(const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs);
void mm_hit_sort(void *km, int *n_regs, mm_reg1_t *r, float alt_diff_frac);
void mm_set_mapq(void *km, int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr);