r846: added --hard-mask-level for #244

This commit is contained in:
Heng Li 2018-09-27 14:46:26 -04:00
parent c57b59f02f
commit 1077b7ddc8
6 changed files with 18 additions and 8 deletions

4
hit.c
View File

@ -106,7 +106,7 @@ void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a)
r->split |= 1, r2->split |= 2; r->split |= 1, r2->split |= 2;
} }
void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff) // and compute mm_reg1_t::subsc void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff, int hard_mask_level) // and compute mm_reg1_t::subsc
{ {
int i, j, k, *w; int i, j, k, *w;
uint64_t *cov; uint64_t *cov;
@ -118,6 +118,7 @@ void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff
for (i = 1, k = 1; i < n; ++i) { for (i = 1, k = 1; i < n; ++i) {
mm_reg1_t *ri = &r[i]; mm_reg1_t *ri = &r[i];
int si = ri->qs, ei = ri->qe, n_cov = 0, uncov_len = 0; int si = ri->qs, ei = ri->qe, n_cov = 0, uncov_len = 0;
if (hard_mask_level) goto skip_uncov;
for (j = 0; j < k; ++j) { // traverse existing primary hits to find overlapping hits for (j = 0; j < k; ++j) { // traverse existing primary hits to find overlapping hits
mm_reg1_t *rp = &r[w[j]]; mm_reg1_t *rp = &r[w[j]];
int sj = rp->qs, ej = rp->qe; int sj = rp->qs, ej = rp->qe;
@ -137,6 +138,7 @@ void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff
} }
if (ei > x) uncov_len += ei - x; if (ei > x) uncov_len += ei - x;
} }
skip_uncov:
for (j = 0; j < k; ++j) { // traverse existing primary hits again for (j = 0; j < k; ++j) { // traverse existing primary hits again
mm_reg1_t *rp = &r[w[j]]; mm_reg1_t *rp = &r[w[j]];
int sj = rp->qs, ej = rp->qe, min, max, ol; int sj = rp->qs, ej = rp->qe, min, max, ol;

4
main.c
View File

@ -6,7 +6,7 @@
#include "mmpriv.h" #include "mmpriv.h"
#include "ketopt.h" #include "ketopt.h"
#define MM_VERSION "2.12-r845-dirty" #define MM_VERSION "2.12-r846-dirty"
#ifdef __linux__ #ifdef __linux__
#include <sys/resource.h> #include <sys/resource.h>
@ -59,6 +59,7 @@ static ko_longopt_t long_options[] = {
{ "paf-no-hit", ko_no_argument, 333 }, { "paf-no-hit", ko_no_argument, 333 },
{ "split-prefix", ko_required_argument, 334 }, { "split-prefix", ko_required_argument, 334 },
{ "no-end-flt", ko_no_argument, 335 }, { "no-end-flt", ko_no_argument, 335 },
{ "hard-mask-level",ko_no_argument, 336 },
{ "help", ko_no_argument, 'h' }, { "help", ko_no_argument, 'h' },
{ "max-intron-len", ko_required_argument, 'G' }, { "max-intron-len", ko_required_argument, 'G' },
{ "version", ko_no_argument, 'V' }, { "version", ko_no_argument, 'V' },
@ -188,6 +189,7 @@ int main(int argc, char *argv[])
else if (c == 333) opt.flag |= MM_F_PAF_NO_HIT; // --paf-no-hit else if (c == 333) opt.flag |= MM_F_PAF_NO_HIT; // --paf-no-hit
else if (c == 334) opt.split_prefix = o.arg; // --split-prefix else if (c == 334) opt.split_prefix = o.arg; // --split-prefix
else if (c == 335) opt.flag |= MM_F_NO_END_FLT; // --no-end-flt else if (c == 335) opt.flag |= MM_F_NO_END_FLT; // --no-end-flt
else if (c == 336) opt.flag |= MM_F_HARD_MLEVEL; // --hard-mask-level
else if (c == 314) { // --frag else if (c == 314) { // --frag
yes_or_no(&opt, MM_F_FRAG_MODE, o.longidx, o.arg, 1); yes_or_no(&opt, MM_F_FRAG_MODE, o.longidx, o.arg, 1);
} else if (c == 315) { // --secondary } else if (c == 315) { // --secondary

8
map.c
View File

@ -248,7 +248,7 @@ static mm128_t *collect_seed_hits(void *km, const mm_mapopt_t *opt, int max_occ,
static void chain_post(const mm_mapopt_t *opt, int max_chain_gap_ref, const mm_idx_t *mi, void *km, int qlen, int n_segs, const int *qlens, int *n_regs, mm_reg1_t *regs, mm128_t *a) static void chain_post(const mm_mapopt_t *opt, int max_chain_gap_ref, const mm_idx_t *mi, void *km, int qlen, int n_segs, const int *qlens, int *n_regs, mm_reg1_t *regs, mm128_t *a)
{ {
if (!(opt->flag & MM_F_ALL_CHAINS)) { // don't choose primary mapping(s) if (!(opt->flag & MM_F_ALL_CHAINS)) { // don't choose primary mapping(s)
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b); mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL);
if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs); if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
else mm_select_sub_multi(km, opt->pri_ratio, 0.2f, 0.7f, max_chain_gap_ref, mi->k*2, opt->best_n, n_segs, qlens, n_regs, regs); else mm_select_sub_multi(km, opt->pri_ratio, 0.2f, 0.7f, max_chain_gap_ref, mi->k*2, opt->best_n, n_segs, qlens, n_regs, regs);
if (!(opt->flag & (MM_F_SPLICE|MM_F_SR|MM_F_NO_LJOIN))) // long join not working well without primary chains if (!(opt->flag & (MM_F_SPLICE|MM_F_SR|MM_F_NO_LJOIN))) // long join not working well without primary chains
@ -261,7 +261,7 @@ static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *k
if (!(opt->flag & MM_F_CIGAR)) return regs; if (!(opt->flag & MM_F_CIGAR)) return regs;
regs = mm_align_skeleton(km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs() regs = mm_align_skeleton(km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs()
if (!(opt->flag & MM_F_ALL_CHAINS)) { // don't choose primary mapping(s) if (!(opt->flag & MM_F_ALL_CHAINS)) { // don't choose primary mapping(s)
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b); mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL);
mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs); mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
mm_set_sam_pri(*n_regs, regs); mm_set_sam_pri(*n_regs, regs);
} }
@ -359,7 +359,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
seg = mm_seg_gen(b->km, hash, n_segs, qlens, n_regs0, regs0, n_regs, regs, a); // split fragment chain to separate segment chains seg = mm_seg_gen(b->km, hash, n_segs, qlens, n_regs0, regs0, n_regs, regs, a); // split fragment chain to separate segment chains
free(regs0); free(regs0);
for (i = 0; i < n_segs; ++i) { for (i = 0; i < n_segs; ++i) {
mm_set_parent(b->km, opt->mask_level, n_regs[i], regs[i], opt->a * 2 + opt->b); // update mm_reg1_t::parent mm_set_parent(b->km, opt->mask_level, n_regs[i], regs[i], opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL); // update mm_reg1_t::parent
regs[i] = align_regs(opt, mi, b->km, qlens[i], seqs[i], &n_regs[i], regs[i], seg[i].a); regs[i] = align_regs(opt, mi, b->km, qlens[i], seqs[i], &n_regs[i], regs[i], seg[i].a);
mm_set_mapq(b->km, n_regs[i], regs[i], opt->min_chain_score, opt->a, rep_len, is_sr); mm_set_mapq(b->km, n_regs[i], regs[i], opt->min_chain_score, opt->a, rep_len, is_sr);
} }
@ -502,7 +502,7 @@ static void merge_hits(step_t *s)
} }
} }
mm_hit_sort(km, &s->n_reg[k], s->reg[k]); mm_hit_sort(km, &s->n_reg[k], s->reg[k]);
mm_set_parent(km, opt->mask_level, s->n_reg[k], s->reg[k], opt->a * 2 + opt->b); mm_set_parent(km, opt->mask_level, s->n_reg[k], s->reg[k], opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL);
if (!(opt->flag & MM_F_ALL_CHAINS)) { if (!(opt->flag & MM_F_ALL_CHAINS)) {
mm_select_sub(km, opt->pri_ratio, s->p->mi->k*2, opt->best_n, &s->n_reg[k], s->reg[k]); mm_select_sub(km, opt->pri_ratio, s->p->mi->k*2, opt->best_n, &s->n_reg[k], s->reg[k]);
mm_set_sam_pri(s->n_reg[k], s->reg[k]); mm_set_sam_pri(s->n_reg[k], s->reg[k]);

View File

@ -34,6 +34,7 @@
#define MM_F_EQX 0x4000000 // use =/X instead of M #define MM_F_EQX 0x4000000 // use =/X instead of M
#define MM_F_PAF_NO_HIT 0x8000000 // output unmapped reads to PAF #define MM_F_PAF_NO_HIT 0x8000000 // output unmapped reads to PAF
#define MM_F_NO_END_FLT 0x10000000 #define MM_F_NO_END_FLT 0x10000000
#define MM_F_HARD_MLEVEL 0x20000000
#define MM_I_HPC 0x1 #define MM_I_HPC 0x1
#define MM_I_NO_SEQ 0x2 #define MM_I_NO_SEQ 0x2

View File

@ -193,7 +193,7 @@ Primarily used for all-vs-all read overlapping.
.BI -p \ FLOAT .BI -p \ FLOAT
Minimal secondary-to-primary score ratio to output secondary mappings [0.8]. Minimal secondary-to-primary score ratio to output secondary mappings [0.8].
Between two chains overlaping over half of the shorter chain (controlled by Between two chains overlaping over half of the shorter chain (controlled by
.BR --mask-level ), .BR -M ),
the chain with a lower score is secondary to the chain with a higher score. the chain with a lower score is secondary to the chain with a higher score.
If the ratio of the scores is below If the ratio of the scores is below
.IR FLOAT , .IR FLOAT ,
@ -226,6 +226,11 @@ Mark as secondary a chain that overlaps with a better chain by
.I FLOAT .I FLOAT
or more of the shorter chain [0.5] or more of the shorter chain [0.5]
.TP .TP
.B --hard-mask-level
Honor option
.B -M
and disable a heurstic to save unmapped subsequences.
.TP
.BI --max-chain-skip \ INT .BI --max-chain-skip \ INT
A heuristics that stops chaining early [50]. Minimap2 uses dynamic programming A heuristics that stops chaining early [50]. Minimap2 uses dynamic programming
for chaining. The time complexity is quadratic in the number of seeds. This for chaining. The time complexity is quadratic in the number of seeds. This

View File

@ -75,7 +75,7 @@ void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a);
void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs); void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs);
int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a); int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a);
int mm_set_sam_pri(int n, mm_reg1_t *r); int mm_set_sam_pri(int n, mm_reg1_t *r);
void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff); void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r, int sub_diff, int hard_mask_level);
void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r); void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r);
void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r); void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r);
void mm_filter_regs(const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs); void mm_filter_regs(const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs);