From aefa2c0d8670d88153d264c2ad4a3e7dcc82247a Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 1 Oct 2021 16:58:03 -0400 Subject: [PATCH] added --chain-skip-scale --- main.c | 2 ++ map.c | 11 +++++++---- minimap.h | 1 + options.c | 1 + python/cmappy.pxd | 1 + 5 files changed, 12 insertions(+), 4 deletions(-) diff --git a/main.c b/main.c index 84d3129..e60a6a4 100644 --- a/main.c +++ b/main.c @@ -75,6 +75,7 @@ static ko_longopt_t long_options[] = { { "qstrand", ko_no_argument, 348 }, { "cap-kalloc", ko_required_argument, 349 }, { "q-occ-frac", ko_required_argument, 350 }, + { "chain-skip-scale",ko_required_argument,351 }, { "help", ko_no_argument, 'h' }, { "max-intron-len", ko_required_argument, 'G' }, { "version", ko_no_argument, 'V' }, @@ -225,6 +226,7 @@ int main(int argc, char *argv[]) else if (c == 341) opt.junc_bonus = atoi(o.arg); // --junc-bonus else if (c == 342) opt.flag |= MM_F_SAM_HIT_ONLY; // --sam-hit-only else if (c == 343) opt.chain_gap_scale = atof(o.arg); // --chain-gap-scale + else if (c == 351) opt.chain_skip_scale = atof(o.arg); // --chain-skip-scale else if (c == 344) alt_list = o.arg; // --alt else if (c == 345) opt.alt_drop = atof(o.arg); // --alt-drop else if (c == 346) opt.mask_len = mm_parse_num(o.arg); // --mask-len diff --git a/map.c b/map.c index 7da9a49..eb541ca 100644 --- a/map.c +++ b/map.c @@ -240,6 +240,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char ** mm128_v mv = {0,0,0}; mm_reg1_t *regs0; km_stat_t kmst; + float chn_pen_gap, chn_pen_skip; for (i = 0, qlen_sum = 0; i < n_segs; ++i) qlen_sum += qlens[i], n_regs[i] = 0, regs[i] = 0; @@ -274,12 +275,14 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char ** if (max_chain_gap_ref < opt->max_gap) max_chain_gap_ref = opt->max_gap; } else max_chain_gap_ref = opt->max_gap; + chn_pen_gap = opt->chain_gap_scale * 0.01 * mi->k; + chn_pen_skip = opt->chain_skip_scale * 0.01 * mi->k; if (opt->flag & MM_F_RMQ) { a = mg_lchain_rmq(opt->max_gap, opt->rmq_inner_dist, opt->bw, opt->max_chain_skip, opt->rmq_size_cap, opt->min_cnt, opt->min_chain_score, - opt->chain_gap_scale * 0.01 * mi->k, 0.0f, n_a, a, &n_regs0, &u, b->km); + chn_pen_gap, chn_pen_skip, n_a, a, &n_regs0, &u, b->km); } else { a = mg_lchain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score, - opt->chain_gap_scale * 0.01 * mi->k, 0.0f, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km); + chn_pen_gap, chn_pen_skip, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km); } if (opt->bw_long > opt->bw && (opt->flag & (MM_F_SPLICE|MM_F_SR|MM_F_NO_LJOIN)) == 0 && n_segs == 1 && n_regs0 > 1) { // re-chain/long-join for long sequences @@ -290,7 +293,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char ** kfree(b->km, u); radix_sort_128x(a, a + n_a); a = mg_lchain_rmq(opt->max_gap, opt->rmq_inner_dist, opt->bw_long, opt->max_chain_skip, opt->rmq_size_cap, opt->min_cnt, opt->min_chain_score, - opt->chain_gap_scale * 0.01 * mi->k, 0.0f, n_a, a, &n_regs0, &u, b->km); + chn_pen_gap, chn_pen_skip, n_a, a, &n_regs0, &u, b->km); } } else if (opt->max_occ > opt->mid_occ && rep_len > 0 && !(opt->flag & MM_F_RMQ)) { // re-chain, mostly for short reads int rechain = 0; @@ -313,7 +316,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char ** if (opt->flag & MM_F_HEAP_SORT) a = collect_seed_hits_heap(b->km, opt, opt->max_occ, mi, qname, &mv, qlen_sum, &n_a, &rep_len, &n_mini_pos, &mini_pos); else a = collect_seed_hits(b->km, opt, opt->max_occ, mi, qname, &mv, qlen_sum, &n_a, &rep_len, &n_mini_pos, &mini_pos); a = mg_lchain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score, - opt->chain_gap_scale * 0.01 * mi->k, 0.0f, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km); + chn_pen_gap, chn_pen_skip, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km); } } b->frag_gap = max_chain_gap_ref; diff --git a/minimap.h b/minimap.h index 2204b6c..3846c07 100644 --- a/minimap.h +++ b/minimap.h @@ -135,6 +135,7 @@ typedef struct { int min_cnt; // min number of minimizers on each chain int min_chain_score; // min chaining score float chain_gap_scale; + float chain_skip_scale; int rmq_size_cap, rmq_inner_dist; int rmq_rescue_size; float rmq_rescue_ratio; diff --git a/options.c b/options.c index e62515d..d4ef242 100644 --- a/options.c +++ b/options.c @@ -33,6 +33,7 @@ void mm_mapopt_init(mm_mapopt_t *opt) opt->rmq_rescue_size = 1000; opt->rmq_rescue_ratio = 0.1f; opt->chain_gap_scale = 0.8f; + opt->chain_skip_scale = 0.04f; opt->max_max_occ = 4095; opt->occ_dist = 500; diff --git a/python/cmappy.pxd b/python/cmappy.pxd index 5eedb90..c208c4c 100644 --- a/python/cmappy.pxd +++ b/python/cmappy.pxd @@ -23,6 +23,7 @@ cdef extern from "minimap.h": int min_cnt int min_chain_score float chain_gap_scale + float chain_skip_scale int rmq_size_cap, rmq_inner_dist int rmq_rescue_size float rmq_rescue_ratio