r965: added --chain-gap-scale for #540
This commit is contained in:
parent
cdb7857841
commit
040f74102c
12
chain.c
12
chain.c
|
|
@ -19,7 +19,7 @@ static inline int ilog2_32(uint32_t v)
|
|||
return (t = v>>8) ? 8 + LogTable256[t] : LogTable256[v];
|
||||
}
|
||||
|
||||
mm128_t *mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
|
||||
mm128_t *mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float gap_scale, int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
|
||||
{ // TODO: make sure this works when n has more than 32 bits
|
||||
int32_t k, *f, *p, *t, *v, n_u, n_v;
|
||||
int64_t i, j, st = 0;
|
||||
|
|
@ -52,7 +52,7 @@ mm128_t *mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int m
|
|||
if (i - st > max_iter) st = i - max_iter;
|
||||
for (j = i - 1; j >= st; --j) {
|
||||
int64_t dr = ri - a[j].x;
|
||||
int32_t dq = qi - (int32_t)a[j].y, dd, sc, log_dd;
|
||||
int32_t dq = qi - (int32_t)a[j].y, dd, sc, log_dd, gap_cost;
|
||||
int32_t sidj = (a[j].y & MM_SEED_SEG_MASK) >> MM_SEED_SEG_SHIFT;
|
||||
if ((sidi == sidj && dr == 0) || dq <= 0) continue; // don't skip if an anchor is used by multiple segments; see below
|
||||
if ((sidi == sidj && dq > max_dist_y) || dq > max_dist_x) continue;
|
||||
|
|
@ -62,14 +62,16 @@ mm128_t *mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int m
|
|||
min_d = dq < dr? dq : dr;
|
||||
sc = min_d > q_span? q_span : dq < dr? dq : dr;
|
||||
log_dd = dd? ilog2_32(dd) : 0;
|
||||
gap_cost = 0;
|
||||
if (is_cdna || sidi != sidj) {
|
||||
int c_log, c_lin;
|
||||
c_lin = (int)(dd * .01 * avg_qspan);
|
||||
c_log = log_dd;
|
||||
if (sidi != sidj && dr == 0) ++sc; // possibly due to overlapping paired ends; give a minor bonus
|
||||
else if (dr > dq || sidi != sidj) sc -= c_lin < c_log? c_lin : c_log;
|
||||
else sc -= c_lin + (c_log>>1);
|
||||
} else sc -= (int)(dd * .01 * avg_qspan) + (log_dd>>1);
|
||||
else if (dr > dq || sidi != sidj) gap_cost = c_lin < c_log? c_lin : c_log;
|
||||
else gap_cost = c_lin + (c_log>>1);
|
||||
} else gap_cost = (int)(dd * .01 * avg_qspan) + (log_dd>>1);
|
||||
sc -= (int)((double)gap_cost * gap_scale + .499);
|
||||
sc += f[j];
|
||||
if (sc > max_f) {
|
||||
max_f = sc, max_j = j;
|
||||
|
|
|
|||
4
main.c
4
main.c
|
|
@ -7,7 +7,7 @@
|
|||
#include "mmpriv.h"
|
||||
#include "ketopt.h"
|
||||
|
||||
#define MM_VERSION "2.17-r963-dirty"
|
||||
#define MM_VERSION "2.17-r965-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
@ -67,6 +67,7 @@ static ko_longopt_t long_options[] = {
|
|||
{ "junc-bed", ko_required_argument, 340 },
|
||||
{ "junc-bonus", ko_required_argument, 341 },
|
||||
{ "sam-hit-only", ko_no_argument, 342 },
|
||||
{ "chain-gap-scale",ko_required_argument, 343 },
|
||||
{ "help", ko_no_argument, 'h' },
|
||||
{ "max-intron-len", ko_required_argument, 'G' },
|
||||
{ "version", ko_no_argument, 'V' },
|
||||
|
|
@ -211,6 +212,7 @@ int main(int argc, char *argv[])
|
|||
else if (c == 340) junc_bed = o.arg; // --junc-bed
|
||||
else if (c == 341) opt.junc_bonus = atoi(o.arg); // --junc-bonus
|
||||
else if (c == 342) opt.flag |= MM_F_SAM_HIT_ONLY; // --sam-hit-only
|
||||
else if (c == 343) opt.chain_gap_scale = atof(o.arg); // --chain-gap-scale
|
||||
else if (c == 314) { // --frag
|
||||
yes_or_no(&opt, MM_F_FRAG_MODE, o.longidx, o.arg, 1);
|
||||
} else if (c == 315) { // --secondary
|
||||
|
|
|
|||
4
map.c
4
map.c
|
|
@ -313,7 +313,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|||
if (max_chain_gap_ref < opt->max_gap) max_chain_gap_ref = opt->max_gap;
|
||||
} else max_chain_gap_ref = opt->max_gap;
|
||||
|
||||
a = mm_chain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
||||
a = mm_chain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score, opt->chain_gap_scale, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
||||
|
||||
if (opt->max_occ > opt->mid_occ && rep_len > 0) {
|
||||
int rechain = 0;
|
||||
|
|
@ -335,7 +335,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|||
kfree(b->km, mini_pos);
|
||||
if (opt->flag & MM_F_HEAP_SORT) a = collect_seed_hits_heap(b->km, opt, opt->max_occ, mi, qname, &mv, qlen_sum, &n_a, &rep_len, &n_mini_pos, &mini_pos);
|
||||
else a = collect_seed_hits(b->km, opt, opt->max_occ, mi, qname, &mv, qlen_sum, &n_a, &rep_len, &n_mini_pos, &mini_pos);
|
||||
a = mm_chain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
||||
a = mm_chain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score, opt->chain_gap_scale, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
||||
}
|
||||
}
|
||||
b->frag_gap = max_chain_gap_ref;
|
||||
|
|
|
|||
|
|
@ -117,6 +117,7 @@ typedef struct {
|
|||
int max_chain_skip, max_chain_iter;
|
||||
int min_cnt; // min number of minimizers on each chain
|
||||
int min_chain_score; // min chaining score
|
||||
float chain_gap_scale;
|
||||
|
||||
float mask_level;
|
||||
float pri_ratio;
|
||||
|
|
|
|||
|
|
@ -245,6 +245,9 @@ Check up to
|
|||
partial chains during chaining [5000]. This is a heuristic to avoid quadratic
|
||||
time complexity in the worst case.
|
||||
.TP
|
||||
.BI --chain-gap-scale \ FLOAT
|
||||
Scale of gap cost during chaining [1.0]
|
||||
.TP
|
||||
.B --no-long-join
|
||||
Disable the long gap patching heuristic. When this option is applied, the
|
||||
maximum alignment gap is mostly controlled by
|
||||
|
|
|
|||
2
mmpriv.h
2
mmpriv.h
|
|
@ -69,7 +69,7 @@ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
|
|||
void mm_idxopt_init(mm_idxopt_t *opt);
|
||||
const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n);
|
||||
int32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f);
|
||||
mm128_t *mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km);
|
||||
mm128_t *mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float gap_scale, int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km);
|
||||
mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a);
|
||||
|
||||
mm_reg1_t *mm_gen_regs(void *km, uint32_t hash, int qlen, int n_u, uint64_t *u, mm128_t *a);
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|||
opt->max_gap_ref = -1;
|
||||
opt->max_chain_skip = 25;
|
||||
opt->max_chain_iter = 5000;
|
||||
opt->chain_gap_scale = 1.0f;
|
||||
|
||||
opt->mask_level = 0.5f;
|
||||
opt->pri_ratio = 0.8f;
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ cdef extern from "minimap.h":
|
|||
int max_chain_skip, max_chain_iter
|
||||
int min_cnt
|
||||
int min_chain_score
|
||||
float chain_gap_scale
|
||||
float mask_level
|
||||
float pri_ratio
|
||||
int best_n
|
||||
|
|
|
|||
Loading…
Reference in New Issue