r174: make max-chain-skip work
The max-chain-skip heuristics did not work due to a bug. Without this heuristics, chaining is too slow for long-read overlap.
This commit is contained in:
parent
33451aba45
commit
eeeb2ffb68
14
chain.c
14
chain.c
|
|
@ -42,11 +42,6 @@ int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int
|
|||
int64_t dr = ri - a[j].x;
|
||||
int32_t dq = qi - (int32_t)a[j].y, dd, sc;
|
||||
if (dr == 0 || dq <= 0 || dq > max_dist) continue;
|
||||
if (t[j] == i) { // FIXME: never enter this block. i.e. max_skip has no effect at all. This may affect speed.
|
||||
if (p[j] >= 0) t[p[j]] = i;
|
||||
if (++n_skip > max_skip) break;
|
||||
continue;
|
||||
}
|
||||
dd = dr > dq? dr - dq : dq - dr;
|
||||
if (dd > bw) continue;
|
||||
min_d = dq < dr? dq : dr;
|
||||
|
|
@ -54,7 +49,14 @@ int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int
|
|||
sc -= dd? ilog2_32(dd) * 2 : 0;
|
||||
if (min_d > q_span) sc -= ilog2_32(min_d) / 2;
|
||||
sc += f[j];
|
||||
if (sc > max_f) max_f = sc, max_j = j;
|
||||
if (sc > max_f) {
|
||||
max_f = sc, max_j = j;
|
||||
if (n_skip > 0) --n_skip;
|
||||
} else if (t[j] == i) {
|
||||
if (++n_skip > max_skip)
|
||||
break;
|
||||
}
|
||||
if (p[j] >= 0) t[p[j]] = i;
|
||||
}
|
||||
if (max_j >= 0) f[i] = max_f, p[i] = max_j;
|
||||
else f[i] = q_span, p[i] = -1;
|
||||
|
|
|
|||
4
main.c
4
main.c
|
|
@ -10,7 +10,7 @@
|
|||
#include "minimap.h"
|
||||
#include "mmpriv.h"
|
||||
|
||||
#define MM_VERSION "2.0-r173-pre"
|
||||
#define MM_VERSION "2.0-r174-pre"
|
||||
|
||||
void liftrlimit()
|
||||
{
|
||||
|
|
@ -49,6 +49,7 @@ static struct option long_options[] = {
|
|||
{ "print-qname", no_argument, 0, 0 },
|
||||
{ "no-self", no_argument, 0, 0 },
|
||||
{ "print-seed", no_argument, 0, 0 },
|
||||
{ "max-chain-skip", required_argument, 0, 0 },
|
||||
{ "version", no_argument, 0, 'V' },
|
||||
{ "min-count", required_argument, 0, 'n' },
|
||||
{ "min-chain-score",required_argument, 0, 'm' },
|
||||
|
|
@ -103,6 +104,7 @@ int main(int argc, char *argv[])
|
|||
else if (c == 0 && long_idx == 4) mm_dbg_flag |= MM_DBG_PRINT_QNAME; // --print-qname
|
||||
else if (c == 0 && long_idx == 5) opt.flag |= MM_F_NO_SELF; // --no-self
|
||||
else if (c == 0 && long_idx == 6) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_SEED; // --print-seed
|
||||
else if (c == 0 && long_idx == 7) opt.max_chain_skip = atoi(optarg); // --max-chain-skip
|
||||
else if (c == 'V') {
|
||||
puts(MM_VERSION);
|
||||
return 0;
|
||||
|
|
|
|||
2
map.c
2
map.c
|
|
@ -19,7 +19,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|||
opt->min_chain_score = 40;
|
||||
opt->bw = 500;
|
||||
opt->max_gap = 5000;
|
||||
opt->max_chain_skip = 15;
|
||||
opt->max_chain_skip = 50;
|
||||
opt->min_seedcov_ratio = 0.0f;
|
||||
|
||||
opt->mask_level = 0.5f;
|
||||
|
|
|
|||
19
minimap2.1
19
minimap2.1
|
|
@ -1,4 +1,4 @@
|
|||
.TH minimap2 1 "11 July 2017" "minimap2-2.0-r172-pre" "Bioinformatics tools"
|
||||
.TH minimap2 1 "12 July 2017" "minimap2-2.0-r174-pre" "Bioinformatics tools"
|
||||
.SH NAME
|
||||
.PP
|
||||
minimap2 - mapping and alignment between collections of DNA sequences
|
||||
|
|
@ -192,6 +192,14 @@ PacBio/Oxford Nanopore read to reference mapping (-Hk19)
|
|||
.B asm1m
|
||||
Long assembly to reference mapping (-k19 -w19)
|
||||
.RE
|
||||
.TP
|
||||
.BI --max-chain-skip \ INT
|
||||
A heuristics that stops chaining early [50]. Minimap2 uses dynamic programming
|
||||
for chaining. The time complexity is quadratic in the number of seeds. This
|
||||
option makes minimap2 exits the inner loop if it repeatedly sees seeds already
|
||||
on chains. Set
|
||||
.I INT
|
||||
to a large number to switch off this heurstics.
|
||||
.SS Alignment options
|
||||
.TP 10
|
||||
.BI -A \ INT
|
||||
|
|
@ -243,6 +251,12 @@ takes little CPU time).
|
|||
.TP
|
||||
.B -V
|
||||
Print version number to stdout
|
||||
.TP
|
||||
.BI --mb-size \ STR
|
||||
Number of bases loaded into memory to process in a mini-batch [200M].
|
||||
Similar to option
|
||||
.BR -I ,
|
||||
K/M/G/k/m/g suffix is accepted. This option affects both indexing and mapping.
|
||||
.SS Miscellaneous options
|
||||
.TP 10
|
||||
.B --no-kalloc
|
||||
|
|
@ -253,6 +267,9 @@ multi-threading mode.
|
|||
.TP
|
||||
.B --print-qname
|
||||
Print query names to stderr, mostly to see which query is crashing minimap2.
|
||||
.TP
|
||||
.B --print-seed
|
||||
Print seed positions to stderr, for debugging only.
|
||||
.SH OUTPUT FORMAT
|
||||
.PP
|
||||
Minimap2 outputs mapping positions in the Pairwise mApping Format (PAF) by
|
||||
|
|
|
|||
Loading…
Reference in New Issue