r190: default -k to 15; added -x map-ont

This commit is contained in:
Heng Li 2017-07-19 10:11:14 -04:00
parent 470021fd27
commit 4aff301ef4
2 changed files with 60 additions and 40 deletions

22
main.c
View File

@ -10,7 +10,7 @@
#include "minimap.h"
#include "mmpriv.h"
#define MM_VERSION "2.0-r189-dirty"
#define MM_VERSION "2.0-r190-dirty"
void liftrlimit()
{
@ -63,7 +63,7 @@ static struct option long_options[] = {
int main(int argc, char *argv[])
{
mm_mapopt_t opt;
int i, c, k = 17, w = -1, bucket_bits = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx, is_hpc = 0, long_idx;
int i, c, k = 15, w = -1, bucket_bits = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx, is_hpc = 0, long_idx;
int minibatch_size = 200000000;
uint64_t batch_size = 4000000000ULL;
mm_bseq_file_t *fp = 0;
@ -135,8 +135,10 @@ int main(int argc, char *argv[])
opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.max_gap = 10000, opt.max_chain_skip = 25;
minibatch_size = 500000000;
is_hpc = 1, k = 19, w = 5;
} else if (strcmp(optarg, "map10k") == 0) {
} else if (strcmp(optarg, "map10k") == 0 || strcmp(optarg, "map-pb") == 0) {
is_hpc = 1, k = 19;
} else if (strcmp(optarg, "map-ont") == 0) {
is_hpc = 0, k = 15;
} else if (strcmp(optarg, "asm5") == 0) {
k = 19, w = 19;
opt.a = 1, opt.b = 19, opt.q = 39, opt.q2 = 81, opt.e = 3, opt.e2 = 1, opt.zdrop = 200;
@ -172,12 +174,6 @@ int main(int argc, char *argv[])
fprintf(stderr, " -X skip self and dual mappings (for the all-vs-all mode)\n");
fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n);
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
fprintf(stderr, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n");
fprintf(stderr, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n");
fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n");
fprintf(stderr, " asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n");
fprintf(stderr, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n");
fprintf(stderr, " Alignment:\n");
fprintf(stderr, " -A INT matching score [%d]\n", opt.a);
fprintf(stderr, " -B INT mismatch penalty [%d]\n", opt.b);
@ -193,6 +189,14 @@ int main(int argc, char *argv[])
fprintf(stderr, " -K NUM minibatch size [200M]\n");
// fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose);
fprintf(stderr, " -V show version number\n");
fprintf(stderr, " Preset:\n");
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
fprintf(stderr, " map10k/map-pb: -Hk19 (PacBio/ONT vs reference mapping)\n");
fprintf(stderr, " map-ont: -k15 (slightly more sensitive than 'map10k' for ONT vs reference)\n");
fprintf(stderr, " asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n");
fprintf(stderr, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n");
fprintf(stderr, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n");
fprintf(stderr, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n");
fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");
return 1;
}

View File

@ -1,4 +1,4 @@
.TH minimap2 1 "18 July 2017" "minimap2-2.0-r180-dirty" "Bioinformatics tools"
.TH minimap2 1 "19 July 2017" "minimap2-2.0-r190-dirty" "Bioinformatics tools"
.SH NAME
.PP
minimap2 - mapping and alignment between collections of DNA sequences
@ -74,7 +74,7 @@ SAM format.
.SS Indexing options
.TP 10
.BI -k \ INT
Minimizer k-mer length [17]
Minimizer k-mer length [15]
.TP
.BI -w \ INT
Minimizer window size [2/3 of k-mer length]. A minimizer is the smallest k-mer
@ -164,35 +164,6 @@ secondary alignments [5]. This option has no effect when
.B -X
is applied.
.TP
.BI -x \ STR
Preset []. This option applies multiple options at the same time. It should be
applied before other options because options applied later will overwrite the
values set by
.BR -x .
Available
.I STR
are:
.RS
.TP 8
.B ava-pb
PacBio all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25)
.TP 8
.B ava-ont
Oxford Nanopore all-vs-all overlap mapping (-k15 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25)
.TP
.B map10k
PacBio/Oxford Nanopore read to reference mapping (-Hk19)
.TP
.B asm5
Long assembly to reference mapping (-k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200).
Typically, the alignment will not extend to regions with 5% or higher sequence
divergence. Only use this preset if the average divergence is far below 5%.
.TP
.B asm10
Long assembly to reference mapping (-k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200). Up
to 10% sequence divergence.
.RE
.TP
.BI --max-chain-skip \ INT
A heuristics that stops chaining early [50]. Minimap2 uses dynamic programming
for chaining. The time complexity is quadratic in the number of seeds. This
@ -265,6 +236,51 @@ use
.TP
.B -V
Print version number to stdout
.SS Preset options
.TP 10
.BI -x \ STR
Preset []. This option applies multiple options at the same time. It should be
applied before other options because options applied later will overwrite the
values set by
.BR -x .
Available
.I STR
are:
.RS
.TP 8
.B map-pb
PacBio/Oxford Nanopore read to reference mapping (-Hk19)
.TP
.B map10k
The same as
.B map-pb
(-Hk19)
.TP
.B map-ont
Slightly more sensitive for Oxford Nanopore to reference mapping (-k15). For
PacBio reads, HPC minimizers consistently leads to faster performance and more
sensitive results in comparison to normal minimizers. For Oxford Nanopore data,
normal minimizers are better, though not much. The effectiveness of HPC is
determined by the sequencing error mode.
.TP
.B asm5
Long assembly to reference mapping (-k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200).
Typically, the alignment will not extend to regions with 5% or higher sequence
divergence. Only use this preset if the average divergence is far below 5%.
.TP
.B asm10
Long assembly to reference mapping (-k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200). Up
to 10% sequence divergence.
.TP 8
.B ava-pb
PacBio all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25)
.TP 8
.B ava-ont
Oxford Nanopore all-vs-all overlap mapping (-k15 -w5 -Xp0 -m100 -K500m -g10000
--max-chain-skip 25). Similarly, the major difference from
.B ava-pb
is that this preset is not using HPC minimizers.
.RE
.SS Miscellaneous options
.TP 10
.B --no-kalloc