r190: default -k to 15; added -x map-ont
This commit is contained in:
parent
470021fd27
commit
4aff301ef4
22
main.c
22
main.c
|
|
@ -10,7 +10,7 @@
|
||||||
#include "minimap.h"
|
#include "minimap.h"
|
||||||
#include "mmpriv.h"
|
#include "mmpriv.h"
|
||||||
|
|
||||||
#define MM_VERSION "2.0-r189-dirty"
|
#define MM_VERSION "2.0-r190-dirty"
|
||||||
|
|
||||||
void liftrlimit()
|
void liftrlimit()
|
||||||
{
|
{
|
||||||
|
|
@ -63,7 +63,7 @@ static struct option long_options[] = {
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
mm_mapopt_t opt;
|
mm_mapopt_t opt;
|
||||||
int i, c, k = 17, w = -1, bucket_bits = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx, is_hpc = 0, long_idx;
|
int i, c, k = 15, w = -1, bucket_bits = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx, is_hpc = 0, long_idx;
|
||||||
int minibatch_size = 200000000;
|
int minibatch_size = 200000000;
|
||||||
uint64_t batch_size = 4000000000ULL;
|
uint64_t batch_size = 4000000000ULL;
|
||||||
mm_bseq_file_t *fp = 0;
|
mm_bseq_file_t *fp = 0;
|
||||||
|
|
@ -135,8 +135,10 @@ int main(int argc, char *argv[])
|
||||||
opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.max_gap = 10000, opt.max_chain_skip = 25;
|
opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.max_gap = 10000, opt.max_chain_skip = 25;
|
||||||
minibatch_size = 500000000;
|
minibatch_size = 500000000;
|
||||||
is_hpc = 1, k = 19, w = 5;
|
is_hpc = 1, k = 19, w = 5;
|
||||||
} else if (strcmp(optarg, "map10k") == 0) {
|
} else if (strcmp(optarg, "map10k") == 0 || strcmp(optarg, "map-pb") == 0) {
|
||||||
is_hpc = 1, k = 19;
|
is_hpc = 1, k = 19;
|
||||||
|
} else if (strcmp(optarg, "map-ont") == 0) {
|
||||||
|
is_hpc = 0, k = 15;
|
||||||
} else if (strcmp(optarg, "asm5") == 0) {
|
} else if (strcmp(optarg, "asm5") == 0) {
|
||||||
k = 19, w = 19;
|
k = 19, w = 19;
|
||||||
opt.a = 1, opt.b = 19, opt.q = 39, opt.q2 = 81, opt.e = 3, opt.e2 = 1, opt.zdrop = 200;
|
opt.a = 1, opt.b = 19, opt.q = 39, opt.q2 = 81, opt.e = 3, opt.e2 = 1, opt.zdrop = 200;
|
||||||
|
|
@ -172,12 +174,6 @@ int main(int argc, char *argv[])
|
||||||
fprintf(stderr, " -X skip self and dual mappings (for the all-vs-all mode)\n");
|
fprintf(stderr, " -X skip self and dual mappings (for the all-vs-all mode)\n");
|
||||||
fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
|
fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
|
||||||
fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n);
|
fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n);
|
||||||
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
|
|
||||||
fprintf(stderr, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n");
|
|
||||||
fprintf(stderr, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n");
|
|
||||||
fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n");
|
|
||||||
fprintf(stderr, " asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n");
|
|
||||||
fprintf(stderr, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n");
|
|
||||||
fprintf(stderr, " Alignment:\n");
|
fprintf(stderr, " Alignment:\n");
|
||||||
fprintf(stderr, " -A INT matching score [%d]\n", opt.a);
|
fprintf(stderr, " -A INT matching score [%d]\n", opt.a);
|
||||||
fprintf(stderr, " -B INT mismatch penalty [%d]\n", opt.b);
|
fprintf(stderr, " -B INT mismatch penalty [%d]\n", opt.b);
|
||||||
|
|
@ -193,6 +189,14 @@ int main(int argc, char *argv[])
|
||||||
fprintf(stderr, " -K NUM minibatch size [200M]\n");
|
fprintf(stderr, " -K NUM minibatch size [200M]\n");
|
||||||
// fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose);
|
// fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose);
|
||||||
fprintf(stderr, " -V show version number\n");
|
fprintf(stderr, " -V show version number\n");
|
||||||
|
fprintf(stderr, " Preset:\n");
|
||||||
|
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
|
||||||
|
fprintf(stderr, " map10k/map-pb: -Hk19 (PacBio/ONT vs reference mapping)\n");
|
||||||
|
fprintf(stderr, " map-ont: -k15 (slightly more sensitive than 'map10k' for ONT vs reference)\n");
|
||||||
|
fprintf(stderr, " asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n");
|
||||||
|
fprintf(stderr, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n");
|
||||||
|
fprintf(stderr, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n");
|
||||||
|
fprintf(stderr, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n");
|
||||||
fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");
|
fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
78
minimap2.1
78
minimap2.1
|
|
@ -1,4 +1,4 @@
|
||||||
.TH minimap2 1 "18 July 2017" "minimap2-2.0-r180-dirty" "Bioinformatics tools"
|
.TH minimap2 1 "19 July 2017" "minimap2-2.0-r190-dirty" "Bioinformatics tools"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
.PP
|
.PP
|
||||||
minimap2 - mapping and alignment between collections of DNA sequences
|
minimap2 - mapping and alignment between collections of DNA sequences
|
||||||
|
|
@ -74,7 +74,7 @@ SAM format.
|
||||||
.SS Indexing options
|
.SS Indexing options
|
||||||
.TP 10
|
.TP 10
|
||||||
.BI -k \ INT
|
.BI -k \ INT
|
||||||
Minimizer k-mer length [17]
|
Minimizer k-mer length [15]
|
||||||
.TP
|
.TP
|
||||||
.BI -w \ INT
|
.BI -w \ INT
|
||||||
Minimizer window size [2/3 of k-mer length]. A minimizer is the smallest k-mer
|
Minimizer window size [2/3 of k-mer length]. A minimizer is the smallest k-mer
|
||||||
|
|
@ -164,35 +164,6 @@ secondary alignments [5]. This option has no effect when
|
||||||
.B -X
|
.B -X
|
||||||
is applied.
|
is applied.
|
||||||
.TP
|
.TP
|
||||||
.BI -x \ STR
|
|
||||||
Preset []. This option applies multiple options at the same time. It should be
|
|
||||||
applied before other options because options applied later will overwrite the
|
|
||||||
values set by
|
|
||||||
.BR -x .
|
|
||||||
Available
|
|
||||||
.I STR
|
|
||||||
are:
|
|
||||||
.RS
|
|
||||||
.TP 8
|
|
||||||
.B ava-pb
|
|
||||||
PacBio all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25)
|
|
||||||
.TP 8
|
|
||||||
.B ava-ont
|
|
||||||
Oxford Nanopore all-vs-all overlap mapping (-k15 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25)
|
|
||||||
.TP
|
|
||||||
.B map10k
|
|
||||||
PacBio/Oxford Nanopore read to reference mapping (-Hk19)
|
|
||||||
.TP
|
|
||||||
.B asm5
|
|
||||||
Long assembly to reference mapping (-k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200).
|
|
||||||
Typically, the alignment will not extend to regions with 5% or higher sequence
|
|
||||||
divergence. Only use this preset if the average divergence is far below 5%.
|
|
||||||
.TP
|
|
||||||
.B asm10
|
|
||||||
Long assembly to reference mapping (-k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200). Up
|
|
||||||
to 10% sequence divergence.
|
|
||||||
.RE
|
|
||||||
.TP
|
|
||||||
.BI --max-chain-skip \ INT
|
.BI --max-chain-skip \ INT
|
||||||
A heuristics that stops chaining early [50]. Minimap2 uses dynamic programming
|
A heuristics that stops chaining early [50]. Minimap2 uses dynamic programming
|
||||||
for chaining. The time complexity is quadratic in the number of seeds. This
|
for chaining. The time complexity is quadratic in the number of seeds. This
|
||||||
|
|
@ -265,6 +236,51 @@ use
|
||||||
.TP
|
.TP
|
||||||
.B -V
|
.B -V
|
||||||
Print version number to stdout
|
Print version number to stdout
|
||||||
|
.SS Preset options
|
||||||
|
.TP 10
|
||||||
|
.BI -x \ STR
|
||||||
|
Preset []. This option applies multiple options at the same time. It should be
|
||||||
|
applied before other options because options applied later will overwrite the
|
||||||
|
values set by
|
||||||
|
.BR -x .
|
||||||
|
Available
|
||||||
|
.I STR
|
||||||
|
are:
|
||||||
|
.RS
|
||||||
|
.TP 8
|
||||||
|
.B map-pb
|
||||||
|
PacBio/Oxford Nanopore read to reference mapping (-Hk19)
|
||||||
|
.TP
|
||||||
|
.B map10k
|
||||||
|
The same as
|
||||||
|
.B map-pb
|
||||||
|
(-Hk19)
|
||||||
|
.TP
|
||||||
|
.B map-ont
|
||||||
|
Slightly more sensitive for Oxford Nanopore to reference mapping (-k15). For
|
||||||
|
PacBio reads, HPC minimizers consistently leads to faster performance and more
|
||||||
|
sensitive results in comparison to normal minimizers. For Oxford Nanopore data,
|
||||||
|
normal minimizers are better, though not much. The effectiveness of HPC is
|
||||||
|
determined by the sequencing error mode.
|
||||||
|
.TP
|
||||||
|
.B asm5
|
||||||
|
Long assembly to reference mapping (-k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200).
|
||||||
|
Typically, the alignment will not extend to regions with 5% or higher sequence
|
||||||
|
divergence. Only use this preset if the average divergence is far below 5%.
|
||||||
|
.TP
|
||||||
|
.B asm10
|
||||||
|
Long assembly to reference mapping (-k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200). Up
|
||||||
|
to 10% sequence divergence.
|
||||||
|
.TP 8
|
||||||
|
.B ava-pb
|
||||||
|
PacBio all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25)
|
||||||
|
.TP 8
|
||||||
|
.B ava-ont
|
||||||
|
Oxford Nanopore all-vs-all overlap mapping (-k15 -w5 -Xp0 -m100 -K500m -g10000
|
||||||
|
--max-chain-skip 25). Similarly, the major difference from
|
||||||
|
.B ava-pb
|
||||||
|
is that this preset is not using HPC minimizers.
|
||||||
|
.RE
|
||||||
.SS Miscellaneous options
|
.SS Miscellaneous options
|
||||||
.TP 10
|
.TP 10
|
||||||
.B --no-kalloc
|
.B --no-kalloc
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue