r518: increased the default -K to 500M

This helps multi-thread performance for ultra-long reads.
This commit is contained in:
Heng Li 2017-10-17 13:21:29 -04:00
parent 25ffd72690
commit 04cf4ebf5e
3 changed files with 10 additions and 17 deletions

8
main.c
View File

@@ -6,7 +6,7 @@
#include "mmpriv.h" #include "mmpriv.h"
#include "getopt.h" #include "getopt.h"
#define MM_VERSION "2.2-r517-dirty" #define MM_VERSION "2.2-r518-dirty"
#ifdef __linux__ #ifdef __linux__
#include <sys/resource.h> #include <sys/resource.h>
@@ -231,7 +231,7 @@ int main(int argc, char *argv[])
fprintf(fp_help, " -c output CIGAR in PAF\n"); fprintf(fp_help, " -c output CIGAR in PAF\n");
fprintf(fp_help, " --cs[=STR] output the cs tag; STR is 'short' (if absent) or 'long' [none]\n"); fprintf(fp_help, " --cs[=STR] output the cs tag; STR is 'short' (if absent) or 'long' [none]\n");
fprintf(fp_help, " -t INT number of threads [%d]\n", n_threads); fprintf(fp_help, " -t INT number of threads [%d]\n", n_threads);
fprintf(fp_help, " -K NUM minibatch size for mapping [200M]\n"); fprintf(fp_help, " -K NUM minibatch size for mapping [500M]\n");
// fprintf(fp_help, " -v INT verbose level [%d]\n", mm_verbose); // fprintf(fp_help, " -v INT verbose level [%d]\n", mm_verbose);
fprintf(fp_help, " --version show version number\n"); fprintf(fp_help, " --version show version number\n");
fprintf(fp_help, " Preset:\n"); fprintf(fp_help, " Preset:\n");
@@ -240,8 +240,8 @@ int main(int argc, char *argv[])
fprintf(fp_help, " map-ont: -k15 (Oxford Nanopore vs reference mapping)\n"); fprintf(fp_help, " map-ont: -k15 (Oxford Nanopore vs reference mapping)\n");
fprintf(fp_help, " asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n"); fprintf(fp_help, " asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n");
fprintf(fp_help, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n"); fprintf(fp_help, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n");
fprintf(fp_help, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n"); fprintf(fp_help, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 --max-chain-skip 25 (PacBio read overlap)\n");
fprintf(fp_help, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n"); fprintf(fp_help, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 --max-chain-skip 25 (ONT read overlap)\n");
fprintf(fp_help, " splice: long-read spliced alignment (see minimap2.1 for details)\n"); fprintf(fp_help, " splice: long-read spliced alignment (see minimap2.1 for details)\n");
fprintf(fp_help, " sr: short single-end reads without splicing (see minimap2.1 for details)\n"); fprintf(fp_help, " sr: short single-end reads without splicing (see minimap2.1 for details)\n");
fprintf(fp_help, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n"); fprintf(fp_help, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");

4
map.c
View File

@@ -36,7 +36,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
opt->end_bonus = -1; opt->end_bonus = -1;
opt->min_dp_max = opt->min_chain_score * opt->a; opt->min_dp_max = opt->min_chain_score * opt->a;
opt->min_ksw_len = 200; opt->min_ksw_len = 200;
opt->mini_batch_size = 200000000; opt->mini_batch_size = 500000000;
opt->pe_ori = 0; // FF opt->pe_ori = 0; // FF
opt->pe_bonus = 33; opt->pe_bonus = 33;
@@ -61,12 +61,10 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
io->is_hpc = 0, io->k = 15, io->w = 5; io->is_hpc = 0, io->k = 15, io->w = 5;
mo->flag |= MM_F_AVA | MM_F_NO_SELF; mo->flag |= MM_F_AVA | MM_F_NO_SELF;
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25; mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
mo->mini_batch_size = 500000000;
} else if (strcmp(preset, "ava-pb") == 0) { } else if (strcmp(preset, "ava-pb") == 0) {
io->is_hpc = 1, io->k = 19, io->w = 5; io->is_hpc = 1, io->k = 19, io->w = 5;
mo->flag |= MM_F_AVA | MM_F_NO_SELF; mo->flag |= MM_F_AVA | MM_F_NO_SELF;
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25; mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
mo->mini_batch_size = 500000000;
} else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) { } else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) {
io->is_hpc = 1, io->k = 19; io->is_hpc = 1, io->k = 19;
} else if (strcmp(preset, "map-ont") == 0) { } else if (strcmp(preset, "map-ont") == 0) {

15
minimap2.1
View File

@@ -1,4 +1,4 @@
.TH minimap2 1 "20 September 2017" "minimap2-2.2 (r420)" "Bioinformatics tools" .TH minimap2 1 "17 October 2017" "minimap2-2.2-dirty (r518)" "Bioinformatics tools"
.SH NAME .SH NAME
.PP .PP
minimap2 - mapping and alignment between collections of DNA sequences minimap2 - mapping and alignment between collections of DNA sequences
@@ -263,18 +263,13 @@ thread may become the bottleneck. Apply this option to use one thread for input
and another thread for output, at the cost of increased peak RAM. and another thread for output, at the cost of increased peak RAM.
.TP .TP
.BI -K \ NUM .BI -K \ NUM
Number of bases loaded into memory to process in a mini-batch [200M]. Number of bases loaded into memory to process in a mini-batch [500M].
Similar to option Similar to option
.BR -I , .BR -I ,
K/M/G/k/m/g suffix is accepted. A large K/M/G/k/m/g suffix is accepted. A large
.I NUM .I NUM
helps load balancing in the multi-threading mode, at the cost of increased helps load balancing in the multi-threading mode, at the cost of increased
memory. Preset memory.
.B ava-pb
and
.B ava-ont
use
.BR -K500m .
.TP .TP
.B --version .B --version
Print version number to stdout Print version number to stdout
@@ -320,13 +315,13 @@ Up to 10% sequence divergence.
.B ava-pb .B ava-pb
PacBio all-vs-all overlap mapping PacBio all-vs-all overlap mapping
.RB ( -Hk19 .RB ( -Hk19
.B -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip .B -w5 -Xp0 -m100 -g10000 --max-chain-skip
.BR 25 ). .BR 25 ).
.TP .TP
.B ava-ont .B ava-ont
Oxford Nanopore all-vs-all overlap mapping Oxford Nanopore all-vs-all overlap mapping
.RB ( -k15 .RB ( -k15
.B -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip .B -w5 -Xp0 -m100 -g10000 --max-chain-skip
.BR 25 ). .BR 25 ).
Similarly, the major difference from Similarly, the major difference from
.B ava-pb .B ava-pb