r518: increased the default -K to 500M

This helps multi-thread performance for ultra-long reads.
This commit is contained in:
Heng Li 2017-10-17 13:21:29 -04:00
parent 25ffd72690
commit 04cf4ebf5e
3 changed files with 10 additions and 17 deletions

8
main.c
View File

@ -6,7 +6,7 @@
#include "mmpriv.h"
#include "getopt.h"
#define MM_VERSION "2.2-r517-dirty"
#define MM_VERSION "2.2-r518-dirty"
#ifdef __linux__
#include <sys/resource.h>
@ -231,7 +231,7 @@ int main(int argc, char *argv[])
fprintf(fp_help, " -c output CIGAR in PAF\n");
fprintf(fp_help, " --cs[=STR] output the cs tag; STR is 'short' (if absent) or 'long' [none]\n");
fprintf(fp_help, " -t INT number of threads [%d]\n", n_threads);
fprintf(fp_help, " -K NUM minibatch size for mapping [200M]\n");
fprintf(fp_help, " -K NUM minibatch size for mapping [500M]\n");
// fprintf(fp_help, " -v INT verbose level [%d]\n", mm_verbose);
fprintf(fp_help, " --version show version number\n");
fprintf(fp_help, " Preset:\n");
@ -240,8 +240,8 @@ int main(int argc, char *argv[])
fprintf(fp_help, " map-ont: -k15 (Oxford Nanopore vs reference mapping)\n");
fprintf(fp_help, " asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n");
fprintf(fp_help, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n");
fprintf(fp_help, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n");
fprintf(fp_help, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n");
fprintf(fp_help, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 --max-chain-skip 25 (PacBio read overlap)\n");
fprintf(fp_help, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 --max-chain-skip 25 (ONT read overlap)\n");
fprintf(fp_help, " splice: long-read spliced alignment (see minimap2.1 for details)\n");
fprintf(fp_help, " sr: short single-end reads without splicing (see minimap2.1 for details)\n");
fprintf(fp_help, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");

4
map.c
View File

@ -36,7 +36,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
opt->end_bonus = -1;
opt->min_dp_max = opt->min_chain_score * opt->a;
opt->min_ksw_len = 200;
opt->mini_batch_size = 200000000;
opt->mini_batch_size = 500000000;
opt->pe_ori = 0; // FF
opt->pe_bonus = 33;
@ -61,12 +61,10 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
io->is_hpc = 0, io->k = 15, io->w = 5;
mo->flag |= MM_F_AVA | MM_F_NO_SELF;
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
mo->mini_batch_size = 500000000;
} else if (strcmp(preset, "ava-pb") == 0) {
io->is_hpc = 1, io->k = 19, io->w = 5;
mo->flag |= MM_F_AVA | MM_F_NO_SELF;
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
mo->mini_batch_size = 500000000;
} else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) {
io->is_hpc = 1, io->k = 19;
} else if (strcmp(preset, "map-ont") == 0) {

15
minimap2.1
View File

@ -1,4 +1,4 @@
.TH minimap2 1 "20 September 2017" "minimap2-2.2 (r420)" "Bioinformatics tools"
.TH minimap2 1 "17 October 2017" "minimap2-2.2-dirty (r518)" "Bioinformatics tools"
.SH NAME
.PP
minimap2 - mapping and alignment between collections of DNA sequences
@ -263,18 +263,13 @@ thread may become the bottleneck. Apply this option to use one thread for input
and another thread for output, at the cost of increased peak RAM.
.TP
.BI -K \ NUM
Number of bases loaded into memory to process in a mini-batch [200M].
Number of bases loaded into memory to process in a mini-batch [500M].
Similar to option
.BR -I ,
K/M/G/k/m/g suffix is accepted. A large
.I NUM
helps load balancing in the multi-threading mode, at the cost of increased
memory. Preset
.B ava-pb
and
.B ava-ont
use
.BR -K500m .
memory.
.TP
.B --version
Print version number to stdout
@ -320,13 +315,13 @@ Up to 10% sequence divergence.
.B ava-pb
PacBio all-vs-all overlap mapping
.RB ( -Hk19
.B -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip
.B -w5 -Xp0 -m100 -g10000 --max-chain-skip
.BR 25 ).
.TP
.B ava-ont
Oxford Nanopore all-vs-all overlap mapping
.RB ( -k15
.B -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip
.B -w5 -Xp0 -m100 -g10000 --max-chain-skip
.BR 25 ).
Similarly, the major difference from
.B ava-pb