r518: increased the default -K to 500M

This improves multi-threaded performance for ultra-long reads: a larger mini-batch helps load balancing across threads, at the cost of increased memory.
2017-10-17 13:21:29 -04:00 · 2017-10-17 13:21:29 -04:00 · 04cf4ebf5e
parent 25ffd72690
commit 04cf4ebf5e
3 changed files with 10 additions and 17 deletions
--- a/main.c
+++ b/main.c
@@ -6,7 +6,7 @@
 #include "mmpriv.h"
 #include "getopt.h"

-#define MM_VERSION "2.2-r517-dirty"
+#define MM_VERSION "2.2-r518-dirty"

 #ifdef __linux__
 #include <sys/resource.h>
@@ -231,7 +231,7 @@ int main(int argc, char *argv[])
 		fprintf(fp_help, "    -c           output CIGAR in PAF\n");
 		fprintf(fp_help, "    --cs[=STR]   output the cs tag; STR is 'short' (if absent) or 'long' [none]\n");
 		fprintf(fp_help, "    -t INT       number of threads [%d]\n", n_threads);
-		fprintf(fp_help, "    -K NUM       minibatch size for mapping [200M]\n");
+		fprintf(fp_help, "    -K NUM       minibatch size for mapping [500M]\n");
 //		fprintf(fp_help, "    -v INT       verbose level [%d]\n", mm_verbose);
 		fprintf(fp_help, "    --version    show version number\n");
 		fprintf(fp_help, "  Preset:\n");
@@ -240,8 +240,8 @@ int main(int argc, char *argv[])
 		fprintf(fp_help, "                 map-ont: -k15 (Oxford Nanopore vs reference mapping)\n");
 		fprintf(fp_help, "                 asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n");
 		fprintf(fp_help, "                 asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n");
-		fprintf(fp_help, "                 ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n");
-		fprintf(fp_help, "                 ava-ont: -k15 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n");
+		fprintf(fp_help, "                 ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 --max-chain-skip 25 (PacBio read overlap)\n");
+		fprintf(fp_help, "                 ava-ont: -k15 -w5 -Xp0 -m100 -g10000 --max-chain-skip 25 (ONT read overlap)\n");
 		fprintf(fp_help, "                 splice: long-read spliced alignment (see minimap2.1 for details)\n");
 		fprintf(fp_help, "                 sr: short single-end reads without splicing (see minimap2.1 for details)\n");
 		fprintf(fp_help, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");
--- a/map.c
+++ b/map.c
@@ -36,7 +36,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
 	opt->end_bonus = -1;
 	opt->min_dp_max = opt->min_chain_score * opt->a;
 	opt->min_ksw_len = 200;
-	opt->mini_batch_size = 200000000;
+	opt->mini_batch_size = 500000000;

 	opt->pe_ori = 0; // FF
 	opt->pe_bonus = 33;
@@ -61,12 +61,10 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
 		io->is_hpc = 0, io->k = 15, io->w = 5;
 		mo->flag |= MM_F_AVA | MM_F_NO_SELF;
 		mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
-		mo->mini_batch_size = 500000000;
 	} else if (strcmp(preset, "ava-pb") == 0) {
 		io->is_hpc = 1, io->k = 19, io->w = 5;
 		mo->flag |= MM_F_AVA | MM_F_NO_SELF;
 		mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
-		mo->mini_batch_size = 500000000;
 	} else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) {
 		io->is_hpc = 1, io->k = 19;
 	} else if (strcmp(preset, "map-ont") == 0) {
--- a/minimap2.1
+++ b/minimap2.1
@@ -1,4 +1,4 @@
-.TH minimap2 1 "20 September 2017" "minimap2-2.2 (r420)" "Bioinformatics tools"
+.TH minimap2 1 "17 October 2017" "minimap2-2.2-dirty (r518)" "Bioinformatics tools"
 .SH NAME
 .PP
 minimap2 - mapping and alignment between collections of DNA sequences
@@ -263,18 +263,13 @@ thread may become the bottleneck. Apply this option to use one thread for input
 and another thread for output, at the cost of increased peak RAM.
 .TP
 .BI -K \ NUM
-Number of bases loaded into memory to process in a mini-batch [200M].
+Number of bases loaded into memory to process in a mini-batch [500M].
 Similar to option
 .BR -I ,
 K/M/G/k/m/g suffix is accepted. A large
 .I NUM
 helps load balancing in the multi-threading mode, at the cost of increased
-memory. Preset
-.B ava-pb
-and
-.B ava-ont
-use
-.BR -K500m .
+memory.
 .TP
 .B --version
 Print version number to stdout
@@ -320,13 +315,13 @@ Up to 10% sequence divergence.
 .B ava-pb
 PacBio all-vs-all overlap mapping
 .RB ( -Hk19
-.B -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip
+.B -w5 -Xp0 -m100 -g10000 --max-chain-skip
 .BR 25 ).
 .TP
 .B ava-ont
 Oxford Nanopore all-vs-all overlap mapping
 .RB ( -k15
-.B -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip
+.B -w5 -Xp0 -m100 -g10000 --max-chain-skip
 .BR 25 ).
 Similarly, the major difference from
 .B ava-pb