r508: use two I/O threads for mapping

-x sr applies this option by default
This commit is contained in:
Heng Li 2017-10-12 14:56:01 -04:00
parent 2801ed9b4b
commit 7c555f9b7e
4 changed files with 15 additions and 6 deletions

5
main.c
View File

@ -6,7 +6,7 @@
#include "mmpriv.h"
#include "getopt.h"
#define MM_VERSION "2.2-r507-dirty"
#define MM_VERSION "2.2-r508-dirty"
#ifdef __linux__
#include <sys/resource.h>
@ -65,7 +65,7 @@ static inline int64_t mm_parse_num(const char *str)
int main(int argc, char *argv[])
{
const char *opt_str = "aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:";
const char *opt_str = "2aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:";
mm_mapopt_t opt;
mm_idxopt_t ipt;
int i, c, n_threads = 3, long_idx, max_gap_ref = 0;
@ -118,6 +118,7 @@ int main(int argc, char *argv[])
else if (c == 'K') opt.mini_batch_size = (int)mm_parse_num(optarg);
else if (c == 'R') rg = optarg;
else if (c == 'h') fp_help = stdout;
else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
else if (c == 0 && long_idx == 0) ipt.bucket_bits = atoi(optarg); // --bucket-bits
else if (c == 0 && long_idx == 2) opt.seed = atoi(optarg); // --seed
else if (c == 0 && long_idx == 3) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc

7
map.c
View File

@ -83,7 +83,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
mo->best_n = 50;
} else if (strcmp(preset, "short") == 0 || strcmp(preset, "sr") == 0) {
io->is_hpc = 0, io->k = 21, io->w = 11;
mo->flag |= MM_F_SR | MM_F_FRAG_MODE | MM_F_NO_PRINT_2ND;
mo->flag |= MM_F_SR | MM_F_FRAG_MODE | MM_F_NO_PRINT_2ND | MM_F_2_IO_THREADS;
mo->pe_ori = 0<<1|1; // FR
mo->a = 2, mo->b = 8, mo->q = 12, mo->e = 2, mo->q2 = 32, mo->e2 = 1;
mo->zdrop = 100;
@ -500,7 +500,7 @@ static void *worker_pipeline(void *shared, int step, void *in)
int mm_map_file_frag(const mm_idx_t *idx, int n_segs, const char **fn, const mm_mapopt_t *opt, int n_threads)
{
int i, j;
int i, j, pl_threads;
pipeline_t pl;
if (n_segs < 1) return -1;
memset(&pl, 0, sizeof(pipeline_t));
@ -520,7 +520,8 @@ int mm_map_file_frag(const mm_idx_t *idx, int n_segs, const char **fn, const mm_
pl.opt = opt, pl.mi = idx;
pl.n_threads = n_threads > 1? n_threads : 1;
pl.mini_batch_size = opt->mini_batch_size;
kt_pipeline(n_threads == 1? 1 : 2, worker_pipeline, &pl, 3);
pl_threads = n_threads == 1? 1 : (opt->flag&MM_F_2_IO_THREADS)? 3 : 2;
kt_pipeline(pl_threads, worker_pipeline, &pl, 3);
free(pl.str.s);
for (i = 0; i < n_segs; ++i)
mm_bseq_close(pl.fp[i]);

View File

@ -20,6 +20,7 @@
#define MM_F_SR 0x1000
#define MM_F_FRAG_MODE 0x2000
#define MM_F_NO_PRINT_2ND 0x4000
#define MM_F_2_IO_THREADS 0x8000
#define MM_IDX_MAGIC "MMI\2"

View File

@ -256,6 +256,12 @@ sequences, and uses up to
threads when mapping (the extra thread is for I/O, which is frequently idle and
takes little CPU time).
.TP
.B -2
Use two I/O threads during mapping. By default, minimap2 uses a single I/O
thread for both input and output. When I/O is slow (e.g. piping the output to
gzip, or reading from a slow pipe), this thread may become the bottleneck. Apply
this option to use one thread for input and another thread for output, at the
cost of increased peak RAM.
.TP
.BI -K \ NUM
Number of bases loaded into memory to process in a mini-batch [200M].
Similar to option
@ -343,7 +349,7 @@ tag ignores introns to demote hits to pseudogenes.
Short single-end reads without splicing
.RB ( -k21
.B -w11 -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20 -s40 -g200
.B -K50m --frag
.B -2K50m --frag
.BR --sr ).
.RE
.SS Miscellaneous options