From 7c555f9b7e5d9b25c6d3b4268ff3be461bf3a9e9 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 12 Oct 2017 14:56:01 -0400 Subject: [PATCH] r508: use two I/O threads for mapping -x sr applies this option by default --- main.c | 5 +++-- map.c | 7 ++++--- minimap.h | 1 + minimap2.1 | 8 +++++++- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/main.c b/main.c index 3951f15..3ffe6e8 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.2-r507-dirty" +#define MM_VERSION "2.2-r508-dirty" #ifdef __linux__ #include <sys/resource.h> @@ -65,7 +65,7 @@ static inline int64_t mm_parse_num(const char *str) int main(int argc, char *argv[]) { - const char *opt_str = "aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:"; + const char *opt_str = "2aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:"; mm_mapopt_t opt; mm_idxopt_t ipt; int i, c, n_threads = 3, long_idx, max_gap_ref = 0; @@ -118,6 +118,7 @@ int main(int argc, char *argv[]) else if (c == 'K') opt.mini_batch_size = (int)mm_parse_num(optarg); else if (c == 'R') rg = optarg; else if (c == 'h') fp_help = stdout; + else if (c == '2') opt.flag |= MM_F_2_IO_THREADS; else if (c == 0 && long_idx == 0) ipt.bucket_bits = atoi(optarg); // --bucket-bits else if (c == 0 && long_idx == 2) opt.seed = atoi(optarg); // --seed else if (c == 0 && long_idx == 3) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc diff --git a/map.c b/map.c index ac66e81..27c5818 100644 --- a/map.c +++ b/map.c @@ -83,7 +83,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) mo->best_n = 50; } else if (strcmp(preset, "short") == 0 || strcmp(preset, "sr") == 0) { io->is_hpc = 0, io->k = 21, io->w = 11; - mo->flag |= MM_F_SR | MM_F_FRAG_MODE | MM_F_NO_PRINT_2ND; + mo->flag |= MM_F_SR | MM_F_FRAG_MODE | MM_F_NO_PRINT_2ND | MM_F_2_IO_THREADS; mo->pe_ori = 0<<1|1; // FR mo->a = 2, mo->b = 8, mo->q = 12, mo->e = 2, mo->q2 = 32, mo->e2 = 1; mo->zdrop = 100; @@ -500,7
+500,7 @@ static void *worker_pipeline(void *shared, int step, void *in) int mm_map_file_frag(const mm_idx_t *idx, int n_segs, const char **fn, const mm_mapopt_t *opt, int n_threads) { - int i, j; + int i, j, pl_threads; pipeline_t pl; if (n_segs < 1) return -1; memset(&pl, 0, sizeof(pipeline_t)); @@ -520,7 +520,8 @@ int mm_map_file_frag(const mm_idx_t *idx, int n_segs, const char **fn, const mm_ pl.opt = opt, pl.mi = idx; pl.n_threads = n_threads > 1? n_threads : 1; pl.mini_batch_size = opt->mini_batch_size; - kt_pipeline(n_threads == 1? 1 : 2, worker_pipeline, &pl, 3); + pl_threads = n_threads == 1? 1 : (opt->flag&MM_F_2_IO_THREADS)? 3 : 2; + kt_pipeline(pl_threads, worker_pipeline, &pl, 3); free(pl.str.s); for (i = 0; i < n_segs; ++i) mm_bseq_close(pl.fp[i]); diff --git a/minimap.h b/minimap.h index 281bf83..9bef3e5 100644 --- a/minimap.h +++ b/minimap.h @@ -20,6 +20,7 @@ #define MM_F_SR 0x1000 #define MM_F_FRAG_MODE 0x2000 #define MM_F_NO_PRINT_2ND 0x4000 +#define MM_F_2_IO_THREADS 0x8000 #define MM_IDX_MAGIC "MMI\2" diff --git a/minimap2.1 b/minimap2.1 index 0d2081b..2a96c91 100644 --- a/minimap2.1 +++ b/minimap2.1 @@ -256,6 +256,12 @@ sequences, and uses up to threads when mapping (the extra thread is for I/O, which is frequently idle and takes little CPU time). .TP +.B -2 +Use two I/O threads during mapping. By default, minimap2 uses one I/O thread. +When I/O is slow (e.g. piping to gzip, or reading from a slow pipe), the I/O +thread may become the bottleneck. Apply this option to use one thread for input +and another thread for output, at the cost of increased peak RAM. +.TP .BI -K \ NUM Number of bases loaded into memory to process in a mini-batch [200M]. Similar to option @@ -343,7 +349,7 @@ tag ignores introns to demote hits to pseudogenes. Short single-end reads without splicing .RB ( -k21 .B -w11 -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20 -s40 -g200 -.B -K50m --frag +.B -2K50m --frag .BR --sr ). .RE .SS Miscellaneous options