diff --git a/main.c b/main.c index 16d5d58..6fad2c1 100644 --- a/main.c +++ b/main.c @@ -10,7 +10,7 @@ #include "minimap.h" #include "mmpriv.h" -#define MM_VERSION "2.0-r189-dirty" +#define MM_VERSION "2.0-r190-dirty" void liftrlimit() { @@ -63,7 +63,7 @@ static struct option long_options[] = { int main(int argc, char *argv[]) { mm_mapopt_t opt; - int i, c, k = 17, w = -1, bucket_bits = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx, is_hpc = 0, long_idx; + int i, c, k = 15, w = -1, bucket_bits = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx, is_hpc = 0, long_idx; int minibatch_size = 200000000; uint64_t batch_size = 4000000000ULL; mm_bseq_file_t *fp = 0; @@ -135,8 +135,10 @@ int main(int argc, char *argv[]) opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.max_gap = 10000, opt.max_chain_skip = 25; minibatch_size = 500000000; is_hpc = 1, k = 19, w = 5; - } else if (strcmp(optarg, "map10k") == 0) { + } else if (strcmp(optarg, "map10k") == 0 || strcmp(optarg, "map-pb") == 0) { is_hpc = 1, k = 19; + } else if (strcmp(optarg, "map-ont") == 0) { + is_hpc = 0, k = 15; } else if (strcmp(optarg, "asm5") == 0) { k = 19, w = 19; opt.a = 1, opt.b = 19, opt.q = 39, opt.q2 = 81, opt.e = 3, opt.e2 = 1, opt.zdrop = 200; @@ -172,12 +174,6 @@ int main(int argc, char *argv[]) fprintf(stderr, " -X skip self and dual mappings (for the all-vs-all mode)\n"); fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio); fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n); - fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n"); - fprintf(stderr, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n"); - fprintf(stderr, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n"); - fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n"); - fprintf(stderr, " asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n"); - fprintf(stderr, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n"); fprintf(stderr, " Alignment:\n"); fprintf(stderr, " -A INT matching score [%d]\n", opt.a); fprintf(stderr, " -B INT mismatch penalty [%d]\n", opt.b); @@ -193,6 +189,14 @@ int main(int argc, char *argv[]) fprintf(stderr, " -K NUM minibatch size [200M]\n"); // fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose); fprintf(stderr, " -V show version number\n"); + fprintf(stderr, " Preset:\n"); + fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n"); + fprintf(stderr, " map10k/map-pb: -Hk19 (PacBio/ONT vs reference mapping)\n"); + fprintf(stderr, " map-ont: -k15 (slightly more sensitive than 'map10k' for ONT vs reference)\n"); + fprintf(stderr, " asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n"); + fprintf(stderr, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n"); + fprintf(stderr, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n"); + fprintf(stderr, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n"); fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n"); return 1; } diff --git a/minimap2.1 b/minimap2.1 index 888ff7a..8d76e49 100644 --- a/minimap2.1 +++ b/minimap2.1 @@ -1,4 +1,4 @@ -.TH minimap2 1 "18 July 2017" "minimap2-2.0-r180-dirty" "Bioinformatics tools" +.TH minimap2 1 "19 July 2017" "minimap2-2.0-r190-dirty" "Bioinformatics tools" .SH NAME .PP minimap2 - mapping and alignment between collections of DNA sequences @@ -74,7 +74,7 @@ SAM format. .SS Indexing options .TP 10 .BI -k \ INT -Minimizer k-mer length [17] +Minimizer k-mer length [15] .TP .BI -w \ INT Minimizer window size [2/3 of k-mer length]. A minimizer is the smallest k-mer @@ -164,35 +164,6 @@ secondary alignments [5]. This option has no effect when .B -X is applied. .TP -.BI -x \ STR -Preset []. This option applies multiple options at the same time. It should be -applied before other options because options applied later will overwrite the -values set by -.BR -x . -Available -.I STR -are: -.RS -.TP 8 -.B ava-pb -PacBio all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25) -.TP 8 -.B ava-ont -Oxford Nanopore all-vs-all overlap mapping (-k15 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25) -.TP -.B map10k -PacBio/Oxford Nanopore read to reference mapping (-Hk19) -.TP -.B asm5 -Long assembly to reference mapping (-k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200). -Typically, the alignment will not extend to regions with 5% or higher sequence -divergence. Only use this preset if the average divergence is far below 5%. -.TP -.B asm10 -Long assembly to reference mapping (-k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200). Up -to 10% sequence divergence. -.RE -.TP .BI --max-chain-skip \ INT A heuristics that stops chaining early [50]. Minimap2 uses dynamic programming for chaining. The time complexity is quadratic in the number of seeds. This @@ -265,6 +236,51 @@ use .TP .B -V Print version number to stdout +.SS Preset options +.TP 10 +.BI -x \ STR +Preset []. This option applies multiple options at the same time. It should be +applied before other options because options applied later will overwrite the +values set by +.BR -x . +Available +.I STR +are: +.RS +.TP 8 +.B map-pb +PacBio/Oxford Nanopore read to reference mapping (-Hk19) +.TP +.B map10k +The same as +.B map-pb +(-Hk19) +.TP +.B map-ont +Slightly more sensitive for Oxford Nanopore to reference mapping (-k15). For +PacBio reads, HPC minimizers consistently leads to faster performance and more +sensitive results in comparison to normal minimizers. For Oxford Nanopore data, +normal minimizers are better, though not much. The effectiveness of HPC is +determined by the sequencing error mode. +.TP +.B asm5 +Long assembly to reference mapping (-k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200). +Typically, the alignment will not extend to regions with 5% or higher sequence +divergence. Only use this preset if the average divergence is far below 5%. +.TP +.B asm10 +Long assembly to reference mapping (-k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200). Up +to 10% sequence divergence. +.TP 8 +.B ava-pb +PacBio all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25) +.TP 8 +.B ava-ont +Oxford Nanopore all-vs-all overlap mapping (-k15 -w5 -Xp0 -m100 -K500m -g10000 +--max-chain-skip 25). Similarly, the major difference from +.B ava-pb +is that this preset is not using HPC minimizers. +.RE .SS Miscellaneous options .TP 10 .B --no-kalloc