diff --git a/hit.c b/hit.c index 63b6da5..cb740fb 100644 --- a/hit.c +++ b/hit.c @@ -197,10 +197,6 @@ void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *re mm_reg1_t *r = &regs[i]; int flt = 0; if (r->cnt < opt->min_cnt) flt = 1; - else { - int blen = r->qe - r->qs < r->re - r->rs? r->qe - r->qs : r->re - r->rs; - if (r->score < blen * opt->min_seedcov_ratio * opt->a) flt = 1; - } if (r->p) { if (r->p->blen - r->p->n_ambi - r->p->n_diff < opt->min_chain_score) flt = 1; else if (r->p->dp_max < opt->min_dp_max) flt = 1; diff --git a/main.c b/main.c index ff0476b..69c656d 100644 --- a/main.c +++ b/main.c @@ -10,7 +10,7 @@ #include "minimap.h" #include "mmpriv.h" -#define MM_VERSION "2.0-r175-pre" +#define MM_VERSION "2.0-r176-pre" void liftrlimit() { @@ -43,7 +43,7 @@ static int test_idx(const char *fn) static struct option long_options[] = { { "bucket-bits", required_argument, 0, 0 }, - { "mb-size", required_argument, 0, 0 }, + { "mb-size", required_argument, 0, 'K' }, { "int-rname", no_argument, 0, 0 }, { "no-kalloc", no_argument, 0, 0 }, { "print-qname", no_argument, 0, 0 }, @@ -73,7 +73,7 @@ int main(int argc, char *argv[]) mm_realtime0 = realtime(); mm_mapopt_init(&opt); - while ((c = getopt_long(argc, argv, "aw:k:t:r:f:Vv:g:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:D:N:Q", long_options, &long_idx)) >= 0) { + while ((c = getopt_long(argc, argv, "aw:k:K:t:r:f:Vv:g:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Q", long_options, &long_idx)) >= 0) { if (c == 'w') w = atoi(optarg); else if (c == 'k') k = atoi(optarg); else if (c == 'H') is_hpc = 1; @@ -85,7 +85,6 @@ int main(int argc, char *argv[]) else if (c == 'g') opt.max_gap = atoi(optarg); else if (c == 'N') opt.best_n = atoi(optarg); else if (c == 'p') opt.pri_ratio = atof(optarg); - else if (c == 'D') opt.min_seedcov_ratio = atof(optarg); else if (c == 'M') opt.mask_level = atof(optarg); else if (c == 'c') opt.flag |= MM_F_CIGAR; else if (c == 'X') opt.flag |= MM_F_AVA | MM_F_NO_SELF; @@ -114,7 +113,7 @@ int 
main(int argc, char *argv[]) } else if (c == 'E') { opt.e = opt.e2 = strtol(optarg, &s, 10); if (*s == ',') opt.e2 = strtol(s + 1, &s, 10); - } else if (c == 'I' || (c == 0 && long_idx == 1)) { + } else if (c == 'I' || c == 'K') { double x; char *p; x = strtod(optarg, &p); @@ -126,11 +125,13 @@ int main(int argc, char *argv[]) } else if (c == 'x') { if (strcmp(optarg, "ava-ont") == 0) { opt.flag |= MM_F_AVA | MM_F_NO_SELF; - opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.min_seedcov_ratio = 0.05f, opt.max_gap = 10000, opt.max_chain_skip = 25; + opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.max_gap = 10000, opt.max_chain_skip = 25; + minibatch_size = 500000000; k = 15, w = 5; } else if (strcmp(optarg, "ava-pb") == 0) { opt.flag |= MM_F_AVA | MM_F_NO_SELF; - opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.min_seedcov_ratio = 0.05f, opt.max_gap = 10000, opt.max_chain_skip = 25; + opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.max_gap = 10000, opt.max_chain_skip = 25; + minibatch_size = 500000000; is_hpc = 1, k = 19, w = 5; } else if (strcmp(optarg, "map10k") == 0) { is_hpc = 1, k = 19; @@ -163,10 +164,9 @@ int main(int argc, char *argv[]) fprintf(stderr, " -X skip self and dual mappings (for the all-vs-all mode)\n"); fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio); fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n); - fprintf(stderr, " -D FLOAT min fraction of minimizer matches [%g]\n", opt.min_seedcov_ratio); fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n"); - fprintf(stderr, " ava-pb: -Hk19 -w5 -Xp0 -m100 -D.05 -g10000 --max-chain-skip 25 (PacBio read overlap)\n"); - fprintf(stderr, " ava-ont: -k15 -w5 -Xp0 -m100 -D.05 -g10000 --max-chain-skip 25 (ONT read overlap)\n"); + fprintf(stderr, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n"); + fprintf(stderr, " ava-ont: -k15 -w5 -Xp0 -m100 
-g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n"); fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n"); fprintf(stderr, " asm1m: -k19 -w19 (intra-species assembly to ref mapping)\n"); fprintf(stderr, " Alignment:\n"); @@ -181,6 +181,7 @@ int main(int argc, char *argv[]) fprintf(stderr, " -a output in the SAM format (PAF by default)\n"); fprintf(stderr, " -c output CIGAR in PAF\n"); fprintf(stderr, " -t INT number of threads [%d]\n", n_threads); + fprintf(stderr, " -K NUM minibatch size [200M]\n"); // fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose); fprintf(stderr, " -V show version number\n"); fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n"); diff --git a/map.c b/map.c index 3ddac1f..845aa6d 100644 --- a/map.c +++ b/map.c @@ -20,7 +20,6 @@ void mm_mapopt_init(mm_mapopt_t *opt) opt->bw = 500; opt->max_gap = 5000; opt->max_chain_skip = 25; - opt->min_seedcov_ratio = 0.0f; opt->mask_level = 0.5f; opt->pri_ratio = 0.8f; diff --git a/minimap.h b/minimap.h index a5ad58d..6c68c62 100644 --- a/minimap.h +++ b/minimap.h @@ -77,7 +77,6 @@ typedef struct { int max_chain_skip; int min_cnt; int min_chain_score; - float min_seedcov_ratio; float mask_level; float pri_ratio; diff --git a/minimap2.1 b/minimap2.1 index 4612b9d..a65bfa5 100644 --- a/minimap2.1 +++ b/minimap2.1 @@ -164,12 +164,6 @@ secondary alignments [5]. This option has no effect when .B -X is applied. .TP -.BI -D \ FLOAT -Discard a chain if the fraction of matching bases over the length of -query/target sequences in the chain is -.RI < FLOAT -[0]. -.TP .BI -x \ STR Preset []. This option applies multiple options at the same time. 
It should be applied before other options because options applied later will overwrite the @@ -181,10 +175,10 @@ are: .RS .TP 8 .B ava-pb -PacBio all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -D.05 -g10000) +PacBio all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25) .TP 8 .B ava-ont -Oxford Nanopore all-vs-all overlap mapping (-k15 -w5 -Xp0 -m100 -D.05 -g10000) +Oxford Nanopore all-vs-all overlap mapping (-k15 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25) .TP .B map10k PacBio/Oxford Nanopore read to reference mapping (-Hk19) @@ -249,14 +243,22 @@ sequences, and uses up to threads when mapping (the extra thread is for I/O, which is frequently idle and takes little CPU time). .TP -.B -V -Print version number to stdout -.TP -.BI --mb-size \ STR +.BI -K \ NUM Number of bases loaded into memory to process in a mini-batch [200M]. Similar to option .BR -I , -K/M/G/k/m/g suffix is accepted. This option affects both indexing and mapping. +K/M/G/k/m/g suffix is accepted. A large +.I NUM +helps load balancing in the multi-threading mode, at the cost of increased +memory. Preset +.B ava-pb +and +.B ava-ont +use +.BR -K500m . +.TP +.B -V +Print version number to stdout .SS Miscellaneous options .TP 10 .B --no-kalloc