r176: removed seedcov_ratio; changed default opt

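min_seedcov_ratio is not used; this commit removes the -D option, the struct field, and the corresponding filter in mm_filter_regs().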
This commit is contained in:
Heng Li 2017-07-12 12:47:46 -04:00
parent 52caf79395
commit b4280d186f
5 changed files with 26 additions and 29 deletions

4
hit.c
View File

@ -197,10 +197,6 @@ void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *re
mm_reg1_t *r = &regs[i]; mm_reg1_t *r = &regs[i];
int flt = 0; int flt = 0;
if (r->cnt < opt->min_cnt) flt = 1; if (r->cnt < opt->min_cnt) flt = 1;
else {
int blen = r->qe - r->qs < r->re - r->rs? r->qe - r->qs : r->re - r->rs;
if (r->score < blen * opt->min_seedcov_ratio * opt->a) flt = 1;
}
if (r->p) { if (r->p) {
if (r->p->blen - r->p->n_ambi - r->p->n_diff < opt->min_chain_score) flt = 1; if (r->p->blen - r->p->n_ambi - r->p->n_diff < opt->min_chain_score) flt = 1;
else if (r->p->dp_max < opt->min_dp_max) flt = 1; else if (r->p->dp_max < opt->min_dp_max) flt = 1;

21
main.c
View File

@ -10,7 +10,7 @@
#include "minimap.h" #include "minimap.h"
#include "mmpriv.h" #include "mmpriv.h"
#define MM_VERSION "2.0-r175-pre" #define MM_VERSION "2.0-r176-pre"
void liftrlimit() void liftrlimit()
{ {
@ -43,7 +43,7 @@ static int test_idx(const char *fn)
static struct option long_options[] = { static struct option long_options[] = {
{ "bucket-bits", required_argument, 0, 0 }, { "bucket-bits", required_argument, 0, 0 },
{ "mb-size", required_argument, 0, 0 }, { "mb-size", required_argument, 0, 'K' },
{ "int-rname", no_argument, 0, 0 }, { "int-rname", no_argument, 0, 0 },
{ "no-kalloc", no_argument, 0, 0 }, { "no-kalloc", no_argument, 0, 0 },
{ "print-qname", no_argument, 0, 0 }, { "print-qname", no_argument, 0, 0 },
@ -73,7 +73,7 @@ int main(int argc, char *argv[])
mm_realtime0 = realtime(); mm_realtime0 = realtime();
mm_mapopt_init(&opt); mm_mapopt_init(&opt);
while ((c = getopt_long(argc, argv, "aw:k:t:r:f:Vv:g:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:D:N:Q", long_options, &long_idx)) >= 0) { while ((c = getopt_long(argc, argv, "aw:k:K:t:r:f:Vv:g:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Q", long_options, &long_idx)) >= 0) {
if (c == 'w') w = atoi(optarg); if (c == 'w') w = atoi(optarg);
else if (c == 'k') k = atoi(optarg); else if (c == 'k') k = atoi(optarg);
else if (c == 'H') is_hpc = 1; else if (c == 'H') is_hpc = 1;
@ -85,7 +85,6 @@ int main(int argc, char *argv[])
else if (c == 'g') opt.max_gap = atoi(optarg); else if (c == 'g') opt.max_gap = atoi(optarg);
else if (c == 'N') opt.best_n = atoi(optarg); else if (c == 'N') opt.best_n = atoi(optarg);
else if (c == 'p') opt.pri_ratio = atof(optarg); else if (c == 'p') opt.pri_ratio = atof(optarg);
else if (c == 'D') opt.min_seedcov_ratio = atof(optarg);
else if (c == 'M') opt.mask_level = atof(optarg); else if (c == 'M') opt.mask_level = atof(optarg);
else if (c == 'c') opt.flag |= MM_F_CIGAR; else if (c == 'c') opt.flag |= MM_F_CIGAR;
else if (c == 'X') opt.flag |= MM_F_AVA | MM_F_NO_SELF; else if (c == 'X') opt.flag |= MM_F_AVA | MM_F_NO_SELF;
@ -114,7 +113,7 @@ int main(int argc, char *argv[])
} else if (c == 'E') { } else if (c == 'E') {
opt.e = opt.e2 = strtol(optarg, &s, 10); opt.e = opt.e2 = strtol(optarg, &s, 10);
if (*s == ',') opt.e2 = strtol(s + 1, &s, 10); if (*s == ',') opt.e2 = strtol(s + 1, &s, 10);
} else if (c == 'I' || (c == 0 && long_idx == 1)) { } else if (c == 'I' || c == 'K') {
double x; double x;
char *p; char *p;
x = strtod(optarg, &p); x = strtod(optarg, &p);
@ -126,11 +125,13 @@ int main(int argc, char *argv[])
} else if (c == 'x') { } else if (c == 'x') {
if (strcmp(optarg, "ava-ont") == 0) { if (strcmp(optarg, "ava-ont") == 0) {
opt.flag |= MM_F_AVA | MM_F_NO_SELF; opt.flag |= MM_F_AVA | MM_F_NO_SELF;
opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.min_seedcov_ratio = 0.05f, opt.max_gap = 10000, opt.max_chain_skip = 25; opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.max_gap = 10000, opt.max_chain_skip = 25;
minibatch_size = 500000000;
k = 15, w = 5; k = 15, w = 5;
} else if (strcmp(optarg, "ava-pb") == 0) { } else if (strcmp(optarg, "ava-pb") == 0) {
opt.flag |= MM_F_AVA | MM_F_NO_SELF; opt.flag |= MM_F_AVA | MM_F_NO_SELF;
opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.min_seedcov_ratio = 0.05f, opt.max_gap = 10000, opt.max_chain_skip = 25; opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.max_gap = 10000, opt.max_chain_skip = 25;
minibatch_size = 500000000;
is_hpc = 1, k = 19, w = 5; is_hpc = 1, k = 19, w = 5;
} else if (strcmp(optarg, "map10k") == 0) { } else if (strcmp(optarg, "map10k") == 0) {
is_hpc = 1, k = 19; is_hpc = 1, k = 19;
@ -163,10 +164,9 @@ int main(int argc, char *argv[])
fprintf(stderr, " -X skip self and dual mappings (for the all-vs-all mode)\n"); fprintf(stderr, " -X skip self and dual mappings (for the all-vs-all mode)\n");
fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio); fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n); fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n);
fprintf(stderr, " -D FLOAT min fraction of minimizer matches [%g]\n", opt.min_seedcov_ratio);
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n"); fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
fprintf(stderr, " ava-pb: -Hk19 -w5 -Xp0 -m100 -D.05 -g10000 --max-chain-skip 25 (PacBio read overlap)\n"); fprintf(stderr, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n");
fprintf(stderr, " ava-ont: -k15 -w5 -Xp0 -m100 -D.05 -g10000 --max-chain-skip 25 (ONT read overlap)\n"); fprintf(stderr, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n");
fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n"); fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n");
fprintf(stderr, " asm1m: -k19 -w19 (intra-species assembly to ref mapping)\n"); fprintf(stderr, " asm1m: -k19 -w19 (intra-species assembly to ref mapping)\n");
fprintf(stderr, " Alignment:\n"); fprintf(stderr, " Alignment:\n");
@ -181,6 +181,7 @@ int main(int argc, char *argv[])
fprintf(stderr, " -a output in the SAM format (PAF by default)\n"); fprintf(stderr, " -a output in the SAM format (PAF by default)\n");
fprintf(stderr, " -c output CIGAR in PAF\n"); fprintf(stderr, " -c output CIGAR in PAF\n");
fprintf(stderr, " -t INT number of threads [%d]\n", n_threads); fprintf(stderr, " -t INT number of threads [%d]\n", n_threads);
fprintf(stderr, " -K NUM minibatch size [200M]\n");
// fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose); // fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose);
fprintf(stderr, " -V show version number\n"); fprintf(stderr, " -V show version number\n");
fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n"); fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");

1
map.c
View File

@ -20,7 +20,6 @@ void mm_mapopt_init(mm_mapopt_t *opt)
opt->bw = 500; opt->bw = 500;
opt->max_gap = 5000; opt->max_gap = 5000;
opt->max_chain_skip = 25; opt->max_chain_skip = 25;
opt->min_seedcov_ratio = 0.0f;
opt->mask_level = 0.5f; opt->mask_level = 0.5f;
opt->pri_ratio = 0.8f; opt->pri_ratio = 0.8f;

View File

@ -77,7 +77,6 @@ typedef struct {
int max_chain_skip; int max_chain_skip;
int min_cnt; int min_cnt;
int min_chain_score; int min_chain_score;
float min_seedcov_ratio;
float mask_level; float mask_level;
float pri_ratio; float pri_ratio;

View File

@ -164,12 +164,6 @@ secondary alignments [5]. This option has no effect when
.B -X .B -X
is applied. is applied.
.TP .TP
.BI -D \ FLOAT
Discard a chain if the fraction of matching bases over the length of
query/target sequences in the chain is
.RI < FLOAT
[0].
.TP
.BI -x \ STR .BI -x \ STR
Preset []. This option applies multiple options at the same time. It should be Preset []. This option applies multiple options at the same time. It should be
applied before other options because options applied later will overwrite the applied before other options because options applied later will overwrite the
@ -181,10 +175,10 @@ are:
.RS .RS
.TP 8 .TP 8
.B ava-pb .B ava-pb
PacBio all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -D.05 -g10000) PacBio all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25)
.TP 8 .TP 8
.B ava-ont .B ava-ont
Oxford Nanopore all-vs-all overlap mapping (-k15 -w5 -Xp0 -m100 -D.05 -g10000) Oxford Nanopore all-vs-all overlap mapping (-k15 -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip 25)
.TP .TP
.B map10k .B map10k
PacBio/Oxford Nanopore read to reference mapping (-Hk19) PacBio/Oxford Nanopore read to reference mapping (-Hk19)
@ -249,14 +243,22 @@ sequences, and uses up to
threads when mapping (the extra thread is for I/O, which is frequently idle and threads when mapping (the extra thread is for I/O, which is frequently idle and
takes little CPU time). takes little CPU time).
.TP .TP
.B -V .BI -K \ NUM
Print version number to stdout
.TP
.BI --mb-size \ STR
Number of bases loaded into memory to process in a mini-batch [200M]. Number of bases loaded into memory to process in a mini-batch [200M].
Similar to option Similar to option
.BR -I , .BR -I ,
K/M/G/k/m/g suffix is accepted. This option affects both indexing and mapping. K/M/G/k/m/g suffix is accepted. A large
.I NUM
helps load balancing in the multi-threading mode, at the cost of increased
memory. Preset
.B ava-pb
and
.B ava-ont
use
.BR -K500m .
.TP
.B -V
Print version number to stdout.
.SS Miscellaneous options .SS Miscellaneous options
.TP 10 .TP 10
.B --no-kalloc .B --no-kalloc