r741: added --min-occ-floor to improve #107
This commit is contained in:
parent
ad1beaf255
commit
bdc615c1d4
4
main.c
4
main.c
|
|
@ -6,7 +6,7 @@
|
|||
#include "mmpriv.h"
|
||||
#include "getopt.h"
|
||||
|
||||
#define MM_VERSION "2.9-r720"
|
||||
#define MM_VERSION "2.9-r741-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
@ -51,6 +51,7 @@ static struct option long_options[] = {
|
|||
{ "all-chain", no_argument, 0, 'P' },
|
||||
{ "dual", required_argument, 0, 0 }, // 26
|
||||
{ "max-clip-ratio", required_argument, 0, 0 }, // 27
|
||||
{ "min-occ-floor", required_argument, 0, 0 }, // 28
|
||||
{ "help", no_argument, 0, 'h' },
|
||||
{ "max-intron-len", required_argument, 0, 'G' },
|
||||
{ "version", no_argument, 0, 'V' },
|
||||
|
|
@ -164,6 +165,7 @@ int main(int argc, char *argv[])
|
|||
else if (c == 0 && long_idx ==22) opt.flag |= MM_F_FOR_ONLY; // --for-only
|
||||
else if (c == 0 && long_idx ==23) opt.flag |= MM_F_REV_ONLY; // --rev-only
|
||||
else if (c == 0 && long_idx ==27) opt.max_clip_ratio = atof(optarg); // --max-clip-ratio
|
||||
else if (c == 0 && long_idx ==28) opt.min_mid_occ = atoi(optarg); // --min-occ-floor
|
||||
else if (c == 0 && long_idx == 14) { // --frag
|
||||
yes_or_no(&opt, MM_F_FRAG_MODE, long_idx, optarg, 1);
|
||||
} else if (c == 0 && long_idx == 15) { // --secondary
|
||||
|
|
|
|||
|
|
@ -126,6 +126,7 @@ typedef struct {
|
|||
int pe_ori, pe_bonus;
|
||||
|
||||
float mid_occ_frac; // only used by mm_mapopt_update(); see below
|
||||
int32_t min_mid_occ;
|
||||
int32_t mid_occ; // ignore seeds with occurrences above this threshold
|
||||
int32_t max_occ;
|
||||
int mini_batch_size; // size of a batch of query bases to process in parallel
|
||||
|
|
|
|||
23
minimap2.1
23
minimap2.1
|
|
@ -217,6 +217,14 @@ on chains. Set
|
|||
.I INT
|
||||
to a large number to switch off this heuristic.
|
||||
.TP
|
||||
.BI --min-occ-floor \ INT
|
||||
Force minimap2 to always use k-mers occurring
|
||||
.I INT
|
||||
times or less [0]. In effect, the max occurrence threshold is set to
|
||||
the
|
||||
.RI max{ INT ,
|
||||
.BR -f }.
|
||||
.TP
|
||||
.B --no-long-join
|
||||
Disable the long gap patching heuristic. When this option is applied, the
|
||||
maximum alignment gap is mostly controlled by
|
||||
|
|
@ -433,18 +441,25 @@ is determined by the sequencing error mode.
|
|||
.B asm5
|
||||
Long assembly to reference mapping
|
||||
.RB ( -k19
|
||||
.B -w19 -A1 -B19 -O39,81 -E3,1 -s200
|
||||
.BR -z200 ).
|
||||
.B -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200
|
||||
.BR --min-occ-floor=100 ).
|
||||
Typically, the alignment will not extend to regions with 5% or higher sequence
|
||||
divergence. Only use this preset if the average divergence is far below 5%.
|
||||
.TP
|
||||
.B asm10
|
||||
Long assembly to reference mapping
|
||||
.RB ( -k19
|
||||
.B -w19 -A1 -B9 -O16,41 -E2,1 -s200
|
||||
.BR -z200 ).
|
||||
.B -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200
|
||||
.BR --min-occ-floor=100 ).
|
||||
Up to 10% sequence divergence.
|
||||
.TP
|
||||
.B asm20
|
||||
Long assembly to reference mapping
|
||||
.RB ( -k19
|
||||
.B -w10 -A1 -B4 -O6,26 -E2,1 -s200 -z200
|
||||
.BR --min-occ-floor=100 ).
|
||||
Up to 20% sequence divergence.
|
||||
.TP
|
||||
.B ava-pb
|
||||
PacBio all-vs-all overlap mapping
|
||||
.RB ( -Hk19
|
||||
|
|
|
|||
10
options.c
10
options.c
|
|
@ -51,6 +51,8 @@ void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
|
|||
opt->flag |= MM_F_SPLICE;
|
||||
if (opt->mid_occ <= 0)
|
||||
opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac);
|
||||
if (opt->mid_occ < opt->min_mid_occ)
|
||||
opt->mid_occ = opt->min_mid_occ;
|
||||
if (mm_verbose >= 3)
|
||||
fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ);
|
||||
}
|
||||
|
|
@ -81,11 +83,19 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
|
|||
} else if (strcmp(preset, "asm5") == 0) {
|
||||
io->flag = 0, io->k = 19, io->w = 19;
|
||||
mo->a = 1, mo->b = 19, mo->q = 39, mo->q2 = 81, mo->e = 3, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
|
||||
mo->min_mid_occ = 100;
|
||||
mo->min_dp_max = 200;
|
||||
mo->best_n = 50;
|
||||
} else if (strcmp(preset, "asm10") == 0) {
|
||||
io->flag = 0, io->k = 19, io->w = 19;
|
||||
mo->a = 1, mo->b = 9, mo->q = 16, mo->q2 = 41, mo->e = 2, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
|
||||
mo->min_mid_occ = 100;
|
||||
mo->min_dp_max = 200;
|
||||
mo->best_n = 50;
|
||||
} else if (strcmp(preset, "asm20") == 0) {
|
||||
io->flag = 0, io->k = 19, io->w = 10;
|
||||
mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
|
||||
mo->min_mid_occ = 100;
|
||||
mo->min_dp_max = 200;
|
||||
mo->best_n = 50;
|
||||
} else if (strcmp(preset, "short") == 0 || strcmp(preset, "sr") == 0) {
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ cdef extern from "minimap.h":
|
|||
float max_clip_ratio
|
||||
int pe_ori, pe_bonus
|
||||
float mid_occ_frac
|
||||
int32_t min_mid_occ
|
||||
int32_t mid_occ
|
||||
int32_t max_occ
|
||||
int mini_batch_size
|
||||
|
|
|
|||
Loading…
Reference in New Issue