r741: added --min-occ-floor to improve #107

This commit is contained in:
Heng Li 2018-03-12 14:32:27 -04:00
parent ad1beaf255
commit bdc615c1d4
5 changed files with 34 additions and 5 deletions

4
main.c
View File

@ -6,7 +6,7 @@
#include "mmpriv.h"
#include "getopt.h"
#define MM_VERSION "2.9-r720"
#define MM_VERSION "2.9-r741-dirty"
#ifdef __linux__
#include <sys/resource.h>
@ -51,6 +51,7 @@ static struct option long_options[] = {
{ "all-chain", no_argument, 0, 'P' },
{ "dual", required_argument, 0, 0 }, // 26
{ "max-clip-ratio", required_argument, 0, 0 }, // 27
{ "min-occ-floor", required_argument, 0, 0 }, // 28
{ "help", no_argument, 0, 'h' },
{ "max-intron-len", required_argument, 0, 'G' },
{ "version", no_argument, 0, 'V' },
@ -164,6 +165,7 @@ int main(int argc, char *argv[])
else if (c == 0 && long_idx ==22) opt.flag |= MM_F_FOR_ONLY; // --for-only
else if (c == 0 && long_idx ==23) opt.flag |= MM_F_REV_ONLY; // --rev-only
else if (c == 0 && long_idx ==27) opt.max_clip_ratio = atof(optarg); // --max-clip-ratio
else if (c == 0 && long_idx ==28) opt.min_mid_occ = atoi(optarg); // --min-occ-floor
else if (c == 0 && long_idx == 14) { // --frag
yes_or_no(&opt, MM_F_FRAG_MODE, long_idx, optarg, 1);
} else if (c == 0 && long_idx == 15) { // --secondary

View File

@ -126,6 +126,7 @@ typedef struct {
int pe_ori, pe_bonus;
float mid_occ_frac; // only used by mm_mapopt_update(); see below
int32_t min_mid_occ;
int32_t mid_occ; // ignore seeds with occurrences above this threshold
int32_t max_occ;
int mini_batch_size; // size of a batch of query bases to process in parallel

View File

@ -217,6 +217,14 @@ on chains. Set
.I INT
to a large number to switch off this heuristic.
.TP
.BI --min-occ-floor \ INT
Force minimap2 to always use k-mers occurring
.I INT
times or less [0]. In effect, the max occurrence threshold is set to
the
.RI max{ INT ,
.BR -f }.
.TP
.B --no-long-join
Disable the long gap patching heuristic. When this option is applied, the
maximum alignment gap is mostly controlled by
@ -433,18 +441,25 @@ is determined by the sequencing error mode.
.B asm5
Long assembly to reference mapping
.RB ( -k19
.B -w19 -A1 -B19 -O39,81 -E3,1 -s200
.BR -z200 ).
.B -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200
.BR --min-occ-floor=100 ).
Typically, the alignment will not extend to regions with 5% or higher sequence
divergence. Only use this preset if the average divergence is far below 5%.
.TP
.B asm10
Long assembly to reference mapping
.RB ( -k19
.B -w19 -A1 -B9 -O16,41 -E2,1 -s200
.BR -z200 ).
.B -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200
.BR --min-occ-floor=100 ).
Up to 10% sequence divergence.
.TP
.B asm20
Long assembly to reference mapping
.RB ( -k19
.B -w10 -A1 -B4 -O6,26 -E2,1 -s200 -z200
.BR --min-occ-floor=100 ).
Up to 20% sequence divergence.
.TP
.B ava-pb
PacBio all-vs-all overlap mapping
.RB ( -Hk19

View File

@ -51,6 +51,8 @@ void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
opt->flag |= MM_F_SPLICE;
if (opt->mid_occ <= 0)
opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac);
if (opt->mid_occ < opt->min_mid_occ)
opt->mid_occ = opt->min_mid_occ;
if (mm_verbose >= 3)
fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ);
}
@ -81,11 +83,19 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
} else if (strcmp(preset, "asm5") == 0) {
io->flag = 0, io->k = 19, io->w = 19;
mo->a = 1, mo->b = 19, mo->q = 39, mo->q2 = 81, mo->e = 3, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
mo->min_mid_occ = 100;
mo->min_dp_max = 200;
mo->best_n = 50;
} else if (strcmp(preset, "asm10") == 0) {
io->flag = 0, io->k = 19, io->w = 19;
mo->a = 1, mo->b = 9, mo->q = 16, mo->q2 = 41, mo->e = 2, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
mo->min_mid_occ = 100;
mo->min_dp_max = 200;
mo->best_n = 50;
} else if (strcmp(preset, "asm20") == 0) {
io->flag = 0, io->k = 19, io->w = 10;
mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
mo->min_mid_occ = 100;
mo->min_dp_max = 200;
mo->best_n = 50;
} else if (strcmp(preset, "short") == 0 || strcmp(preset, "sr") == 0) {

View File

@ -34,6 +34,7 @@ cdef extern from "minimap.h":
float max_clip_ratio
int pe_ori, pe_bonus
float mid_occ_frac
int32_t min_mid_occ
int32_t mid_occ
int32_t max_occ
int mini_batch_size