diff --git a/main.c b/main.c index 9b91e69..0a3eaad 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.9-r720" +#define MM_VERSION "2.9-r741-dirty" #ifdef __linux__ #include @@ -51,6 +51,7 @@ static struct option long_options[] = { { "all-chain", no_argument, 0, 'P' }, { "dual", required_argument, 0, 0 }, // 26 { "max-clip-ratio", required_argument, 0, 0 }, // 27 + { "min-occ-floor", required_argument, 0, 0 }, // 28 { "help", no_argument, 0, 'h' }, { "max-intron-len", required_argument, 0, 'G' }, { "version", no_argument, 0, 'V' }, @@ -164,6 +165,7 @@ int main(int argc, char *argv[]) else if (c == 0 && long_idx ==22) opt.flag |= MM_F_FOR_ONLY; // --for-only else if (c == 0 && long_idx ==23) opt.flag |= MM_F_REV_ONLY; // --rev-only else if (c == 0 && long_idx ==27) opt.max_clip_ratio = atof(optarg); // --max-clip-ratio + else if (c == 0 && long_idx ==28) opt.min_mid_occ = atoi(optarg); // --min-occ-floor else if (c == 0 && long_idx == 14) { // --frag yes_or_no(&opt, MM_F_FRAG_MODE, long_idx, optarg, 1); } else if (c == 0 && long_idx == 15) { // --secondary diff --git a/minimap.h b/minimap.h index 4e3a617..ae68bfe 100644 --- a/minimap.h +++ b/minimap.h @@ -126,6 +126,7 @@ typedef struct { int pe_ori, pe_bonus; float mid_occ_frac; // only used by mm_mapopt_update(); see below + int32_t min_mid_occ; int32_t mid_occ; // ignore seeds with occurrences above this threshold int32_t max_occ; int mini_batch_size; // size of a batch of query bases to process in parallel diff --git a/minimap2.1 b/minimap2.1 index 6745ffa..f0c040a 100644 --- a/minimap2.1 +++ b/minimap2.1 @@ -217,6 +217,14 @@ on chains. Set .I INT to a large number to switch off this heurstics. .TP +.BI --min-occ-floor \ INT +Force minimap2 to always use k-mers occurring +.I INT +times or less [0]. In effect, the max occurrence threshold is set to +the +.RI max{ INT , +.BR -f }. +.TP .B --no-long-join Disable the long gap patching heuristic. 
When this option is applied, the maximum alignment gap is mostly controlled by @@ -433,18 +441,25 @@ is determined by the sequencing error mode. .B asm5 Long assembly to reference mapping .RB ( -k19 -.B -w19 -A1 -B19 -O39,81 -E3,1 -s200 -.BR -z200 ). +.B -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 +.BR --min-occ-floor=100 ). Typically, the alignment will not extend to regions with 5% or higher sequence divergence. Only use this preset if the average divergence is far below 5%. .TP .B asm10 Long assembly to reference mapping .RB ( -k19 -.B -w19 -A1 -B9 -O16,41 -E2,1 -s200 -.BR -z200 ). +.B -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 +.BR --min-occ-floor=100 ). Up to 10% sequence divergence. .TP +.B asm20 +Long assembly to reference mapping +.RB ( -k19 +.B -w10 -A1 -B4 -O6,26 -E2,1 -s200 -z200 +.BR --min-occ-floor=100 ). +Up to 20% sequence divergence. +.TP .B ava-pb PacBio all-vs-all overlap mapping .RB ( -Hk19 diff --git a/options.c b/options.c index b98e0eb..e820484 100644 --- a/options.c +++ b/options.c @@ -51,6 +51,8 @@ void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi) opt->flag |= MM_F_SPLICE; if (opt->mid_occ <= 0) opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac); + if (opt->mid_occ < opt->min_mid_occ) + opt->mid_occ = opt->min_mid_occ; if (mm_verbose >= 3) fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ); } @@ -81,11 +83,19 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) } else if (strcmp(preset, "asm5") == 0) { io->flag = 0, io->k = 19, io->w = 19; mo->a = 1, mo->b = 19, mo->q = 39, mo->q2 = 81, mo->e = 3, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200; + mo->min_mid_occ = 100; mo->min_dp_max = 200; mo->best_n = 50; } else if (strcmp(preset, "asm10") == 0) { io->flag = 0, io->k = 19, io->w = 19; mo->a = 1, mo->b = 9, mo->q = 16, mo->q2 = 41, mo->e = 2, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200; + mo->min_mid_occ = 100; + 
mo->min_dp_max = 200; + mo->best_n = 50; + } else if (strcmp(preset, "asm20") == 0) { + io->flag = 0, io->k = 19, io->w = 10; + mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200; + mo->min_mid_occ = 100; mo->min_dp_max = 200; mo->best_n = 50; } else if (strcmp(preset, "short") == 0 || strcmp(preset, "sr") == 0) { diff --git a/python/cmappy.pxd b/python/cmappy.pxd index 532f6f5..3735307 100644 --- a/python/cmappy.pxd +++ b/python/cmappy.pxd @@ -34,6 +34,7 @@ cdef extern from "minimap.h": float max_clip_ratio int pe_ori, pe_bonus float mid_occ_frac + int32_t min_mid_occ int32_t mid_occ int32_t max_occ int mini_batch_size