From 0f5608c4a43d733af3c458220eb862ddd7a072b5 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sat, 1 May 2021 15:41:39 -0400 Subject: [PATCH] r1028: backport minigraph -U --- main.c | 7 +++++-- minimap.h | 2 +- minimap2.1 | 2 +- options.c | 5 ++++- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/main.c b/main.c index 160801f..5234e86 100644 --- a/main.c +++ b/main.c @@ -7,7 +7,7 @@ #include "mmpriv.h" #include "ketopt.h" -#define MM_VERSION "2.18-r1027-dirty" +#define MM_VERSION "2.18-r1028-dirty" #ifdef __linux__ #include @@ -108,7 +108,7 @@ static inline void yes_or_no(mm_mapopt_t *opt, int flag, int long_idx, const cha int main(int argc, char *argv[]) { - const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:"; + const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:"; ketopt_t o = KETOPT_INIT; mm_mapopt_t opt; mm_idxopt_t ipt; @@ -248,6 +248,9 @@ int main(int argc, char *argv[]) } else if (c == 'V') { puts(MM_VERSION); return 0; + } else if (c == 'U') { + opt.min_mid_occ = strtol(o.arg, &s, 10); + if (*s == ',') opt.max_mid_occ = strtol(s + 1, &s, 10); } else if (c == 'f') { double x; char *p; diff --git a/minimap.h b/minimap.h index 9b7b4ae..543a0ed 100644 --- a/minimap.h +++ b/minimap.h @@ -146,7 +146,7 @@ typedef struct { int pe_ori, pe_bonus; float mid_occ_frac; // only used by mm_mapopt_update(); see below - int32_t min_mid_occ; + int32_t min_mid_occ, max_mid_occ; int32_t mid_occ; // ignore seeds with occurrences above this threshold int32_t max_occ, max_max_occ, occ_dist; int64_t mini_batch_size; // size of a batch of query bases to process in parallel diff --git a/minimap2.1 b/minimap2.1 index 018d958..66ea41a 100644 --- a/minimap2.1 +++ b/minimap2.1 @@ -540,7 +540,7 @@ is determined by the sequencing error mode. PacBio HiFi reads to reference mapping .RB ( -k19 .B -w10 -A1 -B4 -O6,26 -E2,1 -s200 -e100 -g10k -.BR --min-occ-floor=100 ). +.BR -U100,500 ). .TP .B asm5 Long assembly to reference mapping diff --git a/options.c b/options.c index a597962..2044b62 100644 --- a/options.c +++ b/options.c @@ -16,6 +16,7 @@ void mm_mapopt_init(mm_mapopt_t *opt) memset(opt, 0, sizeof(mm_mapopt_t)); opt->seed = 11; opt->mid_occ_frac = 2e-4f; + opt->max_mid_occ = 1000000000; opt->sdust_thres = 0; // no SDUST masking opt->min_cnt = 3; @@ -63,6 +64,8 @@ void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi) opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac); if (opt->mid_occ < opt->min_mid_occ) opt->mid_occ = opt->min_mid_occ; + if (opt->max_mid_occ > opt->min_mid_occ && opt->mid_occ > opt->max_mid_occ) + opt->mid_occ = opt->max_mid_occ; if (mm_verbose >= 3) fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ); } @@ -96,7 +99,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1; mo->max_gap = 10000; mo->occ_dist = 500; - mo->min_mid_occ = 100; + mo->min_mid_occ = 100, mo->max_mid_occ = 500; mo->min_dp_max = 200; } else if (strcmp(preset, "asm5") == 0) { io->flag = 0, io->k = 19, io->w = 19;