From 3c91d652dd774a08fc8a0eaa5b4305ad8649bb8f Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 13 Sep 2017 11:37:00 -0400 Subject: [PATCH] r360: allow to set integer max occ --- index.c | 4 ++-- main.c | 10 +++++++--- map.c | 6 ++++-- minimap.h | 4 +--- mmpriv.h | 2 ++ 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/index.c b/index.c index 2123ab5..70f82d6 100644 --- a/index.c +++ b/index.c @@ -104,13 +104,13 @@ int mm_idx_getseq(const mm_idx_t *mi, uint32_t rid, uint32_t st, uint32_t en, ui return en - st; } -uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f) +int32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f) { int i; size_t n = 0; uint32_t thres; khint_t *a, k; - if (f <= 0.) return UINT32_MAX; + if (f <= 0.) return INT32_MAX; for (i = 0; i < 1<b; ++i) if (mi->B[i].h) n += kh_size((idxhash_t*)mi->B[i].h); a = (uint32_t*)malloc(n * 4); diff --git a/main.c b/main.c index d0ac576..da019b2 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.1.1-r347-dirty" +#define MM_VERSION "2.1.1-r360-dirty" #ifdef __linux__ #include @@ -78,7 +78,6 @@ int main(int argc, char *argv[]) else if (c == 'H') is_hpc = 1, idx_par_set = 1; else if (c == 'd') fnw = optarg; // the above are indexing related options, except -I else if (c == 'r') opt.bw = (int)mm_parse_num(optarg); - else if (c == 'f') opt.mid_occ_frac = atof(optarg); else if (c == 't') n_threads = atoi(optarg); else if (c == 'v') mm_verbose = atoi(optarg); else if (c == 'g') opt.max_gap = (int)mm_parse_num(optarg); @@ -117,6 +116,11 @@ int main(int argc, char *argv[]) else if (c == 'V') { puts(MM_VERSION); return 0; + } else if (c == 'f') { + double x; + x = atof(optarg); + if (x < 1.0) opt.mid_occ_frac = x, opt.mid_occ = 0; + else opt.mid_occ = (int)(x + .499); } else if (c == 'u') { if (*optarg == 'b') opt.flag |= MM_F_SPLICE_FOR|MM_F_SPLICE_REV; else if (*optarg == 'B') opt.flag |= MM_F_SPLICE_BOTH; @@ -168,7 +172,7 @@ int main(int argc, char *argv[]) opt.min_dp_max = 40; opt.best_n = 20; opt.bw = 50; - opt.mid_occ_frac = 2e-5f; + opt.mid_occ = 1000; } else if (strcmp(optarg, "splice") == 0 || strcmp(optarg, "cdna") == 0) { k = 15, w = 5; opt.flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV; diff --git a/map.c b/map.c index bb11c31..5510de6 100644 --- a/map.c +++ b/map.c @@ -40,8 +40,10 @@ void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi) { if (opt->flag & MM_F_SPLICE_BOTH) opt->flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV); - opt->max_occ = mm_idx_cal_max_occ(mi, opt->max_occ_frac); - opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac); + if (opt->max_occ <= 0) + opt->max_occ = mm_idx_cal_max_occ(mi, opt->max_occ_frac); + if (opt->mid_occ <= 0) + opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac); if (mm_verbose >= 3) fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d; max_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ, opt->max_occ); diff --git a/minimap.h b/minimap.h index def9640..e0a28fd 100644 --- a/minimap.h +++ b/minimap.h @@ -105,8 +105,7 @@ typedef struct { int min_dp_max; int min_ksw_len; - int max_occ; - int mid_occ; + int32_t max_occ, mid_occ; } mm_mapopt_t; extern int mm_verbose, mm_dbg_flag; @@ -127,7 +126,6 @@ void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, i mm_idx_t *mm_idx_init(int w, int k, int b, int is_hpc); void mm_idx_destroy(mm_idx_t *mi); mm_idx_t *mm_idx_gen(struct mm_bseq_file_s *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name); -uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f); void mm_idx_stat(const mm_idx_t *idx); const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n); int mm_idx_getseq(const mm_idx_t *mi, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq); diff --git a/mmpriv.h b/mmpriv.h index 3c40c28..3f87686 100644 --- a/mmpriv.h +++ b/mmpriv.h @@ -44,6 +44,8 @@ void mm_write_sam_SQ(const mm_idx_t *idx); void mm_write_sam_hdr_no_SQ(const char *rg, const char *ver, int argc, char *argv[]); void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag); void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs); + +int32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f); int mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int min_cnt, int min_sc, int is_cdna, int64_t n, mm128_t *a, uint64_t **_u, void *km); mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a);