diff --git a/index.c b/index.c index 47a62f8..4d7f971 100644 --- a/index.c +++ b/index.c @@ -58,6 +58,26 @@ const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n) } } +void mm_idx_stat(const mm_idx_t *mi) +{ + int i, n = 0; + uint64_t sum = 0, len = 0; + for (i = 0; i < mi->n_seq; ++i) + len += mi->seq[i].len; + for (i = 0; i < 1<b; ++i) + if (mi->B[i].h) n += kh_size((idxhash_t*)mi->B[i].h); + for (i = 0; i < 1<b; ++i) { + idxhash_t *h = (idxhash_t*)mi->B[i].h; + khint_t k; + if (h == 0) continue; + for (k = 0; k < kh_end(h); ++k) + if (kh_exist(h, k)) + sum += kh_key(h, k)&1? 1 : (uint32_t)kh_val(h, k); + } + fprintf(stderr, "[M::%s::%.3f*%.2f] distinct minimizers: %d; average occurrences: %.3lf; average spacing: %.3lf\n", + __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), n, (double)sum / n, (double)len / sum); +} + uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f) { int i; diff --git a/main.c b/main.c index 66dc1e8..4073c75 100644 --- a/main.c +++ b/main.c @@ -34,7 +34,7 @@ int main(int argc, char *argv[]) mm_realtime0 = realtime(); mm_mapopt_init(&opt); - while ((c = getopt(argc, argv, "w:k:B:b:t:r:c:f:Vv:NOg:I:d:lRPST:L:Dx:H")) >= 0) { + while ((c = getopt(argc, argv, "w:k:B:b:t:r:c:f:Vv:NOg:I:d:lST:L:Dx:H")) >= 0) { if (c == 'w') w = atoi(optarg); else if (c == 'k') k = atoi(optarg); else if (c == 'b') b = atoi(optarg); @@ -48,8 +48,6 @@ int main(int argc, char *argv[]) else if (c == 'v') mm_verbose = atoi(optarg); else if (c == 'g') opt.max_gap = atoi(optarg); else if (c == 'N') keep_name = 0; - else if (c == 'R') opt.flag |= MM_F_WITH_REP; - else if (c == 'P') opt.flag &= ~MM_F_WITH_REP; else if (c == 'D') opt.flag |= MM_F_NO_SELF; else if (c == 'O') opt.flag |= MM_F_NO_ISO; else if (c == 'S') opt.flag |= MM_F_AVA | MM_F_NO_SELF; @@ -98,8 +96,6 @@ int main(int argc, char *argv[]) // fprintf(stderr, " -D skip self mappings but keep dual mappings\n"); // too confusing to expose to end users fprintf(stderr, " -S skip self and dual mappings\n"); fprintf(stderr, " -O drop isolated hits before chaining (EXPERIMENTAL)\n"); - fprintf(stderr, " -P filtering potential repeats after mapping (EXPERIMENTAL)\n"); -// fprintf(stderr, " -R skip post-mapping repeat filtering\n"); // deprecated option for backward compatibility fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n"); fprintf(stderr, " ava10k: -Sw5 -L100 -m0 (PacBio/ONT all-vs-all read mapping)\n"); fprintf(stderr, " Input/Output:\n"); @@ -130,6 +126,7 @@ int main(int argc, char *argv[]) fprintf(stderr, "[M::%s::%.3f*%.2f] dumpped the (partial) index to disk\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0)); } if (argc != optind + 1) mm_mapopt_update(&opt, mi); + if (mm_verbose >= 3) mm_idx_stat(mi); for (i = optind + 1; i < argc; ++i) mm_map_file(mi, argv[i], &opt, n_threads, mini_batch_size); mm_idx_destroy(mi); diff --git a/map.c b/map.c index 2ab91a6..47e0a4d 100644 --- a/map.c +++ b/map.c @@ -18,7 +18,6 @@ void mm_mapopt_init(mm_mapopt_t *opt) opt->max_gap = 10000; opt->min_cnt = 4; opt->min_match = 40; - opt->flag = MM_F_WITH_REP; } void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi) diff --git a/minimap.h b/minimap.h index 37139d7..b32e71e 100644 --- a/minimap.h +++ b/minimap.h @@ -8,7 +8,6 @@ #define MM_IDX_DEF_B 14 #define MM_DEREP_Q50 5.0 -#define MM_F_WITH_REP 0x1 #define MM_F_NO_SELF 0x2 #define MM_F_NO_ISO 0x4 #define MM_F_AVA 0x8 @@ -87,6 +86,7 @@ mm_idx_t *mm_idx_init(int w, int k, int b, int is_hpc); void mm_idx_destroy(mm_idx_t *mi); mm_idx_t *mm_idx_gen(struct bseq_file_s *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name); uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f); +void mm_idx_stat(const mm_idx_t *idx); const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n); mm_idx_t *mm_idx_build(const char *fn, int w, int k, int is_hpc, int n_threads);