added idx_stat

This commit is contained in:
Heng Li 2017-04-26 22:52:28 +08:00
parent de367a340c
commit 7b7fabef4d
4 changed files with 23 additions and 7 deletions

20
index.c
View File

@ -58,6 +58,26 @@ const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n)
}
}
void mm_idx_stat(const mm_idx_t *mi)
{
int i, n = 0;
uint64_t sum = 0, len = 0;
for (i = 0; i < mi->n_seq; ++i)
len += mi->seq[i].len;
for (i = 0; i < 1<<mi->b; ++i)
if (mi->B[i].h) n += kh_size((idxhash_t*)mi->B[i].h);
for (i = 0; i < 1<<mi->b; ++i) {
idxhash_t *h = (idxhash_t*)mi->B[i].h;
khint_t k;
if (h == 0) continue;
for (k = 0; k < kh_end(h); ++k)
if (kh_exist(h, k))
sum += kh_key(h, k)&1? 1 : (uint32_t)kh_val(h, k);
}
fprintf(stderr, "[M::%s::%.3f*%.2f] distinct minimizers: %d; average occurrences: %.3lf; average spacing: %.3lf\n",
__func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), n, (double)sum / n, (double)len / sum);
}
uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f)
{
int i;

7
main.c
View File

@ -34,7 +34,7 @@ int main(int argc, char *argv[])
mm_realtime0 = realtime();
mm_mapopt_init(&opt);
while ((c = getopt(argc, argv, "w:k:B:b:t:r:c:f:Vv:NOg:I:d:lRPST:L:Dx:H")) >= 0) {
while ((c = getopt(argc, argv, "w:k:B:b:t:r:c:f:Vv:NOg:I:d:lST:L:Dx:H")) >= 0) {
if (c == 'w') w = atoi(optarg);
else if (c == 'k') k = atoi(optarg);
else if (c == 'b') b = atoi(optarg);
@ -48,8 +48,6 @@ int main(int argc, char *argv[])
else if (c == 'v') mm_verbose = atoi(optarg);
else if (c == 'g') opt.max_gap = atoi(optarg);
else if (c == 'N') keep_name = 0;
else if (c == 'R') opt.flag |= MM_F_WITH_REP;
else if (c == 'P') opt.flag &= ~MM_F_WITH_REP;
else if (c == 'D') opt.flag |= MM_F_NO_SELF;
else if (c == 'O') opt.flag |= MM_F_NO_ISO;
else if (c == 'S') opt.flag |= MM_F_AVA | MM_F_NO_SELF;
@ -98,8 +96,6 @@ int main(int argc, char *argv[])
// fprintf(stderr, " -D skip self mappings but keep dual mappings\n"); // too confusing to expose to end users
fprintf(stderr, " -S skip self and dual mappings\n");
fprintf(stderr, " -O drop isolated hits before chaining (EXPERIMENTAL)\n");
fprintf(stderr, " -P filtering potential repeats after mapping (EXPERIMENTAL)\n");
// fprintf(stderr, " -R skip post-mapping repeat filtering\n"); // deprecated option for backward compatibility
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
fprintf(stderr, " ava10k: -Sw5 -L100 -m0 (PacBio/ONT all-vs-all read mapping)\n");
fprintf(stderr, " Input/Output:\n");
@ -130,6 +126,7 @@ int main(int argc, char *argv[])
fprintf(stderr, "[M::%s::%.3f*%.2f] dumpped the (partial) index to disk\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0));
}
if (argc != optind + 1) mm_mapopt_update(&opt, mi);
if (mm_verbose >= 3) mm_idx_stat(mi);
for (i = optind + 1; i < argc; ++i)
mm_map_file(mi, argv[i], &opt, n_threads, mini_batch_size);
mm_idx_destroy(mi);

1
map.c
View File

@ -18,7 +18,6 @@ void mm_mapopt_init(mm_mapopt_t *opt)
opt->max_gap = 10000;
opt->min_cnt = 4;
opt->min_match = 40;
opt->flag = MM_F_WITH_REP;
}
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)

View File

@ -8,7 +8,6 @@
#define MM_IDX_DEF_B 14
#define MM_DEREP_Q50 5.0
#define MM_F_WITH_REP 0x1
#define MM_F_NO_SELF 0x2
#define MM_F_NO_ISO 0x4
#define MM_F_AVA 0x8
@ -87,6 +86,7 @@ mm_idx_t *mm_idx_init(int w, int k, int b, int is_hpc);
void mm_idx_destroy(mm_idx_t *mi);
mm_idx_t *mm_idx_gen(struct bseq_file_s *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name);
uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f);
void mm_idx_stat(const mm_idx_t *idx);
const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n);
mm_idx_t *mm_idx_build(const char *fn, int w, int k, int is_hpc, int n_threads);