diff --git a/main.c b/main.c index 0e95bbc..1dae0ac 100644 --- a/main.c +++ b/main.c @@ -7,7 +7,7 @@ #include "mmpriv.h" #include "ketopt.h" -#define MM_VERSION "2.18-r1025-dirty" +#define MM_VERSION "2.18-r1026-dirty" #ifdef __linux__ #include diff --git a/options.c b/options.c index 7b87f1c..45212dd 100644 --- a/options.c +++ b/options.c @@ -26,7 +26,7 @@ void mm_mapopt_init(mm_mapopt_t *opt) opt->max_chain_skip = 25; opt->max_chain_iter = 5000; opt->chain_gap_scale = 1.0f; - opt->max_max_occ = 5000; + opt->max_max_occ = 4095; opt->occ_dist = 0; opt->mask_level = 0.5f; @@ -91,6 +91,12 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) io->flag |= MM_I_HPC, io->k = 19; } else if (strcmp(preset, "map-ont") == 0) { io->flag = 0, io->k = 15; + } else if (strcmp(preset, "hifi") == 0 || strcmp(preset, "ccs") == 0) { + io->flag = 0, io->k = 19, io->w = 19; + mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1; + mo->occ_dist = 100; + mo->min_mid_occ = 100; + mo->min_dp_max = 200; } else if (strcmp(preset, "asm5") == 0) { io->flag = 0, io->k = 19, io->w = 19; mo->a = 1, mo->b = 19, mo->q = 39, mo->q2 = 81, mo->e = 3, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200; diff --git a/seed.c b/seed.c index 27f7ffe..bd250d9 100644 --- a/seed.c +++ b/seed.c @@ -1,23 +1,28 @@ #include "mmpriv.h" #include "kalloc.h" +#include "ksort.h" -mm_seed_t *mm_seed_collect_all(void *km, const mm_idx_t *mi, const mm128_v *mv) +mm_seed_t *mm_seed_collect_all(void *km, const mm_idx_t *mi, const mm128_v *mv, int32_t *n_m_) { mm_seed_t *m; size_t i; + int32_t k; m = (mm_seed_t*)kmalloc(km, mv->n * sizeof(mm_seed_t)); - for (i = 0; i < mv->n; ++i) { + for (i = k = 0; i < mv->n; ++i) { const uint64_t *cr; - mm_seed_t *q = &m[i]; + mm_seed_t *q; mm128_t *p = &mv->a[i]; uint32_t q_pos = (uint32_t)p->y, q_span = p->x & 0xff; int t; cr = mm_idx_get(mi, p->x>>8, &t); + if (t == 0) continue; + q = &m[k++]; q->q_pos = q_pos, q->q_span = q_span, q->cr = cr, q->n = t, q->seg_id = p->y >> 32; q->is_tandem = q->flt = 0; if (i > 0 && p->x>>8 == mv->a[i - 1].x>>8) q->is_tandem = 1; if (i < mv->n - 1 && p->x>>8 == mv->a[i + 1].x>>8) q->is_tandem = 1; } + *n_m_ = k; return m; } @@ -37,18 +42,19 @@ void mm_seed_select(int32_t n, mm_seed_t *a, int len, int max_occ, int max_max_o for (i = 0, last0 = -1; i <= n; ++i) { if (i == n || a[i].n <= max_occ) { if (i - last0 > 1) { - int32_t ps = last0 < 0? 0 : (uint32_t)a[last0].q_pos; - int32_t pe = i == n? len : (uint32_t)a[i].q_pos; + int32_t ps = last0 < 0? 0 : (uint32_t)a[last0].q_pos>>1; + int32_t pe = i == n? len : (uint32_t)a[i].q_pos>>1; int32_t j, k, st = last0 + 1, en = i; int32_t max_high_occ = (int32_t)((double)(pe - ps) / dist + .499); + //fprintf(stderr, "Y\t%d\t%d\n", ps, pe); if (max_high_occ > MAX_MAX_HIGH_OCC) max_high_occ = MAX_MAX_HIGH_OCC; for (j = st, k = 0; j < en && k < max_high_occ; ++j, ++k) b[k] = (uint64_t)a[j].n<<32 | j; ks_heapmake_uint64_t(k, b); // initialize the binomial heap for (; j < en; ++j) { // if there are more, choose top max_high_occ - if (a[j].n < (uint32_t)b[0]) { // then update the heap - b[0] = (b[0]&0xFFFFFFFF00000000ULL) | j; + if (a[j].n < (int32_t)(b[0]>>32)) { // then update the heap + b[0] = (uint64_t)a[j].n<<32 | j; ks_heapdown_uint64_t(0, k, b); } } @@ -65,21 +71,22 @@ void mm_seed_select(int32_t n, mm_seed_t *a, int len, int max_occ, int max_max_o mm_seed_t *mm_collect_matches(void *km, int *_n_m, int qlen, int max_occ, int max_max_occ, int dist, const mm_idx_t *mi, const mm128_v *mv, int64_t *n_a, int *rep_len, int *n_mini_pos, uint64_t **mini_pos) { - int rep_st = 0, rep_en = 0, n_m; + int rep_st = 0, rep_en = 0, n_m, n_m0; size_t i; mm_seed_t *m; *n_mini_pos = 0; *mini_pos = (uint64_t*)kmalloc(km, mv->n * sizeof(uint64_t)); - m = mm_seed_collect_all(km, mi, mv); + m = mm_seed_collect_all(km, mi, mv, &n_m0); if (dist > 0 && max_max_occ > max_occ) { - mm_seed_select(mv->n, m, qlen, max_occ, max_max_occ, dist); + mm_seed_select(n_m0, m, qlen, max_occ, max_max_occ, dist); } else { - for (i = 0; i < mv->n; ++i) + for (i = 0; i < n_m0; ++i) if (m[i].n > max_occ) m[i].flt = 1; } - for (i = 0, n_m = 0, *rep_len = 0, *n_a = 0; i < mv->n; ++i) { + for (i = 0, n_m = 0, *rep_len = 0, *n_a = 0; i < n_m0; ++i) { mm_seed_t *q = &m[i]; + //fprintf(stderr, "X\t%d\t%d\t%d\n", q->q_pos>>1, q->n, q->flt); if (q->flt) { int en = (q->q_pos >> 1) + 1, st = en - q->q_span; if (st > rep_en) {