From 2c9f1bce3e7d67d74285b6d4f355b3b83766053c Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 28 Apr 2017 18:24:45 +0800 Subject: [PATCH] k-mer thinning apparently working --- map.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 75 insertions(+), 8 deletions(-) diff --git a/map.c b/map.c index 47e0a4d..6ef39b1 100644 --- a/map.c +++ b/map.c @@ -86,8 +86,69 @@ static void mm_dust_minier(mm128_v *mini, int l_seq, const char *seq, int sdust_ mini->n = k; } -void mm_pair_thin(mm_tbuf_t *b, mm_match_t *_m1, mm_match_t *_m2) +int mm_pair_thin_core(mm_tbuf_t *b, uint64_t x, int radius, int rel, int st0, int n, const uint64_t *z, uint64_v *a) { + int i, st = st0, en = n, mid = en - 1; + while (st < en) { + uint64_t y; + mid = st + ((en - st) >> 1); + y = z[mid]; + if (y < x && (x - y)>>1 > radius) st = mid + 1; + else if (y >= x && (y - x)>>1 > radius) en = mid; + else break; + } + if (st < en) { + for (en = mid + 1; en < n; ++en) + if (z[en] > x && (z[en] - x)>>1 > radius) + break; + for (st = mid - 1; st >= st0; --st) + if (z[st] < x && (x - z[st])>>1 > radius) + break; + ++st; + for (i = st; i < en; ++i) { + uint64_t y = z[i]; + if (((x ^ y) & 1) == rel) { + printf("* %d,%d\n", (uint32_t)x>>1, (uint32_t)y>>1); + kv_push(uint64_t, b->km_fixed, *a, y); + } + } + return en; + } else return z[st] < x? st + 1 : en; +} + +void mm_pair_thin(mm_tbuf_t *b, int radius, mm_match_t *m1, mm_match_t *m2) +{ + mm_match_t *m[2]; + const uint64_t *z[2]; + uint64_v a[2]; + int i, n[2], k[2], u = 0, rel = (m1->qpos ^ m2->qpos) & 1; + + m[0] = m1, m[1] = m2; + for (i = 0; i < 2; ++i) { + n[i] = m[i]->n; + z[i] = m[i]->x.cr; + k[i] = 0; + kv_init(a[i]); + kv_resize(uint64_t, b->km_fixed, a[i], 256); + } + while (k[0] < n[0] && k[1] < n[1]) { + //printf("%d; %d,%d\n", u, k[0], k[1]); + int v = u^1, dist = (int)(m[v]->qpos>>1) - (int)(m[u]->qpos>>1); + uint64_t x = z[u][k[u]]; + int uori = (x ^ m[u]->qpos) & 1, last; + int64_t tpos = x>>1 & 0x7fffffff; + tpos = uori == 0? tpos + dist : tpos - dist; + if (tpos < 0) tpos = 0; + x = x>>32<<32 | tpos<<1 | (x&1); + last = a[v].n; + k[v] = mm_pair_thin_core(b, x, radius, rel, k[v], n[v], z[v], &a[v]); + if (a[v].n > last) kv_push(uint64_t, b->km_fixed, a[u], z[u][k[u]]); + ++k[u]; + u ^= 1; + } + for (i = 0; i < 2; ++i) + m[i]->n = a[i].n, m[i]->x.r = a[i].a, m[i]->is_alloc = 1; + printf("%d,%d; %d,%d\n", m[0]->qpos>>1, m[1]->qpos>>1, m[0]->n, m[1]->n); } void mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, uint32_t m_st, uint32_t m_en) @@ -96,27 +157,32 @@ void mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, uint3 mm_match_t *m; // convert to local representation - m = (mm_match_t*)kmalloc(b->km_fixed, (m_en - m_st) * sizeof(mm_match_t)); + m = (mm_match_t*)kmalloc(b->km_fixed, n * sizeof(mm_match_t)); + if (mm_verbose >= 5) printf("%d\t", n); for (i = 0; i < n; ++i) { int t; mm128_t *p = &b->mini.a[i + m_st]; m[i].is_alloc = 0; m[i].qpos = (uint32_t)p->y; m[i].span = p->x & 0xff; - m[i].x.cr = mm_idx_get(mi, p->x, &t); + m[i].x.cr = mm_idx_get(mi, p->x>>8, &t); m[i].n = t; + if (i) printf("; "); + if (mm_verbose >= 5) printf("%d,%d", m[i].qpos>>1, t); } + if (mm_verbose >= 5) printf("\n"); // pair k-mer thinning for (i = 0; i < n; ++i) { - if (m[i].n >= opt->mid_occ && m[i].n < opt->max_occ) { +// if (m[i].n >= opt->mid_occ && m[i].n < opt->max_occ) { + if (m[i].n >= opt->mid_occ) { if (last2 < 0) last2 = i; if (last < 0 || m[last].n < m[i].n) last = i; - if (last >= 0 && m[last].span + (m[last].qpos>>1) <= m[i].qpos>>1) { - mm_pair_thin(b, &m[last], &m[i]); + if (last >= 0 && (m[last].qpos>>1) + (m[last].span>>1) <= m[i].qpos>>1) { + mm_pair_thin(b, opt->radius, &m[last], &m[i]); last2 = last = -1; - } else if (last2 >= 0 && m[last].span + (m[last].qpos>>1) <= m[i].qpos>>1) { - mm_pair_thin(b, &m[last2], &m[i]); + } else if (last2 >= 0 && (m[last2].qpos>>1) + (m[last2].span>>1) <= m[i].qpos>>1) { + mm_pair_thin(b, opt->radius, &m[last2], &m[i]); last2 = last = -1; } } @@ -127,6 +193,7 @@ void mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, uint3 const mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name) { uint32_t proc_mini = 0; + if (mm_verbose >= 5) printf("=====> %s <=====\n", name); b->mini.n = 0; mm_sketch(b->km, seq, l_seq, mi->w, mi->k, 0, mi->is_hpc, &b->mini); if (opt->sdust_thres > 0)