k-mer thinning apparently working
This commit is contained in:
parent
7b7fabef4d
commit
2c9f1bce3e
83
map.c
83
map.c
|
|
@ -86,8 +86,69 @@ static void mm_dust_minier(mm128_v *mini, int l_seq, const char *seq, int sdust_
|
|||
mini->n = k;
|
||||
}
|
||||
|
||||
void mm_pair_thin(mm_tbuf_t *b, mm_match_t *_m1, mm_match_t *_m2)
|
||||
int mm_pair_thin_core(mm_tbuf_t *b, uint64_t x, int radius, int rel, int st0, int n, const uint64_t *z, uint64_v *a)
|
||||
{
|
||||
int i, st = st0, en = n, mid = en - 1;
|
||||
while (st < en) {
|
||||
uint64_t y;
|
||||
mid = st + ((en - st) >> 1);
|
||||
y = z[mid];
|
||||
if (y < x && (x - y)>>1 > radius) st = mid + 1;
|
||||
else if (y >= x && (y - x)>>1 > radius) en = mid;
|
||||
else break;
|
||||
}
|
||||
if (st < en) {
|
||||
for (en = mid + 1; en < n; ++en)
|
||||
if (z[en] > x && (z[en] - x)>>1 > radius)
|
||||
break;
|
||||
for (st = mid - 1; st >= st0; --st)
|
||||
if (z[st] < x && (x - z[st])>>1 > radius)
|
||||
break;
|
||||
++st;
|
||||
for (i = st; i < en; ++i) {
|
||||
uint64_t y = z[i];
|
||||
if (((x ^ y) & 1) == rel) {
|
||||
printf("* %d,%d\n", (uint32_t)x>>1, (uint32_t)y>>1);
|
||||
kv_push(uint64_t, b->km_fixed, *a, y);
|
||||
}
|
||||
}
|
||||
return en;
|
||||
} else return z[st] < x? st + 1 : en;
|
||||
}
|
||||
|
||||
void mm_pair_thin(mm_tbuf_t *b, int radius, mm_match_t *m1, mm_match_t *m2)
|
||||
{
|
||||
mm_match_t *m[2];
|
||||
const uint64_t *z[2];
|
||||
uint64_v a[2];
|
||||
int i, n[2], k[2], u = 0, rel = (m1->qpos ^ m2->qpos) & 1;
|
||||
|
||||
m[0] = m1, m[1] = m2;
|
||||
for (i = 0; i < 2; ++i) {
|
||||
n[i] = m[i]->n;
|
||||
z[i] = m[i]->x.cr;
|
||||
k[i] = 0;
|
||||
kv_init(a[i]);
|
||||
kv_resize(uint64_t, b->km_fixed, a[i], 256);
|
||||
}
|
||||
while (k[0] < n[0] && k[1] < n[1]) {
|
||||
//printf("%d; %d,%d\n", u, k[0], k[1]);
|
||||
int v = u^1, dist = (int)(m[v]->qpos>>1) - (int)(m[u]->qpos>>1);
|
||||
uint64_t x = z[u][k[u]];
|
||||
int uori = (x ^ m[u]->qpos) & 1, last;
|
||||
int64_t tpos = x>>1 & 0x7fffffff;
|
||||
tpos = uori == 0? tpos + dist : tpos - dist;
|
||||
if (tpos < 0) tpos = 0;
|
||||
x = x>>32<<32 | tpos<<1 | (x&1);
|
||||
last = a[v].n;
|
||||
k[v] = mm_pair_thin_core(b, x, radius, rel, k[v], n[v], z[v], &a[v]);
|
||||
if (a[v].n > last) kv_push(uint64_t, b->km_fixed, a[u], z[u][k[u]]);
|
||||
++k[u];
|
||||
u ^= 1;
|
||||
}
|
||||
for (i = 0; i < 2; ++i)
|
||||
m[i]->n = a[i].n, m[i]->x.r = a[i].a, m[i]->is_alloc = 1;
|
||||
printf("%d,%d; %d,%d\n", m[0]->qpos>>1, m[1]->qpos>>1, m[0]->n, m[1]->n);
|
||||
}
|
||||
|
||||
void mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, uint32_t m_st, uint32_t m_en)
|
||||
|
|
@ -96,27 +157,32 @@ void mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, uint3
|
|||
mm_match_t *m;
|
||||
|
||||
// convert to local representation
|
||||
m = (mm_match_t*)kmalloc(b->km_fixed, (m_en - m_st) * sizeof(mm_match_t));
|
||||
m = (mm_match_t*)kmalloc(b->km_fixed, n * sizeof(mm_match_t));
|
||||
if (mm_verbose >= 5) printf("%d\t", n);
|
||||
for (i = 0; i < n; ++i) {
|
||||
int t;
|
||||
mm128_t *p = &b->mini.a[i + m_st];
|
||||
m[i].is_alloc = 0;
|
||||
m[i].qpos = (uint32_t)p->y;
|
||||
m[i].span = p->x & 0xff;
|
||||
m[i].x.cr = mm_idx_get(mi, p->x, &t);
|
||||
m[i].x.cr = mm_idx_get(mi, p->x>>8, &t);
|
||||
m[i].n = t;
|
||||
if (i) printf("; ");
|
||||
if (mm_verbose >= 5) printf("%d,%d", m[i].qpos>>1, t);
|
||||
}
|
||||
if (mm_verbose >= 5) printf("\n");
|
||||
|
||||
// pair k-mer thinning
|
||||
for (i = 0; i < n; ++i) {
|
||||
if (m[i].n >= opt->mid_occ && m[i].n < opt->max_occ) {
|
||||
// if (m[i].n >= opt->mid_occ && m[i].n < opt->max_occ) {
|
||||
if (m[i].n >= opt->mid_occ) {
|
||||
if (last2 < 0) last2 = i;
|
||||
if (last < 0 || m[last].n < m[i].n) last = i;
|
||||
if (last >= 0 && m[last].span + (m[last].qpos>>1) <= m[i].qpos>>1) {
|
||||
mm_pair_thin(b, &m[last], &m[i]);
|
||||
if (last >= 0 && (m[last].qpos>>1) + (m[last].span>>1) <= m[i].qpos>>1) {
|
||||
mm_pair_thin(b, opt->radius, &m[last], &m[i]);
|
||||
last2 = last = -1;
|
||||
} else if (last2 >= 0 && m[last].span + (m[last].qpos>>1) <= m[i].qpos>>1) {
|
||||
mm_pair_thin(b, &m[last2], &m[i]);
|
||||
} else if (last2 >= 0 && (m[last2].qpos>>1) + (m[last2].span>>1) <= m[i].qpos>>1) {
|
||||
mm_pair_thin(b, opt->radius, &m[last2], &m[i]);
|
||||
last2 = last = -1;
|
||||
}
|
||||
}
|
||||
|
|
@ -127,6 +193,7 @@ void mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, uint3
|
|||
const mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name)
|
||||
{
|
||||
uint32_t proc_mini = 0;
|
||||
if (mm_verbose >= 5) printf("=====> %s <=====\n", name);
|
||||
b->mini.n = 0;
|
||||
mm_sketch(b->km, seq, l_seq, mi->w, mi->k, 0, mi->is_hpc, &b->mini);
|
||||
if (opt->sdust_thres > 0)
|
||||
|
|
|
|||
Loading…
Reference in New Issue