chaining
This commit is contained in:
parent
f2ae8eb670
commit
7b53f7aa84
26
ksort.h
26
ksort.h
|
|
@ -39,32 +39,6 @@ typedef struct {
|
||||||
#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }
|
#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }
|
||||||
|
|
||||||
#define KSORT_INIT(name, type_t, __sort_lt) \
|
#define KSORT_INIT(name, type_t, __sort_lt) \
|
||||||
size_t ks_lis_##name(size_t n, const type_t *a, size_t *b, size_t *_p) \
|
|
||||||
{ /* translated from: http://www.algorithmist.com/index.php/Longest_Increasing_Subsequence.cpp */ \
|
|
||||||
size_t i, u, v, *top = b, *p; \
|
|
||||||
if (n == 0) return 0; \
|
|
||||||
p = _p? _p : (size_t*)calloc(n, sizeof(size_t)); \
|
|
||||||
*top++ = 0; \
|
|
||||||
for (i = 1; i < n; i++) { \
|
|
||||||
if (__sort_lt(a[*(top-1)], a[i])) { \
|
|
||||||
p[i] = *(top-1); \
|
|
||||||
*top++ = i; \
|
|
||||||
continue; \
|
|
||||||
} \
|
|
||||||
for (u = 0, v = top - b - 1; u < v;) { \
|
|
||||||
size_t c = (u + v) >> 1; \
|
|
||||||
if (__sort_lt(a[b[c]], a[i])) u = c + 1; \
|
|
||||||
else v = c; \
|
|
||||||
} \
|
|
||||||
if (__sort_lt(a[i], a[b[u]])) { \
|
|
||||||
if (u > 0) p[i] = b[u-1]; \
|
|
||||||
b[u] = i; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
for (u = top - b, v = *(top-1); u--; v = p[v]) b[u] = v; \
|
|
||||||
if (!_p) free(p); \
|
|
||||||
return top - b; \
|
|
||||||
} \
|
|
||||||
type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk) \
|
type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk) \
|
||||||
{ \
|
{ \
|
||||||
type_t *low, *high, *k, *ll, *hh, *mid; \
|
type_t *low, *high, *k, *ll, *hh, *mid; \
|
||||||
|
|
|
||||||
94
map.c
94
map.c
|
|
@ -13,10 +13,10 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
||||||
opt->max_occ_frac = 1e-5f;
|
opt->max_occ_frac = 1e-5f;
|
||||||
opt->mid_occ_frac = 2e-4f;
|
opt->mid_occ_frac = 2e-4f;
|
||||||
opt->sdust_thres = 0;
|
opt->sdust_thres = 0;
|
||||||
|
opt->min_cnt = 2;
|
||||||
opt->radius = 500;
|
opt->radius = 500;
|
||||||
|
|
||||||
opt->max_gap = 10000;
|
opt->max_gap = 10000;
|
||||||
opt->min_cnt = 4;
|
|
||||||
opt->min_match = 40;
|
opt->min_match = 40;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -151,10 +151,49 @@ void mm_pair_thin(mm_tbuf_t *b, int radius, mm_match_t *m1, mm_match_t *m2)
|
||||||
// printf("%d,%d; %d,%d\n", m[0]->qpos>>1, m[1]->qpos>>1, m[0]->n, m[1]->n);
|
// printf("%d,%d; %d,%d\n", m[0]->qpos>>1, m[1]->qpos>>1, m[0]->n, m[1]->n);
|
||||||
}
|
}
|
||||||
|
|
||||||
void mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, uint32_t m_st, uint32_t m_en)
|
int mm_liss(int d, int min_L, void *km, int n, mm128_t *a, int *b)
|
||||||
{
|
{
|
||||||
int i, n = m_en - m_st, last = -1, last2 = -1;
|
int i, o = 0, L = 0, *M, *P, m = 0, off = 0;
|
||||||
|
M = (int*)kmalloc(km, (n + 1) * sizeof(int));
|
||||||
|
P = (int*)kmalloc(km, n * sizeof(int));
|
||||||
|
for (i = 0; i <= n; ++i) {
|
||||||
|
int lo, hi, newL;
|
||||||
|
if (L > 0 || i == n) {
|
||||||
|
int reset = 0, j = M[L]; // _j_ is the index of the last element in the longest chain
|
||||||
|
if (i == n) reset = 1; // reaching the end
|
||||||
|
else if (a[i].x - a[j].x > d) reset = 1; // large gap on target
|
||||||
|
else if ((uint32_t)a[i].y > (uint32_t)a[j].y && (uint32_t)a[i].y - (uint32_t)a[j].y > d) reset = 1; // large gap on query
|
||||||
|
if (reset) {
|
||||||
|
if (L >= min_L) { // save the chain if long enough
|
||||||
|
int k;
|
||||||
|
for (k = L - 1; k >= 0; --k)
|
||||||
|
a[o + k] = a[j], j = P[j];
|
||||||
|
o += L;
|
||||||
|
b[m++] = L;
|
||||||
|
}
|
||||||
|
off = i, L = 0; // reset
|
||||||
|
}
|
||||||
|
if (i == n) break;
|
||||||
|
}
|
||||||
|
lo = off + 1, hi = off + L; // a binary search; see wiki
|
||||||
|
while (lo <= hi) {
|
||||||
|
int mid = (lo + hi + 1) >> 1;
|
||||||
|
if (a[M[mid - off]].x < a[i].x) lo = mid + 1;
|
||||||
|
else hi = mid - 1;
|
||||||
|
}
|
||||||
|
newL = lo - off, P[i] = M[newL - 1], M[newL] = i;
|
||||||
|
if (newL > L) L = newL;
|
||||||
|
}
|
||||||
|
kfree(km, M);
|
||||||
|
kfree(km, P);
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
void mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, uint32_t m_st, uint32_t m_en, const char *qname, int qlen)
|
||||||
|
{
|
||||||
|
int i, n = m_en - m_st, last = -1, last2 = -1, j, n_a, *t, n_t;
|
||||||
mm_match_t *m;
|
mm_match_t *m;
|
||||||
|
mm128_t *a;
|
||||||
|
|
||||||
// convert to local representation
|
// convert to local representation
|
||||||
m = (mm_match_t*)kmalloc(b->km_fixed, n * sizeof(mm_match_t));
|
m = (mm_match_t*)kmalloc(b->km_fixed, n * sizeof(mm_match_t));
|
||||||
|
|
@ -176,7 +215,6 @@ void mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, uint3
|
||||||
|
|
||||||
// pair k-mer thinning
|
// pair k-mer thinning
|
||||||
for (i = 0; i < n; ++i) {
|
for (i = 0; i < n; ++i) {
|
||||||
// if (m[i].n >= opt->mid_occ && m[i].n < opt->max_occ) {
|
|
||||||
if (m[i].n >= opt->mid_occ) {
|
if (m[i].n >= opt->mid_occ) {
|
||||||
if (last2 < 0) last2 = i;
|
if (last2 < 0) last2 = i;
|
||||||
if (last < 0 || m[last].n < m[i].n) last = i;
|
if (last < 0 || m[last].n < m[i].n) last = i;
|
||||||
|
|
@ -190,30 +228,68 @@ void mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, uint3
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// find the length of _a_
|
||||||
|
for (i = 0, n_a = 0; i < n; ++i)
|
||||||
|
if (m[i].n < opt->mid_occ) n_a += m[i].n;
|
||||||
|
|
||||||
|
// fill the _a_ array
|
||||||
|
a = kmalloc(b->km_fixed, n_a * sizeof(mm128_t));
|
||||||
|
for (i = j = 0; i < n; ++i) {
|
||||||
|
mm_match_t *q = &m[i];
|
||||||
|
const uint64_t *r = q->x.cr;
|
||||||
|
int k;
|
||||||
|
if (q->n >= opt->mid_occ) continue;
|
||||||
|
for (k = 0; k < q->n; ++k) {
|
||||||
|
int32_t rpos = (uint32_t)r[k] >> 1;
|
||||||
|
mm128_t *p;
|
||||||
|
if (qname && (opt->flag&MM_F_NO_SELF) && strcmp(qname, mi->seq[r[k]>>32].name) == 0 && rpos == (q->qpos>>1)) // avoid the diagonal
|
||||||
|
continue;
|
||||||
|
if (qname && (opt->flag&MM_F_AVA) && strcmp(qname, mi->seq[r[k]>>32].name) > 0) // all-vs-all mode: map once
|
||||||
|
continue;
|
||||||
|
p = &a[j++];
|
||||||
|
if ((r[k]&1) == (q->qpos&1)) { // forward strand
|
||||||
|
p->x = r[k];
|
||||||
|
p->y = (uint64_t)i << 32 | q->qpos >> 1;
|
||||||
|
} else { // reverse strand
|
||||||
|
p->x = 1ULL<<63 | r[k];
|
||||||
|
p->y = (uint64_t)i << 32 | (qlen - ((q->qpos>>1) + 1 - q->span) - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
radix_sort_128x(a, a + n_a);
|
||||||
|
// for (i = 0; i < n_a; ++i) printf("%c\t%d\t%d\n", "+-"[a[i].x>>63], (uint32_t)a[i].x>>1, (uint32_t)a[i].y);
|
||||||
|
|
||||||
|
t = kmalloc(b->km_fixed, (n_a + opt->min_cnt - 1) / opt->min_cnt * sizeof(int));
|
||||||
|
n_t = mm_liss(opt->radius, opt->min_cnt, b->km_fixed, n_a, a, t);
|
||||||
|
|
||||||
int n_tot = 0, n_low = 0, s_low = 0;
|
int n_tot = 0, n_low = 0, s_low = 0;
|
||||||
for (i = 0; i < n; ++i) {
|
for (i = 0; i < n; ++i) {
|
||||||
if (m[i].n > 0) ++n_tot;
|
if (m[i].n > 0) ++n_tot;
|
||||||
if (m[i].n > 0 && m[i].n < opt->mid_occ) ++n_low, s_low += m[i].n;
|
if (m[i].n > 0 && m[i].n < opt->mid_occ) ++n_low, s_low += m[i].n;
|
||||||
}
|
}
|
||||||
printf("%d\t%d\t%d\t%d\n", n_tot, n_low, s_low, n);
|
printf("%d\t%d\t%d\t%d\t%d", n_tot, n_low, s_low, n, n_t);
|
||||||
|
for (i = 0; i < n_t; ++i) printf("\t%d", t[i]);
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
// free
|
// free
|
||||||
for (i = 0; i < n; ++i)
|
for (i = 0; i < n; ++i)
|
||||||
if (m[i].is_alloc) kfree(b->km_fixed, m[i].x.r);
|
if (m[i].is_alloc) kfree(b->km_fixed, m[i].x.r);
|
||||||
kfree(b->km_fixed, m);
|
kfree(b->km_fixed, m);
|
||||||
|
kfree(b->km_fixed, a);
|
||||||
|
kfree(b->km_fixed, t);
|
||||||
}
|
}
|
||||||
|
|
||||||
const mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name)
|
const mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
||||||
{
|
{
|
||||||
uint32_t proc_mini = 0;
|
uint32_t proc_mini = 0;
|
||||||
if (mm_verbose >= 5) printf("=====> %s <=====\n", name);
|
if (mm_verbose >= 5) printf("=====> %s <=====\n", qname);
|
||||||
b->mini.n = 0;
|
b->mini.n = 0;
|
||||||
mm_sketch(b->km, seq, l_seq, mi->w, mi->k, 0, mi->is_hpc, &b->mini);
|
mm_sketch(b->km, seq, l_seq, mi->w, mi->k, 0, mi->is_hpc, &b->mini);
|
||||||
if (opt->sdust_thres > 0)
|
if (opt->sdust_thres > 0)
|
||||||
mm_dust_minier(&b->mini, l_seq, seq, opt->sdust_thres, b->sdb);
|
mm_dust_minier(&b->mini, l_seq, seq, opt->sdust_thres, b->sdb);
|
||||||
while (proc_mini < b->mini.n) {
|
while (proc_mini < b->mini.n) {
|
||||||
uint32_t n = b->mini.n - proc_mini < opt->n_frag_mini * 1.5f? b->mini.n - proc_mini : opt->n_frag_mini;
|
uint32_t n = b->mini.n - proc_mini < opt->n_frag_mini * 1.5f? b->mini.n - proc_mini : opt->n_frag_mini;
|
||||||
mm_map_frag(opt, mi, b, proc_mini, proc_mini + n);
|
mm_map_frag(opt, mi, b, proc_mini, proc_mini + n, qname, l_seq);
|
||||||
proc_mini += n;
|
proc_mini += n;
|
||||||
}
|
}
|
||||||
*n_regs = b->reg.n;
|
*n_regs = b->reg.n;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue