prepare to backport unimap/minigraph chaining
This commit is contained in:
parent
f7dc5799c5
commit
e81927e7a1
5
Makefile
5
Makefile
|
|
@ -2,7 +2,7 @@ CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
|
|||
CPPFLAGS= -DHAVE_KALLOC
|
||||
INCLUDES=
|
||||
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
|
||||
chain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
|
||||
chain.o lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
|
||||
ksw2_ll_sse.o
|
||||
PROG= minimap2
|
||||
PROG_EXTRA= sdust minimap2-lite
|
||||
|
|
@ -120,6 +120,7 @@ ksw2_exts2_sse.o: ksw2.h kalloc.h
|
|||
ksw2_extz2_sse.o: ksw2.h kalloc.h
|
||||
ksw2_ll_sse.o: ksw2.h kalloc.h
|
||||
kthread.o: kthread.h
|
||||
lchain.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h krmq.h
|
||||
main.o: bseq.h minimap.h mmpriv.h kseq.h ketopt.h
|
||||
map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h kseq.h
|
||||
map.o: khash.h ksort.h
|
||||
|
|
@ -127,7 +128,7 @@ misc.o: mmpriv.h minimap.h bseq.h kseq.h ksort.h
|
|||
options.o: mmpriv.h minimap.h bseq.h kseq.h
|
||||
pe.o: mmpriv.h minimap.h bseq.h kseq.h kvec.h kalloc.h ksort.h
|
||||
sdust.o: kalloc.h kdq.h kvec.h sdust.h
|
||||
seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h
|
||||
seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
|
||||
self-chain.o: minimap.h kseq.h
|
||||
sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
|
||||
splitidx.o: mmpriv.h minimap.h bseq.h kseq.h
|
||||
|
|
|
|||
39
kalloc.h
39
kalloc.h
|
|
@ -34,4 +34,43 @@ void km_stat(const void *_km, km_stat_t *s);
|
|||
KREALLOC((km), (a), (m)); \
|
||||
} while (0)
|
||||
|
||||
#ifndef klib_unused
|
||||
#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
|
||||
#define klib_unused __attribute__ ((__unused__))
|
||||
#else
|
||||
#define klib_unused
|
||||
#endif
|
||||
#endif /* klib_unused */
|
||||
|
||||
#define KALLOC_POOL_INIT2(SCOPE, name, kmptype_t) \
|
||||
typedef struct { \
|
||||
size_t cnt, n, max; \
|
||||
kmptype_t **buf; \
|
||||
void *km; \
|
||||
} kmp_##name##_t; \
|
||||
SCOPE kmp_##name##_t *kmp_init_##name(void *km) { \
|
||||
kmp_##name##_t *mp; \
|
||||
KCALLOC(km, mp, 1); \
|
||||
mp->km = km; \
|
||||
return mp; \
|
||||
} \
|
||||
SCOPE void kmp_destroy_##name(kmp_##name##_t *mp) { \
|
||||
size_t k; \
|
||||
for (k = 0; k < mp->n; ++k) kfree(mp->km, mp->buf[k]); \
|
||||
kfree(mp->km, mp->buf); kfree(mp->km, mp); \
|
||||
} \
|
||||
SCOPE kmptype_t *kmp_alloc_##name(kmp_##name##_t *mp) { \
|
||||
++mp->cnt; \
|
||||
if (mp->n == 0) return (kmptype_t*)kcalloc(mp->km, 1, sizeof(kmptype_t)); \
|
||||
return mp->buf[--mp->n]; \
|
||||
} \
|
||||
SCOPE void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \
|
||||
--mp->cnt; \
|
||||
if (mp->n == mp->max) KEXPAND(mp->km, mp->buf, mp->max); \
|
||||
mp->buf[mp->n++] = p; \
|
||||
}
|
||||
|
||||
#define KALLOC_POOL_INIT(name, kmptype_t) \
|
||||
KALLOC_POOL_INIT2(static inline klib_unused, name, kmptype_t)
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -0,0 +1,474 @@
|
|||
/* The MIT License
|
||||
|
||||
Copyright (c) 2019 by Attractive Chaos <attractor@live.co.uk>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
|
||||
/* An example:
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "krmq.h"
|
||||
|
||||
struct my_node {
|
||||
char key;
|
||||
KRMQ_HEAD(struct my_node) head;
|
||||
};
|
||||
#define my_cmp(p, q) (((q)->key < (p)->key) - ((p)->key < (q)->key))
|
||||
KRMQ_INIT(my, struct my_node, head, my_cmp)
|
||||
|
||||
int main(void) {
|
||||
const char *str = "MNOLKQOPHIA"; // from wiki, except a duplicate
|
||||
struct my_node *root = 0;
|
||||
int i, l = strlen(str);
|
||||
for (i = 0; i < l; ++i) { // insert in the input order
|
||||
struct my_node *q, *p = malloc(sizeof(*p));
|
||||
p->key = str[i];
|
||||
q = krmq_insert(my, &root, p, 0);
|
||||
if (p != q) free(p); // if already present, free
|
||||
}
|
||||
krmq_itr_t(my) itr;
|
||||
krmq_itr_first(my, root, &itr); // place at first
|
||||
do { // traverse
|
||||
const struct my_node *p = krmq_at(&itr);
|
||||
putchar(p->key);
|
||||
free((void*)p); // free node
|
||||
} while (krmq_itr_next(my, &itr));
|
||||
putchar('\n');
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
|
||||
#ifndef KRMQ_H
|
||||
#define KRMQ_H
|
||||
|
||||
#ifdef __STRICT_ANSI__
|
||||
#define inline __inline__
|
||||
#endif
|
||||
|
||||
#define KRMQ_MAX_DEPTH 64
|
||||
|
||||
#define krmq_size(head, p) ((p)? (p)->head.size : 0)
|
||||
#define krmq_size_child(head, q, i) ((q)->head.p[(i)]? (q)->head.p[(i)]->head.size : 0)
|
||||
|
||||
#define KRMQ_HEAD(__type) \
|
||||
struct { \
|
||||
__type *p[2], *s; \
|
||||
signed char balance; /* balance factor */ \
|
||||
unsigned size; /* #elements in subtree */ \
|
||||
}
|
||||
|
||||
#define __KRMQ_FIND(suf, __scope, __type, __head, __cmp) \
|
||||
__scope __type *krmq_find_##suf(const __type *root, const __type *x, unsigned *cnt_) { \
|
||||
const __type *p = root; \
|
||||
unsigned cnt = 0; \
|
||||
while (p != 0) { \
|
||||
int cmp; \
|
||||
cmp = __cmp(x, p); \
|
||||
if (cmp >= 0) cnt += krmq_size_child(__head, p, 0) + 1; \
|
||||
if (cmp < 0) p = p->__head.p[0]; \
|
||||
else if (cmp > 0) p = p->__head.p[1]; \
|
||||
else break; \
|
||||
} \
|
||||
if (cnt_) *cnt_ = cnt; \
|
||||
return (__type*)p; \
|
||||
} \
|
||||
__scope __type *krmq_interval_##suf(const __type *root, const __type *x, __type **lower, __type **upper) { \
|
||||
const __type *p = root, *l = 0, *u = 0; \
|
||||
while (p != 0) { \
|
||||
int cmp; \
|
||||
cmp = __cmp(x, p); \
|
||||
if (cmp < 0) u = p, p = p->__head.p[0]; \
|
||||
else if (cmp > 0) l = p, p = p->__head.p[1]; \
|
||||
else { l = u = p; break; } \
|
||||
} \
|
||||
if (lower) *lower = (__type*)l; \
|
||||
if (upper) *upper = (__type*)u; \
|
||||
return (__type*)p; \
|
||||
}
|
||||
|
||||
#define __KRMQ_RMQ(suf, __scope, __type, __head, __cmp, __lt2) \
|
||||
__scope __type *krmq_rmq_##suf(const __type *root, const __type *lo, const __type *up) { /* CLOSED interval */ \
|
||||
const __type *p = root, *path[2][KRMQ_MAX_DEPTH], *min; \
|
||||
int plen[2] = {0, 0}, pcmp[2][KRMQ_MAX_DEPTH], i, cmp, lca; \
|
||||
if (root == 0) return 0; \
|
||||
while (p) { \
|
||||
cmp = __cmp(lo, p); \
|
||||
path[0][plen[0]] = p, pcmp[0][plen[0]++] = cmp; \
|
||||
if (cmp < 0) p = p->__head.p[0]; \
|
||||
else if (cmp > 0) p = p->__head.p[1]; \
|
||||
else break; \
|
||||
} \
|
||||
p = root; \
|
||||
while (p) { \
|
||||
cmp = __cmp(up, p); \
|
||||
path[1][plen[1]] = p, pcmp[1][plen[1]++] = cmp; \
|
||||
if (cmp < 0) p = p->__head.p[0]; \
|
||||
else if (cmp > 0) p = p->__head.p[1]; \
|
||||
else break; \
|
||||
} \
|
||||
for (i = 0; i < plen[0] && i < plen[1]; ++i) /* find the LCA */ \
|
||||
if (path[0][i] == path[1][i] && pcmp[0][i] <= 0 && pcmp[1][i] >= 0) \
|
||||
break; \
|
||||
if (i == plen[0] || i == plen[1]) return 0; /* no elements in the closed interval */ \
|
||||
lca = i, min = path[0][lca]; \
|
||||
for (i = lca + 1; i < plen[0]; ++i) { \
|
||||
if (pcmp[0][i] <= 0) { \
|
||||
if (__lt2(path[0][i], min)) min = path[0][i]; \
|
||||
if (path[0][i]->__head.p[1] && __lt2(path[0][i]->__head.p[1]->__head.s, min)) \
|
||||
min = path[0][i]->__head.p[1]->__head.s; \
|
||||
} \
|
||||
} \
|
||||
for (i = lca + 1; i < plen[1]; ++i) { \
|
||||
if (pcmp[1][i] >= 0) { \
|
||||
if (__lt2(path[1][i], min)) min = path[1][i]; \
|
||||
if (path[1][i]->__head.p[0] && __lt2(path[1][i]->__head.p[0]->__head.s, min)) \
|
||||
min = path[1][i]->__head.p[0]->__head.s; \
|
||||
} \
|
||||
} \
|
||||
return (__type*)min; \
|
||||
}
|
||||
|
||||
#define __KRMQ_ROTATE(suf, __type, __head, __lt2) \
|
||||
/* */ \
|
||||
static inline void krmq_update_min_##suf(__type *p, const __type *q, const __type *r) { \
|
||||
p->__head.s = !q || __lt2(p, q->__head.s)? p : q->__head.s; \
|
||||
p->__head.s = !r || __lt2(p->__head.s, r->__head.s)? p->__head.s : r->__head.s; \
|
||||
} \
|
||||
/* one rotation: (a,(b,c)q)p => ((a,b)p,c)q */ \
|
||||
static inline __type *krmq_rotate1_##suf(__type *p, int dir) { /* dir=0 to left; dir=1 to right */ \
|
||||
int opp = 1 - dir; /* opposite direction */ \
|
||||
__type *q = p->__head.p[opp], *s = p->__head.s; \
|
||||
unsigned size_p = p->__head.size; \
|
||||
p->__head.size -= q->__head.size - krmq_size_child(__head, q, dir); \
|
||||
q->__head.size = size_p; \
|
||||
krmq_update_min_##suf(p, p->__head.p[dir], q->__head.p[dir]); \
|
||||
q->__head.s = s; \
|
||||
p->__head.p[opp] = q->__head.p[dir]; \
|
||||
q->__head.p[dir] = p; \
|
||||
return q; \
|
||||
} \
|
||||
/* two consecutive rotations: (a,((b,c)r,d)q)p => ((a,b)p,(c,d)q)r */ \
|
||||
static inline __type *krmq_rotate2_##suf(__type *p, int dir) { \
|
||||
int b1, opp = 1 - dir; \
|
||||
__type *q = p->__head.p[opp], *r = q->__head.p[dir], *s = p->__head.s; \
|
||||
unsigned size_x_dir = krmq_size_child(__head, r, dir); \
|
||||
r->__head.size = p->__head.size; \
|
||||
p->__head.size -= q->__head.size - size_x_dir; \
|
||||
q->__head.size -= size_x_dir + 1; \
|
||||
krmq_update_min_##suf(p, p->__head.p[dir], r->__head.p[dir]); \
|
||||
krmq_update_min_##suf(q, q->__head.p[opp], r->__head.p[opp]); \
|
||||
r->__head.s = s; \
|
||||
p->__head.p[opp] = r->__head.p[dir]; \
|
||||
r->__head.p[dir] = p; \
|
||||
q->__head.p[dir] = r->__head.p[opp]; \
|
||||
r->__head.p[opp] = q; \
|
||||
b1 = dir == 0? +1 : -1; \
|
||||
if (r->__head.balance == b1) q->__head.balance = 0, p->__head.balance = -b1; \
|
||||
else if (r->__head.balance == 0) q->__head.balance = p->__head.balance = 0; \
|
||||
else q->__head.balance = b1, p->__head.balance = 0; \
|
||||
r->__head.balance = 0; \
|
||||
return r; \
|
||||
}
|
||||
|
||||
#define __KRMQ_INSERT(suf, __scope, __type, __head, __cmp, __lt2) \
|
||||
__scope __type *krmq_insert_##suf(__type **root_, __type *x, unsigned *cnt_) { \
|
||||
unsigned char stack[KRMQ_MAX_DEPTH]; \
|
||||
__type *path[KRMQ_MAX_DEPTH]; \
|
||||
__type *bp, *bq; \
|
||||
__type *p, *q, *r = 0; /* _r_ is potentially the new root */ \
|
||||
int i, which = 0, top, b1, path_len; \
|
||||
unsigned cnt = 0; \
|
||||
bp = *root_, bq = 0; \
|
||||
/* find the insertion location */ \
|
||||
for (p = bp, q = bq, top = path_len = 0; p; q = p, p = p->__head.p[which]) { \
|
||||
int cmp; \
|
||||
cmp = __cmp(x, p); \
|
||||
if (cmp >= 0) cnt += krmq_size_child(__head, p, 0) + 1; \
|
||||
if (cmp == 0) { \
|
||||
if (cnt_) *cnt_ = cnt; \
|
||||
return p; \
|
||||
} \
|
||||
if (p->__head.balance != 0) \
|
||||
bq = q, bp = p, top = 0; \
|
||||
stack[top++] = which = (cmp > 0); \
|
||||
path[path_len++] = p; \
|
||||
} \
|
||||
if (cnt_) *cnt_ = cnt; \
|
||||
x->__head.balance = 0, x->__head.size = 1, x->__head.p[0] = x->__head.p[1] = 0, x->__head.s = x; \
|
||||
if (q == 0) *root_ = x; \
|
||||
else q->__head.p[which] = x; \
|
||||
if (bp == 0) return x; \
|
||||
for (i = 0; i < path_len; ++i) ++path[i]->__head.size; \
|
||||
for (i = path_len - 1; i >= 0; --i) { \
|
||||
krmq_update_min_##suf(path[i], path[i]->__head.p[0], path[i]->__head.p[1]); \
|
||||
if (path[i]->__head.s != x) break; \
|
||||
} \
|
||||
for (p = bp, top = 0; p != x; p = p->__head.p[stack[top]], ++top) /* update balance factors */ \
|
||||
if (stack[top] == 0) --p->__head.balance; \
|
||||
else ++p->__head.balance; \
|
||||
if (bp->__head.balance > -2 && bp->__head.balance < 2) return x; /* no re-balance needed */ \
|
||||
/* re-balance */ \
|
||||
which = (bp->__head.balance < 0); \
|
||||
b1 = which == 0? +1 : -1; \
|
||||
q = bp->__head.p[1 - which]; \
|
||||
if (q->__head.balance == b1) { \
|
||||
r = krmq_rotate1_##suf(bp, which); \
|
||||
q->__head.balance = bp->__head.balance = 0; \
|
||||
} else r = krmq_rotate2_##suf(bp, which); \
|
||||
if (bq == 0) *root_ = r; \
|
||||
else bq->__head.p[bp != bq->__head.p[0]] = r; \
|
||||
return x; \
|
||||
}
|
||||
|
||||
#define __KRMQ_ERASE(suf, __scope, __type, __head, __cmp, __lt2) \
|
||||
__scope __type *krmq_erase_##suf(__type **root_, const __type *x, unsigned *cnt_) { \
|
||||
__type *p, *path[KRMQ_MAX_DEPTH], fake; \
|
||||
unsigned char dir[KRMQ_MAX_DEPTH]; \
|
||||
int i, d = 0, cmp; \
|
||||
unsigned cnt = 0; \
|
||||
fake.__head.p[0] = *root_, fake.__head.p[1] = 0; \
|
||||
if (cnt_) *cnt_ = 0; \
|
||||
if (x) { \
|
||||
for (cmp = -1, p = &fake; cmp; cmp = __cmp(x, p)) { \
|
||||
int which = (cmp > 0); \
|
||||
if (cmp > 0) cnt += krmq_size_child(__head, p, 0) + 1; \
|
||||
dir[d] = which; \
|
||||
path[d++] = p; \
|
||||
p = p->__head.p[which]; \
|
||||
if (p == 0) { \
|
||||
if (cnt_) *cnt_ = 0; \
|
||||
return 0; \
|
||||
} \
|
||||
} \
|
||||
cnt += krmq_size_child(__head, p, 0) + 1; /* because p==x is not counted */ \
|
||||
} else { \
|
||||
for (p = &fake, cnt = 1; p; p = p->__head.p[0]) \
|
||||
dir[d] = 0, path[d++] = p; \
|
||||
p = path[--d]; \
|
||||
} \
|
||||
if (cnt_) *cnt_ = cnt; \
|
||||
for (i = 1; i < d; ++i) --path[i]->__head.size; \
|
||||
if (p->__head.p[1] == 0) { /* ((1,.)2,3)4 => (1,3)4; p=2 */ \
|
||||
path[d-1]->__head.p[dir[d-1]] = p->__head.p[0]; \
|
||||
} else { \
|
||||
__type *q = p->__head.p[1]; \
|
||||
if (q->__head.p[0] == 0) { /* ((1,2)3,4)5 => ((1)2,4)5; p=3,q=2 */ \
|
||||
q->__head.p[0] = p->__head.p[0]; \
|
||||
q->__head.balance = p->__head.balance; \
|
||||
path[d-1]->__head.p[dir[d-1]] = q; \
|
||||
path[d] = q, dir[d++] = 1; \
|
||||
q->__head.size = p->__head.size - 1; \
|
||||
} else { /* ((1,((.,2)3,4)5)6,7)8 => ((1,(2,4)5)3,7)8; p=6 */ \
|
||||
__type *r; \
|
||||
int e = d++; /* backup _d_ */\
|
||||
for (;;) { \
|
||||
dir[d] = 0; \
|
||||
path[d++] = q; \
|
||||
r = q->__head.p[0]; \
|
||||
if (r->__head.p[0] == 0) break; \
|
||||
q = r; \
|
||||
} \
|
||||
r->__head.p[0] = p->__head.p[0]; \
|
||||
q->__head.p[0] = r->__head.p[1]; \
|
||||
r->__head.p[1] = p->__head.p[1]; \
|
||||
r->__head.balance = p->__head.balance; \
|
||||
path[e-1]->__head.p[dir[e-1]] = r; \
|
||||
path[e] = r, dir[e] = 1; \
|
||||
for (i = e + 1; i < d; ++i) --path[i]->__head.size; \
|
||||
r->__head.size = p->__head.size - 1; \
|
||||
} \
|
||||
} \
|
||||
for (i = d - 1; i >= 0; --i) /* not sure why adding condition "path[i]->__head.s==p" doesn't work */ \
|
||||
krmq_update_min_##suf(path[i], path[i]->__head.p[0], path[i]->__head.p[1]); \
|
||||
while (--d > 0) { \
|
||||
__type *q = path[d]; \
|
||||
int which, other, b1 = 1, b2 = 2; \
|
||||
which = dir[d], other = 1 - which; \
|
||||
if (which) b1 = -b1, b2 = -b2; \
|
||||
q->__head.balance += b1; \
|
||||
if (q->__head.balance == b1) break; \
|
||||
else if (q->__head.balance == b2) { \
|
||||
__type *r = q->__head.p[other]; \
|
||||
if (r->__head.balance == -b1) { \
|
||||
path[d-1]->__head.p[dir[d-1]] = krmq_rotate2_##suf(q, which); \
|
||||
} else { \
|
||||
path[d-1]->__head.p[dir[d-1]] = krmq_rotate1_##suf(q, which); \
|
||||
if (r->__head.balance == 0) { \
|
||||
r->__head.balance = -b1; \
|
||||
q->__head.balance = b1; \
|
||||
break; \
|
||||
} else r->__head.balance = q->__head.balance = 0; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
*root_ = fake.__head.p[0]; \
|
||||
return p; \
|
||||
}
|
||||
|
||||
#define krmq_free(__type, __head, __root, __free) do { \
|
||||
__type *_p, *_q; \
|
||||
for (_p = __root; _p; _p = _q) { \
|
||||
if (_p->__head.p[0] == 0) { \
|
||||
_q = _p->__head.p[1]; \
|
||||
__free(_p); \
|
||||
} else { \
|
||||
_q = _p->__head.p[0]; \
|
||||
_p->__head.p[0] = _q->__head.p[1]; \
|
||||
_q->__head.p[1] = _p; \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define __KRMQ_ITR(suf, __scope, __type, __head, __cmp) \
|
||||
struct krmq_itr_##suf { \
|
||||
const __type *stack[KRMQ_MAX_DEPTH], **top; \
|
||||
}; \
|
||||
__scope void krmq_itr_first_##suf(const __type *root, struct krmq_itr_##suf *itr) { \
|
||||
const __type *p; \
|
||||
for (itr->top = itr->stack - 1, p = root; p; p = p->__head.p[0]) \
|
||||
*++itr->top = p; \
|
||||
} \
|
||||
__scope int krmq_itr_find_##suf(const __type *root, const __type *x, struct krmq_itr_##suf *itr) { \
|
||||
const __type *p = root; \
|
||||
itr->top = itr->stack - 1; \
|
||||
while (p != 0) { \
|
||||
int cmp; \
|
||||
*++itr->top = p; \
|
||||
cmp = __cmp(x, p); \
|
||||
if (cmp < 0) p = p->__head.p[0]; \
|
||||
else if (cmp > 0) p = p->__head.p[1]; \
|
||||
else break; \
|
||||
} \
|
||||
return p? 1 : 0; \
|
||||
} \
|
||||
__scope int krmq_itr_next_bidir_##suf(struct krmq_itr_##suf *itr, int dir) { \
|
||||
const __type *p; \
|
||||
if (itr->top < itr->stack) return 0; \
|
||||
dir = !!dir; \
|
||||
p = (*itr->top)->__head.p[dir]; \
|
||||
if (p) { /* go down */ \
|
||||
for (; p; p = p->__head.p[!dir]) \
|
||||
*++itr->top = p; \
|
||||
return 1; \
|
||||
} else { /* go up */ \
|
||||
do { \
|
||||
p = *itr->top--; \
|
||||
} while (itr->top >= itr->stack && p == (*itr->top)->__head.p[dir]); \
|
||||
return itr->top < itr->stack? 0 : 1; \
|
||||
} \
|
||||
} \
|
||||
|
||||
/**
|
||||
* Insert a node to the tree
|
||||
*
|
||||
* @param suf name suffix used in KRMQ_INIT()
|
||||
* @param proot pointer to the root of the tree (in/out: root may change)
|
||||
* @param x node to insert (in)
|
||||
* @param cnt number of nodes smaller than or equal to _x_; can be NULL (out)
|
||||
*
|
||||
* @return _x_ if not present in the tree, or the node equal to x.
|
||||
*/
|
||||
#define krmq_insert(suf, proot, x, cnt) krmq_insert_##suf(proot, x, cnt)
|
||||
|
||||
/**
|
||||
* Find a node in the tree
|
||||
*
|
||||
* @param suf name suffix used in KRMQ_INIT()
|
||||
* @param root root of the tree
|
||||
* @param x node value to find (in)
|
||||
* @param cnt number of nodes smaller than or equal to _x_; can be NULL (out)
|
||||
*
|
||||
* @return node equal to _x_ if present, or NULL if absent
|
||||
*/
|
||||
#define krmq_find(suf, root, x, cnt) krmq_find_##suf(root, x, cnt)
|
||||
#define krmq_interval(suf, root, x, lower, upper) krmq_interval_##suf(root, x, lower, upper)
|
||||
#define krmq_rmq(suf, root, lo, up) krmq_rmq_##suf(root, lo, up)
|
||||
|
||||
/**
|
||||
* Delete a node from the tree
|
||||
*
|
||||
* @param suf name suffix used in KRMQ_INIT()
|
||||
* @param proot pointer to the root of the tree (in/out: root may change)
|
||||
* @param x node value to delete; if NULL, delete the first node (in)
|
||||
*
|
||||
* @return node removed from the tree if present, or NULL if absent
|
||||
*/
|
||||
#define krmq_erase(suf, proot, x, cnt) krmq_erase_##suf(proot, x, cnt)
|
||||
#define krmq_erase_first(suf, proot) krmq_erase_##suf(proot, 0, 0)
|
||||
|
||||
#define krmq_itr_t(suf) struct krmq_itr_##suf
|
||||
|
||||
/**
|
||||
* Place the iterator at the smallest object
|
||||
*
|
||||
* @param suf name suffix used in KRMQ_INIT()
|
||||
* @param root root of the tree
|
||||
* @param itr iterator
|
||||
*/
|
||||
#define krmq_itr_first(suf, root, itr) krmq_itr_first_##suf(root, itr)
|
||||
|
||||
/**
|
||||
* Place the iterator at the object equal to or greater than the query
|
||||
*
|
||||
* @param suf name suffix used in KRMQ_INIT()
|
||||
* @param root root of the tree
|
||||
* @param x query (in)
|
||||
* @param itr iterator (out)
|
||||
*
|
||||
* @return 1 if find; 0 otherwise. krmq_at(itr) is NULL if and only if query is
|
||||
* larger than all objects in the tree
|
||||
*/
|
||||
#define krmq_itr_find(suf, root, x, itr) krmq_itr_find_##suf(root, x, itr)
|
||||
|
||||
/**
|
||||
* Move to the next object in order
|
||||
*
|
||||
* @param itr iterator (modified)
|
||||
*
|
||||
* @return 1 if there is a next object; 0 otherwise
|
||||
*/
|
||||
#define krmq_itr_next(suf, itr) krmq_itr_next_bidir_##suf(itr, 1)
|
||||
#define krmq_itr_prev(suf, itr) krmq_itr_next_bidir_##suf(itr, 0)
|
||||
|
||||
/**
|
||||
* Return the pointer at the iterator
|
||||
*
|
||||
* @param itr iterator
|
||||
*
|
||||
* @return pointer if present; NULL otherwise
|
||||
*/
|
||||
#define krmq_at(itr) ((itr)->top < (itr)->stack? 0 : *(itr)->top)
|
||||
|
||||
#define KRMQ_INIT2(suf, __scope, __type, __head, __cmp, __lt2) \
|
||||
__KRMQ_FIND(suf, __scope, __type, __head, __cmp) \
|
||||
__KRMQ_RMQ(suf, __scope, __type, __head, __cmp, __lt2) \
|
||||
__KRMQ_ROTATE(suf, __type, __head, __lt2) \
|
||||
__KRMQ_INSERT(suf, __scope, __type, __head, __cmp, __lt2) \
|
||||
__KRMQ_ERASE(suf, __scope, __type, __head, __cmp, __lt2) \
|
||||
__KRMQ_ITR(suf, __scope, __type, __head, __cmp)
|
||||
|
||||
#define KRMQ_INIT(suf, __type, __head, __cmp, __lt2) \
|
||||
KRMQ_INIT2(suf,, __type, __head, __cmp, __lt2)
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,347 @@
|
|||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include "mmpriv.h"
|
||||
#include "kalloc.h"
|
||||
#include "krmq.h"
|
||||
|
||||
static inline float mg_log2(float x) // NB: this doesn't work when x<2
|
||||
{
|
||||
union { float f; uint32_t i; } z = { x };
|
||||
float log_2 = ((z.i >> 23) & 255) - 128;
|
||||
z.i &= ~(255 << 23);
|
||||
z.i += 127 << 23;
|
||||
log_2 += (-0.34484843f * z.f + 2.02466578f) * z.f - 0.67487759f;
|
||||
return log_2;
|
||||
}
|
||||
|
||||
uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t *n_u_, int32_t *n_v_)
|
||||
{
|
||||
mm128_t *z;
|
||||
uint64_t *u;
|
||||
int64_t i, k, n_z, n_v;
|
||||
int32_t n_u;
|
||||
|
||||
*n_u_ = *n_v_ = 0;
|
||||
for (i = 0, n_z = 0; i < n; ++i) // precompute n_z
|
||||
if (f[i] >= min_sc) ++n_z;
|
||||
if (n_z == 0) return 0;
|
||||
KMALLOC(km, z, n_z);
|
||||
for (i = 0, k = 0; i < n; ++i) // populate z[]
|
||||
if (f[i] >= min_sc) z[k].x = f[i], z[k++].y = i;
|
||||
radix_sort_128x(z, z + n_z);
|
||||
|
||||
memset(t, 0, n * 4);
|
||||
for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // precompute n_u
|
||||
int64_t n_v0 = n_v;
|
||||
int32_t sc;
|
||||
for (i = z[k].y; i >= 0 && t[i] == 0; i = p[i])
|
||||
++n_v, t[i] = 1;
|
||||
sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
|
||||
if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
|
||||
++n_u;
|
||||
else n_v = n_v0;
|
||||
}
|
||||
KMALLOC(km, u, n_u);
|
||||
memset(t, 0, n * 4);
|
||||
for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // populate u[]
|
||||
int64_t n_v0 = n_v;
|
||||
int32_t sc;
|
||||
for (i = z[k].y; i >= 0 && t[i] == 0; i = p[i])
|
||||
v[n_v++] = i, t[i] = 1;
|
||||
sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
|
||||
if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
|
||||
u[n_u++] = (uint64_t)sc << 32 | (n_v - n_v0);
|
||||
else n_v = n_v0;
|
||||
}
|
||||
kfree(km, z);
|
||||
assert(n_v < INT32_MAX);
|
||||
*n_u_ = n_u, *n_v_ = n_v;
|
||||
return u;
|
||||
}
|
||||
|
||||
static mm128_t *compact_a(void *km, int32_t n_u, uint64_t *u, int32_t n_v, int32_t *v, mm128_t *a)
|
||||
{
|
||||
mm128_t *b, *w;
|
||||
uint64_t *u2;
|
||||
int64_t i, j, k;
|
||||
|
||||
// write the result to b[]
|
||||
KMALLOC(km, b, n_v);
|
||||
for (i = 0, k = 0; i < n_u; ++i) {
|
||||
int32_t k0 = k, ni = (int32_t)u[i];
|
||||
for (j = 0; j < ni; ++j)
|
||||
b[k++] = a[v[k0 + (ni - j - 1)]];
|
||||
}
|
||||
kfree(km, v);
|
||||
|
||||
// sort u[] and a[] by the target position, such that adjacent chains may be joined
|
||||
KMALLOC(km, w, n_u);
|
||||
for (i = k = 0; i < n_u; ++i) {
|
||||
w[i].x = b[k].x, w[i].y = (uint64_t)k<<32|i;
|
||||
k += (int32_t)u[i];
|
||||
}
|
||||
radix_sort_128x(w, w + n_u);
|
||||
KMALLOC(km, u2, n_u);
|
||||
for (i = k = 0; i < n_u; ++i) {
|
||||
int32_t j = (int32_t)w[i].y, n = (int32_t)u[j];
|
||||
u2[i] = u[j];
|
||||
memcpy(&a[k], &b[w[i].y>>32], n * sizeof(mm128_t));
|
||||
k += n;
|
||||
}
|
||||
memcpy(u, u2, n_u * 8);
|
||||
memcpy(b, a, k * sizeof(mm128_t)); // write _a_ to _b_ and deallocate _a_ because _a_ is oversized, sometimes a lot
|
||||
kfree(km, a); kfree(km, w); kfree(km, u2);
|
||||
return b;
|
||||
}
|
||||
|
||||
static inline int32_t comput_sc(const mm128_t *ai, const mm128_t *aj, int32_t max_dist_x, int32_t max_dist_y, int32_t bw, float chn_pen_gap, float chn_pen_skip, int is_cdna, int n_seg)
|
||||
{
|
||||
int32_t dq = (int32_t)ai->y - (int32_t)aj->y, dr, dd, dg, q_span, sc;
|
||||
int32_t sidi = (ai->y & MM_SEED_SEG_MASK) >> MM_SEED_SEG_SHIFT;
|
||||
int32_t sidj = (aj->y & MM_SEED_SEG_MASK) >> MM_SEED_SEG_SHIFT;
|
||||
if (dq <= 0 || dq > max_dist_x) return INT32_MIN;
|
||||
dr = (int32_t)(ai->x - aj->x);
|
||||
if (sidi == sidj && (dr == 0 || dq > max_dist_y)) return INT32_MIN;
|
||||
dd = dr > dq? dr - dq : dq - dr;
|
||||
if (sidi == sidj && dd > bw) return INT32_MIN;
|
||||
if (n_seg > 1 && !is_cdna && sidi == sidj && dr > max_dist_y) return INT32_MIN;
|
||||
dg = dr < dq? dr : dq;
|
||||
q_span = aj->y>>32&0xff;
|
||||
sc = q_span < dg? q_span : dg;
|
||||
if (dd || dg > q_span) {
|
||||
float lin_pen, log_pen;
|
||||
lin_pen = chn_pen_gap * (float)dd + chn_pen_skip * (float)dg;
|
||||
log_pen = dd >= 1? mg_log2(dd + 1) : 0.0f; // mg_log2() only works for dd>=2
|
||||
if (is_cdna) {
|
||||
if (dr > dq) sc -= (int)(lin_pen < log_pen? lin_pen : log_pen); // deletion or jump between paired ends
|
||||
else sc -= (int)(lin_pen + .5f * log_pen);
|
||||
} else sc -= (int)(lin_pen + .5f * log_pen);
|
||||
}
|
||||
return sc;
|
||||
}
|
||||
|
||||
/* Input:
|
||||
* a[].x: tid<<33 | rev<<32 | tpos
|
||||
* a[].y: flags<<40 | q_span<<32 | q_pos
|
||||
* Output:
|
||||
* n_u: #chains
|
||||
* u[]: score<<32 | #anchors (sum of lower 32 bits of u[] is the returned length of a[])
|
||||
* input a[] is deallocated on return
|
||||
*/
|
||||
mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
|
||||
int is_cdna, int n_seg, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
|
||||
{ // TODO: make sure this works when n has more than 32 bits
|
||||
int32_t *f, *t, *v, n_u, n_v, mmax_f = 0;
|
||||
int64_t *p, i, j, max_ii, st = 0, n_iter = 0;
|
||||
uint64_t *u;
|
||||
|
||||
if (_u) *_u = 0, *n_u_ = 0;
|
||||
if (n == 0 || a == 0) return 0;
|
||||
if (max_dist_x < bw) max_dist_x = bw;
|
||||
if (max_dist_y < bw && !is_cdna) max_dist_y = bw;
|
||||
KMALLOC(km, p, n);
|
||||
KMALLOC(km, f, n);
|
||||
KMALLOC(km, v, n);
|
||||
KCALLOC(km, t, n);
|
||||
|
||||
// fill the score and backtrack arrays
|
||||
for (i = 0, max_ii = -1; i < n; ++i) {
|
||||
int64_t max_j = -1, end_j;
|
||||
int32_t max_f = a[i].y>>32&0xff, n_skip = 0;
|
||||
while (st < i && (a[i].x>>32 != a[st].x>>32 || a[i].x > a[st].x + max_dist_x)) ++st;
|
||||
if (i - st > max_iter) st = i - max_iter;
|
||||
for (j = i - 1; j >= st; --j) {
|
||||
int32_t sc;
|
||||
sc = comput_sc(&a[i], &a[j], max_dist_x, max_dist_y, bw, chn_pen_gap, chn_pen_skip, is_cdna, n_seg);
|
||||
++n_iter;
|
||||
if (sc == INT32_MIN) continue;
|
||||
sc += f[j];
|
||||
if (sc > max_f) {
|
||||
max_f = sc, max_j = j;
|
||||
if (n_skip > 0) --n_skip;
|
||||
} else if (t[j] == (int32_t)i) {
|
||||
if (++n_skip > max_skip)
|
||||
break;
|
||||
}
|
||||
if (p[j] >= 0) t[p[j]] = i;
|
||||
}
|
||||
end_j = j;
|
||||
if (max_ii < 0 || a[i].x - a[max_ii].x > (int64_t)max_dist_x) {
|
||||
int32_t max = INT32_MIN;
|
||||
max_ii = -1;
|
||||
for (j = i - 1; j >= st; --j)
|
||||
if (max < f[j]) max = f[j], max_ii = j;
|
||||
}
|
||||
if (max_ii >= 0 && max_ii < end_j) {
|
||||
int32_t tmp;
|
||||
tmp = comput_sc(&a[i], &a[max_ii], max_dist_x, max_dist_y, bw, chn_pen_gap, chn_pen_skip, is_cdna, n_seg);
|
||||
if (tmp != INT32_MIN && max_f < tmp + f[max_ii])
|
||||
max_f = tmp + f[max_ii], max_j = max_ii;
|
||||
}
|
||||
f[i] = max_f, p[i] = max_j;
|
||||
v[i] = max_j >= 0 && v[max_j] > max_f? v[max_j] : max_f; // v[] keeps the peak score up to i; f[] is the score ending at i, not always the peak
|
||||
if (max_ii < 0 || (a[i].x - a[max_ii].x <= (int64_t)max_dist_x && f[max_ii] < f[i]))
|
||||
max_ii = i;
|
||||
if (mmax_f < max_f) mmax_f = max_f;
|
||||
}
|
||||
|
||||
u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, &n_u, &n_v);
|
||||
*n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here
|
||||
kfree(km, p); kfree(km, f); kfree(km, t);
|
||||
if (n_u == 0) {
|
||||
kfree(km, a); kfree(km, v);
|
||||
return 0;
|
||||
}
|
||||
return compact_a(km, n_u, u, n_v, v, a);
|
||||
}
|
||||
|
||||
typedef struct lc_elem_s {
|
||||
int32_t y;
|
||||
int64_t i;
|
||||
double pri;
|
||||
KRMQ_HEAD(struct lc_elem_s) head;
|
||||
} lc_elem_t;
|
||||
|
||||
#define lc_elem_cmp(a, b) ((a)->y < (b)->y? -1 : (a)->y > (b)->y? 1 : ((a)->i > (b)->i) - ((a)->i < (b)->i))
|
||||
#define lc_elem_lt2(a, b) ((a)->pri < (b)->pri)
|
||||
KRMQ_INIT(lc_elem, lc_elem_t, head, lc_elem_cmp, lc_elem_lt2)
|
||||
|
||||
KALLOC_POOL_INIT(rmq, lc_elem_t)
|
||||
|
||||
static inline int32_t comput_sc_simple(const mm128_t *ai, const mm128_t *aj, float chn_pen_gap, float chn_pen_skip, int32_t *exact, int32_t *width)
|
||||
{
|
||||
int32_t dq = (int32_t)ai->y - (int32_t)aj->y, dr, dd, dg, q_span, sc;
|
||||
dr = (int32_t)(ai->x - aj->x);
|
||||
*width = dd = dr > dq? dr - dq : dq - dr;
|
||||
dg = dr < dq? dr : dq;
|
||||
q_span = aj->y>>32&0xff;
|
||||
sc = q_span < dg? q_span : dg;
|
||||
if (exact) *exact = (dd == 0 && dg <= q_span);
|
||||
if (dd || dq > q_span) {
|
||||
float lin_pen, log_pen;
|
||||
lin_pen = chn_pen_gap * (float)dd + chn_pen_skip * (float)dg;
|
||||
log_pen = dd >= 1? mg_log2(dd + 1) : 0.0f; // mg_log2() only works for dd>=2
|
||||
sc -= (int)(lin_pen + .5f * log_pen);
|
||||
}
|
||||
return sc;
|
||||
}
|
||||
|
||||
mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
|
||||
int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
|
||||
{
|
||||
int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0;
|
||||
int64_t *p, i, i0, st = 0, st_inner = 0, n_iter = 0;
|
||||
uint64_t *u;
|
||||
lc_elem_t *root = 0, *root_inner = 0;
|
||||
void *mem_mp = 0;
|
||||
kmp_rmq_t *mp;
|
||||
|
||||
if (_u) *_u = 0, *n_u_ = 0;
|
||||
if (n == 0 || a == 0) return 0;
|
||||
if (max_dist < bw) max_dist = bw;
|
||||
if (max_dist_inner <= 0 || max_dist_inner >= max_dist) max_dist_inner = 0;
|
||||
KMALLOC(km, p, n);
|
||||
KMALLOC(km, f, n);
|
||||
KCALLOC(km, t, n);
|
||||
KMALLOC(km, v, n);
|
||||
mem_mp = km_init2(km, 0x10000);
|
||||
mp = kmp_init_rmq(mem_mp);
|
||||
|
||||
// fill the score and backtrack arrays
|
||||
for (i = i0 = 0; i < n; ++i) {
|
||||
int64_t max_j = -1;
|
||||
int32_t q_span = a[i].y>>32&0xff, max_f = q_span;
|
||||
lc_elem_t s, *q, *r, lo, hi;
|
||||
// add in-range anchors
|
||||
if (i0 < i && a[i0].x != a[i].x) {
|
||||
int64_t j;
|
||||
for (j = i0; j < i; ++j) {
|
||||
q = kmp_alloc_rmq(mp);
|
||||
q->y = (int32_t)a[j].y, q->i = j, q->pri = -(f[j] + 0.5 * chn_pen_gap * ((int32_t)a[j].x + (int32_t)a[j].y));
|
||||
krmq_insert(lc_elem, &root, q, 0);
|
||||
if (max_dist_inner > 0) {
|
||||
r = kmp_alloc_rmq(mp);
|
||||
*r = *q;
|
||||
krmq_insert(lc_elem, &root_inner, r, 0);
|
||||
}
|
||||
}
|
||||
i0 = i;
|
||||
}
|
||||
// get rid of active chains out of range
|
||||
while (st < i && (a[i].x>>32 != a[st].x>>32 || a[i].x > a[st].x + max_dist || krmq_size(head, root) > cap_rmq_size)) {
|
||||
s.y = (int32_t)a[st].y, s.i = st;
|
||||
if ((q = krmq_find(lc_elem, root, &s, 0)) != 0) {
|
||||
q = krmq_erase(lc_elem, &root, q, 0);
|
||||
kmp_free_rmq(mp, q);
|
||||
}
|
||||
++st;
|
||||
}
|
||||
if (max_dist_inner > 0) { // similar to the block above, but applied to the inner tree
|
||||
while (st_inner < i && (a[i].x>>32 != a[st_inner].x>>32 || a[i].x > a[st_inner].x + max_dist_inner || krmq_size(head, root_inner) > cap_rmq_size)) {
|
||||
s.y = (int32_t)a[st_inner].y, s.i = st_inner;
|
||||
if ((q = krmq_find(lc_elem, root_inner, &s, 0)) != 0) {
|
||||
q = krmq_erase(lc_elem, &root_inner, q, 0);
|
||||
kmp_free_rmq(mp, q);
|
||||
}
|
||||
++st_inner;
|
||||
}
|
||||
}
|
||||
// RMQ
|
||||
lo.i = INT32_MAX, lo.y = (int32_t)a[i].y - max_dist;
|
||||
hi.i = 0, hi.y = (int32_t)a[i].y;
|
||||
if ((q = krmq_rmq(lc_elem, root, &lo, &hi)) != 0) {
|
||||
int32_t sc, exact, width, n_skip = 0;
|
||||
int64_t j = q->i;
|
||||
assert(q->y >= lo.y && q->y <= hi.y);
|
||||
sc = f[j] + comput_sc_simple(&a[i], &a[j], chn_pen_gap, chn_pen_skip, &exact, &width);
|
||||
if (width <= bw && sc > max_f) max_f = sc, max_j = j;
|
||||
if (!exact && root_inner && (int32_t)a[i].y > 0) {
|
||||
lc_elem_t *lo, *hi;
|
||||
s.y = (int32_t)a[i].y - 1, s.i = n;
|
||||
krmq_interval(lc_elem, root_inner, &s, &lo, &hi);
|
||||
if (lo) {
|
||||
const lc_elem_t *q;
|
||||
int32_t width, n_rmq_iter = 0;
|
||||
krmq_itr_t(lc_elem) itr;
|
||||
krmq_itr_find(lc_elem, root_inner, lo, &itr);
|
||||
while ((q = krmq_at(&itr)) != 0) {
|
||||
if (q->y < (int32_t)a[i].y - max_dist_inner) break;
|
||||
++n_rmq_iter;
|
||||
j = q->i;
|
||||
sc = f[j] + comput_sc_simple(&a[i], &a[j], chn_pen_gap, chn_pen_skip, 0, &width);
|
||||
if (width <= bw) {
|
||||
if (sc > max_f) {
|
||||
max_f = sc, max_j = j;
|
||||
if (n_skip > 0) --n_skip;
|
||||
} else if (t[j] == (int32_t)i) {
|
||||
if (++n_skip > max_chn_skip)
|
||||
break;
|
||||
}
|
||||
if (p[j] >= 0) t[p[j]] = i;
|
||||
}
|
||||
if (!krmq_itr_prev(lc_elem, &itr)) break;
|
||||
}
|
||||
n_iter += n_rmq_iter;
|
||||
}
|
||||
}
|
||||
}
|
||||
// set max
|
||||
assert(max_j < 0 || (a[max_j].x < a[i].x && (int32_t)a[max_j].y < (int32_t)a[i].y));
|
||||
f[i] = max_f, p[i] = max_j;
|
||||
v[i] = max_j >= 0 && v[max_j] > max_f? v[max_j] : max_f; // v[] keeps the peak score up to i; f[] is the score ending at i, not always the peak
|
||||
if (mmax_f < max_f) mmax_f = max_f;
|
||||
if (max_rmq_size < krmq_size(head, root)) max_rmq_size = krmq_size(head, root);
|
||||
}
|
||||
km_destroy(mem_mp);
|
||||
|
||||
u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, &n_u, &n_v);
|
||||
*n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here
|
||||
kfree(km, p); kfree(km, f); kfree(km, t);
|
||||
if (n_u == 0) {
|
||||
kfree(km, a); kfree(km, v);
|
||||
return 0;
|
||||
}
|
||||
return compact_a(km, n_u, u, n_v, v, a);
|
||||
}
|
||||
8
map.c
8
map.c
|
|
@ -271,7 +271,9 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|||
if (max_chain_gap_ref < opt->max_gap) max_chain_gap_ref = opt->max_gap;
|
||||
} else max_chain_gap_ref = opt->max_gap;
|
||||
|
||||
a = mm_chain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score, opt->chain_gap_scale, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
||||
// a = mm_chain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score, opt->chain_gap_scale, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
||||
a = mg_lchain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score,
|
||||
opt->chain_gap_scale * 0.2f, 0.0f, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
||||
|
||||
if (opt->max_occ > opt->mid_occ && rep_len > 0) {
|
||||
int rechain = 0;
|
||||
|
|
@ -293,7 +295,9 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|||
kfree(b->km, mini_pos);
|
||||
if (opt->flag & MM_F_HEAP_SORT) a = collect_seed_hits_heap(b->km, opt, opt->max_occ, mi, qname, &mv, qlen_sum, &n_a, &rep_len, &n_mini_pos, &mini_pos);
|
||||
else a = collect_seed_hits(b->km, opt, opt->max_occ, mi, qname, &mv, qlen_sum, &n_a, &rep_len, &n_mini_pos, &mini_pos);
|
||||
a = mm_chain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score, opt->chain_gap_scale, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
||||
// a = mm_chain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score, opt->chain_gap_scale, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
||||
a = mg_lchain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score,
|
||||
opt->chain_gap_scale * 0.2f, 0.0f, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
||||
}
|
||||
}
|
||||
b->frag_gap = max_chain_gap_ref;
|
||||
|
|
|
|||
8
mmpriv.h
8
mmpriv.h
|
|
@ -72,9 +72,15 @@ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
|
|||
void mm_idxopt_init(mm_idxopt_t *opt);
|
||||
const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n);
|
||||
int32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f);
|
||||
mm128_t *mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float gap_scale, int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km);
|
||||
mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a);
|
||||
|
||||
mm128_t *mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float gap_scale,
|
||||
int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km);
|
||||
mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
|
||||
int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km);
|
||||
mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
|
||||
int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km);
|
||||
|
||||
mm_reg1_t *mm_gen_regs(void *km, uint32_t hash, int qlen, int n_u, uint64_t *u, mm128_t *a);
|
||||
void mm_mark_alt(const mm_idx_t *mi, int n, mm_reg1_t *r);
|
||||
void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a);
|
||||
|
|
|
|||
Loading…
Reference in New Issue