explicitly use bit to keep bt matrix
This also simplifies backtracking.
This commit is contained in:
parent
7e1466c885
commit
1bc9712cd8
51
ksw.c
51
ksw.c
|
|
@ -403,10 +403,6 @@ int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
|
||||||
|
|
||||||
#define MINUS_INF -0x40000000
|
#define MINUS_INF -0x40000000
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
uint8_t h:2, e:1, f:1;
|
|
||||||
} btmat_t;
|
|
||||||
|
|
||||||
static inline uint32_t *push_cigar(int *n_cigar, int *m_cigar, uint32_t *cigar, int op, int len)
|
static inline uint32_t *push_cigar(int *n_cigar, int *m_cigar, uint32_t *cigar, int op, int len)
|
||||||
{
|
{
|
||||||
if (*n_cigar == 0 || op != (cigar[(*n_cigar) - 1]&0xf)) {
|
if (*n_cigar == 0 || op != (cigar[(*n_cigar) - 1]&0xf)) {
|
||||||
|
|
@ -419,17 +415,15 @@ static inline uint32_t *push_cigar(int *n_cigar, int *m_cigar, uint32_t *cigar,
|
||||||
return cigar;
|
return cigar;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define cal_j_(i_, k_, w_) ((k_) - ((i_) > (w_)? (i_) - (w_) : 0))
|
|
||||||
|
|
||||||
int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int *n_cigar_, uint32_t **cigar_)
|
int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int *n_cigar_, uint32_t **cigar_)
|
||||||
{
|
{
|
||||||
eh_t *eh;
|
eh_t *eh;
|
||||||
int8_t *qp;
|
int8_t *qp;
|
||||||
int i, j, k, gapoe = gapo + gape, score, n_col;
|
int i, j, k, gapoe = gapo + gape, score, n_col;
|
||||||
btmat_t *z;
|
uint8_t *z;
|
||||||
// allocate memory
|
// allocate memory
|
||||||
n_col = qlen < w? qlen : w;
|
n_col = qlen < 2*w+1? qlen : 2*w+1;
|
||||||
z = malloc(n_col * tlen * sizeof(btmat_t));
|
z = malloc(n_col * tlen);
|
||||||
qp = malloc(qlen * m);
|
qp = malloc(qlen * m);
|
||||||
eh = calloc(qlen + 1, 8);
|
eh = calloc(qlen + 1, 8);
|
||||||
// generate the query profile
|
// generate the query profile
|
||||||
|
|
@ -446,58 +440,51 @@ int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
|
||||||
for (i = 0; LIKELY(i < tlen); ++i) {
|
for (i = 0; LIKELY(i < tlen); ++i) {
|
||||||
int32_t f = MINUS_INF, h1, beg, end;
|
int32_t f = MINUS_INF, h1, beg, end;
|
||||||
int8_t *q = &qp[target[i] * qlen];
|
int8_t *q = &qp[target[i] * qlen];
|
||||||
btmat_t *zi = &z[i * n_col];
|
uint8_t *zi = &z[i * n_col];
|
||||||
beg = i > w? i - w : 0;
|
beg = i > w? i - w : 0;
|
||||||
end = i + w + 1 < qlen? i + w + 1 : qlen;
|
end = i + w + 1 < qlen? i + w + 1 : qlen;
|
||||||
h1 = beg == 0? -(gapo + gape * (i + 1)) : MINUS_INF;
|
h1 = beg == 0? -(gapo + gape * (i + 1)) : MINUS_INF;
|
||||||
printf("%d", i);
|
printf("%d", i);
|
||||||
for (j = beg; LIKELY(j < end); ++j) {
|
for (j = beg; LIKELY(j < end); ++j) {
|
||||||
eh_t *p = &eh[j];
|
eh_t *p = &eh[j];
|
||||||
btmat_t *zij = &zi[j - beg];
|
|
||||||
int32_t h = p->h, e = p->e;
|
int32_t h = p->h, e = p->e;
|
||||||
|
uint8_t d; // direction
|
||||||
p->h = h1;
|
p->h = h1;
|
||||||
h += q[j];
|
h += q[j];
|
||||||
zij->h = h > e? 0 : 1;
|
d = h > e? 0 : 1;
|
||||||
h = h > e? h : e;
|
h = h > e? h : e;
|
||||||
zij->h = h > f? zij->h : 2;
|
d = h > f? d : 2;
|
||||||
h = h > f? h : f;
|
h = h > f? h : f;
|
||||||
printf("\t%d:%d:%d", h<-99?-99:h, e<-99?-99:e, f<-99?-99:f);
|
printf("\t[%d],%d:%d:%d", j, h<-99?-99:h, e<-99?-99:e, f<-99?-99:f);
|
||||||
h1 = h;
|
h1 = h;
|
||||||
h -= gapoe;
|
h -= gapoe;
|
||||||
e -= gape;
|
e -= gape;
|
||||||
zij->e = (e > h); // NB: zij->e keeps the direction for the NEXT row, not the current one
|
d |= e > h? 1<<2 : 0;
|
||||||
e = e > h? e : h;
|
e = e > h? e : h;
|
||||||
p->e = e;
|
p->e = e;
|
||||||
f -= gape;
|
f -= gape;
|
||||||
zij->f = (f > h);
|
d |= f > h? 2<<4 : 0;
|
||||||
f = f > h? f : h;
|
f = f > h? f : h;
|
||||||
printf(",%d:%d:%d", zij->h, zij->e, zij->f);
|
zi[j - beg] = d;
|
||||||
|
printf(",%d:%d:%d", d>>0&3, d>>2&3, d>>4&3);
|
||||||
}
|
}
|
||||||
putchar('\n');
|
putchar('\n');
|
||||||
eh[end].h = h1; eh[end].e = MINUS_INF;
|
eh[end].h = h1; eh[end].e = MINUS_INF;
|
||||||
}
|
}
|
||||||
score = eh[qlen].h;
|
score = eh[qlen].h;
|
||||||
if (n_cigar_ && cigar_) { // backtrack
|
if (n_cigar_ && cigar_) { // backtrack
|
||||||
int n_cigar = 0, m_cigar = 0, which;
|
int n_cigar = 0, m_cigar = 0, which = 0;
|
||||||
uint32_t *cigar = 0, tmp;
|
uint32_t *cigar = 0, tmp;
|
||||||
i = tlen - 1; k = (i + w + 1 < qlen? i + w + 1 : qlen) - 1; // (i,k) points to the last cell
|
i = tlen - 1; k = (i + w + 1 < qlen? i + w + 1 : qlen) - 1; // (i,k) points to the last cell
|
||||||
which = z[i * n_col + cal_j_(i, k, w)].h;
|
|
||||||
while (i >= 0 && k >= 0) {
|
while (i >= 0 && k >= 0) {
|
||||||
|
which = z[i * n_col + (k - (i > w? i - w : 0))] >> (which<<1) & 3;
|
||||||
printf("(%d,%d)\t%d\n", i, k, which);
|
printf("(%d,%d)\t%d\n", i, k, which);
|
||||||
if (which == 0) {
|
if (which == 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 0, 1), --i, --k;
|
||||||
cigar = push_cigar(&n_cigar, &m_cigar, cigar, 0, 1); --i, --k;
|
else if (which == 1) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, 1), --i;
|
||||||
if (i >= 0 && k >= 0) which = z[i * n_col + cal_j_(i, k, w)].h;
|
else cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, 1), --k;
|
||||||
} else if (which == 1) {
|
|
||||||
cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, 1); --i;
|
|
||||||
if (i >= 0) which = z[i * n_col + cal_j_(i, k, w)].e? 1 : 0;
|
|
||||||
} else { // which == 2
|
|
||||||
cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, 1); --k;
|
|
||||||
if (k >= 0) which = z[i * n_col + cal_j_(i, k, w)].f? 2 : 0;
|
|
||||||
}
|
}
|
||||||
}
|
if (i >= 0) push_cigar(&n_cigar, &m_cigar, cigar, 2, i + 1);
|
||||||
printf("(%d,%d)\t%d\n", i, k, which);
|
if (k >= 0) push_cigar(&n_cigar, &m_cigar, cigar, 1, k + 1);
|
||||||
if (i > 0) push_cigar(&n_cigar, &m_cigar, cigar, 2, i);
|
|
||||||
if (k > 0) push_cigar(&n_cigar, &m_cigar, cigar, 1, k);
|
|
||||||
for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR
|
for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR
|
||||||
tmp = cigar[i], cigar[i] = cigar[n_cigar-1-i], cigar[n_cigar-1-i] = tmp;
|
tmp = cigar[i], cigar[i] = cigar[n_cigar-1-i], cigar[n_cigar-1-i] = tmp;
|
||||||
*n_cigar_ = n_cigar, *cigar_ = cigar;
|
*n_cigar_ = n_cigar, *cigar_ = cigar;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue