From 1bc9712cd827244159c4bde528fb05bebef5abf9 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 5 Feb 2013 16:28:15 -0500 Subject: [PATCH] explicitly use bit to keep bt matrix This also simplifies backtracking. --- ksw.c | 51 +++++++++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 32 deletions(-) diff --git a/ksw.c b/ksw.c index 7d97b2c..654ef3e 100644 --- a/ksw.c +++ b/ksw.c @@ -403,10 +403,6 @@ int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, #define MINUS_INF -0x40000000 -typedef struct { - uint8_t h:2, e:1, f:1; -} btmat_t; - static inline uint32_t *push_cigar(int *n_cigar, int *m_cigar, uint32_t *cigar, int op, int len) { if (*n_cigar == 0 || op != (cigar[(*n_cigar) - 1]&0xf)) { @@ -419,17 +415,15 @@ static inline uint32_t *push_cigar(int *n_cigar, int *m_cigar, uint32_t *cigar, return cigar; } -#define cal_j_(i_, k_, w_) ((k_) - ((i_) > (w_)? (i_) - (w_) : 0)) - int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int *n_cigar_, uint32_t **cigar_) { eh_t *eh; int8_t *qp; int i, j, k, gapoe = gapo + gape, score, n_col; - btmat_t *z; + uint8_t *z; // allocate memory - n_col = qlen < w? qlen : w; - z = malloc(n_col * tlen * sizeof(btmat_t)); + n_col = qlen < 2*w+1? qlen : 2*w+1; + z = malloc(n_col * tlen); qp = malloc(qlen * m); eh = calloc(qlen + 1, 8); // generate the query profile @@ -446,58 +440,51 @@ int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target, for (i = 0; LIKELY(i < tlen); ++i) { int32_t f = MINUS_INF, h1, beg, end; int8_t *q = &qp[target[i] * qlen]; - btmat_t *zi = &z[i * n_col]; + uint8_t *zi = &z[i * n_col]; beg = i > w? i - w : 0; end = i + w + 1 < qlen? i + w + 1 : qlen; h1 = beg == 0? -(gapo + gape * (i + 1)) : MINUS_INF; printf("%d", i); for (j = beg; LIKELY(j < end); ++j) { eh_t *p = &eh[j]; - btmat_t *zij = &zi[j - beg]; int32_t h = p->h, e = p->e; + uint8_t d; // direction p->h = h1; h += q[j]; - zij->h = h > e? 0 : 1; + d = h > e? 0 : 1; h = h > e? h : e; - zij->h = h > f? zij->h : 2; + d = h > f? d : 2; h = h > f? h : f; - printf("\t%d:%d:%d", h<-99?-99:h, e<-99?-99:e, f<-99?-99:f); + printf("\t[%d],%d:%d:%d", j, h<-99?-99:h, e<-99?-99:e, f<-99?-99:f); h1 = h; h -= gapoe; e -= gape; - zij->e = (e > h); // NB: zij->e keeps the direction for the NEXT row, not the current one + d |= e > h? 1<<2 : 0; e = e > h? e : h; p->e = e; f -= gape; - zij->f = (f > h); + d |= f > h? 2<<4 : 0; f = f > h? f : h; - printf(",%d:%d:%d", zij->h, zij->e, zij->f); + zi[j - beg] = d; + printf(",%d:%d:%d", d>>0&3, d>>2&3, d>>4&3); } putchar('\n'); eh[end].h = h1; eh[end].e = MINUS_INF; } score = eh[qlen].h; if (n_cigar_ && cigar_) { // backtrack - int n_cigar = 0, m_cigar = 0, which; + int n_cigar = 0, m_cigar = 0, which = 0; uint32_t *cigar = 0, tmp; i = tlen - 1; k = (i + w + 1 < qlen? i + w + 1 : qlen) - 1; // (i,k) points to the last cell - which = z[i * n_col + cal_j_(i, k, w)].h; while (i >= 0 && k >= 0) { + which = z[i * n_col + (k - (i > w? i - w : 0))] >> (which<<1) & 3; printf("(%d,%d)\t%d\n", i, k, which); - if (which == 0) { - cigar = push_cigar(&n_cigar, &m_cigar, cigar, 0, 1); --i, --k; - if (i >= 0 && k >= 0) which = z[i * n_col + cal_j_(i, k, w)].h; - } else if (which == 1) { - cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, 1); --i; - if (i >= 0) which = z[i * n_col + cal_j_(i, k, w)].e? 1 : 0; - } else { // which == 2 - cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, 1); --k; - if (k >= 0) which = z[i * n_col + cal_j_(i, k, w)].f? 2 : 0; - } + if (which == 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 0, 1), --i, --k; + else if (which == 1) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, 1), --i; + else cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, 1), --k; } - printf("(%d,%d)\t%d\n", i, k, which); - if (i > 0) push_cigar(&n_cigar, &m_cigar, cigar, 2, i); - if (k > 0) push_cigar(&n_cigar, &m_cigar, cigar, 1, k); + if (i >= 0) push_cigar(&n_cigar, &m_cigar, cigar, 2, i + 1); + if (k >= 0) push_cigar(&n_cigar, &m_cigar, cigar, 1, k + 1); for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR tmp = cigar[i], cigar[i] = cigar[n_cigar-1-i], cigar[n_cigar-1-i] = tmp; *n_cigar_ = n_cigar, *cigar_ = cigar;