From 954cfd766dc8932763fb03e801e0f22418d6578a Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 23 Apr 2014 15:14:52 -0400 Subject: [PATCH] improved ksw_extend2() 1. the first cell in a row is not always right 2. prevent from H->H extension from H=0 cells 3. replaced the band narrowing heuristic with always correct one --- ksw.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/ksw.c b/ksw.c index 74123cb..743f449 100644 --- a/ksw.c +++ b/ksw.c @@ -25,6 +25,7 @@ #include #include +#include #include #include "ksw.h" @@ -381,7 +382,7 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, eh_t *eh; // score array int8_t *qp; // query profile int i, j, k, oe_del = o_del + e_del, oe_ins = o_ins + e_ins, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off; - if (h0 < 0) h0 = 0; + assert(h0 > 0); // allocate memory qp = malloc(qlen * m); eh = calloc(qlen + 1, 8); @@ -411,13 +412,15 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, for (i = 0; LIKELY(i < tlen); ++i) { int t, f = 0, h1, m = 0, mj = -1; int8_t *q = &qp[target[i] * qlen]; - // compute the first column - h1 = h0 - (o_del + e_del * (i + 1)); - if (h1 < 0) h1 = 0; // apply the band and the constraint (if provided) if (beg < i - w) beg = i - w; if (end > i + w + 1) end = i + w + 1; if (end > qlen) end = qlen; + // compute the first column + if (beg == 0) { + h1 = h0 - (o_del + e_del * (i + 1)); + if (h1 < 0) h1 = 0; + } else h1 = 0; for (j = beg; LIKELY(j < end); ++j) { // At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1) // Similar to SSE2-SW, cells are computed in the following order: @@ -427,8 +430,8 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, eh_t *p = &eh[j]; int h, M = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j) p->h = h1; // set H(i,j-1) for the next row - M += q[j]; // separating H and M to disallow a cigar like "100M3I3D20M" - h = M > e? M : e; + M = M? M + q[j] : 0;// separating H and M to disallow a cigar like "100M3I3D20M" + h = M > e? M : e; // e and f are guaranteed to be non-negative, so h>=0 even if M<0 h = h > f? h : f; h1 = h; // save H(i,j) to h1 for the next column mj = m > h? mj : j; // record the position where max score is achieved @@ -460,10 +463,10 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, } } // update beg and end for the next round - for (j = mj; j >= beg && eh[j].h; --j); - beg = j + 1; - for (j = mj + 2; j <= end && eh[j].h; ++j); - end = j; + for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j); + beg = j; + for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j); + end = j + 2 < qlen? j + 2 : qlen; //beg = 0; end = qlen; // uncomment this line for debugging } free(eh); free(qp);