improved ksw_extend2()

1. the first cell in a row is not always right
2. prevent from H->H extension from H=0 cells
3. replaced the band narrowing heuristic with always correct one
This commit is contained in:
Heng Li 2014-04-23 15:14:52 -04:00
parent b93fca2b2e
commit 954cfd766d
1 changed files with 13 additions and 10 deletions

23
ksw.c
View File

@ -25,6 +25,7 @@
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <emmintrin.h>
#include "ksw.h"
@ -381,7 +382,7 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
eh_t *eh; // score array
int8_t *qp; // query profile
int i, j, k, oe_del = o_del + e_del, oe_ins = o_ins + e_ins, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
if (h0 < 0) h0 = 0;
assert(h0 > 0);
// allocate memory
qp = malloc(qlen * m);
eh = calloc(qlen + 1, 8);
@ -411,13 +412,15 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
for (i = 0; LIKELY(i < tlen); ++i) {
int t, f = 0, h1, m = 0, mj = -1;
int8_t *q = &qp[target[i] * qlen];
// compute the first column
h1 = h0 - (o_del + e_del * (i + 1));
if (h1 < 0) h1 = 0;
// apply the band and the constraint (if provided)
if (beg < i - w) beg = i - w;
if (end > i + w + 1) end = i + w + 1;
if (end > qlen) end = qlen;
// compute the first column
if (beg == 0) {
h1 = h0 - (o_del + e_del * (i + 1));
if (h1 < 0) h1 = 0;
} else h1 = 0;
for (j = beg; LIKELY(j < end); ++j) {
// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
// Similar to SSE2-SW, cells are computed in the following order:
@ -427,8 +430,8 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
eh_t *p = &eh[j];
int h, M = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j)
p->h = h1; // set H(i,j-1) for the next row
M += q[j]; // separating H and M to disallow a cigar like "100M3I3D20M"
h = M > e? M : e;
M = M? M + q[j] : 0;// separating H and M to disallow a cigar like "100M3I3D20M"
h = M > e? M : e; // e and f are guaranteed to be non-negative, so h>=0 even if M<0
h = h > f? h : f;
h1 = h; // save H(i,j) to h1 for the next column
mj = m > h? mj : j; // record the position where max score is achieved
@ -460,10 +463,10 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
}
}
// update beg and end for the next round
for (j = mj; j >= beg && eh[j].h; --j);
beg = j + 1;
for (j = mj + 2; j <= end && eh[j].h; ++j);
end = j;
for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j);
beg = j;
for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j);
end = j + 2 < qlen? j + 2 : qlen;
//beg = 0; end = qlen; // uncomment this line for debugging
}
free(eh); free(qp);