Don't use narrow band.

I may retry this feature if the profilter indicates that this greatly helps.
This commit is contained in:
Heng Li 2013-02-07 21:22:54 -05:00
parent d890c7997c
commit d8e4d57956
3 changed files with 7 additions and 26 deletions

View File

@ -376,35 +376,17 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
tmp = s->rbeg - rmax[0];
rs = malloc(tmp);
for (i = 0; i < tmp; ++i) rs[i] = rseq[tmp - 1 - i];
a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, opt->w, s->len * opt->a, 0, &qle, &tle);
a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, opt->w, s->len * opt->a, &qle, &tle);
a->qb = s->qbeg - qle; a->rb = s->rbeg - tle;
free(qs); free(rs);
} else a->score = s->len * opt->a, a->qb = 0, a->rb = s->rbeg;
if (s->qbeg + s->len != l_query) { // right extension of the first seed
int qle, tle, qe, re;
int16_t *qw = 0;
qe = s->qbeg + s->len;
re = s->rbeg + s->len - rmax[0];
#if 0 // FIXME: I am not sure if the following block works. Comment it out if SW extension gives unexpected result.
if (c->n > 1) { // generate $qw
int j, l = rmax[1] - (s->rbeg + s->len);
qw = malloc(l * 2);
for (i = 0; i < l; ++i) qw[i] = -1; // no constraint by default
for (i = 1; i < c->n; ++i) {
const mem_seed_t *t = &c->seeds[i];
for (j = 0; j < t->len; ++j) {
int x = t->rbeg + j - (s->rbeg + s->len), y = t->qbeg + j - (s->qbeg + s->len);
if (x < 0) continue; // overlap with the first seed
if (qw[x] == -1) qw[x] = (x > y? x - y : y - x) + 1; // FIXME: in principle, we should not need +1
else if (qw[x] >= 0) qw[x] = -2; // in a seed overlap, do not set any constraint
}
}
}
#endif
a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, opt->w, a->score, qw, &qle, &tle);
a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, opt->w, a->score, &qle, &tle);
a->qe = qe + qle; a->re = rmax[0] + re + tle;
free(qw);
} else a->qe = l_query, a->re = s->rbeg + s->len;
if (mem_debug >= 2) printf("[%d] score=%d\t[%d,%d) <=> [%ld,%ld)\n", k, a->score, a->qb, a->qe, (long)a->rb, (long)a->re);
// check how many seeds have been covered

9
ksw.c
View File

@ -319,7 +319,7 @@ typedef struct {
int32_t h, e;
} eh_t;
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, const int16_t *qw, int *_qle, int *_tle)
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *_qle, int *_tle)
{
eh_t *eh; // score array
int8_t *qp; // query profile
@ -348,15 +348,14 @@ int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
max = h0, max_i = max_j = -1;
beg = 0, end = qlen;
for (i = 0; LIKELY(i < tlen); ++i) {
int f = 0, h1, m = 0, mj = -1, t;
int f = 0, h1, m = 0, mj = -1;
int8_t *q = &qp[target[i] * qlen];
// compute the first column
h1 = h0 - (gapo + gape * (i + 1));
if (h1 < 0) h1 = 0;
// apply the band and the constraint (if provided)
t = (qw && qw[i] >= 0 && qw[i] < w)? qw[i] : w; // this is the band width at $i
if (beg < i - t) beg = i - t;
if (end > i + t + 1) end = i + t + 1;
if (beg < i - w) beg = i - w;
if (end > i + w + 1) end = i + w + 1;
if (end > qlen) end = qlen;
for (j = beg; LIKELY(j < end); ++j) {
// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)

2
ksw.h
View File

@ -49,7 +49,7 @@ extern "C" {
/** Unified interface for ksw_sse2_8() and ksw_sse2_16() */
int ksw_sse2(ksw_query_t *q, int tlen, const uint8_t *target, ksw_aux_t *a);
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, const int16_t *qw, int *_qle, int *_tle);
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *_qle, int *_tle);
int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int *_n_cigar, uint32_t **_cigar);
#ifdef __cplusplus