From 1e16f3e701b670508b890407cbf9ce2995b4c091 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 5 Feb 2013 17:13:12 -0500 Subject: [PATCH] calling ksw_global(); ksw_extend() is buggy! --- bwamem.c | 13 +++++++++++-- fastmap.c | 1 + ksw.c | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/bwamem.c b/bwamem.c index fd7caa2..fd164be 100644 --- a/bwamem.c +++ b/bwamem.c @@ -278,7 +278,7 @@ static inline int cal_max_gap(const mem_opt_t *opt, int qlen) void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain1_t *c, mem_aln_t *a) { // FIXME: in general, we SHOULD check funny seed patterns such as contained seeds. When that happens, we should use a SW or extend more seeds - int i, j, qbeg; + int i, j, qbeg, w, nw_score; int64_t rlen, rbeg, rmax[2], tmp; const mem_seed_t *s; uint8_t *rseq = 0; @@ -342,7 +342,16 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int if (s->qbeg + s->len > a->qe) a->is_all = 0; } - printf("[%d] score=%d\t[%d,%d) <=> [%lld,%lld)\tis_all=%d\n", c->n, a->score, a->qb, a->qe, a->rb, a->re, a->is_all); + w = (int)((double)(l_query * opt->a - opt->q) / opt->r + 1.); + w = w < opt->w? w : opt->w; + w += abs((a->re - a->rb) - (a->qe - a->qb)); + nw_score = ksw_global(a->qe - a->qb, query + a->qb, a->re - a->rb, rseq + (a->rb - rmax[0]), 5, opt->mat, opt->q, opt->r, w, &a->n_cigar, &a->cigar); + + printf("[%d] ", c->n); for (i = a->qb; i < a->qe; ++i) putchar("ACGTN"[(int)query[i]]); putchar('\n'); + printf("[%d] ", c->n); for (i = a->rb; i < a->re; ++i) putchar("ACGTN"[(int)rseq[i - rmax[0]]]); putchar('\n'); + printf("[%d] score=%d,%d\t[%d,%d) <=> [%lld,%lld)\tis_all=%d\t", c->n, a->score, nw_score, a->qb, a->qe, a->rb, a->re, a->is_all); + for (i = 0; i < a->n_cigar; ++i) printf("%d%c", a->cigar[i]>>4, "MIDS"[a->cigar[i]&0xf]); + putchar('\n'); free(rseq); } diff --git a/fastmap.c b/fastmap.c index f02224a..811149f 100644 --- a/fastmap.c +++ b/fastmap.c @@ -66,6 +66,7 @@ int main_mem(int argc, char *argv[]) printf("\t%d,%d,%s:%c%ld", p->seeds[j].len, p->seeds[j].qbeg, bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1); } putchar('\n'); + free(a.cigar); } puts("//"); for (i = 0; i < chain.n; ++i) free(chain.chains[i].seeds); diff --git a/ksw.c b/ksw.c index 66728d5..6708e40 100644 --- a/ksw.c +++ b/ksw.c @@ -421,6 +421,7 @@ int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t *qp; // query profile int i, j, k, gapoe = gapo + gape, score, n_col; uint8_t *z; // backtrack matrix; in each cell: f<<4|e<<2|h; in principle, we can halve the memory, but backtrack will be a little more complex + if (n_cigar_) *n_cigar_ = 0; // allocate memory n_col = qlen < 2*w+1? qlen : 2*w+1; // maximum #columns of the backtrack matrix z = malloc(n_col * tlen);