From be11e27e126fd680835eb5287c5d66c9f0d4a32d Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 19 Apr 2013 12:00:37 -0400 Subject: [PATCH] r378: bugfix - wrong CIGAR This is actually caused by a bug in SSE2-SW, where the query begin may be smaller than the true one if there is an exact tandem repeat. --- bwape.c | 6 +++--- ksw.c | 4 +++- main.c | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/bwape.c b/bwape.c index 9fd12b1..92161a5 100644 --- a/bwape.c +++ b/bwape.c @@ -404,7 +404,7 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u bwa_cigar_t *cigar = 0; ubyte_t *ref_seq; bwtint_t k, x, y, l; - int xtra; + int xtra, gscore; int8_t mat[25]; bwa_fill_scmat(1, 3, mat); @@ -422,12 +422,12 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u // do alignment xtra = KSW_XSUBO | KSW_XSTART | (len < 250? KSW_XBYTE : 0); r = ksw_align(len, (uint8_t*)seq, l, ref_seq, 5, mat, 5, 1, xtra, 0); - ksw_global(r.qe - r.qb + 1, &seq[r.qb], r.te - r.tb + 1, &ref_seq[r.tb], 5, mat, 5, 1, 50, n_cigar, &cigar32); + gscore = ksw_global(r.qe - r.qb + 1, &seq[r.qb], r.te - r.tb + 1, &ref_seq[r.tb], 5, mat, 5, 1, 50, n_cigar, &cigar32); cigar = (bwa_cigar_t*)cigar32; for (k = 0; k < *n_cigar; ++k) cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4)); - if (r.score < SW_MIN_MATCH_LEN || r.score2 == r.score) { // poor hit or tandem hits + if (r.score < SW_MIN_MATCH_LEN || r.score2 == r.score || gscore != r.score) { // poor hit or tandem hits or weird alignment free(cigar); free(ref_seq); *n_cigar = 0; return 0; } diff --git a/ksw.c b/ksw.c index 2daf809..a786a2b 100644 --- a/ksw.c +++ b/ksw.c @@ -201,10 +201,11 @@ end_loop16: r.score = gmax + q->shift < 255? gmax : 255; r.te = te; if (r.score != 255) { // get a->qe, the end of query match; find the 2nd best score - int max = -1, low, high, qlen = slen * 16; + int max = -1, tmp, low, high, qlen = slen * 16; uint8_t *t = (uint8_t*)Hmax; for (i = 0; i < qlen; ++i, ++t) if ((int)*t > max) max = *t, r.qe = i / 16 + i % 16 * slen; + else ((int)*t == max && (tmp = i / 16 + i % 16 * slen) < r.qe) r.qe = tmp; //printf("%d,%d\n", max, gmax); if (b) { i = (r.score + q->max - 1) / q->max; @@ -306,6 +307,7 @@ end_loop8: uint16_t *t = (uint16_t*)Hmax; for (i = 0, r.qe = -1; i < qlen; ++i, ++t) if ((int)*t > max) max = *t, r.qe = i / 8 + i % 8 * slen; + else ((int)*t == max && (tmp = i / 8 + i % 8 * slen) < r.qe) r.qe = tmp; if (b) { i = (r.score + q->max - 1) / q->max; low = te - i; high = te + i; diff --git a/main.c b/main.c index ea92d81..75e4e6b 100644 --- a/main.c +++ b/main.c @@ -3,7 +3,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.3-r377-beta" +#define PACKAGE_VERSION "0.7.3-r378-beta" #endif int bwa_fa2pac(int argc, char *argv[]);