r378: bugfix - wrong CIGAR
This is actually caused by a bug in SSE2-SW, where the query begin may be smaller than the true one if there is an exact tandem repeat.
This commit is contained in:
parent
2087dc162f
commit
be11e27e12
6
bwape.c
6
bwape.c
|
|
@ -404,7 +404,7 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
|
||||||
bwa_cigar_t *cigar = 0;
|
bwa_cigar_t *cigar = 0;
|
||||||
ubyte_t *ref_seq;
|
ubyte_t *ref_seq;
|
||||||
bwtint_t k, x, y, l;
|
bwtint_t k, x, y, l;
|
||||||
int xtra;
|
int xtra, gscore;
|
||||||
int8_t mat[25];
|
int8_t mat[25];
|
||||||
|
|
||||||
bwa_fill_scmat(1, 3, mat);
|
bwa_fill_scmat(1, 3, mat);
|
||||||
|
|
@ -422,12 +422,12 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
|
||||||
// do alignment
|
// do alignment
|
||||||
xtra = KSW_XSUBO | KSW_XSTART | (len < 250? KSW_XBYTE : 0);
|
xtra = KSW_XSUBO | KSW_XSTART | (len < 250? KSW_XBYTE : 0);
|
||||||
r = ksw_align(len, (uint8_t*)seq, l, ref_seq, 5, mat, 5, 1, xtra, 0);
|
r = ksw_align(len, (uint8_t*)seq, l, ref_seq, 5, mat, 5, 1, xtra, 0);
|
||||||
ksw_global(r.qe - r.qb + 1, &seq[r.qb], r.te - r.tb + 1, &ref_seq[r.tb], 5, mat, 5, 1, 50, n_cigar, &cigar32);
|
gscore = ksw_global(r.qe - r.qb + 1, &seq[r.qb], r.te - r.tb + 1, &ref_seq[r.tb], 5, mat, 5, 1, 50, n_cigar, &cigar32);
|
||||||
cigar = (bwa_cigar_t*)cigar32;
|
cigar = (bwa_cigar_t*)cigar32;
|
||||||
for (k = 0; k < *n_cigar; ++k)
|
for (k = 0; k < *n_cigar; ++k)
|
||||||
cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4));
|
cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4));
|
||||||
|
|
||||||
if (r.score < SW_MIN_MATCH_LEN || r.score2 == r.score) { // poor hit or tandem hits
|
if (r.score < SW_MIN_MATCH_LEN || r.score2 == r.score || gscore != r.score) { // poor hit or tandem hits or weird alignment
|
||||||
free(cigar); free(ref_seq); *n_cigar = 0;
|
free(cigar); free(ref_seq); *n_cigar = 0;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
4
ksw.c
4
ksw.c
|
|
@ -201,10 +201,11 @@ end_loop16:
|
||||||
r.score = gmax + q->shift < 255? gmax : 255;
|
r.score = gmax + q->shift < 255? gmax : 255;
|
||||||
r.te = te;
|
r.te = te;
|
||||||
if (r.score != 255) { // get a->qe, the end of query match; find the 2nd best score
|
if (r.score != 255) { // get a->qe, the end of query match; find the 2nd best score
|
||||||
int max = -1, low, high, qlen = slen * 16;
|
int max = -1, tmp, low, high, qlen = slen * 16;
|
||||||
uint8_t *t = (uint8_t*)Hmax;
|
uint8_t *t = (uint8_t*)Hmax;
|
||||||
for (i = 0; i < qlen; ++i, ++t)
|
for (i = 0; i < qlen; ++i, ++t)
|
||||||
if ((int)*t > max) max = *t, r.qe = i / 16 + i % 16 * slen;
|
if ((int)*t > max) max = *t, r.qe = i / 16 + i % 16 * slen;
|
||||||
|
else ((int)*t == max && (tmp = i / 16 + i % 16 * slen) < r.qe) r.qe = tmp;
|
||||||
//printf("%d,%d\n", max, gmax);
|
//printf("%d,%d\n", max, gmax);
|
||||||
if (b) {
|
if (b) {
|
||||||
i = (r.score + q->max - 1) / q->max;
|
i = (r.score + q->max - 1) / q->max;
|
||||||
|
|
@ -306,6 +307,7 @@ end_loop8:
|
||||||
uint16_t *t = (uint16_t*)Hmax;
|
uint16_t *t = (uint16_t*)Hmax;
|
||||||
for (i = 0, r.qe = -1; i < qlen; ++i, ++t)
|
for (i = 0, r.qe = -1; i < qlen; ++i, ++t)
|
||||||
if ((int)*t > max) max = *t, r.qe = i / 8 + i % 8 * slen;
|
if ((int)*t > max) max = *t, r.qe = i / 8 + i % 8 * slen;
|
||||||
|
else ((int)*t == max && (tmp = i / 8 + i % 8 * slen) < r.qe) r.qe = tmp;
|
||||||
if (b) {
|
if (b) {
|
||||||
i = (r.score + q->max - 1) / q->max;
|
i = (r.score + q->max - 1) / q->max;
|
||||||
low = te - i; high = te + i;
|
low = te - i; high = te + i;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue