r888: fixed incorrect CIGAR when --eqx in use
This was caused by mm_fix_cigar() which may change query/target offset in very rare cases. Generating EQX has to beware of this change. Resolves #266
This commit is contained in:
parent
62bbadf668
commit
83a8ee7038
151
align.c
151
align.c
|
|
@ -147,78 +147,6 @@ static void mm_fix_cigar(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq,
|
|||
}
|
||||
}
|
||||
|
||||
static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e)
|
||||
{
|
||||
uint32_t k, l;
|
||||
int32_t s = 0, max = 0, qshift, tshift, toff = 0, qoff = 0;
|
||||
mm_extra_t *p = r->p;
|
||||
if (p == 0) return;
|
||||
mm_fix_cigar(r, qseq, tseq, &qshift, &tshift);
|
||||
qseq += qshift, tseq += tshift; // qseq and tseq may be shifted due to the removal of leading I/D
|
||||
r->blen = r->mlen = 0;
|
||||
for (k = 0; k < p->n_cigar; ++k) {
|
||||
uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4;
|
||||
if (op == 0) { // match/mismatch
|
||||
int n_ambi = 0, n_diff = 0;
|
||||
for (l = 0; l < len; ++l) {
|
||||
int cq = qseq[qoff + l], ct = tseq[toff + l];
|
||||
if (ct > 3 || cq > 3) ++n_ambi;
|
||||
else if (ct != cq) ++n_diff;
|
||||
s += mat[ct * 5 + cq];
|
||||
if (s < 0) s = 0;
|
||||
else max = max > s? max : s;
|
||||
}
|
||||
r->blen += len - n_ambi, r->mlen += len - (n_ambi + n_diff), p->n_ambi += n_ambi;
|
||||
toff += len, qoff += len;
|
||||
} else if (op == 1) { // insertion
|
||||
int n_ambi = 0;
|
||||
for (l = 0; l < len; ++l)
|
||||
if (qseq[qoff + l] > 3) ++n_ambi;
|
||||
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
||||
s -= q + e * len;
|
||||
if (s < 0) s = 0;
|
||||
qoff += len;
|
||||
} else if (op == 2) { // deletion
|
||||
int n_ambi = 0;
|
||||
for (l = 0; l < len; ++l)
|
||||
if (tseq[toff + l] > 3) ++n_ambi;
|
||||
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
||||
s -= q + e * len;
|
||||
if (s < 0) s = 0;
|
||||
toff += len;
|
||||
} else if (op == 3) { // intron
|
||||
toff += len;
|
||||
}
|
||||
}
|
||||
p->dp_max = max;
|
||||
assert(qoff == r->qe - r->qs && toff == r->re - r->rs);
|
||||
}
|
||||
|
||||
static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) // TODO: this calls the libc realloc()
|
||||
{
|
||||
mm_extra_t *p;
|
||||
if (n_cigar == 0) return;
|
||||
if (r->p == 0) {
|
||||
uint32_t capacity = n_cigar + sizeof(mm_extra_t)/4;
|
||||
kroundup32(capacity);
|
||||
r->p = (mm_extra_t*)calloc(capacity, 4);
|
||||
r->p->capacity = capacity;
|
||||
} else if (r->p->n_cigar + n_cigar + sizeof(mm_extra_t)/4 > r->p->capacity) {
|
||||
r->p->capacity = r->p->n_cigar + n_cigar + sizeof(mm_extra_t)/4;
|
||||
kroundup32(r->p->capacity);
|
||||
r->p = (mm_extra_t*)realloc(r->p, r->p->capacity * 4);
|
||||
}
|
||||
p = r->p;
|
||||
if (p->n_cigar > 0 && (p->cigar[p->n_cigar-1]&0xf) == (cigar[0]&0xf)) { // same CIGAR op at the boundary
|
||||
p->cigar[p->n_cigar-1] += cigar[0]>>4<<4;
|
||||
if (n_cigar > 1) memcpy(p->cigar + p->n_cigar, cigar + 1, (n_cigar - 1) * 4);
|
||||
p->n_cigar += n_cigar - 1;
|
||||
} else {
|
||||
memcpy(p->cigar + p->n_cigar, cigar, n_cigar * 4);
|
||||
p->n_cigar += n_cigar;
|
||||
}
|
||||
}
|
||||
|
||||
static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq) // written by @armintoepfer
|
||||
{
|
||||
uint32_t n_EQX = 0;
|
||||
|
|
@ -290,6 +218,79 @@ static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t
|
|||
r->p = p;
|
||||
}
|
||||
|
||||
static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e, int is_eqx)
|
||||
{
|
||||
uint32_t k, l;
|
||||
int32_t s = 0, max = 0, qshift, tshift, toff = 0, qoff = 0;
|
||||
mm_extra_t *p = r->p;
|
||||
if (p == 0) return;
|
||||
mm_fix_cigar(r, qseq, tseq, &qshift, &tshift);
|
||||
qseq += qshift, tseq += tshift; // qseq and tseq may be shifted due to the removal of leading I/D
|
||||
r->blen = r->mlen = 0;
|
||||
for (k = 0; k < p->n_cigar; ++k) {
|
||||
uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4;
|
||||
if (op == 0) { // match/mismatch
|
||||
int n_ambi = 0, n_diff = 0;
|
||||
for (l = 0; l < len; ++l) {
|
||||
int cq = qseq[qoff + l], ct = tseq[toff + l];
|
||||
if (ct > 3 || cq > 3) ++n_ambi;
|
||||
else if (ct != cq) ++n_diff;
|
||||
s += mat[ct * 5 + cq];
|
||||
if (s < 0) s = 0;
|
||||
else max = max > s? max : s;
|
||||
}
|
||||
r->blen += len - n_ambi, r->mlen += len - (n_ambi + n_diff), p->n_ambi += n_ambi;
|
||||
toff += len, qoff += len;
|
||||
} else if (op == 1) { // insertion
|
||||
int n_ambi = 0;
|
||||
for (l = 0; l < len; ++l)
|
||||
if (qseq[qoff + l] > 3) ++n_ambi;
|
||||
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
||||
s -= q + e * len;
|
||||
if (s < 0) s = 0;
|
||||
qoff += len;
|
||||
} else if (op == 2) { // deletion
|
||||
int n_ambi = 0;
|
||||
for (l = 0; l < len; ++l)
|
||||
if (tseq[toff + l] > 3) ++n_ambi;
|
||||
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
||||
s -= q + e * len;
|
||||
if (s < 0) s = 0;
|
||||
toff += len;
|
||||
} else if (op == 3) { // intron
|
||||
toff += len;
|
||||
}
|
||||
}
|
||||
p->dp_max = max;
|
||||
assert(qoff == r->qe - r->qs && toff == r->re - r->rs);
|
||||
if (is_eqx) mm_update_cigar_eqx(r, qseq, tseq); // NB: it has to be called here as changes to qseq and tseq are not returned
|
||||
}
|
||||
|
||||
static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) // TODO: this calls the libc realloc()
|
||||
{
|
||||
mm_extra_t *p;
|
||||
if (n_cigar == 0) return;
|
||||
if (r->p == 0) {
|
||||
uint32_t capacity = n_cigar + sizeof(mm_extra_t)/4;
|
||||
kroundup32(capacity);
|
||||
r->p = (mm_extra_t*)calloc(capacity, 4);
|
||||
r->p->capacity = capacity;
|
||||
} else if (r->p->n_cigar + n_cigar + sizeof(mm_extra_t)/4 > r->p->capacity) {
|
||||
r->p->capacity = r->p->n_cigar + n_cigar + sizeof(mm_extra_t)/4;
|
||||
kroundup32(r->p->capacity);
|
||||
r->p = (mm_extra_t*)realloc(r->p, r->p->capacity * 4);
|
||||
}
|
||||
p = r->p;
|
||||
if (p->n_cigar > 0 && (p->cigar[p->n_cigar-1]&0xf) == (cigar[0]&0xf)) { // same CIGAR op at the boundary
|
||||
p->cigar[p->n_cigar-1] += cigar[0]>>4<<4;
|
||||
if (n_cigar > 1) memcpy(p->cigar + p->n_cigar, cigar + 1, (n_cigar - 1) * 4);
|
||||
p->n_cigar += n_cigar - 1;
|
||||
} else {
|
||||
memcpy(p->cigar + p->n_cigar, cigar, n_cigar * 4);
|
||||
p->n_cigar += n_cigar;
|
||||
}
|
||||
}
|
||||
|
||||
static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const int8_t *mat, int w, int end_bonus, int zdrop, int flag, ksw_extz_t *ez)
|
||||
{
|
||||
if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) {
|
||||
|
|
@ -751,8 +752,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
assert(re1 - rs1 <= re0 - rs0);
|
||||
if (r->p) {
|
||||
mm_idx_getseq(mi, rid, rs1, re1, tseq);
|
||||
mm_update_extra(r, &qseq0[r->rev][qs1], tseq, mat, opt->q, opt->e);
|
||||
if (opt->flag & MM_F_EQX) mm_update_cigar_eqx(r, &qseq0[r->rev][qs1], tseq);
|
||||
mm_update_extra(r, &qseq0[r->rev][qs1], tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX);
|
||||
if (rev && r->p->trans_strand)
|
||||
r->p->trans_strand ^= 3; // flip to the read strand
|
||||
}
|
||||
|
|
@ -810,8 +810,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i
|
|||
}
|
||||
r_inv->rs = r1->re + t_off;
|
||||
r_inv->re = r_inv->rs + ez->max_t + 1;
|
||||
mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e);
|
||||
if (opt->flag & MM_F_EQX) mm_update_cigar_eqx(r_inv, &qseq[q_off], &tseq[t_off]);
|
||||
mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX);
|
||||
ret = 1;
|
||||
end_align1_inv:
|
||||
kfree(km, tseq);
|
||||
|
|
|
|||
Loading…
Reference in New Issue