r888: fixed incorrect CIGAR when --eqx in use
This was caused by mm_fix_cigar() which may change query/target offset in very rare cases. Generating EQX has to beware of this change. Resolves #266
This commit is contained in:
parent
62bbadf668
commit
83a8ee7038
151
align.c
151
align.c
|
|
@ -147,78 +147,6 @@ static void mm_fix_cigar(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e)
|
|
||||||
{
|
|
||||||
uint32_t k, l;
|
|
||||||
int32_t s = 0, max = 0, qshift, tshift, toff = 0, qoff = 0;
|
|
||||||
mm_extra_t *p = r->p;
|
|
||||||
if (p == 0) return;
|
|
||||||
mm_fix_cigar(r, qseq, tseq, &qshift, &tshift);
|
|
||||||
qseq += qshift, tseq += tshift; // qseq and tseq may be shifted due to the removal of leading I/D
|
|
||||||
r->blen = r->mlen = 0;
|
|
||||||
for (k = 0; k < p->n_cigar; ++k) {
|
|
||||||
uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4;
|
|
||||||
if (op == 0) { // match/mismatch
|
|
||||||
int n_ambi = 0, n_diff = 0;
|
|
||||||
for (l = 0; l < len; ++l) {
|
|
||||||
int cq = qseq[qoff + l], ct = tseq[toff + l];
|
|
||||||
if (ct > 3 || cq > 3) ++n_ambi;
|
|
||||||
else if (ct != cq) ++n_diff;
|
|
||||||
s += mat[ct * 5 + cq];
|
|
||||||
if (s < 0) s = 0;
|
|
||||||
else max = max > s? max : s;
|
|
||||||
}
|
|
||||||
r->blen += len - n_ambi, r->mlen += len - (n_ambi + n_diff), p->n_ambi += n_ambi;
|
|
||||||
toff += len, qoff += len;
|
|
||||||
} else if (op == 1) { // insertion
|
|
||||||
int n_ambi = 0;
|
|
||||||
for (l = 0; l < len; ++l)
|
|
||||||
if (qseq[qoff + l] > 3) ++n_ambi;
|
|
||||||
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
|
||||||
s -= q + e * len;
|
|
||||||
if (s < 0) s = 0;
|
|
||||||
qoff += len;
|
|
||||||
} else if (op == 2) { // deletion
|
|
||||||
int n_ambi = 0;
|
|
||||||
for (l = 0; l < len; ++l)
|
|
||||||
if (tseq[toff + l] > 3) ++n_ambi;
|
|
||||||
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
|
||||||
s -= q + e * len;
|
|
||||||
if (s < 0) s = 0;
|
|
||||||
toff += len;
|
|
||||||
} else if (op == 3) { // intron
|
|
||||||
toff += len;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
p->dp_max = max;
|
|
||||||
assert(qoff == r->qe - r->qs && toff == r->re - r->rs);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) // TODO: this calls the libc realloc()
|
|
||||||
{
|
|
||||||
mm_extra_t *p;
|
|
||||||
if (n_cigar == 0) return;
|
|
||||||
if (r->p == 0) {
|
|
||||||
uint32_t capacity = n_cigar + sizeof(mm_extra_t)/4;
|
|
||||||
kroundup32(capacity);
|
|
||||||
r->p = (mm_extra_t*)calloc(capacity, 4);
|
|
||||||
r->p->capacity = capacity;
|
|
||||||
} else if (r->p->n_cigar + n_cigar + sizeof(mm_extra_t)/4 > r->p->capacity) {
|
|
||||||
r->p->capacity = r->p->n_cigar + n_cigar + sizeof(mm_extra_t)/4;
|
|
||||||
kroundup32(r->p->capacity);
|
|
||||||
r->p = (mm_extra_t*)realloc(r->p, r->p->capacity * 4);
|
|
||||||
}
|
|
||||||
p = r->p;
|
|
||||||
if (p->n_cigar > 0 && (p->cigar[p->n_cigar-1]&0xf) == (cigar[0]&0xf)) { // same CIGAR op at the boundary
|
|
||||||
p->cigar[p->n_cigar-1] += cigar[0]>>4<<4;
|
|
||||||
if (n_cigar > 1) memcpy(p->cigar + p->n_cigar, cigar + 1, (n_cigar - 1) * 4);
|
|
||||||
p->n_cigar += n_cigar - 1;
|
|
||||||
} else {
|
|
||||||
memcpy(p->cigar + p->n_cigar, cigar, n_cigar * 4);
|
|
||||||
p->n_cigar += n_cigar;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq) // written by @armintoepfer
|
static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq) // written by @armintoepfer
|
||||||
{
|
{
|
||||||
uint32_t n_EQX = 0;
|
uint32_t n_EQX = 0;
|
||||||
|
|
@ -290,6 +218,79 @@ static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t
|
||||||
r->p = p;
|
r->p = p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e, int is_eqx)
|
||||||
|
{
|
||||||
|
uint32_t k, l;
|
||||||
|
int32_t s = 0, max = 0, qshift, tshift, toff = 0, qoff = 0;
|
||||||
|
mm_extra_t *p = r->p;
|
||||||
|
if (p == 0) return;
|
||||||
|
mm_fix_cigar(r, qseq, tseq, &qshift, &tshift);
|
||||||
|
qseq += qshift, tseq += tshift; // qseq and tseq may be shifted due to the removal of leading I/D
|
||||||
|
r->blen = r->mlen = 0;
|
||||||
|
for (k = 0; k < p->n_cigar; ++k) {
|
||||||
|
uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4;
|
||||||
|
if (op == 0) { // match/mismatch
|
||||||
|
int n_ambi = 0, n_diff = 0;
|
||||||
|
for (l = 0; l < len; ++l) {
|
||||||
|
int cq = qseq[qoff + l], ct = tseq[toff + l];
|
||||||
|
if (ct > 3 || cq > 3) ++n_ambi;
|
||||||
|
else if (ct != cq) ++n_diff;
|
||||||
|
s += mat[ct * 5 + cq];
|
||||||
|
if (s < 0) s = 0;
|
||||||
|
else max = max > s? max : s;
|
||||||
|
}
|
||||||
|
r->blen += len - n_ambi, r->mlen += len - (n_ambi + n_diff), p->n_ambi += n_ambi;
|
||||||
|
toff += len, qoff += len;
|
||||||
|
} else if (op == 1) { // insertion
|
||||||
|
int n_ambi = 0;
|
||||||
|
for (l = 0; l < len; ++l)
|
||||||
|
if (qseq[qoff + l] > 3) ++n_ambi;
|
||||||
|
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
||||||
|
s -= q + e * len;
|
||||||
|
if (s < 0) s = 0;
|
||||||
|
qoff += len;
|
||||||
|
} else if (op == 2) { // deletion
|
||||||
|
int n_ambi = 0;
|
||||||
|
for (l = 0; l < len; ++l)
|
||||||
|
if (tseq[toff + l] > 3) ++n_ambi;
|
||||||
|
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
||||||
|
s -= q + e * len;
|
||||||
|
if (s < 0) s = 0;
|
||||||
|
toff += len;
|
||||||
|
} else if (op == 3) { // intron
|
||||||
|
toff += len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p->dp_max = max;
|
||||||
|
assert(qoff == r->qe - r->qs && toff == r->re - r->rs);
|
||||||
|
if (is_eqx) mm_update_cigar_eqx(r, qseq, tseq); // NB: it has to be called here as changes to qseq and tseq are not returned
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) // TODO: this calls the libc realloc()
|
||||||
|
{
|
||||||
|
mm_extra_t *p;
|
||||||
|
if (n_cigar == 0) return;
|
||||||
|
if (r->p == 0) {
|
||||||
|
uint32_t capacity = n_cigar + sizeof(mm_extra_t)/4;
|
||||||
|
kroundup32(capacity);
|
||||||
|
r->p = (mm_extra_t*)calloc(capacity, 4);
|
||||||
|
r->p->capacity = capacity;
|
||||||
|
} else if (r->p->n_cigar + n_cigar + sizeof(mm_extra_t)/4 > r->p->capacity) {
|
||||||
|
r->p->capacity = r->p->n_cigar + n_cigar + sizeof(mm_extra_t)/4;
|
||||||
|
kroundup32(r->p->capacity);
|
||||||
|
r->p = (mm_extra_t*)realloc(r->p, r->p->capacity * 4);
|
||||||
|
}
|
||||||
|
p = r->p;
|
||||||
|
if (p->n_cigar > 0 && (p->cigar[p->n_cigar-1]&0xf) == (cigar[0]&0xf)) { // same CIGAR op at the boundary
|
||||||
|
p->cigar[p->n_cigar-1] += cigar[0]>>4<<4;
|
||||||
|
if (n_cigar > 1) memcpy(p->cigar + p->n_cigar, cigar + 1, (n_cigar - 1) * 4);
|
||||||
|
p->n_cigar += n_cigar - 1;
|
||||||
|
} else {
|
||||||
|
memcpy(p->cigar + p->n_cigar, cigar, n_cigar * 4);
|
||||||
|
p->n_cigar += n_cigar;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const int8_t *mat, int w, int end_bonus, int zdrop, int flag, ksw_extz_t *ez)
|
static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const int8_t *mat, int w, int end_bonus, int zdrop, int flag, ksw_extz_t *ez)
|
||||||
{
|
{
|
||||||
if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) {
|
if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) {
|
||||||
|
|
@ -751,8 +752,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
||||||
assert(re1 - rs1 <= re0 - rs0);
|
assert(re1 - rs1 <= re0 - rs0);
|
||||||
if (r->p) {
|
if (r->p) {
|
||||||
mm_idx_getseq(mi, rid, rs1, re1, tseq);
|
mm_idx_getseq(mi, rid, rs1, re1, tseq);
|
||||||
mm_update_extra(r, &qseq0[r->rev][qs1], tseq, mat, opt->q, opt->e);
|
mm_update_extra(r, &qseq0[r->rev][qs1], tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX);
|
||||||
if (opt->flag & MM_F_EQX) mm_update_cigar_eqx(r, &qseq0[r->rev][qs1], tseq);
|
|
||||||
if (rev && r->p->trans_strand)
|
if (rev && r->p->trans_strand)
|
||||||
r->p->trans_strand ^= 3; // flip to the read strand
|
r->p->trans_strand ^= 3; // flip to the read strand
|
||||||
}
|
}
|
||||||
|
|
@ -810,8 +810,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i
|
||||||
}
|
}
|
||||||
r_inv->rs = r1->re + t_off;
|
r_inv->rs = r1->re + t_off;
|
||||||
r_inv->re = r_inv->rs + ez->max_t + 1;
|
r_inv->re = r_inv->rs + ez->max_t + 1;
|
||||||
mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e);
|
mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX);
|
||||||
if (opt->flag & MM_F_EQX) mm_update_cigar_eqx(r_inv, &qseq[q_off], &tseq[t_off]);
|
|
||||||
ret = 1;
|
ret = 1;
|
||||||
end_align1_inv:
|
end_align1_inv:
|
||||||
kfree(km, tseq);
|
kfree(km, tseq);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue