diff --git a/align.c b/align.c index 724debe..21808b9 100644 --- a/align.c +++ b/align.c @@ -147,78 +147,6 @@ static void mm_fix_cigar(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, } } -static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e) -{ - uint32_t k, l; - int32_t s = 0, max = 0, qshift, tshift, toff = 0, qoff = 0; - mm_extra_t *p = r->p; - if (p == 0) return; - mm_fix_cigar(r, qseq, tseq, &qshift, &tshift); - qseq += qshift, tseq += tshift; // qseq and tseq may be shifted due to the removal of leading I/D - r->blen = r->mlen = 0; - for (k = 0; k < p->n_cigar; ++k) { - uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4; - if (op == 0) { // match/mismatch - int n_ambi = 0, n_diff = 0; - for (l = 0; l < len; ++l) { - int cq = qseq[qoff + l], ct = tseq[toff + l]; - if (ct > 3 || cq > 3) ++n_ambi; - else if (ct != cq) ++n_diff; - s += mat[ct * 5 + cq]; - if (s < 0) s = 0; - else max = max > s? max : s; - } - r->blen += len - n_ambi, r->mlen += len - (n_ambi + n_diff), p->n_ambi += n_ambi; - toff += len, qoff += len; - } else if (op == 1) { // insertion - int n_ambi = 0; - for (l = 0; l < len; ++l) - if (qseq[qoff + l] > 3) ++n_ambi; - r->blen += len - n_ambi, p->n_ambi += n_ambi; - s -= q + e * len; - if (s < 0) s = 0; - qoff += len; - } else if (op == 2) { // deletion - int n_ambi = 0; - for (l = 0; l < len; ++l) - if (tseq[toff + l] > 3) ++n_ambi; - r->blen += len - n_ambi, p->n_ambi += n_ambi; - s -= q + e * len; - if (s < 0) s = 0; - toff += len; - } else if (op == 3) { // intron - toff += len; - } - } - p->dp_max = max; - assert(qoff == r->qe - r->qs && toff == r->re - r->rs); -} - -static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) // TODO: this calls the libc realloc() -{ - mm_extra_t *p; - if (n_cigar == 0) return; - if (r->p == 0) { - uint32_t capacity = n_cigar + sizeof(mm_extra_t)/4; - kroundup32(capacity); - r->p = (mm_extra_t*)calloc(capacity, 4); - r->p->capacity = capacity; - } else if (r->p->n_cigar + n_cigar + sizeof(mm_extra_t)/4 > r->p->capacity) { - r->p->capacity = r->p->n_cigar + n_cigar + sizeof(mm_extra_t)/4; - kroundup32(r->p->capacity); - r->p = (mm_extra_t*)realloc(r->p, r->p->capacity * 4); - } - p = r->p; - if (p->n_cigar > 0 && (p->cigar[p->n_cigar-1]&0xf) == (cigar[0]&0xf)) { // same CIGAR op at the boundary - p->cigar[p->n_cigar-1] += cigar[0]>>4<<4; - if (n_cigar > 1) memcpy(p->cigar + p->n_cigar, cigar + 1, (n_cigar - 1) * 4); - p->n_cigar += n_cigar - 1; - } else { - memcpy(p->cigar + p->n_cigar, cigar, n_cigar * 4); - p->n_cigar += n_cigar; - } -} - static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq) // written by @armintoepfer { uint32_t n_EQX = 0; @@ -290,6 +218,79 @@ static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t r->p = p; } +static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e, int is_eqx) +{ + uint32_t k, l; + int32_t s = 0, max = 0, qshift, tshift, toff = 0, qoff = 0; + mm_extra_t *p = r->p; + if (p == 0) return; + mm_fix_cigar(r, qseq, tseq, &qshift, &tshift); + qseq += qshift, tseq += tshift; // qseq and tseq may be shifted due to the removal of leading I/D + r->blen = r->mlen = 0; + for (k = 0; k < p->n_cigar; ++k) { + uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4; + if (op == 0) { // match/mismatch + int n_ambi = 0, n_diff = 0; + for (l = 0; l < len; ++l) { + int cq = qseq[qoff + l], ct = tseq[toff + l]; + if (ct > 3 || cq > 3) ++n_ambi; + else if (ct != cq) ++n_diff; + s += mat[ct * 5 + cq]; + if (s < 0) s = 0; + else max = max > s? max : s; + } + r->blen += len - n_ambi, r->mlen += len - (n_ambi + n_diff), p->n_ambi += n_ambi; + toff += len, qoff += len; + } else if (op == 1) { // insertion + int n_ambi = 0; + for (l = 0; l < len; ++l) + if (qseq[qoff + l] > 3) ++n_ambi; + r->blen += len - n_ambi, p->n_ambi += n_ambi; + s -= q + e * len; + if (s < 0) s = 0; + qoff += len; + } else if (op == 2) { // deletion + int n_ambi = 0; + for (l = 0; l < len; ++l) + if (tseq[toff + l] > 3) ++n_ambi; + r->blen += len - n_ambi, p->n_ambi += n_ambi; + s -= q + e * len; + if (s < 0) s = 0; + toff += len; + } else if (op == 3) { // intron + toff += len; + } + } + p->dp_max = max; + assert(qoff == r->qe - r->qs && toff == r->re - r->rs); + if (is_eqx) mm_update_cigar_eqx(r, qseq, tseq); // NB: it has to be called here as changes to qseq and tseq are not returned +} + +static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) // TODO: this calls the libc realloc() +{ + mm_extra_t *p; + if (n_cigar == 0) return; + if (r->p == 0) { + uint32_t capacity = n_cigar + sizeof(mm_extra_t)/4; + kroundup32(capacity); + r->p = (mm_extra_t*)calloc(capacity, 4); + r->p->capacity = capacity; + } else if (r->p->n_cigar + n_cigar + sizeof(mm_extra_t)/4 > r->p->capacity) { + r->p->capacity = r->p->n_cigar + n_cigar + sizeof(mm_extra_t)/4; + kroundup32(r->p->capacity); + r->p = (mm_extra_t*)realloc(r->p, r->p->capacity * 4); + } + p = r->p; + if (p->n_cigar > 0 && (p->cigar[p->n_cigar-1]&0xf) == (cigar[0]&0xf)) { // same CIGAR op at the boundary + p->cigar[p->n_cigar-1] += cigar[0]>>4<<4; + if (n_cigar > 1) memcpy(p->cigar + p->n_cigar, cigar + 1, (n_cigar - 1) * 4); + p->n_cigar += n_cigar - 1; + } else { + memcpy(p->cigar + p->n_cigar, cigar, n_cigar * 4); + p->n_cigar += n_cigar; + } +} + static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const int8_t *mat, int w, int end_bonus, int zdrop, int flag, ksw_extz_t *ez) { if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) { @@ -751,8 +752,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int assert(re1 - rs1 <= re0 - rs0); if (r->p) { mm_idx_getseq(mi, rid, rs1, re1, tseq); - mm_update_extra(r, &qseq0[r->rev][qs1], tseq, mat, opt->q, opt->e); - if (opt->flag & MM_F_EQX) mm_update_cigar_eqx(r, &qseq0[r->rev][qs1], tseq); + mm_update_extra(r, &qseq0[r->rev][qs1], tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX); if (rev && r->p->trans_strand) r->p->trans_strand ^= 3; // flip to the read strand } @@ -810,8 +810,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i } r_inv->rs = r1->re + t_off; r_inv->re = r_inv->rs + ez->max_t + 1; - mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e); - if (opt->flag & MM_F_EQX) mm_update_cigar_eqx(r_inv, &qseq[q_off], &tseq[t_off]); + mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX); ret = 1; end_align1_inv: kfree(km, tseq); diff --git a/main.c b/main.c index 763ed9e..894e503 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "ketopt.h" -#define MM_VERSION "2.14-r886-dirty" +#define MM_VERSION "2.14-r888-dirty" #ifdef __linux__ #include