Use #defines for CIGAR operators in C code
Give the CIGAR constants names to clarify the code. So that ksw2.h remains self-contained, define KSW_* versions of the CIGAR operators it needs for use within ksw2.h. Other code should in general use the full set of MM_CIGAR_* constants in minimap.h.
This commit is contained in:
parent
177eef259d
commit
260a68d232
60
align.c
60
align.c
|
|
@ -53,16 +53,16 @@ static int mm_test_zdrop(void *km, const mm_mapopt_t *opt, const uint8_t *qseq,
|
|||
// find the score and the region where score drops most along diagonal
|
||||
for (k = 0, score = 0; k < n_cigar; ++k) {
|
||||
uint32_t l, op = cigar[k]&0xf, len = cigar[k]>>4;
|
||||
if (op == 0) {
|
||||
if (op == MM_CIGAR_MATCH) {
|
||||
for (l = 0; l < len; ++l) {
|
||||
score += mat[tseq[i + l] * 5 + qseq[j + l]];
|
||||
update_max_zdrop(score, i+l, j+l, &max, &max_i, &max_j, opt->e, &max_zdrop, pos);
|
||||
}
|
||||
i += len, j += len;
|
||||
} else if (op == 1 || op == 2 || op == 3) {
|
||||
} else if (op == MM_CIGAR_INS || op == MM_CIGAR_DEL || op == MM_CIGAR_N_SKIP) {
|
||||
score -= opt->q + opt->e * len;
|
||||
if (op == 1) j += len; // insertion
|
||||
else i += len; // deletion
|
||||
if (op == MM_CIGAR_INS) j += len;
|
||||
else i += len;
|
||||
update_max_zdrop(score, i, j, &max, &max_i, &max_j, opt->e, &max_zdrop, pos);
|
||||
}
|
||||
}
|
||||
|
|
@ -98,12 +98,12 @@ static void mm_fix_cigar(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq,
|
|||
for (k = 0; k < p->n_cigar; ++k) { // indel left alignment
|
||||
uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4;
|
||||
if (len == 0) to_shrink = 1;
|
||||
if (op == 0) {
|
||||
if (op == MM_CIGAR_MATCH) {
|
||||
toff += len, qoff += len;
|
||||
} else if (op == 1 || op == 2) { // insertion or deletion
|
||||
} else if (op == MM_CIGAR_INS || op == MM_CIGAR_DEL) {
|
||||
if (k > 0 && k < p->n_cigar - 1 && (p->cigar[k-1]&0xf) == 0 && (p->cigar[k+1]&0xf) == 0) {
|
||||
int l, prev_len = p->cigar[k-1] >> 4;
|
||||
if (op == 1) {
|
||||
if (op == MM_CIGAR_INS) {
|
||||
for (l = 0; l < prev_len; ++l)
|
||||
if (qseq[qoff - 1 - l] != qseq[qoff + len - 1 - l])
|
||||
break;
|
||||
|
|
@ -116,9 +116,9 @@ static void mm_fix_cigar(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq,
|
|||
p->cigar[k-1] -= l<<4, p->cigar[k+1] += l<<4, qoff -= l, toff -= l;
|
||||
if (l == prev_len) to_shrink = 1;
|
||||
}
|
||||
if (op == 1) qoff += len;
|
||||
if (op == MM_CIGAR_INS) qoff += len;
|
||||
else toff += len;
|
||||
} else if (op == 3) {
|
||||
} else if (op == MM_CIGAR_N_SKIP) {
|
||||
toff += len;
|
||||
}
|
||||
}
|
||||
|
|
@ -128,13 +128,13 @@ static void mm_fix_cigar(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq,
|
|||
uint32_t l, s[3] = {0,0,0};
|
||||
for (l = k; l < p->n_cigar; ++l) { // count number of adjacent I and D
|
||||
uint32_t op = p->cigar[l]&0xf;
|
||||
if (op == 1 || op == 2 || p->cigar[l]>>4 == 0)
|
||||
if (op == MM_CIGAR_INS || op == MM_CIGAR_DEL || p->cigar[l]>>4 == 0)
|
||||
s[op] += p->cigar[l] >> 4;
|
||||
else break;
|
||||
}
|
||||
if (s[1] > 0 && s[2] > 0 && l - k > 2) { // turn to a single I and a single D
|
||||
p->cigar[k] = s[1]<<4|1;
|
||||
p->cigar[k+1] = s[2]<<4|2;
|
||||
p->cigar[k] = s[1]<<4|MM_CIGAR_INS;
|
||||
p->cigar[k+1] = s[2]<<4|MM_CIGAR_DEL;
|
||||
for (k += 2; k < l; ++k)
|
||||
p->cigar[k] &= 0xf;
|
||||
to_shrink = 1;
|
||||
|
|
@ -154,9 +154,9 @@ static void mm_fix_cigar(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq,
|
|||
else p->cigar[k+1] += p->cigar[k]>>4<<4; // add length to the next CIGAR operator
|
||||
p->n_cigar = l;
|
||||
}
|
||||
if ((p->cigar[0]&0xf) == 1 || (p->cigar[0]&0xf) == 2) { // get rid of leading I or D
|
||||
if ((p->cigar[0]&0xf) == MM_CIGAR_INS || (p->cigar[0]&0xf) == MM_CIGAR_DEL) { // get rid of leading I or D
|
||||
int32_t l = p->cigar[0] >> 4;
|
||||
if ((p->cigar[0]&0xf) == 1) {
|
||||
if ((p->cigar[0]&0xf) == MM_CIGAR_INS) {
|
||||
if (r->rev) r->qe -= l;
|
||||
else r->qs += l;
|
||||
*qshift = l;
|
||||
|
|
@ -174,7 +174,7 @@ static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t
|
|||
if (r->p == 0) return;
|
||||
for (k = 0; k < r->p->n_cigar; ++k) {
|
||||
uint32_t op = r->p->cigar[k]&0xf, len = r->p->cigar[k]>>4;
|
||||
if (op == 0) {
|
||||
if (op == MM_CIGAR_MATCH) {
|
||||
while (len > 0) {
|
||||
for (l = 0; l < len && qseq[qoff + l] == tseq[toff + l]; ++l) {} // run of "="; TODO: N<=>N is converted to "="
|
||||
if (l > 0) { ++n_EQX; len -= l; toff += l; qoff += l; }
|
||||
|
|
@ -183,11 +183,11 @@ static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t
|
|||
if (l > 0) { ++n_EQX; len -= l; toff += l; qoff += l; }
|
||||
}
|
||||
++n_M;
|
||||
} else if (op == 1) { // insertion
|
||||
} else if (op == MM_CIGAR_INS) {
|
||||
qoff += len;
|
||||
} else if (op == 2) { // deletion
|
||||
} else if (op == MM_CIGAR_DEL) {
|
||||
toff += len;
|
||||
} else if (op == 3) { // intron
|
||||
} else if (op == MM_CIGAR_N_SKIP) {
|
||||
toff += len;
|
||||
}
|
||||
}
|
||||
|
|
@ -195,7 +195,7 @@ static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t
|
|||
if (n_EQX == n_M) {
|
||||
for (k = 0; k < r->p->n_cigar; ++k) {
|
||||
uint32_t op = r->p->cigar[k]&0xf, len = r->p->cigar[k]>>4;
|
||||
if (op == 0) r->p->cigar[k] = len << 4 | 7;
|
||||
if (op == MM_CIGAR_MATCH) r->p->cigar[k] = len << 4 | MM_CIGAR_EQ_MATCH;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
@ -209,25 +209,25 @@ static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t
|
|||
toff = qoff = m = 0;
|
||||
for (k = 0; k < r->p->n_cigar; ++k) {
|
||||
uint32_t op = r->p->cigar[k]&0xf, len = r->p->cigar[k]>>4;
|
||||
if (op == 0) { // match/mismatch
|
||||
if (op == MM_CIGAR_MATCH) {
|
||||
while (len > 0) {
|
||||
// match
|
||||
for (l = 0; l < len && qseq[qoff + l] == tseq[toff + l]; ++l) {}
|
||||
if (l > 0) p->cigar[m++] = l << 4 | 7;
|
||||
if (l > 0) p->cigar[m++] = l << 4 | MM_CIGAR_EQ_MATCH;
|
||||
len -= l;
|
||||
toff += l, qoff += l;
|
||||
// mismatch
|
||||
for (l = 0; l < len && qseq[qoff + l] != tseq[toff + l]; ++l) {}
|
||||
if (l > 0) p->cigar[m++] = l << 4 | 8;
|
||||
if (l > 0) p->cigar[m++] = l << 4 | MM_CIGAR_X_MISMATCH;
|
||||
len -= l;
|
||||
toff += l, qoff += l;
|
||||
}
|
||||
continue;
|
||||
} else if (op == 1) { // insertion
|
||||
} else if (op == MM_CIGAR_INS) {
|
||||
qoff += len;
|
||||
} else if (op == 2) { // deletion
|
||||
} else if (op == MM_CIGAR_DEL) {
|
||||
toff += len;
|
||||
} else if (op == 3) { // intron
|
||||
} else if (op == MM_CIGAR_N_SKIP) {
|
||||
toff += len;
|
||||
}
|
||||
p->cigar[m++] = r->p->cigar[k];
|
||||
|
|
@ -248,7 +248,7 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
|
|||
r->blen = r->mlen = 0;
|
||||
for (k = 0; k < p->n_cigar; ++k) {
|
||||
uint32_t op = p->cigar[k]&0xf, len = p->cigar[k]>>4;
|
||||
if (op == 0) { // match/mismatch
|
||||
if (op == MM_CIGAR_MATCH) {
|
||||
int n_ambi = 0, n_diff = 0;
|
||||
for (l = 0; l < len; ++l) {
|
||||
int cq = qseq[qoff + l], ct = tseq[toff + l];
|
||||
|
|
@ -260,7 +260,7 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
|
|||
}
|
||||
r->blen += len - n_ambi, r->mlen += len - (n_ambi + n_diff), p->n_ambi += n_ambi;
|
||||
toff += len, qoff += len;
|
||||
} else if (op == 1) { // insertion
|
||||
} else if (op == MM_CIGAR_INS) {
|
||||
int n_ambi = 0;
|
||||
for (l = 0; l < len; ++l)
|
||||
if (qseq[qoff + l] > 3) ++n_ambi;
|
||||
|
|
@ -268,7 +268,7 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
|
|||
s -= q + e * len;
|
||||
if (s < 0) s = 0;
|
||||
qoff += len;
|
||||
} else if (op == 2) { // deletion
|
||||
} else if (op == MM_CIGAR_DEL) {
|
||||
int n_ambi = 0;
|
||||
for (l = 0; l < len; ++l)
|
||||
if (tseq[toff + l] > 3) ++n_ambi;
|
||||
|
|
@ -276,7 +276,7 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
|
|||
s -= q + e * len;
|
||||
if (s < 0) s = 0;
|
||||
toff += len;
|
||||
} else if (op == 3) { // intron
|
||||
} else if (op == MM_CIGAR_N_SKIP) {
|
||||
toff += len;
|
||||
}
|
||||
}
|
||||
|
|
@ -730,7 +730,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
if (qseq[j] >= 4 || tseq[j] >= 4) ez->score += opt->e2;
|
||||
else ez->score += qseq[j] == tseq[j]? opt->a : -opt->b;
|
||||
}
|
||||
ez->cigar = ksw_push_cigar(km, &ez->n_cigar, &ez->m_cigar, ez->cigar, 0, qe - qs);
|
||||
ez->cigar = ksw_push_cigar(km, &ez->n_cigar, &ez->m_cigar, ez->cigar, MM_CIGAR_MATCH, qe - qs);
|
||||
} else { // perform normal gapped alignment
|
||||
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, junc, mat, bw1, -1, opt->zdrop, extra_flag|KSW_EZ_APPROX_MAX, ez); // first pass: with approximate Z-drop
|
||||
}
|
||||
|
|
|
|||
20
format.c
20
format.c
|
|
@ -144,8 +144,8 @@ static void write_cs_core(kstring_t *s, const uint8_t *tseq, const uint8_t *qseq
|
|||
if (write_tag) mm_sprintf_lite(s, "\tcs:Z:");
|
||||
for (i = q_off = t_off = 0; i < (int)r->p->n_cigar; ++i) {
|
||||
int j, op = r->p->cigar[i]&0xf, len = r->p->cigar[i]>>4;
|
||||
assert((op >= 0 && op <= 3) || op == 7 || op == 8);
|
||||
if (op == 0 || op == 7 || op == 8) { // match
|
||||
assert((op >= MM_CIGAR_MATCH && op <= MM_CIGAR_N_SKIP) || op == MM_CIGAR_EQ_MATCH || op == MM_CIGAR_X_MISMATCH);
|
||||
if (op == MM_CIGAR_MATCH || op == MM_CIGAR_EQ_MATCH || op == MM_CIGAR_X_MISMATCH) {
|
||||
int l_tmp = 0;
|
||||
for (j = 0; j < len; ++j) {
|
||||
if (qseq[q_off + j] != tseq[t_off + j]) {
|
||||
|
|
@ -166,12 +166,12 @@ static void write_cs_core(kstring_t *s, const uint8_t *tseq, const uint8_t *qseq
|
|||
} else mm_sprintf_lite(s, ":%d", l_tmp);
|
||||
}
|
||||
q_off += len, t_off += len;
|
||||
} else if (op == 1) { // insertion to ref
|
||||
} else if (op == MM_CIGAR_INS) {
|
||||
for (j = 0, tmp[len] = 0; j < len; ++j)
|
||||
tmp[j] = "acgtn"[qseq[q_off + j]];
|
||||
mm_sprintf_lite(s, "+%s", tmp);
|
||||
q_off += len;
|
||||
} else if (op == 2) { // deletion from ref
|
||||
} else if (op == MM_CIGAR_DEL) {
|
||||
for (j = 0, tmp[len] = 0; j < len; ++j)
|
||||
tmp[j] = "acgtn"[tseq[t_off + j]];
|
||||
mm_sprintf_lite(s, "-%s", tmp);
|
||||
|
|
@ -192,8 +192,8 @@ static void write_MD_core(kstring_t *s, const uint8_t *tseq, const uint8_t *qseq
|
|||
if (write_tag) mm_sprintf_lite(s, "\tMD:Z:");
|
||||
for (i = q_off = t_off = 0; i < (int)r->p->n_cigar; ++i) {
|
||||
int j, op = r->p->cigar[i]&0xf, len = r->p->cigar[i]>>4;
|
||||
assert((op >= 0 && op <= 3) || op == 7 || op == 8);
|
||||
if (op == 0 || op == 7 || op == 8) { // match
|
||||
assert((op >= MM_CIGAR_MATCH && op <= MM_CIGAR_N_SKIP) || op == MM_CIGAR_EQ_MATCH || op == MM_CIGAR_X_MISMATCH);
|
||||
if (op == MM_CIGAR_MATCH || op == MM_CIGAR_EQ_MATCH || op == MM_CIGAR_X_MISMATCH) {
|
||||
for (j = 0; j < len; ++j) {
|
||||
if (qseq[q_off + j] != tseq[t_off + j]) {
|
||||
mm_sprintf_lite(s, "%d%c", l_MD, "ACGTN"[tseq[t_off + j]]);
|
||||
|
|
@ -201,15 +201,15 @@ static void write_MD_core(kstring_t *s, const uint8_t *tseq, const uint8_t *qseq
|
|||
} else ++l_MD;
|
||||
}
|
||||
q_off += len, t_off += len;
|
||||
} else if (op == 1) { // insertion to ref
|
||||
} else if (op == MM_CIGAR_INS) {
|
||||
q_off += len;
|
||||
} else if (op == 2) { // deletion from ref
|
||||
} else if (op == MM_CIGAR_DEL) {
|
||||
for (j = 0, tmp[len] = 0; j < len; ++j)
|
||||
tmp[j] = "ACGTN"[tseq[t_off + j]];
|
||||
mm_sprintf_lite(s, "%d^%s", l_MD, tmp);
|
||||
l_MD = 0;
|
||||
t_off += len;
|
||||
} else if (op == 3) { // reference skip
|
||||
} else if (op == MM_CIGAR_N_SKIP) {
|
||||
t_off += len;
|
||||
}
|
||||
}
|
||||
|
|
@ -271,7 +271,7 @@ double mm_event_identity(const mm_reg1_t *r)
|
|||
if (r->p == 0) return -1.0f;
|
||||
for (i = 0; i < r->p->n_cigar; ++i) {
|
||||
int32_t op = r->p->cigar[i] & 0xf, len = r->p->cigar[i] >> 4;
|
||||
if (op == 1 || op == 2)
|
||||
if (op == MM_CIGAR_INS || op == MM_CIGAR_DEL)
|
||||
++n_gapo, n_gap += len;
|
||||
}
|
||||
return (double)r->mlen / (r->blen + r->p->n_ambi - n_gap + n_gapo);
|
||||
|
|
|
|||
19
ksw2.h
19
ksw2.h
|
|
@ -16,6 +16,13 @@
|
|||
#define KSW_EZ_SPLICE_REV 0x200
|
||||
#define KSW_EZ_SPLICE_FLANK 0x400
|
||||
|
||||
// The subset of CIGAR operators used by ksw code.
|
||||
// Use MM_CIGAR_* from minimap.h if you need the full list.
|
||||
#define KSW_CIGAR_MATCH 0
|
||||
#define KSW_CIGAR_INS 1
|
||||
#define KSW_CIGAR_DEL 2
|
||||
#define KSW_CIGAR_N_SKIP 3
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
|
@ -137,13 +144,13 @@ static inline void ksw_backtrack(void *km, int is_rot, int is_rev, int min_intro
|
|||
else if (!(tmp >> (state + 2) & 1)) state = 0; // if requesting other states, _state_ stays the same if it is a continuation; otherwise, set to H
|
||||
if (state == 0) state = tmp & 7; // TODO: probably this line can be merged into the "else if" line right above; not 100% sure
|
||||
if (force_state >= 0) state = force_state;
|
||||
if (state == 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 0, 1), --i, --j; // match
|
||||
else if (state == 1 || (state == 3 && min_intron_len <= 0)) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 2, 1), --i; // deletion
|
||||
else if (state == 3 && min_intron_len > 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 3, 1), --i; // intron
|
||||
else cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 1, 1), --j; // insertion
|
||||
if (state == 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_MATCH, 1), --i, --j;
|
||||
else if (state == 1 || (state == 3 && min_intron_len <= 0)) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_DEL, 1), --i;
|
||||
else if (state == 3 && min_intron_len > 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_N_SKIP, 1), --i;
|
||||
else cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_INS, 1), --j;
|
||||
}
|
||||
if (i >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, min_intron_len > 0 && i >= min_intron_len? 3 : 2, i + 1); // first deletion
|
||||
if (j >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 1, j + 1); // first insertion
|
||||
if (i >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, min_intron_len > 0 && i >= min_intron_len? KSW_CIGAR_N_SKIP : KSW_CIGAR_DEL, i + 1); // first deletion
|
||||
if (j >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_INS, j + 1); // first insertion
|
||||
if (!is_rev)
|
||||
for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR
|
||||
tmp = cigar[i], cigar[i] = cigar[n_cigar-1-i], cigar[n_cigar-1-i] = tmp;
|
||||
|
|
|
|||
10
minimap.h
10
minimap.h
|
|
@ -46,6 +46,16 @@
|
|||
|
||||
#define MM_MAX_SEG 255
|
||||
|
||||
#define MM_CIGAR_MATCH 0
|
||||
#define MM_CIGAR_INS 1
|
||||
#define MM_CIGAR_DEL 2
|
||||
#define MM_CIGAR_N_SKIP 3
|
||||
#define MM_CIGAR_SOFTCLIP 4
|
||||
#define MM_CIGAR_HARDCLIP 5
|
||||
#define MM_CIGAR_PADDING 6
|
||||
#define MM_CIGAR_EQ_MATCH 7
|
||||
#define MM_CIGAR_X_MISMATCH 8
|
||||
|
||||
#define MM_CIGAR_STR "MIDNSHP=XB"
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
Loading…
Reference in New Issue