Use the full MIDNSHP=X string whenever printing CIGAR strings

Define MM_CIGAR_STR as the full string of CIGAR operators (including
the 'B' operator) and use it throughout the C code.

It would be possible to use it from the Cython code too, but it's easier
to keep that as a Cython string literal to avoid adding extra runtime
code to handle locale conversion.
This commit is contained in:
John Marshall 2021-04-09 19:34:53 +01:00 committed by Heng Li
parent 459ce04c84
commit 177eef259d
6 changed files with 10 additions and 8 deletions

View File

@ -333,7 +333,7 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint
int i; int i;
fprintf(stderr, "score=%d, cigar=", ez->score); fprintf(stderr, "score=%d, cigar=", ez->score);
for (i = 0; i < ez->n_cigar; ++i) for (i = 0; i < ez->n_cigar; ++i)
fprintf(stderr, "%d%c", ez->cigar[i]>>4, "MIDN"[ez->cigar[i]&0xf]); fprintf(stderr, "%d%c", ez->cigar[i]>>4, MM_CIGAR_STR[ez->cigar[i]&0xf]);
fprintf(stderr, "\n"); fprintf(stderr, "\n");
} }
} }

View File

@ -47,7 +47,7 @@ int main(int argc, char *argv[])
printf("%s\t%d\t%d\t%d\t%c\t", ks->name.s, ks->seq.l, r->qs, r->qe, "+-"[r->rev]); printf("%s\t%d\t%d\t%d\t%c\t", ks->name.s, ks->seq.l, r->qs, r->qe, "+-"[r->rev]);
printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\tcg:Z:", mi->seq[r->rid].name, mi->seq[r->rid].len, r->rs, r->re, r->mlen, r->blen, r->mapq); printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\tcg:Z:", mi->seq[r->rid].name, mi->seq[r->rid].len, r->rs, r->re, r->mlen, r->blen, r->mapq);
for (i = 0; i < r->p->n_cigar; ++i) // IMPORTANT: this gives the CIGAR in the aligned regions. NO soft/hard clippings! for (i = 0; i < r->p->n_cigar; ++i) // IMPORTANT: this gives the CIGAR in the aligned regions. NO soft/hard clippings!
printf("%d%c", r->p->cigar[i]>>4, "MIDNSH"[r->p->cigar[i]&0xf]); printf("%d%c", r->p->cigar[i]>>4, MM_CIGAR_STR[r->p->cigar[i]&0xf]);
putchar('\n'); putchar('\n');
free(r->p); free(r->p);
} }

View File

@ -325,7 +325,7 @@ void mm_write_paf3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const
uint32_t k; uint32_t k;
mm_sprintf_lite(s, "\tcg:Z:"); mm_sprintf_lite(s, "\tcg:Z:");
for (k = 0; k < r->p->n_cigar; ++k) for (k = 0; k < r->p->n_cigar; ++k)
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDNSHP=XB"[r->p->cigar[k]&0xf]); mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, MM_CIGAR_STR[r->p->cigar[k]&0xf]);
} }
if (r->p && (opt_flag & (MM_F_OUT_CS|MM_F_OUT_MD))) if (r->p && (opt_flag & (MM_F_OUT_CS|MM_F_OUT_MD)))
write_cs_or_MD(km, s, mi, t, r, !(opt_flag&MM_F_OUT_CS_LONG), opt_flag&MM_F_OUT_MD, 1); write_cs_or_MD(km, s, mi, t, r, !(opt_flag&MM_F_OUT_CS_LONG), opt_flag&MM_F_OUT_MD, 1);
@ -382,7 +382,7 @@ static void write_sam_cigar(kstring_t *s, int sam_flag, int in_tag, int qlen, co
assert(clip_len[0] < qlen && clip_len[1] < qlen); assert(clip_len[0] < qlen && clip_len[1] < qlen);
if (clip_len[0]) mm_sprintf_lite(s, "%d%c", clip_len[0], clip_char); if (clip_len[0]) mm_sprintf_lite(s, "%d%c", clip_len[0], clip_char);
for (k = 0; k < r->p->n_cigar; ++k) for (k = 0; k < r->p->n_cigar; ++k)
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDNSHP=XB"[r->p->cigar[k]&0xf]); mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, MM_CIGAR_STR[r->p->cigar[k]&0xf]);
if (clip_len[1]) mm_sprintf_lite(s, "%d%c", clip_len[1], clip_char); if (clip_len[1]) mm_sprintf_lite(s, "%d%c", clip_len[1], clip_char);
} }
} }

View File

@ -46,6 +46,8 @@
#define MM_MAX_SEG 255 #define MM_MAX_SEG 255
#define MM_CIGAR_STR "MIDNSHP=XB"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif

View File

@ -1419,7 +1419,7 @@ function paf_view(args)
var s_ref = new Bytes(), s_qry = new Bytes(), s_mid = new Bytes(); // these are used to show padded alignment var s_ref = new Bytes(), s_qry = new Bytes(), s_mid = new Bytes(); // these are used to show padded alignment
var re_cs = /([:=\-\+\*])(\d+|[A-Za-z]+)/g; var re_cs = /([:=\-\+\*])(\d+|[A-Za-z]+)/g;
var re_cg = /(\d+)([MIDNSH])/g; var re_cg = /(\d+)([MIDNSHP=X])/g;
var buf = new Bytes(); var buf = new Bytes();
var file = args[getopt.ind] == "-"? new File() : new File(args[getopt.ind]); var file = args[getopt.ind] == "-"? new File() : new File(args[getopt.ind]);
@ -1899,7 +1899,7 @@ function paf_splice2bed(args)
a.length = 0; a.length = 0;
} }
var re = /(\d+)([MIDNSH])/g; var re = /(\d+)([MIDNSHP=X])/g;
var c, fmt = "bed", fn_name_conv = null, keep_multi = false; var c, fmt = "bed", fn_name_conv = null, keep_multi = false;
while ((c = getopt(args, "f:n:m")) != null) { while ((c = getopt(args, "f:n:m")) != null) {
if (c == 'f') fmt = getopt.arg; if (c == 'f') fmt = getopt.arg;
@ -2369,7 +2369,7 @@ function paf_junceval(args)
file = getopt.ind+1 >= args.length || args[getopt.ind+1] == '-'? new File() : new File(args[getopt.ind+1]); file = getopt.ind+1 >= args.length || args[getopt.ind+1] == '-'? new File() : new File(args[getopt.ind+1]);
var last_qname = null; var last_qname = null;
var re_cigar = /(\d+)([MIDNSHX=])/g; var re_cigar = /(\d+)([MIDNSHP=X])/g;
while (file.readline(buf) >= 0) { while (file.readline(buf) >= 0) {
var m, t = buf.toString().split("\t"); var m, t = buf.toString().split("\t");
var ctg_name = null, cigar = null, pos = null, qname = t[0]; var ctg_name = null, cigar = null, pos = null, qname = t[0];

View File

@ -82,7 +82,7 @@ cdef class Alignment:
@property @property
def cigar_str(self): def cigar_str(self):
return "".join(map(lambda x: str(x[0]) + 'MIDNSH'[x[1]], self._cigar)) return "".join(map(lambda x: str(x[0]) + 'MIDNSHP=XB'[x[1]], self._cigar))
def __str__(self): def __str__(self):
if self._strand > 0: strand = '+' if self._strand > 0: strand = '+'