From 177eef259d1d3f3aa453dd6bb7222e1cc3a0ce52 Mon Sep 17 00:00:00 2001 From: John Marshall Date: Fri, 9 Apr 2021 19:34:53 +0100 Subject: [PATCH] Use the full MIDNSHP=X string whenever printing CIGAR strings Define MM_CIGAR_STR to the full string of CIGAR operators (including the 'B' operator as well) and use it throughout the C code. It would be possible to use it from the Cython code too, but it's easier to keep that as a Cython string literal to avoid adding extra runtime code to handle locale conversion. --- align.c | 2 +- example.c | 2 +- format.c | 4 ++-- minimap.h | 2 ++ misc/paftools.js | 6 +++--- python/mappy.pyx | 2 +- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/align.c b/align.c index f2f154f..3b5ff9a 100644 --- a/align.c +++ b/align.c @@ -333,7 +333,7 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint int i; fprintf(stderr, "score=%d, cigar=", ez->score); for (i = 0; i < ez->n_cigar; ++i) - fprintf(stderr, "%d%c", ez->cigar[i]>>4, "MIDN"[ez->cigar[i]&0xf]); + fprintf(stderr, "%d%c", ez->cigar[i]>>4, MM_CIGAR_STR[ez->cigar[i]&0xf]); fprintf(stderr, "\n"); } } diff --git a/example.c b/example.c index ca1fdbe..b495051 100644 --- a/example.c +++ b/example.c @@ -47,7 +47,7 @@ int main(int argc, char *argv[]) printf("%s\t%d\t%d\t%d\t%c\t", ks->name.s, ks->seq.l, r->qs, r->qe, "+-"[r->rev]); printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\tcg:Z:", mi->seq[r->rid].name, mi->seq[r->rid].len, r->rs, r->re, r->mlen, r->blen, r->mapq); for (i = 0; i < r->p->n_cigar; ++i) // IMPORTANT: this gives the CIGAR in the aligned regions. NO soft/hard clippings! - printf("%d%c", r->p->cigar[i]>>4, "MIDNSH"[r->p->cigar[i]&0xf]); + printf("%d%c", r->p->cigar[i]>>4, MM_CIGAR_STR[r->p->cigar[i]&0xf]); putchar('\n'); free(r->p); } diff --git a/format.c b/format.c index 11f24b5..a33c3b6 100644 --- a/format.c +++ b/format.c @@ -325,7 +325,7 @@ void mm_write_paf3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const uint32_t k; mm_sprintf_lite(s, "\tcg:Z:"); for (k = 0; k < r->p->n_cigar; ++k) - mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDNSHP=XB"[r->p->cigar[k]&0xf]); + mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, MM_CIGAR_STR[r->p->cigar[k]&0xf]); } if (r->p && (opt_flag & (MM_F_OUT_CS|MM_F_OUT_MD))) write_cs_or_MD(km, s, mi, t, r, !(opt_flag&MM_F_OUT_CS_LONG), opt_flag&MM_F_OUT_MD, 1); @@ -382,7 +382,7 @@ static void write_sam_cigar(kstring_t *s, int sam_flag, int in_tag, int qlen, co assert(clip_len[0] < qlen && clip_len[1] < qlen); if (clip_len[0]) mm_sprintf_lite(s, "%d%c", clip_len[0], clip_char); for (k = 0; k < r->p->n_cigar; ++k) - mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDNSHP=XB"[r->p->cigar[k]&0xf]); + mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, MM_CIGAR_STR[r->p->cigar[k]&0xf]); if (clip_len[1]) mm_sprintf_lite(s, "%d%c", clip_len[1], clip_char); } } diff --git a/minimap.h b/minimap.h index 7386632..d095f46 100644 --- a/minimap.h +++ b/minimap.h @@ -46,6 +46,8 @@ #define MM_MAX_SEG 255 +#define MM_CIGAR_STR "MIDNSHP=XB" + #ifdef __cplusplus extern "C" { #endif diff --git a/misc/paftools.js b/misc/paftools.js index f048b07..999aa78 100755 --- a/misc/paftools.js +++ b/misc/paftools.js @@ -1419,7 +1419,7 @@ function paf_view(args) var s_ref = new Bytes(), s_qry = new Bytes(), s_mid = new Bytes(); // these are used to show padded alignment var re_cs = /([:=\-\+\*])(\d+|[A-Za-z]+)/g; - var re_cg = /(\d+)([MIDNSH])/g; + var re_cg = /(\d+)([MIDNSHP=X])/g; var buf = new Bytes(); var file = args[getopt.ind] == "-"? new File() : new File(args[getopt.ind]); @@ -1899,7 +1899,7 @@ function paf_splice2bed(args) a.length = 0; } - var re = /(\d+)([MIDNSH])/g; + var re = /(\d+)([MIDNSHP=X])/g; var c, fmt = "bed", fn_name_conv = null, keep_multi = false; while ((c = getopt(args, "f:n:m")) != null) { if (c == 'f') fmt = getopt.arg; @@ -2369,7 +2369,7 @@ function paf_junceval(args) file = getopt.ind+1 >= args.length || args[getopt.ind+1] == '-'? new File() : new File(args[getopt.ind+1]); var last_qname = null; - var re_cigar = /(\d+)([MIDNSHX=])/g; + var re_cigar = /(\d+)([MIDNSHP=X])/g; while (file.readline(buf) >= 0) { var m, t = buf.toString().split("\t"); var ctg_name = null, cigar = null, pos = null, qname = t[0]; diff --git a/python/mappy.pyx b/python/mappy.pyx index d0bf7f8..920bc0d 100644 --- a/python/mappy.pyx +++ b/python/mappy.pyx @@ -82,7 +82,7 @@ cdef class Alignment: @property def cigar_str(self): - return "".join(map(lambda x: str(x[0]) + 'MIDNSH'[x[1]], self._cigar)) + return "".join(map(lambda x: str(x[0]) + 'MIDNSHP=XB'[x[1]], self._cigar)) def __str__(self): if self._strand > 0: strand = '+'