diff --git a/format.c b/format.c index c6b7272..203ec08 100644 --- a/format.c +++ b/format.c @@ -127,7 +127,7 @@ void mm_write_sam_hdr(const mm_idx_t *idx, const char *rg, const char *ver, int free(str.s); } -static void write_cs(void *km, kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r) +static void write_cs(void *km, kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int no_iden) { extern unsigned char seq_nt4_table[256]; int i, q_off, t_off; @@ -150,22 +150,26 @@ static void write_cs(void *km, kstring_t *s, const mm_idx_t *mi, const mm_bseq1_ } for (i = q_off = t_off = 0; i < r->p->n_cigar; ++i) { int j, op = r->p->cigar[i]&0xf, len = r->p->cigar[i]>>4; - assert(op >= 0 && op <= 2); + assert(op >= 0 && op <= 3); if (op == 0) { int l_tmp = 0; for (j = 0; j < len; ++j) { if (qseq[q_off + j] != tseq[t_off + j]) { if (l_tmp > 0) { - tmp[l_tmp] = 0; - mm_sprintf_lite(s, "=%s", tmp); + if (!no_iden) { + tmp[l_tmp] = 0; + mm_sprintf_lite(s, "=%s", tmp); + } else mm_sprintf_lite(s, ":%d", l_tmp); l_tmp = 0; } mm_sprintf_lite(s, "*%c%c", "acgtn"[tseq[t_off + j]], "acgtn"[qseq[q_off + j]]); } else tmp[l_tmp++] = "ACGTN"[qseq[q_off + j]]; } if (l_tmp > 0) { - tmp[l_tmp] = 0; - mm_sprintf_lite(s, "=%s", tmp); + if (!no_iden) { + tmp[l_tmp] = 0; + mm_sprintf_lite(s, "=%s", tmp); + } else mm_sprintf_lite(s, ":%d", l_tmp); } q_off += len, t_off += len; } else if (op == 1) { @@ -173,11 +177,15 @@ static void write_cs(void *km, kstring_t *s, const mm_idx_t *mi, const mm_bseq1_ tmp[j] = "acgtn"[qseq[q_off + j]]; mm_sprintf_lite(s, "+%s", tmp); q_off += len; - } else if (op == 2) { + } else if (op == 2 || (op == 3 && len < 4)) { for (j = 0, tmp[len] = 0; j < len; ++j) tmp[j] = "acgtn"[tseq[t_off + j]]; mm_sprintf_lite(s, "-%s", tmp); t_off += len; + } else { // op == 3 && len >= 4 + mm_sprintf_lite(s, "^%c%c%d%c%c", "acgtn"[tseq[t_off]], "acgtn"[tseq[t_off+1]], + len, "acgtn"[tseq[t_off+len-2]], "acgtn"[tseq[t_off+len-1]]); + t_off += len; } } assert(t_off == r->re - r->rs && q_off == r->qe - r->qs); @@ -215,7 +223,7 @@ void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const m mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDN"[r->p->cigar[k]&0xf]); } if (r->p && (opt_flag & MM_F_OUT_CS)) - write_cs(km, s, mi, t, r); + write_cs(km, s, mi, t, r, opt_flag&MM_F_CS_NO_EQUAL); } static void sam_write_sq(kstring_t *s, char *seq, int l, int rev, int comp) diff --git a/main.c b/main.c index 43d6ee3..b9ec332 100644 --- a/main.c +++ b/main.c @@ -39,6 +39,7 @@ static struct option long_options[] = { { "sr", no_argument, 0, 0 }, { "multi", optional_argument, 0, 0 }, { "print-2nd", optional_argument, 0, 0 }, + { "cs-no-equal", no_argument, 0, 0 }, { "help", no_argument, 0, 'h' }, { "max-intron-len", required_argument, 0, 'G' }, { "version", no_argument, 0, 'V' }, @@ -129,6 +130,7 @@ int main(int argc, char *argv[]) else if (c == 0 && long_idx ==11) opt.noncan = atoi(optarg); // --cost-non-gt-ag else if (c == 0 && long_idx ==12) opt.flag |= MM_F_NO_LJOIN; // --no-long-join else if (c == 0 && long_idx ==13) opt.flag |= MM_F_SR; // --sr + else if (c == 0 && long_idx ==16) opt.flag |= MM_F_CS_NO_EQUAL | MM_F_OUT_CS | MM_F_CIGAR; // --cs-no-equal else if (c == 0 && long_idx ==14) { // --multi if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0) opt.flag |= MM_F_MULTI_SEG; diff --git a/minimap.h b/minimap.h index 47ae44f..c0a645b 100644 --- a/minimap.h +++ b/minimap.h @@ -16,6 +16,7 @@ #define MM_F_SPLICE_FOR 0x100 // match GT-AG #define MM_F_SPLICE_REV 0x200 // match CT-AC, the reverse complement of GT-AG #define MM_F_NO_LJOIN 0x400 +#define MM_F_CS_NO_EQUAL 0x800 #define MM_F_SR 0x1000 #define MM_F_MULTI_SEG 0x2000 #define MM_F_NO_PRINT_2ND 0x4000