From 1cb0bf4befc52735898a234b74d38fde6d79b940 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Thu, 9 Nov 2017 17:14:10 +0100 Subject: [PATCH] Implement -Y for soft clipping of supp. alignments I tried to base this on bwa-mem and it seems to work for sam alignments. --- format.c | 15 ++++++++++----- main.c | 2 ++ minimap.h | 1 + 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/format.c b/format.c index 2425c05..c71cfea 100644 --- a/format.c +++ b/format.c @@ -257,7 +257,7 @@ static inline const mm_reg1_t *get_sam_pri(int n_regs, const mm_reg1_t *regs) return NULL; } -static void write_sam_cigar(kstring_t *s, int sam_flag, int in_tag, int qlen, const mm_reg1_t *r) +static void write_sam_cigar(kstring_t *s, int sam_flag, int in_tag, int qlen, const mm_reg1_t *r, int opt_flag) { if (r->p == 0) { mm_sprintf_lite(s, "*"); @@ -266,14 +266,14 @@ static void write_sam_cigar(kstring_t *s, int sam_flag, int in_tag, int qlen, co clip_len[0] = r->rev? qlen - r->qe : r->qs; clip_len[1] = r->rev? r->qs : qlen - r->qe; if (in_tag) { - int clip_char = (sam_flag&0x800)? 5 : 4; + int clip_char = (sam_flag&0x800 && !(opt_flag&MM_F_SOFTCLIP))? 5 : 4; mm_sprintf_lite(s, "\tCG:B:I"); if (clip_len[0]) mm_sprintf_lite(s, ",%u", clip_len[0]<<4|clip_char); for (k = 0; k < r->p->n_cigar; ++k) mm_sprintf_lite(s, ",%u", r->p->cigar[k]); if (clip_len[1]) mm_sprintf_lite(s, ",%u", clip_len[1]<<4|clip_char); } else { - int clip_char = (sam_flag&0x800)? 'H' : 'S'; + int clip_char = (sam_flag&0x800 && !(opt_flag&MM_F_SOFTCLIP))? 'H' : 'S'; if (clip_len[0]) mm_sprintf_lite(s, "%d%c", clip_len[0], clip_char); for (k = 0; k < r->p->n_cigar; ++k) mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDN"[r->p->cigar[k]&0xf]); @@ -348,7 +348,7 @@ void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se if (flag & 0x100) mm_sprintf_lite(s, "0S"); // secondary alignment else if (flag & 0x800) mm_sprintf_lite(s, "%dS", r->re - r->rs); // supplementary alignment else mm_sprintf_lite(s, "%dS", t->l_seq); - } else write_sam_cigar(s, flag, 0, t->l_seq, r); + } else write_sam_cigar(s, flag, 0, t->l_seq, r, opt_flag); } // write mate positions @@ -388,6 +388,11 @@ void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se else mm_sprintf_lite(s, "*"); } else if (flag & 0x100) { mm_sprintf_lite(s, "*\t*"); + } else if (opt_flag & MM_F_SOFTCLIP) { + sam_write_sq(s, t->seq, t->l_seq, r->rev, r->rev); + mm_sprintf_lite(s, "\t"); + if (t->qual) sam_write_sq(s, t->qual, t->l_seq, r->rev, 0); + else mm_sprintf_lite(s, "*"); } else { sam_write_sq(s, t->seq + r->qs, r->qe - r->qs, r->rev, r->rev); mm_sprintf_lite(s, "\t"); @@ -429,7 +434,7 @@ void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se if (r->p && (opt_flag & MM_F_OUT_CS)) write_cs(km, s, mi, t, r, !(opt_flag&MM_F_OUT_CS_LONG)); if (cigar_in_tag) - write_sam_cigar(s, flag, 1, t->l_seq, r); + write_sam_cigar(s, flag, 1, t->l_seq, r, opt_flag); } s->s[s->l] = 0; // we always have room for an extra byte (see str_enlarge) diff --git a/main.c b/main.c index 9bc5bed..d7f4132 100644 --- a/main.c +++ b/main.c @@ -109,6 +109,7 @@ int main(int argc, char *argv[]) else if (c == 'X') opt.flag |= MM_F_AVA | MM_F_NO_SELF; else if (c == 'a') opt.flag |= MM_F_OUT_SAM | MM_F_CIGAR; else if (c == 'Q') opt.flag |= MM_F_NO_QUAL; + else if (c == 'Y') opt.flag |= MM_F_SOFTCLIP; else if (c == 'L') opt.flag |= MM_F_LONG_CIGAR; else if (c == 'T') opt.sdust_thres = atoi(optarg); else if (c == 'n') opt.min_cnt = atoi(optarg); @@ -232,6 +233,7 @@ int main(int argc, char *argv[]) fprintf(fp_help, " -R STR SAM read group line in a format like '@RG\\tID:foo\\tSM:bar' []\n"); fprintf(fp_help, " -c output CIGAR in PAF\n"); fprintf(fp_help, " --cs[=STR] output the cs tag; STR is 'short' (if absent) or 'long' [none]\n"); + fprintf(fp_help, " -Y use soft clipping for supplementary alignments\n"); fprintf(fp_help, " -t INT number of threads [%d]\n", n_threads); fprintf(fp_help, " -K NUM minibatch size for mapping [500M]\n"); // fprintf(fp_help, " -v INT verbose level [%d]\n", mm_verbose); diff --git a/minimap.h b/minimap.h index a7e375c..0eb279f 100644 --- a/minimap.h +++ b/minimap.h @@ -24,6 +24,7 @@ #define MM_F_LONG_CIGAR 0x10000 #define MM_F_INDEPEND_SEG 0x20000 #define MM_F_SPLICE_FLANK 0x40000 +#define MM_F_SOFTCLIP 0x80000 #define MM_IDX_MAGIC "MMI\2"