From 704fbc6f5ced15bc8891bba2b89f1cb607c7054b Mon Sep 17 00:00:00 2001 From: Mikhail Kolmogorov Date: Fri, 21 Apr 2023 08:06:13 -0700 Subject: [PATCH] An option to output SEQ field for secondary alignment (#687) * a new option --secondary-seq to output SEQ field for secondary alignments * comments removed * Fixed a conflict in #687 --------- Co-authored-by: Heng Li --- format.c | 10 ++++++---- main.c | 2 ++ minimap.h | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/format.c b/format.c index e588814..f2d4899 100644 --- a/format.c +++ b/format.c @@ -370,14 +370,16 @@ static void write_sam_cigar(kstring_t *s, int sam_flag, int in_tag, int qlen, co clip_len[0] = r->rev? qlen - r->qe : r->qs; clip_len[1] = r->rev? r->qs : qlen - r->qe; if (in_tag) { - int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 5 : 4; + int clip_char = ((sam_flag&0x800 || (sam_flag&0x100 && opt_flag&MM_F_SECONDARY_SEQ)) && + !(opt_flag&MM_F_SOFTCLIP)) ? 5 : 4; mm_sprintf_lite(s, "\tCG:B:I"); if (clip_len[0]) mm_sprintf_lite(s, ",%u", clip_len[0]<<4|clip_char); for (k = 0; k < r->p->n_cigar; ++k) mm_sprintf_lite(s, ",%u", r->p->cigar[k]); if (clip_len[1]) mm_sprintf_lite(s, ",%u", clip_len[1]<<4|clip_char); } else { - int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 'H' : 'S'; + int clip_char = ((sam_flag&0x800 || (sam_flag&0x100 && opt_flag&MM_F_SECONDARY_SEQ)) && + !(opt_flag&MM_F_SOFTCLIP)) ? 
'H' : 'S'; assert(clip_len[0] < qlen && clip_len[1] < qlen); if (clip_len[0]) mm_sprintf_lite(s, "%d%c", clip_len[0], clip_char); for (k = 0; k < r->p->n_cigar; ++k) @@ -452,7 +454,7 @@ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se if (cigar_in_tag) { int slen; if ((flag & 0x900) == 0 || (opt_flag & MM_F_SOFTCLIP)) slen = t->l_seq; - else if (flag & 0x100) slen = 0; + else if ((flag & 0x100) && !(opt_flag & MM_F_SECONDARY_SEQ)) slen = 0; else slen = r->qe - r->qs; mm_sprintf_lite(s, "%dS%dN", slen, r->re - r->rs); } else write_sam_cigar(s, flag, 0, t->l_seq, r, opt_flag); @@ -493,7 +495,7 @@ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se mm_sprintf_lite(s, "\t"); if (t->qual) sam_write_sq(s, t->qual, t->l_seq, r->rev, 0); else mm_sprintf_lite(s, "*"); - } else if (flag & 0x100) { + } else if ((flag & 0x100) && !(opt_flag & MM_F_SECONDARY_SEQ)){ mm_sprintf_lite(s, "*\t*"); } else { sam_write_sq(s, t->seq + r->qs, r->qe - r->qs, r->rev, r->rev); diff --git a/main.c b/main.c index b68381a..31d874c 100644 --- a/main.c +++ b/main.c @@ -76,6 +76,7 @@ static ko_longopt_t long_options[] = { { "chain-skip-scale",ko_required_argument,351 }, { "print-chains", ko_no_argument, 352 }, { "no-hash-name", ko_no_argument, 353 }, + { "secondary-seq", ko_no_argument, 354 }, { "help", ko_no_argument, 'h' }, { "max-intron-len", ko_required_argument, 'G' }, { "version", ko_no_argument, 'V' }, @@ -241,6 +242,7 @@ int main(int argc, char *argv[]) else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name + else if (c == 354) opt.flag |= MM_F_SECONDARY_SEQ; // --secondary-seq else if (c == 330) { fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n"); } else if (c == 314) { // --frag diff --git a/minimap.h b/minimap.h index 34c6c54..0e22792 100644 
--- a/minimap.h +++ b/minimap.h @@ -43,6 +43,7 @@ #define MM_F_NO_INV (0x200000000LL) #define MM_F_NO_HASH_NAME (0x400000000LL) #define MM_F_SPLICE_OLD (0x800000000LL) +#define MM_F_SECONDARY_SEQ (0x1000000000LL) //output SEQ field for secondary alignments using hard clipping #define MM_I_HPC 0x1 #define MM_I_NO_SEQ 0x2