An option to output SEQ field for secondary alignment (#687)

* a new option --secondary-seq to output SEQ field for secondary alignments

* comments removed

* Fixed a conflict in #687

---------

Co-authored-by: Heng Li <lh3@me.com>
This commit is contained in:
Mikhail Kolmogorov 2023-04-21 08:06:13 -07:00 committed by GitHub
parent fc24c8a348
commit 704fbc6f5c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 9 additions and 4 deletions

View File

@ -370,14 +370,16 @@ static void write_sam_cigar(kstring_t *s, int sam_flag, int in_tag, int qlen, co
clip_len[0] = r->rev? qlen - r->qe : r->qs;
clip_len[1] = r->rev? r->qs : qlen - r->qe;
if (in_tag) {
int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 5 : 4;
int clip_char = ((sam_flag&0x800 || (sam_flag&0x100 && opt_flag&MM_F_SECONDARY_SEQ)) &&
!(opt_flag&MM_F_SOFTCLIP)) ? 5 : 4;
mm_sprintf_lite(s, "\tCG:B:I");
if (clip_len[0]) mm_sprintf_lite(s, ",%u", clip_len[0]<<4|clip_char);
for (k = 0; k < r->p->n_cigar; ++k)
mm_sprintf_lite(s, ",%u", r->p->cigar[k]);
if (clip_len[1]) mm_sprintf_lite(s, ",%u", clip_len[1]<<4|clip_char);
} else {
int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 'H' : 'S';
int clip_char = ((sam_flag&0x800 || (sam_flag&0x100 && opt_flag&MM_F_SECONDARY_SEQ)) &&
!(opt_flag&MM_F_SOFTCLIP)) ? 'H' : 'S';
assert(clip_len[0] < qlen && clip_len[1] < qlen);
if (clip_len[0]) mm_sprintf_lite(s, "%d%c", clip_len[0], clip_char);
for (k = 0; k < r->p->n_cigar; ++k)
@ -452,7 +454,7 @@ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
if (cigar_in_tag) {
int slen;
if ((flag & 0x900) == 0 || (opt_flag & MM_F_SOFTCLIP)) slen = t->l_seq;
else if (flag & 0x100) slen = 0;
else if ((flag & 0x100) && !(opt_flag & MM_F_SECONDARY_SEQ)) slen = 0;
else slen = r->qe - r->qs;
mm_sprintf_lite(s, "%dS%dN", slen, r->re - r->rs);
} else write_sam_cigar(s, flag, 0, t->l_seq, r, opt_flag);
@ -493,7 +495,7 @@ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
mm_sprintf_lite(s, "\t");
if (t->qual) sam_write_sq(s, t->qual, t->l_seq, r->rev, 0);
else mm_sprintf_lite(s, "*");
} else if (flag & 0x100) {
} else if ((flag & 0x100) && !(opt_flag & MM_F_SECONDARY_SEQ)){
mm_sprintf_lite(s, "*\t*");
} else {
sam_write_sq(s, t->seq + r->qs, r->qe - r->qs, r->rev, r->rev);

2
main.c
View File

@ -76,6 +76,7 @@ static ko_longopt_t long_options[] = {
{ "chain-skip-scale",ko_required_argument,351 },
{ "print-chains", ko_no_argument, 352 },
{ "no-hash-name", ko_no_argument, 353 },
{ "secondary-seq", ko_no_argument, 354 },
{ "help", ko_no_argument, 'h' },
{ "max-intron-len", ko_required_argument, 'G' },
{ "version", ko_no_argument, 'V' },
@ -241,6 +242,7 @@ int main(int argc, char *argv[])
else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac
else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains
else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
else if (c == 354) opt.flag |= MM_F_SECONDARY_SEQ; // --secondary-seq
else if (c == 330) {
fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
} else if (c == 314) { // --frag

View File

@ -43,6 +43,7 @@
#define MM_F_NO_INV (0x200000000LL)
#define MM_F_NO_HASH_NAME (0x400000000LL)
#define MM_F_SPLICE_OLD (0x800000000LL)
#define MM_F_SECONDARY_SEQ (0x1000000000LL) // output SEQ field for secondary alignments using hard clipping
#define MM_I_HPC 0x1
#define MM_I_NO_SEQ 0x2