From 56026158d8e1a62b89672fc53aab8db967d47b9a Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Tue, 14 Dec 2021 08:02:05 -0700 Subject: [PATCH] Add the @HD header line to the output SAM. In particular, this declares the output SAM to be unsorted but also query-grouped. The latter is very important to state explicitly, so that downstream tools that do not make assumptions know that reads from the same template are grouped. --- bwa.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/bwa.c b/bwa.c index 8aacde3..104c95c 100644 --- a/bwa.c +++ b/bwa.c @@ -406,10 +406,17 @@ int bwa_idx2mem(bwaidx_t *idx) void bwa_print_sam_hdr(const bntseq_t *bns, const char *hdr_line) { - int i, n_SQ = 0; + int i, n_HD = 0, n_SQ = 0; extern char *bwa_pg; + if (hdr_line) { + // check for HD line const char *p = hdr_line; + if ((p = strstr(p, "@HD")) != 0) { + ++n_HD; + } + // check for SQ lines + p = hdr_line; while ((p = strstr(p, "@SQ\t")) != 0) { if (p == hdr_line || *(p-1) == '\n') ++n_SQ; p += 4; @@ -423,6 +430,9 @@ void bwa_print_sam_hdr(const bntseq_t *bns, const char *hdr_line) } } else if (n_SQ != bns->n_seqs && bwa_verbose >= 2) fprintf(stderr, "[W::%s] %d @SQ lines provided with -H; %d sequences in the index. Continue anyway.\n", __func__, n_SQ, bns->n_seqs); + if (n_HD == 0) { + err_printf("@HD\tVN:1.5\tSO:unsorted\tGO:query\n"); + } if (hdr_line) err_printf("%s\n", hdr_line); if (bwa_pg) err_printf("%s\n", bwa_pg); }