From 20dc9dd41add925d7770b27f9f0588afec5f519c Mon Sep 17 00:00:00 2001 From: John Marshall Date: Fri, 14 Jun 2013 13:57:22 +0100 Subject: [PATCH] Check that paired reads have the same QNAME This detects desynchronised input files, which occasionally happens due to user error or system failure. Checking the names just after printing them has no real performance implications because the strings are already in cache. (It might be better to check while reading the input, but that would be more complicated in the two-input-files case.) --- bwamem_pair.c | 2 ++ bwape.c | 1 + 2 files changed, 3 insertions(+) diff --git a/bwamem_pair.c b/bwamem_pair.c index 06aacff..c218925 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -301,6 +301,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag; mem_aln2sam(bns, &str, &s[0], 1, &h[0], 0, &h[1]); s[0].sam = strdup(str.s); str.l = 0; mem_aln2sam(bns, &str, &s[1], 1, &h[1], 0, &h[0]); s[1].sam = str.s; + if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name); free(h[0].cigar); free(h[1].cigar); } else goto no_pairing; return n; @@ -319,6 +320,7 @@ no_pairing: } mem_reg2sam_se(opt, bns, pac, &s[0], &a[0], 0x41|extra_flag, &h[1]); mem_reg2sam_se(opt, bns, pac, &s[1], &a[1], 0x81|extra_flag, &h[0]); + if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name); free(h[0].cigar); free(h[1].cigar); return n; } diff --git a/bwape.c b/bwape.c index 08490e7..2c96e06 100644 --- a/bwape.c +++ b/bwape.c @@ -706,6 +706,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f } bwa_print_sam1(bns, p[0], p[1], opt.mode, opt.max_top2); bwa_print_sam1(bns, p[1], p[0], opt.mode, opt.max_top2); + if (strcmp(p[0]->name, p[1]->name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", p[0]->name, p[1]->name); } fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();