From b9bbf2d3df55c7c5e65e13e2113de46dc19455b4 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 24 Dec 2014 00:49:50 -0500 Subject: [PATCH 1/4] r1036: PE ALT mapping incorrect No SA/XA tags! --- bwamem_pair.c | 77 ++++++++++++++++++++++++++------------------------- main.c | 2 +- 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/bwamem_pair.c b/bwamem_pair.c index f8b5d46..9325abe 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -244,17 +244,6 @@ int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, cons void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m); -static inline void mem_reg2sam_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, kstring_t *str, bseq1_t *s, mem_alnreg_t *p, int extra_flag, const mem_aln_t *m) -{ // a simplified version of mem_reg2sam() - mem_aln_t t; - if (p->score < opt->T || p->secondary >= 0 || !p->is_alt) return; - t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, p); - t.flag |= extra_flag; - t.flag |= (opt->flag&MEM_F_NO_MULTI)? 0x10000 : 0x800; - mem_aln2sam(opt, bns, str, s, 1, &t, 0, m); - free(t.cigar); -} - #define raw_mapq(diff, a) ((int)(6.02 * (diff) / (a) + .499)) int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]) @@ -264,11 +253,14 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co extern void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m); extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query); - int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1, n_pri[2]; + int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1, n_pri[2], n_aa[2]; kstring_t str; - mem_aln_t h[2]; + mem_aln_t h[2], g[2], aa[2][2]; str.l = str.m = 0; str.s = 0; + memset(h, 0, sizeof(mem_aln_t) * 2); + memset(g, 0, sizeof(mem_aln_t) * 2); + n_aa[0] = n_aa[1] = 0; if (!(opt->flag & MEM_F_NO_RESCUE)) { // then perform SW for the best alignment mem_alnreg_v b[2]; kv_init(b[0]); kv_init(b[1]); @@ -324,38 +316,47 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co q_se[0] = mem_approx_mapq_se(opt, &a[0].a[0]); q_se[1] = mem_approx_mapq_se(opt, &a[1].a[0]); } - // suboptimal hits - if (!(opt->flag & MEM_F_ALL)) { - for (i = 0; i < 2; ++i) { - int k = a[i].a[z[i]].secondary_all; - if (k >= 0 && k < n_pri[i]) { // switch secondary and primary if both of them are non-ALT - assert(a[i].a[k].secondary_all < 0); - for (j = 0; j < a[i].n; ++j) - if (a[i].a[j].secondary_all == k || j == k) - a[i].a[j].secondary_all = z[i]; - a[i].a[z[i]].secondary_all = -1; - } - XA[i] = mem_gen_alt(opt, bns, pac, &a[i], s[i].l_seq, s[i].seq); + for (i = 0; i < 2; ++i) { + int k = a[i].a[z[i]].secondary_all; + if (k >= 0 && k < n_pri[i]) { // switch secondary and primary if both of them are non-ALT + assert(a[i].a[k].secondary_all < 0); + for (j = 0; j < a[i].n; ++j) + if (a[i].a[j].secondary_all == k || j == k) + a[i].a[j].secondary_all = z[i]; + a[i].a[z[i]].secondary_all = -1; } + } + if (!(opt->flag & MEM_F_ALL)) { + for (i = 0; i < 2; ++i) + XA[i] = mem_gen_alt(opt, bns, pac, &a[i], s[i].l_seq, s[i].seq); } else XA[0] = XA[1] = 0; // write SAM - h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag; h[0].XA = XA[0]? XA[0][z[0]] : 0; - h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag; h[1].XA = XA[1]? XA[1][z[1]] : 0; - mem_aln2sam(opt, bns, &str, &s[0], 1, &h[0], 0, &h[1]); // write the primary hit for read1 - if (n_pri[0] < a[0].n) mem_reg2sam_alt(opt, bns, pac, &str, &s[0], &a[0].a[n_pri[0]], 0x41|extra_flag, &h[1]); + for (i = 0; i < 2; ++i) { + h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[z[i]]); + h[i].mapq = q_se[i]; + h[i].flag |= 0x40 | extra_flag; + h[i].XA = XA[i]? XA[i][z[i]] : 0; + aa[i][n_aa[i]++] = h[i]; + if (n_pri[i] < a[i].n) { // the read has ALT hits + g[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[n_pri[i]]); + g[i].flag |= 0x40 | extra_flag; + g[i].XA = XA[i]? XA[i][n_pri[i]] : 0; + aa[i][n_aa[i]++] = g[i]; + } + } + for (i = 0; i < n_aa[0]; ++i) + mem_aln2sam(opt, bns, &str, &s[0], n_aa[0], aa[0], i, &h[1]); // write read1 hits s[0].sam = strdup(str.s); str.l = 0; - mem_aln2sam(opt, bns, &str, &s[1], 1, &h[1], 0, &h[0]); // write the primary hit for read2 - if (n_pri[1] < a[1].n) mem_reg2sam_alt(opt, bns, pac, &str, &s[1], &a[1].a[n_pri[1]], 0x81|extra_flag, &h[0]); + for (i = 0; i < n_aa[1]; ++i) + mem_aln2sam(opt, bns, &str, &s[1], n_aa[1], aa[1], i, &h[0]); // write read2 hits s[1].sam = str.s; if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name); - free(h[0].cigar); // h[0].XA will be freed in the following block - free(h[1].cigar); - // free XA + // free for (i = 0; i < 2; ++i) { - if (XA[i]) { - for (j = 0; j < a[i].n; ++j) free(XA[i][j]); - free(XA[i]); - } + free(h[i].cigar); free(g[i].cigar); + if (XA[i] == 0) continue; + for (j = 0; j < a[i].n; ++j) free(XA[i][j]); + free(XA[i]); } } else goto no_pairing; return n; diff --git a/main.c b/main.c index e157df6..c34328e 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.11-r1034" +#define PACKAGE_VERSION "0.7.11-r1036-dirty" #endif int bwa_fa2pac(int argc, char *argv[]); From 3ae8d4b80b6c10578fb76bb306765a0844c523a3 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 24 Dec 2014 00:55:33 -0500 Subject: [PATCH 2/4] r1037: wrong read number flag --- bwamem_pair.c | 4 ++-- main.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bwamem_pair.c b/bwamem_pair.c index 9325abe..a85c28a 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -334,12 +334,12 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co for (i = 0; i < 2; ++i) { h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[z[i]]); h[i].mapq = q_se[i]; - h[i].flag |= 0x40 | extra_flag; + h[i].flag |= 0x40< Date: Wed, 24 Dec 2014 13:22:46 -0500 Subject: [PATCH 3/4] r1038: wrong 0x100|0x800 flags in PE mode --- bwamem_pair.c | 6 ++++-- main.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/bwamem_pair.c b/bwamem_pair.c index a85c28a..7950736 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -338,8 +338,10 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co h[i].XA = XA[i]? XA[i][z[i]] : 0; aa[i][n_aa[i]++] = h[i]; if (n_pri[i] < a[i].n) { // the read has ALT hits - g[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[n_pri[i]]); - g[i].flag |= 0x40<score < opt->T || p->secondary >= 0 || !p->is_alt) continue; + g[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, p); + g[i].flag |= 0x800 | 0x40< Date: Sun, 28 Dec 2014 15:44:59 -0500 Subject: [PATCH 4/4] Released 0.7.12 --- NEWS.md | 10 ++++++++++ bwa.1 | 2 +- main.c | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index fd42fa3..4692889 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,13 @@ +Release 0.7.12 (28 December 2014) +--------------------------------- + +This release fixed a bug in the pair-end mode when ALT contigs are present. It +leads to undercalling in regions overlapping ALT contigs. + +(0.7.12: 28 December 2014, r1039) + + + Release 0.7.11 (23 December, 2014) ---------------------------------- diff --git a/bwa.1 b/bwa.1 index 9b681b7..e30f382 100644 --- a/bwa.1 +++ b/bwa.1 @@ -1,4 +1,4 @@ -.TH bwa 1 "23 December 2014" "bwa-0.7.11-r1034" "Bioinformatics tools" +.TH bwa 1 "23 December 2014" "bwa-0.7.12-r1034" "Bioinformatics tools" .SH NAME .PP bwa - Burrows-Wheeler Alignment Tool diff --git a/main.c b/main.c index c29dc6b..9f776d4 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.11-r1038-dirty" +#define PACKAGE_VERSION "0.7.12-r1039" #endif int bwa_fa2pac(int argc, char *argv[]);