Merge branch 'master' into master_fixes

This commit is contained in:
Rob Davies 2013-03-18 13:35:12 +00:00
commit c862a1a396
6 changed files with 67 additions and 15 deletions

41
NEWS
View File

@ -1,3 +1,44 @@
Release 0.7.3a (15 March, 2013)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In 0.7.3, the wrong CIGAR bug was only fixed in one scenario, but not fixed
in another corner case.
(0.7.3a: 15 March 2013, r367)
Release 0.7.3 (15 March, 2013)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Changes to BWA-MEM:
* Bugfix: pairing score is inaccurate when option -A does not take the default
value. This is a very minor issue even if it happens.
* Bugfix: occasionally wrong CIGAR. This happens when in the alignment there
is a 1bp deletion and a 1bp insertion which are close to the end of the
reads, and there are no other substitutions or indels. BWA-MEM would not do
a gapped alignment due to the bug.
* New feature: output other non-overlapping alignments in the XP tag such that
we can see the entire picture of alignment from one SAM line. XP gives the
position, CIGAR, NM and mapQ of each aligned subsequence of the query.
BWA-MEM has been used to align ~300Gbp 100-700bp SE/PE reads. SNP/indel calling
has also been evaluated on part of these data. BWA-MEM generally gives better
pre-filtered SNP calls than BWA. No significant issues have been observed since
0.7.2, though minor improvements or bugs (e.g. the bug fixed in this release)
are still possible. If you find potential issues, please send bug reports to
<bio-bwa-help@lists.sourceforge.net> (free registration required).
In addition, more detailed description of the BWA-MEM algorithm can be found at
<https://github.com/lh3/mem-paper>.
(0.7.3: 15 March 2013, r366)
Release 0.7.2 (9 March, 2013)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

4
bwa.1
View File

@ -1,4 +1,4 @@
.TH bwa 1 "13 March 2013" "bwa-0.7.3" "Bioinformatics tools"
.TH bwa 1 "15 March 2013" "bwa-0.7.3a" "Bioinformatics tools"
.SH NAME
.PP
bwa - Burrows-Wheeler Alignment Tool
@ -580,7 +580,7 @@ XS Suboptimal alignment score
XF Support from forward/reverse alignment
XE Number of supporting seeds
_
XP Alt primary hits; format: /(chr,pos,CIGAR;mapQ,NM;)+/
XP Alt primary hits; format: /(chr,pos,CIGAR,mapQ,NM;)+/
.TE
.PP

View File

@ -542,7 +542,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
a = kv_pushp(mem_alnreg_t, *av);
memset(a, 0, sizeof(mem_alnreg_t));
a->w = aw[0] = aw[1] = opt->w;
a->score = -1;
a->score = a->truesc = -1;
if (s->qbeg) { // left extension
uint8_t *rs, *qs;
@ -556,14 +556,19 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
int prev = a->score;
aw[0] = opt->w << i;
a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, aw[0], opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0]);
if (bwa_verbose >= 4) printf("L\t%d < %d; w=%d; max_off=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout);
if (bwa_verbose >= 4) { printf("L\t%d < %d; w=%d; max_off=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); }
if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break;
}
// check whether we prefer to reach the end of the query
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) a->qb = s->qbeg - qle, a->rb = s->rbeg - tle; // local hits
else a->qb = 0, a->rb = s->rbeg - gtle; // reach the end
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) { // local extension
a->qb = s->qbeg - qle, a->rb = s->rbeg - tle;
a->truesc = a->score;
} else { // to-end extension
a->qb = 0, a->rb = s->rbeg - gtle;
a->truesc = gscore;
}
free(qs); free(rs);
} else a->score = s->len * opt->a, a->qb = 0, a->rb = s->rbeg;
} else a->score = a->truesc = s->len * opt->a, a->qb = 0, a->rb = s->rbeg;
if (s->qbeg + s->len != l_query) { // right extension
int qle, tle, qe, re, gtle, gscore, sc0 = a->score;
@ -574,12 +579,17 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
int prev = a->score;
aw[1] = opt->w << i;
a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, aw[1], opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1]);
if (bwa_verbose >= 4) printf("R\t%d < %d; w=%d; max_off=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout);
if (bwa_verbose >= 4) { printf("R\t%d < %d; w=%d; max_off=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); }
if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break;
}
// similar to the above
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) a->qe = qe + qle, a->re = rmax[0] + re + tle;
else a->qe = l_query, a->re = rmax[0] + re + gtle;
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) { // local extension
a->qe = qe + qle, a->re = rmax[0] + re + tle;
a->truesc += a->score - sc0;
} else { // to-end extension
a->qe = l_query, a->re = rmax[0] + re + gtle;
a->truesc += gscore - sc0;
}
} else a->qe = l_query, a->re = s->rbeg + s->len;
if (bwa_verbose >= 4) { printf("[%d]\taw={%d,%d}\tscore=%d\t[%d,%d) <=> [%ld,%ld)\n", k, aw[0], aw[1], a->score, a->qb, a->qe, (long)a->rb, (long)a->re); fflush(stdout); }
@ -601,7 +611,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
static inline int infer_bw(int l1, int l2, int score, int a, int q, int r)
{
int w;
if (l1 == l2 && l1 * a - score < (q + r)<<1) return 0; // to get equal alignment length, we need at least two gaps
if (l1 == l2 && l1 * a - score < (q + r - a)<<1) return 0; // to get equal alignment length, we need at least two gaps
w = ((double)((l1 < l2? l1 : l2) * a - score - q) / r + 1.);
if (w < abs(l1 - l2)) w = abs(l1 - l2);
return w;
@ -839,7 +849,7 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
a.mapq = ar->secondary < 0? mem_approx_mapq_se(opt, ar) : 0;
if (ar->secondary >= 0) a.flag |= 0x20000;
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re);
w2 = infer_bw(qe - qb, re - rb, ar->score, opt->a, opt->q, opt->r);
w2 = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->q, opt->r);
w2 = w2 < opt->w? w2 : opt->w;
a.cigar = bwa_gen_cigar(opt->mat, opt->q, opt->r, w2, bns->l_pac, pac, qe - qb, (uint8_t*)&query[qb], rb, re, &score, &a.n_cigar, &NM);
a.NM = NM;

View File

@ -43,7 +43,8 @@ typedef struct {
typedef struct {
int64_t rb, re; // [rb,re): reference sequence in the alignment
int qb, qe; // [qb,qe): query sequence in the alignment
int score; // best SW score
int score; // best local SW score
int truesc; // actual score corresponding to the aligned region; possibly smaller than $score
int sub; // 2nd best SW score
int csub; // SW score of a tandem hit
int sub_n; // approximate number of suboptimal hits

View File

@ -201,7 +201,7 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_
if (dist > pes[dir].high) break;
if (dist < pes[dir].low) continue;
ns = (dist - pes[dir].avg) / pes[dir].std;
q = (int)((v.a[i].y>>32) + (v.a[k].y>>32) + .721 * log(2. * erfc(fabs(ns) * M_SQRT1_2)) + .499); // .721 = 1/log(4)
q = (int)((v.a[i].y>>32) + (v.a[k].y>>32) + .721 * log(2. * erfc(fabs(ns) * M_SQRT1_2)) * opt->a + .499); // .721 = 1/log(4)
if (q < 0) q = 0;
p = kv_pushp(pair64_t, u);
p->y = (uint64_t)k<<32 | i;

2
main.c
View File

@ -3,7 +3,7 @@
#include "utils.h"
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.2-r363-beta"
#define PACKAGE_VERSION "0.7.3a-r367"
#endif
int bwa_fa2pac(int argc, char *argv[]);