Merge branch 'master' into master_fixes
This commit is contained in:
commit
c862a1a396
41
NEWS
41
NEWS
|
|
@ -1,3 +1,44 @@
|
||||||
|
Release 0.7.3a (15 March, 2013)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
In 0.7.3, the wrong CIGAR bug was only fixed in one scenario, but not fixed
|
||||||
|
in another corner case.
|
||||||
|
|
||||||
|
(0.7.3a: 15 March 2013, r367)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Release 0.7.3 (15 March, 2013)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Changes to BWA-MEM:
|
||||||
|
|
||||||
|
* Bugfix: pairing score is inaccurate when option -A does not take the default
|
||||||
|
value. This is a very minor issue even if it happens.
|
||||||
|
|
||||||
|
* Bugfix: occasionally wrong CIGAR. This happens when in the alignment there
|
||||||
|
is a 1bp deletion and a 1bp insertion which are close to the end of the
|
||||||
|
reads, and there are no other substitutions or indels. BWA-MEM would not do
|
||||||
|
a gapped alignment due to the bug.
|
||||||
|
|
||||||
|
* New feature: output other non-overlapping alignments in the XP tag such that
|
||||||
|
we can see the entire picture of alignment from one SAM line. XP gives the
|
||||||
|
position, CIGAR, NM and mapQ of each aligned subsequence of the query.
|
||||||
|
|
||||||
|
BWA-MEM has been used to align ~300Gbp 100-700bp SE/PE reads. SNP/indel calling
|
||||||
|
has also been evaluated on part of these data. BWA-MEM generally gives better
|
||||||
|
pre-filtered SNP calls than BWA. No significant issues have been observed since
|
||||||
|
0.7.2, though minor improvements or bugs (e.g. the bug fixed in this release)
|
||||||
|
are still possible. If you find potential issues, please send bug reports to
|
||||||
|
<bio-bwa-help@lists.sourceforge.net> (free registration required).
|
||||||
|
|
||||||
|
In addition, a more detailed description of the BWA-MEM algorithm can be found at
|
||||||
|
<https://github.com/lh3/mem-paper>.
|
||||||
|
|
||||||
|
(0.7.3: 15 March 2013, r366)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Release 0.7.2 (9 March, 2013)
|
Release 0.7.2 (9 March, 2013)
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
|
|
||||||
4
bwa.1
4
bwa.1
|
|
@ -1,4 +1,4 @@
|
||||||
.TH bwa 1 "13 March 2013" "bwa-0.7.3" "Bioinformatics tools"
|
.TH bwa 1 "15 March 2013" "bwa-0.7.3a" "Bioinformatics tools"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
.PP
|
.PP
|
||||||
bwa - Burrows-Wheeler Alignment Tool
|
bwa - Burrows-Wheeler Alignment Tool
|
||||||
|
|
@ -580,7 +580,7 @@ XS Suboptimal alignment score
|
||||||
XF Support from forward/reverse alignment
|
XF Support from forward/reverse alignment
|
||||||
XE Number of supporting seeds
|
XE Number of supporting seeds
|
||||||
_
|
_
|
||||||
XP Alt primary hits; format: /(chr,pos,CIGAR;mapQ,NM;)+/
|
XP Alt primary hits; format: /(chr,pos,CIGAR,mapQ,NM;)+/
|
||||||
.TE
|
.TE
|
||||||
|
|
||||||
.PP
|
.PP
|
||||||
|
|
|
||||||
30
bwamem.c
30
bwamem.c
|
|
@ -542,7 +542,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
|
||||||
a = kv_pushp(mem_alnreg_t, *av);
|
a = kv_pushp(mem_alnreg_t, *av);
|
||||||
memset(a, 0, sizeof(mem_alnreg_t));
|
memset(a, 0, sizeof(mem_alnreg_t));
|
||||||
a->w = aw[0] = aw[1] = opt->w;
|
a->w = aw[0] = aw[1] = opt->w;
|
||||||
a->score = -1;
|
a->score = a->truesc = -1;
|
||||||
|
|
||||||
if (s->qbeg) { // left extension
|
if (s->qbeg) { // left extension
|
||||||
uint8_t *rs, *qs;
|
uint8_t *rs, *qs;
|
||||||
|
|
@ -556,14 +556,19 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
|
||||||
int prev = a->score;
|
int prev = a->score;
|
||||||
aw[0] = opt->w << i;
|
aw[0] = opt->w << i;
|
||||||
a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, aw[0], opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0]);
|
a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, aw[0], opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0]);
|
||||||
if (bwa_verbose >= 4) printf("L\t%d < %d; w=%d; max_off=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout);
|
if (bwa_verbose >= 4) { printf("L\t%d < %d; w=%d; max_off=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); }
|
||||||
if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break;
|
if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break;
|
||||||
}
|
}
|
||||||
// check whether we prefer to reach the end of the query
|
// check whether we prefer to reach the end of the query
|
||||||
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) a->qb = s->qbeg - qle, a->rb = s->rbeg - tle; // local hits
|
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) { // local extension
|
||||||
else a->qb = 0, a->rb = s->rbeg - gtle; // reach the end
|
a->qb = s->qbeg - qle, a->rb = s->rbeg - tle;
|
||||||
|
a->truesc = a->score;
|
||||||
|
} else { // to-end extension
|
||||||
|
a->qb = 0, a->rb = s->rbeg - gtle;
|
||||||
|
a->truesc = gscore;
|
||||||
|
}
|
||||||
free(qs); free(rs);
|
free(qs); free(rs);
|
||||||
} else a->score = s->len * opt->a, a->qb = 0, a->rb = s->rbeg;
|
} else a->score = a->truesc = s->len * opt->a, a->qb = 0, a->rb = s->rbeg;
|
||||||
|
|
||||||
if (s->qbeg + s->len != l_query) { // right extension
|
if (s->qbeg + s->len != l_query) { // right extension
|
||||||
int qle, tle, qe, re, gtle, gscore, sc0 = a->score;
|
int qle, tle, qe, re, gtle, gscore, sc0 = a->score;
|
||||||
|
|
@ -574,12 +579,17 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
|
||||||
int prev = a->score;
|
int prev = a->score;
|
||||||
aw[1] = opt->w << i;
|
aw[1] = opt->w << i;
|
||||||
a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, aw[1], opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1]);
|
a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, aw[1], opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1]);
|
||||||
if (bwa_verbose >= 4) printf("R\t%d < %d; w=%d; max_off=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout);
|
if (bwa_verbose >= 4) { printf("R\t%d < %d; w=%d; max_off=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); }
|
||||||
if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break;
|
if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break;
|
||||||
}
|
}
|
||||||
// similar to the above
|
// similar to the above
|
||||||
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) a->qe = qe + qle, a->re = rmax[0] + re + tle;
|
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) { // local extension
|
||||||
else a->qe = l_query, a->re = rmax[0] + re + gtle;
|
a->qe = qe + qle, a->re = rmax[0] + re + tle;
|
||||||
|
a->truesc += a->score - sc0;
|
||||||
|
} else { // to-end extension
|
||||||
|
a->qe = l_query, a->re = rmax[0] + re + gtle;
|
||||||
|
a->truesc += gscore - sc0;
|
||||||
|
}
|
||||||
} else a->qe = l_query, a->re = s->rbeg + s->len;
|
} else a->qe = l_query, a->re = s->rbeg + s->len;
|
||||||
if (bwa_verbose >= 4) { printf("[%d]\taw={%d,%d}\tscore=%d\t[%d,%d) <=> [%ld,%ld)\n", k, aw[0], aw[1], a->score, a->qb, a->qe, (long)a->rb, (long)a->re); fflush(stdout); }
|
if (bwa_verbose >= 4) { printf("[%d]\taw={%d,%d}\tscore=%d\t[%d,%d) <=> [%ld,%ld)\n", k, aw[0], aw[1], a->score, a->qb, a->qe, (long)a->rb, (long)a->re); fflush(stdout); }
|
||||||
|
|
||||||
|
|
@ -601,7 +611,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
|
||||||
static inline int infer_bw(int l1, int l2, int score, int a, int q, int r)
|
static inline int infer_bw(int l1, int l2, int score, int a, int q, int r)
|
||||||
{
|
{
|
||||||
int w;
|
int w;
|
||||||
if (l1 == l2 && l1 * a - score < (q + r)<<1) return 0; // to get equal alignment length, we need at least two gaps
|
if (l1 == l2 && l1 * a - score < (q + r - a)<<1) return 0; // to get equal alignment length, we need at least two gaps
|
||||||
w = ((double)((l1 < l2? l1 : l2) * a - score - q) / r + 1.);
|
w = ((double)((l1 < l2? l1 : l2) * a - score - q) / r + 1.);
|
||||||
if (w < abs(l1 - l2)) w = abs(l1 - l2);
|
if (w < abs(l1 - l2)) w = abs(l1 - l2);
|
||||||
return w;
|
return w;
|
||||||
|
|
@ -839,7 +849,7 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
|
||||||
a.mapq = ar->secondary < 0? mem_approx_mapq_se(opt, ar) : 0;
|
a.mapq = ar->secondary < 0? mem_approx_mapq_se(opt, ar) : 0;
|
||||||
if (ar->secondary >= 0) a.flag |= 0x20000;
|
if (ar->secondary >= 0) a.flag |= 0x20000;
|
||||||
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re);
|
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re);
|
||||||
w2 = infer_bw(qe - qb, re - rb, ar->score, opt->a, opt->q, opt->r);
|
w2 = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->q, opt->r);
|
||||||
w2 = w2 < opt->w? w2 : opt->w;
|
w2 = w2 < opt->w? w2 : opt->w;
|
||||||
a.cigar = bwa_gen_cigar(opt->mat, opt->q, opt->r, w2, bns->l_pac, pac, qe - qb, (uint8_t*)&query[qb], rb, re, &score, &a.n_cigar, &NM);
|
a.cigar = bwa_gen_cigar(opt->mat, opt->q, opt->r, w2, bns->l_pac, pac, qe - qb, (uint8_t*)&query[qb], rb, re, &score, &a.n_cigar, &NM);
|
||||||
a.NM = NM;
|
a.NM = NM;
|
||||||
|
|
|
||||||
3
bwamem.h
3
bwamem.h
|
|
@ -43,7 +43,8 @@ typedef struct {
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int64_t rb, re; // [rb,re): reference sequence in the alignment
|
int64_t rb, re; // [rb,re): reference sequence in the alignment
|
||||||
int qb, qe; // [qb,qe): query sequence in the alignment
|
int qb, qe; // [qb,qe): query sequence in the alignment
|
||||||
int score; // best SW score
|
int score; // best local SW score
|
||||||
|
int truesc; // actual score corresponding to the aligned region; possibly smaller than $score
|
||||||
int sub; // 2nd best SW score
|
int sub; // 2nd best SW score
|
||||||
int csub; // SW score of a tandem hit
|
int csub; // SW score of a tandem hit
|
||||||
int sub_n; // approximate number of suboptimal hits
|
int sub_n; // approximate number of suboptimal hits
|
||||||
|
|
|
||||||
|
|
@ -201,7 +201,7 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_
|
||||||
if (dist > pes[dir].high) break;
|
if (dist > pes[dir].high) break;
|
||||||
if (dist < pes[dir].low) continue;
|
if (dist < pes[dir].low) continue;
|
||||||
ns = (dist - pes[dir].avg) / pes[dir].std;
|
ns = (dist - pes[dir].avg) / pes[dir].std;
|
||||||
q = (int)((v.a[i].y>>32) + (v.a[k].y>>32) + .721 * log(2. * erfc(fabs(ns) * M_SQRT1_2)) + .499); // .721 = 1/log(4)
|
q = (int)((v.a[i].y>>32) + (v.a[k].y>>32) + .721 * log(2. * erfc(fabs(ns) * M_SQRT1_2)) * opt->a + .499); // .721 = 1/log(4)
|
||||||
if (q < 0) q = 0;
|
if (q < 0) q = 0;
|
||||||
p = kv_pushp(pair64_t, u);
|
p = kv_pushp(pair64_t, u);
|
||||||
p->y = (uint64_t)k<<32 | i;
|
p->y = (uint64_t)k<<32 | i;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue