Merge branch 'master' into master_fixes
This commit is contained in:
commit
c862a1a396
41
NEWS
41
NEWS
|
|
@ -1,3 +1,44 @@
|
|||
Release 0.7.3a (15 March, 2013)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
In 0.7.3, the wrong CIGAR bug was only fixed in one scenario, but not fixed
|
||||
in another corner case.
|
||||
|
||||
(0.7.3a: 15 March 2013, r367)
|
||||
|
||||
|
||||
|
||||
Release 0.7.3 (15 March, 2013)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Changes to BWA-MEM:
|
||||
|
||||
* Bugfix: pairing score is inaccurate when option -A does not take the default
|
||||
value. This is a very minor issue even if it happens.
|
||||
|
||||
* Bugfix: occasionally wrong CIGAR. This happens when in the alignment there
|
||||
is a 1bp deletion and a 1bp insertion which are close to the end of the
|
||||
reads, and there are no other substitutions or indels. BWA-MEM would not do
|
||||
a gapped alignment due to the bug.
|
||||
|
||||
* New feature: output other non-overlapping alignments in the XP tag such that
|
||||
we can see the entire picture of alignment from one SAM line. XP gives the
|
||||
position, CIGAR, NM and mapQ of each aligned subsequence of the query.
|
||||
|
||||
BWA-MEM has been used to align ~300Gbp 100-700bp SE/PE reads. SNP/indel calling
|
||||
has also been evaluated on part of these data. BWA-MEM generally gives better
|
||||
pre-filtered SNP calls than BWA. No significant issues have been observed since
|
||||
0.7.2, though minor improvements or bugs (e.g. the bug fixed in this release)
|
||||
are still possible. If you find potential issues, please send bug reports to
|
||||
<bio-bwa-help@lists.sourceforge.net> (free registration required).
|
||||
|
||||
In addition, more detailed description of the BWA-MEM algorithm can be found at
|
||||
<https://github.com/lh3/mem-paper>.
|
||||
|
||||
(0.7.3: 15 March 2013, r366)
|
||||
|
||||
|
||||
|
||||
Release 0.7.2 (9 March, 2013)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
|
|
|||
4
bwa.1
4
bwa.1
|
|
@ -1,4 +1,4 @@
|
|||
.TH bwa 1 "13 March 2013" "bwa-0.7.3" "Bioinformatics tools"
|
||||
.TH bwa 1 "15 March 2013" "bwa-0.7.3a" "Bioinformatics tools"
|
||||
.SH NAME
|
||||
.PP
|
||||
bwa - Burrows-Wheeler Alignment Tool
|
||||
|
|
@ -580,7 +580,7 @@ XS Suboptimal alignment score
|
|||
XF Support from forward/reverse alignment
|
||||
XE Number of supporting seeds
|
||||
_
|
||||
XP Alt primary hits; format: /(chr,pos,CIGAR;mapQ,NM;)+/
|
||||
XP Alt primary hits; format: /(chr,pos,CIGAR,mapQ,NM;)+/
|
||||
.TE
|
||||
|
||||
.PP
|
||||
|
|
|
|||
30
bwamem.c
30
bwamem.c
|
|
@ -542,7 +542,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
|
|||
a = kv_pushp(mem_alnreg_t, *av);
|
||||
memset(a, 0, sizeof(mem_alnreg_t));
|
||||
a->w = aw[0] = aw[1] = opt->w;
|
||||
a->score = -1;
|
||||
a->score = a->truesc = -1;
|
||||
|
||||
if (s->qbeg) { // left extension
|
||||
uint8_t *rs, *qs;
|
||||
|
|
@ -556,14 +556,19 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
|
|||
int prev = a->score;
|
||||
aw[0] = opt->w << i;
|
||||
a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, aw[0], opt->zdrop, s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0]);
|
||||
if (bwa_verbose >= 4) printf("L\t%d < %d; w=%d; max_off=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout);
|
||||
if (bwa_verbose >= 4) { printf("L\t%d < %d; w=%d; max_off=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); }
|
||||
if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break;
|
||||
}
|
||||
// check whether we prefer to reach the end of the query
|
||||
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) a->qb = s->qbeg - qle, a->rb = s->rbeg - tle; // local hits
|
||||
else a->qb = 0, a->rb = s->rbeg - gtle; // reach the end
|
||||
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) { // local extension
|
||||
a->qb = s->qbeg - qle, a->rb = s->rbeg - tle;
|
||||
a->truesc = a->score;
|
||||
} else { // to-end extension
|
||||
a->qb = 0, a->rb = s->rbeg - gtle;
|
||||
a->truesc = gscore;
|
||||
}
|
||||
free(qs); free(rs);
|
||||
} else a->score = s->len * opt->a, a->qb = 0, a->rb = s->rbeg;
|
||||
} else a->score = a->truesc = s->len * opt->a, a->qb = 0, a->rb = s->rbeg;
|
||||
|
||||
if (s->qbeg + s->len != l_query) { // right extension
|
||||
int qle, tle, qe, re, gtle, gscore, sc0 = a->score;
|
||||
|
|
@ -574,12 +579,17 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
|
|||
int prev = a->score;
|
||||
aw[1] = opt->w << i;
|
||||
a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, aw[1], opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1]);
|
||||
if (bwa_verbose >= 4) printf("R\t%d < %d; w=%d; max_off=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout);
|
||||
if (bwa_verbose >= 4) { printf("R\t%d < %d; w=%d; max_off=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); }
|
||||
if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break;
|
||||
}
|
||||
// similar to the above
|
||||
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) a->qe = qe + qle, a->re = rmax[0] + re + tle;
|
||||
else a->qe = l_query, a->re = rmax[0] + re + gtle;
|
||||
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) { // local extension
|
||||
a->qe = qe + qle, a->re = rmax[0] + re + tle;
|
||||
a->truesc += a->score - sc0;
|
||||
} else { // to-end extension
|
||||
a->qe = l_query, a->re = rmax[0] + re + gtle;
|
||||
a->truesc += gscore - sc0;
|
||||
}
|
||||
} else a->qe = l_query, a->re = s->rbeg + s->len;
|
||||
if (bwa_verbose >= 4) { printf("[%d]\taw={%d,%d}\tscore=%d\t[%d,%d) <=> [%ld,%ld)\n", k, aw[0], aw[1], a->score, a->qb, a->qe, (long)a->rb, (long)a->re); fflush(stdout); }
|
||||
|
||||
|
|
@ -601,7 +611,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
|
|||
static inline int infer_bw(int l1, int l2, int score, int a, int q, int r)
|
||||
{
|
||||
int w;
|
||||
if (l1 == l2 && l1 * a - score < (q + r)<<1) return 0; // to get equal alignment length, we need at least two gaps
|
||||
if (l1 == l2 && l1 * a - score < (q + r - a)<<1) return 0; // to get equal alignment length, we need at least two gaps
|
||||
w = ((double)((l1 < l2? l1 : l2) * a - score - q) / r + 1.);
|
||||
if (w < abs(l1 - l2)) w = abs(l1 - l2);
|
||||
return w;
|
||||
|
|
@ -839,7 +849,7 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
|
|||
a.mapq = ar->secondary < 0? mem_approx_mapq_se(opt, ar) : 0;
|
||||
if (ar->secondary >= 0) a.flag |= 0x20000;
|
||||
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re);
|
||||
w2 = infer_bw(qe - qb, re - rb, ar->score, opt->a, opt->q, opt->r);
|
||||
w2 = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->q, opt->r);
|
||||
w2 = w2 < opt->w? w2 : opt->w;
|
||||
a.cigar = bwa_gen_cigar(opt->mat, opt->q, opt->r, w2, bns->l_pac, pac, qe - qb, (uint8_t*)&query[qb], rb, re, &score, &a.n_cigar, &NM);
|
||||
a.NM = NM;
|
||||
|
|
|
|||
3
bwamem.h
3
bwamem.h
|
|
@ -43,7 +43,8 @@ typedef struct {
|
|||
typedef struct {
|
||||
int64_t rb, re; // [rb,re): reference sequence in the alignment
|
||||
int qb, qe; // [qb,qe): query sequence in the alignment
|
||||
int score; // best SW score
|
||||
int score; // best local SW score
|
||||
int truesc; // actual score corresponding to the aligned region; possibly smaller than $score
|
||||
int sub; // 2nd best SW score
|
||||
int csub; // SW score of a tandem hit
|
||||
int sub_n; // approximate number of suboptimal hits
|
||||
|
|
|
|||
|
|
@ -201,7 +201,7 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_
|
|||
if (dist > pes[dir].high) break;
|
||||
if (dist < pes[dir].low) continue;
|
||||
ns = (dist - pes[dir].avg) / pes[dir].std;
|
||||
q = (int)((v.a[i].y>>32) + (v.a[k].y>>32) + .721 * log(2. * erfc(fabs(ns) * M_SQRT1_2)) + .499); // .721 = 1/log(4)
|
||||
q = (int)((v.a[i].y>>32) + (v.a[k].y>>32) + .721 * log(2. * erfc(fabs(ns) * M_SQRT1_2)) * opt->a + .499); // .721 = 1/log(4)
|
||||
if (q < 0) q = 0;
|
||||
p = kv_pushp(pair64_t, u);
|
||||
p->y = (uint64_t)k<<32 | i;
|
||||
|
|
|
|||
Loading…
Reference in New Issue