Merge branch 'master' into master_fixes
Conflicts: Makefile
This commit is contained in:
commit
9228e48efd
7
Makefile
7
Makefile
|
|
@ -1,8 +1,7 @@
|
|||
CC= gcc
|
||||
CFLAGS= -g -Wall -O2 -msse2
|
||||
CXXFLAGS= $(CFLAGS)
|
||||
CFLAGS= -g -Wall -O2
|
||||
AR= ar
|
||||
DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64
|
||||
DFLAGS= -DHAVE_PTHREAD
|
||||
LOBJS= utils.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o
|
||||
AOBJS= QSufSort.o bwt_gen.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
|
||||
is.o bwtindex.o bwape.o kopen.o pemerge.o \
|
||||
|
|
@ -17,8 +16,6 @@ SUBDIRS= .
|
|||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
|
||||
.cc.o:
|
||||
$(CXX) -c $(CXXFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
|
||||
|
||||
all:$(PROG)
|
||||
|
||||
|
|
|
|||
60
NEWS
60
NEWS
|
|
@ -1,3 +1,49 @@
|
|||
Release 0.7.2 (9 March, 2013)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Emergent bug fix: 0.7.0 and 0.7.1 give a wrong sign to TLEN. In addition,
|
||||
flagging `properly paired' also gets improved a little.
|
||||
|
||||
(0.7.2: 9 March 2013, r351)
|
||||
|
||||
|
||||
|
||||
Release 0.7.1 (8 March, 2013)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Changes to BWA-MEM:
|
||||
|
||||
* Bugfix: rare segmentation fault caused by a partial hit to the end of the
|
||||
last sequence.
|
||||
|
||||
* Bugfix: occasional mis-pairing given an interleaved fastq.
|
||||
|
||||
* Bugfix: wrong mate information when the mate is unmapped. SAM generated by
|
||||
BWA-MEM can now be validated with Picard.
|
||||
|
||||
* Improved the performance and accuracy for ultra-long query sequences.
|
||||
Short-read alignment is not affected.
|
||||
|
||||
Changes to other components:
|
||||
|
||||
* In BWA-backtrack and BWA-SW, replaced the code for global alignment,
|
||||
Smith-Waterman and SW extension. The performance and accuracy of the two
|
||||
algorithms stay the same.
|
||||
|
||||
* Added an experimental subcommand to merge overlapping paired ends. The
|
||||
algorithm is very conservative: it may miss true overlaps but rarely makes
|
||||
mistakes.
|
||||
|
||||
An important note is that like BWA-SW, BWA-MEM may output multiple primary
|
||||
alignments for a read, which may cause problems to some tools. For aligning
|
||||
sequence reads, it is advised to use `-M' to flag extra hits as secondary. This
|
||||
option is not the default because multiple primary alignments are theoretically
|
||||
possible in sequence alignment.
|
||||
|
||||
(0.7.1: 8 March 2013, r347)
|
||||
|
||||
|
||||
|
||||
Beta Release 0.7.0 (28 Feburary, 2013)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
|
@ -16,14 +62,14 @@ In addition to the algorithmic improvements, BWA-MEM also implements a few
|
|||
handy features in practical aspects:
|
||||
|
||||
1. BWA-MEM automatically switches between local and glocal (global wrt reads;
|
||||
local wrt reference) alignment. It reports the end-to-end glocal alignment
|
||||
if the glocal alignment is not much worse than the optimal local alignment.
|
||||
Glocal alignment reduces reference bias.
|
||||
local wrt reference) alignment. It reports the end-to-end glocal alignment
|
||||
if the glocal alignment is not much worse than the optimal local alignment.
|
||||
Glocal alignment reduces reference bias.
|
||||
|
||||
2. BWA-MEM automatically infers pair orientation from a batch of single-end
|
||||
alignments. It allows more than one orientations if there are sufficient
|
||||
supporting reads. This feature has not been tested on reads from Illumina
|
||||
jumping library yet. (EXPERIMENTAL)
|
||||
supporting reads. This feature has not been tested on reads from Illumina
|
||||
jumping library yet. (EXPERIMENTAL)
|
||||
|
||||
3. BWA-MEM optionally takes one interleaved fastq for paired-end mapping. It
|
||||
is possible to convert a name-sorted BAM to an interleaved fastq on the fly
|
||||
|
|
@ -37,8 +83,8 @@ handy features in practical aspects:
|
|||
files without replying on bash features.
|
||||
|
||||
6. BWA-MEM provides a few basic APIs for single-end mapping. The `example.c'
|
||||
program in the source code directory implements a full single-end mapper in
|
||||
50 lines of code.
|
||||
program in the source code directory implements a full single-end mapper in
|
||||
50 lines of code.
|
||||
|
||||
The BWA-MEM algorithm is in the beta phase. It is not advised to use BWA-MEM
|
||||
for production use yet. However, when the implementation becomes stable after a
|
||||
|
|
|
|||
4
bwa.1
4
bwa.1
|
|
@ -1,4 +1,4 @@
|
|||
.TH bwa 1 "10 March 2013" "bwa-0.7.1" "Bioinformatics tools"
|
||||
.TH bwa 1 "9 March 2013" "bwa-0.7.2" "Bioinformatics tools"
|
||||
.SH NAME
|
||||
.PP
|
||||
bwa - Burrows-Wheeler Alignment Tool
|
||||
|
|
@ -235,7 +235,7 @@ attached to every read in the output. An example is '@RG\\tID:foo\\tSM:bar'.
|
|||
.BI -T \ INT
|
||||
Don't output alignment with score lower than
|
||||
.IR INT .
|
||||
This option only affects output. [30]
|
||||
This option affects output and occasionally SAM flag 2. [30]
|
||||
.TP
|
||||
.B -a
|
||||
Output all found alignments for single-end or unpaired paired-end reads. These
|
||||
|
|
|
|||
4
bwa.c
4
bwa.c
|
|
@ -75,9 +75,9 @@ void bwa_fill_scmat(int a, int b, int8_t mat[25])
|
|||
for (i = k = 0; i < 4; ++i) {
|
||||
for (j = 0; j < 4; ++j)
|
||||
mat[k++] = i == j? a : -b;
|
||||
mat[k++] = 0; // ambiguous base
|
||||
mat[k++] = -1; // ambiguous base
|
||||
}
|
||||
for (j = 0; j < 5; ++j) mat[k++] = 0;
|
||||
for (j = 0; j < 5; ++j) mat[k++] = -1;
|
||||
}
|
||||
|
||||
// Generate CIGAR when the alignment end points are known
|
||||
|
|
|
|||
16
bwamem.c
16
bwamem.c
|
|
@ -612,7 +612,7 @@ void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, cons
|
|||
#define is_mapped(x) ((x)->rb >= 0 && (x)->rb < (x)->re && (x)->re <= bns->l_pac<<1)
|
||||
int score, n_cigar, is_rev = 0, rid, mid, copy_mate = 0, NM = -1;
|
||||
uint32_t *cigar = 0;
|
||||
int64_t pos;
|
||||
int64_t pos = -1;
|
||||
bwahit_t ptmp, *p = &ptmp;
|
||||
|
||||
if (!p_) { // in this case, generate an unmapped alignment
|
||||
|
|
@ -628,6 +628,7 @@ void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, cons
|
|||
}
|
||||
p->flag |= p->rb >= bns->l_pac? 0x10 : 0; // is reverse strand
|
||||
p->flag |= m && m->rb >= bns->l_pac? 0x20 : 0; // is mate on reverse strand
|
||||
if (is_mapped(p) && m && !is_mapped(m) && (p->flag&0x10)) p->flag |= 0x20; // if mate is unmapped, it takes the strand of the current read
|
||||
kputs(s->name, str); kputc('\t', str);
|
||||
if (is_mapped(p)) { // has a coordinate, no matter whether it is mapped or copied from the mate
|
||||
int sam_flag = p->flag&0xff; // the flag that will be outputed to SAM; it is not always the same as p->flag
|
||||
|
|
@ -670,9 +671,13 @@ void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, cons
|
|||
if (mid == rid) {
|
||||
int64_t p0 = p->rb < bns->l_pac? p->rb : (bns->l_pac<<1) - 1 - p->rb;
|
||||
int64_t p1 = m->rb < bns->l_pac? m->rb : (bns->l_pac<<1) - 1 - m->rb;
|
||||
kputw(p0 - p1 + (p0 > p1? 1 : -1), str);
|
||||
kputw(-(p0 - p1 + (p0 > p1? 1 : p0 < p1? -1 : 0)), str);
|
||||
} else kputw(0, str);
|
||||
kputc('\t', str);
|
||||
} else if (m && is_mapped(p)) { // then copy the position
|
||||
kputsn("\t=\t", 3, str);
|
||||
kputuw(pos - bns->anns[rid].offset + 1, str);
|
||||
kputsn("\t0\t", 3, str);
|
||||
} else kputsn("\t*\t0\t0\t", 7, str);
|
||||
if (p->flag&0x100) { // for secondary alignments, don't write SEQ and QUAL
|
||||
kputsn("*\t*", 3, str);
|
||||
|
|
@ -760,7 +765,12 @@ void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, b
|
|||
else if (h.qual > mapq0) h.qual = mapq0;
|
||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, p->w, bns, pac, s, &h, opt->flag&MEM_F_HARDCLIP, m);
|
||||
}
|
||||
} else bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, 0, opt->flag&MEM_F_HARDCLIP, m);
|
||||
} else {
|
||||
bwahit_t h;
|
||||
memset(&h, 0, sizeof(bwahit_t));
|
||||
h.rb = h.re = -1; h.flag = extra_flag;
|
||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, &h, opt->flag&MEM_F_HARDCLIP, m);
|
||||
}
|
||||
s->sam = str.s;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -260,7 +260,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
|||
// check if an end has multiple hits even after mate-SW
|
||||
for (i = 0; i < 2; ++i) {
|
||||
for (j = 1; j < a[i].n; ++j)
|
||||
if (a[i].a[j].secondary < 0) break;
|
||||
if (a[i].a[j].secondary < 0 && a[i].a[j].score >= opt->T) break;
|
||||
is_multi[i] = j < a[i].n? 1 : 0;
|
||||
}
|
||||
if (is_multi[0] || is_multi[1]) goto no_pairing; // TODO: in rare cases, the true hit may be long but with low score
|
||||
|
|
@ -306,7 +306,11 @@ no_pairing:
|
|||
if (a[i].n && a[i].a[0].score >= opt->T) {
|
||||
mem_alnreg2hit(&a[i].a[0], &h[i]);
|
||||
bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)s[i].seq, &h[i].qb, &h[i].qe, &h[i].rb, &h[i].re);
|
||||
} else h[i].rb = h[i].re = -1;
|
||||
} else {
|
||||
memset(&h[i], 0, sizeof(bwahit_t));
|
||||
h[i].rb = h[i].re = -1;
|
||||
h[i].flag = 1<<(6+i) | 1;
|
||||
}
|
||||
}
|
||||
mem_sam_se(opt, bns, pac, &s[0], &a[0], 0x41, &h[1]);
|
||||
mem_sam_se(opt, bns, pac, &s[1], &a[1], 0x81, &h[0]);
|
||||
|
|
|
|||
1
bwt.h
1
bwt.h
|
|
@ -29,6 +29,7 @@
|
|||
#define BWA_BWT_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
// requirement: (OCC_INTERVAL%16 == 0); please DO NOT change this line because some part of the code assume OCC_INTERVAL=0x80
|
||||
#define OCC_INTV_SHIFT 7
|
||||
|
|
|
|||
|
|
@ -153,7 +153,7 @@ void bsw2_extend_rght(const bsw2opt_t *opt, bwtsw2_t *b, uint8_t *query, int lq,
|
|||
for (k = p->k, j = 0; k < p->k + lt && k < l_pac; ++k)
|
||||
target[j++] = pac[k>>2] >> (~k&3)*2 & 0x3;
|
||||
lt = j;
|
||||
score = ksw_extend(lq - p->beg, &query[p->beg], lt, target, 5, mat, opt->q, opt->r, opt->bw, -1, 1, &qle, &tle, 0, 0, 0);
|
||||
score = ksw_extend(lq - p->beg, &query[p->beg], lt, target, 5, mat, opt->q, opt->r, opt->bw, -1, 1, &qle, &tle, 0, 0, 0) - 1;
|
||||
// if (score < p->G) fprintf(stderr, "[bsw2_extend_hits] %d < %d\n", score, p->G);
|
||||
if (score >= p->G) {
|
||||
p->G = score;
|
||||
|
|
|
|||
Loading…
Reference in New Issue