diff --git a/Makefile b/Makefile index 60c2104..7ea708c 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -CC= gcc -#CC= clang --analyze +#CC= gcc +CC= clang --analyze CFLAGS= -g -Wall -Wno-unused-function -O2 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS AR= ar diff --git a/NEWS b/NEWS.md similarity index 91% rename from NEWS rename to NEWS.md index 20ef1ed..bbc8da1 100644 --- a/NEWS +++ b/NEWS.md @@ -1,13 +1,13 @@ -Release 0.7.9 (11 May, 2014) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Release 0.7.9 (19 May, 2014) +---------------------------- This release brings several major changes to BWA-MEM. Notably, BWA-MEM now -formally supports PacBio read-to-reference alignment and experimentally support +formally supports PacBio read-to-reference alignment and experimentally supports PacBio read-to-read alignment. BWA-MEM also runs faster at a minor cost of accuracy. The speedup is more significant when GRCh38 is in use. More specifically: - * Support PacBio subreads to reference alignment. Although older BWA-MEM works + * Support PacBio subread-to-reference alignment. Although older BWA-MEM works with PacBio data in principle, the resultant alignments are frequently fragmented. In this release, we fine tuned existing methods and introduced new heuristics to improve PacBio alignment. These changes are not used by @@ -22,8 +22,8 @@ specifically: * Support mapping to ALT contigs in GRCh38 (EXPERIMENTAL). We provide a script to postprocess hits in the XA tag to adjust the mapping quality and generate - new primary alignments to all overlapping ALT contigs. We would NOT - recommended this feature for production uses. + new primary alignments to all overlapping ALT contigs. We would *NOT* + recommend this feature for production uses. * Improved alignments to many short reference sequences. Older BWA-MEM may generate an alignment bridging two or more adjacent reference sequences. @@ -33,7 +33,7 @@ specifically: two or more reference sequences any more. 
* Reduced the maximum seed occurrence from 10000 to 500. Reduced the maximum - number of Smith-Waterman mate rescue from 100 to 50. Added a heuristic to + rounds of Smith-Waterman mate rescue from 100 to 50. Added a heuristic to lower the mapping quality if a read contains seeds with excessive occurrences. These changes make BWA-MEM faster at a minor cost of accuracy in highly repetitive regions. @@ -42,7 +42,7 @@ specifically: * Bugfix: incomplete alignment extension in corner cases. - * Bugfix: integer overflow when aligning low query sequences. + * Bugfix: integer overflow when aligning long query sequences. * Bugfix: chain score is not computed correctly (almost no practical effect) @@ -50,12 +50,17 @@ specifically: * Added FAQs to README -(0.7.9: 11 May 2014, r777) +Changes in BWA-backtrack: + + * Bugfix: a segmentation fault when an alignment stands out of the end of the + last chromosome. + +(0.7.9: 19 May 2014, r782) Release 0.7.8 (31 March, 2014) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +------------------------------ Changes in BWA-MEM: @@ -63,7 +68,7 @@ Changes in BWA-MEM: Short-read alignment is not affected. * Bugfix: unnecessarily large bandwidth used during global alignment, - which reduces the mapping speed by ~5% for short reads. Results are not + which reduces the mapping speed by ~5% for short reads. Results are not affected. * Bugfix: when the matching score is not one, paired-end mapping quality is @@ -87,14 +92,14 @@ With the default setting, 0.7.8 and 0.7.7 gave identical output on one million Release 0.7.7 (25 Feburary, 2014) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +--------------------------------- This release fixes incorrect MD tags in the BWA-MEM output. A note about short-read mapping to GRCh38. The new human reference genome GRCh38 contains 60Mbp program generated alpha repeat arrays, some of which are hard masked as they cannot be localized. These highly repetitive arrays make -BWA-MEM ~50% slower. 
If you are concerned with the performance of BWA-MEM, you +BWA-MEM ~50% slower. If you are concerned with the performance of BWA-MEM, you may consider to use option "-c2000 -m50". On simulated data, this setting helps the performance at a very minor cost on accuracy. I may consider to change the default in future releases. @@ -104,7 +109,7 @@ default in future releases. Release 0.7.6 (31 Januaray, 2014) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +--------------------------------- Changes in BWA-MEM: @@ -160,7 +165,7 @@ where BWA-SW may excel. Release 0.7.5a (30 May, 2013) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +----------------------------- Fixed a bug in BWA-backtrack which leads to off-by-one mapping errors in rare cases. @@ -170,7 +175,7 @@ cases. Release 0.7.5 (29 May, 2013) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +---------------------------- Changes in all components: @@ -222,7 +227,7 @@ Thank you. Release 0.7.4 (23 April, 2013) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +------------------------------ This is a bugfix release. Most of bugs are considered to be minor which only occur very rarely. @@ -254,7 +259,7 @@ BWA-backtrack for short-read mapping. Release 0.7.3a (15 March, 2013) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +------------------------------- In 0.7.3, the wrong CIGAR bug was only fixed in one scenario, but not fixed in another corner case. @@ -264,7 +269,7 @@ in another corner case. Release 0.7.3 (15 March, 2013) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +------------------------------ Changes to BWA-MEM: @@ -280,7 +285,7 @@ Changes to BWA-MEM: we can see the entire picture of alignment from one SAM line. XP gives the position, CIGAR, NM and mapQ of each aligned subsequence of the query. -BWA-MEM has been used to align ~300Gbp 100-700bp SE/PE reads. SNP/indel calling +BWA-MEM has been used to align ~300Gbp 100-700bp SE/PE reads. SNP/indel calling has also been evaluated on part of these data. BWA-MEM generally gives better pre-filtered SNP calls than BWA. 
No significant issues have been observed since 0.7.2, though minor improvements or bugs (e.g. the bug fixed in this release) @@ -295,17 +300,17 @@ In addition, more detailed description of the BWA-MEM algorithm can be found at Release 0.7.2 (9 March, 2013) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +----------------------------- Emergent bug fix: 0.7.0 and 0.7.1 give a wrong sign to TLEN. In addition, -flagging `properly paired' also gets improved a little. +flagging 'properly paired' also gets improved a little. (0.7.2: 9 March 2013, r351) Release 0.7.1 (8 March, 2013) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +----------------------------- Changes to BWA-MEM: @@ -332,7 +337,7 @@ Changes to other components: An important note is that like BWA-SW, BWA-MEM may output multiple primary alignments for a read, which may cause problems to some tools. For aligning -sequence reads, it is advised to use `-M' to flag extra hits as secondary. This +sequence reads, it is advised to use '-M' to flag extra hits as secondary. This option is not the default because multiple primary alignments are theoretically possible in sequence alignment. @@ -341,7 +346,7 @@ possible in sequence alignment. Beta Release 0.7.0 (28 Feburary, 2013) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-------------------------------------- This release comes with a new alignment algorithm, BWA-MEM, for 70bp-1Mbp query sequences. BWA-MEM essentially seeds alignments with a variant of the fastmap @@ -378,7 +383,7 @@ handy features in practical aspects: (bwa mem ref.fa 'kept == 0) free(c->seeds); else a[k++] = a[i]; } - n_chn = k; return k; } diff --git a/bwase.c b/bwase.c index 30f306e..cb912ec 100644 --- a/bwase.c +++ b/bwase.c @@ -559,7 +559,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f fprintf(stderr, "[bwa_aln_core] print alignments... 
"); for (i = 0; i < n_seqs; ++i) bwa_print_sam1(bns, seqs + i, 0, opt.mode, opt.max_top2); - fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); + fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); bwa_free_read_seq(n_seqs, seqs); fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs); diff --git a/bwtaln.c b/bwtaln.c index 68d0274..20b01cd 100644 --- a/bwtaln.c +++ b/bwtaln.c @@ -206,7 +206,7 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt); #endif - fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); + fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); fprintf(stderr, "[bwa_aln_core] write to the disk... "); @@ -215,7 +215,7 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) err_fwrite(&p->n_aln, 4, 1, stdout); if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout); } - fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); + fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); bwa_free_read_seq(n_seqs, seqs); fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs); diff --git a/kbtree.h b/kbtree.h index 2b76953..0da101d 100644 --- a/kbtree.h +++ b/kbtree.h @@ -77,7 +77,7 @@ typedef struct { *top++ = (b)->root; \ while (top != stack) { \ x = *--top; \ - if (x->is_internal == 0) { free(x); continue; } \ + if (x == 0 || x->is_internal == 0) { free(x); continue; } \ for (i = 0; i <= x->n; ++i) \ if (__KB_PTR(b, x)[i]) { \ if (top - stack == max) { \ diff --git a/main.c b/main.c index 31b74ec..bddffd2 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.8-r779-dirty" +#define PACKAGE_VERSION "0.7.8-r782" #endif int bwa_fa2pac(int argc, char *argv[]);