Merge branch 'dev'

Conflicts:
	main.c
This commit is contained in:
Heng Li 2014-09-16 14:39:33 -04:00
commit 945d39fb79
3 changed files with 60 additions and 7 deletions

41
NEWS.md
View File

@ -1,3 +1,44 @@
Release 0.7.11 (16 September, 2014)
-----------------------------------
A major change to BWA-MEM is the support of mapping to ALT contigs. To use this
feature, users need to manually create a file "<index>.alt" with each line
giving the name of an ALT contig. In mapping, BWA-MEM considers all chromosomes
and contigs equally when it finds seeds, constructs chains, extends seeds and
derives the final alignments. It also uses all hits for the estimation of
mapping quality of ALT hits. However, BWA-MEM ignores ALT hits when it
estimates the mapping quality of hits to the primary assembly. As a result,
having ALT contigs has almost no effect on alignments to the primary assembly
(seeding may be affected in rare corner cases). At the same time, users may
get a primary alignment to ALT contigs (no 0x800 flag) if there are no good
hits to the primary assembly, or get a supplementary alignment to ALT contigs
if it is better than hits to the primary assembly. Starting with this release, it is
recommended to include ALT contigs.
Users may consider using ALT contigs from GRCh38. I am also constructing a
non-redundant and more complete set of sequences missing from GRCh38.
Other notable changes to BWA-MEM:
* Added option `-b` to `bwa index`. This option tunes the batch size used in
the construction of BWT. It is advised to use a large `-b` for huge reference
sequences such as the *nt* database.
* Optimized for PacBio data. This includes a change to the scoring based on a
mini-study done by Aaron Quinlan and a heuristic speedup. Further speedup is
possible, but needs more careful investigation.
* Dropped PacBio read-to-read alignment for now. BWA-MEM is only good at
finding the best hit, not all hits. Option `-x pbread` is still available,
but not shown on the command line.
* Added new pre-setting for Oxford Nanopore 2D reads. For small genomes,
though, LAST is still more sensitive.
(0.7.11: 16 September 2014, r845)
Release 0.7.10 (13 July, 2014) Release 0.7.10 (13 July, 2014)
------------------------------ ------------------------------

View File

@ -155,7 +155,9 @@ int main_mem(int argc, char *argv[])
fprintf(stderr, " -L INT[,INT] penalty for 5'- and 3'-end clipping [%d,%d]\n", opt->pen_clip5, opt->pen_clip3); fprintf(stderr, " -L INT[,INT] penalty for 5'- and 3'-end clipping [%d,%d]\n", opt->pen_clip5, opt->pen_clip3);
fprintf(stderr, " -U INT penalty for an unpaired read pair [%d]\n\n", opt->pen_unpaired); fprintf(stderr, " -U INT penalty for an unpaired read pair [%d]\n\n", opt->pen_unpaired);
fprintf(stderr, " -x STR read type. Setting -x changes multiple parameters unless overriden [null]\n"); fprintf(stderr, " -x STR read type. Setting -x changes multiple parameters unless overriden [null]\n");
fprintf(stderr, " pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0\n"); fprintf(stderr, " pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref)\n");
fprintf(stderr, " ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref)\n");
fprintf(stderr, " intractg: -B9 -O16 -L5 (intra-species contigs to ref)\n");
// fprintf(stderr, " pbread: -k13 -W40 -c1000 -r10 -A1 -B1 -O1 -E1 -N25 -FeaD.001\n"); // fprintf(stderr, " pbread: -k13 -W40 -c1000 -r10 -A1 -B1 -O1 -E1 -N25 -FeaD.001\n");
fprintf(stderr, "\nInput/output options:\n\n"); fprintf(stderr, "\nInput/output options:\n\n");
fprintf(stderr, " -p first query file consists of interleaved paired-end sequences\n"); fprintf(stderr, " -p first query file consists of interleaved paired-end sequences\n");
@ -181,23 +183,33 @@ int main_mem(int argc, char *argv[])
} }
if (mode) { if (mode) {
if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "pbread1") == 0 || strcmp(mode, "pbread") == 0) { if (strcmp(mode, "intractg") == 0) {
if (!opt0.a) opt->a = 1, opt0.a = 1; if (!opt0.o_del) opt->o_del = 16;
update_a(opt, &opt0); if (!opt0.o_ins) opt->o_ins = 16;
if (!opt0.b) opt->b = 9;
if (!opt0.pen_clip5) opt->pen_clip5 = 5;
if (!opt0.pen_clip3) opt->pen_clip3 = 5;
} else if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "pbread") == 0 || strcmp(mode, "ont2d") == 0) {
if (!opt0.o_del) opt->o_del = 1; if (!opt0.o_del) opt->o_del = 1;
if (!opt0.e_del) opt->e_del = 1; if (!opt0.e_del) opt->e_del = 1;
if (!opt0.o_ins) opt->o_ins = 1; if (!opt0.o_ins) opt->o_ins = 1;
if (!opt0.e_ins) opt->e_ins = 1; if (!opt0.e_ins) opt->e_ins = 1;
if (!opt0.b) opt->b = 1; if (!opt0.b) opt->b = 1;
if (opt0.split_factor == 0.) opt->split_factor = 10.; if (opt0.split_factor == 0.) opt->split_factor = 10.;
if (!opt0.min_chain_weight) opt->min_chain_weight = 40; if (strcmp(mode, "pbread") == 0) { // pacbio read-to-read setting; NOT working well!
if (strcmp(mode, "pbread1") == 0 || strcmp(mode, "pbread") == 0) {
opt->flag |= MEM_F_ALL | MEM_F_SELF_OVLP | MEM_F_ALN_REG; opt->flag |= MEM_F_ALL | MEM_F_SELF_OVLP | MEM_F_ALN_REG;
if (!opt0.min_chain_weight) opt->min_chain_weight = 40;
if (!opt0.max_occ) opt->max_occ = 1000; if (!opt0.max_occ) opt->max_occ = 1000;
if (!opt0.min_seed_len) opt->min_seed_len = 13; if (!opt0.min_seed_len) opt->min_seed_len = 13;
if (!opt0.max_chain_extend) opt->max_chain_extend = 25; if (!opt0.max_chain_extend) opt->max_chain_extend = 25;
if (opt0.drop_ratio == 0.) opt->drop_ratio = .001; if (opt0.drop_ratio == 0.) opt->drop_ratio = .001;
} else if (strcmp(mode, "ont2d") == 0) {
if (!opt0.min_chain_weight) opt->min_chain_weight = 20;
if (!opt0.min_seed_len) opt->min_seed_len = 14;
if (!opt0.pen_clip5) opt->pen_clip5 = 0;
if (!opt0.pen_clip3) opt->pen_clip3 = 0;
} else { } else {
if (!opt0.min_chain_weight) opt->min_chain_weight = 40;
if (!opt0.min_seed_len) opt->min_seed_len = 17; if (!opt0.min_seed_len) opt->min_seed_len = 17;
if (!opt0.pen_clip5) opt->pen_clip5 = 0; if (!opt0.pen_clip5) opt->pen_clip5 = 0;
if (!opt0.pen_clip3) opt->pen_clip3 = 0; if (!opt0.pen_clip3) opt->pen_clip3 = 0;

2
main.c
View File

@ -4,7 +4,7 @@
#include "utils.h" #include "utils.h"
#ifndef PACKAGE_VERSION #ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.10-r837-dirty" #define PACKAGE_VERSION "0.7.10-r846-dirty"
#endif #endif
int bwa_fa2pac(int argc, char *argv[]); int bwa_fa2pac(int argc, char *argv[]);