From 90518f11e391071c6d4552d7eeca41cf1d199441 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 16 Sep 2014 10:38:15 -0400 Subject: [PATCH 1/3] r843: presetting for ONT 2d reads Somewhat working for 1d reads, but not very well --- fastmap.c | 13 ++++++++++--- main.c | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/fastmap.c b/fastmap.c index 6cdad68..57c54e7 100644 --- a/fastmap.c +++ b/fastmap.c @@ -156,6 +156,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -U INT penalty for an unpaired read pair [%d]\n\n", opt->pen_unpaired); fprintf(stderr, " -x STR read type. Setting -x changes multiple parameters unless overriden [null]\n"); fprintf(stderr, " pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0\n"); + fprintf(stderr, " ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0\n"); // fprintf(stderr, " pbread: -k13 -W40 -c1000 -r10 -A1 -B1 -O1 -E1 -N25 -FeaD.001\n"); fprintf(stderr, "\nInput/output options:\n\n"); fprintf(stderr, " -p first query file consists of interleaved paired-end sequences\n"); @@ -181,7 +182,7 @@ int main_mem(int argc, char *argv[]) } if (mode) { - if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "pbread1") == 0 || strcmp(mode, "pbread") == 0) { + if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "pbread1") == 0 || strcmp(mode, "pbread") == 0 || strcmp(mode, "ont2d") == 0) { if (!opt0.a) opt->a = 1, opt0.a = 1; update_a(opt, &opt0); if (!opt0.o_del) opt->o_del = 1; @@ -190,14 +191,20 @@ int main_mem(int argc, char *argv[]) if (!opt0.e_ins) opt->e_ins = 1; if (!opt0.b) opt->b = 1; if (opt0.split_factor == 0.) opt->split_factor = 10.; - if (!opt0.min_chain_weight) opt->min_chain_weight = 40; - if (strcmp(mode, "pbread1") == 0 || strcmp(mode, "pbread") == 0) { + if (strcmp(mode, "pbread1") == 0 || strcmp(mode, "pbread") == 0) { // pacbio read-to-read setting; NOT working well! 
opt->flag |= MEM_F_ALL | MEM_F_SELF_OVLP | MEM_F_ALN_REG; + if (!opt0.min_chain_weight) opt->min_chain_weight = 40; if (!opt0.max_occ) opt->max_occ = 1000; if (!opt0.min_seed_len) opt->min_seed_len = 13; if (!opt0.max_chain_extend) opt->max_chain_extend = 25; if (opt0.drop_ratio == 0.) opt->drop_ratio = .001; + } else if (strcmp(mode, "ont2d") == 0) { + if (!opt0.min_chain_weight) opt->min_chain_weight = 20; + if (!opt0.min_seed_len) opt->min_seed_len = 14; + if (!opt0.pen_clip5) opt->pen_clip5 = 0; + if (!opt0.pen_clip3) opt->pen_clip3 = 0; } else { + if (!opt0.min_chain_weight) opt->min_chain_weight = 40; if (!opt0.min_seed_len) opt->min_seed_len = 17; if (!opt0.pen_clip5) opt->pen_clip5 = 0; if (!opt0.pen_clip3) opt->pen_clip3 = 0; diff --git a/main.c b/main.c index 0b09051..25ce9b4 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.10-r830-dirty" +#define PACKAGE_VERSION "0.7.10-r843-dirty" #endif int bwa_fa2pac(int argc, char *argv[]); From 92bc6849a3855e5673d2d36b39f9be725b90380a Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 16 Sep 2014 10:53:07 -0400 Subject: [PATCH 2/3] r844: added intra-species contig mapping mode --- fastmap.c | 17 +++++++++++------ main.c | 2 +- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fastmap.c b/fastmap.c index 57c54e7..546b2ea 100644 --- a/fastmap.c +++ b/fastmap.c @@ -155,8 +155,9 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -L INT[,INT] penalty for 5'- and 3'-end clipping [%d,%d]\n", opt->pen_clip5, opt->pen_clip3); fprintf(stderr, " -U INT penalty for an unpaired read pair [%d]\n\n", opt->pen_unpaired); fprintf(stderr, " -x STR read type. 
Setting -x changes multiple parameters unless overriden [null]\n"); - fprintf(stderr, " pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0\n"); - fprintf(stderr, " ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0\n"); + fprintf(stderr, " pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref)\n"); + fprintf(stderr, " ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref)\n"); + fprintf(stderr, " intractg: -B9 -O16 -L5 (intra-species contigs to ref)\n"); // fprintf(stderr, " pbread: -k13 -W40 -c1000 -r10 -A1 -B1 -O1 -E1 -N25 -FeaD.001\n"); fprintf(stderr, "\nInput/output options:\n\n"); fprintf(stderr, " -p first query file consists of interleaved paired-end sequences\n"); @@ -182,16 +183,20 @@ int main_mem(int argc, char *argv[]) } if (mode) { - if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "pbread1") == 0 || strcmp(mode, "pbread") == 0 || strcmp(mode, "ont2d") == 0) { - if (!opt0.a) opt->a = 1, opt0.a = 1; - update_a(opt, &opt0); + if (strcmp(mode, "intractg") == 0) { + if (!opt0.o_del) opt->o_del = 16; + if (!opt0.o_ins) opt->o_ins = 16; + if (!opt0.b) opt->b = 9; + if (!opt0.pen_clip5) opt->pen_clip5 = 5; + if (!opt0.pen_clip3) opt->pen_clip3 = 5; + } else if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "pbread") == 0 || strcmp(mode, "ont2d") == 0) { if (!opt0.o_del) opt->o_del = 1; if (!opt0.e_del) opt->e_del = 1; if (!opt0.o_ins) opt->o_ins = 1; if (!opt0.e_ins) opt->e_ins = 1; if (!opt0.b) opt->b = 1; if (opt0.split_factor == 0.) opt->split_factor = 10.; - if (strcmp(mode, "pbread1") == 0 || strcmp(mode, "pbread") == 0) { // pacbio read-to-read setting; NOT working well! + if (strcmp(mode, "pbread") == 0) { // pacbio read-to-read setting; NOT working well! 
opt->flag |= MEM_F_ALL | MEM_F_SELF_OVLP | MEM_F_ALN_REG; if (!opt0.min_chain_weight) opt->min_chain_weight = 40; if (!opt0.max_occ) opt->max_occ = 1000; diff --git a/main.c b/main.c index 25ce9b4..903d4ce 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.10-r843-dirty" +#define PACKAGE_VERSION "0.7.10-r844-dirty" #endif int bwa_fa2pac(int argc, char *argv[]); From a458442b241dc70ed5223be36c8f5c8f0c057644 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 16 Sep 2014 14:38:41 -0400 Subject: [PATCH 3/3] r845: updated NEWS I will use the new version for a while and then release it. --- NEWS.md | 41 +++++++++++++++++++++++++++++++++++++++++ main.c | 2 +- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 33a4760..cc7946c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,44 @@ +Release 0.7.11 (16 September, 2014) +----------------------------------- + +A major change to BWA-MEM is the support of mapping to ALT contigs. To use this +feature, users need to manually create a file ".alt" with each line +giving the name of an ALT contig. In mapping, BWA-MEM considers all chromosomes +and contigs equally when it finds seeds, constructs chains, extends seeds and +derives the final alignments. It also uses all hits for the estimation of +mapping quality of ALT hits. However, BWA-MEM ignores ALT hits when it +estimates the mapping quality of hits to the primary assembly. As a result, +having ALT contigs has almost no effect on alignments to the primary assembly +(seeding may be affected in rare corner cases). At the same time, users may +get a primary alignment to ALT contigs (no 0x800 flag) if there are no good +hits to the primary assembly, or get a supplementary alignment to ALT contigs +if it is better than hits to the primary assembly. Since this release, it is +recommended to include ALT contigs. + +Users may consider using ALT contigs from GRCh38. 
I am also constructing a +non-redundant and more complete set of sequences missing from GRCh38. + +Other notable changes to BWA-MEM: + + * Added option `-b` to `bwa index`. This option tunes the batch size used in + the construction of BWT. It is advised to use large `-b` for huge reference + sequences such as the *nt* database. + + * Optimized for PacBio data. This includes a change to the scoring based on a + mini-study done by Aaron Quinlan and a heuristic speedup. Further speedup is + possible, but needs more careful investigation. + + * Dropped PacBio read-to-read alignment for now. BWA-MEM is only good at + finding the best hit, not all hits. Option `-x pbread` is still available, + but not shown on the command line. + + * Added new pre-setting for Oxford Nanopore 2D reads. For small genomes, + though, LAST is still more sensitive. + +(0.7.11: 16 September 2014, r845) + + + Release 0.7.10 (13 July, 2014) ------------------------------ diff --git a/main.c b/main.c index 903d4ce..35e598b 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.10-r844-dirty" +#define PACKAGE_VERSION "0.7.10-r845-dirty" #endif int bwa_fa2pac(int argc, char *argv[]);