From c4ac1bee8804c3636d26638b4243973cb14cfa57 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 17 Oct 2014 17:02:26 -0400 Subject: [PATCH 01/13] added Travis CI integration --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index cfc84c6..c842202 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +![Travis-CI](https://travis-ci.org/lh3/bwa.svg?branch=dev) ##Getting started git clone https://github.com/lh3/bwa.git From 059c8eb8995bd3cce492f09fb5f140cdeda7fb8b Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 17 Oct 2014 21:03:11 -0400 Subject: [PATCH 02/13] for coverity --- .travis.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.travis.yml b/.travis.yml index e386b27..910510c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,3 +3,19 @@ compiler: - gcc - clang script: make + +env: + global: + # The next declaration is the encrypted COVERITY_SCAN_TOKEN, created + # via the "travis encrypt" command using the project repo's public key + - secure: "iZ1KIL71+t+XHQOaOkuRDDvtUoQtvMciKI+U8u9r20dz1C+sdlMNt39e0NBvfA5iLj2mo9QaTVNsf31OucfmykSkbB7adFWsYx/11BdVd2t2PYjb3n07GJjDSodfo/8l3rrglkcB8ewYK37JImvRBQuJVn6MmpU32eZweo73c5o=" + +addons: + coverity_scan: + project: + name: "lh3/bwa" + description: "Build submitted via Travis CI" + notification_email: lh3@me.com + build_command_prepend: "make clean" + build_command: "make" + branch_pattern: dev From f968b4b73d3a03e972a186637147c13460f72b9b Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 17 Oct 2014 21:28:44 -0400 Subject: [PATCH 03/13] added drone build --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index c842202..e38e3de 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ ![Travis-CI](https://travis-ci.org/lh3/bwa.svg?branch=dev) +[![Build Status](https://drone.io/github.com/lh3/bwa/status.png)](https://drone.io/github.com/lh3/bwa/latest) ##Getting started git clone https://github.com/lh3/bwa.git From e65dc133f44f44a6bf486c52bb5add1e71ab5c01 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 17 Oct 2014 21:34:43 -0400 Subject: [PATCH 04/13] updated Travis CI link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e38e3de..f81715c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![Travis-CI](https://travis-ci.org/lh3/bwa.svg?branch=dev) +[![Build Status](https://travis-ci.org/lh3/bwa.svg?branch=dev)](https://travis-ci.org/lh3/bwa) [![Build Status](https://drone.io/github.com/lh3/bwa/status.png)](https://drone.io/github.com/lh3/bwa/latest) ##Getting started From 6e136947212c66bd98dafd39333c5b365bde0308 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 17 Oct 2014 21:46:11 -0400 Subject: [PATCH 05/13] removed coverity --- .travis.yml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/.travis.yml b/.travis.yml index 910510c..e386b27 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,19 +3,3 @@ compiler: - gcc - clang script: make - -env: - global: - # The next declaration is the encrypted COVERITY_SCAN_TOKEN, created - # via the "travis encrypt" command using the project repo's public key - - secure: "iZ1KIL71+t+XHQOaOkuRDDvtUoQtvMciKI+U8u9r20dz1C+sdlMNt39e0NBvfA5iLj2mo9QaTVNsf31OucfmykSkbB7adFWsYx/11BdVd2t2PYjb3n07GJjDSodfo/8l3rrglkcB8ewYK37JImvRBQuJVn6MmpU32eZweo73c5o=" - -addons: - coverity_scan: - project: - name: "lh3/bwa" - description: "Build submitted via Travis CI" - notification_email: lh3@me.com - build_command_prepend: "make clean" - build_command: "make" - branch_pattern: dev From 3370ae9e358a9fc75b12b44eb3fee746cfe0ebc8 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sun, 19 Oct 2014 20:43:53 -0400 Subject: [PATCH 06/13] r926: prepare to move -g to bwa-postalt.js --- bwa-postalt.js | 11 +++++++---- bwamem.c | 2 +- main.c | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/bwa-postalt.js b/bwa-postalt.js index 411cfdd..47c4847 100644 --- a/bwa-postalt.js +++ b/bwa-postalt.js @@ -490,13 +490,16 @@ function bwa_postalt(args) while ((m = re_cigar.exec(t[5])) != null) if (m[2] == 'M' || m[2] == 'D' || m[2] == 'N') end += parseInt(m[1]); + var om = -1; + for (var j = 11; j < s.length; ++j) + if ((m = /^om:i:(\d+)/.exec(s[j])) != null) + om = parseInt(m[1]); if (start < l[3] && l[2] < end) { - var om = -1; - for (var j = 11; j < s.length; ++j) - if ((m = /^om:i:(\d+)/.exec(s[j])) != null) - om = parseInt(m[1]); if (om > 0) s[4] = om; s[4] = s[4] < mapQ? s[4] : mapQ; + } else { + if (om < 0) s.push("om:i:" + s[4]); + s[4] = 0; } } } diff --git a/bwamem.c b/bwamem.c index 79785f4..08588d5 100644 --- a/bwamem.c +++ b/bwamem.c @@ -79,7 +79,7 @@ mem_opt_t *mem_opt_init() o->min_chain_weight = 0; o->max_chain_extend = 1<<30; o->mapQ_coef_len = 50; o->mapQ_coef_fac = log(o->mapQ_coef_len); - o->min_pa_ratio = 0.8; + o->min_pa_ratio = 0; bwa_fill_scmat(o->a, o->b, o->mat); return o; } diff --git a/main.c b/main.c index 1a18559..b103798 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.10-r915-dirty" +#define PACKAGE_VERSION "0.7.10-r926-dirty" #endif int bwa_fa2pac(int argc, char *argv[]); From 6f41a27e27df2fd6788583f764b01dc0ed7eff1e Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sun, 19 Oct 2014 20:53:47 -0400 Subject: [PATCH 07/13] towards adding "-r" to postprocessing --- bwa-postalt.js | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/bwa-postalt.js b/bwa-postalt.js index 47c4847..2132a66 100644 --- a/bwa-postalt.js +++ b/bwa-postalt.js @@ -205,15 +205,17 @@ function parse_hit(s, opt) function bwa_postalt(args) { - var c, opt = { a:1, b:4, o:6, e:1, verbose:3, show_pri:false, recover_mapq:true, min_mapq:10, min_sc:90, max_nm_sc:10, show_ev:false }; + var c, opt = { a:1, b:4, o:6, e:1, verbose:3, show_pri:false, recover_mapq:true, min_mapq:10, min_sc:90, max_nm_sc:10, show_ev:false, min_pa_ratio:0.8 }; - while ((c = getopt(args, 'Pqev:p:')) != null) { + while ((c = getopt(args, 'Pqev:p:r')) != null) { if (c == 'v') opt.verbose = parseInt(getopt.arg); else if (c == 'p') opt.pre = getopt.arg; else if (c == 'P') opt.show_pri = true; else if (c == 'q') opt.recover_maq = false; else if (c == 'e') opt.show_ev = true; + else if (c == 'r') opt.min_pa_ratio = parseFloat(optarg.arg); } + if (opt.min_pa_ratio > 1.) opt.min_pa_ratio = 1.; if (opt.show_ev && opt.pre == null) { warn("ERROR: option '-p' must be specified if '-e' is applied."); @@ -490,14 +492,16 @@ function bwa_postalt(args) while ((m = re_cigar.exec(t[5])) != null) if (m[2] == 'M' || m[2] == 'D' || m[2] == 'N') end += parseInt(m[1]); - var om = -1; + var om = -1, pa = 10.; for (var j = 11; j < s.length; ++j) if ((m = /^om:i:(\d+)/.exec(s[j])) != null) om = parseInt(m[1]); - if (start < l[3] && l[2] < end) { + else if ((m = /^pa:f:(\S+)/.exec(s[j])) != null) + pa = parseFloat(m[1]); + if (start < l[3] && l[2] < end) { // overlapping the lifted hit if (om > 0) s[4] = om; s[4] = s[4] < mapQ? s[4] : mapQ; - } else { + } else if (pa < opt.min_pa_ratio) { // not overlapping; has a small pa if (om < 0) s.push("om:i:" + s[4]); s[4] = 0; } From 25d45512d9bdddc9176ca713cdd175260c6917ea Mon Sep 17 00:00:00 2001 From: Heng Li Date: Mon, 20 Oct 2014 14:17:56 -0400 Subject: [PATCH 08/13] code backup --- bwa-postalt.js | 13 +++++++------ bwt.c | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/bwa-postalt.js b/bwa-postalt.js index 2132a66..1580182 100644 --- a/bwa-postalt.js +++ b/bwa-postalt.js @@ -207,13 +207,13 @@ function bwa_postalt(args) { var c, opt = { a:1, b:4, o:6, e:1, verbose:3, show_pri:false, recover_mapq:true, min_mapq:10, min_sc:90, max_nm_sc:10, show_ev:false, min_pa_ratio:0.8 }; - while ((c = getopt(args, 'Pqev:p:r')) != null) { + while ((c = getopt(args, 'Pqev:p:r:')) != null) { if (c == 'v') opt.verbose = parseInt(getopt.arg); else if (c == 'p') opt.pre = getopt.arg; else if (c == 'P') opt.show_pri = true; else if (c == 'q') opt.recover_maq = false; else if (c == 'e') opt.show_ev = true; - else if (c == 'r') opt.min_pa_ratio = parseFloat(optarg.arg); + else if (c == 'r') opt.min_pa_ratio = parseFloat(getopt.arg); } if (opt.min_pa_ratio > 1.) opt.min_pa_ratio = 1.; @@ -485,20 +485,21 @@ function bwa_postalt(args) if (opt.recover_mapq && n_rpt_lifted == 1 && mapQ > 0) { var l = rpt_lifted; for (var i = 0; i < buf2.length; ++i) { - var s = buf2[i]; - if (l[0] != s[2]) continue; // different chr - if (((s[1]&16) != 0) != l[1]) continue; // different strand + var s = buf2[i], is_ovlp = true; + if (l[0] != s[2]) is_ovlp = false; // different chr + if (((s[1]&16) != 0) != l[1]) is_ovlp = false; // different strand var start = s[3] - 1, end = start; while ((m = re_cigar.exec(t[5])) != null) if (m[2] == 'M' || m[2] == 'D' || m[2] == 'N') end += parseInt(m[1]); + if (!(start < l[3] && l[2] < end)) is_ovlp = false; // no overlap var om = -1, pa = 10.; for (var j = 11; j < s.length; ++j) if ((m = /^om:i:(\d+)/.exec(s[j])) != null) om = parseInt(m[1]); else if ((m = /^pa:f:(\S+)/.exec(s[j])) != null) pa = parseFloat(m[1]); - if (start < l[3] && l[2] < end) { // overlapping the lifted hit + if (is_ovlp) { // overlapping the lifted hit if (om > 0) s[4] = om; s[4] = s[4] < mapQ? s[4] : mapQ; } else if (pa < opt.min_pa_ratio) { // not overlapping; has a small pa diff --git a/bwt.c b/bwt.c index a719f53..833e37d 100644 --- a/bwt.c +++ b/bwt.c @@ -305,10 +305,10 @@ int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, if (q[i] < 4) { // an A/C/G/T base c = 3 - q[i]; // complement of q[i] bwt_extend(bwt, &ik, ok, 0); + if (ik.x[2] < max_intv && (i - x > max_len || ik.x[2] == 1)) break; if (ok[c].x[2] != ik.x[2]) { // change of the interval size kv_push(bwtintv_t, *curr, ik); if (ok[c].x[2] < min_intv) break; // the interval size is too small to be extended further - if (i - x > max_len && ik.x[2] < max_intv) break; } ik = ok[c]; ik.info = i + 1; } else { // an ambiguous base From 038af2a55132da0b052f8dd7299872d3bc83f0a7 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Mon, 20 Oct 2014 17:00:31 -0400 Subject: [PATCH 09/13] r929: added simplified LAST-like seeding --- bwamem.c | 21 ++++++++++++++++++--- bwamem_extra.c | 2 +- bwt.c | 44 ++++++++++++++++++++++++++++++++------------ bwt.h | 4 ++-- main.c | 2 +- 5 files changed, 54 insertions(+), 19 deletions(-) diff --git a/bwamem.c b/bwamem.c index 08588d5..95478a6 100644 --- a/bwamem.c +++ b/bwamem.c @@ -121,7 +121,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co // first pass: find all SMEMs while (x < len) { if (seq[x] < 4) { - x = bwt_smem1a(bwt, len, seq, x, start_width, split_len, opt->max_mem_intv, &a->mem1, a->tmpv); + x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv); for (i = 0; i < a->mem1.n; ++i) { bwtintv_t *p = &a->mem1.a[i]; int slen = (uint32_t)p->info - (p->info>>32); // seed length @@ -131,7 +131,6 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co } else ++x; } // second pass: find MEMs inside a long SMEM - if (opt->max_mem_intv > 0) goto sort_intv; old_n = a->mem.n; for (k = 0; k < old_n; ++k) { bwtintv_t *p = &a->mem.a[k]; @@ -142,8 +141,24 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info>>32) >= opt->min_seed_len) kv_push(bwtintv_t, a->mem, a->mem1.a[i]); } + // third pass: LAST-like + if (opt->max_mem_intv > 0) { + x = 0; + while (x < len) { + if (seq[x] < 4) { + if (1) { + bwtintv_t m; + x = bwt_seed_strategy1(bwt, len, seq, x, opt->max_mem_intv, &m); + if (m.x[2] > 0) kv_push(bwtintv_t, a->mem, m); + } else { // for now, we never come to this block which is slower + x = bwt_smem1a(bwt, len, seq, x, start_width, opt->max_mem_intv, &a->mem1, a->tmpv); + for (i = 0; i < a->mem1.n; ++i) + kv_push(bwtintv_t, a->mem, a->mem1.a[i]); + } + } else ++x; + } + } // sort -sort_intv: ks_introsort(mem_intv, a->mem.n, a->mem.a); } diff --git a/bwamem_extra.c b/bwamem_extra.c index 09faaa2..02e817a 100644 --- a/bwamem_extra.c +++ b/bwamem_extra.c @@ -65,7 +65,7 @@ const bwtintv_v *smem_next(smem_i *itr) while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases if (itr->start == itr->len) return 0; ori_start = itr->start; - itr->start = bwt_smem1a(itr->bwt, itr->len, itr->query, ori_start, itr->min_intv, itr->max_len, itr->max_intv, itr->matches, itr->tmpvec); // search for SMEM + itr->start = bwt_smem1a(itr->bwt, itr->len, itr->query, ori_start, itr->min_intv, itr->max_intv, itr->matches, itr->tmpvec); // search for SMEM return itr->matches; } diff --git a/bwt.c b/bwt.c index 833e37d..a97b60c 100644 --- a/bwt.c +++ b/bwt.c @@ -285,8 +285,8 @@ static void bwt_reverse_intvs(bwtintv_v *p) } } } - -int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, int max_len, uint64_t max_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]) +// NOTE: $max_intv is not currently used in BWA-MEM +int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, uint64_t max_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]) { int i, j, c, ret; bwtintv_t ik, ok[4]; @@ -302,10 +302,12 @@ int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, ik.info = x + 1; for (i = x + 1, curr->n = 0; i < len; ++i) { // forward search - if (q[i] < 4) { // an A/C/G/T base + if (ik.x[2] < max_intv) { // an interval small enough + kv_push(bwtintv_t, *curr, ik); + break; + } else if (q[i] < 4) { // an A/C/G/T base c = 3 - q[i]; // complement of q[i] bwt_extend(bwt, &ik, ok, 0); - if (ik.x[2] < max_intv && (i - x > max_len || ik.x[2] == 1)) break; if (ok[c].x[2] != ik.x[2]) { // change of the interval size kv_push(bwtintv_t, *curr, ik); if (ok[c].x[2] < min_intv) break; // the interval size is too small to be extended further @@ -325,13 +327,8 @@ int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, c = i < 0? -1 : q[i] < 4? q[i] : -1; // c==-1 if i<0 or q[i] is an ambiguous base for (j = 0, curr->n = 0; j < prev->n; ++j) { bwtintv_t *p = &prev->a[j]; - int max_stop = 0; - bwt_extend(bwt, p, ok, 1); - if (ok[c].x[2] != p->x[2]) { // change of interval size - if (p->info - i - 1 > max_len && p->x[2] < max_intv) - max_stop = 1; - } - if (c < 0 || ok[c].x[2] < min_intv || max_stop) { // keep the hit if reaching the beginning or an ambiguous base or the intv is small enough + if (c >= 0 && ik.x[2] >= max_intv) bwt_extend(bwt, p, ok, 1); + if (c < 0 || ik.x[2] < max_intv || ok[c].x[2] < min_intv) { // keep the hit if reaching the beginning or an ambiguous base or the intv is small enough if (curr->n == 0) { // test curr->n>0 to make sure there are no longer matches if (mem->n == 0 || i + 1 < mem->a[mem->n-1].info>>32) { // skip contained matches ik = *p; ik.info |= (uint64_t)(i + 1)<<32; @@ -355,7 +352,30 @@ int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]) { - return bwt_smem1a(bwt, len, q, x, min_intv, INT_MAX, 0, mem, tmpvec); + return bwt_smem1a(bwt, len, q, x, min_intv, 0, mem, tmpvec); +} + +int bwt_seed_strategy1(const bwt_t *bwt, int len, const uint8_t *q, int x, int max_intv, bwtintv_t *mem) +{ + int i, c; + bwtintv_t ik, ok[4]; + + memset(mem, 0, sizeof(bwtintv_t)); + if (q[x] > 3) return x + 1; + bwt_set_intv(bwt, q[x], ik); // the initial interval of a single base + for (i = x + 1; i < len; ++i) { // forward search + if (q[i] < 4) { // an A/C/G/T base + c = 3 - q[i]; // complement of q[i] + bwt_extend(bwt, &ik, ok, 0); + if (ok[c].x[2] < max_intv) { + *mem = ok[c]; + mem->info = (uint64_t)x<<32 | (i + 1); + return i + 1; + } + ik = ok[c]; + } else return i + 1; + } + return len; } /************************* diff --git a/bwt.h b/bwt.h index 7ab49bc..afb84d2 100644 --- a/bwt.h +++ b/bwt.h @@ -119,9 +119,9 @@ extern "C" { * Return the end of the longest exact match starting from _x_. */ int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]); - int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, int max_len, uint64_t max_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]); + int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, uint64_t max_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]); - // SMEM iterator interface + int bwt_seed_strategy1(const bwt_t *bwt, int len, const uint8_t *q, int x, int max_intv, bwtintv_t *mem); #ifdef __cplusplus } diff --git a/main.c b/main.c index b103798..54f93fe 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.10-r926-dirty" +#define PACKAGE_VERSION "0.7.10-r929-dirty" #endif int bwa_fa2pac(int argc, char *argv[]); From a6b5a30dabb254aa6534c4ec80d531413d22e12d Mon Sep 17 00:00:00 2001 From: Heng Li Date: Mon, 20 Oct 2014 17:34:15 -0400 Subject: [PATCH 10/13] r930: use 3rd round seeding by default This strategy is similar to the seeding heuristic used by LAST. When it is used alone, it is not as accurate as the current seeding strategy at least for short reads. However, it may do a better job for a long contig mapped to multiple ALT contigs. This seeding strategy is also relatively cheap to perform. --- bwamem.c | 2 +- fastmap.c | 2 +- main.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bwamem.c b/bwamem.c index 95478a6..71bf387 100644 --- a/bwamem.c +++ b/bwamem.c @@ -59,7 +59,7 @@ mem_opt_t *mem_opt_init() o->pen_unpaired = 17; o->pen_clip5 = o->pen_clip3 = 5; - o->max_mem_intv = 0; + o->max_mem_intv = 10; o->min_seed_len = 19; o->split_width = 10; diff --git a/fastmap.c b/fastmap.c index e949da3..ddf464a 100644 --- a/fastmap.c +++ b/fastmap.c @@ -145,7 +145,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -w INT band width for banded alignment [%d]\n", opt->w); fprintf(stderr, " -d INT off-diagonal X-dropoff [%d]\n", opt->zdrop); fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor); - fprintf(stderr, " -y INT find MEMs longer than {-k} * {-r} with size less than INT (EXPERIMENTAL) [%ld]\n", (long)opt->max_mem_intv); + fprintf(stderr, " -y INT seed occurrence for the 3rd round seeding [%ld]\n", (long)opt->max_mem_intv); // fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width); fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ); fprintf(stderr, " -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [%.2f]\n", opt->drop_ratio); diff --git a/main.c b/main.c index 54f93fe..e8fb688 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.10-r929-dirty" +#define PACKAGE_VERSION "0.7.10-r930-dirty" #endif int bwa_fa2pac(int argc, char *argv[]); From 497913d404a37507ccfc5272602fd51dfceb27c5 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 21 Oct 2014 00:16:41 -0400 Subject: [PATCH 11/13] working largely as expected --- bwa-postalt.js | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/bwa-postalt.js b/bwa-postalt.js index 1580182..d09d308 100644 --- a/bwa-postalt.js +++ b/bwa-postalt.js @@ -205,7 +205,7 @@ function parse_hit(s, opt) function bwa_postalt(args) { - var c, opt = { a:1, b:4, o:6, e:1, verbose:3, show_pri:false, recover_mapq:true, min_mapq:10, min_sc:90, max_nm_sc:10, show_ev:false, min_pa_ratio:0.8 }; + var c, opt = { a:1, b:4, o:6, e:1, verbose:3, show_pri:false, update_mapq:true, min_mapq:10, min_sc:90, max_nm_sc:10, show_ev:false, min_pa_ratio:0.8 }; while ((c = getopt(args, 'Pqev:p:r:')) != null) { if (c == 'v') opt.verbose = parseInt(getopt.arg); @@ -482,17 +482,19 @@ function bwa_postalt(args) } // check if the reported hit overlaps a hit to the primary assembly; if so, don't reduce mapping quality - if (opt.recover_mapq && n_rpt_lifted == 1 && mapQ > 0) { - var l = rpt_lifted; + if (opt.update_mapq && mapQ > 0 && n_rpt_lifted <= 1) { + var l = n_rpt_lifted == 1? rpt_lifted : null; for (var i = 0; i < buf2.length; ++i) { var s = buf2[i], is_ovlp = true; - if (l[0] != s[2]) is_ovlp = false; // different chr - if (((s[1]&16) != 0) != l[1]) is_ovlp = false; // different strand - var start = s[3] - 1, end = start; - while ((m = re_cigar.exec(t[5])) != null) - if (m[2] == 'M' || m[2] == 'D' || m[2] == 'N') - end += parseInt(m[1]); - if (!(start < l[3] && l[2] < end)) is_ovlp = false; // no overlap + if (l != null) { + if (l[0] != s[2]) is_ovlp = false; // different chr + if (((s[1]&16) != 0) != l[1]) is_ovlp = false; // different strand + var start = s[3] - 1, end = start; + while ((m = re_cigar.exec(t[5])) != null) + if (m[2] == 'M' || m[2] == 'D' || m[2] == 'N') + end += parseInt(m[1]); + if (!(start < l[3] && l[2] < end)) is_ovlp = false; // no overlap + } else is_ovlp = false; var om = -1, pa = 10.; for (var j = 11; j < s.length; ++j) if ((m = /^om:i:(\d+)/.exec(s[j])) != null) From 2bfa3c767b06b333722b72c0ed543c1a93489e1d Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 21 Oct 2014 00:22:59 -0400 Subject: [PATCH 12/13] CLI help --- bwa-postalt.js | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/bwa-postalt.js b/bwa-postalt.js index d09d308..a02b3e0 100644 --- a/bwa-postalt.js +++ b/bwa-postalt.js @@ -205,7 +205,7 @@ function parse_hit(s, opt) function bwa_postalt(args) { - var c, opt = { a:1, b:4, o:6, e:1, verbose:3, show_pri:false, update_mapq:true, min_mapq:10, min_sc:90, max_nm_sc:10, show_ev:false, min_pa_ratio:0.8 }; + var c, opt = { a:1, b:4, o:6, e:1, verbose:3, show_pri:false, update_mapq:true, min_mapq:10, min_sc:90, max_nm_sc:10, show_ev:false, min_pa_ratio:1 }; while ((c = getopt(args, 'Pqev:p:r:')) != null) { if (c == 'v') opt.verbose = parseInt(getopt.arg); @@ -225,13 +225,14 @@ function bwa_postalt(args) if (args.length == getopt.ind) { print(""); print("Usage: k8 bwa-postalt.js [options] [aln.sam]\n"); - print("Options: -p STR prefix of file(s) for additional information [null]"); - print(" PREFIX.ctw - weight of each ALT contig"); - print(" PREFIX.evi - reads supporting ALT contigs (effective with -e)"); - print(" -q don't modify mapQ for non-ALTs hit overlapping lifted ALT"); - print(" -e show reads supporting ALT contigs into file PREFIX.evi"); + print("Options: -p STR prefix of file(s) for additional information [null]"); + print(" PREFIX.ctw - weight of each ALT contig"); + print(" PREFIX.evi - reads supporting ALT contigs (effective with -e)"); + print(" -e show reads supporting ALT contigs into file PREFIX.evi"); + print(" -r FLOAT reduce mapQ to 0 if not overlapping lifted best and pa Date: Tue, 21 Oct 2014 00:23:14 -0400 Subject: [PATCH 13/13] r933: with bwa-postalt ready, drop option -g --- bwamem.c | 5 +---- bwamem.h | 1 - fastmap.c | 4 +--- main.c | 2 +- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/bwamem.c b/bwamem.c index 71bf387..3fd008f 100644 --- a/bwamem.c +++ b/bwamem.c @@ -79,7 +79,6 @@ mem_opt_t *mem_opt_init() o->min_chain_weight = 0; o->max_chain_extend = 1<<30; o->mapQ_coef_len = 50; o->mapQ_coef_fac = log(o->mapQ_coef_len); - o->min_pa_ratio = 0; bwa_fill_scmat(o->a, o->b, o->mat); return o; } @@ -841,7 +840,7 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq if (p->rid >= 0) { // with coordinate kputs(bns->anns[p->rid].name, str); kputc('\t', str); // RNAME kputl(p->pos + 1, str); kputc('\t', str); // POS - kputw(p->score < opt->min_pa_ratio * p->alt_sc? 0 : p->mapq, str); kputc('\t', str); // MAPQ + kputw(p->mapq, str); kputc('\t', str); // MAPQ if (p->n_cigar) { // aligned for (i = 0; i < p->n_cigar; ++i) { int c = p->cigar[i]&0xf; @@ -931,8 +930,6 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq } if (p->alt_sc > 0) ksprintf(str, "\tpa:f:%.3f", (double)p->score / p->alt_sc); - if (p->score < opt->min_pa_ratio * p->alt_sc) - ksprintf(str, "\tom:i:%d", p->mapq); } if (p->XA) { kputsn("\tXA:Z:", 6, str); kputs(p->XA, str); } if (s->comment) { kputc('\t', str); kputs(s->comment, str); } diff --git a/bwamem.h b/bwamem.h index ae53601..1746ab5 100644 --- a/bwamem.h +++ b/bwamem.h @@ -48,7 +48,6 @@ typedef struct { float XA_drop_ratio; // when counting hits for the XA tag, ignore alignments with score < XA_drop_ratio * max_score; only effective for the XA tag float mask_level_redun; float mapQ_coef_len; - float min_pa_ratio; int mapQ_coef_fac; int max_ins; // when estimating insert size distribution, skip pairs with insert longer than this value int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end diff --git a/fastmap.c b/fastmap.c index ddf464a..18f3b0e 100644 --- a/fastmap.c +++ b/fastmap.c @@ -55,7 +55,7 @@ int main_mem(int argc, char *argv[]) opt = mem_opt_init(); memset(&opt0, 0, sizeof(mem_opt_t)); - while ((c = getopt(argc, argv, "epaFMCSPHVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:g:")) >= 0) { + while ((c = getopt(argc, argv, "epaFMCSPHVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; else if (c == 'x') mode = optarg; else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1; @@ -85,7 +85,6 @@ int main_mem(int argc, char *argv[]) else if (c == 'N') opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1; else if (c == 'W') opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1; else if (c == 'y') opt->max_mem_intv = atol(optarg), opt0.max_mem_intv = 1; - else if (c == 'g') opt->min_pa_ratio = atof(optarg), opt0.min_pa_ratio = 1; else if (c == 'C') copy_comment = 1; else if (c == 'K') fixed_chunk_size = atoi(optarg); else if (c == 'h') { @@ -172,7 +171,6 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -j ignore ALT contigs\n"); fprintf(stderr, "\n"); fprintf(stderr, " -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [%d]\n", bwa_verbose); - fprintf(stderr, " -g FLOAT set mapQ to zero if the ratio of the primary-to-alt scores below FLOAT [%.3f]\n", opt->min_pa_ratio); fprintf(stderr, " -T INT minimum score to output [%d]\n", opt->T); fprintf(stderr, " -h INT[,INT] if there are 80%% of the max score, output all in XA [%d,%d]\n", opt->max_XA_hits, opt->max_XA_hits_alt); fprintf(stderr, " -a output all alignments for SE or unpaired PE\n"); diff --git a/main.c b/main.c index e8fb688..12b0930 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.10-r930-dirty" +#define PACKAGE_VERSION "0.7.10-r933-dirty" #endif int bwa_fa2pac(int argc, char *argv[]);