From db58392e9baa7606a6c9d6b48ba4e26f0b89cc7a Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 9 Apr 2014 13:20:04 -0400 Subject: [PATCH 1/3] dev-469: fixed wrong command line prompt --- fastmap.c | 6 +++--- main.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fastmap.c b/fastmap.c index f3dd2f6..9bdacb4 100644 --- a/fastmap.c +++ b/fastmap.c @@ -147,7 +147,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -U INT penalty for an unpaired read pair [%d]\n", opt->pen_unpaired); fprintf(stderr, " -x STR read type. Setting -x changes multiple parameters unless overriden [null]\n"); fprintf(stderr, " pacbio: -k17 -W40 -w200 -c1000 -r10 -A2 -B7 -O2 -E1 -L0\n"); - fprintf(stderr, " pbread: -k13 -W30 -w100 -c1000 -r10 -A2 -B5 -O2 -E1 -aeD.02\n"); + fprintf(stderr, " pbread: -k13 -W30 -w100 -c1000 -r10 -A2 -B5 -O2 -E1 -N20 -FeaD.01\n"); fprintf(stderr, "\nInput/output options:\n\n"); fprintf(stderr, " -p first query file consists of interleaved paired-end sequences\n"); fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n"); @@ -169,7 +169,7 @@ int main_mem(int argc, char *argv[]) } if (mode) { - if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "pbread1") == 0) { + if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "pbread1") == 0 || strcmp(mode, "pbread") == 0) { if (!opt0.a) opt->a = 2, opt0.a = 1; update_a(opt, &opt0); if (!opt0.o_del) opt->o_del = 2; @@ -178,7 +178,7 @@ int main_mem(int argc, char *argv[]) if (!opt0.e_ins) opt->e_ins = 1; if (!opt0.max_occ) opt->max_occ = 1000; if (opt0.split_factor == 0.) opt->split_factor = 10.; - if (strcmp(mode, "pbread1") == 0) { + if (strcmp(mode, "pbread1") == 0 || strcmp(mode, "pbread") == 0) { opt->flag |= MEM_F_ALL | MEM_F_SELF_OVLP | MEM_F_ALN_REG; if (!opt0.b) opt->b = 5; if (!opt0.w) opt->w = 100; diff --git a/main.c b/main.c index b100b8d..91f28bc 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.8+dev-r468" +#define PACKAGE_VERSION "0.7.8+dev-r469" #endif int bwa_fa2pac(int argc, char *argv[]); From ccbbe48c4f4124623551e1c72ad257a7040b8ad3 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 10 Apr 2014 11:43:17 -0400 Subject: [PATCH 2/3] dev-470: don't stop on bwa_fix_xref2() failures Peter Field has sent me an example caused by an alignment bridging three adjacent chromosomes/contigs. Bwa-mem always aligns the query to the contig covering the middle point of the alignment. In this example, it chooses the middle contig, which should not be aligned. This leads to weird things failing bwa_fix_xref2(), which cannot be fixed unless we build the contig boundaries into the FM-index. In the old code, bwa-mem halts when bwa_fix_xref2() fails. With this commit, bwa-mem will give a warning instead of halting. --- bwa.c | 7 ++++--- bwamem.c | 27 +++++++++++++++++++++------ bwamem.h | 1 + bwamem_extra.c | 5 +++-- main.c | 2 +- 5 files changed, 30 insertions(+), 12 deletions(-) diff --git a/bwa.c b/bwa.c index 0e9e606..08881c0 100644 --- a/bwa.c +++ b/bwa.c @@ -178,9 +178,10 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa int bwa_fix_xref2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re) { - int is_rev; - int64_t cb, ce, fm; + int is_rev, ori_ql = *qe - *qb; + int64_t cb, ce, fm, ori_rl = *re - *rb; bntann1_t *ra; + assert(ori_ql > 0 && ori_rl > 0); if (*rb < bns->l_pac && *re > bns->l_pac) { // cross the for-rev boundary; actually with BWA-MEM, we should never come to here *qb = *qe = *rb = *re = -1; return -1; // unable to fix @@ -218,7 +219,7 @@ int bwa_fix_xref2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_i } free(cigar); } - return (*qb == *qe || *rb == *re)? -2 : 0; + return (*qe - *qb < .33 * ori_ql || *re - *rb < .33 * ori_rl)? -2 : 0; } int bwa_fix_xref(const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re) diff --git a/bwamem.c b/bwamem.c index bfb45d9..9a58968 100644 --- a/bwamem.c +++ b/bwamem.c @@ -905,7 +905,11 @@ void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pa if (p->secondary >= 0 && !(opt->flag&MEM_F_ALL)) continue; if (p->secondary >= 0 && p->score < a->a[p->secondary].score * opt->drop_ratio) continue; q = kv_pushp(mem_aln_t, aa); - *q = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, p); + *q = mem_reg2aln2(opt, bns, pac, s->l_seq, s->seq, p, s->name); + if (q->rid < 0) { + --aa.n; + continue; + } q->flag |= extra_flag; // flag secondary if (p->secondary >= 0) q->sub = -1; // don't output sub-optimal score if (k && p->secondary < 0) // if supplementary @@ -982,11 +986,10 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]]; a.mapq = ar->secondary < 0? mem_approx_mapq_se(opt, ar) : 0; if (ar->secondary >= 0) a.flag |= 0x100; // secondary alignment - if (bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re) < 0) { - if (name) fprintf(stderr, "[E::%s] Internal code inconsistency for read '%s'. Please contact the developer. Sorry.\n", __func__, name); - else fprintf(stderr, "[E::%s] Internal code inconsistency. Please contact the developer. Sorry.\n", __func__); - a.rid = -1; a.pos = -1; a.flag |= 0x4; - return a; + if ((ret = bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re)) < 0) { + if (bwa_verbose >= 2 && name) + fprintf(stderr, "[W::%s] A cross-chr hit of read '%s' has been dropped.\n", __func__, name); + goto err_reg2aln; } tmp = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_del, opt->e_del); w2 = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_ins, opt->e_ins); @@ -1002,6 +1005,11 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t last_sc = score; w2 <<= 1; } while (++i < 3 && score < ar->truesc - opt->a); + if (score < 0) { + if (bwa_verbose >= 2 && name) + fprintf(stderr, "[W::%s] A hit to read '%s' has been dropped.\n", __func__, name); + goto err_reg2aln; + } l_MD = strlen((char*)(a.cigar + a.n_cigar)) + 1; a.NM = NM; pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev); @@ -1036,6 +1044,13 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t a.score = ar->score; a.sub = ar->sub > ar->csub? ar->sub : ar->csub; free(query); return a; + +err_reg2aln: + free(a.cigar); + memset(&a, 0, sizeof(mem_aln_t)); + a.rid = -1; a.pos = -1; a.flag |= 0x4; + free(query); + return a; } mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar) diff --git a/bwamem.h b/bwamem.h index a6d6aa9..7cfe8b8 100644 --- a/bwamem.h +++ b/bwamem.h @@ -148,6 +148,7 @@ extern "C" { * @return CIGAR, strand, mapping quality and forward-strand position */ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar); + mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar, const char *name); /** * Infer the insert size distribution from interleaved alignment regions diff --git a/bwamem_extra.c b/bwamem_extra.c index 58e9f67..157026c 100644 --- a/bwamem_extra.c +++ b/bwamem_extra.c @@ -89,8 +89,9 @@ void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int is_rev, rid, qb = p->qb, qe = p->qe; int64_t pos, rb = p->rb, re = p->re; if (bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)s->seq, &qb, &qe, &rb, &re) < 0) { - fprintf(stderr, "[E::%s] Internal errors when processing read '%s'. Please let the developer know. Abort. Sorry.\n", __func__, s->name); - exit(1); + if (bwa_verbose >= 2) + fprintf(stderr, "[W::%s] A cross-chr hit of read '%s' has been dropped.\n", __func__, s->name); + continue; } pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev); rid = bns_pos2rid(bns, pos); diff --git a/main.c b/main.c index 91f28bc..dec0b04 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.8+dev-r469" +#define PACKAGE_VERSION "0.7.8+dev-r470" #endif int bwa_fa2pac(int argc, char *argv[]); From 23e0e99ec0744fd70f48dbac8ecbe73d6b219f97 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 10 Apr 2014 11:54:17 -0400 Subject: [PATCH 3/3] dev-471: fixed a compiling error from last commit --- bwamem.c | 2 +- main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bwamem.c b/bwamem.c index 9a58968..189e58b 100644 --- a/bwamem.c +++ b/bwamem.c @@ -986,7 +986,7 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]]; a.mapq = ar->secondary < 0? mem_approx_mapq_se(opt, ar) : 0; if (ar->secondary >= 0) a.flag |= 0x100; // secondary alignment - if ((ret = bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re)) < 0) { + if (bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re) < 0) { if (bwa_verbose >= 2 && name) fprintf(stderr, "[W::%s] A cross-chr hit of read '%s' has been dropped.\n", __func__, name); goto err_reg2aln; diff --git a/main.c b/main.c index dec0b04..6a43b5f 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.8+dev-r470" +#define PACKAGE_VERSION "0.7.8+dev-r471" #endif int bwa_fa2pac(int argc, char *argv[]);