diff --git a/bwa.c b/bwa.c index 0e9e606..08881c0 100644 --- a/bwa.c +++ b/bwa.c @@ -178,9 +178,10 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa int bwa_fix_xref2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re) { - int is_rev; - int64_t cb, ce, fm; + int is_rev, ori_ql = *qe - *qb; + int64_t cb, ce, fm, ori_rl = *re - *rb; bntann1_t *ra; + assert(ori_ql > 0 && ori_rl > 0); if (*rb < bns->l_pac && *re > bns->l_pac) { // cross the for-rev boundary; actually with BWA-MEM, we should never come to here *qb = *qe = *rb = *re = -1; return -1; // unable to fix @@ -218,7 +219,7 @@ int bwa_fix_xref2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_i } free(cigar); } - return (*qb == *qe || *rb == *re)? -2 : 0; + return (*qe - *qb < .33 * ori_ql || *re - *rb < .33 * ori_rl)? -2 : 0; } int bwa_fix_xref(const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re) diff --git a/bwamem.c b/bwamem.c index bfb45d9..9a58968 100644 --- a/bwamem.c +++ b/bwamem.c @@ -905,7 +905,11 @@ void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pa if (p->secondary >= 0 && !(opt->flag&MEM_F_ALL)) continue; if (p->secondary >= 0 && p->score < a->a[p->secondary].score * opt->drop_ratio) continue; q = kv_pushp(mem_aln_t, aa); - *q = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, p); + *q = mem_reg2aln2(opt, bns, pac, s->l_seq, s->seq, p, s->name); + if (q->rid < 0) { + --aa.n; + continue; + } q->flag |= extra_flag; // flag secondary if (p->secondary >= 0) q->sub = -1; // don't output sub-optimal score if (k && p->secondary < 0) // if supplementary @@ -982,11 +986,10 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]]; a.mapq = ar->secondary < 0? mem_approx_mapq_se(opt, ar) : 0; if (ar->secondary >= 0) a.flag |= 0x100; // secondary alignment - if (bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re) < 0) { - if (name) fprintf(stderr, "[E::%s] Internal code inconsistency for read '%s'. Please contact the developer. Sorry.\n", __func__, name); - else fprintf(stderr, "[E::%s] Internal code inconsistency. Please contact the developer. Sorry.\n", __func__); - a.rid = -1; a.pos = -1; a.flag |= 0x4; - return a; + if ((ret = bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re)) < 0) { + if (bwa_verbose >= 2 && name) + fprintf(stderr, "[W::%s] A cross-chr hit of read '%s' has been dropped.\n", __func__, name); + goto err_reg2aln; } tmp = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_del, opt->e_del); w2 = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_ins, opt->e_ins); @@ -1002,6 +1005,11 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t last_sc = score; w2 <<= 1; } while (++i < 3 && score < ar->truesc - opt->a); + if (score < 0) { + if (bwa_verbose >= 2 && name) + fprintf(stderr, "[W::%s] A hit to read '%s' has been dropped.\n", __func__, name); + goto err_reg2aln; + } l_MD = strlen((char*)(a.cigar + a.n_cigar)) + 1; a.NM = NM; pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev); @@ -1036,6 +1044,13 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t a.score = ar->score; a.sub = ar->sub > ar->csub? ar->sub : ar->csub; free(query); return a; + +err_reg2aln: + free(a.cigar); + memset(&a, 0, sizeof(mem_aln_t)); + a.rid = -1; a.pos = -1; a.flag |= 0x4; + free(query); + return a; } mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar) diff --git a/bwamem.h b/bwamem.h index a6d6aa9..7cfe8b8 100644 --- a/bwamem.h +++ b/bwamem.h @@ -148,6 +148,7 @@ extern "C" { * @return CIGAR, strand, mapping quality and forward-strand position */ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar); + mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar, const char *name); /** * Infer the insert size distribution from interleaved alignment regions diff --git a/bwamem_extra.c b/bwamem_extra.c index 58e9f67..157026c 100644 --- a/bwamem_extra.c +++ b/bwamem_extra.c @@ -89,8 +89,9 @@ void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int is_rev, rid, qb = p->qb, qe = p->qe; int64_t pos, rb = p->rb, re = p->re; if (bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)s->seq, &qb, &qe, &rb, &re) < 0) { - fprintf(stderr, "[E::%s] Internal errors when processing read '%s'. Please let the developer know. Abort. Sorry.\n", __func__, s->name); - exit(1); + if (bwa_verbose >= 2) + fprintf(stderr, "[W::%s] A cross-chr hit of read '%s' has been dropped.\n", __func__, s->name); + continue; } pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev); rid = bns_pos2rid(bns, pos); diff --git a/main.c b/main.c index 91f28bc..dec0b04 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.8+dev-r469" +#define PACKAGE_VERSION "0.7.8+dev-r470" #endif int bwa_fa2pac(int argc, char *argv[]);