dev-470: don't stop on bwa_fix_xref2() failures
Peter Field has sent me an example caused by an alignment bridging three adjacent chromosomes/contigs. Bwa-mem always aligns the query to the contig covering the middle point of the alignment. In this example, it chooses the middle contig, which should not be aligned. This leads to weird things failing bwa_fix_xref2(), which cannot be fixed unless we build the contig boundaries into the FM-index. In the old code, bwa-mem halts when bwa_fix_xref2() fails. With this commit, bwa-mem will give a warning instead of halting.
This commit is contained in:
parent
db58392e9b
commit
ccbbe48c4f
7
bwa.c
7
bwa.c
|
|
@ -178,9 +178,10 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa
|
|||
|
||||
int bwa_fix_xref2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re)
|
||||
{
|
||||
int is_rev;
|
||||
int64_t cb, ce, fm;
|
||||
int is_rev, ori_ql = *qe - *qb;
|
||||
int64_t cb, ce, fm, ori_rl = *re - *rb;
|
||||
bntann1_t *ra;
|
||||
assert(ori_ql > 0 && ori_rl > 0);
|
||||
if (*rb < bns->l_pac && *re > bns->l_pac) { // cross the for-rev boundary; actually with BWA-MEM, we should never come to here
|
||||
*qb = *qe = *rb = *re = -1;
|
||||
return -1; // unable to fix
|
||||
|
|
@ -218,7 +219,7 @@ int bwa_fix_xref2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_i
|
|||
}
|
||||
free(cigar);
|
||||
}
|
||||
return (*qb == *qe || *rb == *re)? -2 : 0;
|
||||
return (*qe - *qb < .33 * ori_ql || *re - *rb < .33 * ori_rl)? -2 : 0;
|
||||
}
|
||||
|
||||
int bwa_fix_xref(const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re)
|
||||
|
|
|
|||
27
bwamem.c
27
bwamem.c
|
|
@ -905,7 +905,11 @@ void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pa
|
|||
if (p->secondary >= 0 && !(opt->flag&MEM_F_ALL)) continue;
|
||||
if (p->secondary >= 0 && p->score < a->a[p->secondary].score * opt->drop_ratio) continue;
|
||||
q = kv_pushp(mem_aln_t, aa);
|
||||
*q = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, p);
|
||||
*q = mem_reg2aln2(opt, bns, pac, s->l_seq, s->seq, p, s->name);
|
||||
if (q->rid < 0) {
|
||||
--aa.n;
|
||||
continue;
|
||||
}
|
||||
q->flag |= extra_flag; // flag secondary
|
||||
if (p->secondary >= 0) q->sub = -1; // don't output sub-optimal score
|
||||
if (k && p->secondary < 0) // if supplementary
|
||||
|
|
@ -982,11 +986,10 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t
|
|||
query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]];
|
||||
a.mapq = ar->secondary < 0? mem_approx_mapq_se(opt, ar) : 0;
|
||||
if (ar->secondary >= 0) a.flag |= 0x100; // secondary alignment
|
||||
if (bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re) < 0) {
|
||||
if (name) fprintf(stderr, "[E::%s] Internal code inconsistency for read '%s'. Please contact the developer. Sorry.\n", __func__, name);
|
||||
else fprintf(stderr, "[E::%s] Internal code inconsistency. Please contact the developer. Sorry.\n", __func__);
|
||||
a.rid = -1; a.pos = -1; a.flag |= 0x4;
|
||||
return a;
|
||||
if ((ret = bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re)) < 0) {
|
||||
if (bwa_verbose >= 2 && name)
|
||||
fprintf(stderr, "[W::%s] A cross-chr hit of read '%s' has been dropped.\n", __func__, name);
|
||||
goto err_reg2aln;
|
||||
}
|
||||
tmp = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_del, opt->e_del);
|
||||
w2 = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_ins, opt->e_ins);
|
||||
|
|
@ -1002,6 +1005,11 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t
|
|||
last_sc = score;
|
||||
w2 <<= 1;
|
||||
} while (++i < 3 && score < ar->truesc - opt->a);
|
||||
if (score < 0) {
|
||||
if (bwa_verbose >= 2 && name)
|
||||
fprintf(stderr, "[W::%s] A hit to read '%s' has been dropped.\n", __func__, name);
|
||||
goto err_reg2aln;
|
||||
}
|
||||
l_MD = strlen((char*)(a.cigar + a.n_cigar)) + 1;
|
||||
a.NM = NM;
|
||||
pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev);
|
||||
|
|
@ -1036,6 +1044,13 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t
|
|||
a.score = ar->score; a.sub = ar->sub > ar->csub? ar->sub : ar->csub;
|
||||
free(query);
|
||||
return a;
|
||||
|
||||
err_reg2aln:
|
||||
free(a.cigar);
|
||||
memset(&a, 0, sizeof(mem_aln_t));
|
||||
a.rid = -1; a.pos = -1; a.flag |= 0x4;
|
||||
free(query);
|
||||
return a;
|
||||
}
|
||||
|
||||
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar)
|
||||
|
|
|
|||
1
bwamem.h
1
bwamem.h
|
|
@ -148,6 +148,7 @@ extern "C" {
|
|||
* @return CIGAR, strand, mapping quality and forward-strand position
|
||||
*/
|
||||
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar);
|
||||
mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar, const char *name);
|
||||
|
||||
/**
|
||||
* Infer the insert size distribution from interleaved alignment regions
|
||||
|
|
|
|||
|
|
@ -89,8 +89,9 @@ void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac,
|
|||
int is_rev, rid, qb = p->qb, qe = p->qe;
|
||||
int64_t pos, rb = p->rb, re = p->re;
|
||||
if (bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)s->seq, &qb, &qe, &rb, &re) < 0) {
|
||||
fprintf(stderr, "[E::%s] Internal errors when processing read '%s'. Please let the developer know. Abort. Sorry.\n", __func__, s->name);
|
||||
exit(1);
|
||||
if (bwa_verbose >= 2)
|
||||
fprintf(stderr, "[W::%s] A cross-chr hit of read '%s' has been dropped.\n", __func__, s->name);
|
||||
continue;
|
||||
}
|
||||
pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev);
|
||||
rid = bns_pos2rid(bns, pos);
|
||||
|
|
|
|||
Loading…
Reference in New Issue