r391: better method to resolve xref alignment
The old method does not work when the alignment bridges three chr. This may actually happen often. The new method does not work all the time, either, but should be better than the old one. It is also simpler, arguably.
This commit is contained in:
parent
338ebfaae0
commit
9a6abe51b6
6
Makefile
6
Makefile
|
|
@ -1,13 +1,13 @@
|
|||
CC= gcc
|
||||
CFLAGS= -g -Wall -O2
|
||||
WRAP_MALLOC= -DUSE_MALLOC_WRAPPERS
|
||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||
AR= ar
|
||||
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
|
||||
LOBJS= utils.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o
|
||||
LOBJS= utils.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o malloc_wrap.o
|
||||
AOBJS= QSufSort.o bwt_gen.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
|
||||
is.o bwtindex.o bwape.o kopen.o pemerge.o \
|
||||
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
|
||||
bwtsw2_chain.o fastmap.o bwtsw2_pair.o malloc_wrap.o
|
||||
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
|
||||
PROG= bwa
|
||||
INCLUDES=
|
||||
LIBS= -lm -lz -lpthread
|
||||
|
|
|
|||
56
bwa.c
56
bwa.c
|
|
@ -146,57 +146,51 @@ ret_gen_cigar:
|
|||
|
||||
int bwa_fix_xref(const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re)
|
||||
{
|
||||
int ib, ie, is_rev;
|
||||
int64_t fb, fe, mid = -1;
|
||||
int is_rev;
|
||||
int64_t cb, ce, fm;
|
||||
bntann1_t *ra;
|
||||
if (*rb < bns->l_pac && *re > bns->l_pac) { // cross the for-rev boundary; actually with BWA-MEM, we should never come to here
|
||||
*qb = *qe = *rb = *re = -1;
|
||||
return -1; // unable to fix
|
||||
} else {
|
||||
fb = bns_depos(bns, *rb < bns->l_pac? *rb : *re - 1, &is_rev);
|
||||
ib = bns_pos2rid(bns, fb);
|
||||
if (fb - bns->anns[ib].offset + (*re - *rb) <= bns->anns[ib].len) return 0; // no need to fix
|
||||
fe = bns_depos(bns, *re - 1 < bns->l_pac? *re - 1 : *rb, &is_rev);
|
||||
ie = bns_pos2rid(bns, fe);
|
||||
if (ie - ib > 1) { // bridge three or more references
|
||||
*qb = *qe = *rb = *re = -1;
|
||||
return -2; // unable to fix
|
||||
} else {
|
||||
int l = bns->anns[ib].offset + bns->anns[ib].len - fb;
|
||||
mid = is_rev? *re - l : *rb + l;
|
||||
}
|
||||
}
|
||||
if (mid >= 0) {
|
||||
fm = bns_depos(bns, (*rb + *re) >> 1, &is_rev); // coordinate of the middle point on the forward strand
|
||||
ra = &bns->anns[bns_pos2rid(bns, fm)]; // annotation of chr corresponding to the middle point
|
||||
cb = is_rev? (bns->l_pac<<1) - (ra->offset + ra->len) : ra->offset; // chr start on the mapping strand
|
||||
ce = cb + ra->len; // chr end
|
||||
if (cb > *rb || ce < *re) { // fix is needed
|
||||
int i, score, n_cigar, y, NM;
|
||||
uint32_t *cigar;
|
||||
int64_t x;
|
||||
cb = cb > *rb? cb : *rb;
|
||||
ce = ce < *re? ce : *re;
|
||||
cigar = bwa_gen_cigar(mat, q, r, w, bns->l_pac, pac, *qe - *qb, query + *qb, *rb, *re, &score, &n_cigar, &NM);
|
||||
for (i = 0, x = *rb, y = *qb; i < n_cigar; ++i) {
|
||||
int op = cigar[i]&0xf, len = cigar[i]>>4;
|
||||
if (op == 0) {
|
||||
if (x <= mid && mid < x + len) {
|
||||
if (mid - *rb > *re - mid) { // the first part is longer
|
||||
if (x == mid) { // need to check the previous operation
|
||||
assert(i); // mid != *rb should always stand
|
||||
if ((cigar[i-1]&0xf) == 1) *qe = y - (cigar[i-1]>>4), *re = x;
|
||||
else if ((cigar[i-1]&0xf) == 2) *qe = y, *re = x - (cigar[i-1]>>4);
|
||||
else abort(); // should not be here
|
||||
} else *qe = y + (mid - x), *re = mid;
|
||||
} else *qb = y + (mid - x), *rb = mid;
|
||||
if (x <= cb && cb < x + len)
|
||||
*qb = y + (cb - x), *rb = cb;
|
||||
if (x < ce && ce <= x + len) {
|
||||
*qe = y + (ce - x), *re = ce;
|
||||
break;
|
||||
} else x += len, y += len;
|
||||
} else if (op == 1) { // insertion
|
||||
} else if (op == 1) {
|
||||
y += len;
|
||||
} else if (op == 2) { // deletion
|
||||
if (x <= mid && mid < x + len) {
|
||||
if (mid - *rb > *re - mid) *qe = y, *re = x;
|
||||
else *qb = y, *rb = x + len;
|
||||
} else if (op == 2) {
|
||||
if (x <= cb && cb < x + len)
|
||||
*qb = y, *rb = x + len;
|
||||
if (x < ce && ce <= x + len) {
|
||||
*qe = y, *re = x;
|
||||
break;
|
||||
} else x += len;
|
||||
} else abort(); // should not be here
|
||||
}
|
||||
free(cigar);
|
||||
if (*qb == *qe || *rb == *re) { // TODO: this may happen in theory, but should be very very rare...
|
||||
fprintf(stderr, "[E::%s] If you see this message, please let the developer know. Sorry.\n", __func__);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*********************
|
||||
|
|
|
|||
4
bwamem.c
4
bwamem.c
|
|
@ -789,10 +789,6 @@ void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pa
|
|||
if (p->secondary >= 0 && p->score < a->a[p->secondary].score * .5) continue;
|
||||
q = kv_pushp(mem_aln_t, aa);
|
||||
*q = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, p);
|
||||
if (q->rid < 0) { // unfixable cross-reference alignment
|
||||
--aa.n;
|
||||
continue;
|
||||
}
|
||||
q->flag |= extra_flag | (p->secondary >= 0? 0x100 : 0); // flag secondary
|
||||
if (p->secondary >= 0) q->sub = -1; // don't output sub-optimal score
|
||||
if ((opt->flag&MEM_F_NO_MULTI) && k && p->secondary < 0) q->flag |= 0x10000;
|
||||
|
|
|
|||
Loading…
Reference in New Issue