diff --git a/bntseq.c b/bntseq.c index 06d82a0..0286c19 100644 --- a/bntseq.c +++ b/bntseq.c @@ -322,29 +322,25 @@ int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id) return nn; } -static inline void get_seq_core(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end, uint8_t *seq) -{ - int64_t k, l = 0; - if (beg >= l_pac) { // reverse strand - int64_t beg_f = (l_pac<<1) - 1 - end; - int64_t end_f = (l_pac<<1) - 1 - beg; - for (k = end_f; k > beg_f; --k) - seq[l++] = 3 - _get_pac(pac, k); - } else { // forward strand - for (k = beg; k < end; ++k) - seq[l++] = _get_pac(pac, k); - } -} - uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end, int64_t *len) { - uint8_t *seq; + uint8_t *seq = 0; + if (end < beg) end ^= beg, beg ^= end, end ^= beg; // if end is smaller, swap if (end > l_pac<<1) end = l_pac<<1; - *len = end - beg; - seq = malloc(end - beg); - if (beg < l_pac && end > l_pac) { - get_seq_core(l_pac, pac, beg, l_pac, seq); - get_seq_core(l_pac, pac, l_pac, end, seq + (l_pac - beg)); - } else get_seq_core(l_pac, pac, beg, end, seq); + if (beg < 0) beg = 0; + if (beg >= l_pac || end <= l_pac) { + int64_t k, l = 0; + *len = end - beg; + seq = malloc(end - beg); + if (beg >= l_pac) { // reverse strand + int64_t beg_f = (l_pac<<1) - 1 - end; + int64_t end_f = (l_pac<<1) - 1 - beg; + for (k = end_f; k > beg_f; --k) + seq[l++] = 3 - _get_pac(pac, k); + } else { // forward strand + for (k = beg; k < end; ++k) + seq[l++] = _get_pac(pac, k); + } + } else *len = 0; // if bridging the forward-reverse boundary, return nothing return seq; } diff --git a/bwamem.c b/bwamem.c index a639109..320df8d 100644 --- a/bwamem.c +++ b/bwamem.c @@ -395,6 +395,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int mem_alnreg_t best; memset(&best, 0, sizeof(mem_alnreg_t)); + memset(a, 0, sizeof(mem_alnreg_t)); // get the max possible span rmax[0] = l_pac<<1; rmax[1] = 0; for (i = 0; i < c->n; ++i) { @@ -408,6 +409,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int } // retrieve the reference sequence rseq = bns_get_seq(l_pac, pac, rmax[0], rmax[1], &rlen); + if (rlen != rmax[1] - rmax[0]) return; for (k = 0; k < c->n;) { s = &c->seeds[k]; diff --git a/bwamem_pair.c b/bwamem_pair.c index cc0e8f0..dd9b3cd 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -97,7 +97,7 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v * void mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], int rn, const mem_alnreg_t *a, int l_ms, const uint8_t *ms, mem_alnreg_v *ma) { int i, r, skip[4]; - rn = !!rn; // either 0 or 1 + rn = !!rn; // either 0 or 1; $rn is the read number of $a for (r = 0; r < 4; ++r) skip[r] = pes[r].failed? 1 : 0; for (i = 0; i < ma->n; ++i) { // check which orinentation has been found @@ -109,9 +109,28 @@ void mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const m if (skip[0] + skip[1] + skip[2] + skip[3] == 4) return; // consistent pair exist; no need to perform SW for (r = 0; r < 4; ++r) { int is_rev, is_larger; - if (pes[r].failed) continue; - is_rev = r>>1 == (r&1)? 0 : 1; // whether to reverse complement the mate - is_larger = r>>(!rn)&1; // whether the mate has larger coordinate + uint8_t *seq, *rev = 0, *ref; + int64_t rb, re, len; + if (skip[r]) continue; + is_rev = (r>>1 != (r&1)); // whether to reverse complement the mate + is_larger = r>>rn&1; // whether the mate has larger coordinate + if (is_rev) { + rev = malloc(l_ms); // this is the reverse complement of $ms + for (i = 0; i < l_ms; ++i) rev[l_ms - 1 - i] = ms[i] < 4? ms[i] : 4; + seq = rev; + } else seq = (uint8_t*)ms; + if (!is_rev) { + rb = is_larger? a->rb + pes[r].low : a->rb - pes[r].high; + re = (is_larger? a->rb + pes[r].high: a->rb - pes[r].low) + l_ms; // if on the same strand, end position should be larger to make room for the seq length + } else { + rb = (is_larger? a->rb + pes[r].low : a->rb - pes[r].high) - l_ms; // similarly on opposite strands + re = is_larger? a->rb + pes[r].high: a->rb - pes[r].low; + } + ref = bns_get_seq(l_pac, pac, rb, re, &len); + if (len == re - rb) { + } + if (rev == 0) free(rev); + free(ref); } } @@ -174,8 +193,16 @@ void mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, c { kstring_t str; bwahit_t h[2]; + mem_alnreg_t a0[2]; str.l = str.m = 0; str.s = 0; - if (mem_pair(opt, bns->l_pac, pac, pes, s, a, h) == 0) { // successful + // perform SW for the best alignment + a0[0].score = a0[1].score = -1; + if (a[0].n) a0[0] = a[0].a[0]; + if (a[1].n) a0[1] = a[1].a[0]; + if (a0[0].score > 0) mem_matesw(opt, bns->l_pac, pac, pes, 0, &a0[0], s[1].l_seq, (uint8_t*)s[1].seq, &a[1]); + if (a0[1].score > 0) mem_matesw(opt, bns->l_pac, pac, pes, 1, &a0[1], s[0].l_seq, (uint8_t*)s[0].seq, &a[0]); + // pairing single-end hits + if (mem_pair(opt, bns->l_pac, pac, pes, s, a, h) == 0) { // successful pairing bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, &s[0], &h[0], opt->is_hard); s[0].sam = strdup(str.s); str.l = 0; bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, &s[1], &h[1], opt->is_hard);