diff --git a/bntseq.c b/bntseq.c index e1cd323..b63dff4 100644 --- a/bntseq.c +++ b/bntseq.c @@ -329,6 +329,15 @@ int bns_pos2rid(const bntseq_t *bns, int64_t pos_f) return mid; } +int bns_intv2rid(const bntseq_t *bns, int64_t rb, int64_t re) +{ + int is_rev, rid_b, rid_e; + if (rb < bns->l_pac && re > bns->l_pac) return -2; + rid_b = bns_pos2rid(bns, bns_depos(bns, rb, &is_rev)); + rid_e = bns_pos2rid(bns, bns_depos(bns, re, &is_rev) - 1); + return rid_b == rid_e? rid_b : -1; +} + int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id) { int left, mid, right, nn; @@ -374,3 +383,26 @@ uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end } else *len = 0; // if bridging the forward-reverse boundary, return nothing return seq; } + +uint8_t *bns_fetch_seq(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid) +{ + int64_t far_beg, far_end, len; + int is_rev; + uint8_t *seq; + + if (*end < *beg) *end ^= *beg, *beg ^= *end, *end ^= *beg; // if end is smaller, swap + assert(*beg <= mid && mid < *end); + *rid = bns_pos2rid(bns, bns_depos(bns, mid, &is_rev)); + far_beg = bns->anns[*rid].offset; + far_end = far_beg + bns->anns[*rid].len; + if (is_rev) { // flip to the reverse strand + int64_t tmp = far_beg; + far_beg = (bns->l_pac<<1) - 1 - far_end; + far_end = (bns->l_pac<<1) - 1 - tmp; + } + *beg = *beg > far_beg? *beg : far_beg; + *end = *end < far_end? *end : far_end; + seq = bns_get_seq(bns->l_pac, pac, *beg, *end, &len); + assert(seq && *end - *beg == len); // assertion failure should never happen + return seq; +} diff --git a/bntseq.h b/bntseq.h index 4061438..6437cf6 100644 --- a/bntseq.h +++ b/bntseq.h @@ -28,6 +28,7 @@ #ifndef BWT_BNTSEQ_H #define BWT_BNTSEQ_H +#include #include #include #include @@ -75,6 +76,8 @@ extern "C" { int bns_pos2rid(const bntseq_t *bns, int64_t pos_f); int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id); uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end, int64_t *len); + uint8_t *bns_fetch_seq(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid); + int bns_intv2rid(const bntseq_t *bns, int64_t rb, int64_t re); #ifdef __cplusplus } diff --git a/bwa.c b/bwa.c index 08881c0..db3b947 100644 --- a/bwa.c +++ b/bwa.c @@ -176,57 +176,6 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa return bwa_gen_cigar2(mat, q, r, q, r, w_, l_pac, pac, l_query, query, rb, re, score, n_cigar, NM); } -int bwa_fix_xref2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re) -{ - int is_rev, ori_ql = *qe - *qb; - int64_t cb, ce, fm, ori_rl = *re - *rb; - bntann1_t *ra; - assert(ori_ql > 0 && ori_rl > 0); - if (*rb < bns->l_pac && *re > bns->l_pac) { // cross the for-rev boundary; actually with BWA-MEM, we should never come to here - *qb = *qe = *rb = *re = -1; - return -1; // unable to fix - } - fm = bns_depos(bns, (*rb + *re) >> 1, &is_rev); // coordinate of the middle point on the forward strand - ra = &bns->anns[bns_pos2rid(bns, fm)]; // annotation of chr corresponding to the middle point - cb = is_rev? (bns->l_pac<<1) - (ra->offset + ra->len) : ra->offset; // chr start on the mapping strand - ce = cb + ra->len; // chr end - if (cb > *rb || ce < *re) { // fix is needed - int i, score, n_cigar, y, NM; - uint32_t *cigar; - int64_t x; - cb = cb > *rb? cb : *rb; - ce = ce < *re? ce : *re; - cigar = bwa_gen_cigar2(mat, o_del, e_del, o_ins, e_ins, w, bns->l_pac, pac, *qe - *qb, query + *qb, *rb, *re, &score, &n_cigar, &NM); - for (i = 0, x = *rb, y = *qb; i < n_cigar; ++i) { - int op = cigar[i]&0xf, len = cigar[i]>>4; - if (op == 0) { - if (x <= cb && cb < x + len) - *qb = y + (cb - x), *rb = cb; - if (x < ce && ce <= x + len) { - *qe = y + (ce - x), *re = ce; - break; - } else x += len, y += len; - } else if (op == 1) { - y += len; - } else if (op == 2) { - if (x <= cb && cb < x + len) - *qb = y, *rb = x + len; - if (x < ce && ce <= x + len) { - *qe = y, *re = x; - break; - } else x += len; - } else abort(); // should not be here - } - free(cigar); - } - return (*qe - *qb < .33 * ori_ql || *re - *rb < .33 * ori_rl)? -2 : 0; -} - -int bwa_fix_xref(const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re) -{ - return bwa_fix_xref2(mat, q, r, q, r, w, bns, pac, query, qb, qe, rb, re); -} - /********************* * Full index reader * *********************/ diff --git a/bwa.h b/bwa.h index 8d46e58..bbc2525 100644 --- a/bwa.h +++ b/bwa.h @@ -33,8 +33,6 @@ extern "C" { void bwa_fill_scmat(int a, int b, int8_t mat[25]); uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM); uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM); - int bwa_fix_xref(const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re); - int bwa_fix_xref2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re); char *bwa_idx_infer_prefix(const char *hint); bwt_t *bwa_idx_load_bwt(const char *hint); diff --git a/bwamem.c b/bwamem.c index 189e58b..346715c 100644 --- a/bwamem.c +++ b/bwamem.c @@ -143,11 +143,11 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co typedef struct { int64_t rbeg; int32_t qbeg, len; -} mem_seed_t; +} mem_seed_t; // unaligned memory typedef struct { - int n, m, first; - uint32_t w:30, kept:2; + int n, m, first, rid; + int w, kept; int64_t pos; mem_seed_t *seeds; } mem_chain_t; @@ -160,12 +160,13 @@ typedef struct { size_t n, m; mem_chain_t *a; } mem_chain_v; KBTREE_INIT(chn, mem_chain_t, chain_cmp) // return 1 if the seed is merged into the chain -static int test_and_merge(const mem_opt_t *opt, int64_t l_pac, mem_chain_t *c, const mem_seed_t *p) +static int test_and_merge(const mem_opt_t *opt, int64_t l_pac, mem_chain_t *c, const mem_seed_t *p, int seed_rid) { int64_t qend, rend, x, y; const mem_seed_t *last = &c->seeds[c->n-1]; qend = last->qbeg + last->len; rend = last->rbeg + last->len; + if (seed_rid != c->rid) return 0; // different chr; request a new chain if (p->qbeg >= c->seeds[0].qbeg && p->qbeg + p->len <= qend && p->rbeg >= c->seeds[0].rbeg && p->rbeg + p->len <= rend) return 1; // contained seed; do nothing if ((last->rbeg < l_pac || c->seeds[0].rbeg < l_pac) && p->rbeg >= l_pac) return 0; // don't chain if on different strand @@ -220,9 +221,10 @@ void mem_print_chain(const bntseq_t *bns, mem_chain_v *chn) } } -mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, int64_t l_pac, int len, const uint8_t *seq) +mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, int len, const uint8_t *seq) { int i; + int64_t l_pac = bns->l_pac; mem_chain_v chain; kbtree_t(chn) *tree; smem_aux_t *aux; @@ -241,19 +243,21 @@ mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, int64_t l_pac, int for (k = 0; k < p->x[2]; ++k) { mem_chain_t tmp, *lower, *upper; mem_seed_t s; - int to_add = 0; + int rid, to_add = 0; s.rbeg = tmp.pos = bwt_sa(bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference s.qbeg = p->info>>32; s.len = slen; - if (s.rbeg < l_pac && l_pac < s.rbeg + s.len) continue; // bridging forward-reverse boundary; skip + rid = bns_intv2rid(bns, s.rbeg, s.rbeg + s.len); + if (rid < 0) continue; // bridging multiple reference sequences or the forward-reverse boundary if (kb_size(tree)) { kb_intervalp(chn, tree, &tmp, &lower, &upper); // find the closest chain - if (!lower || !test_and_merge(opt, l_pac, lower, &s)) to_add = 1; + if (!lower || !test_and_merge(opt, l_pac, lower, &s, rid)) to_add = 1; } else to_add = 1; if (to_add) { // add the seed as a new chain tmp.n = 1; tmp.m = 4; tmp.seeds = calloc(tmp.m, sizeof(mem_seed_t)); tmp.seeds[0] = s; + tmp.rid = rid; kb_putp(chn, tree, &tmp); } } @@ -473,11 +477,11 @@ void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t i * low-divergence sequences, more testing is needed. For now, I only recommend * to use mem_test_chain_sw() for PacBio data. It is disabled by default. */ -int mem_test_chain_sw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c) +int mem_test_chain_sw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c) { - int i, qb, qe; + int i, qb, qe, rid; int min_HSP_score = (int)(opt->min_chain_weight * opt->a * MEM_HSP_COEF + .499); - int64_t rb, re, rlen; + int64_t rb, re, l_pac = bns->l_pac; uint8_t *rseq = 0; kswr_t x; @@ -505,8 +509,8 @@ int mem_test_chain_sw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, i if (qe - qb >= opt->w * 4 || re - rb >= opt->w * 4) return 1; if (qe - qb >= MEM_SHORT_LEN || re - rb >= MEM_SHORT_LEN) return 1; - rseq = bns_get_seq(l_pac, pac, rb, re, &rlen); - assert(rlen == re - rb); + rseq = bns_fetch_seq(bns, pac, &rb, c->seeds[0].rbeg, &re, &rid); + assert(c->rid == rid); x = ksw_align2(qe - qb, (uint8_t*)query + qb, re - rb, rseq, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, KSW_XSTART, 0); free(rseq); if (x.score >= min_HSP_score) return 1; @@ -514,10 +518,10 @@ int mem_test_chain_sw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, i return 0; } -int mem_chain2aln_short(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av) +int mem_chain2aln_short(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av) { - int i, qb, qe, xtra; - int64_t rb, re, rlen; + int i, qb, qe, xtra, rid; + int64_t rb, re, l_pac = bns->l_pac; uint8_t *rseq = 0; mem_alnreg_t a; kswr_t x; @@ -547,8 +551,8 @@ int mem_chain2aln_short(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, if (qe - qb >= opt->w * 4 || re - rb >= opt->w * 4) return 1; if (qe - qb >= MEM_SHORT_LEN || re - rb >= MEM_SHORT_LEN) return 1; - rseq = bns_get_seq(l_pac, pac, rb, re, &rlen); - assert(rlen == re - rb); + rseq = bns_fetch_seq(bns, pac, &rb, c->seeds[0].rbeg, &re, &rid); + assert(c->rid == rid); xtra = KSW_XSUBO | KSW_XSTART | ((qe - qb) * opt->a < 250? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a); x = ksw_align2(qe - qb, (uint8_t*)query + qb, re - rb, rseq, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0); free(rseq); @@ -556,6 +560,7 @@ int mem_chain2aln_short(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, a.qb = qb + x.qb; a.qe = qb + x.qe + 1; a.score = x.score; a.csub = x.score2; + a.rid = c->rid; if (bwa_verbose >= 4) printf("** Attempted alignment via mem_chain2aln_short(): [%d,%d) <=> [%ld,%ld); score=%d; %d/%d\n", a.qb, a.qe, (long)a.rb, (long)a.re, x.score, a.qe-a.qb, qe-qb); if (x.tb < MEM_SHORT_EXT>>1 || x.te > re - rb - (MEM_SHORT_EXT>>1)) return 1; kv_push(mem_alnreg_t, *av, a); @@ -571,10 +576,10 @@ static inline int cal_max_gap(const mem_opt_t *opt, int qlen) return l < opt->w<<1? l : opt->w<<1; } -void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av) +void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av) { - int i, k, max_off[2], aw[2]; // aw: actual bandwidth used in extension - int64_t rlen, rmax[2], tmp, max = 0; + int i, k, rid, max_off[2], aw[2]; // aw: actual bandwidth used in extension + int64_t l_pac = bns->l_pac, rmax[2], tmp, max = 0; const mem_seed_t *s; uint8_t *rseq = 0; uint64_t *srt; @@ -598,8 +603,8 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int else rmax[0] = l_pac; } // retrieve the reference sequence - rseq = bns_get_seq(l_pac, pac, rmax[0], rmax[1], &rlen); - assert(rlen == rmax[1] - rmax[0]); + rseq = bns_fetch_seq(bns, pac, &rmax[0], c->seeds[0].rbeg, &rmax[1], &rid); + assert(c->rid == rid); srt = malloc(c->n * 8); for (i = 0; i < c->n; ++i) @@ -649,6 +654,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int memset(a, 0, sizeof(mem_alnreg_t)); a->w = aw[0] = aw[1] = opt->w; a->score = a->truesc = -1; + a->rid = c->rid; if (bwa_verbose >= 4) err_printf("** ---> Extending from seed(%d) [%ld;%ld,%ld] <---\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg); if (s->qbeg) { // left extension @@ -939,7 +945,7 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse for (i = 0; i < l_seq; ++i) // convert to 2-bit encoding if we have not done so seq[i] = seq[i] < 4? seq[i] : nst_nt4_table[(int)seq[i]]; - chn = mem_chain(opt, bwt, bns->l_pac, l_seq, (uint8_t*)seq); + chn = mem_chain(opt, bwt, bns, l_seq, (uint8_t*)seq); chn.n = mem_chain_flt(opt, chn.n, chn.a); if (bwa_verbose >= 4) mem_print_chain(bns, &chn); @@ -948,9 +954,9 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse mem_chain_t *p = &chn.a[i]; int ret; if (bwa_verbose >= 4) err_printf("* ---> Processing chain(%d) <---\n", i); - if (opt->min_chain_weight > 0) ret = mem_test_chain_sw(opt, bns->l_pac, pac, l_seq, (uint8_t*)seq, p); - else ret = mem_chain2aln_short(opt, bns->l_pac, pac, l_seq, (uint8_t*)seq, p, ®s); - if (ret > 0) mem_chain2aln(opt, bns->l_pac, pac, l_seq, (uint8_t*)seq, p, ®s); + if (opt->min_chain_weight > 0) ret = mem_test_chain_sw(opt, bns, pac, l_seq, (uint8_t*)seq, p); + else ret = mem_chain2aln_short(opt, bns, pac, l_seq, (uint8_t*)seq, p, ®s); + if (ret > 0) mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, ®s); free(chn.a[i].seeds); } free(chn.a); @@ -986,11 +992,6 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]]; a.mapq = ar->secondary < 0? mem_approx_mapq_se(opt, ar) : 0; if (ar->secondary >= 0) a.flag |= 0x100; // secondary alignment - if (bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re) < 0) { - if (bwa_verbose >= 2 && name) - fprintf(stderr, "[W::%s] A cross-chr hit of read '%s' has been dropped.\n", __func__, name); - goto err_reg2aln; - } tmp = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_del, opt->e_del); w2 = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_ins, opt->e_ins); w2 = w2 > tmp? w2 : tmp; @@ -1005,11 +1006,6 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t last_sc = score; w2 <<= 1; } while (++i < 3 && score < ar->truesc - opt->a); - if (score < 0) { - if (bwa_verbose >= 2 && name) - fprintf(stderr, "[W::%s] A hit to read '%s' has been dropped.\n", __func__, name); - goto err_reg2aln; - } l_MD = strlen((char*)(a.cigar + a.n_cigar)) + 1; a.NM = NM; pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev); @@ -1040,17 +1036,11 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t } } a.rid = bns_pos2rid(bns, pos); + assert(a.rid == ar->rid); a.pos = pos - bns->anns[a.rid].offset; a.score = ar->score; a.sub = ar->sub > ar->csub? ar->sub : ar->csub; free(query); return a; - -err_reg2aln: - free(a.cigar); - memset(&a, 0, sizeof(mem_aln_t)); - a.rid = -1; a.pos = -1; a.flag |= 0x4; - free(query); - return a; } mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar) diff --git a/bwamem.h b/bwamem.h index 7cfe8b8..53472fe 100644 --- a/bwamem.h +++ b/bwamem.h @@ -52,6 +52,7 @@ typedef struct { typedef struct { int64_t rb, re; // [rb,re): reference sequence in the alignment int qb, qe; // [qb,qe): query sequence in the alignment + int rid; // reference seq ID int score; // best local SW score int truesc; // actual score corresponding to the aligned region; possibly smaller than $score int sub; // 2nd best SW score diff --git a/bwamem_extra.c b/bwamem_extra.c index bc841dd..96cdbcd 100644 --- a/bwamem_extra.c +++ b/bwamem_extra.c @@ -88,13 +88,9 @@ void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_t *p = &a->a[i]; int is_rev, rid, qb = p->qb, qe = p->qe; int64_t pos, rb = p->rb, re = p->re; - if (bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)s->seq, &qb, &qe, &rb, &re) < 0) { - if (bwa_verbose >= 2) - fprintf(stderr, "[W::%s] A cross-chr hit of read '%s' has been dropped.\n", __func__, s->name); - continue; - } pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev); rid = bns_pos2rid(bns, pos); + assert(rid == p->rid); pos -= bns->anns[rid].offset; kputs(s->name, &str); kputc('\t', &str); kputw(s->l_seq, &str); kputc('\t', &str); diff --git a/bwamem_pair.c b/bwamem_pair.c index 4a7cdf3..cec25da 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -106,10 +106,11 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v * } } -int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], const mem_alnreg_t *a, int l_ms, const uint8_t *ms, mem_alnreg_v *ma) +int mem_matesw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], const mem_alnreg_t *a, int l_ms, const uint8_t *ms, mem_alnreg_v *ma) { extern int mem_sort_and_dedup(int n, mem_alnreg_t *a, float mask_level_redun); - int i, r, skip[4], n = 0; + int64_t l_pac = bns->l_pac; + int i, r, skip[4], n = 0, rid; for (r = 0; r < 4; ++r) skip[r] = pes[r].failed? 1 : 0; for (i = 0; i < ma->n; ++i) { // check which orinentation has been found @@ -122,7 +123,7 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me for (r = 0; r < 4; ++r) { int is_rev, is_larger; uint8_t *seq, *rev = 0, *ref; - int64_t rb, re, len; + int64_t rb, re; if (skip[r]) continue; is_rev = (r>>1 != (r&1)); // whether to reverse complement the mate is_larger = !(r>>1); // whether the mate has larger coordinate @@ -140,14 +141,15 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me } if (rb < 0) rb = 0; if (re > l_pac<<1) re = l_pac<<1; - ref = bns_get_seq(l_pac, pac, rb, re, &len); - if (len == re - rb) { // no funny things happening + ref = bns_fetch_seq(bns, pac, &rb, (rb+re)>>1, &re, &rid); + if (a->rid == rid) { // no funny things happening kswr_t aln; mem_alnreg_t b; int tmp, xtra = KSW_XSUBO | KSW_XSTART | (l_ms * opt->a < 250? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a); - aln = ksw_align2(l_ms, seq, len, ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0); + aln = ksw_align2(l_ms, seq, re - rb, ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0); memset(&b, 0, sizeof(mem_alnreg_t)); if (aln.score >= opt->min_seed_len && aln.qb >= 0) { // something goes wrong if aln.qb < 0 + b.rid = a->rid; b.qb = is_rev? l_ms - (aln.qe + 1) : aln.qb; b.qe = is_rev? l_ms - aln.qb : aln.qe + 1; b.rb = is_rev? (l_pac<<1) - (rb + aln.te + 1) : rb + aln.tb; @@ -258,7 +260,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co kv_push(mem_alnreg_t, b[i], a[i].a[j]); for (i = 0; i < 2; ++i) for (j = 0; j < b[i].n && j < opt->max_matesw; ++j) - n += mem_matesw(opt, bns->l_pac, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]); + n += mem_matesw(opt, bns, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]); free(b[0].a); free(b[1].a); } mem_mark_primary_se(opt, a[0].n, a[0].a, id<<1|0); diff --git a/main.c b/main.c index 2993fc0..fdd223d 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.8+layout-r477" +#define PACKAGE_VERSION "0.7.8+layout-r478" #endif int bwa_fa2pac(int argc, char *argv[]);