dev-472: get rid of bwa_fix_xref()

This function causes all kinds of problems when the reference genome consists
of many short reads/contigs/chromsomes. Some of the problems are nearly
unfixable at the point where bwa_fix_xref() gets called. This commit attempts
to fix the problem at the root. It disallows chains spanning multiple contigs
and never retrieves sequences bridging two adjacent contigs. Thus all the
chaining, extension, SW and global alignments are confined to on contig only.

This commit brings many changes. I have tested it on a couple examples
including Peter Field's PacBio example. It works well so far.
This commit is contained in:
Heng Li 2014-04-10 20:54:27 -04:00
parent 23e0e99ec0
commit 8638cfadc8
9 changed files with 79 additions and 110 deletions

View File

@ -329,6 +329,15 @@ int bns_pos2rid(const bntseq_t *bns, int64_t pos_f)
return mid;
}
int bns_intv2rid(const bntseq_t *bns, int64_t rb, int64_t re)
{
int is_rev, rid_b, rid_e;
if (rb < bns->l_pac && re > bns->l_pac) return -2;
rid_b = bns_pos2rid(bns, bns_depos(bns, rb, &is_rev));
rid_e = bns_pos2rid(bns, bns_depos(bns, re, &is_rev) - 1);
return rid_b == rid_e? rid_b : -1;
}
int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id)
{
int left, mid, right, nn;
@ -374,3 +383,26 @@ uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end
} else *len = 0; // if bridging the forward-reverse boundary, return nothing
return seq;
}
uint8_t *bns_fetch_seq(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid)
{
int64_t far_beg, far_end, len;
int is_rev;
uint8_t *seq;
if (*end < *beg) *end ^= *beg, *beg ^= *end, *end ^= *beg; // if end is smaller, swap
assert(*beg <= mid && mid < *end);
*rid = bns_pos2rid(bns, bns_depos(bns, mid, &is_rev));
far_beg = bns->anns[*rid].offset;
far_end = far_beg + bns->anns[*rid].len;
if (is_rev) { // flip to the reverse strand
int64_t tmp = far_beg;
far_beg = (bns->l_pac<<1) - 1 - far_end;
far_end = (bns->l_pac<<1) - 1 - tmp;
}
*beg = *beg > far_beg? *beg : far_beg;
*end = *end < far_end? *end : far_end;
seq = bns_get_seq(bns->l_pac, pac, *beg, *end, &len);
assert(seq && *end - *beg == len); // assertion failure should never happen
return seq;
}

View File

@ -28,6 +28,7 @@
#ifndef BWT_BNTSEQ_H
#define BWT_BNTSEQ_H
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <zlib.h>
@ -75,6 +76,8 @@ extern "C" {
int bns_pos2rid(const bntseq_t *bns, int64_t pos_f);
int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id);
uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end, int64_t *len);
uint8_t *bns_fetch_seq(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid);
int bns_intv2rid(const bntseq_t *bns, int64_t rb, int64_t re);
#ifdef __cplusplus
}

51
bwa.c
View File

@ -176,57 +176,6 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa
return bwa_gen_cigar2(mat, q, r, q, r, w_, l_pac, pac, l_query, query, rb, re, score, n_cigar, NM);
}
int bwa_fix_xref2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re)
{
int is_rev, ori_ql = *qe - *qb;
int64_t cb, ce, fm, ori_rl = *re - *rb;
bntann1_t *ra;
assert(ori_ql > 0 && ori_rl > 0);
if (*rb < bns->l_pac && *re > bns->l_pac) { // cross the for-rev boundary; actually with BWA-MEM, we should never come to here
*qb = *qe = *rb = *re = -1;
return -1; // unable to fix
}
fm = bns_depos(bns, (*rb + *re) >> 1, &is_rev); // coordinate of the middle point on the forward strand
ra = &bns->anns[bns_pos2rid(bns, fm)]; // annotation of chr corresponding to the middle point
cb = is_rev? (bns->l_pac<<1) - (ra->offset + ra->len) : ra->offset; // chr start on the mapping strand
ce = cb + ra->len; // chr end
if (cb > *rb || ce < *re) { // fix is needed
int i, score, n_cigar, y, NM;
uint32_t *cigar;
int64_t x;
cb = cb > *rb? cb : *rb;
ce = ce < *re? ce : *re;
cigar = bwa_gen_cigar2(mat, o_del, e_del, o_ins, e_ins, w, bns->l_pac, pac, *qe - *qb, query + *qb, *rb, *re, &score, &n_cigar, &NM);
for (i = 0, x = *rb, y = *qb; i < n_cigar; ++i) {
int op = cigar[i]&0xf, len = cigar[i]>>4;
if (op == 0) {
if (x <= cb && cb < x + len)
*qb = y + (cb - x), *rb = cb;
if (x < ce && ce <= x + len) {
*qe = y + (ce - x), *re = ce;
break;
} else x += len, y += len;
} else if (op == 1) {
y += len;
} else if (op == 2) {
if (x <= cb && cb < x + len)
*qb = y, *rb = x + len;
if (x < ce && ce <= x + len) {
*qe = y, *re = x;
break;
} else x += len;
} else abort(); // should not be here
}
free(cigar);
}
return (*qe - *qb < .33 * ori_ql || *re - *rb < .33 * ori_rl)? -2 : 0;
}
int bwa_fix_xref(const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re)
{
return bwa_fix_xref2(mat, q, r, q, r, w, bns, pac, query, qb, qe, rb, re);
}
/*********************
* Full index reader *
*********************/

2
bwa.h
View File

@ -33,8 +33,6 @@ extern "C" {
void bwa_fill_scmat(int a, int b, int8_t mat[25]);
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
int bwa_fix_xref(const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re);
int bwa_fix_xref2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re);
char *bwa_idx_infer_prefix(const char *hint);
bwt_t *bwa_idx_load_bwt(const char *hint);

View File

@ -143,11 +143,11 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co
typedef struct {
int64_t rbeg;
int32_t qbeg, len;
} mem_seed_t;
} mem_seed_t; // unaligned memory
typedef struct {
int n, m, first;
uint32_t w:30, kept:2;
int n, m, first, rid;
int w, kept;
int64_t pos;
mem_seed_t *seeds;
} mem_chain_t;
@ -160,12 +160,13 @@ typedef struct { size_t n, m; mem_chain_t *a; } mem_chain_v;
KBTREE_INIT(chn, mem_chain_t, chain_cmp)
// return 1 if the seed is merged into the chain
static int test_and_merge(const mem_opt_t *opt, int64_t l_pac, mem_chain_t *c, const mem_seed_t *p)
static int test_and_merge(const mem_opt_t *opt, int64_t l_pac, mem_chain_t *c, const mem_seed_t *p, int seed_rid)
{
int64_t qend, rend, x, y;
const mem_seed_t *last = &c->seeds[c->n-1];
qend = last->qbeg + last->len;
rend = last->rbeg + last->len;
if (seed_rid != c->rid) return 0; // different chr; request a new chain
if (p->qbeg >= c->seeds[0].qbeg && p->qbeg + p->len <= qend && p->rbeg >= c->seeds[0].rbeg && p->rbeg + p->len <= rend)
return 1; // contained seed; do nothing
if ((last->rbeg < l_pac || c->seeds[0].rbeg < l_pac) && p->rbeg >= l_pac) return 0; // don't chain if on different strand
@ -220,9 +221,10 @@ void mem_print_chain(const bntseq_t *bns, mem_chain_v *chn)
}
}
mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, int64_t l_pac, int len, const uint8_t *seq)
mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, int len, const uint8_t *seq)
{
int i;
int64_t l_pac = bns->l_pac;
mem_chain_v chain;
kbtree_t(chn) *tree;
smem_aux_t *aux;
@ -241,19 +243,21 @@ mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, int64_t l_pac, int
for (k = 0; k < p->x[2]; ++k) {
mem_chain_t tmp, *lower, *upper;
mem_seed_t s;
int to_add = 0;
int rid, to_add = 0;
s.rbeg = tmp.pos = bwt_sa(bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference
s.qbeg = p->info>>32;
s.len = slen;
if (s.rbeg < l_pac && l_pac < s.rbeg + s.len) continue; // bridging forward-reverse boundary; skip
rid = bns_intv2rid(bns, s.rbeg, s.rbeg + s.len);
if (rid < 0) continue; // bridging multiple reference sequences or the forward-reverse boundary
if (kb_size(tree)) {
kb_intervalp(chn, tree, &tmp, &lower, &upper); // find the closest chain
if (!lower || !test_and_merge(opt, l_pac, lower, &s)) to_add = 1;
if (!lower || !test_and_merge(opt, l_pac, lower, &s, rid)) to_add = 1;
} else to_add = 1;
if (to_add) { // add the seed as a new chain
tmp.n = 1; tmp.m = 4;
tmp.seeds = calloc(tmp.m, sizeof(mem_seed_t));
tmp.seeds[0] = s;
tmp.rid = rid;
kb_putp(chn, tree, &tmp);
}
}
@ -473,11 +477,11 @@ void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t i
* low-divergence sequences, more testing is needed. For now, I only recommend
* to use mem_test_chain_sw() for PacBio data. It is disabled by default.
*/
int mem_test_chain_sw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c)
int mem_test_chain_sw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c)
{
int i, qb, qe;
int i, qb, qe, rid;
int min_HSP_score = (int)(opt->min_chain_weight * opt->a * MEM_HSP_COEF + .499);
int64_t rb, re, rlen;
int64_t rb, re, l_pac = bns->l_pac;
uint8_t *rseq = 0;
kswr_t x;
@ -505,8 +509,8 @@ int mem_test_chain_sw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, i
if (qe - qb >= opt->w * 4 || re - rb >= opt->w * 4) return 1;
if (qe - qb >= MEM_SHORT_LEN || re - rb >= MEM_SHORT_LEN) return 1;
rseq = bns_get_seq(l_pac, pac, rb, re, &rlen);
assert(rlen == re - rb);
rseq = bns_fetch_seq(bns, pac, &rb, c->seeds[0].rbeg, &re, &rid);
assert(c->rid == rid);
x = ksw_align2(qe - qb, (uint8_t*)query + qb, re - rb, rseq, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, KSW_XSTART, 0);
free(rseq);
if (x.score >= min_HSP_score) return 1;
@ -514,10 +518,10 @@ int mem_test_chain_sw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, i
return 0;
}
int mem_chain2aln_short(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av)
int mem_chain2aln_short(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av)
{
int i, qb, qe, xtra;
int64_t rb, re, rlen;
int i, qb, qe, xtra, rid;
int64_t rb, re, l_pac = bns->l_pac;
uint8_t *rseq = 0;
mem_alnreg_t a;
kswr_t x;
@ -547,8 +551,8 @@ int mem_chain2aln_short(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac,
if (qe - qb >= opt->w * 4 || re - rb >= opt->w * 4) return 1;
if (qe - qb >= MEM_SHORT_LEN || re - rb >= MEM_SHORT_LEN) return 1;
rseq = bns_get_seq(l_pac, pac, rb, re, &rlen);
assert(rlen == re - rb);
rseq = bns_fetch_seq(bns, pac, &rb, c->seeds[0].rbeg, &re, &rid);
assert(c->rid == rid);
xtra = KSW_XSUBO | KSW_XSTART | ((qe - qb) * opt->a < 250? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a);
x = ksw_align2(qe - qb, (uint8_t*)query + qb, re - rb, rseq, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0);
free(rseq);
@ -556,6 +560,7 @@ int mem_chain2aln_short(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac,
a.qb = qb + x.qb; a.qe = qb + x.qe + 1;
a.score = x.score;
a.csub = x.score2;
a.rid = c->rid;
if (bwa_verbose >= 4) printf("** Attempted alignment via mem_chain2aln_short(): [%d,%d) <=> [%ld,%ld); score=%d; %d/%d\n", a.qb, a.qe, (long)a.rb, (long)a.re, x.score, a.qe-a.qb, qe-qb);
if (x.tb < MEM_SHORT_EXT>>1 || x.te > re - rb - (MEM_SHORT_EXT>>1)) return 1;
kv_push(mem_alnreg_t, *av, a);
@ -571,10 +576,10 @@ static inline int cal_max_gap(const mem_opt_t *opt, int qlen)
return l < opt->w<<1? l : opt->w<<1;
}
void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av)
void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av)
{
int i, k, max_off[2], aw[2]; // aw: actual bandwidth used in extension
int64_t rlen, rmax[2], tmp, max = 0;
int i, k, rid, max_off[2], aw[2]; // aw: actual bandwidth used in extension
int64_t l_pac = bns->l_pac, rmax[2], tmp, max = 0;
const mem_seed_t *s;
uint8_t *rseq = 0;
uint64_t *srt;
@ -598,8 +603,8 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
else rmax[0] = l_pac;
}
// retrieve the reference sequence
rseq = bns_get_seq(l_pac, pac, rmax[0], rmax[1], &rlen);
assert(rlen == rmax[1] - rmax[0]);
rseq = bns_fetch_seq(bns, pac, &rmax[0], c->seeds[0].rbeg, &rmax[1], &rid);
assert(c->rid == rid);
srt = malloc(c->n * 8);
for (i = 0; i < c->n; ++i)
@ -649,6 +654,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
memset(a, 0, sizeof(mem_alnreg_t));
a->w = aw[0] = aw[1] = opt->w;
a->score = a->truesc = -1;
a->rid = c->rid;
if (bwa_verbose >= 4) err_printf("** ---> Extending from seed(%d) [%ld;%ld,%ld] <---\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg);
if (s->qbeg) { // left extension
@ -939,7 +945,7 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse
for (i = 0; i < l_seq; ++i) // convert to 2-bit encoding if we have not done so
seq[i] = seq[i] < 4? seq[i] : nst_nt4_table[(int)seq[i]];
chn = mem_chain(opt, bwt, bns->l_pac, l_seq, (uint8_t*)seq);
chn = mem_chain(opt, bwt, bns, l_seq, (uint8_t*)seq);
chn.n = mem_chain_flt(opt, chn.n, chn.a);
if (bwa_verbose >= 4) mem_print_chain(bns, &chn);
@ -948,9 +954,9 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse
mem_chain_t *p = &chn.a[i];
int ret;
if (bwa_verbose >= 4) err_printf("* ---> Processing chain(%d) <---\n", i);
if (opt->min_chain_weight > 0) ret = mem_test_chain_sw(opt, bns->l_pac, pac, l_seq, (uint8_t*)seq, p);
else ret = mem_chain2aln_short(opt, bns->l_pac, pac, l_seq, (uint8_t*)seq, p, &regs);
if (ret > 0) mem_chain2aln(opt, bns->l_pac, pac, l_seq, (uint8_t*)seq, p, &regs);
if (opt->min_chain_weight > 0) ret = mem_test_chain_sw(opt, bns, pac, l_seq, (uint8_t*)seq, p);
else ret = mem_chain2aln_short(opt, bns, pac, l_seq, (uint8_t*)seq, p, &regs);
if (ret > 0) mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, &regs);
free(chn.a[i].seeds);
}
free(chn.a);
@ -986,11 +992,6 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t
query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]];
a.mapq = ar->secondary < 0? mem_approx_mapq_se(opt, ar) : 0;
if (ar->secondary >= 0) a.flag |= 0x100; // secondary alignment
if (bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re) < 0) {
if (bwa_verbose >= 2 && name)
fprintf(stderr, "[W::%s] A cross-chr hit of read '%s' has been dropped.\n", __func__, name);
goto err_reg2aln;
}
tmp = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_del, opt->e_del);
w2 = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_ins, opt->e_ins);
w2 = w2 > tmp? w2 : tmp;
@ -1005,11 +1006,6 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t
last_sc = score;
w2 <<= 1;
} while (++i < 3 && score < ar->truesc - opt->a);
if (score < 0) {
if (bwa_verbose >= 2 && name)
fprintf(stderr, "[W::%s] A hit to read '%s' has been dropped.\n", __func__, name);
goto err_reg2aln;
}
l_MD = strlen((char*)(a.cigar + a.n_cigar)) + 1;
a.NM = NM;
pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev);
@ -1044,13 +1040,6 @@ mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t
a.score = ar->score; a.sub = ar->sub > ar->csub? ar->sub : ar->csub;
free(query);
return a;
err_reg2aln:
free(a.cigar);
memset(&a, 0, sizeof(mem_aln_t));
a.rid = -1; a.pos = -1; a.flag |= 0x4;
free(query);
return a;
}
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar)

View File

@ -52,6 +52,7 @@ typedef struct {
typedef struct {
int64_t rb, re; // [rb,re): reference sequence in the alignment
int qb, qe; // [qb,qe): query sequence in the alignment
int rid; // reference seq ID
int score; // best local SW score
int truesc; // actual score corresponding to the aligned region; possibly smaller than $score
int sub; // 2nd best SW score

View File

@ -88,11 +88,6 @@ void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac,
const mem_alnreg_t *p = &a->a[i];
int is_rev, rid, qb = p->qb, qe = p->qe;
int64_t pos, rb = p->rb, re = p->re;
if (bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)s->seq, &qb, &qe, &rb, &re) < 0) {
if (bwa_verbose >= 2)
fprintf(stderr, "[W::%s] A cross-chr hit of read '%s' has been dropped.\n", __func__, s->name);
continue;
}
pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev);
rid = bns_pos2rid(bns, pos);
pos -= bns->anns[rid].offset;

View File

@ -106,10 +106,11 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *
}
}
int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], const mem_alnreg_t *a, int l_ms, const uint8_t *ms, mem_alnreg_v *ma)
int mem_matesw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], const mem_alnreg_t *a, int l_ms, const uint8_t *ms, mem_alnreg_v *ma)
{
extern int mem_sort_and_dedup(int n, mem_alnreg_t *a, float mask_level_redun);
int i, r, skip[4], n = 0;
int64_t l_pac = bns->l_pac;
int i, r, skip[4], n = 0, rid;
for (r = 0; r < 4; ++r)
skip[r] = pes[r].failed? 1 : 0;
for (i = 0; i < ma->n; ++i) { // check which orinentation has been found
@ -122,7 +123,7 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me
for (r = 0; r < 4; ++r) {
int is_rev, is_larger;
uint8_t *seq, *rev = 0, *ref;
int64_t rb, re, len;
int64_t rb, re;
if (skip[r]) continue;
is_rev = (r>>1 != (r&1)); // whether to reverse complement the mate
is_larger = !(r>>1); // whether the mate has larger coordinate
@ -140,14 +141,15 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me
}
if (rb < 0) rb = 0;
if (re > l_pac<<1) re = l_pac<<1;
ref = bns_get_seq(l_pac, pac, rb, re, &len);
if (len == re - rb) { // no funny things happening
ref = bns_fetch_seq(bns, pac, &rb, (rb+re)>>1, &re, &rid);
if (a->rid == rid) { // no funny things happening
kswr_t aln;
mem_alnreg_t b;
int tmp, xtra = KSW_XSUBO | KSW_XSTART | (l_ms * opt->a < 250? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a);
aln = ksw_align2(l_ms, seq, len, ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0);
aln = ksw_align2(l_ms, seq, re - rb, ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0);
memset(&b, 0, sizeof(mem_alnreg_t));
if (aln.score >= opt->min_seed_len && aln.qb >= 0) { // something goes wrong if aln.qb < 0
b.rid = a->rid;
b.qb = is_rev? l_ms - (aln.qe + 1) : aln.qb;
b.qe = is_rev? l_ms - aln.qb : aln.qe + 1;
b.rb = is_rev? (l_pac<<1) - (rb + aln.te + 1) : rb + aln.tb;
@ -258,7 +260,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
kv_push(mem_alnreg_t, b[i], a[i].a[j]);
for (i = 0; i < 2; ++i)
for (j = 0; j < b[i].n && j < opt->max_matesw; ++j)
n += mem_matesw(opt, bns->l_pac, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]);
n += mem_matesw(opt, bns, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]);
free(b[0].a); free(b[1].a);
}
mem_mark_primary_se(opt, a[0].n, a[0].a, id<<1|0);

2
main.c
View File

@ -4,7 +4,7 @@
#include "utils.h"
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.8+dev-r471"
#define PACKAGE_VERSION "0.7.8+dev-r472"
#endif
int bwa_fa2pac(int argc, char *argv[]);