code backup; more changes coming later
This commit is contained in:
parent
733410b50d
commit
59bc9341f6
28
bwamem.c
28
bwamem.c
|
|
@ -43,7 +43,7 @@ mem_opt_t *mem_opt_init()
|
|||
mem_opt_t *o;
|
||||
o = calloc(1, sizeof(mem_opt_t));
|
||||
o->flag = 0;
|
||||
o->a = 1; o->b = 4; o->q = 6; o->r = 1; o->w = 100;
|
||||
o->a = 1; o->b = 4; o->q = 6; o->r = 1; o->w = 60; o->max_w = 500;
|
||||
o->pen_unpaired = 9;
|
||||
o->pen_clip = 5;
|
||||
o->min_seed_len = 19;
|
||||
|
|
@ -492,7 +492,7 @@ static inline int cal_max_gap(const mem_opt_t *opt, int qlen)
|
|||
|
||||
void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av)
|
||||
{
|
||||
int i, k;
|
||||
int i, k, max_off[2], aw[2]; // aw: actual bandwidth used in extension
|
||||
int64_t rlen, rmax[2], tmp, max = 0;
|
||||
const mem_seed_t *s;
|
||||
uint8_t *rseq = 0;
|
||||
|
|
@ -549,16 +549,22 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
|
|||
|
||||
a = kv_pushp(mem_alnreg_t, *av);
|
||||
memset(a, 0, sizeof(mem_alnreg_t));
|
||||
a->w = aw[0] = aw[1] = opt->w;
|
||||
|
||||
if (s->qbeg) { // left extension
|
||||
uint8_t *rs, *qs;
|
||||
int qle, tle, gtle, gscore;
|
||||
int qle, tle, gtle, gscore, tmps = -1;
|
||||
qs = malloc(s->qbeg);
|
||||
for (i = 0; i < s->qbeg; ++i) qs[i] = query[s->qbeg - 1 - i];
|
||||
tmp = s->rbeg - rmax[0];
|
||||
rs = malloc(tmp);
|
||||
for (i = 0; i < tmp; ++i) rs[i] = rseq[tmp - 1 - i];
|
||||
a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, opt->w, s->len * opt->a, &qle, &tle, >le, &gscore);
|
||||
for (aw[0] = opt->w;; aw[0] <<= 1) {
|
||||
a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, aw[0], s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0]);
|
||||
if (bwa_verbose >= 4) printf("L\t%d < %d; w=%d; max_off=%d\n", tmps, a->score, aw[0], max_off[0]); fflush(stdout);
|
||||
if (a->score == tmps || aw[0]<<1 > opt->max_w || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break;
|
||||
tmps = a->score;
|
||||
}
|
||||
// check whether we prefer to reach the end of the query
|
||||
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) a->qb = s->qbeg - qle, a->rb = s->rbeg - tle; // local hits
|
||||
else a->qb = 0, a->rb = s->rbeg - gtle; // reach the end
|
||||
|
|
@ -566,16 +572,21 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
|
|||
} else a->score = s->len * opt->a, a->qb = 0, a->rb = s->rbeg;
|
||||
|
||||
if (s->qbeg + s->len != l_query) { // right extension
|
||||
int qle, tle, qe, re, gtle, gscore;
|
||||
int qle, tle, qe, re, gtle, gscore, tmps = -1;
|
||||
qe = s->qbeg + s->len;
|
||||
re = s->rbeg + s->len - rmax[0];
|
||||
assert(re >= 0);
|
||||
a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, opt->w, a->score, &qle, &tle, >le, &gscore);
|
||||
for (aw[1] = opt->w;; aw[1] <<= 1) {
|
||||
a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, aw[1], a->score, &qle, &tle, >le, &gscore, &max_off[1]);
|
||||
if (bwa_verbose >= 4) printf("R\t%d < %d; w=%d; max_off=%d\n", tmps, a->score, aw[1], max_off[1]); fflush(stdout);
|
||||
if (a->score == tmps || aw[1]<<1 > opt->max_w || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break;
|
||||
tmps = a->score;
|
||||
}
|
||||
// similar to the above
|
||||
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) a->qe = qe + qle, a->re = rmax[0] + re + tle;
|
||||
else a->qe = l_query, a->re = rmax[0] + re + gtle;
|
||||
} else a->qe = l_query, a->re = s->rbeg + s->len;
|
||||
if (bwa_verbose >= 4) { printf("[%d] score=%d\t[%d,%d) <=> [%ld,%ld)\n", k, a->score, a->qb, a->qe, (long)a->rb, (long)a->re); fflush(stdout); }
|
||||
if (bwa_verbose >= 4) { printf("[%d]\taw={%d,%d}\tscore=%d\t[%d,%d) <=> [%ld,%ld)\n", k, aw[0], aw[1], a->score, a->qb, a->qe, (long)a->rb, (long)a->re); fflush(stdout); }
|
||||
|
||||
// compute seedcov
|
||||
for (i = 0, a->seedcov = 0; i < c->n; ++i) {
|
||||
|
|
@ -583,6 +594,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
|
|||
if (t->qbeg >= a->qb && t->qbeg + t->len <= a->qe && t->rbeg >= a->rb && t->rbeg + t->len <= a->re) // seed fully contained
|
||||
a->seedcov += t->len; // this is not very accurate, but for approx. mapQ, this is good enough
|
||||
}
|
||||
a->w = aw[0] > aw[1]? aw[0] : aw[1];
|
||||
}
|
||||
free(srt); free(rseq);
|
||||
}
|
||||
|
|
@ -750,7 +762,7 @@ void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, b
|
|||
h.qual = p->secondary >= 0? 0 : mem_approx_mapq_se(opt, p);
|
||||
if (k == 0) mapq0 = h.qual;
|
||||
else if (h.qual > mapq0) h.qual = mapq0;
|
||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, &h, opt->flag&MEM_F_HARDCLIP, m);
|
||||
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, p->w, bns, pac, s, &h, opt->flag&MEM_F_HARDCLIP, m);
|
||||
}
|
||||
} else bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, 0, opt->flag&MEM_F_HARDCLIP, m);
|
||||
s->sam = str.s;
|
||||
|
|
|
|||
2
bwamem.h
2
bwamem.h
|
|
@ -22,6 +22,7 @@ typedef struct {
|
|||
int pen_unpaired; // phred-scaled penalty for unpaired reads
|
||||
int pen_clip; // clipping penalty. This score is not deducted from the DP score.
|
||||
int w; // band width
|
||||
int max_w; // max band width
|
||||
|
||||
int flag; // see MEM_F_* macros
|
||||
int min_seed_len; // minimum seed length
|
||||
|
|
@ -45,6 +46,7 @@ typedef struct {
|
|||
int sub; // 2nd best SW score
|
||||
int csub; // SW score of a tandem hit
|
||||
int sub_n; // approximate number of suboptimal hits
|
||||
int w; // actual band width used in extension
|
||||
int seedcov; // length of regions coverged by seeds
|
||||
int secondary; // index of the parent hit shadowing the current hit; <0 if primary
|
||||
} mem_alnreg_t;
|
||||
|
|
|
|||
13
ksw.c
13
ksw.c
|
|
@ -359,11 +359,11 @@ typedef struct {
|
|||
int32_t h, e;
|
||||
} eh_t;
|
||||
|
||||
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore)
|
||||
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off)
|
||||
{
|
||||
eh_t *eh; // score array
|
||||
int8_t *qp; // query profile
|
||||
int i, j, k, gapoe = gapo + gape, beg, end, max, max_i, max_j, max_gap, max_ie, gscore;
|
||||
int i, j, k, gapoe = gapo + gape, beg, end, max, max_i, max_j, max_gap, max_ie, gscore, max_off;
|
||||
if (h0 < 0) h0 = 0;
|
||||
// allocate memory
|
||||
qp = malloc(qlen * m);
|
||||
|
|
@ -386,6 +386,7 @@ int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
|
|||
w = w < max_gap? w : max_gap;
|
||||
// DP loop
|
||||
max = h0, max_i = max_j = -1; max_ie = -1, gscore = -1;
|
||||
max_off = 0;
|
||||
beg = 0, end = qlen;
|
||||
for (i = 0; LIKELY(i < tlen); ++i) {
|
||||
int f = 0, h1, m = 0, mj = -1;
|
||||
|
|
@ -410,7 +411,7 @@ int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
|
|||
h = h > e? h : e;
|
||||
h = h > f? h : f;
|
||||
h1 = h; // save H(i,j) to h1 for the next column
|
||||
mj = m > h? mj : j;
|
||||
mj = m > h? mj : j; // record the position where max score is achieved
|
||||
m = m > h? m : h; // m is stored at eh[mj+1]
|
||||
h -= gapoe;
|
||||
h = h > 0? h : 0;
|
||||
|
|
@ -426,7 +427,10 @@ int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
|
|||
gscore = gscore > h1? gscore : h1;
|
||||
}
|
||||
if (m == 0) break;
|
||||
if (m > max) max = m, max_i = i, max_j = mj;
|
||||
if (m > max) {
|
||||
max = m, max_i = i, max_j = mj;
|
||||
max_off = max_off > abs(mj - i)? max_off : abs(mj - i);
|
||||
}
|
||||
// update beg and end for the next round
|
||||
for (j = mj; j >= beg && eh[j].h; --j);
|
||||
beg = j + 1;
|
||||
|
|
@ -439,6 +443,7 @@ int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
|
|||
if (_tle) *_tle = max_i + 1;
|
||||
if (_gtle) *_gtle = max_ie + 1;
|
||||
if (_gscore) *_gscore = gscore;
|
||||
if (_max_off) *_max_off = max_off;
|
||||
return max;
|
||||
}
|
||||
|
||||
|
|
|
|||
2
ksw.h
2
ksw.h
|
|
@ -102,7 +102,7 @@ extern "C" {
|
|||
*
|
||||
* @return best semi-local alignment score
|
||||
*/
|
||||
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *qle, int *tle, int *gtle, int *gscore);
|
||||
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue