separate CIGAR generation
This commit is contained in:
parent
14e6a7bdb9
commit
a61288c768
42
bwamem.c
42
bwamem.c
|
|
@ -278,7 +278,7 @@ static inline int cal_max_gap(const mem_opt_t *opt, int qlen)
|
|||
|
||||
void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain1_t *c, mem_aln_t *a)
|
||||
{ // FIXME: in general, we SHOULD check funny seed patterns such as contained seeds. When that happens, we should use a SW or extend more seeds
|
||||
int i, j, qbeg, w, nw_score;
|
||||
int i, j, qbeg;
|
||||
int64_t rlen, rbeg, rmax[2], tmp;
|
||||
const mem_seed_t *s;
|
||||
uint8_t *rseq = 0;
|
||||
|
|
@ -344,16 +344,42 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
|
|||
if (s->qbeg + s->len > a->qe) a->is_all = 0;
|
||||
}
|
||||
|
||||
w = (int)((double)(l_query * opt->a - opt->q) / opt->r + 1.);
|
||||
w = w < opt->w? w : opt->w;
|
||||
w += abs((a->re - a->rb) - (a->qe - a->qb));
|
||||
nw_score = ksw_global(a->qe - a->qb, query + a->qb, a->re - a->rb, rseq + (a->rb - rmax[0]), 5, opt->mat, opt->q, opt->r, w, &a->n_cigar, &a->cigar);
|
||||
|
||||
//printf("[Q] "); for (i = a->qb; i < a->qe; ++i) putchar("ACGTN"[(int)query[i]]); putchar('\n');
|
||||
//printf("[R] "); for (i = a->rb; i < a->re; ++i) putchar("ACGTN"[(int)rseq[i - rmax[0]]]); putchar('\n');
|
||||
printf("[%d] score=%d,%d\t[%d,%d) <=> [%lld,%lld)\tis_all=%d\t", c->n, a->score, nw_score, a->qb, a->qe, a->rb, a->re, a->is_all);
|
||||
for (i = 0; i < a->n_cigar; ++i) printf("%d%c", a->cigar[i]>>4, "MIDS"[a->cigar[i]&0xf]);
|
||||
printf("[%d] score=%d\t[%d,%d) <=> [%lld,%lld)\tis_all=%d\t", c->n, a->score, a->qb, a->qe, a->rb, a->re, a->is_all);
|
||||
putchar('\n');
|
||||
|
||||
free(rseq);
|
||||
}
|
||||
|
||||
uint32_t *mem_gen_cigar(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar)
|
||||
{
|
||||
uint32_t *cigar = 0;
|
||||
uint8_t tmp, *rseq;
|
||||
int i, w;
|
||||
int64_t rlen;
|
||||
*n_cigar = 0;
|
||||
if (l_query <= 0 || rb >= re || (rb < l_pac && re > l_pac)) return 0; // reject if negative length or bridging the forward and reverse strand
|
||||
rseq = bns_get_seq(l_pac, pac, rb, re, &rlen);
|
||||
if (re - rb != rlen) goto ret_gen_cigar; // possible if out of range
|
||||
if (rb >= l_pac) { // then reverse both query and rseq; this is to ensure indels to be placed at the leftmost position
|
||||
for (i = 0; i < l_query>>1; ++i)
|
||||
tmp = query[i], query[i] = query[l_query - 1 - i], query[l_query - 1 - i] = tmp;
|
||||
for (i = 0; i < rlen>>1; ++i)
|
||||
tmp = rseq[i], rseq[i] = rseq[rlen - 1 - i], query[rlen - 1 - i] = tmp;
|
||||
}
|
||||
// set the band-width
|
||||
w = (int)((double)(l_query * opt->a - opt->q) / opt->r + 1.);
|
||||
w = w < 1? w : 1;
|
||||
w = w < opt->w? w : opt->w;
|
||||
w += abs(rlen - l_query);
|
||||
// NW alignment
|
||||
*score = ksw_global(l_query, query, rlen, rseq, 5, opt->mat, opt->q, opt->r, w, n_cigar, &cigar);
|
||||
if (rb >= l_pac) // reverse back query
|
||||
for (i = 0; i < l_query>>1; ++i)
|
||||
tmp = query[i], query[i] = query[l_query - 1 - i], query[l_query - 1 - i] = tmp;
|
||||
|
||||
ret_gen_cigar:
|
||||
free(rseq);
|
||||
return cigar;
|
||||
}
|
||||
|
|
|
|||
3
bwamem.h
3
bwamem.h
|
|
@ -31,8 +31,7 @@ typedef struct {
|
|||
|
||||
typedef struct {
|
||||
int64_t pos, rb, re;
|
||||
int n_cigar, len, score, qb, qe, is_all;
|
||||
uint32_t *cigar;
|
||||
int len, score, qb, qe, is_all;
|
||||
} mem_aln_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -66,7 +66,6 @@ int main_mem(int argc, char *argv[])
|
|||
printf("\t%d,%d,%s:%c%ld", p->seeds[j].len, p->seeds[j].qbeg, bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1);
|
||||
}
|
||||
putchar('\n');
|
||||
free(a.cigar);
|
||||
}
|
||||
puts("//");
|
||||
for (i = 0; i < chain.n; ++i) free(chain.chains[i].seeds);
|
||||
|
|
|
|||
Loading…
Reference in New Issue