bwa-sw: use bwa_gen_cigar() for cigar generation

This commit is contained in:
Heng Li 2013-03-05 09:54:49 -05:00
parent 07921659cf
commit 086c9d0e7d
1 changed files with 11 additions and 45 deletions

View File

@ -169,33 +169,22 @@ void bsw2_extend_rght(const bsw2opt_t *opt, bwtsw2_t *b, uint8_t *query, int lq,
} }
/* generate CIGAR array(s) in b->cigar[] */ /* generate CIGAR array(s) in b->cigar[] */
static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], const uint8_t *pac, bwtsw2_t *b, const char *name) static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], int64_t l_pac, const uint8_t *pac, bwtsw2_t *b, const char *name)
{ {
uint8_t *target; int i;
int i, matrix[25]; int8_t mat[25];
AlnParam par;
path_t *path;
par.matrix = matrix; bwa_fill_scmat(opt->a, opt->b, mat);
__gen_ap(par, opt);
i = ((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq; // maximum possible target length
target = calloc(i, 1);
path = calloc(i + lq, sizeof(path_t));
// generate CIGAR
for (i = 0; i < b->n; ++i) { for (i = 0; i < b->n; ++i) {
bsw2hit_t *p = b->hits + i; bsw2hit_t *p = b->hits + i;
bsw2aux_t *q = b->aux + i; bsw2aux_t *q = b->aux + i;
uint8_t *query; uint8_t *query;
bwtint_t k; int beg, end, score;
int path_len, beg, end;
if (p->l) continue; if (p->l) continue;
beg = (p->flag & 0x10)? lq - p->end : p->beg; beg = (p->flag & 0x10)? lq - p->end : p->beg;
end = (p->flag & 0x10)? lq - p->beg : p->end; end = (p->flag & 0x10)? lq - p->beg : p->end;
query = seq[(p->flag & 0x10)? 1 : 0] + beg; query = seq[(p->flag & 0x10)? 1 : 0] + beg;
for (k = p->k; k < p->k + p->len; ++k) // in principle, no out-of-boundary here q->cigar = bwa_gen_cigar(mat, opt->q, opt->r, opt->bw, l_pac, pac, end - beg, query, p->k, p->k + p->len, &score, &q->n_cigar, &q->nm);
target[k - p->k] = pac[k>>2] >> (~k&3)*2 & 0x3;
aln_global_core(target, p->len, query, end - beg, &par, path, &path_len);
q->cigar = aln_path2cigar32(path, path_len, &q->n_cigar);
#if 0 #if 0
if (name && score != p->G) { // debugging only if (name && score != p->G) { // debugging only
int j, glen = 0; int j, glen = 0;
@ -206,7 +195,7 @@ static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], const uint8
__func__, name, score, p->G, lq, end - beg, p->len, glen, opt->bw); __func__, name, score, p->G, lq, end - beg, p->len, glen, opt->bw);
} }
#endif #endif
if (beg != 0 || end < lq) { // write soft clipping if (q->cigar && (beg != 0 || end < lq)) { // write soft clipping
q->cigar = realloc(q->cigar, 4 * (q->n_cigar + 2)); q->cigar = realloc(q->cigar, 4 * (q->n_cigar + 2));
if (beg != 0) { if (beg != 0) {
memmove(q->cigar + 1, q->cigar, q->n_cigar * 4); memmove(q->cigar + 1, q->cigar, q->n_cigar * 4);
@ -219,7 +208,6 @@ static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], const uint8
} }
} }
} }
free(target); free(path);
} }
/* this is for the debugging purpose only */ /* this is for the debugging purpose only */
@ -407,27 +395,6 @@ static int fix_cigar(const bntseq_t *bns, bsw2hit_t *p, int n_cigar, uint32_t *c
return n_cigar; return n_cigar;
} }
static int compute_nm(bsw2hit_t *p, int n_cigar, const uint32_t *cigar, const uint8_t *pac, const uint8_t *seq)
{
int k, x, n_mm = 0, i, n_gap = 0;
bwtint_t y;
x = 0; y = p->k;
for (k = 0; k < n_cigar; ++k) {
int op = cigar[k]&0xf;
int len = cigar[k]>>4;
if (op == 0) { // match
for (i = 0; i < len; ++i) {
int ref = pac[(y+i)>>2] >> (~(y+i)&3)*2 & 0x3;
if (seq[x + i] != ref) ++n_mm;
}
x += len; y += len;
} else if (op == 1) x += len, n_gap += len;
else if (op == 2) y += len, n_gap += len;
else if (op == 4) x += len;
}
return n_mm + n_gap;
}
static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8_t *seq[2], const uint8_t *pac, bwtsw2_t *b, const char *name) static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8_t *seq[2], const uint8_t *pac, bwtsw2_t *b, const char *name)
{ {
int i; int i;
@ -439,7 +406,7 @@ static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8
} }
b->aux = calloc(b->n, sizeof(bsw2aux_t)); b->aux = calloc(b->n, sizeof(bsw2aux_t));
// generate CIGAR // generate CIGAR
gen_cigar(opt, qlen, seq, pac, b, name); gen_cigar(opt, qlen, seq, bns->l_pac, pac, b, name);
// fix CIGAR, generate mapQ, and write chromosomal position // fix CIGAR, generate mapQ, and write chromosomal position
for (i = 0; i < b->n; ++i) { for (i = 0; i < b->n; ++i) {
bsw2hit_t *p = &b->hits[i]; bsw2hit_t *p = &b->hits[i];
@ -451,8 +418,6 @@ static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8
int subo; int subo;
// fix out-of-boundary CIGAR // fix out-of-boundary CIGAR
q->n_cigar = fix_cigar(bns, p, q->n_cigar, q->cigar); q->n_cigar = fix_cigar(bns, p, q->n_cigar, q->cigar);
// compute the NM tag
q->nm = compute_nm(p, q->n_cigar, q->cigar, pac, seq[p->is_rev]);
// compute mapQ // compute mapQ
subo = p->G2 > opt->t? p->G2 : opt->t; subo = p->G2 > opt->t? p->G2 : opt->t;
if (p->flag>>16 == 1 || p->flag>>16 == 2) c *= .5; if (p->flag>>16 == 1 || p->flag>>16 == 2) c *= .5;
@ -527,9 +492,10 @@ static void print_hits(const bntseq_t *bns, const bsw2opt_t *opt, bsw2seq1_t *ks
bsw2aux_t *q = b->aux + i; bsw2aux_t *q = b->aux + i;
int j, beg, end, type = 0; int j, beg, end, type = 0;
// print mandatory fields before SEQ // print mandatory fields before SEQ
if (q->cigar == 0) q->flag |= 0x4;
ksprintf(&str, "%s\t%d", ks->name, q->flag | (opt->multi_2nd && i? 0x100 : 0)); ksprintf(&str, "%s\t%d", ks->name, q->flag | (opt->multi_2nd && i? 0x100 : 0));
ksprintf(&str, "\t%s\t%ld", q->chr>=0? bns->anns[q->chr].name : "*", (long)q->pos + 1); ksprintf(&str, "\t%s\t%ld", q->chr>=0? bns->anns[q->chr].name : "*", (long)q->pos + 1);
if (p->l == 0) { // not a repetitive hit if (p->l == 0 && q->cigar) { // not a repetitive hit
ksprintf(&str, "\t%d\t", q->pqual); ksprintf(&str, "\t%d\t", q->pqual);
for (k = 0; k < q->n_cigar; ++k) for (k = 0; k < q->n_cigar; ++k)
ksprintf(&str, "%d%c", q->cigar[k]>>4, (opt->hard_clip? "MIDNHHP" : "MIDNSHP")[q->cigar[k]&0xf]); ksprintf(&str, "%d%c", q->cigar[k]>>4, (opt->hard_clip? "MIDNHHP" : "MIDNSHP")[q->cigar[k]&0xf]);
@ -538,7 +504,7 @@ static void print_hits(const bntseq_t *bns, const bsw2opt_t *opt, bsw2seq1_t *ks
else ksprintf(&str, "\t%s\t%d\t%d\t", q->mchr==q->chr? "=" : (q->mchr<0? "*" : bns->anns[q->mchr].name), q->mpos+1, q->isize); else ksprintf(&str, "\t%s\t%d\t%d\t", q->mchr==q->chr? "=" : (q->mchr<0? "*" : bns->anns[q->mchr].name), q->mpos+1, q->isize);
// get the sequence begin and end // get the sequence begin and end
beg = 0; end = ks->l; beg = 0; end = ks->l;
if (opt->hard_clip) { if (opt->hard_clip && q->cigar) {
if ((q->cigar[0]&0xf) == 4) beg += q->cigar[0]>>4; if ((q->cigar[0]&0xf) == 4) beg += q->cigar[0]>>4;
if ((q->cigar[q->n_cigar-1]&0xf) == 4) end -= q->cigar[q->n_cigar-1]>>4; if ((q->cigar[q->n_cigar-1]&0xf) == 4) end -= q->cigar[q->n_cigar-1]>>4;
} }