bwa-sw: use bwa_gen_cigar() for cigar generation
This commit is contained in:
parent
07921659cf
commit
086c9d0e7d
56
bwtsw2_aux.c
56
bwtsw2_aux.c
|
|
@ -169,33 +169,22 @@ void bsw2_extend_rght(const bsw2opt_t *opt, bwtsw2_t *b, uint8_t *query, int lq,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* generate CIGAR array(s) in b->cigar[] */
|
/* generate CIGAR array(s) in b->cigar[] */
|
||||||
static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], const uint8_t *pac, bwtsw2_t *b, const char *name)
|
static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], int64_t l_pac, const uint8_t *pac, bwtsw2_t *b, const char *name)
|
||||||
{
|
{
|
||||||
uint8_t *target;
|
int i;
|
||||||
int i, matrix[25];
|
int8_t mat[25];
|
||||||
AlnParam par;
|
|
||||||
path_t *path;
|
|
||||||
|
|
||||||
par.matrix = matrix;
|
bwa_fill_scmat(opt->a, opt->b, mat);
|
||||||
__gen_ap(par, opt);
|
|
||||||
i = ((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq; // maximum possible target length
|
|
||||||
target = calloc(i, 1);
|
|
||||||
path = calloc(i + lq, sizeof(path_t));
|
|
||||||
// generate CIGAR
|
|
||||||
for (i = 0; i < b->n; ++i) {
|
for (i = 0; i < b->n; ++i) {
|
||||||
bsw2hit_t *p = b->hits + i;
|
bsw2hit_t *p = b->hits + i;
|
||||||
bsw2aux_t *q = b->aux + i;
|
bsw2aux_t *q = b->aux + i;
|
||||||
uint8_t *query;
|
uint8_t *query;
|
||||||
bwtint_t k;
|
int beg, end, score;
|
||||||
int path_len, beg, end;
|
|
||||||
if (p->l) continue;
|
if (p->l) continue;
|
||||||
beg = (p->flag & 0x10)? lq - p->end : p->beg;
|
beg = (p->flag & 0x10)? lq - p->end : p->beg;
|
||||||
end = (p->flag & 0x10)? lq - p->beg : p->end;
|
end = (p->flag & 0x10)? lq - p->beg : p->end;
|
||||||
query = seq[(p->flag & 0x10)? 1 : 0] + beg;
|
query = seq[(p->flag & 0x10)? 1 : 0] + beg;
|
||||||
for (k = p->k; k < p->k + p->len; ++k) // in principle, no out-of-boundary here
|
q->cigar = bwa_gen_cigar(mat, opt->q, opt->r, opt->bw, l_pac, pac, end - beg, query, p->k, p->k + p->len, &score, &q->n_cigar, &q->nm);
|
||||||
target[k - p->k] = pac[k>>2] >> (~k&3)*2 & 0x3;
|
|
||||||
aln_global_core(target, p->len, query, end - beg, &par, path, &path_len);
|
|
||||||
q->cigar = aln_path2cigar32(path, path_len, &q->n_cigar);
|
|
||||||
#if 0
|
#if 0
|
||||||
if (name && score != p->G) { // debugging only
|
if (name && score != p->G) { // debugging only
|
||||||
int j, glen = 0;
|
int j, glen = 0;
|
||||||
|
|
@ -206,7 +195,7 @@ static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], const uint8
|
||||||
__func__, name, score, p->G, lq, end - beg, p->len, glen, opt->bw);
|
__func__, name, score, p->G, lq, end - beg, p->len, glen, opt->bw);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (beg != 0 || end < lq) { // write soft clipping
|
if (q->cigar && (beg != 0 || end < lq)) { // write soft clipping
|
||||||
q->cigar = realloc(q->cigar, 4 * (q->n_cigar + 2));
|
q->cigar = realloc(q->cigar, 4 * (q->n_cigar + 2));
|
||||||
if (beg != 0) {
|
if (beg != 0) {
|
||||||
memmove(q->cigar + 1, q->cigar, q->n_cigar * 4);
|
memmove(q->cigar + 1, q->cigar, q->n_cigar * 4);
|
||||||
|
|
@ -219,7 +208,6 @@ static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], const uint8
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(target); free(path);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* this is for the debugging purpose only */
|
/* this is for the debugging purpose only */
|
||||||
|
|
@ -407,27 +395,6 @@ static int fix_cigar(const bntseq_t *bns, bsw2hit_t *p, int n_cigar, uint32_t *c
|
||||||
return n_cigar;
|
return n_cigar;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int compute_nm(bsw2hit_t *p, int n_cigar, const uint32_t *cigar, const uint8_t *pac, const uint8_t *seq)
|
|
||||||
{
|
|
||||||
int k, x, n_mm = 0, i, n_gap = 0;
|
|
||||||
bwtint_t y;
|
|
||||||
x = 0; y = p->k;
|
|
||||||
for (k = 0; k < n_cigar; ++k) {
|
|
||||||
int op = cigar[k]&0xf;
|
|
||||||
int len = cigar[k]>>4;
|
|
||||||
if (op == 0) { // match
|
|
||||||
for (i = 0; i < len; ++i) {
|
|
||||||
int ref = pac[(y+i)>>2] >> (~(y+i)&3)*2 & 0x3;
|
|
||||||
if (seq[x + i] != ref) ++n_mm;
|
|
||||||
}
|
|
||||||
x += len; y += len;
|
|
||||||
} else if (op == 1) x += len, n_gap += len;
|
|
||||||
else if (op == 2) y += len, n_gap += len;
|
|
||||||
else if (op == 4) x += len;
|
|
||||||
}
|
|
||||||
return n_mm + n_gap;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8_t *seq[2], const uint8_t *pac, bwtsw2_t *b, const char *name)
|
static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8_t *seq[2], const uint8_t *pac, bwtsw2_t *b, const char *name)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
@ -439,7 +406,7 @@ static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8
|
||||||
}
|
}
|
||||||
b->aux = calloc(b->n, sizeof(bsw2aux_t));
|
b->aux = calloc(b->n, sizeof(bsw2aux_t));
|
||||||
// generate CIGAR
|
// generate CIGAR
|
||||||
gen_cigar(opt, qlen, seq, pac, b, name);
|
gen_cigar(opt, qlen, seq, bns->l_pac, pac, b, name);
|
||||||
// fix CIGAR, generate mapQ, and write chromosomal position
|
// fix CIGAR, generate mapQ, and write chromosomal position
|
||||||
for (i = 0; i < b->n; ++i) {
|
for (i = 0; i < b->n; ++i) {
|
||||||
bsw2hit_t *p = &b->hits[i];
|
bsw2hit_t *p = &b->hits[i];
|
||||||
|
|
@ -451,8 +418,6 @@ static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8
|
||||||
int subo;
|
int subo;
|
||||||
// fix out-of-boundary CIGAR
|
// fix out-of-boundary CIGAR
|
||||||
q->n_cigar = fix_cigar(bns, p, q->n_cigar, q->cigar);
|
q->n_cigar = fix_cigar(bns, p, q->n_cigar, q->cigar);
|
||||||
// compute the NM tag
|
|
||||||
q->nm = compute_nm(p, q->n_cigar, q->cigar, pac, seq[p->is_rev]);
|
|
||||||
// compute mapQ
|
// compute mapQ
|
||||||
subo = p->G2 > opt->t? p->G2 : opt->t;
|
subo = p->G2 > opt->t? p->G2 : opt->t;
|
||||||
if (p->flag>>16 == 1 || p->flag>>16 == 2) c *= .5;
|
if (p->flag>>16 == 1 || p->flag>>16 == 2) c *= .5;
|
||||||
|
|
@ -527,9 +492,10 @@ static void print_hits(const bntseq_t *bns, const bsw2opt_t *opt, bsw2seq1_t *ks
|
||||||
bsw2aux_t *q = b->aux + i;
|
bsw2aux_t *q = b->aux + i;
|
||||||
int j, beg, end, type = 0;
|
int j, beg, end, type = 0;
|
||||||
// print mandatory fields before SEQ
|
// print mandatory fields before SEQ
|
||||||
|
if (q->cigar == 0) q->flag |= 0x4;
|
||||||
ksprintf(&str, "%s\t%d", ks->name, q->flag | (opt->multi_2nd && i? 0x100 : 0));
|
ksprintf(&str, "%s\t%d", ks->name, q->flag | (opt->multi_2nd && i? 0x100 : 0));
|
||||||
ksprintf(&str, "\t%s\t%ld", q->chr>=0? bns->anns[q->chr].name : "*", (long)q->pos + 1);
|
ksprintf(&str, "\t%s\t%ld", q->chr>=0? bns->anns[q->chr].name : "*", (long)q->pos + 1);
|
||||||
if (p->l == 0) { // not a repetitive hit
|
if (p->l == 0 && q->cigar) { // not a repetitive hit
|
||||||
ksprintf(&str, "\t%d\t", q->pqual);
|
ksprintf(&str, "\t%d\t", q->pqual);
|
||||||
for (k = 0; k < q->n_cigar; ++k)
|
for (k = 0; k < q->n_cigar; ++k)
|
||||||
ksprintf(&str, "%d%c", q->cigar[k]>>4, (opt->hard_clip? "MIDNHHP" : "MIDNSHP")[q->cigar[k]&0xf]);
|
ksprintf(&str, "%d%c", q->cigar[k]>>4, (opt->hard_clip? "MIDNHHP" : "MIDNSHP")[q->cigar[k]&0xf]);
|
||||||
|
|
@ -538,7 +504,7 @@ static void print_hits(const bntseq_t *bns, const bsw2opt_t *opt, bsw2seq1_t *ks
|
||||||
else ksprintf(&str, "\t%s\t%d\t%d\t", q->mchr==q->chr? "=" : (q->mchr<0? "*" : bns->anns[q->mchr].name), q->mpos+1, q->isize);
|
else ksprintf(&str, "\t%s\t%d\t%d\t", q->mchr==q->chr? "=" : (q->mchr<0? "*" : bns->anns[q->mchr].name), q->mpos+1, q->isize);
|
||||||
// get the sequence begin and end
|
// get the sequence begin and end
|
||||||
beg = 0; end = ks->l;
|
beg = 0; end = ks->l;
|
||||||
if (opt->hard_clip) {
|
if (opt->hard_clip && q->cigar) {
|
||||||
if ((q->cigar[0]&0xf) == 4) beg += q->cigar[0]>>4;
|
if ((q->cigar[0]&0xf) == 4) beg += q->cigar[0]>>4;
|
||||||
if ((q->cigar[q->n_cigar-1]&0xf) == 4) end -= q->cigar[q->n_cigar-1]>>4;
|
if ((q->cigar[q->n_cigar-1]&0xf) == 4) end -= q->cigar[q->n_cigar-1]>>4;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue