From b5170e0efa26daf1695a8c4bf37b9d2d82ecc07c Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 24 Nov 2011 11:51:38 -0500 Subject: [PATCH] output the NM tag --- bwtsw2.h | 2 +- bwtsw2_aux.c | 29 ++++++++++++++++++++++++++--- main.c | 2 +- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/bwtsw2.h b/bwtsw2.h index c156c31..89615b5 100644 --- a/bwtsw2.h +++ b/bwtsw2.h @@ -26,7 +26,7 @@ typedef struct { } bsw2hit_t; typedef struct { - int flag, nn, n_cigar, chr, pos, qual, mchr, mpos, pqual, isize; + int flag, nn, n_cigar, chr, pos, qual, mchr, mpos, pqual, isize, nm; uint32_t *cigar; } bsw2aux_t; diff --git a/bwtsw2_aux.c b/bwtsw2_aux.c index 400fcf2..c3f42bd 100644 --- a/bwtsw2_aux.c +++ b/bwtsw2_aux.c @@ -165,7 +165,7 @@ void bsw2_extend_rght(const bsw2opt_t *opt, bwtsw2_t *b, uint8_t *query, int lq, } /* generate CIGAR array(s) in b->cigar[] */ -static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], uint8_t *pac, bwtsw2_t *b) +static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], const uint8_t *pac, bwtsw2_t *b) { uint8_t *target; int i, matrix[25]; @@ -392,7 +392,28 @@ static int fix_cigar(const bntseq_t *bns, bsw2hit_t *p, int n_cigar, uint32_t *c return n_cigar; } -static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8_t *seq[2], uint8_t *pac, bwtsw2_t *b) +static int compute_nm(bsw2hit_t *p, int n_cigar, const uint32_t *cigar, const uint8_t *pac, const uint8_t *seq) +{ + int k, x, n_mm = 0, i, n_gap = 0; + bwtint_t y; + x = 0; y = p->k; + for (k = 0; k < n_cigar; ++k) { + int op = cigar[k]&0xf; + int len = cigar[k]>>4; + if (op == 0) { // match + for (i = 0; i < len; ++i) { + int ref = pac[(y+i)>>2] >> (~(y+i)&3)*2 & 0x3; + if (seq[x + i] != ref) ++n_mm; + } + x += len; y += len; + } else if (op == 1) x += len, n_gap += len; + else if (op == 2) y += len, n_gap += len; + else if (op == 4) x += len; + } + return n_mm + n_gap; +} + +static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8_t *seq[2], const uint8_t *pac, bwtsw2_t *b) { int i; // allocate for b->aux @@ -415,6 +436,8 @@ static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8 int subo; // fix out-of-boundary CIGAR q->n_cigar = fix_cigar(bns, p, q->n_cigar, q->cigar); + // compute the NM tag + q->nm = compute_nm(p, q->n_cigar, q->cigar, pac, seq[p->is_rev]); // compute mapQ subo = p->G2 > opt->t? p->G2 : opt->t; if (p->flag>>16 == 1 || p->flag>>16 == 2) c *= .5; @@ -517,7 +540,7 @@ static void print_hits(const bntseq_t *bns, const bsw2opt_t *opt, bsw2seq1_t *ks } } else ksprintf(&str, "\t*"); // print optional tags - ksprintf(&str, "\tAS:i:%d\tXS:i:%d\tXF:i:%d\tXE:i:%d", p->G, p->G2, p->flag>>16, p->n_seeds); + ksprintf(&str, "\tAS:i:%d\tXS:i:%d\tXF:i:%d\tXE:i:%d\tNM:i:%d", p->G, p->G2, p->flag>>16, p->n_seeds, q->nm); if (q->nn) ksprintf(&str, "\tXN:i:%d", q->nn); if (p->l) ksprintf(&str, "\tXI:i:%d", p->l - p->k + 1); if (p->flag&BSW2_FLAG_MATESW) type |= 1; diff --git a/main.c b/main.c index 69379ed..c1d9864 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.6.0-r97-dev" +#define PACKAGE_VERSION "0.6.0-r100-dev" #endif static int usage()