r295: generate NM

This commit is contained in:
Heng Li 2013-02-26 13:36:01 -05:00
parent 32f2d60a2e
commit 98787f0ae0
5 changed files with 25 additions and 9 deletions

22
bwa.c
View File

@ -70,13 +70,13 @@ bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
*****************/
// Generate CIGAR when the alignment end points are known
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar)
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM)
{
uint32_t *cigar = 0;
uint8_t tmp, *rseq;
int i, w;
int64_t rlen;
*n_cigar = 0;
*n_cigar = 0; *NM = -1;
if (l_query <= 0 || rb >= re || (rb < l_pac && re > l_pac)) return 0; // reject if negative length or bridging the forward and reverse strand
rseq = bns_get_seq(l_pac, pac, rb, re, &rlen);
if (re - rb != rlen) goto ret_gen_cigar; // possible if out of range
@ -95,6 +95,20 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa
w += abs(rlen - l_query);
// NW alignment
*score = ksw_global(l_query, query, rlen, rseq, 5, mat, q, r, w, n_cigar, &cigar);
{// compute NM
int k, x, y, n_mm = 0, n_gap = 0;
for (k = 0, x = y = 0; k < *n_cigar; ++k) {
int op = cigar[k]&0xf;
int len = cigar[k]>>4;
if (op == 0) { // match
for (i = 0; i < len; ++i)
if (query[x + i] != rseq[y + i]) ++n_mm;
x += len; y += len;
} else if (op == 1) x += len, n_gap += len;
else if (op == 2) y += len, n_gap += len;
}
*NM = n_mm + n_gap;
}
if (rb >= l_pac) // reverse back query
for (i = 0; i < l_query>>1; ++i)
tmp = query[i], query[i] = query[l_query - 1 - i], query[l_query - 1 - i] = tmp;
@ -126,10 +140,10 @@ int bwa_fix_xref(const int8_t mat[25], int q, int r, int w, const bntseq_t *bns,
}
}
if (mid >= 0) {
int i, score, n_cigar, y;
int i, score, n_cigar, y, NM;
uint32_t *cigar;
int64_t x;
cigar = bwa_gen_cigar(mat, q, r, w, bns->l_pac, pac, *qe - *qb, query + *qb, *rb, *re, &score, &n_cigar);
cigar = bwa_gen_cigar(mat, q, r, w, bns->l_pac, pac, *qe - *qb, query + *qb, *rb, *re, &score, &n_cigar, &NM);
for (i = 0, x = *rb, y = *qb; i < n_cigar; ++i) {
int op = cigar[i]&0xf, len = cigar[i]>>4;
if (op == 0) {

2
bwa.h
View File

@ -30,7 +30,7 @@ extern "C" {
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_);
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar);
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
int bwa_fix_xref(const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re);
char *bwa_idx_infer_prefix(const char *hint);

View File

@ -496,7 +496,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, const bwahit_t *p_, int is_hard, const bwahit_t *m)
{
#define is_mapped(x) ((x)->rb >= 0 && (x)->rb < (x)->re && (x)->re <= bns->l_pac<<1)
int score, n_cigar, is_rev = 0, rid, mid, copy_mate = 0;
int score, n_cigar, is_rev = 0, rid, mid, copy_mate = 0, NM = -1;
uint32_t *cigar = 0;
int64_t pos;
bwahit_t ptmp, *p = &ptmp;
@ -519,7 +519,7 @@ void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, cons
int sam_flag = p->flag&0xff; // the flag that will be outputed to SAM; it is not always the same as p->flag
if (p->flag&0x10000) sam_flag |= 0x100;
if (!copy_mate) {
cigar = bwa_gen_cigar(mat, q, r, w, bns->l_pac, pac, p->qe - p->qb, (uint8_t*)&s->seq[p->qb], p->rb, p->re, &score, &n_cigar);
cigar = bwa_gen_cigar(mat, q, r, w, bns->l_pac, pac, p->qe - p->qb, (uint8_t*)&s->seq[p->qb], p->rb, p->re, &score, &n_cigar, &NM);
p->flag |= n_cigar == 0? 4 : 0; // FIXME: check why this may happen (this has already happened)
} else n_cigar = 0, cigar = 0;
pos = bns_depos(bns, p->rb < bns->l_pac? p->rb : p->re - 1, &is_rev);
@ -582,6 +582,7 @@ void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, cons
str->s[str->l] = 0;
} else kputc('*', str);
}
if (NM >= 0) { kputsn("\tNM:i:", 6, str); kputw(NM, str); }
if (p->score >= 0) { kputsn("\tAS:i:", 6, str); kputw(p->score, str); }
if (p->sub >= 0) { kputsn("\tXS:i:", 6, str); kputw(p->sub, str); }
if (bwa_rg_id[0]) { kputsn("\tRG:Z:", 6, str); kputs(bwa_rg_id, str); }

View File

@ -47,6 +47,7 @@ int main_mem(int argc, char *argv[])
if ((rg_line = bwa_set_rg(optarg)) == 0) return 1; // FIXME: memory leak
} else if (c == 's') opt->split_width = atoi(optarg);
}
if (opt->n_threads < 1) opt->n_threads = 1;
if (optind + 1 >= argc) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: bwa mem [options] <idxbase> <in1.fq> [in2.fq]\n\n");
@ -94,7 +95,7 @@ int main_mem(int argc, char *argv[])
opt->flag |= MEM_F_PE;
}
}
while ((seqs = bseq_read(opt->chunk_size * (ko2? 2 : 1), &n, ks, ks2)) != 0) {
while ((seqs = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) {
int64_t size = 0;
if (!copy_comment)
for (i = 0; i < n; ++i) {

2
main.c
View File

@ -4,7 +4,7 @@
#include "utils.h"
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.6.2-r294-beta"
#define PACKAGE_VERSION "0.6.2-r295-beta"
#endif
static int usage()