From bdf34f6ce7438bcc2d5ca3abce4f29930f93aef4 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 12 Mar 2013 09:56:04 -0400 Subject: [PATCH] r363: XA=>XP; output mapQ in XP In BWA, XA gives hits "shadowed" by the primary hit. In BWA-MEM, we output primary hits only. Primary hits may have non-zero mapping quality. --- bwa.1 | 6 ++++-- bwamem.c | 16 ++++++++++------ main.c | 2 +- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/bwa.1 b/bwa.1 index 8e90848..f01dce1 100644 --- a/bwa.1 +++ b/bwa.1 @@ -1,4 +1,4 @@ -.TH bwa 1 "9 March 2013" "bwa-0.7.2" "Bioinformatics tools" +.TH bwa 1 "13 March 2013" "bwa-0.7.3" "Bioinformatics tools" .SH NAME .PP bwa - Burrows-Wheeler Alignment Tool @@ -574,11 +574,13 @@ XM Number of mismatches in the alignment XO Number of gap opens XG Number of gap extentions XT Type: Unique/Repeat/N/Mate-sw -XA Alternative hits; format: (chr,pos,CIGAR,NM;)* +XA Alternative hits; format: /(chr,pos,CIGAR,NM;)*/ _ XS Suboptimal alignment score XF Support from forward/reverse alignment XE Number of supporting seeds +_ +XP Alt primary hits; format: /(chr,pos,CIGAR;mapQ,NM;)+/ .TE .PP diff --git a/bwamem.c b/bwamem.c index 924c97f..b6a32f7 100644 --- a/bwamem.c +++ b/bwamem.c @@ -703,20 +703,23 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m if (p->score >= 0) { kputsn("\tAS:i:", 6, str); kputw(p->score, str); } if (p->sub >= 0) { kputsn("\tXS:i:", 6, str); kputw(p->sub, str); } if (bwa_rg_id[0]) { kputsn("\tRG:Z:", 6, str); kputs(bwa_rg_id, str); } - if (n > 1) { // output multiple primary hits - kputsn("\tXA:Z:", 6, str); + for (i = 0; i < n; ++i) + if (i != which && !(list[i].flag&0x20000)) break; // 0x20000: shadowed multi hit + if (i < n) { // there are other primary hits; output them + kputsn("\tXP:Z:", 6, str); for (i = 0; i < n; ++i) { const mem_aln_t *r = &list[i]; int k; - if (i == which) continue; + if (i == which || (list[i].flag&0x20000)) continue; // proceed if: 1) different from the current; 2) not shadowed multi hit kputs(bns->anns[r->rid].name, str); kputc(',', str); kputc("+-"[r->is_rev], str); kputl(r->pos+1, str); kputc(',', str); for (k = 0; k < r->n_cigar; ++k) { kputw(r->cigar[k]>>4, str); kputc("MIDSH"[r->cigar[k]&0xf], str); } - kputc(',', str); - kputw(r->NM, str); kputc(';', str); + kputc(',', str); kputw(r->mapq, str); + kputc(',', str); kputw(r->NM, str); + kputc(';', str); } } if (s->comment) { kputc('\t', str); kputs(s->comment, str); } @@ -833,7 +836,8 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t * query = malloc(l_query); for (i = 0; i < l_query; ++i) // convert to the nt4 encoding query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]]; - a.mapq = mem_approx_mapq_se(opt, ar); + a.mapq = ar->secondary < 0? mem_approx_mapq_se(opt, ar) : 0; + if (ar->secondary >= 0) a.flag |= 0x20000; bwa_fix_xref(opt->mat, opt->q, opt->r, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re); w2 = infer_bw(qe - qb, re - rb, ar->score, opt->a, opt->q, opt->r); w2 = w2 < opt->w? w2 : opt->w; diff --git a/main.c b/main.c index b749dfc..c5b84be 100644 --- a/main.c +++ b/main.c @@ -3,7 +3,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.2-r362-beta" +#define PACKAGE_VERSION "0.7.2-r363-beta" #endif int bwa_fa2pac(int argc, char *argv[]);