From 417c6d66c714d47e1243d7502fde4864499acbe1 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Mon, 31 Mar 2014 10:52:45 -0400 Subject: [PATCH] dev-r451: fixed a few bugs when -A!=1 Something is still wrong. --- bwa.1 | 19 +++++++++++++------ bwamem_pair.c | 11 +++++++---- main.c | 2 +- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/bwa.1 b/bwa.1 index 601a529..47bdd61 100644 --- a/bwa.1 +++ b/bwa.1 @@ -1,4 +1,4 @@ -.TH bwa 1 "25 Feburary 2014" "bwa-0.7.7" "Bioinformatics tools" +.TH bwa 1 "31 March 2014" "bwa-0.7.8" "Bioinformatics tools" .SH NAME .PP bwa - Burrows-Wheeler Alignment Tool @@ -148,9 +148,10 @@ not work with split alignments. One may consider to use option .B -M to flag shorter split hits as secondary. -.B OPTIONS: .RS .TP 10 +.B ALGORITHM OPTIONS: +.TP .BI -t \ INT Number of threads [1] .TP @@ -202,11 +203,14 @@ Matching score. [1] .BI -B \ INT Mismatch penalty. The sequence error rate is approximately: {.75 * exp[-log(4) * B/A]}. [4] .TP -.BI -O \ INT -Gap open penalty. [6] +.BI -O \ INT[,INT] +Gap open penalty. If two numbers are specified, the first is the penalty of +openning a deletion and the second for openning an insertion. [6] .TP -.BI -E \ INT -Gap extension penalty. A gap of length k costs O + k*E (i.e. +.BI -E \ INT[,INT] +Gap extension penalty. If two numbers are specified, the first is the penalty +of extending a deletion and second for extending an insertion. A gap of length +k costs O + k*E (i.e. .B -O is for opening a zero-length gap). [1] .TP @@ -224,6 +228,9 @@ Penalty for an unpaired read pair. BWA-MEM scores an unpaired read pair as and scores a paired as scoreRead1+scoreRead2-insertPenalty. It compares these two scores to determine whether we should force pairing. A larger value leads to more aggressive read pair. [17] + +.TP +.B INPUT/OUTPUT OPTIONS: .TP .B -p Assume the first input query file is interleaved paired-end FASTA/Q. See the diff --git a/bwamem_pair.c b/bwamem_pair.c index 4c9c3de..b240f42 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -144,7 +144,7 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me if (len == re - rb) { // no funny things happening kswr_t aln; mem_alnreg_t b; - int tmp, xtra = KSW_XSUBO | KSW_XSTART | (l_ms * opt->a < 250? KSW_XBYTE : 0) | opt->min_seed_len; + int tmp, xtra = KSW_XSUBO | KSW_XSTART | (l_ms * opt->a < 250? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a); aln = ksw_align2(l_ms, seq, len, ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0); memset(&b, 0, sizeof(mem_alnreg_t)); if (aln.score >= opt->min_seed_len && aln.qb >= 0) { // something goes wrong if aln.qb < 0 @@ -235,6 +235,8 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_ return ret; } +#define raw_mapq(diff, a) ((int)(6.02 * (diff) / (a) + .499)) + int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]) { extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id); @@ -276,10 +278,11 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co score_un = a[0].a[0].score + a[1].a[0].score - opt->pen_unpaired; //q_pe = o && subo < o? (int)(MEM_MAPQ_COEF * (1. - (double)subo / o) * log(a[0].a[z[0]].seedcov + a[1].a[z[1]].seedcov) + .499) : 0; subo = subo > score_un? subo : score_un; - q_pe = (o - subo) * 6; + q_pe = raw_mapq(o - subo, opt->a); if (n_sub > 0) q_pe -= (int)(4.343 * log(n_sub+1) + .499); if (q_pe < 0) q_pe = 0; if (q_pe > 60) q_pe = 60; + //printf("[1] %ld, %d, %d\n", (long)id, q_pe, n_sub); // the following assumes no split hits if (o > score_un) { // paired alignment is preferred mem_alnreg_t *c[2]; @@ -293,8 +296,8 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co q_se[1] = q_se[1] > q_pe? q_se[1] : q_pe < q_se[1] + 40? q_pe : q_se[1] + 40; extra_flag |= 2; // cap at the tandem repeat score - q_se[0] = q_se[0] < (c[0]->score - c[0]->csub) * 6? q_se[0] : (c[0]->score - c[0]->csub) * 6; - q_se[1] = q_se[1] < (c[1]->score - c[1]->csub) * 6? q_se[1] : (c[1]->score - c[1]->csub) * 6; + q_se[0] = q_se[0] < raw_mapq(c[0]->score - c[0]->csub, opt->a)? q_se[0] : raw_mapq(c[0]->score - c[0]->csub, opt->a); + q_se[1] = q_se[1] < raw_mapq(c[1]->score - c[1]->csub, opt->a)? q_se[1] : raw_mapq(c[1]->score - c[1]->csub, opt->a); } else { // the unpaired alignment is preferred z[0] = z[1] = 0; q_se[0] = mem_approx_mapq_se(opt, &a[0].a[0]); diff --git a/main.c b/main.c index 43178dc..3a8811e 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.7+dev-r450" +#define PACKAGE_VERSION "0.7.7+dev-r451" #endif int bwa_fa2pac(int argc, char *argv[]);