From 417c6d66c714d47e1243d7502fde4864499acbe1 Mon Sep 17 00:00:00 2001
From: Heng Li <lh3@me.com>
Date: Mon, 31 Mar 2014 10:52:45 -0400
Subject: [PATCH] dev-r451: fixed a few bugs when -A!=1

Something is still wrong.
---
 bwa.1         | 19 +++++++++++++------
 bwamem_pair.c | 11 +++++++----
 main.c        |  2 +-
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/bwa.1 b/bwa.1
index 601a529..47bdd61 100644
--- a/bwa.1
+++ b/bwa.1
@@ -1,4 +1,4 @@
-.TH bwa 1 "25 Feburary 2014" "bwa-0.7.7" "Bioinformatics tools"
+.TH bwa 1 "31 March 2014" "bwa-0.7.8" "Bioinformatics tools"
 .SH NAME
 .PP
 bwa - Burrows-Wheeler Alignment Tool
@@ -148,9 +148,10 @@ not work with split alignments. One may consider to use option
 .B -M
 to flag shorter split hits as secondary.
 
-.B OPTIONS:
 .RS
 .TP 10
+.B ALGORITHM OPTIONS:
+.TP
 .BI -t \ INT
 Number of threads [1]
 .TP
@@ -202,11 +203,14 @@ Matching score. [1]
 .BI -B \ INT
 Mismatch penalty. The sequence error rate is approximately: {.75 * exp[-log(4) * B/A]}. [4]
 .TP
-.BI -O \ INT
-Gap open penalty. [6]
+.BI -O \ INT[,INT]
+Gap open penalty. If two numbers are specified, the first is the penalty of
+openning a deletion and the second for openning an insertion. [6]
 .TP
-.BI -E \ INT
-Gap extension penalty. A gap of length k costs O + k*E (i.e.
+.BI -E \ INT[,INT]
+Gap extension penalty. If two numbers are specified, the first is the penalty
+of extending a deletion and second for extending an insertion. A gap of length
+k costs O + k*E (i.e.
 .B -O
 is for opening a zero-length gap). [1]
 .TP
@@ -224,6 +228,9 @@ Penalty for an unpaired read pair. BWA-MEM scores an unpaired read pair as
 and scores a paired as scoreRead1+scoreRead2-insertPenalty. It compares these
 two scores to determine whether we should force pairing. A larger value leads to
 more aggressive read pair. [17]
+
+.TP
+.B INPUT/OUTPUT OPTIONS:
 .TP
 .B -p
 Assume the first input query file is interleaved paired-end FASTA/Q. See the
diff --git a/bwamem_pair.c b/bwamem_pair.c
index 4c9c3de..b240f42 100644
--- a/bwamem_pair.c
+++ b/bwamem_pair.c
@@ -144,7 +144,7 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me
 		if (len == re - rb) { // no funny things happening
 			kswr_t aln;
 			mem_alnreg_t b;
-			int tmp, xtra = KSW_XSUBO | KSW_XSTART | (l_ms * opt->a < 250? KSW_XBYTE : 0) | opt->min_seed_len;
+			int tmp, xtra = KSW_XSUBO | KSW_XSTART | (l_ms * opt->a < 250? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a);
 			aln = ksw_align2(l_ms, seq, len, ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0);
 			memset(&b, 0, sizeof(mem_alnreg_t));
 			if (aln.score >= opt->min_seed_len && aln.qb >= 0) { // something goes wrong if aln.qb < 0
@@ -235,6 +235,8 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_
 	return ret;
 }
 
+#define raw_mapq(diff, a) ((int)(6.02 * (diff) / (a) + .499))
+
 int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2])
 {
 	extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
@@ -276,10 +278,11 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
 		score_un = a[0].a[0].score + a[1].a[0].score - opt->pen_unpaired;
 		//q_pe = o && subo < o? (int)(MEM_MAPQ_COEF * (1. - (double)subo / o) * log(a[0].a[z[0]].seedcov + a[1].a[z[1]].seedcov) + .499) : 0;
 		subo = subo > score_un? subo : score_un;
-		q_pe = (o - subo) * 6;
+		q_pe = raw_mapq(o - subo, opt->a);
 		if (n_sub > 0) q_pe -= (int)(4.343 * log(n_sub+1) + .499);
 		if (q_pe < 0) q_pe = 0;
 		if (q_pe > 60) q_pe = 60;
+		//printf("[1] %ld, %d, %d\n", (long)id, q_pe, n_sub);
 		// the following assumes no split hits
 		if (o > score_un) { // paired alignment is preferred
 			mem_alnreg_t *c[2];
@@ -293,8 +296,8 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
 			q_se[1] = q_se[1] > q_pe? q_se[1] : q_pe < q_se[1] + 40? q_pe : q_se[1] + 40;
 			extra_flag |= 2;
 			// cap at the tandem repeat score
-			q_se[0] = q_se[0] < (c[0]->score - c[0]->csub) * 6? q_se[0] : (c[0]->score - c[0]->csub) * 6;
-			q_se[1] = q_se[1] < (c[1]->score - c[1]->csub) * 6? q_se[1] : (c[1]->score - c[1]->csub) * 6;
+			q_se[0] = q_se[0] < raw_mapq(c[0]->score - c[0]->csub, opt->a)? q_se[0] : raw_mapq(c[0]->score - c[0]->csub, opt->a);
+			q_se[1] = q_se[1] < raw_mapq(c[1]->score - c[1]->csub, opt->a)? q_se[1] : raw_mapq(c[1]->score - c[1]->csub, opt->a);
 		} else { // the unpaired alignment is preferred
 			z[0] = z[1] = 0;
 			q_se[0] = mem_approx_mapq_se(opt, &a[0].a[0]);
diff --git a/main.c b/main.c
index 43178dc..3a8811e 100644
--- a/main.c
+++ b/main.c
@@ -4,7 +4,7 @@
 #include "utils.h"
 
 #ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.7.7+dev-r450"
+#define PACKAGE_VERSION "0.7.7+dev-r451"
 #endif
 
 int bwa_fa2pac(int argc, char *argv[]);