backup comments

2013-02-20 01:11:38 -05:00 · 2013-02-20 01:11:38 -05:00 · a7d574d125
parent 688872fb1b
commit a7d574d125
1 changed files with 22 additions and 1 deletions
--- a/bwamem.c
+++ b/bwamem.c
@ -26,11 +26,32 @@ void mem_fill_scmat(int a, int b, int8_t mat[25])
 	for (j = 0; j < 5; ++j) mat[k++] = 0;
 }

+/* Theory on probability and scoring *ungapped* alignment
+ *
+ * s'(a,b) = log[P(b|a)/P(b)] = log[4P(b|a)], assuming uniform base distribution
+ * s'(a,a) = log(4), s'(a,b) = log(4e/3), where e is the error rate
+ *
+ * Scale s'(a,b) to s(a,a) s.t. s(a,a)=x. Then s(a,b) = x*s'(a,b)/log(4), or conversely: s'(a,b)=s(a,b)*log(4)/x
+ *
+ * If the matching score is x and mismatch penalty is -y, we can compute error rate e:
+ *   e = .75 * exp[-log(4) * y/x]
+ *
+ * log P(seq) = \sum_i log P(b_i|a_i) = \sum_i {s'(a,b) - log(4)}
+ *   = \sum_i { s(a,b)*log(4)/x - log(4) } = log(4) * (S/x - l)
+ *
+ * where S=\sum_i s(a,b) is the alignment score. Converting to the phred scale:
+ *   Q(seq) = -10/log(10) * log P(seq) = 10*log(4)/log(10) * (l - S/x) = 6.02 * (l - S/x)
+ *
+ *
+ * Gap open (zero gap): q' = log[P(gap-open)], r' = log[P(gap-ext)] (see Durbin et al. (1998) Section 4.1)
+ * Then q = x*log[P(gap-open)]/log(4), r = x*log[P(gap-ext)]/log(4)
+ */
+
 mem_opt_t *mem_opt_init()
 {
 	mem_opt_t *o;
 	o = calloc(1, sizeof(mem_opt_t));
-	o->a = 1; o->b = 5; o->q = 8; o->r = 1; o->w = 100;
+	o->a = 1; o->b = 4; o->q = 6; o->r = 1; o->w = 100;
 	o->flag = 0;
 	o->min_seed_len = 19;
 	o->split_width = 10;