Intermediate commit. Refactored some simple base-manipulation utilities into BaseUtils.java. Generalized some likelihood-computation logic to make possible future EM-ing easier.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@424 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d0b8d311e6
commit
7949e377e4
|
|
@ -7,6 +7,7 @@ import cern.colt.matrix.DoubleFactory2D;
|
|||
import cern.colt.matrix.linalg.Algebra;
|
||||
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
|
|
@ -21,38 +22,46 @@ import java.io.*;
|
|||
* @author Kiran Garimella
|
||||
*/
|
||||
public class BasecallingBaseModel {
|
||||
private double[] counts;
|
||||
private DoubleMatrix1D[] sums;
|
||||
private DoubleMatrix2D[] unscaledCovarianceSums;
|
||||
private double[][] counts;
|
||||
private DoubleMatrix1D[][] sums;
|
||||
private DoubleMatrix2D[][] unscaledCovarianceSums;
|
||||
|
||||
private DoubleMatrix1D[] means;
|
||||
private DoubleMatrix2D[] inverseCovariances;
|
||||
private double[] norms;
|
||||
private DoubleMatrix1D[][] means;
|
||||
private DoubleMatrix2D[][] inverseCovariances;
|
||||
private double[][] norms;
|
||||
|
||||
private cern.jet.math.Functions F = cern.jet.math.Functions.functions;
|
||||
private Algebra alg;
|
||||
|
||||
private boolean correctForContext = false;
|
||||
private int numTheories = 1;
|
||||
|
||||
private boolean readyToCall = false;
|
||||
|
||||
/**
|
||||
* Constructor for BasecallingBaseModel
|
||||
*/
|
||||
public BasecallingBaseModel() {
|
||||
counts = new double[4];
|
||||
public BasecallingBaseModel(boolean correctForContext) {
|
||||
this.correctForContext = correctForContext;
|
||||
this.numTheories = (correctForContext) ? 4 : 1;
|
||||
|
||||
counts = new double[this.numTheories][4];
|
||||
|
||||
sums = new DoubleMatrix1D[4];
|
||||
unscaledCovarianceSums = new DoubleMatrix2D[4];
|
||||
sums = new DoubleMatrix1D[this.numTheories][4];
|
||||
unscaledCovarianceSums = new DoubleMatrix2D[this.numTheories][4];
|
||||
|
||||
means = new DoubleMatrix1D[4];
|
||||
inverseCovariances = new DoubleMatrix2D[4];
|
||||
norms = new double[4];
|
||||
means = new DoubleMatrix1D[this.numTheories][4];
|
||||
inverseCovariances = new DoubleMatrix2D[this.numTheories][4];
|
||||
norms = new double[this.numTheories][4];
|
||||
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
sums[baseCurIndex] = (DoubleFactory1D.dense).make(4);
|
||||
unscaledCovarianceSums[baseCurIndex] = (DoubleFactory2D.dense).make(4, 4);
|
||||
for (int basePrevIndex = 0; basePrevIndex < this.numTheories; basePrevIndex++) {
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
sums[basePrevIndex][baseCurIndex] = (DoubleFactory1D.dense).make(4);
|
||||
unscaledCovarianceSums[basePrevIndex][baseCurIndex] = (DoubleFactory2D.dense).make(4, 4);
|
||||
|
||||
means[baseCurIndex] = (DoubleFactory1D.dense).make(4);
|
||||
inverseCovariances[baseCurIndex] = (DoubleFactory2D.dense).make(4, 4);
|
||||
means[basePrevIndex][baseCurIndex] = (DoubleFactory1D.dense).make(4);
|
||||
inverseCovariances[basePrevIndex][baseCurIndex] = (DoubleFactory2D.dense).make(4, 4);
|
||||
}
|
||||
}
|
||||
|
||||
alg = new Algebra();
|
||||
|
|
@ -61,23 +70,25 @@ public class BasecallingBaseModel {
|
|||
/**
|
||||
* Add a single training point to the model to estimate the means.
|
||||
*
|
||||
* @param basePrev the previous cycle's base call (A, C, G, T)
|
||||
* @param baseCur the current cycle's base call (A, C, G, T)
|
||||
* @param qualCur the quality score for the current cycle's base call
|
||||
* @param fourintensity the four intensities for the current cycle's base call
|
||||
*/
|
||||
public void addMeanPoint(char baseCur, byte qualCur, double[] fourintensity) {
|
||||
int actualBaseCurIndex = baseToBaseIndex(baseCur);
|
||||
public void addMeanPoint(char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
|
||||
int actualBasePrevIndex = BaseUtils.simpleBaseToBaseIndex(basePrev);
|
||||
int actualBaseCurIndex = BaseUtils.simpleBaseToBaseIndex(baseCur);
|
||||
double actualWeight = QualityUtils.qualToProb(qualCur);
|
||||
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
// We want to upweight the correct theory as much as we can and spread the remainder out evenly between all other hypotheses.
|
||||
double weight = (baseCurIndex == actualBaseCurIndex) ? actualWeight : ((1.0 - actualWeight)/3.0);
|
||||
for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) {
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
// We want to upweight the correct theory as much as we can and spread the remainder out evenly between all other hypotheses.
|
||||
double weight = (baseCurIndex == actualBaseCurIndex && (!correctForContext || basePrevIndex == actualBasePrevIndex)) ? actualWeight : ((1.0 - actualWeight)/((double) (4*numTheories - 1)));
|
||||
|
||||
DoubleMatrix1D weightedChannelIntensities = (DoubleFactory1D.dense).make(fourintensity);
|
||||
weightedChannelIntensities.assign(F.mult(weight));
|
||||
DoubleMatrix1D weightedChannelIntensities = (DoubleFactory1D.dense).make(fourintensity);
|
||||
weightedChannelIntensities.assign(F.mult(weight));
|
||||
|
||||
sums[baseCurIndex].assign(weightedChannelIntensities, F.plus);
|
||||
counts[baseCurIndex] += weight;
|
||||
sums[basePrevIndex][baseCurIndex].assign(weightedChannelIntensities, F.plus);
|
||||
counts[basePrevIndex][baseCurIndex] += weight;
|
||||
}
|
||||
}
|
||||
|
||||
readyToCall = false;
|
||||
|
|
@ -85,30 +96,34 @@ public class BasecallingBaseModel {
|
|||
|
||||
/**
|
||||
* Add a single training point to the model to estimate the covariances.
|
||||
*
|
||||
*
|
||||
* @param basePrev the previous cycle's base call (A, C, G, T)
|
||||
* @param baseCur the current cycle's base call (A, C, G, T)
|
||||
* @param qualCur the quality score for the current cycle's base call
|
||||
* @param fourintensity the four intensities for the current cycle's base call
|
||||
*/
|
||||
public void addCovariancePoint(char baseCur, byte qualCur, double[] fourintensity) {
|
||||
int actualBaseCurIndex = baseToBaseIndex(baseCur);
|
||||
public void addCovariancePoint(char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
|
||||
int actualBasePrevIndex = BaseUtils.simpleBaseToBaseIndex(basePrev);
|
||||
int actualBaseCurIndex = BaseUtils.simpleBaseToBaseIndex(baseCur);
|
||||
double actualWeight = QualityUtils.qualToProb(qualCur);
|
||||
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
// We want to upweight the correct theory as much as we can and spread the remainder out evenly between all other hypotheses.
|
||||
double weight = (baseCurIndex == actualBaseCurIndex) ? actualWeight : ((1.0 - actualWeight)/3.0);
|
||||
for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) {
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
// We want to upweight the correct theory as much as we can and spread the remainder out evenly between all other hypotheses.
|
||||
double weight = (baseCurIndex == actualBaseCurIndex && (!correctForContext || basePrevIndex == actualBasePrevIndex)) ? actualWeight : ((1.0 - actualWeight)/((double) (4*numTheories - 1)));
|
||||
|
||||
DoubleMatrix1D mean = sums[baseCurIndex].copy();
|
||||
mean.assign(F.div(counts[baseCurIndex]));
|
||||
DoubleMatrix1D mean = sums[basePrevIndex][baseCurIndex].copy();
|
||||
mean.assign(F.div(counts[basePrevIndex][baseCurIndex]));
|
||||
|
||||
DoubleMatrix1D sub = (DoubleFactory1D.dense).make(fourintensity);
|
||||
sub.assign(mean, F.minus);
|
||||
DoubleMatrix1D sub = (DoubleFactory1D.dense).make(fourintensity);
|
||||
sub.assign(mean, F.minus);
|
||||
|
||||
DoubleMatrix2D cov = (DoubleFactory2D.dense).make(4, 4);
|
||||
alg.multOuter(sub, sub, cov);
|
||||
DoubleMatrix2D cov = (DoubleFactory2D.dense).make(4, 4);
|
||||
alg.multOuter(sub, sub, cov);
|
||||
|
||||
cov.assign(F.mult(weight));
|
||||
unscaledCovarianceSums[baseCurIndex].assign(cov, F.plus);
|
||||
cov.assign(F.mult(weight));
|
||||
unscaledCovarianceSums[basePrevIndex][baseCurIndex].assign(cov, F.plus);
|
||||
}
|
||||
}
|
||||
|
||||
readyToCall = false;
|
||||
|
|
@ -118,16 +133,18 @@ public class BasecallingBaseModel {
|
|||
* Precompute all the matrix inversions and determinants we'll need for computing the likelihood distributions.
|
||||
*/
|
||||
public void prepareToCallBases() {
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
means[baseCurIndex] = sums[baseCurIndex].copy();
|
||||
means[baseCurIndex].assign(F.div(counts[baseCurIndex]));
|
||||
for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) {
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
means[basePrevIndex][baseCurIndex] = sums[basePrevIndex][baseCurIndex].copy();
|
||||
means[basePrevIndex][baseCurIndex].assign(F.div(counts[basePrevIndex][baseCurIndex]));
|
||||
|
||||
inverseCovariances[baseCurIndex] = unscaledCovarianceSums[baseCurIndex].copy();
|
||||
inverseCovariances[baseCurIndex].assign(F.div(counts[baseCurIndex]));
|
||||
DoubleMatrix2D invcov = alg.inverse(inverseCovariances[baseCurIndex]);
|
||||
inverseCovariances[baseCurIndex] = invcov;
|
||||
inverseCovariances[basePrevIndex][baseCurIndex] = unscaledCovarianceSums[basePrevIndex][baseCurIndex].copy();
|
||||
inverseCovariances[basePrevIndex][baseCurIndex].assign(F.div(counts[basePrevIndex][baseCurIndex]));
|
||||
DoubleMatrix2D invcov = alg.inverse(inverseCovariances[basePrevIndex][baseCurIndex]);
|
||||
inverseCovariances[basePrevIndex][baseCurIndex] = invcov;
|
||||
|
||||
norms[baseCurIndex] = Math.pow(alg.det(invcov), 0.5)/Math.pow(2.0*Math.PI, 2.0);
|
||||
norms[basePrevIndex][baseCurIndex] = Math.pow(alg.det(invcov), 0.5)/Math.pow(2.0*Math.PI, 2.0);
|
||||
}
|
||||
}
|
||||
|
||||
readyToCall = true;
|
||||
|
|
@ -141,22 +158,24 @@ public class BasecallingBaseModel {
|
|||
* @return a 4x4 matrix of likelihoods, where the row is the previous cycle base hypothesis and
|
||||
* the column is the current cycle base hypothesis
|
||||
*/
|
||||
public double[] computeLikelihoods(int cycle, double[] fourintensity) {
|
||||
public double[][] computeLikelihoods(int cycle, double[] fourintensity) {
|
||||
if (!readyToCall) {
|
||||
prepareToCallBases();
|
||||
}
|
||||
|
||||
double[] likedist = new double[4];
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
double norm = norms[baseCurIndex];
|
||||
double[][] likedist = new double[numTheories][4];
|
||||
for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) {
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
double norm = norms[basePrevIndex][baseCurIndex];
|
||||
|
||||
DoubleMatrix1D sub = (DoubleFactory1D.dense).make(fourintensity);
|
||||
sub.assign(means[baseCurIndex], F.minus);
|
||||
DoubleMatrix1D sub = (DoubleFactory1D.dense).make(fourintensity);
|
||||
sub.assign(means[basePrevIndex][baseCurIndex], F.minus);
|
||||
|
||||
DoubleMatrix1D Ax = alg.mult(inverseCovariances[baseCurIndex], sub);
|
||||
double exparg = -0.5*alg.mult(sub, Ax);
|
||||
DoubleMatrix1D Ax = alg.mult(inverseCovariances[basePrevIndex][baseCurIndex], sub);
|
||||
double exparg = -0.5*alg.mult(sub, Ax);
|
||||
|
||||
likedist[baseCurIndex] = norm*Math.exp(exparg);
|
||||
likedist[basePrevIndex][baseCurIndex] = norm*Math.exp(exparg);
|
||||
}
|
||||
}
|
||||
|
||||
return likedist;
|
||||
|
|
@ -166,66 +185,33 @@ public class BasecallingBaseModel {
|
|||
try {
|
||||
PrintWriter writer = new PrintWriter(outparam);
|
||||
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
writer.print("mean_" + baseIndexToBase(baseCurIndex) + " = c(");
|
||||
for (int channel = 0; channel < 4; channel++) {
|
||||
writer.print(sums[baseCurIndex].getQuick(channel)/counts[baseCurIndex]);
|
||||
for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) {
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
writer.print("mean_" + BaseUtils.baseIndexToSimpleBase(baseCurIndex) + " = c(");
|
||||
for (int channel = 0; channel < 4; channel++) {
|
||||
writer.print(sums[basePrevIndex][baseCurIndex].getQuick(channel)/counts[basePrevIndex][baseCurIndex]);
|
||||
|
||||
if (channel < 3) {
|
||||
writer.print(", ");
|
||||
if (channel < 3) {
|
||||
writer.print(", ");
|
||||
}
|
||||
}
|
||||
}
|
||||
writer.println(");");
|
||||
writer.println(");");
|
||||
|
||||
DoubleMatrix2D cov = unscaledCovarianceSums[baseCurIndex].copy();
|
||||
cov.assign(F.div(counts[baseCurIndex]));
|
||||
DoubleMatrix2D cov = unscaledCovarianceSums[basePrevIndex][baseCurIndex].copy();
|
||||
cov.assign(F.div(counts[basePrevIndex][baseCurIndex]));
|
||||
|
||||
writer.print("cov_" + baseIndexToBase(baseCurIndex) + " = matrix(c(");
|
||||
for (int channel1 = 0; channel1 < 4; channel1++) {
|
||||
for (int channel2 = 0; channel2 < 4; channel2++) {
|
||||
writer.print(cov.get(channel2, channel1) + (channel1 == 3 && channel2 == 3 ? "" : ","));
|
||||
writer.print("cov_" + BaseUtils.baseIndexToSimpleBase(baseCurIndex) + " = matrix(c(");
|
||||
for (int channel1 = 0; channel1 < 4; channel1++) {
|
||||
for (int channel2 = 0; channel2 < 4; channel2++) {
|
||||
writer.print(cov.get(channel2, channel1) + (channel1 == 3 && channel2 == 3 ? "" : ","));
|
||||
}
|
||||
}
|
||||
writer.println("), nr=4, nc=4);\n");
|
||||
}
|
||||
writer.println("), nr=4, nc=4);\n");
|
||||
}
|
||||
|
||||
writer.close();
|
||||
} catch (IOException e) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method for converting a base ([Aa*], [Cc], [Gg], [Tt]) to an index (0, 1, 2, 3);
|
||||
*
|
||||
* @param base
|
||||
* @return 0, 1, 2, 3, or -1 if the base can't be understood.
|
||||
*/
|
||||
private int baseToBaseIndex(char base) {
|
||||
switch (base) {
|
||||
case 'A':
|
||||
case 'a':
|
||||
case '*': return 0;
|
||||
|
||||
case 'C':
|
||||
case 'c': return 1;
|
||||
|
||||
case 'G':
|
||||
case 'g': return 2;
|
||||
|
||||
case 'T':
|
||||
case 't': return 3;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
private char baseIndexToBase(int baseIndex) {
|
||||
switch (baseIndex) {
|
||||
case 0: return 'A';
|
||||
case 1: return 'C';
|
||||
case 2: return 'G';
|
||||
case 3: return 'T';
|
||||
default: return '.';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,8 @@
|
|||
package org.broadinstitute.sting.playground.fourbasecaller;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
|
|
@ -14,34 +17,47 @@ import java.io.File;
|
|||
*/
|
||||
public class BasecallingReadModel {
|
||||
private BasecallingBaseModel[] basemodels = null;
|
||||
private boolean correctForContext = false;
|
||||
|
||||
/**
|
||||
* Constructor for BasecallingReadModel.
|
||||
*
|
||||
* @param readLength the length of the read that this model will support
|
||||
*/
|
||||
public BasecallingReadModel(int readLength) {
|
||||
public BasecallingReadModel(int readLength, boolean correctForContext) {
|
||||
this.correctForContext = correctForContext;
|
||||
|
||||
basemodels = new BasecallingBaseModel[readLength];
|
||||
|
||||
for (int i = 0; i < readLength; i++) {
|
||||
basemodels[i] = new BasecallingBaseModel();
|
||||
for (int cycle = 0; cycle < readLength; cycle++) {
|
||||
basemodels[cycle] = new BasecallingBaseModel(cycle == 0 ? false : correctForContext);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a single training point to the model.
|
||||
* Add a single training point to the model means.
|
||||
*
|
||||
* @param cycle the cycle for which this point should be added
|
||||
* @param basePrev the previous base
|
||||
* @param baseCur the current base
|
||||
* @param qualCur the current base's quality
|
||||
* @param fourintensity the four intensities of the current base
|
||||
*/
|
||||
public void addMeanPoint(int cycle, char baseCur, byte qualCur, double[] fourintensity) {
|
||||
basemodels[cycle].addMeanPoint(baseCur, qualCur, fourintensity);
|
||||
public void addMeanPoint(int cycle, char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
|
||||
basemodels[cycle].addMeanPoint(basePrev, baseCur, qualCur, fourintensity);
|
||||
}
|
||||
|
||||
public void addCovariancePoint(int cycle, char baseCur, byte qualCur, double[] fourintensity) {
|
||||
basemodels[cycle].addCovariancePoint(baseCur, qualCur, fourintensity);
|
||||
/**
|
||||
* Add a single training point to the model covariances.
|
||||
*
|
||||
* @param cycle the cycle for which this point should be added
|
||||
* @param basePrev the previous base
|
||||
* @param baseCur the current base
|
||||
* @param qualCur the current base's quality
|
||||
* @param fourintensity the four intensities of the current base
|
||||
*/
|
||||
public void addCovariancePoint(int cycle, char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
|
||||
basemodels[cycle].addCovariancePoint(basePrev, baseCur, qualCur, fourintensity);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -51,7 +67,7 @@ public class BasecallingReadModel {
|
|||
* @param fourintensity the four intensities for the current cycle's base
|
||||
* @return 4x4 matrix of likelihoods
|
||||
*/
|
||||
public double[] computeLikelihoods(int cycle, double[] fourintensity) {
|
||||
public double[][] computeLikelihoods(int cycle, double[] fourintensity) {
|
||||
return basemodels[cycle].computeLikelihoods(cycle, fourintensity);
|
||||
}
|
||||
|
||||
|
|
@ -63,15 +79,48 @@ public class BasecallingReadModel {
|
|||
* @return an instance of FourProb, which encodes a base hypothesis, its probability,
|
||||
* and the ranking among the other hypotheses
|
||||
*/
|
||||
public FourProb computeProbabilities(int cycle, double[] fourintensity) {
|
||||
double[] likes = computeLikelihoods(cycle, fourintensity);
|
||||
public FourProb computeProbabilities(int cycle, char basePrev, byte qualPrev, double[] fourintensity) {
|
||||
double[][] likes = computeLikelihoods(cycle, fourintensity);
|
||||
|
||||
double total = 0;
|
||||
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { total += likes[baseCurIndex]; }
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { likes[baseCurIndex] /= total; }
|
||||
for (int basePrevIndex = 0; basePrevIndex < likes.length; basePrevIndex++) {
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
double prior = 1.0;
|
||||
if (correctForContext) {
|
||||
double prob = QualityUtils.qualToProb(qualPrev);
|
||||
if (basePrevIndex == BaseUtils.simpleBaseToBaseIndex(basePrev)) {
|
||||
prior = prob;
|
||||
} else {
|
||||
prior = (1.0 - prob)/((double) (4*likes.length - 1));
|
||||
}
|
||||
}
|
||||
likes[basePrevIndex][baseCurIndex] = prior*likes[basePrevIndex][baseCurIndex];
|
||||
total += likes[basePrevIndex][baseCurIndex];
|
||||
}
|
||||
}
|
||||
|
||||
return new FourProb(likes);
|
||||
for (int basePrevIndex = 0; basePrevIndex < likes.length; basePrevIndex++) {
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
likes[basePrevIndex][baseCurIndex] /= total;
|
||||
}
|
||||
}
|
||||
|
||||
FourProb fp = new FourProb(likes);
|
||||
|
||||
/*
|
||||
System.out.println(fp.baseAtRank(0) + ":");
|
||||
for (int basePrevIndex = 0; basePrevIndex < likes.length; basePrevIndex++) {
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
System.out.print(likes[basePrevIndex][baseCurIndex] + " ");
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
System.out.println();
|
||||
*/
|
||||
|
||||
//return new FourProb(likes);
|
||||
return fp;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ public class FourBaseRecaller extends CommandLineProgram {
|
|||
public int CALLING_LIMIT = 1000000000;
|
||||
public Boolean RAW = false;
|
||||
public Boolean OLD = false;
|
||||
public Boolean CONTEXT = false;
|
||||
|
||||
public static void main(String[] argv) {
|
||||
Instance = new FourBaseRecaller();
|
||||
|
|
@ -46,6 +47,7 @@ public class FourBaseRecaller extends CommandLineProgram {
|
|||
m_parser.addOptionalArg("clim", "C", "Number of reads to basecall", "CALLING_LIMIT");
|
||||
m_parser.addOptionalFlag("raw", "R", "Use raw intensities?", "RAW");
|
||||
m_parser.addOptionalFlag("old", "1", "Old Bustard 1.1 mode?", "OLD");
|
||||
m_parser.addOptionalFlag("context", "X", "Correct for context?", "CONTEXT");
|
||||
}
|
||||
|
||||
protected int execute() {
|
||||
|
|
@ -72,7 +74,7 @@ public class FourBaseRecaller extends CommandLineProgram {
|
|||
fread = ffp.next();
|
||||
|
||||
int cycle_offset = (END <= 1) ? 0 : bread.getIntensities().length/2;
|
||||
BasecallingReadModel model = new BasecallingReadModel(bread.getFirstReadSequence().length());
|
||||
BasecallingReadModel model = new BasecallingReadModel(bread.getFirstReadSequence().length(), CONTEXT);
|
||||
int queryid;
|
||||
|
||||
// learn mean parameters
|
||||
|
|
@ -90,9 +92,10 @@ public class FourBaseRecaller extends CommandLineProgram {
|
|||
for (int cycle = 0; cycle < bases.length(); cycle++) {
|
||||
char baseCur = bases.charAt(cycle);
|
||||
byte qualCur = quals[cycle];
|
||||
//double[] fourprob = getBaseProbabilityDistribution(bases.charAt(cycle), quals[cycle]);
|
||||
double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];
|
||||
|
||||
model.addMeanPoint(cycle, baseCur, qualCur, fourintensity);
|
||||
model.addMeanPoint(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), baseCur, qualCur, fourintensity);
|
||||
}
|
||||
|
||||
queryid++;
|
||||
|
|
@ -118,9 +121,10 @@ public class FourBaseRecaller extends CommandLineProgram {
|
|||
for (int cycle = 0; cycle < bases.length(); cycle++) {
|
||||
char baseCur = bases.charAt(cycle);
|
||||
byte qualCur = quals[cycle];
|
||||
//double[] fourprob = getBaseProbabilityDistribution(bases.charAt(cycle), quals[cycle]);
|
||||
double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];
|
||||
|
||||
model.addCovariancePoint(cycle, baseCur, qualCur, fourintensity);
|
||||
model.addCovariancePoint(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), baseCur, qualCur, fourintensity);
|
||||
}
|
||||
|
||||
queryid++;
|
||||
|
|
@ -143,7 +147,7 @@ public class FourBaseRecaller extends CommandLineProgram {
|
|||
int[][] base_counts = new int[4][bread.getFirstReadSequence().length()];
|
||||
int bases_incorrect = 0, bases_total = 0;
|
||||
|
||||
if (debugout != null) { debugout.println("cycle int_a int_c int_g int_t bustard_base kiran_base bustard_prob kiran_prob"); }
|
||||
if (debugout != null) { debugout.println("cycle int_a int_c int_g int_t bustard_base kiran_base bustard_prob kiran_prob kiran_base_prev"); }
|
||||
|
||||
queryid = 0;
|
||||
do {
|
||||
|
|
@ -161,14 +165,17 @@ public class FourBaseRecaller extends CommandLineProgram {
|
|||
for (int cycle = 0; cycle < bases.length(); cycle++) {
|
||||
double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];
|
||||
|
||||
FourProb fp = model.computeProbabilities(cycle, fourintensity);
|
||||
char basePrev = (cycle == 0 ? '.' : (char) asciiseq[cycle - 1]);
|
||||
byte qualPrev = (cycle == 0 ? 40 : bestqual[cycle - 1]);
|
||||
|
||||
FourProb fp = model.computeProbabilities(cycle, basePrev, qualPrev, fourintensity);
|
||||
|
||||
asciiseq[cycle] = (byte) fp.baseAtRank(0);
|
||||
bestqual[cycle] = fp.qualAtRank(0);
|
||||
nextbestqual[cycle] = QualityUtils.baseAndProbToCompressedQuality(fp.indexAtRank(1), fp.probAtRank(1));
|
||||
|
||||
if (debugout != null && bases.charAt(cycle) != '.' && base_counts[BaseUtils.simpleBaseToBaseIndex(bases.charAt(cycle))][cycle] < 1000) {
|
||||
debugout.println(cycle + " " + fourintensity[0] + " " + fourintensity[1] + " " + fourintensity[2] + " " + fourintensity[3] + " " + (bases.charAt(cycle)) + " " + ((char) asciiseq[cycle]) + " " + bestqual[cycle] + " " + quals[cycle]);
|
||||
debugout.println(cycle + " " + fourintensity[0] + " " + fourintensity[1] + " " + fourintensity[2] + " " + fourintensity[3] + " " + (bases.charAt(cycle)) + " " + ((char) asciiseq[cycle]) + " " + bestqual[cycle] + " " + quals[cycle] + " " + basePrev);
|
||||
|
||||
base_counts[BaseUtils.simpleBaseToBaseIndex(bases.charAt(cycle))][cycle]++;
|
||||
}
|
||||
|
|
@ -188,11 +195,27 @@ public class FourBaseRecaller extends CommandLineProgram {
|
|||
}
|
||||
sfw.close();
|
||||
|
||||
System.out.println("Nonmatch rate: " + ((double) bases_incorrect)/((double) bases_total));
|
||||
System.out.println("Disagreement rate: " + ((double) bases_incorrect)/((double) bases_total));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
private double[] getBaseProbabilityDistribution(char base, byte qual) {
|
||||
double[] dist = new double[4];
|
||||
|
||||
int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
dist[baseIndex] = QualityUtils.qualToProb(qual);
|
||||
double residualprob = (1.0 - dist[baseIndex])/3.0;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (i != baseIndex) {
|
||||
dist[i] = residualprob;
|
||||
}
|
||||
}
|
||||
|
||||
return dist;
|
||||
}
|
||||
|
||||
private SAMRecord constructSAMRecord(String readNamePrefix, String bases, byte[] bestqual, byte[] nextbestqual, BustardReadData bread, SAMFileHeader sfh) {
|
||||
SAMRecord sr = new SAMRecord(sfh);
|
||||
|
||||
|
|
|
|||
|
|
@ -15,9 +15,16 @@ public class FourProb {
|
|||
/**
|
||||
* Constructor for FourProb.
|
||||
*
|
||||
* @param baseProbs the unsorted base hypothesis probabilities (in ACGT order).
|
||||
* @param baseLikes the unsorted base hypothesis probabilities (in ACGT order).
|
||||
*/
|
||||
public FourProb(double[] baseProbs) {
|
||||
public FourProb(double[][] baseLikes) {
|
||||
double[] baseProbs = new double[4];
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
for (int basePrevIndex = 0; basePrevIndex < baseLikes.length; basePrevIndex++) {
|
||||
baseProbs[baseCurIndex] += baseLikes[basePrevIndex][baseCurIndex];
|
||||
}
|
||||
}
|
||||
|
||||
int[] baseIndices = {0, 1, 2, 3};
|
||||
|
||||
Integer[] perm = Utils.SortPermutation(baseProbs);
|
||||
|
|
|
|||
Loading…
Reference in New Issue