Minor tweaks to how probs are supplied.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@676 348d0f76-0448-11de-a6fe-93d51630548a
2009-05-12 19:47:41 +00:00 · 2009-05-12 19:47:41 +00:00 · f1de3d6366
parent 095dacd154
commit f1de3d6366
3 changed files with 52 additions and 54 deletions
--- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingBaseModel.java
+++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingBaseModel.java
@ -69,21 +69,13 @@ public class BasecallingBaseModel {

    /**
     * Add a single training point to the model to estimate the means.
-     *
-     * @param basePrev       the previous cycle's base call (A, C, G, T)
-     * @param baseCur        the current cycle's base call (A, C, G, T)
     */
-    public void addMeanPoint(char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
-        int actualBasePrevIndex = BaseUtils.simpleBaseToBaseIndex(basePrev);
-        int actualBaseCurIndex = BaseUtils.simpleBaseToBaseIndex(baseCur);
-        double actualWeight = QualityUtils.qualToProb(qualCur);
-
+    public void addMeanPoint(double[][] probMatrix, double[] fourIntensity) {
        for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) {
            for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
-                // We want to upweight the correct theory as much as we can and spread the remainder out evenly between all other hypotheses.
-                double weight = (baseCurIndex == actualBaseCurIndex && (!correctForContext || basePrevIndex == actualBasePrevIndex)) ? actualWeight : ((1.0 - actualWeight)/((double) (4*numTheories - 1)));
+                double weight = probMatrix[basePrevIndex][baseCurIndex];                

-                DoubleMatrix1D weightedChannelIntensities = (DoubleFactory1D.dense).make(fourintensity);
+                DoubleMatrix1D weightedChannelIntensities = (DoubleFactory1D.dense).make(fourIntensity);
                weightedChannelIntensities.assign(F.mult(weight));

                sums[basePrevIndex][baseCurIndex].assign(weightedChannelIntensities, F.plus);
@ -96,26 +88,16 @@ public class BasecallingBaseModel {

    /**
     * Add a single training point to the model to estimate the covariances.
-     *
-     * @param basePrev       the previous cycle's base call (A, C, G, T)
-     * @param baseCur        the current cycle's base call (A, C, G, T)
-     * @param qualCur        the quality score for the current cycle's base call
-     * @param fourintensity  the four intensities for the current cycle's base call
     */
-    public void addCovariancePoint(char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
-        int actualBasePrevIndex = BaseUtils.simpleBaseToBaseIndex(basePrev);
-        int actualBaseCurIndex = BaseUtils.simpleBaseToBaseIndex(baseCur);
-        double actualWeight = QualityUtils.qualToProb(qualCur);
-
+    public void addCovariancePoint(double[][] probMatrix, double[] fourIntensity) {
        for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) {
            for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
-                // We want to upweight the correct theory as much as we can and spread the remainder out evenly between all other hypotheses.
-                double weight = (baseCurIndex == actualBaseCurIndex && (!correctForContext || basePrevIndex == actualBasePrevIndex)) ? actualWeight : ((1.0 - actualWeight)/((double) (4*numTheories - 1)));
+                double weight = probMatrix[basePrevIndex][baseCurIndex];

                DoubleMatrix1D mean = sums[basePrevIndex][baseCurIndex].copy();
                mean.assign(F.div(counts[basePrevIndex][baseCurIndex]));

-                DoubleMatrix1D sub = (DoubleFactory1D.dense).make(fourintensity);
+                DoubleMatrix1D sub = (DoubleFactory1D.dense).make(fourIntensity);
                sub.assign(mean, F.minus);

                DoubleMatrix2D cov = (DoubleFactory2D.dense).make(4, 4);
--- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingReadModel.java
+++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingReadModel.java
@ -7,11 +7,11 @@ import java.io.File;

 /**
 * BasecallingReadModel represents the statistical models for
- * all bases in all cycles.  It allows for easy, one-pass
- * training via the addTrainingPoint() method, and for the
- * computation of the 4x4 likelihood matrix or the 1x4
- * probability vector (with contextual components marginalized
- * out of the likelihood matrix).
+ * all bases in all cycles.  It allows for easy training via
+ * the addTrainingPoint() method, and for the computation of
+ * the 4x4 likelihood matrix or the 1x4 probability vector
+ * (with contextual components marginalized out of the
+ * likelihood matrix).
 *
 * @author Kiran Garimella
 */
@ -38,26 +38,20 @@ public class BasecallingReadModel {
     * Add a single training point to the model means.
     *
     * @param cycle         the cycle for which this point should be added
-     * @param basePrev      the previous base
-     * @param baseCur       the current base
-     * @param qualCur       the current base's quality
     * @param fourintensity the four intensities of the current base
     */
-    public void addMeanPoint(int cycle, char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
-        basemodels[cycle].addMeanPoint(basePrev, baseCur, qualCur, fourintensity);
+    public void addMeanPoint(int cycle, double[][] probMatrix, double[] fourintensity) {
+        basemodels[cycle].addMeanPoint(probMatrix, fourintensity);
    }

    /**
     * Add a single training point to the model covariances.
     *
     * @param cycle         the cycle for which this point should be added
-     * @param basePrev      the previous base
-     * @param baseCur       the current base
-     * @param qualCur       the current base's quality
     * @param fourintensity the four intensities of the current base
     */
-    public void addCovariancePoint(int cycle, char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
-        basemodels[cycle].addCovariancePoint(basePrev, baseCur, qualCur, fourintensity);
+    public void addCovariancePoint(int cycle, double[][] probMatrix, double[] fourintensity) {
+        basemodels[cycle].addCovariancePoint(probMatrix, fourintensity);
    }

    /**
@ -108,21 +102,35 @@ public class BasecallingReadModel {

        FourProb fp = new FourProb(likes);

-        /*
-        System.out.println(fp.baseAtRank(0) + ":");
-        for (int basePrevIndex = 0; basePrevIndex < likes.length; basePrevIndex++) {
-            for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
-                System.out.print(likes[basePrevIndex][baseCurIndex] + " ");
-            }
-            System.out.println();
-        }
-        System.out.println();
-        */
-
-        //return new FourProb(likes);
        return fp;
    }

+    /**
+     * Returns the base probability matrix
+     *
+     * @param cycle        the cycle of the base
+     * @param basePrev     the previous base
+     * @param baseCur      the current base
+     * @param probCur      the probability of the current base
+     * @return the base probability matrix (1x4 if no contextual correction, 4x4 otherwise)
+     */
+    public double[][] getBaseProbabilityMatrix(int cycle, char basePrev, char baseCur, double probCur) {
+        double[][] dist = new double[(correctForContext && cycle > 0) ? 4 : 1][4];
+
+        int actualBasePrevIndex = (correctForContext && cycle > 0) ? BaseUtils.simpleBaseToBaseIndex(basePrev) : 0;
+        int actualBaseCurIndex = BaseUtils.simpleBaseToBaseIndex(baseCur);
+
+        double residualTheories = (double) (dist.length*dist[0].length - 1);
+
+        for (int basePrevIndex = 0; basePrevIndex < dist.length; basePrevIndex++) {
+            for (int baseCurIndex = 0; baseCurIndex < dist[basePrevIndex].length; baseCurIndex++) {
+                dist[basePrevIndex][baseCurIndex] = (basePrevIndex == actualBasePrevIndex && baseCurIndex == actualBaseCurIndex) ? probCur : ((1.0 - probCur)/residualTheories);
+            }
+        }
+
+        return dist;
+    }
+
    /**
     * Writes model parameters to a file per cycle.
     *
--- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java
+++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java
@ -93,7 +93,12 @@ public class FourBaseRecaller extends CommandLineProgram {
                //double[] fourprob = getBaseProbabilityDistribution(bases.charAt(cycle), quals[cycle]);
                double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];

-                model.addMeanPoint(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), baseCur, qualCur, fourintensity);
+                //model.addMeanPoint(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), baseCur, qualCur, fourintensity);
+                model.addMeanPoint(cycle,
+                                   model.getBaseProbabilityMatrix(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1),
+                                                                  baseCur,
+                                                                  QualityUtils.qualToProb(qualCur)),
+                                   fourintensity);
            }

            queryid++;
@ -119,10 +124,13 @@ public class FourBaseRecaller extends CommandLineProgram {
            for (int cycle = 0; cycle < bases.length(); cycle++) {
                char baseCur  = bases.charAt(cycle);
                byte qualCur  = quals[cycle];
-                //double[] fourprob = getBaseProbabilityDistribution(bases.charAt(cycle), quals[cycle]);
                double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];

-                model.addCovariancePoint(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), baseCur, qualCur, fourintensity);
+                model.addCovariancePoint(cycle,
+                                         model.getBaseProbabilityMatrix(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1),
+                                                                        baseCur,
+                                                                        QualityUtils.qualToProb(qualCur)),
+                                         fourintensity);
            }

            queryid++;