JavaDocs!

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@290 348d0f76-0448-11de-a6fe-93d51630548a
2009-04-03 19:19:17 +00:00 · 2009-04-03 19:19:17 +00:00 · ef06924f73
parent 42eb356782
commit ef06924f73
5 changed files with 159 additions and 6 deletions
--- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingBaseModel.java
+++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingBaseModel.java
@ -8,6 +8,16 @@ import cern.colt.matrix.linalg.Algebra;

 import org.broadinstitute.sting.utils.QualityUtils;

+/**
+ * BasecallingBaseModel is a class that represents the statistical
+ * model for all bases at a given cycle.  It allows for easy, one
+ * pass training via the addTrainingPoint() method.  Once the model
+ * is trained, computeLikelihoods will return the probability matrix
+ * over previous cycle's base hypotheses and current cycle base
+ * hypotheses (contextual prior is included in these likelihoods).
+ *
+ * @author Kiran Garimella
+ */
 public class BasecallingBaseModel {
    private double[][] counts;

@ -19,8 +29,11 @@ public class BasecallingBaseModel {
    private DoubleMatrix2D[][] inverseCovariances;
    private double[][] norms;

-    Algebra alg;
+    private Algebra alg;

+    /**
+     * Constructor for BasecallingBaseModel
+     */
    public BasecallingBaseModel() {
        counts = new double[4][4];

@ -44,6 +57,13 @@ public class BasecallingBaseModel {
        alg = new Algebra();
    }

+    /**
+     * Add a single training point to the model.
+     * @param basePrev       the previous cycle's base call (A, C, G, T, or * for the first cycle)
+     * @param baseCur        the current cycle's base call (A, C, G, T)
+     * @param qualCur        the quality score for the current cycle's base call
+     * @param fourintensity  the four intensities for the current cycle's base call
+     */
    public void addTrainingPoint(char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
        int basePrevIndex = baseToBaseIndex(basePrev);
        int baseCurIndex = baseToBaseIndex(baseCur);
@ -67,6 +87,9 @@ public class BasecallingBaseModel {
        readyToCall = false;
    }

+    /**
+     * Precompute all the matrix inversions and determinants we'll need for computing the likelihood distributions.
+     */
    public void prepareToCallBases() {
        for (int basePrevIndex = 0; basePrevIndex < 4; basePrevIndex++) {
            for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
@ -88,6 +111,16 @@ public class BasecallingBaseModel {
        readyToCall = true;
    }

+    /**
+     * Compute the likelihood matrix for a base (contextual priors included).
+     *
+     * @param cycle         the cycle we're calling right now
+     * @param basePrev      the previous cycle's base
+     * @param qualPrev      the previous cycle's quality score
+     * @param fourintensity the four intensities of the current cycle's base
+     * @return              a 4x4 matrix of likelihoods, where the row is the previous cycle base hypothesis and
+     *                      the column is the current cycle base hypothesis
+     */
    public double[][] computeLikelihoods(int cycle, char basePrev, byte qualPrev, double[] fourintensity) {
        if (!readyToCall) {
            prepareToCallBases();
@ -115,6 +148,11 @@ public class BasecallingBaseModel {
        return probdist;
    }

+    /**
+     * Utility method for converting a base ([Aa*], [Cc], [Gg], [Tt]) to an index (0, 1, 2, 3);
+     * @param base
+     * @return 0, 1, 2, 3, or -1 if the base can't be understood.
+     */
    private int baseToBaseIndex(char base) {
        switch (base) {
            case 'A':
--- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingReadModel.java
+++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingReadModel.java
@ -1,10 +1,23 @@
 package org.broadinstitute.sting.playground.fourbasecaller;

-import org.broadinstitute.sting.utils.Utils;
-
+/**
+ * BasecallingReadModel represents the statistical models for
+ * all bases in all cycles.  It allows for easy, one-pass
+ * training via the addTrainingPoint() method, and for the
+ * computation of the 4x4 likelihood matrix or the 1x4
+ * probability vector (with contextual components marginalized
+ * out of the likelihood matrix).
+ *
+ * @author Kiran Garimella
+ */
 public class BasecallingReadModel {
    private BasecallingBaseModel[] basemodels = null;

+    /**
+     * Constructor for BasecallingReadModel.
+     *
+     * @param readLength    the length of the read that this model will support
+     */
    public BasecallingReadModel(int readLength) {
        basemodels = new BasecallingBaseModel[readLength];

@ -13,14 +26,42 @@ public class BasecallingReadModel {
        }
    }

+    /**
+     * Add a single training point to the model.
+     * @param cycle         the cycle for which this point should be added
+     * @param basePrev      the previous base
+     * @param baseCur       the current base
+     * @param qualCur       the current base's quality
+     * @param fourintensity the four intensities of the current base
+     */
    public void addTrainingPoint(int cycle, char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
        basemodels[cycle].addTrainingPoint(basePrev, baseCur, qualCur, fourintensity);
    }

+    /**
+     * Compute the likelihood matrix for a given cycle.
+     *
+     * @param cycle         the cycle number for the current base
+     * @param basePrev      the previous cycle's base
+     * @param qualPrev      the quality score for the previous cycle's base
+     * @param fourintensity the four intensities for the current cycle's base
+     * @return 4x4 matrix of likelihoods
+     */
    public double[][] computeLikelihoods(int cycle, char basePrev, byte qualPrev, double[] fourintensity) {
        return basemodels[cycle].computeLikelihoods(cycle, basePrev, qualPrev, fourintensity);
    }

+    /**
+     * Compute the probability distribution for the base at a given cycle.
+     * Contextual components of the likelihood matrix are marginalized out.
+     *
+     * @param cycle         the cycle number for the current base
+     * @param basePrev      the previous cycle's base
+     * @param qualPrev      the quality score for the previous cycle's base
+     * @param fourintensity the four intensities for the current cycle's base
+     * @return an instance of FourProb, which encodes a base hypothesis, its probability,
+     *         and the ranking among the other hypotheses
+     */
    public FourProb computeProbabilities(int cycle, char basePrev, byte qualPrev, double[] fourintensity) {
        double[][] likes = computeLikelihoods(cycle, basePrev, qualPrev, fourintensity);

--- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java
+++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java
@ -85,8 +85,6 @@ public class FourBaseRecaller extends CommandLineProgram {
            byte[] nextbestqual = new byte[bases.length()];

            for (int cycle = 0; cycle < bases.length(); cycle++) {
-                //char basePrev = (cycle == 0) ? '*' : bases.charAt(cycle - 1);
-                //byte qualPrev = (cycle == 0) ? 0 : quals[cycle - 1];
                char basePrev = (cycle == 0) ? '*' : (char) asciiseq[cycle - 1];
                byte qualPrev = (cycle == 0) ? 0 : bestqual[cycle - 1];
                double[] fourintensity = intensities[cycle + cycle_offset];
--- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourProb.java
+++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourProb.java
@ -3,10 +3,21 @@ package org.broadinstitute.sting.playground.fourbasecaller;
 import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.QualityUtils;

+/**
+ * FourProb represents four base hypotheses, their probabilities, and their ranking among one another.
+ *
+ * @author Kiran Garimella
+ */
 public class FourProb {
    private double[] baseProbs;
    private int[] baseIndices;

+    /**
+     * Constructor for FourProb.
+     *
+     * @param baseIndices the unsorted base indices (A:0, C:1, G:2, T:3).  Now that I think about it, this is stupid.
+     * @param baseProbs   the unsorted base hypothesis probabilities.
+     */
    public FourProb(int[] baseIndices, double[] baseProbs) {
        Integer[] perm = Utils.SortPermutation(baseProbs);
        double[] ascendingBaseProbs = Utils.PermuteArray(baseProbs, perm);
@ -21,11 +32,40 @@ public class FourProb {
        }
    }

+    /**
+     * Returns the index of the base at the specified rank.
+     *
+     * @param rank (0 = best, 3 = worst) the rank of the base whose index should be returned
+     * @return the index (0, 1, 2, 3).
+     */
    public int indexAtRank(int rank) { return baseIndices[rank]; }
+
+    /**
+     * Returns the base label of the base at the specified rank.
+     * @param rank (0 = best, 3 = worst) the rank of the base whose index should be returned
+     * @return the base label (A, C, G, T).
+     */
    public char baseAtRank(int rank) { return baseIndexToBase(indexAtRank(rank)); }
+
+    /**
+     * Returns the probability of the base at the specified rank.
+     * @param rank (0 = best, 3 = worst) the rank of the base whose index should be returned
+     * @return the probability of the base (0.0-1.0)
+     */
    public double probAtRank(int rank) { return baseProbs[rank]; }
+
+    /**
+     * Returns the quality score of the base at the specified rank.
+     * @param rank (0 = best, 3 = worst) the rank of the base whose index should be returned
+     * @return the quality score of the base (0-40)
+     */
    public byte qualAtRank(int rank) { return QualityUtils.probToQual(probAtRank(rank)); }

+    /**
+     * A utility method to convert a base index into a base label.
+     * @param baseIndex the index of the base (0, 1, 2, 3).
+     * @return A, C, G, T, or '.' if the base index can't be understood.
+     */
    private char baseIndexToBase(int baseIndex) {
        switch (baseIndex) {
            case 0: return 'A';
@ -36,6 +76,11 @@ public class FourProb {
        }
    }

+    /**
+     * Prettily formats the FourProb info.
+     * 
+     * @return a prettily formatted Sting containing the base and quality score in rank order.
+     */
    public String toString() {
        return (
                "[" + baseAtRank(0) + ":" + qualAtRank(0) + " "
--- a/java/src/org/broadinstitute/sting/utils/QualityUtils.java
+++ b/java/src/org/broadinstitute/sting/utils/QualityUtils.java
@ -1,16 +1,47 @@
 package org.broadinstitute.sting.utils;

+/**
+ * QualityUtils is a static class (no instantiation allowed!) with some utility methods for manipulating
+ * quality scores.
+ *
+ * @author Kiran Garimella
+ */
 public class QualityUtils {
+    /**
+     * Private constructor.  No instantiating this class!
+     */
    private QualityUtils() {}
-    
+
+    /**
+     * Convert a quality score to a probability.  This is the Phred-style
+     * conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
+     *
+     * @param qual a quality score (0-40)
+     * @return a probability (0.0-1.0)
+     */
    static public double qualToProb(byte qual) {
        return 1.0 - Math.pow(10.0, ((double) qual)/-10.0);
    }

+    /**
+     * Convert a probability to a quality score.  Note, this is capped at Q40.
+     *
+     * @param prob a probability (0.0-1.0)
+     * @return a quality score (0-40)
+     */
    static public byte probToQual(double prob) {
        return (byte) Math.round(-10.0*Math.log10(1.0 - prob + 0.0001));
    }

+    /**
+     * Compress a base and a probability into a single byte so that it can be output in a SAMRecord's SQ field.
+     * Note: the highest probability this function can encode is 64%, so this function should only never be used on the best base hypothesis.
+     * Another note: the probability encoded here gets rounded to the nearest 1%.
+     *
+     * @param baseIndex the base index
+     * @param prob      the base probability
+     * @return a byte containing the index and the probability
+     */
    static public byte baseAndProbToCompressedQuality(int baseIndex, double prob) {
        byte compressedQual = (byte) baseIndex;
        byte cprob = (byte) (100.0*prob);