JavaDocs!
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@290 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
42eb356782
commit
ef06924f73
|
|
@ -8,6 +8,16 @@ import cern.colt.matrix.linalg.Algebra;
|
|||
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
/**
|
||||
* BasecallingBaseModel is a class that represents the statistical
|
||||
* model for all bases at a given cycle. It allows for easy, one
|
||||
* pass training via the addTrainingPoint() method. Once the model
|
||||
* is trained, computeLikelihoods will return the probability matrix
|
||||
* over previous cycle's base hypotheses and current cycle base
|
||||
* hypotheses (contextual prior is included in these likelihoods).
|
||||
*
|
||||
* @author Kiran Garimella
|
||||
*/
|
||||
public class BasecallingBaseModel {
|
||||
private double[][] counts;
|
||||
|
||||
|
|
@ -19,8 +29,11 @@ public class BasecallingBaseModel {
|
|||
private DoubleMatrix2D[][] inverseCovariances;
|
||||
private double[][] norms;
|
||||
|
||||
Algebra alg;
|
||||
private Algebra alg;
|
||||
|
||||
/**
|
||||
* Constructor for BasecallingBaseModel
|
||||
*/
|
||||
public BasecallingBaseModel() {
|
||||
counts = new double[4][4];
|
||||
|
||||
|
|
@ -44,6 +57,13 @@ public class BasecallingBaseModel {
|
|||
alg = new Algebra();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a single training point to the model.
|
||||
* @param basePrev the previous cycle's base call (A, C, G, T, or * for the first cycle)
|
||||
* @param baseCur the current cycle's base call (A, C, G, T)
|
||||
* @param qualCur the quality score for the current cycle's base call
|
||||
* @param fourintensity the four intensities for the current cycle's base call
|
||||
*/
|
||||
public void addTrainingPoint(char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
|
||||
int basePrevIndex = baseToBaseIndex(basePrev);
|
||||
int baseCurIndex = baseToBaseIndex(baseCur);
|
||||
|
|
@ -67,6 +87,9 @@ public class BasecallingBaseModel {
|
|||
readyToCall = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Precompute all the matrix inversions and determinants we'll need for computing the likelihood distributions.
|
||||
*/
|
||||
public void prepareToCallBases() {
|
||||
for (int basePrevIndex = 0; basePrevIndex < 4; basePrevIndex++) {
|
||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||
|
|
@ -88,6 +111,16 @@ public class BasecallingBaseModel {
|
|||
readyToCall = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the likelihood matrix for a base (contextual priors included).
|
||||
*
|
||||
* @param cycle the cycle we're calling right now
|
||||
* @param basePrev the previous cycle's base
|
||||
* @param qualPrev the previous cycle's quality score
|
||||
* @param fourintensity the four intensities of the current cycle's base
|
||||
* @return a 4x4 matrix of likelihoods, where the row is the previous cycle base hypothesis and
|
||||
* the column is the current cycle base hypothesis
|
||||
*/
|
||||
public double[][] computeLikelihoods(int cycle, char basePrev, byte qualPrev, double[] fourintensity) {
|
||||
if (!readyToCall) {
|
||||
prepareToCallBases();
|
||||
|
|
@ -115,6 +148,11 @@ public class BasecallingBaseModel {
|
|||
return probdist;
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method for converting a base ([Aa*], [Cc], [Gg], [Tt]) to an index (0, 1, 2, 3);
|
||||
* @param base
|
||||
* @return 0, 1, 2, 3, or -1 if the base can't be understood.
|
||||
*/
|
||||
private int baseToBaseIndex(char base) {
|
||||
switch (base) {
|
||||
case 'A':
|
||||
|
|
|
|||
|
|
@ -1,10 +1,23 @@
|
|||
package org.broadinstitute.sting.playground.fourbasecaller;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
/**
|
||||
* BasecallingReadModel represents the statistical models for
|
||||
* all bases in all cycles. It allows for easy, one-pass
|
||||
* training via the addTrainingPoint() method, and for the
|
||||
* computation of the 4x4 likelihood matrix or the 1x4
|
||||
* probability vector (with contextual components marginalized
|
||||
* out of the likelihood matrix).
|
||||
*
|
||||
* @author Kiran Garimella
|
||||
*/
|
||||
public class BasecallingReadModel {
|
||||
private BasecallingBaseModel[] basemodels = null;
|
||||
|
||||
/**
|
||||
* Constructor for BasecallingReadModel.
|
||||
*
|
||||
* @param readLength the length of the read that this model will support
|
||||
*/
|
||||
public BasecallingReadModel(int readLength) {
|
||||
basemodels = new BasecallingBaseModel[readLength];
|
||||
|
||||
|
|
@ -13,14 +26,42 @@ public class BasecallingReadModel {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a single training point to the model.
|
||||
* @param cycle the cycle for which this point should be added
|
||||
* @param basePrev the previous base
|
||||
* @param baseCur the current base
|
||||
* @param qualCur the current base's quality
|
||||
* @param fourintensity the four intensities of the current base
|
||||
*/
|
||||
public void addTrainingPoint(int cycle, char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
|
||||
basemodels[cycle].addTrainingPoint(basePrev, baseCur, qualCur, fourintensity);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the likelihood matrix for a given cycle.
|
||||
*
|
||||
* @param cycle the cycle number for the current base
|
||||
* @param basePrev the previous cycle's base
|
||||
* @param qualPrev the quality score for the previous cycle's base
|
||||
* @param fourintensity the four intensities for the current cycle's base
|
||||
* @return 4x4 matrix of likelihoods
|
||||
*/
|
||||
public double[][] computeLikelihoods(int cycle, char basePrev, byte qualPrev, double[] fourintensity) {
|
||||
return basemodels[cycle].computeLikelihoods(cycle, basePrev, qualPrev, fourintensity);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the probability distribution for the base at a given cycle.
|
||||
* Contextual components of the likelihood matrix are marginalized out.
|
||||
*
|
||||
* @param cycle the cycle number for the current base
|
||||
* @param basePrev the previous cycle's base
|
||||
* @param qualPrev the quality score for the previous cycle's base
|
||||
* @param fourintensity the four intensities for the current cycle's base
|
||||
* @return an instance of FourProb, which encodes a base hypothesis, its probability,
|
||||
* and the ranking among the other hypotheses
|
||||
*/
|
||||
public FourProb computeProbabilities(int cycle, char basePrev, byte qualPrev, double[] fourintensity) {
|
||||
double[][] likes = computeLikelihoods(cycle, basePrev, qualPrev, fourintensity);
|
||||
|
||||
|
|
|
|||
|
|
@ -85,8 +85,6 @@ public class FourBaseRecaller extends CommandLineProgram {
|
|||
byte[] nextbestqual = new byte[bases.length()];
|
||||
|
||||
for (int cycle = 0; cycle < bases.length(); cycle++) {
|
||||
//char basePrev = (cycle == 0) ? '*' : bases.charAt(cycle - 1);
|
||||
//byte qualPrev = (cycle == 0) ? 0 : quals[cycle - 1];
|
||||
char basePrev = (cycle == 0) ? '*' : (char) asciiseq[cycle - 1];
|
||||
byte qualPrev = (cycle == 0) ? 0 : bestqual[cycle - 1];
|
||||
double[] fourintensity = intensities[cycle + cycle_offset];
|
||||
|
|
|
|||
|
|
@ -3,10 +3,21 @@ package org.broadinstitute.sting.playground.fourbasecaller;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
/**
|
||||
* FourProb represents four base hypotheses, their probabilities, and their ranking among one another.
|
||||
*
|
||||
* @author Kiran Garimella
|
||||
*/
|
||||
public class FourProb {
|
||||
private double[] baseProbs;
|
||||
private int[] baseIndices;
|
||||
|
||||
/**
|
||||
* Constructor for FourProb.
|
||||
*
|
||||
* @param baseIndices the unsorted base indices (A:0, C:1, G:2, T:3). Now that I think about it, this is stupid.
|
||||
* @param baseProbs the unsorted base hypothesis probabilities.
|
||||
*/
|
||||
public FourProb(int[] baseIndices, double[] baseProbs) {
|
||||
Integer[] perm = Utils.SortPermutation(baseProbs);
|
||||
double[] ascendingBaseProbs = Utils.PermuteArray(baseProbs, perm);
|
||||
|
|
@ -21,11 +32,40 @@ public class FourProb {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the base at the specified rank.
|
||||
*
|
||||
* @param rank (0 = best, 3 = worst) the rank of the base whose index should be returned
|
||||
* @return the index (0, 1, 2, 3).
|
||||
*/
|
||||
public int indexAtRank(int rank) { return baseIndices[rank]; }
|
||||
|
||||
/**
|
||||
* Returns the base label of the base at the specified rank.
|
||||
* @param rank (0 = best, 3 = worst) the rank of the base whose index should be returned
|
||||
* @return the base label (A, C, G, T).
|
||||
*/
|
||||
public char baseAtRank(int rank) { return baseIndexToBase(indexAtRank(rank)); }
|
||||
|
||||
/**
|
||||
* Returns the probability of the base at the specified rank.
|
||||
* @param rank (0 = best, 3 = worst) the rank of the base whose index should be returned
|
||||
* @return the probability of the base (0.0-1.0)
|
||||
*/
|
||||
public double probAtRank(int rank) { return baseProbs[rank]; }
|
||||
|
||||
/**
|
||||
* Returns the quality score of the base at the specified rank.
|
||||
* @param rank (0 = best, 3 = worst) the rank of the base whose index should be returned
|
||||
* @return the quality score of the base (0-40)
|
||||
*/
|
||||
public byte qualAtRank(int rank) { return QualityUtils.probToQual(probAtRank(rank)); }
|
||||
|
||||
/**
|
||||
* A utility method to convert a base index into a base label.
|
||||
* @param baseIndex the index of the base (0, 1, 2, 3).
|
||||
* @return A, C, G, T, or '.' if the base index can't be understood.
|
||||
*/
|
||||
private char baseIndexToBase(int baseIndex) {
|
||||
switch (baseIndex) {
|
||||
case 0: return 'A';
|
||||
|
|
@ -36,6 +76,11 @@ public class FourProb {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Prettily formats the FourProb info.
|
||||
*
|
||||
* @return a prettily formatted Sting containing the base and quality score in rank order.
|
||||
*/
|
||||
public String toString() {
|
||||
return (
|
||||
"[" + baseAtRank(0) + ":" + qualAtRank(0) + " "
|
||||
|
|
|
|||
|
|
@ -1,16 +1,47 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
/**
|
||||
* QualityUtils is a static class (no instantiation allowed!) with some utility methods for manipulating
|
||||
* quality scores.
|
||||
*
|
||||
* @author Kiran Garimella
|
||||
*/
|
||||
public class QualityUtils {
|
||||
/**
|
||||
* Private constructor. No instantiating this class!
|
||||
*/
|
||||
private QualityUtils() {}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a quality score to a probability. This is the Phred-style
|
||||
* conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
|
||||
*
|
||||
* @param qual a quality score (0-40)
|
||||
* @return a probability (0.0-1.0)
|
||||
*/
|
||||
static public double qualToProb(byte qual) {
|
||||
return 1.0 - Math.pow(10.0, ((double) qual)/-10.0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a probability to a quality score. Note, this is capped at Q40.
|
||||
*
|
||||
* @param prob a probability (0.0-1.0)
|
||||
* @return a quality score (0-40)
|
||||
*/
|
||||
static public byte probToQual(double prob) {
|
||||
return (byte) Math.round(-10.0*Math.log10(1.0 - prob + 0.0001));
|
||||
}
|
||||
|
||||
/**
|
||||
* Compress a base and a probability into a single byte so that it can be output in a SAMRecord's SQ field.
|
||||
* Note: the highest probability this function can encode is 64%, so this function should only never be used on the best base hypothesis.
|
||||
* Another note: the probability encoded here gets rounded to the nearest 1%.
|
||||
*
|
||||
* @param baseIndex the base index
|
||||
* @param prob the base probability
|
||||
* @return a byte containing the index and the probability
|
||||
*/
|
||||
static public byte baseAndProbToCompressedQuality(int baseIndex, double prob) {
|
||||
byte compressedQual = (byte) baseIndex;
|
||||
byte cprob = (byte) (100.0*prob);
|
||||
|
|
|
|||
Loading…
Reference in New Issue