148 lines
5.7 KiB
Java
148 lines
5.7 KiB
Java
|
|
package org.broadinstitute.sting.secondarybase;
|
||
|
|
|
||
|
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||
|
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||
|
|
import org.broadinstitute.sting.secondarybase.BasecallingBaseModel;
|
||
|
|
import org.broadinstitute.sting.secondarybase.FourProb;
|
||
|
|
|
||
|
|
import java.io.File;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* BasecallingReadModel represents the statistical models for
|
||
|
|
* all bases in all cycles. It allows for easy training via
|
||
|
|
* the addTrainingPoint() method, and for the computation of
|
||
|
|
* the 4x4 likelihood matrix or the 1x4 probability vector
|
||
|
|
* (with contextual components marginalized out of the
|
||
|
|
* likelihood matrix).
|
||
|
|
*
|
||
|
|
* @author Kiran Garimella
|
||
|
|
*/
|
||
|
|
public class BasecallingReadModel {
|
||
|
|
private BasecallingBaseModel[] basemodels = null;
|
||
|
|
private boolean correctForContext = false;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Constructor for BasecallingReadModel.
|
||
|
|
*
|
||
|
|
* @param readLength the length of the read that this model will support
|
||
|
|
*/
|
||
|
|
public BasecallingReadModel(int readLength, boolean correctForContext) {
|
||
|
|
this.correctForContext = correctForContext;
|
||
|
|
|
||
|
|
basemodels = new BasecallingBaseModel[readLength];
|
||
|
|
|
||
|
|
for (int cycle = 0; cycle < readLength; cycle++) {
|
||
|
|
basemodels[cycle] = new BasecallingBaseModel(cycle == 0 ? false : correctForContext);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Add a single training point to the model means.
|
||
|
|
*
|
||
|
|
* @param cycle the cycle for which this point should be added
|
||
|
|
* @param fourintensity the four intensities of the current base
|
||
|
|
*/
|
||
|
|
public void addMeanPoint(int cycle, double[][] probMatrix, double[] fourintensity) {
|
||
|
|
basemodels[cycle].addMeanPoint(probMatrix, fourintensity);
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Add a single training point to the model covariances.
|
||
|
|
*
|
||
|
|
* @param cycle the cycle for which this point should be added
|
||
|
|
* @param fourintensity the four intensities of the current base
|
||
|
|
*/
|
||
|
|
public void addCovariancePoint(int cycle, double[][] probMatrix, double[] fourintensity) {
|
||
|
|
basemodels[cycle].addCovariancePoint(probMatrix, fourintensity);
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Compute the likelihood matrix for a given cycle.
|
||
|
|
*
|
||
|
|
* @param cycle the cycle number for the current base
|
||
|
|
* @param fourintensity the four intensities for the current cycle's base
|
||
|
|
* @return 4x4 matrix of likelihoods
|
||
|
|
*/
|
||
|
|
public double[][] computeLikelihoods(int cycle, double[] fourintensity) {
|
||
|
|
return basemodels[cycle].computeLikelihoods(cycle, fourintensity);
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Compute the probability distribution for the base at a given cycle.
|
||
|
|
*
|
||
|
|
* @param cycle the cycle number for the current base
|
||
|
|
* @param fourintensity the four intensities for the current cycle's base
|
||
|
|
* @return an instance of FourProb, which encodes a base hypothesis, its probability,
|
||
|
|
* and the ranking among the other hypotheses
|
||
|
|
*/
|
||
|
|
public FourProb computeProbabilities(int cycle, char basePrev, byte qualPrev, double[] fourintensity) {
|
||
|
|
double[][] likes = computeLikelihoods(cycle, fourintensity);
|
||
|
|
|
||
|
|
double total = 0;
|
||
|
|
|
||
|
|
for (int basePrevIndex = 0; basePrevIndex < likes.length; basePrevIndex++) {
|
||
|
|
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||
|
|
double prior = 1.0;
|
||
|
|
if (correctForContext) {
|
||
|
|
double prob = QualityUtils.qualToProb(qualPrev);
|
||
|
|
if (basePrevIndex == BaseUtils.simpleBaseToBaseIndex(basePrev)) {
|
||
|
|
prior = prob;
|
||
|
|
} else {
|
||
|
|
prior = (1.0 - prob)/((double) (4*likes.length - 1));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
likes[basePrevIndex][baseCurIndex] = prior*likes[basePrevIndex][baseCurIndex];
|
||
|
|
total += likes[basePrevIndex][baseCurIndex];
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
for (int basePrevIndex = 0; basePrevIndex < likes.length; basePrevIndex++) {
|
||
|
|
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||
|
|
likes[basePrevIndex][baseCurIndex] /= total;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
FourProb fp = new FourProb(likes);
|
||
|
|
|
||
|
|
return fp;
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Returns the base probability matrix
|
||
|
|
*
|
||
|
|
* @param cycle the cycle of the base
|
||
|
|
* @param basePrev the previous base
|
||
|
|
* @param baseCur the current base
|
||
|
|
* @param probCur the probability of the current base
|
||
|
|
* @return the base probability matrix (1x4 if no contextual correction, 4x4 otherwise)
|
||
|
|
*/
|
||
|
|
public double[][] getBaseProbabilityMatrix(int cycle, char basePrev, char baseCur, double probCur) {
|
||
|
|
double[][] dist = new double[(correctForContext && cycle > 0) ? 4 : 1][4];
|
||
|
|
|
||
|
|
int actualBasePrevIndex = (correctForContext && cycle > 0) ? BaseUtils.simpleBaseToBaseIndex(basePrev) : 0;
|
||
|
|
int actualBaseCurIndex = BaseUtils.simpleBaseToBaseIndex(baseCur);
|
||
|
|
|
||
|
|
double residualTheories = (double) (dist.length*dist[0].length - 1);
|
||
|
|
|
||
|
|
for (int basePrevIndex = 0; basePrevIndex < dist.length; basePrevIndex++) {
|
||
|
|
for (int baseCurIndex = 0; baseCurIndex < dist[basePrevIndex].length; baseCurIndex++) {
|
||
|
|
dist[basePrevIndex][baseCurIndex] = (basePrevIndex == actualBasePrevIndex && baseCurIndex == actualBaseCurIndex) ? probCur : ((1.0 - probCur)/residualTheories);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return dist;
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Writes model parameters to a file per cycle.
|
||
|
|
*
|
||
|
|
* @param dir the directory where the parameters should be written
|
||
|
|
*/
|
||
|
|
public void write(File dir) {
|
||
|
|
for (int cycle = 0; cycle < basemodels.length; cycle++) {
|
||
|
|
File outparam = new File(dir.getPath() + "/param." + cycle + ".r");
|
||
|
|
basemodels[cycle].write(outparam);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|