101 lines
3.8 KiB
Java
101 lines
3.8 KiB
Java
package org.broadinstitute.sting.playground.fourbasecaller;
|
|
|
|
import java.io.File;
|
|
|
|
/**
|
|
* BasecallingReadModel represents the statistical models for
|
|
* all bases in all cycles. It allows for easy, one-pass
|
|
* training via the addTrainingPoint() method, and for the
|
|
* computation of the 4x4 likelihood matrix or the 1x4
|
|
* probability vector (with contextual components marginalized
|
|
* out of the likelihood matrix).
|
|
*
|
|
* @author Kiran Garimella
|
|
*/
|
|
public class BasecallingReadModel {
|
|
private BasecallingBaseModel[] basemodels = null;
|
|
|
|
/**
|
|
* Constructor for BasecallingReadModel.
|
|
*
|
|
* @param readLength the length of the read that this model will support
|
|
*/
|
|
public BasecallingReadModel(int readLength) {
|
|
basemodels = new BasecallingBaseModel[readLength];
|
|
|
|
for (int i = 0; i < readLength; i++) {
|
|
basemodels[i] = new BasecallingBaseModel();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Add a single training point to the model.
|
|
*
|
|
* @param cycle the cycle for which this point should be added
|
|
* @param basePrev the previous base
|
|
* @param baseCur the current base
|
|
* @param qualCur the current base's quality
|
|
* @param fourintensity the four intensities of the current base
|
|
*/
|
|
public void addTrainingPoint(int cycle, char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
|
|
basemodels[cycle].addTrainingPoint(basePrev, baseCur, qualCur, fourintensity);
|
|
}
|
|
|
|
/**
|
|
* Compute the likelihood matrix for a given cycle.
|
|
*
|
|
* @param cycle the cycle number for the current base
|
|
* @param basePrev the previous cycle's base
|
|
* @param qualPrev the quality score for the previous cycle's base
|
|
* @param fourintensity the four intensities for the current cycle's base
|
|
* @return 4x4 matrix of likelihoods
|
|
*/
|
|
public double[][] computeLikelihoods(int cycle, char basePrev, byte qualPrev, double[] fourintensity) {
|
|
return basemodels[cycle].computeLikelihoods(cycle, basePrev, qualPrev, fourintensity);
|
|
}
|
|
|
|
/**
|
|
* Compute the probability distribution for the base at a given cycle.
|
|
* Contextual components of the likelihood matrix are marginalized out.
|
|
*
|
|
* @param cycle the cycle number for the current base
|
|
* @param basePrev the previous cycle's base
|
|
* @param qualPrev the quality score for the previous cycle's base
|
|
* @param fourintensity the four intensities for the current cycle's base
|
|
* @return an instance of FourProb, which encodes a base hypothesis, its probability,
|
|
* and the ranking among the other hypotheses
|
|
*/
|
|
public FourProb computeProbabilities(int cycle, char basePrev, byte qualPrev, double[] fourintensity) {
|
|
double[][] likes = computeLikelihoods(cycle, basePrev, qualPrev, fourintensity);
|
|
|
|
double[] probs = new double[4];
|
|
int[] baseindices = { 0, 1, 2, 3 };
|
|
double total = 0;
|
|
|
|
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
|
for (int basePrevIndex = 0; basePrevIndex < 4; basePrevIndex++) {
|
|
probs[baseCurIndex] += likes[basePrevIndex][baseCurIndex];
|
|
}
|
|
total += probs[baseCurIndex];
|
|
}
|
|
|
|
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
|
probs[baseCurIndex] /= total;
|
|
}
|
|
|
|
return new FourProb(baseindices, probs);
|
|
}
|
|
|
|
/**
|
|
* Writes model parameters to a file per cycle.
|
|
*
|
|
* @param dir the directory where the parameters should be written
|
|
*/
|
|
public void write(File dir) {
|
|
for (int cycle = 0; cycle < basemodels.length; cycle++) {
|
|
File outparam = new File(dir.getPath() + "/param." + cycle);
|
|
basemodels[cycle].write(outparam);
|
|
}
|
|
}
|
|
}
|