Trains and calls a read at a time rather than a base at a time (which, given it's name, it should have done in the first place)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@705 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-05-14 16:57:00 +00:00
parent e4770885fd
commit 5824dea0c1
1 changed files with 77 additions and 0 deletions

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.secondarybase.BasecallingBaseModel;
import org.broadinstitute.sting.secondarybase.FourProb;
import java.io.File;
import java.util.ArrayList;
/**
* BasecallingReadModel represents the statistical models for
@ -36,6 +37,20 @@ public class BasecallingReadModel {
}
}
public void train(BasecallingTrainingSet trainingSet) {
ArrayList<RawRead> trainingData = trainingSet.getTrainingData();
for (int readIndex = 0; readIndex < trainingData.size(); readIndex++) {
RawRead read = trainingData.get(readIndex);
addMeanPoints(read);
}
for (int readIndex = 0; readIndex < trainingData.size(); readIndex++) {
RawRead read = trainingData.get(readIndex);
addCovariancePoints(read);
}
}
/**
* Add a single training point to the model means.
*
@ -46,6 +61,27 @@ public class BasecallingReadModel {
basemodels[cycle].addMeanPoint(probMatrix, fourintensity);
}
public void addMeanPoints(RawRead read) {
byte[] seqs = read.getSequence();
byte[] quals = read.getQuals();
short[][] ints = read.getIntensities();
for (int cycle = 0; cycle < seqs.length; cycle++) {
char basePrev = (char) ((cycle == 0) ? '.' : seqs[cycle - 1]);
char baseCur = (char) seqs[cycle];
double probCur = QualityUtils.qualToProb(quals[cycle]);
double[][] probMatrix = getBaseProbabilityMatrix(cycle, basePrev, baseCur, probCur);
double[] fourIntensity = new double[4];
for (int channel = 0; channel < 4; channel++) {
fourIntensity[channel] = (double) ints[cycle][channel];
}
basemodels[cycle].addMeanPoint(probMatrix, fourIntensity);
}
}
/**
* Add a single training point to the model covariances.
*
@ -56,6 +92,27 @@ public class BasecallingReadModel {
basemodels[cycle].addCovariancePoint(probMatrix, fourintensity);
}
public void addCovariancePoints(RawRead read) {
byte[] seqs = read.getSequence();
byte[] quals = read.getQuals();
short[][] ints = read.getIntensities();
for (int cycle = 0; cycle < seqs.length; cycle++) {
char basePrev = (char) ((cycle == 0) ? '.' : seqs[cycle - 1]);
char baseCur = (char) seqs[cycle];
double probCur = QualityUtils.qualToProb(quals[cycle]);
double[][] probMatrix = getBaseProbabilityMatrix(cycle, basePrev, baseCur, probCur);
double[] fourIntensity = new double[4];
for (int channel = 0; channel < 4; channel++) {
fourIntensity[channel] = (double) ints[cycle][channel];
}
basemodels[cycle].addCovariancePoint(probMatrix, fourIntensity);
}
}
/**
* Compute the likelihood matrix for a given cycle.
*
@ -107,6 +164,26 @@ public class BasecallingReadModel {
return fp;
}
public FourProbRead call(RawRead read) {
FourProbRead fpr = new FourProbRead(read.getReadLength());
for (int cycle = 0; cycle < read.getReadLength(); cycle++) {
char basePrev = (char) ((cycle == 0) ? '.' : read.getSequence()[cycle - 1]);
byte qualPrev = ((cycle == 0) ? 0 : read.getQuals()[cycle - 1]);
double[] fourIntensity = new double[4];
for (int channel = 0; channel < 4; channel++) {
fourIntensity[channel] = (double) read.getIntensities()[cycle][channel];
}
//fps[cycle] = computeProbabilities(cycle, basePrev, qualPrev, fourIntensity);
fpr.add(cycle, computeProbabilities(cycle, basePrev, qualPrev, fourIntensity));
}
return fpr;
}
/**
* Returns the base probability matrix
*