Trains and calls a read at a time rather than a base at a time (which, given it's name, it should have done in the first place)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@705 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e4770885fd
commit
5824dea0c1
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.secondarybase.BasecallingBaseModel;
|
|||
import org.broadinstitute.sting.secondarybase.FourProb;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* BasecallingReadModel represents the statistical models for
|
||||
|
|
@ -36,6 +37,20 @@ public class BasecallingReadModel {
|
|||
}
|
||||
}
|
||||
|
||||
public void train(BasecallingTrainingSet trainingSet) {
|
||||
ArrayList<RawRead> trainingData = trainingSet.getTrainingData();
|
||||
|
||||
for (int readIndex = 0; readIndex < trainingData.size(); readIndex++) {
|
||||
RawRead read = trainingData.get(readIndex);
|
||||
addMeanPoints(read);
|
||||
}
|
||||
|
||||
for (int readIndex = 0; readIndex < trainingData.size(); readIndex++) {
|
||||
RawRead read = trainingData.get(readIndex);
|
||||
addCovariancePoints(read);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a single training point to the model means.
|
||||
*
|
||||
|
|
@ -46,6 +61,27 @@ public class BasecallingReadModel {
|
|||
basemodels[cycle].addMeanPoint(probMatrix, fourintensity);
|
||||
}
|
||||
|
||||
public void addMeanPoints(RawRead read) {
|
||||
byte[] seqs = read.getSequence();
|
||||
byte[] quals = read.getQuals();
|
||||
short[][] ints = read.getIntensities();
|
||||
|
||||
for (int cycle = 0; cycle < seqs.length; cycle++) {
|
||||
char basePrev = (char) ((cycle == 0) ? '.' : seqs[cycle - 1]);
|
||||
char baseCur = (char) seqs[cycle];
|
||||
double probCur = QualityUtils.qualToProb(quals[cycle]);
|
||||
|
||||
double[][] probMatrix = getBaseProbabilityMatrix(cycle, basePrev, baseCur, probCur);
|
||||
|
||||
double[] fourIntensity = new double[4];
|
||||
for (int channel = 0; channel < 4; channel++) {
|
||||
fourIntensity[channel] = (double) ints[cycle][channel];
|
||||
}
|
||||
|
||||
basemodels[cycle].addMeanPoint(probMatrix, fourIntensity);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a single training point to the model covariances.
|
||||
*
|
||||
|
|
@ -56,6 +92,27 @@ public class BasecallingReadModel {
|
|||
basemodels[cycle].addCovariancePoint(probMatrix, fourintensity);
|
||||
}
|
||||
|
||||
public void addCovariancePoints(RawRead read) {
|
||||
byte[] seqs = read.getSequence();
|
||||
byte[] quals = read.getQuals();
|
||||
short[][] ints = read.getIntensities();
|
||||
|
||||
for (int cycle = 0; cycle < seqs.length; cycle++) {
|
||||
char basePrev = (char) ((cycle == 0) ? '.' : seqs[cycle - 1]);
|
||||
char baseCur = (char) seqs[cycle];
|
||||
double probCur = QualityUtils.qualToProb(quals[cycle]);
|
||||
|
||||
double[][] probMatrix = getBaseProbabilityMatrix(cycle, basePrev, baseCur, probCur);
|
||||
|
||||
double[] fourIntensity = new double[4];
|
||||
for (int channel = 0; channel < 4; channel++) {
|
||||
fourIntensity[channel] = (double) ints[cycle][channel];
|
||||
}
|
||||
|
||||
basemodels[cycle].addCovariancePoint(probMatrix, fourIntensity);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the likelihood matrix for a given cycle.
|
||||
*
|
||||
|
|
@ -107,6 +164,26 @@ public class BasecallingReadModel {
|
|||
return fp;
|
||||
}
|
||||
|
||||
public FourProbRead call(RawRead read) {
|
||||
FourProbRead fpr = new FourProbRead(read.getReadLength());
|
||||
|
||||
for (int cycle = 0; cycle < read.getReadLength(); cycle++) {
|
||||
char basePrev = (char) ((cycle == 0) ? '.' : read.getSequence()[cycle - 1]);
|
||||
byte qualPrev = ((cycle == 0) ? 0 : read.getQuals()[cycle - 1]);
|
||||
|
||||
double[] fourIntensity = new double[4];
|
||||
for (int channel = 0; channel < 4; channel++) {
|
||||
fourIntensity[channel] = (double) read.getIntensities()[cycle][channel];
|
||||
}
|
||||
|
||||
//fps[cycle] = computeProbabilities(cycle, basePrev, qualPrev, fourIntensity);
|
||||
fpr.add(cycle, computeProbabilities(cycle, basePrev, qualPrev, fourIntensity));
|
||||
|
||||
}
|
||||
|
||||
return fpr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the base probability matrix
|
||||
*
|
||||
|
|
|
|||
Loading…
Reference in New Issue