Trains and calls a read at a time rather than a base at a time (which, given its name, it should have done in the first place).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@705 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e4770885fd
commit
5824dea0c1
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.secondarybase.BasecallingBaseModel;
|
||||||
import org.broadinstitute.sting.secondarybase.FourProb;
|
import org.broadinstitute.sting.secondarybase.FourProb;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* BasecallingReadModel represents the statistical models for
|
* BasecallingReadModel represents the statistical models for
|
||||||
|
|
@ -36,6 +37,20 @@ public class BasecallingReadModel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Trains the model on every read in the supplied training set.
 *
 * The training data is traversed twice: first every read is passed to
 * {@code addMeanPoints}, then every read is passed to
 * {@code addCovariancePoints}.
 *
 * @param trainingSet source of the reads, obtained via {@code getTrainingData()}
 */
public void train(BasecallingTrainingSet trainingSet) {
|
||||||
|
ArrayList<RawRead> trainingData = trainingSet.getTrainingData();
|
||||||
|
|
||||||
|
// Pass 1: accumulate the mean contribution of every read.
for (int readIndex = 0; readIndex < trainingData.size(); readIndex++) {
|
||||||
|
RawRead read = trainingData.get(readIndex);
|
||||||
|
addMeanPoints(read);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pass 2: accumulate the covariance contribution of every read, only after
// all mean points have been added.
// NOTE(review): presumably the covariance update relies on the completed
// means - confirm against BasecallingBaseModel.
for (int readIndex = 0; readIndex < trainingData.size(); readIndex++) {
|
||||||
|
RawRead read = trainingData.get(readIndex);
|
||||||
|
addCovariancePoints(read);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add a single training point to the model means.
|
* Add a single training point to the model means.
|
||||||
*
|
*
|
||||||
|
|
@ -46,6 +61,27 @@ public class BasecallingReadModel {
|
||||||
basemodels[cycle].addMeanPoint(probMatrix, fourintensity);
|
basemodels[cycle].addMeanPoint(probMatrix, fourintensity);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Adds one mean training point per cycle of the given read.
 *
 * For each cycle, a base probability matrix is built from the previous base,
 * the current base and the current base's quality-derived probability, and is
 * handed together with the cycle's four channel intensities to that cycle's
 * base model via {@code addMeanPoint}.
 *
 * @param read the read supplying sequence, qualities and intensities
 */
public void addMeanPoints(RawRead read) {
|
||||||
|
byte[] seqs = read.getSequence();
|
||||||
|
byte[] quals = read.getQuals();
|
||||||
|
short[][] ints = read.getIntensities();
|
||||||
|
|
||||||
|
// The sequence length drives the loop.
// NOTE(review): assumes quals, ints and basemodels each hold at least
// seqs.length entries - no bounds check is made here; confirm with callers.
for (int cycle = 0; cycle < seqs.length; cycle++) {
|
||||||
|
// The first cycle has no predecessor, so '.' stands in for the previous base.
char basePrev = (char) ((cycle == 0) ? '.' : seqs[cycle - 1]);
|
||||||
|
char baseCur = (char) seqs[cycle];
|
||||||
|
double probCur = QualityUtils.qualToProb(quals[cycle]);
|
||||||
|
|
||||||
|
double[][] probMatrix = getBaseProbabilityMatrix(cycle, basePrev, baseCur, probCur);
|
||||||
|
|
||||||
|
// Widen this cycle's four short channel intensities to doubles.
double[] fourIntensity = new double[4];
|
||||||
|
for (int channel = 0; channel < 4; channel++) {
|
||||||
|
fourIntensity[channel] = (double) ints[cycle][channel];
|
||||||
|
}
|
||||||
|
|
||||||
|
basemodels[cycle].addMeanPoint(probMatrix, fourIntensity);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add a single training point to the model covariances.
|
* Add a single training point to the model covariances.
|
||||||
*
|
*
|
||||||
|
|
@ -56,6 +92,27 @@ public class BasecallingReadModel {
|
||||||
basemodels[cycle].addCovariancePoint(probMatrix, fourintensity);
|
basemodels[cycle].addCovariancePoint(probMatrix, fourintensity);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Adds one covariance training point per cycle of the given read.
 *
 * For each cycle, a base probability matrix is built from the previous base,
 * the current base and the current base's quality-derived probability, and is
 * handed together with the cycle's four channel intensities to that cycle's
 * base model via {@code addCovariancePoint}.
 *
 * @param read the read supplying sequence, qualities and intensities
 */
public void addCovariancePoints(RawRead read) {
|
||||||
|
byte[] seqs = read.getSequence();
|
||||||
|
byte[] quals = read.getQuals();
|
||||||
|
short[][] ints = read.getIntensities();
|
||||||
|
|
||||||
|
// The sequence length drives the loop.
// NOTE(review): assumes quals, ints and basemodels each hold at least
// seqs.length entries - no bounds check is made here; confirm with callers.
for (int cycle = 0; cycle < seqs.length; cycle++) {
|
||||||
|
// The first cycle has no predecessor, so '.' stands in for the previous base.
char basePrev = (char) ((cycle == 0) ? '.' : seqs[cycle - 1]);
|
||||||
|
char baseCur = (char) seqs[cycle];
|
||||||
|
double probCur = QualityUtils.qualToProb(quals[cycle]);
|
||||||
|
|
||||||
|
double[][] probMatrix = getBaseProbabilityMatrix(cycle, basePrev, baseCur, probCur);
|
||||||
|
|
||||||
|
// Widen this cycle's four short channel intensities to doubles.
double[] fourIntensity = new double[4];
|
||||||
|
for (int channel = 0; channel < 4; channel++) {
|
||||||
|
fourIntensity[channel] = (double) ints[cycle][channel];
|
||||||
|
}
|
||||||
|
|
||||||
|
basemodels[cycle].addCovariancePoint(probMatrix, fourIntensity);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute the likelihood matrix for a given cycle.
|
* Compute the likelihood matrix for a given cycle.
|
||||||
*
|
*
|
||||||
|
|
@ -107,6 +164,26 @@ public class BasecallingReadModel {
|
||||||
return fp;
|
return fp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Computes base probabilities for every cycle of the given read.
 *
 * For each cycle, the previous base and previous quality are taken from the
 * read's own sequence and quality arrays (not from the results produced by
 * this loop) and passed with the cycle's four channel intensities to
 * {@code computeProbabilities}; the result is stored at that cycle's index.
 *
 * @param read the read supplying sequence, qualities and intensities
 * @return a {@code FourProbRead} sized to the read length, filled one entry per cycle
 */
public FourProbRead call(RawRead read) {
|
||||||
|
FourProbRead fpr = new FourProbRead(read.getReadLength());
|
||||||
|
|
||||||
|
for (int cycle = 0; cycle < read.getReadLength(); cycle++) {
|
||||||
|
// The first cycle has no predecessor: '.' stands in for the previous base
// and 0 for the previous quality.
char basePrev = (char) ((cycle == 0) ? '.' : read.getSequence()[cycle - 1]);
|
||||||
|
byte qualPrev = ((cycle == 0) ? 0 : read.getQuals()[cycle - 1]);
|
||||||
|
|
||||||
|
// Widen this cycle's four short channel intensities to doubles.
double[] fourIntensity = new double[4];
|
||||||
|
for (int channel = 0; channel < 4; channel++) {
|
||||||
|
fourIntensity[channel] = (double) read.getIntensities()[cycle][channel];
|
||||||
|
}
|
||||||
|
|
||||||
|
//fps[cycle] = computeProbabilities(cycle, basePrev, qualPrev, fourIntensity);
|
||||||
|
fpr.add(cycle, computeProbabilities(cycle, basePrev, qualPrev, fourIntensity));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return fpr;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the base probability matrix
|
* Returns the base probability matrix
|
||||||
*
|
*
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue