Minor tweaks to how probs are supplied.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@676 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
095dacd154
commit
f1de3d6366
|
|
@ -69,21 +69,13 @@ public class BasecallingBaseModel {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add a single training point to the model to estimate the means.
|
* Add a single training point to the model to estimate the means.
|
||||||
*
|
|
||||||
* @param basePrev the previous cycle's base call (A, C, G, T)
|
|
||||||
* @param baseCur the current cycle's base call (A, C, G, T)
|
|
||||||
*/
|
*/
|
||||||
public void addMeanPoint(char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
|
public void addMeanPoint(double[][] probMatrix, double[] fourIntensity) {
|
||||||
int actualBasePrevIndex = BaseUtils.simpleBaseToBaseIndex(basePrev);
|
|
||||||
int actualBaseCurIndex = BaseUtils.simpleBaseToBaseIndex(baseCur);
|
|
||||||
double actualWeight = QualityUtils.qualToProb(qualCur);
|
|
||||||
|
|
||||||
for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) {
|
for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) {
|
||||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||||
// We want to upweight the correct theory as much as we can and spread the remainder out evenly between all other hypotheses.
|
double weight = probMatrix[basePrevIndex][baseCurIndex];
|
||||||
double weight = (baseCurIndex == actualBaseCurIndex && (!correctForContext || basePrevIndex == actualBasePrevIndex)) ? actualWeight : ((1.0 - actualWeight)/((double) (4*numTheories - 1)));
|
|
||||||
|
|
||||||
DoubleMatrix1D weightedChannelIntensities = (DoubleFactory1D.dense).make(fourintensity);
|
DoubleMatrix1D weightedChannelIntensities = (DoubleFactory1D.dense).make(fourIntensity);
|
||||||
weightedChannelIntensities.assign(F.mult(weight));
|
weightedChannelIntensities.assign(F.mult(weight));
|
||||||
|
|
||||||
sums[basePrevIndex][baseCurIndex].assign(weightedChannelIntensities, F.plus);
|
sums[basePrevIndex][baseCurIndex].assign(weightedChannelIntensities, F.plus);
|
||||||
|
|
@ -96,26 +88,16 @@ public class BasecallingBaseModel {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add a single training point to the model to estimate the covariances.
|
* Add a single training point to the model to estimate the covariances.
|
||||||
*
|
|
||||||
* @param basePrev the previous cycle's base call (A, C, G, T)
|
|
||||||
* @param baseCur the current cycle's base call (A, C, G, T)
|
|
||||||
* @param qualCur the quality score for the current cycle's base call
|
|
||||||
* @param fourintensity the four intensities for the current cycle's base call
|
|
||||||
*/
|
*/
|
||||||
public void addCovariancePoint(char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
|
public void addCovariancePoint(double[][] probMatrix, double[] fourIntensity) {
|
||||||
int actualBasePrevIndex = BaseUtils.simpleBaseToBaseIndex(basePrev);
|
|
||||||
int actualBaseCurIndex = BaseUtils.simpleBaseToBaseIndex(baseCur);
|
|
||||||
double actualWeight = QualityUtils.qualToProb(qualCur);
|
|
||||||
|
|
||||||
for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) {
|
for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) {
|
||||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
||||||
// We want to upweight the correct theory as much as we can and spread the remainder out evenly between all other hypotheses.
|
double weight = probMatrix[basePrevIndex][baseCurIndex];
|
||||||
double weight = (baseCurIndex == actualBaseCurIndex && (!correctForContext || basePrevIndex == actualBasePrevIndex)) ? actualWeight : ((1.0 - actualWeight)/((double) (4*numTheories - 1)));
|
|
||||||
|
|
||||||
DoubleMatrix1D mean = sums[basePrevIndex][baseCurIndex].copy();
|
DoubleMatrix1D mean = sums[basePrevIndex][baseCurIndex].copy();
|
||||||
mean.assign(F.div(counts[basePrevIndex][baseCurIndex]));
|
mean.assign(F.div(counts[basePrevIndex][baseCurIndex]));
|
||||||
|
|
||||||
DoubleMatrix1D sub = (DoubleFactory1D.dense).make(fourintensity);
|
DoubleMatrix1D sub = (DoubleFactory1D.dense).make(fourIntensity);
|
||||||
sub.assign(mean, F.minus);
|
sub.assign(mean, F.minus);
|
||||||
|
|
||||||
DoubleMatrix2D cov = (DoubleFactory2D.dense).make(4, 4);
|
DoubleMatrix2D cov = (DoubleFactory2D.dense).make(4, 4);
|
||||||
|
|
|
||||||
|
|
@ -7,11 +7,11 @@ import java.io.File;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* BasecallingReadModel represents the statistical models for
|
* BasecallingReadModel represents the statistical models for
|
||||||
* all bases in all cycles. It allows for easy, one-pass
|
* all bases in all cycles. It allows for easy training via
|
||||||
* training via the addTrainingPoint() method, and for the
|
* the addTrainingPoint() method, and for the computation of
|
||||||
* computation of the 4x4 likelihood matrix or the 1x4
|
* the 4x4 likelihood matrix or the 1x4 probability vector
|
||||||
* probability vector (with contextual components marginalized
|
* (with contextual components marginalized out of the
|
||||||
* out of the likelihood matrix).
|
* likelihood matrix).
|
||||||
*
|
*
|
||||||
* @author Kiran Garimella
|
* @author Kiran Garimella
|
||||||
*/
|
*/
|
||||||
|
|
@ -38,26 +38,20 @@ public class BasecallingReadModel {
|
||||||
* Add a single training point to the model means.
|
* Add a single training point to the model means.
|
||||||
*
|
*
|
||||||
* @param cycle the cycle for which this point should be added
|
* @param cycle the cycle for which this point should be added
|
||||||
* @param basePrev the previous base
|
|
||||||
* @param baseCur the current base
|
|
||||||
* @param qualCur the current base's quality
|
|
||||||
* @param fourintensity the four intensities of the current base
|
* @param fourintensity the four intensities of the current base
|
||||||
*/
|
*/
|
||||||
public void addMeanPoint(int cycle, char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
|
public void addMeanPoint(int cycle, double[][] probMatrix, double[] fourintensity) {
|
||||||
basemodels[cycle].addMeanPoint(basePrev, baseCur, qualCur, fourintensity);
|
basemodels[cycle].addMeanPoint(probMatrix, fourintensity);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add a single training point to the model covariances.
|
* Add a single training point to the model covariances.
|
||||||
*
|
*
|
||||||
* @param cycle the cycle for which this point should be added
|
* @param cycle the cycle for which this point should be added
|
||||||
* @param basePrev the previous base
|
|
||||||
* @param baseCur the current base
|
|
||||||
* @param qualCur the current base's quality
|
|
||||||
* @param fourintensity the four intensities of the current base
|
* @param fourintensity the four intensities of the current base
|
||||||
*/
|
*/
|
||||||
public void addCovariancePoint(int cycle, char basePrev, char baseCur, byte qualCur, double[] fourintensity) {
|
public void addCovariancePoint(int cycle, double[][] probMatrix, double[] fourintensity) {
|
||||||
basemodels[cycle].addCovariancePoint(basePrev, baseCur, qualCur, fourintensity);
|
basemodels[cycle].addCovariancePoint(probMatrix, fourintensity);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -108,21 +102,35 @@ public class BasecallingReadModel {
|
||||||
|
|
||||||
FourProb fp = new FourProb(likes);
|
FourProb fp = new FourProb(likes);
|
||||||
|
|
||||||
/*
|
|
||||||
System.out.println(fp.baseAtRank(0) + ":");
|
|
||||||
for (int basePrevIndex = 0; basePrevIndex < likes.length; basePrevIndex++) {
|
|
||||||
for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) {
|
|
||||||
System.out.print(likes[basePrevIndex][baseCurIndex] + " ");
|
|
||||||
}
|
|
||||||
System.out.println();
|
|
||||||
}
|
|
||||||
System.out.println();
|
|
||||||
*/
|
|
||||||
|
|
||||||
//return new FourProb(likes);
|
|
||||||
return fp;
|
return fp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the base probability matrix
|
||||||
|
*
|
||||||
|
* @param cycle the cycle of the base
|
||||||
|
* @param basePrev the previous base
|
||||||
|
* @param baseCur the current base
|
||||||
|
* @param probCur the probability of the current base
|
||||||
|
* @return the base probability matrix (1x4 if no contextual correction, 4x4 otherwise)
|
||||||
|
*/
|
||||||
|
public double[][] getBaseProbabilityMatrix(int cycle, char basePrev, char baseCur, double probCur) {
|
||||||
|
double[][] dist = new double[(correctForContext && cycle > 0) ? 4 : 1][4];
|
||||||
|
|
||||||
|
int actualBasePrevIndex = (correctForContext && cycle > 0) ? BaseUtils.simpleBaseToBaseIndex(basePrev) : 0;
|
||||||
|
int actualBaseCurIndex = BaseUtils.simpleBaseToBaseIndex(baseCur);
|
||||||
|
|
||||||
|
double residualTheories = (double) (dist.length*dist[0].length - 1);
|
||||||
|
|
||||||
|
for (int basePrevIndex = 0; basePrevIndex < dist.length; basePrevIndex++) {
|
||||||
|
for (int baseCurIndex = 0; baseCurIndex < dist[basePrevIndex].length; baseCurIndex++) {
|
||||||
|
dist[basePrevIndex][baseCurIndex] = (basePrevIndex == actualBasePrevIndex && baseCurIndex == actualBaseCurIndex) ? probCur : ((1.0 - probCur)/residualTheories);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return dist;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Writes model parameters to a file per cycle.
|
* Writes model parameters to a file per cycle.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -93,7 +93,12 @@ public class FourBaseRecaller extends CommandLineProgram {
|
||||||
//double[] fourprob = getBaseProbabilityDistribution(bases.charAt(cycle), quals[cycle]);
|
//double[] fourprob = getBaseProbabilityDistribution(bases.charAt(cycle), quals[cycle]);
|
||||||
double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];
|
double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];
|
||||||
|
|
||||||
model.addMeanPoint(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), baseCur, qualCur, fourintensity);
|
//model.addMeanPoint(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), baseCur, qualCur, fourintensity);
|
||||||
|
model.addMeanPoint(cycle,
|
||||||
|
model.getBaseProbabilityMatrix(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1),
|
||||||
|
baseCur,
|
||||||
|
QualityUtils.qualToProb(qualCur)),
|
||||||
|
fourintensity);
|
||||||
}
|
}
|
||||||
|
|
||||||
queryid++;
|
queryid++;
|
||||||
|
|
@ -119,10 +124,13 @@ public class FourBaseRecaller extends CommandLineProgram {
|
||||||
for (int cycle = 0; cycle < bases.length(); cycle++) {
|
for (int cycle = 0; cycle < bases.length(); cycle++) {
|
||||||
char baseCur = bases.charAt(cycle);
|
char baseCur = bases.charAt(cycle);
|
||||||
byte qualCur = quals[cycle];
|
byte qualCur = quals[cycle];
|
||||||
//double[] fourprob = getBaseProbabilityDistribution(bases.charAt(cycle), quals[cycle]);
|
|
||||||
double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];
|
double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];
|
||||||
|
|
||||||
model.addCovariancePoint(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), baseCur, qualCur, fourintensity);
|
model.addCovariancePoint(cycle,
|
||||||
|
model.getBaseProbabilityMatrix(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1),
|
||||||
|
baseCur,
|
||||||
|
QualityUtils.qualToProb(qualCur)),
|
||||||
|
fourintensity);
|
||||||
}
|
}
|
||||||
|
|
||||||
queryid++;
|
queryid++;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue