From f1de3d6366237d277f88c479b4485c2301603b9f Mon Sep 17 00:00:00 2001 From: kiran Date: Tue, 12 May 2009 19:47:41 +0000 Subject: [PATCH] Minor tweaks to how probs are supplied. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@676 348d0f76-0448-11de-a6fe-93d51630548a --- .../fourbasecaller/BasecallingBaseModel.java | 30 ++------- .../fourbasecaller/BasecallingReadModel.java | 62 +++++++++++-------- .../fourbasecaller/FourBaseRecaller.java | 14 ++++- 3 files changed, 52 insertions(+), 54 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingBaseModel.java b/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingBaseModel.java index 3b5a63b9f..721899b0e 100755 --- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingBaseModel.java +++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingBaseModel.java @@ -69,21 +69,13 @@ public class BasecallingBaseModel { /** * Add a single training point to the model to estimate the means. - * - * @param basePrev the previous cycle's base call (A, C, G, T) - * @param baseCur the current cycle's base call (A, C, G, T) */ - public void addMeanPoint(char basePrev, char baseCur, byte qualCur, double[] fourintensity) { - int actualBasePrevIndex = BaseUtils.simpleBaseToBaseIndex(basePrev); - int actualBaseCurIndex = BaseUtils.simpleBaseToBaseIndex(baseCur); - double actualWeight = QualityUtils.qualToProb(qualCur); - + public void addMeanPoint(double[][] probMatrix, double[] fourIntensity) { for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) { for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { - // We want to upweight the correct theory as much as we can and spread the remainder out evenly between all other hypotheses. - double weight = (baseCurIndex == actualBaseCurIndex && (!correctForContext || basePrevIndex == actualBasePrevIndex)) ? actualWeight : ((1.0 - actualWeight)/((double) (4*numTheories - 1))); + double weight = probMatrix[basePrevIndex][baseCurIndex]; - DoubleMatrix1D weightedChannelIntensities = (DoubleFactory1D.dense).make(fourintensity); + DoubleMatrix1D weightedChannelIntensities = (DoubleFactory1D.dense).make(fourIntensity); weightedChannelIntensities.assign(F.mult(weight)); sums[basePrevIndex][baseCurIndex].assign(weightedChannelIntensities, F.plus); @@ -96,26 +88,16 @@ public class BasecallingBaseModel { /** * Add a single training point to the model to estimate the covariances. - * - * @param basePrev the previous cycle's base call (A, C, G, T) - * @param baseCur the current cycle's base call (A, C, G, T) - * @param qualCur the quality score for the current cycle's base call - * @param fourintensity the four intensities for the current cycle's base call */ - public void addCovariancePoint(char basePrev, char baseCur, byte qualCur, double[] fourintensity) { - int actualBasePrevIndex = BaseUtils.simpleBaseToBaseIndex(basePrev); - int actualBaseCurIndex = BaseUtils.simpleBaseToBaseIndex(baseCur); - double actualWeight = QualityUtils.qualToProb(qualCur); - + public void addCovariancePoint(double[][] probMatrix, double[] fourIntensity) { for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) { for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { - // We want to upweight the correct theory as much as we can and spread the remainder out evenly between all other hypotheses. - double weight = (baseCurIndex == actualBaseCurIndex && (!correctForContext || basePrevIndex == actualBasePrevIndex)) ? actualWeight : ((1.0 - actualWeight)/((double) (4*numTheories - 1))); + double weight = probMatrix[basePrevIndex][baseCurIndex]; DoubleMatrix1D mean = sums[basePrevIndex][baseCurIndex].copy(); mean.assign(F.div(counts[basePrevIndex][baseCurIndex])); - DoubleMatrix1D sub = (DoubleFactory1D.dense).make(fourintensity); + DoubleMatrix1D sub = (DoubleFactory1D.dense).make(fourIntensity); sub.assign(mean, F.minus); DoubleMatrix2D cov = (DoubleFactory2D.dense).make(4, 4); diff --git a/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingReadModel.java b/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingReadModel.java index b37f187d0..ff5334c79 100644 --- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingReadModel.java +++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingReadModel.java @@ -7,11 +7,11 @@ import java.io.File; /** * BasecallingReadModel represents the statistical models for - * all bases in all cycles. It allows for easy, one-pass - * training via the addTrainingPoint() method, and for the - * computation of the 4x4 likelihood matrix or the 1x4 - * probability vector (with contextual components marginalized - * out of the likelihood matrix). + * all bases in all cycles. It allows for easy training via + * the addTrainingPoint() method, and for the computation of + * the 4x4 likelihood matrix or the 1x4 probability vector + * (with contextual components marginalized out of the + * likelihood matrix). * * @author Kiran Garimella */ @@ -38,26 +38,20 @@ public class BasecallingReadModel { * Add a single training point to the model means. * * @param cycle the cycle for which this point should be added - * @param basePrev the previous base - * @param baseCur the current base - * @param qualCur the current base's quality * @param fourintensity the four intensities of the current base */ - public void addMeanPoint(int cycle, char basePrev, char baseCur, byte qualCur, double[] fourintensity) { - basemodels[cycle].addMeanPoint(basePrev, baseCur, qualCur, fourintensity); + public void addMeanPoint(int cycle, double[][] probMatrix, double[] fourintensity) { + basemodels[cycle].addMeanPoint(probMatrix, fourintensity); } /** * Add a single training point to the model covariances. * * @param cycle the cycle for which this point should be added - * @param basePrev the previous base - * @param baseCur the current base - * @param qualCur the current base's quality * @param fourintensity the four intensities of the current base */ - public void addCovariancePoint(int cycle, char basePrev, char baseCur, byte qualCur, double[] fourintensity) { - basemodels[cycle].addCovariancePoint(basePrev, baseCur, qualCur, fourintensity); + public void addCovariancePoint(int cycle, double[][] probMatrix, double[] fourintensity) { + basemodels[cycle].addCovariancePoint(probMatrix, fourintensity); } /** @@ -108,21 +102,35 @@ public class BasecallingReadModel { FourProb fp = new FourProb(likes); - /* - System.out.println(fp.baseAtRank(0) + ":"); - for (int basePrevIndex = 0; basePrevIndex < likes.length; basePrevIndex++) { - for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { - System.out.print(likes[basePrevIndex][baseCurIndex] + " "); - } - System.out.println(); - } - System.out.println(); - */ - - //return new FourProb(likes); return fp; } + /** + * Returns the base probability matrix + * + * @param cycle the cycle of the base + * @param basePrev the previous base + * @param baseCur the current base + * @param probCur the probability of the current base + * @return the base probability matrix (1x4 if no contextual correction, 4x4 otherwise) + */ + public double[][] getBaseProbabilityMatrix(int cycle, char basePrev, char baseCur, double probCur) { + double[][] dist = new double[(correctForContext && cycle > 0) ? 4 : 1][4]; + + int actualBasePrevIndex = (correctForContext && cycle > 0) ? BaseUtils.simpleBaseToBaseIndex(basePrev) : 0; + int actualBaseCurIndex = BaseUtils.simpleBaseToBaseIndex(baseCur); + + double residualTheories = (double) (dist.length*dist[0].length - 1); + + for (int basePrevIndex = 0; basePrevIndex < dist.length; basePrevIndex++) { + for (int baseCurIndex = 0; baseCurIndex < dist[basePrevIndex].length; baseCurIndex++) { + dist[basePrevIndex][baseCurIndex] = (basePrevIndex == actualBasePrevIndex && baseCurIndex == actualBaseCurIndex) ? probCur : ((1.0 - probCur)/residualTheories); + } + } + + return dist; + } + /** * Writes model parameters to a file per cycle. * diff --git a/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java b/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java index 0aeeea8b4..8d679e7e8 100644 --- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java +++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java @@ -93,7 +93,12 @@ public class FourBaseRecaller extends CommandLineProgram { //double[] fourprob = getBaseProbabilityDistribution(bases.charAt(cycle), quals[cycle]); double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset]; - model.addMeanPoint(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), baseCur, qualCur, fourintensity); + //model.addMeanPoint(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), baseCur, qualCur, fourintensity); + model.addMeanPoint(cycle, + model.getBaseProbabilityMatrix(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), + baseCur, + QualityUtils.qualToProb(qualCur)), + fourintensity); } queryid++; @@ -119,10 +124,13 @@ public class FourBaseRecaller extends CommandLineProgram { for (int cycle = 0; cycle < bases.length(); cycle++) { char baseCur = bases.charAt(cycle); byte qualCur = quals[cycle]; - //double[] fourprob = getBaseProbabilityDistribution(bases.charAt(cycle), quals[cycle]); double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset]; - model.addCovariancePoint(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), baseCur, qualCur, fourintensity); + model.addCovariancePoint(cycle, + model.getBaseProbabilityMatrix(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), + baseCur, + QualityUtils.qualToProb(qualCur)), + fourintensity); } queryid++;