diff --git a/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingBaseModel.java b/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingBaseModel.java index 69b2d3a5d..3b5a63b9f 100755 --- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingBaseModel.java +++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingBaseModel.java @@ -7,6 +7,7 @@ import cern.colt.matrix.DoubleFactory2D; import cern.colt.matrix.linalg.Algebra; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.BaseUtils; import java.io.*; @@ -21,38 +22,46 @@ import java.io.*; * @author Kiran Garimella */ public class BasecallingBaseModel { - private double[] counts; - private DoubleMatrix1D[] sums; - private DoubleMatrix2D[] unscaledCovarianceSums; + private double[][] counts; + private DoubleMatrix1D[][] sums; + private DoubleMatrix2D[][] unscaledCovarianceSums; - private DoubleMatrix1D[] means; - private DoubleMatrix2D[] inverseCovariances; - private double[] norms; + private DoubleMatrix1D[][] means; + private DoubleMatrix2D[][] inverseCovariances; + private double[][] norms; private cern.jet.math.Functions F = cern.jet.math.Functions.functions; private Algebra alg; + private boolean correctForContext = false; + private int numTheories = 1; + private boolean readyToCall = false; /** * Constructor for BasecallingBaseModel */ - public BasecallingBaseModel() { - counts = new double[4]; + public BasecallingBaseModel(boolean correctForContext) { + this.correctForContext = correctForContext; + this.numTheories = (correctForContext) ? 4 : 1; + + counts = new double[this.numTheories][4]; - sums = new DoubleMatrix1D[4]; - unscaledCovarianceSums = new DoubleMatrix2D[4]; + sums = new DoubleMatrix1D[this.numTheories][4]; + unscaledCovarianceSums = new DoubleMatrix2D[this.numTheories][4]; - means = new DoubleMatrix1D[4]; - inverseCovariances = new DoubleMatrix2D[4]; - norms = new double[4]; + means = new DoubleMatrix1D[this.numTheories][4]; + inverseCovariances = new DoubleMatrix2D[this.numTheories][4]; + norms = new double[this.numTheories][4]; - for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { - sums[baseCurIndex] = (DoubleFactory1D.dense).make(4); - unscaledCovarianceSums[baseCurIndex] = (DoubleFactory2D.dense).make(4, 4); + for (int basePrevIndex = 0; basePrevIndex < this.numTheories; basePrevIndex++) { + for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { + sums[basePrevIndex][baseCurIndex] = (DoubleFactory1D.dense).make(4); + unscaledCovarianceSums[basePrevIndex][baseCurIndex] = (DoubleFactory2D.dense).make(4, 4); - means[baseCurIndex] = (DoubleFactory1D.dense).make(4); - inverseCovariances[baseCurIndex] = (DoubleFactory2D.dense).make(4, 4); + means[basePrevIndex][baseCurIndex] = (DoubleFactory1D.dense).make(4); + inverseCovariances[basePrevIndex][baseCurIndex] = (DoubleFactory2D.dense).make(4, 4); + } } alg = new Algebra(); @@ -61,23 +70,25 @@ public class BasecallingBaseModel { /** * Add a single training point to the model to estimate the means. * + * @param basePrev the previous cycle's base call (A, C, G, T) * @param baseCur the current cycle's base call (A, C, G, T) - * @param qualCur the quality score for the current cycle's base call - * @param fourintensity the four intensities for the current cycle's base call */ - public void addMeanPoint(char baseCur, byte qualCur, double[] fourintensity) { - int actualBaseCurIndex = baseToBaseIndex(baseCur); + public void addMeanPoint(char basePrev, char baseCur, byte qualCur, double[] fourintensity) { + int actualBasePrevIndex = BaseUtils.simpleBaseToBaseIndex(basePrev); + int actualBaseCurIndex = BaseUtils.simpleBaseToBaseIndex(baseCur); double actualWeight = QualityUtils.qualToProb(qualCur); - for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { - // We want to upweight the correct theory as much as we can and spread the remainder out evenly between all other hypotheses. - double weight = (baseCurIndex == actualBaseCurIndex) ? actualWeight : ((1.0 - actualWeight)/3.0); + for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) { + for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { + // We want to upweight the correct theory as much as we can and spread the remainder out evenly between all other hypotheses. + double weight = (baseCurIndex == actualBaseCurIndex && (!correctForContext || basePrevIndex == actualBasePrevIndex)) ? actualWeight : ((1.0 - actualWeight)/((double) (4*numTheories - 1))); - DoubleMatrix1D weightedChannelIntensities = (DoubleFactory1D.dense).make(fourintensity); - weightedChannelIntensities.assign(F.mult(weight)); + DoubleMatrix1D weightedChannelIntensities = (DoubleFactory1D.dense).make(fourintensity); + weightedChannelIntensities.assign(F.mult(weight)); - sums[baseCurIndex].assign(weightedChannelIntensities, F.plus); - counts[baseCurIndex] += weight; + sums[basePrevIndex][baseCurIndex].assign(weightedChannelIntensities, F.plus); + counts[basePrevIndex][baseCurIndex] += weight; + } } readyToCall = false; @@ -85,30 +96,34 @@ public class BasecallingBaseModel { /** * Add a single training point to the model to estimate the covariances. - * + * + * @param basePrev the previous cycle's base call (A, C, G, T) * @param baseCur the current cycle's base call (A, C, G, T) * @param qualCur the quality score for the current cycle's base call * @param fourintensity the four intensities for the current cycle's base call */ - public void addCovariancePoint(char baseCur, byte qualCur, double[] fourintensity) { - int actualBaseCurIndex = baseToBaseIndex(baseCur); + public void addCovariancePoint(char basePrev, char baseCur, byte qualCur, double[] fourintensity) { + int actualBasePrevIndex = BaseUtils.simpleBaseToBaseIndex(basePrev); + int actualBaseCurIndex = BaseUtils.simpleBaseToBaseIndex(baseCur); double actualWeight = QualityUtils.qualToProb(qualCur); - for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { - // We want to upweight the correct theory as much as we can and spread the remainder out evenly between all other hypotheses. - double weight = (baseCurIndex == actualBaseCurIndex) ? actualWeight : ((1.0 - actualWeight)/3.0); + for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) { + for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { + // We want to upweight the correct theory as much as we can and spread the remainder out evenly between all other hypotheses. + double weight = (baseCurIndex == actualBaseCurIndex && (!correctForContext || basePrevIndex == actualBasePrevIndex)) ? actualWeight : ((1.0 - actualWeight)/((double) (4*numTheories - 1))); - DoubleMatrix1D mean = sums[baseCurIndex].copy(); - mean.assign(F.div(counts[baseCurIndex])); + DoubleMatrix1D mean = sums[basePrevIndex][baseCurIndex].copy(); + mean.assign(F.div(counts[basePrevIndex][baseCurIndex])); - DoubleMatrix1D sub = (DoubleFactory1D.dense).make(fourintensity); - sub.assign(mean, F.minus); + DoubleMatrix1D sub = (DoubleFactory1D.dense).make(fourintensity); + sub.assign(mean, F.minus); - DoubleMatrix2D cov = (DoubleFactory2D.dense).make(4, 4); - alg.multOuter(sub, sub, cov); + DoubleMatrix2D cov = (DoubleFactory2D.dense).make(4, 4); + alg.multOuter(sub, sub, cov); - cov.assign(F.mult(weight)); - unscaledCovarianceSums[baseCurIndex].assign(cov, F.plus); + cov.assign(F.mult(weight)); + unscaledCovarianceSums[basePrevIndex][baseCurIndex].assign(cov, F.plus); + } } readyToCall = false; @@ -118,16 +133,18 @@ public class BasecallingBaseModel { * Precompute all the matrix inversions and determinants we'll need for computing the likelihood distributions. */ public void prepareToCallBases() { - for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { - means[baseCurIndex] = sums[baseCurIndex].copy(); - means[baseCurIndex].assign(F.div(counts[baseCurIndex])); + for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) { + for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { + means[basePrevIndex][baseCurIndex] = sums[basePrevIndex][baseCurIndex].copy(); + means[basePrevIndex][baseCurIndex].assign(F.div(counts[basePrevIndex][baseCurIndex])); - inverseCovariances[baseCurIndex] = unscaledCovarianceSums[baseCurIndex].copy(); - inverseCovariances[baseCurIndex].assign(F.div(counts[baseCurIndex])); - DoubleMatrix2D invcov = alg.inverse(inverseCovariances[baseCurIndex]); - inverseCovariances[baseCurIndex] = invcov; + inverseCovariances[basePrevIndex][baseCurIndex] = unscaledCovarianceSums[basePrevIndex][baseCurIndex].copy(); + inverseCovariances[basePrevIndex][baseCurIndex].assign(F.div(counts[basePrevIndex][baseCurIndex])); + DoubleMatrix2D invcov = alg.inverse(inverseCovariances[basePrevIndex][baseCurIndex]); + inverseCovariances[basePrevIndex][baseCurIndex] = invcov; - norms[baseCurIndex] = Math.pow(alg.det(invcov), 0.5)/Math.pow(2.0*Math.PI, 2.0); + norms[basePrevIndex][baseCurIndex] = Math.pow(alg.det(invcov), 0.5)/Math.pow(2.0*Math.PI, 2.0); + } } readyToCall = true; @@ -141,22 +158,24 @@ public class BasecallingBaseModel { * @return a 4x4 matrix of likelihoods, where the row is the previous cycle base hypothesis and * the column is the current cycle base hypothesis */ - public double[] computeLikelihoods(int cycle, double[] fourintensity) { + public double[][] computeLikelihoods(int cycle, double[] fourintensity) { if (!readyToCall) { prepareToCallBases(); } - double[] likedist = new double[4]; - for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { - double norm = norms[baseCurIndex]; + double[][] likedist = new double[numTheories][4]; + for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) { + for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { + double norm = norms[basePrevIndex][baseCurIndex]; - DoubleMatrix1D sub = (DoubleFactory1D.dense).make(fourintensity); - sub.assign(means[baseCurIndex], F.minus); + DoubleMatrix1D sub = (DoubleFactory1D.dense).make(fourintensity); + sub.assign(means[basePrevIndex][baseCurIndex], F.minus); - DoubleMatrix1D Ax = alg.mult(inverseCovariances[baseCurIndex], sub); - double exparg = -0.5*alg.mult(sub, Ax); + DoubleMatrix1D Ax = alg.mult(inverseCovariances[basePrevIndex][baseCurIndex], sub); + double exparg = -0.5*alg.mult(sub, Ax); - likedist[baseCurIndex] = norm*Math.exp(exparg); + likedist[basePrevIndex][baseCurIndex] = norm*Math.exp(exparg); + } } return likedist; @@ -166,66 +185,33 @@ public class BasecallingBaseModel { try { PrintWriter writer = new PrintWriter(outparam); - for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { - writer.print("mean_" + baseIndexToBase(baseCurIndex) + " = c("); - for (int channel = 0; channel < 4; channel++) { - writer.print(sums[baseCurIndex].getQuick(channel)/counts[baseCurIndex]); + for (int basePrevIndex = 0; basePrevIndex < numTheories; basePrevIndex++) { + for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { + writer.print("mean_" + BaseUtils.baseIndexToSimpleBase(baseCurIndex) + " = c("); + for (int channel = 0; channel < 4; channel++) { + writer.print(sums[basePrevIndex][baseCurIndex].getQuick(channel)/counts[basePrevIndex][baseCurIndex]); - if (channel < 3) { - writer.print(", "); + if (channel < 3) { + writer.print(", "); + } } - } - writer.println(");"); + writer.println(");"); - DoubleMatrix2D cov = unscaledCovarianceSums[baseCurIndex].copy(); - cov.assign(F.div(counts[baseCurIndex])); + DoubleMatrix2D cov = unscaledCovarianceSums[basePrevIndex][baseCurIndex].copy(); + cov.assign(F.div(counts[basePrevIndex][baseCurIndex])); - writer.print("cov_" + baseIndexToBase(baseCurIndex) + " = matrix(c("); - for (int channel1 = 0; channel1 < 4; channel1++) { - for (int channel2 = 0; channel2 < 4; channel2++) { - writer.print(cov.get(channel2, channel1) + (channel1 == 3 && channel2 == 3 ? "" : ",")); + writer.print("cov_" + BaseUtils.baseIndexToSimpleBase(baseCurIndex) + " = matrix(c("); + for (int channel1 = 0; channel1 < 4; channel1++) { + for (int channel2 = 0; channel2 < 4; channel2++) { + writer.print(cov.get(channel2, channel1) + (channel1 == 3 && channel2 == 3 ? "" : ",")); + } } + writer.println("), nr=4, nc=4);\n"); } - writer.println("), nr=4, nc=4);\n"); } writer.close(); } catch (IOException e) { } } - - /** - * Utility method for converting a base ([Aa*], [Cc], [Gg], [Tt]) to an index (0, 1, 2, 3); - * - * @param base - * @return 0, 1, 2, 3, or -1 if the base can't be understood. - */ - private int baseToBaseIndex(char base) { - switch (base) { - case 'A': - case 'a': - case '*': return 0; - - case 'C': - case 'c': return 1; - - case 'G': - case 'g': return 2; - - case 'T': - case 't': return 3; - } - - return -1; - } - - private char baseIndexToBase(int baseIndex) { - switch (baseIndex) { - case 0: return 'A'; - case 1: return 'C'; - case 2: return 'G'; - case 3: return 'T'; - default: return '.'; - } - } } diff --git a/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingReadModel.java b/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingReadModel.java index 4ec080623..b37f187d0 100644 --- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingReadModel.java +++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/BasecallingReadModel.java @@ -1,5 +1,8 @@ package org.broadinstitute.sting.playground.fourbasecaller; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.QualityUtils; + import java.io.File; /** @@ -14,34 +17,47 @@ import java.io.File; */ public class BasecallingReadModel { private BasecallingBaseModel[] basemodels = null; + private boolean correctForContext = false; /** * Constructor for BasecallingReadModel. * * @param readLength the length of the read that this model will support */ - public BasecallingReadModel(int readLength) { + public BasecallingReadModel(int readLength, boolean correctForContext) { + this.correctForContext = correctForContext; + basemodels = new BasecallingBaseModel[readLength]; - for (int i = 0; i < readLength; i++) { - basemodels[i] = new BasecallingBaseModel(); + for (int cycle = 0; cycle < readLength; cycle++) { + basemodels[cycle] = new BasecallingBaseModel(cycle == 0 ? false : correctForContext); } } /** - * Add a single training point to the model. + * Add a single training point to the model means. * * @param cycle the cycle for which this point should be added + * @param basePrev the previous base * @param baseCur the current base * @param qualCur the current base's quality * @param fourintensity the four intensities of the current base */ - public void addMeanPoint(int cycle, char baseCur, byte qualCur, double[] fourintensity) { - basemodels[cycle].addMeanPoint(baseCur, qualCur, fourintensity); + public void addMeanPoint(int cycle, char basePrev, char baseCur, byte qualCur, double[] fourintensity) { + basemodels[cycle].addMeanPoint(basePrev, baseCur, qualCur, fourintensity); } - public void addCovariancePoint(int cycle, char baseCur, byte qualCur, double[] fourintensity) { - basemodels[cycle].addCovariancePoint(baseCur, qualCur, fourintensity); + /** + * Add a single training point to the model covariances. + * + * @param cycle the cycle for which this point should be added + * @param basePrev the previous base + * @param baseCur the current base + * @param qualCur the current base's quality + * @param fourintensity the four intensities of the current base + */ + public void addCovariancePoint(int cycle, char basePrev, char baseCur, byte qualCur, double[] fourintensity) { + basemodels[cycle].addCovariancePoint(basePrev, baseCur, qualCur, fourintensity); } /** @@ -51,7 +67,7 @@ public class BasecallingReadModel { * @param fourintensity the four intensities for the current cycle's base * @return 4x4 matrix of likelihoods */ - public double[] computeLikelihoods(int cycle, double[] fourintensity) { + public double[][] computeLikelihoods(int cycle, double[] fourintensity) { return basemodels[cycle].computeLikelihoods(cycle, fourintensity); } @@ -63,15 +79,48 @@ public class BasecallingReadModel { * @return an instance of FourProb, which encodes a base hypothesis, its probability, * and the ranking among the other hypotheses */ - public FourProb computeProbabilities(int cycle, double[] fourintensity) { - double[] likes = computeLikelihoods(cycle, fourintensity); + public FourProb computeProbabilities(int cycle, char basePrev, byte qualPrev, double[] fourintensity) { + double[][] likes = computeLikelihoods(cycle, fourintensity); double total = 0; - for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { total += likes[baseCurIndex]; } - for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { likes[baseCurIndex] /= total; } + for (int basePrevIndex = 0; basePrevIndex < likes.length; basePrevIndex++) { + for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { + double prior = 1.0; + if (correctForContext) { + double prob = QualityUtils.qualToProb(qualPrev); + if (basePrevIndex == BaseUtils.simpleBaseToBaseIndex(basePrev)) { + prior = prob; + } else { + prior = (1.0 - prob)/((double) (4*likes.length - 1)); + } + } + likes[basePrevIndex][baseCurIndex] = prior*likes[basePrevIndex][baseCurIndex]; + total += likes[basePrevIndex][baseCurIndex]; + } + } - return new FourProb(likes); + for (int basePrevIndex = 0; basePrevIndex < likes.length; basePrevIndex++) { + for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { + likes[basePrevIndex][baseCurIndex] /= total; + } + } + + FourProb fp = new FourProb(likes); + + /* + System.out.println(fp.baseAtRank(0) + ":"); + for (int basePrevIndex = 0; basePrevIndex < likes.length; basePrevIndex++) { + for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { + System.out.print(likes[basePrevIndex][baseCurIndex] + " "); + } + System.out.println(); + } + System.out.println(); + */ + + //return new FourProb(likes); + return fp; } /** diff --git a/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java b/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java index 344fa9d3a..05c84f9a8 100644 --- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java +++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java @@ -30,6 +30,7 @@ public class FourBaseRecaller extends CommandLineProgram { public int CALLING_LIMIT = 1000000000; public Boolean RAW = false; public Boolean OLD = false; + public Boolean CONTEXT = false; public static void main(String[] argv) { Instance = new FourBaseRecaller(); @@ -46,6 +47,7 @@ public class FourBaseRecaller extends CommandLineProgram { m_parser.addOptionalArg("clim", "C", "Number of reads to basecall", "CALLING_LIMIT"); m_parser.addOptionalFlag("raw", "R", "Use raw intensities?", "RAW"); m_parser.addOptionalFlag("old", "1", "Old Bustard 1.1 mode?", "OLD"); + m_parser.addOptionalFlag("context", "X", "Correct for context?", "CONTEXT"); } protected int execute() { @@ -72,7 +74,7 @@ public class FourBaseRecaller extends CommandLineProgram { fread = ffp.next(); int cycle_offset = (END <= 1) ? 0 : bread.getIntensities().length/2; - BasecallingReadModel model = new BasecallingReadModel(bread.getFirstReadSequence().length()); + BasecallingReadModel model = new BasecallingReadModel(bread.getFirstReadSequence().length(), CONTEXT); int queryid; // learn mean parameters @@ -90,9 +92,10 @@ public class FourBaseRecaller extends CommandLineProgram { for (int cycle = 0; cycle < bases.length(); cycle++) { char baseCur = bases.charAt(cycle); byte qualCur = quals[cycle]; + //double[] fourprob = getBaseProbabilityDistribution(bases.charAt(cycle), quals[cycle]); double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset]; - model.addMeanPoint(cycle, baseCur, qualCur, fourintensity); + model.addMeanPoint(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), baseCur, qualCur, fourintensity); } queryid++; @@ -118,9 +121,10 @@ public class FourBaseRecaller extends CommandLineProgram { for (int cycle = 0; cycle < bases.length(); cycle++) { char baseCur = bases.charAt(cycle); byte qualCur = quals[cycle]; + //double[] fourprob = getBaseProbabilityDistribution(bases.charAt(cycle), quals[cycle]); double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset]; - model.addCovariancePoint(cycle, baseCur, qualCur, fourintensity); + model.addCovariancePoint(cycle, cycle == 0 ? '.' : bases.charAt(cycle - 1), baseCur, qualCur, fourintensity); } queryid++; @@ -143,7 +147,7 @@ public class FourBaseRecaller extends CommandLineProgram { int[][] base_counts = new int[4][bread.getFirstReadSequence().length()]; int bases_incorrect = 0, bases_total = 0; - if (debugout != null) { debugout.println("cycle int_a int_c int_g int_t bustard_base kiran_base bustard_prob kiran_prob"); } + if (debugout != null) { debugout.println("cycle int_a int_c int_g int_t bustard_base kiran_base bustard_prob kiran_prob kiran_base_prev"); } queryid = 0; do { @@ -161,14 +165,17 @@ public class FourBaseRecaller extends CommandLineProgram { for (int cycle = 0; cycle < bases.length(); cycle++) { double[] fourintensity = (RAW || OLD) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset]; - FourProb fp = model.computeProbabilities(cycle, fourintensity); + char basePrev = (cycle == 0 ? '.' : (char) asciiseq[cycle - 1]); + byte qualPrev = (cycle == 0 ? 40 : bestqual[cycle - 1]); + + FourProb fp = model.computeProbabilities(cycle, basePrev, qualPrev, fourintensity); asciiseq[cycle] = (byte) fp.baseAtRank(0); bestqual[cycle] = fp.qualAtRank(0); nextbestqual[cycle] = QualityUtils.baseAndProbToCompressedQuality(fp.indexAtRank(1), fp.probAtRank(1)); if (debugout != null && bases.charAt(cycle) != '.' && base_counts[BaseUtils.simpleBaseToBaseIndex(bases.charAt(cycle))][cycle] < 1000) { - debugout.println(cycle + " " + fourintensity[0] + " " + fourintensity[1] + " " + fourintensity[2] + " " + fourintensity[3] + " " + (bases.charAt(cycle)) + " " + ((char) asciiseq[cycle]) + " " + bestqual[cycle] + " " + quals[cycle]); + debugout.println(cycle + " " + fourintensity[0] + " " + fourintensity[1] + " " + fourintensity[2] + " " + fourintensity[3] + " " + (bases.charAt(cycle)) + " " + ((char) asciiseq[cycle]) + " " + bestqual[cycle] + " " + quals[cycle] + " " + basePrev); base_counts[BaseUtils.simpleBaseToBaseIndex(bases.charAt(cycle))][cycle]++; } @@ -188,11 +195,27 @@ public class FourBaseRecaller extends CommandLineProgram { } sfw.close(); - System.out.println("Nonmatch rate: " + ((double) bases_incorrect)/((double) bases_total)); + System.out.println("Disagreement rate: " + ((double) bases_incorrect)/((double) bases_total)); return 0; } + private double[] getBaseProbabilityDistribution(char base, byte qual) { + double[] dist = new double[4]; + + int baseIndex = BaseUtils.simpleBaseToBaseIndex(base); + dist[baseIndex] = QualityUtils.qualToProb(qual); + double residualprob = (1.0 - dist[baseIndex])/3.0; + + for (int i = 0; i < 4; i++) { + if (i != baseIndex) { + dist[i] = residualprob; + } + } + + return dist; + } + private SAMRecord constructSAMRecord(String readNamePrefix, String bases, byte[] bestqual, byte[] nextbestqual, BustardReadData bread, SAMFileHeader sfh) { SAMRecord sr = new SAMRecord(sfh); diff --git a/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourProb.java b/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourProb.java index 64ff062b3..d38999abd 100755 --- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourProb.java +++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourProb.java @@ -15,9 +15,16 @@ public class FourProb { /** * Constructor for FourProb. * - * @param baseProbs the unsorted base hypothesis probabilities (in ACGT order). + * @param baseLikes the unsorted base hypothesis probabilities (in ACGT order). */ - public FourProb(double[] baseProbs) { + public FourProb(double[][] baseLikes) { + double[] baseProbs = new double[4]; + for (int baseCurIndex = 0; baseCurIndex < 4; baseCurIndex++) { + for (int basePrevIndex = 0; basePrevIndex < baseLikes.length; basePrevIndex++) { + baseProbs[baseCurIndex] += baseLikes[basePrevIndex][baseCurIndex]; + } + } + int[] baseIndices = {0, 1, 2, 3}; Integer[] perm = Utils.SortPermutation(baseProbs);