diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java index 846447760..e11ad1341 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java @@ -18,17 +18,22 @@ import java.util.*; // j.maguire 3-7-2009 public class SingleSampleGenotyper extends LocusWalker { + @Argument(fullName="metrics",required=true) + public String metricsFileName; + + @Argument(fullName="lodThreshold",shortName="lod",required=false,defaultValue="0.0") + public Double lodThreshold; + @Argument(fullName="fourBaseMode",required=false,defaultValue="false") public Boolean fourBaseMode; - @Argument(fullName="decideOnBase",required=false,defaultValue="false") - public Boolean decideOnBase; - - private AlleleMetrics metrics; + public AlleleMetrics metrics; public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) { return true; } // We are keeping all the reads public boolean requiresReads() { return true; } - public void initialize() { metrics = new AlleleMetrics("metrics.out"); } + public void initialize() { + metrics = new AlleleMetrics(metricsFileName, lodThreshold); + } public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context) { String rodString = getRodString(tracker); @@ -36,11 +41,13 @@ public class SingleSampleGenotyper extends LocusWalker= 0 && altBaseIndex >= 0) { - System.out.println(context.getLocation().toString() + " " + refBaseIndex + " " + altBaseIndex + " " + probs.length + " " + rodString); + /* for (int i = 0; i < probs.length; i++) { System.out.printf(" [ %4.4f %4.4f %4.4f %4.4f ]\n", probs[i][0], probs[i][1], probs[i][2], probs[i][3]); } - + */ + double[] obsWeights = getObservationWeights(probs, refBaseIndex, altBaseIndex); + /* System.out.print(" Weights: "); for (int i = 0; i < obsWeights.length; i++) { - System.out.printf("%4.4f ", obsWeights[i]); + System.out.print(obsWeights[i] + " "); } System.out.println(); + */ double[] genotypePriors = { 0.999, 1e-3, 1e-5 }; - double[] genotypeBalances = { 0.02, 0.5, 0.98 }; + //double[] genotypeBalances = { 0.02, 0.5, 0.98 }; + double[] genotypeBalances = { 0.002, 0.5, 1.0 }; double[] posteriors = new double[3]; - double qhat = 0.0, qstar = 0.0, lodVsRef = 0.0, lodVsNextBest = 0.0, pBest = Double.MIN_VALUE; + double qhat = 0.0, qstar = 0.0, lodVsRef = 0.0, pBest = Double.MIN_VALUE; for (int hypothesis = 0; hypothesis < 3; hypothesis++) { posteriors[hypothesis] = 0.0; for (int weightIndex = 0; weightIndex < obsWeights.length; weightIndex++) { - posteriors[hypothesis] += obsWeights[weightIndex]*binomialProb(weightIndex, probs.length, genotypeBalances[hypothesis]); + //if (hypothesis == 0) { + // System.out.println("DEBUG1: " + hypothesis + " " + weightIndex + " " + obsWeights[weightIndex] + " " + binomialProb(weightIndex, probs.length, genotypeBalances[hypothesis])); + //} + if (hypothesis == 0) { + //System.out.println(weightIndex + " " + probs.length + " " + genotypeBalances[hypothesis] + " " + binomialProb(weightIndex, probs.length, genotypeBalances[hypothesis])); + } + double binomialWeight = binomialProb(weightIndex, probs.length, genotypeBalances[hypothesis]);; + if (hypothesis == 0 && weightIndex < obsWeights.length/4) { + binomialWeight = 1.0; + } + + posteriors[hypothesis] += obsWeights[weightIndex]*binomialWeight; } posteriors[hypothesis] *= genotypePriors[hypothesis]; - System.out.printf(" Hypothesis %d %f %f %f\n", hypothesis, genotypeBalances[hypothesis], posteriors[hypothesis], Math.log10(posteriors[hypothesis]/posteriors[0])); + double refLikelihood = Double.isInfinite(Math.log10(posteriors[0])) ? -10000.0 : Math.log10(posteriors[0]); + + //System.out.println(" Hypothesis " + hypothesis + " " + genotypeBalances[hypothesis] + " " + posteriors[hypothesis] + " " + (Math.log10(posteriors[hypothesis]) - refLikelihood)); if (posteriors[hypothesis] > pBest) { qhat = genotypeBalances[hypothesis]; qstar = genotypeBalances[hypothesis]; - lodVsRef = Math.log10(posteriors[hypothesis]/posteriors[0]); + lodVsRef = Math.log10(posteriors[hypothesis]) - refLikelihood; pBest = posteriors[hypothesis]; } } double pRef = posteriors[0]; - - System.out.println("\n"); - return new AlleleFrequencyEstimate(context.getLocation(), - ref, - BaseUtils.baseIndexToSimpleBase(altBaseIndex), - 2, - qhat, - qstar, - lodVsRef, - lodVsNextBest, - pBest, - pRef, - probs.length, - ReadBackedPileup.basePileupAsString(context.getReads(), context.getOffsets()), - probs, - posteriors); + double[] sposteriors = Arrays.copyOf(posteriors, posteriors.length); + Arrays.sort(sposteriors); + double bestLogPosterior = (Double.isInfinite(Math.log10(sposteriors[2]))) ? -10000.0 : Math.log10(sposteriors[2]); + double nextBestLogPosterior = (Double.isInfinite(Math.log10(sposteriors[1]))) ? -10000.0 : Math.log10(sposteriors[1]); + double lodVsNextBest = bestLogPosterior - nextBestLogPosterior; + //System.out.println("\n"); + + AlleleFrequencyEstimate freq = new AlleleFrequencyEstimate(context.getLocation(), + ref, + BaseUtils.baseIndexToSimpleBase(altBaseIndex), + 2, + qhat, + qstar, + (Math.abs(qhat - genotypeBalances[0]) < 0.001 ? -lodVsNextBest : lodVsRef), + lodVsNextBest, + pBest, + pRef, + probs.length, + ReadBackedPileup.basePileupAsString(context.getReads(), context.getOffsets()), + probs, + posteriors); + + for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) { + if ( datum != null ) { + if ( datum instanceof rodGFF ) { + rodGFF hapmap_chip_genotype = (rodGFF) datum; + + //System.out.println(hapmap_chip_genotype.toString()); + //System.out.println(freq.asTabularString()); + } + } + } + + return freq; } return null; @@ -136,11 +178,13 @@ public class SingleSampleGenotyper extends LocusWalker 0)) + while (/*(output_paths.size() < 10) &&*/ (paths.size() > 0)) { // 4. Choose the next best path int[] next_best_path = paths.get(paths.size()-1); @@ -291,22 +337,28 @@ public class SingleSampleGenotyper extends LocusWalker