diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java index fab26c9d2..e9ed6b153 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java @@ -10,6 +10,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; @@ -225,12 +226,24 @@ public class AFCalcPerformanceTest { final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1, AFCalcFactory.Calculation.EXACT_INDEPENDENT, AFCalcTestBuilder.PriorType.human); + logger.info(call); final SimpleTimer timer = new SimpleTimer().start(); final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, testBuilder.makePriors()); - call.newNanoTime = timer.getElapsedTimeNano(); - call.newPNonRef = result.getLog10PosteriorOfAFGT0(); - logger.info(call); - logger.info("\t\t" + result); + final long newNanoTime = timer.getElapsedTimeNano(); + if ( call.originalCall.anyPolymorphic(-1) || result.anyPolymorphic(-1) ) { + logger.info("**** ONE IS POLY"); + } + logger.info("\t\t getLog10PosteriorOfAFGT0: " + call.originalCall.getLog10PosteriorOfAFGT0() + " vs " + result.getLog10PosteriorOfAFGT0()); + final double speedup = call.runtime / (1.0 * newNanoTime); + logger.info("\t\t runtime: " + call.runtime + " vs " + newNanoTime + " speedup " + String.format("%.2f", speedup) + "x"); + for ( final Allele a : call.originalCall.getAllelesUsedInGenotyping() ) { + if ( a.isNonReference() ) { + final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : ""; + logger.info("\t\t MLE " + a + ": " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE); + final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : ""; + logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost); + } + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java index c737416c5..7cacb2060 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java @@ -238,6 +238,19 @@ public class AFCalcResult { return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef; } + /** + * Are any of the alleles polymorphic w.r.t. #isPolymorphic? + * + * @param log10minPNonRef the confidence threshold, in log10 space + * @return true if any are poly, false otherwise + */ + public boolean anyPolymorphic(final double log10minPNonRef) { + for ( final Allele a : getAllelesUsedInGenotyping() ) + if ( a.isNonReference() && isPolymorphic(a, log10minPNonRef) ) + return true; + return false; + } + /** * Returns the log10 probability that allele is segregating * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java index 3794ba240..9394c99d7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java @@ -1,20 +1,18 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; -import org.broadinstitute.sting.utils.AutoFormattingTime; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Utils; +import com.google.java.contract.Requires; +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.*; import java.io.*; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedList; -import java.util.List; +import java.util.*; /** * Allows us to write out and read in information about exact calls (site, alleles, PLs, etc) in tabular format + * + * Once opened, calls can be writen to disk with printCallInfo */ public class ExactCallLogger implements Cloneable { private PrintStream callReport = null; @@ -38,32 +36,28 @@ public class ExactCallLogger implements Cloneable { */ public static class ExactCall { final VariantContext vc; - final long origNanoTime; - long newNanoTime = -1; - final double origPNonRef; - double newPNonRef = -1; + final long runtime; + final AFCalcResult originalCall; - public ExactCall(VariantContext vc, long origNanoTime, double origPNonRef) { + public ExactCall(VariantContext vc, final long runtime, final AFCalcResult originalCall) { this.vc = vc; - this.origNanoTime = origNanoTime; - this.origPNonRef = origPNonRef; + this.runtime = runtime; + this.originalCall = originalCall; } @Override public String toString() { - return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s new.pNonRef=%.2f new.runtime=%s", + return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s", vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(), - origPNonRef, - new AutoFormattingTime(origNanoTime / 1e9).toString(), - newPNonRef, - newNanoTime == -1 ? "not.run" : new AutoFormattingTime(newNanoTime / 1e9).toString()); + originalCall.getLog10PosteriorOfAFGT0(), + new AutoFormattingTime(runtime / 1e9).toString()); } } - protected void printCallInfo(final VariantContext vc, - final double[] log10AlleleFrequencyPriors, - final long runtimeNano, - final AFCalcResult result) { + protected final void printCallInfo(final VariantContext vc, + final double[] log10AlleleFrequencyPriors, + final long runtimeNano, + final AFCalcResult result) { printCallElement(vc, "type", "ignore", vc.getType()); int allelei = 0; @@ -90,6 +84,7 @@ public class ExactCallLogger implements Cloneable { callReport.flush(); } + @Requires({"vc != null", "variable != null", "key != null", "value != null", "callReport != null"}) private void printCallElement(final VariantContext vc, final Object variable, final Object key, @@ -102,10 +97,10 @@ public class ExactCallLogger implements Cloneable { * Read in a list of ExactCall objects from reader, keeping only those * with starts in startsToKeep or all sites (if this is empty) * - * @param reader - * @param startsToKeep - * @param parser - * @return + * @param reader a just-opened reader sitting at the start of the file + * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should be kept + * @param parser a genome loc parser to create genome locs + * @return a list of ExactCall objects in reader * @throws IOException */ public static List readExactLog(final BufferedReader reader, final List startsToKeep, GenomeLocParser parser) throws IOException { @@ -118,10 +113,17 @@ public class ExactCallLogger implements Cloneable { // skip the header line reader.readLine(); + // skip the first "type" line + reader.readLine(); + while (true) { final VariantContextBuilder builder = new VariantContextBuilder(); final List alleles = new ArrayList(); final List genotypes = new ArrayList(); + final double[] posteriors = new double[2]; + final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true); + final List mle = new ArrayList(); + final Map log10pNonRefByAllele = new HashMap(); long runtimeNano = -1; GenomeLoc currentLoc = null; @@ -139,13 +141,15 @@ public class ExactCallLogger implements Cloneable { if (currentLoc == null) currentLoc = lineLoc; - if (variable.equals("log10PosteriorOfAFzero")) { + if (variable.equals("type")) { if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) { builder.alleles(alleles); final int stop = currentLoc.getStart() + alleles.get(0).length() - 1; builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); builder.genotypes(genotypes); - calls.add(new ExactCall(builder.make(), runtimeNano, Double.valueOf(value))); + final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{})); + final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele); + calls.add(new ExactCall(builder.make(), runtimeNano, result)); } break; } else if (variable.equals("allele")) { @@ -155,6 +159,15 @@ public class ExactCallLogger implements Cloneable { final GenotypeBuilder gb = new GenotypeBuilder(key); gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs()); genotypes.add(gb.make()); + } else if (variable.equals("log10PosteriorOfAFEq0")) { + posteriors[0] = Double.valueOf(value); + } else if (variable.equals("log10PosteriorOfAFGt0")) { + posteriors[1] = Double.valueOf(value); + } else if (variable.equals("MLE")) { + mle.add(Integer.valueOf(value)); + } else if (variable.equals("pNonRefByAllele")) { + final Allele a = Allele.create(key); + log10pNonRefByAllele.put(a, Double.valueOf(value)); } else if (variable.equals("runtime.nano")) { runtimeNano = Long.valueOf(value); } else {