From 1c52db4cdd21ef331e87ec6e96411b690edae2d4 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 30 Sep 2012 11:34:32 -0400 Subject: [PATCH] Add exactCallsLog output file to ExactModel and StandardCallerArgumentCollection -- This allows us to log all of the information about the exact model call (alleles, priors, PLs, result, and runtime) to a file for later debugging / optimization --- .../haplotypecaller/HaplotypeCaller.java | 10 ++-- .../StandardCallerArgumentCollection.java | 11 ++-- .../genotyper/ExactAFCalculationModel.java | 54 +++++++++++++++++++ .../genotyper/UnifiedArgumentCollection.java | 3 +- 4 files changed, 70 insertions(+), 8 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index f4d8a88e0..71e4f5f8a 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -237,9 +237,13 @@ public class HaplotypeCaller extends ActiveRegionWalker implem UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling - UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING); - UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING); - UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); + UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING ); + UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); + + // create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested + UnifiedArgumentCollection simpleUAC = UAC.clone(); + simpleUAC.exactCallsLog = null; + UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); // initialize the output VCF header annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java index f30fc0316..16707de51 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java @@ -1,13 +1,12 @@ package org.broadinstitute.sting.gatk.arguments; -import org.broadinstitute.sting.commandline.Advanced; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import java.io.File; + /** * Created with IntelliJ IDEA. * User: rpoplin @@ -59,4 +58,8 @@ public class StandardCallerArgumentCollection { @Advanced @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) public int MAX_ALTERNATE_ALLELES = 3; + + @Hidden + @Argument(shortName = "logExactCalls", doc="x") + public File exactCallsLog = null; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index ba7f0f622..98d5fcad6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -27,12 +27,20 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.SimpleTimer; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.*; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.PrintStream; import java.util.*; public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { + private SimpleTimer callTimer = new SimpleTimer(); + private PrintStream callReport = null; // private final static boolean DEBUG = false; @@ -40,6 +48,19 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { super(UAC, N, logger, verboseWriter); + if ( UAC.exactCallsLog != null ) + initializeOutputFile(UAC.exactCallsLog); + } + + public void initializeOutputFile(final File outputFile) { + try { + if (outputFile != null) { + callReport = new PrintStream( new FileOutputStream(outputFile) ); + callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value"))); + } + } catch ( FileNotFoundException e ) { + throw new UserException.CouldNotCreateOutputFile(outputFile, e); + } } public List getLog10PNonRef(final VariantContext vc, @@ -61,11 +82,44 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { GLs = VariantContextUtils.subsetDiploidAlleles(vc, alleles, false); } + callTimer.start(); linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result); + final long nanoTime = callTimer.getElapsedTimeNano(); + + if ( callReport != null ) + printCallInfo(vc, alleles, GLs, log10AlleleFrequencyPriors, nanoTime, result.getLog10PosteriorOfAFzero()); return alleles; } + private void printCallInfo(final VariantContext vc, + final List alleles, + final GenotypesContext GLs, + final double[] log10AlleleFrequencyPriors, + final long runtimeNano, + final double log10PosteriorOfAFzero) { + printCallElement(vc, "type", "ignore", vc.getType()); + + int allelei = 0; + for ( final Allele a : alleles ) + printCallElement(vc, "allele", allelei++, a.getDisplayString()); + + for ( final Genotype g : GLs ) + printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString()); + + for ( int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++ ) + printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]); + + printCallElement(vc, "runtime.nano", "ignore", runtimeNano); + printCallElement(vc, "log10PosteriorOfAFzero", "ignore", log10PosteriorOfAFzero); + + callReport.flush(); + } + + private void printCallElement(final VariantContext vc, final Object variable, final Object key, final Object value) { + final String loc = String.format("%s:%d", vc.getChr(), vc.getStart()); + callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value))); + } private static final int PL_INDEX_OF_HOM_REF = 0; private static List chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 30c0f3e18..40c9c85f8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -186,7 +186,6 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection @Argument(shortName="ef", fullName="exclude_filtered_reference_sites", doc="Don't include in the analysis sites where the reference sample VCF is filtered. Default: false.", required=false) boolean EXCLUDE_FILTERED_REFERENCE_SITES = false; - // Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value! public UnifiedArgumentCollection clone() { UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); @@ -224,6 +223,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection uac.minReferenceDepth = minReferenceDepth; uac.EXCLUDE_FILTERED_REFERENCE_SITES = EXCLUDE_FILTERED_REFERENCE_SITES; uac.IGNORE_LANE_INFO = IGNORE_LANE_INFO; + uac.exactCallsLog = exactCallsLog; // todo- arguments to remove uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES; @@ -242,5 +242,6 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection this.OutputMode = SCAC.OutputMode; this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING; this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING; + this.exactCallsLog = SCAC.exactCallsLog; } }