Add exactCallsLog output file to ExactModel and StandardCallerArgumentCollection
-- This allows us to log all of the information about the exact model call (alleles, priors, PLs, result, and runtime) to a file for later debugging / optimization
This commit is contained in:
parent
118e974731
commit
1c52db4cdd
|
|
@ -237,9 +237,13 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING);
|
||||
UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
|
||||
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING );
|
||||
UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING );
|
||||
|
||||
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
|
||||
UnifiedArgumentCollection simpleUAC = UAC.clone();
|
||||
simpleUAC.exactCallsLog = null;
|
||||
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
// initialize the output VCF header
|
||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
|
||||
|
|
|
|||
|
|
@ -1,13 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.arguments;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Advanced;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
|
|
@ -59,4 +58,8 @@ public class StandardCallerArgumentCollection {
|
|||
@Advanced
|
||||
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
||||
public int MAX_ALTERNATE_ALLELES = 3;
|
||||
|
||||
@Hidden
|
||||
@Argument(shortName = "logExactCalls", doc="x")
|
||||
public File exactCallsLog = null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,12 +27,20 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||
private SimpleTimer callTimer = new SimpleTimer();
|
||||
private PrintStream callReport = null;
|
||||
|
||||
// private final static boolean DEBUG = false;
|
||||
|
||||
|
|
@ -40,6 +48,19 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(UAC, N, logger, verboseWriter);
|
||||
if ( UAC.exactCallsLog != null )
|
||||
initializeOutputFile(UAC.exactCallsLog);
|
||||
}
|
||||
|
||||
public void initializeOutputFile(final File outputFile) {
|
||||
try {
|
||||
if (outputFile != null) {
|
||||
callReport = new PrintStream( new FileOutputStream(outputFile) );
|
||||
callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value")));
|
||||
}
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(outputFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
public List<Allele> getLog10PNonRef(final VariantContext vc,
|
||||
|
|
@ -61,11 +82,44 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
GLs = VariantContextUtils.subsetDiploidAlleles(vc, alleles, false);
|
||||
}
|
||||
|
||||
callTimer.start();
|
||||
linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result);
|
||||
final long nanoTime = callTimer.getElapsedTimeNano();
|
||||
|
||||
if ( callReport != null )
|
||||
printCallInfo(vc, alleles, GLs, log10AlleleFrequencyPriors, nanoTime, result.getLog10PosteriorOfAFzero());
|
||||
|
||||
return alleles;
|
||||
}
|
||||
|
||||
private void printCallInfo(final VariantContext vc,
|
||||
final List<Allele> alleles,
|
||||
final GenotypesContext GLs,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final long runtimeNano,
|
||||
final double log10PosteriorOfAFzero) {
|
||||
printCallElement(vc, "type", "ignore", vc.getType());
|
||||
|
||||
int allelei = 0;
|
||||
for ( final Allele a : alleles )
|
||||
printCallElement(vc, "allele", allelei++, a.getDisplayString());
|
||||
|
||||
for ( final Genotype g : GLs )
|
||||
printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString());
|
||||
|
||||
for ( int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++ )
|
||||
printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]);
|
||||
|
||||
printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
|
||||
printCallElement(vc, "log10PosteriorOfAFzero", "ignore", log10PosteriorOfAFzero);
|
||||
|
||||
callReport.flush();
|
||||
}
|
||||
|
||||
private void printCallElement(final VariantContext vc, final Object variable, final Object key, final Object value) {
|
||||
final String loc = String.format("%s:%d", vc.getChr(), vc.getStart());
|
||||
callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value)));
|
||||
}
|
||||
|
||||
private static final int PL_INDEX_OF_HOM_REF = 0;
|
||||
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
|
||||
|
|
|
|||
|
|
@ -186,7 +186,6 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
@Argument(shortName="ef", fullName="exclude_filtered_reference_sites", doc="Don't include in the analysis sites where the reference sample VCF is filtered. Default: false.", required=false)
|
||||
boolean EXCLUDE_FILTERED_REFERENCE_SITES = false;
|
||||
|
||||
|
||||
// Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value!
|
||||
public UnifiedArgumentCollection clone() {
|
||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||
|
|
@ -224,6 +223,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
uac.minReferenceDepth = minReferenceDepth;
|
||||
uac.EXCLUDE_FILTERED_REFERENCE_SITES = EXCLUDE_FILTERED_REFERENCE_SITES;
|
||||
uac.IGNORE_LANE_INFO = IGNORE_LANE_INFO;
|
||||
uac.exactCallsLog = exactCallsLog;
|
||||
|
||||
// todo- arguments to remove
|
||||
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
|
||||
|
|
@ -242,5 +242,6 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
this.OutputMode = SCAC.OutputMode;
|
||||
this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
|
||||
this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
|
||||
this.exactCallsLog = SCAC.exactCallsLog;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue