Add exactCallsLog output file to ExactModel and StandardCallerArgumentCollection
-- This allows us to log all of the information about the exact model call (alleles, priors, PLs, result, and runtime) to a file for later debugging / optimization
This commit is contained in:
parent
118e974731
commit
1c52db4cdd
|
|
@ -237,9 +237,13 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||||
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||||
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||||
UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING);
|
UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING );
|
||||||
UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
|
UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING );
|
||||||
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
|
||||||
|
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
|
||||||
|
UnifiedArgumentCollection simpleUAC = UAC.clone();
|
||||||
|
simpleUAC.exactCallsLog = null;
|
||||||
|
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||||
|
|
||||||
// initialize the output VCF header
|
// initialize the output VCF header
|
||||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
|
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,12 @@
|
||||||
package org.broadinstitute.sting.gatk.arguments;
|
package org.broadinstitute.sting.gatk.arguments;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Advanced;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created with IntelliJ IDEA.
|
* Created with IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
|
|
@ -59,4 +58,8 @@ public class StandardCallerArgumentCollection {
|
||||||
@Advanced
|
@Advanced
|
||||||
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
||||||
public int MAX_ALTERNATE_ALLELES = 3;
|
public int MAX_ALTERNATE_ALLELES = 3;
|
||||||
|
|
||||||
|
// Destination file for the exact-model call log. The default of null means no log is written:
// the model only opens a log when this field is non-null.
// Declared with required = false, like the other arguments in this collection, so that the
// command line stays valid when the option is not supplied.
@Hidden
|
||||||
|
@Argument(shortName = "logExactCalls", doc="x", required = false)
|
||||||
|
public File exactCallsLog = null;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -27,12 +27,20 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
private SimpleTimer callTimer = new SimpleTimer();
|
||||||
|
private PrintStream callReport = null;
|
||||||
|
|
||||||
// private final static boolean DEBUG = false;
|
// private final static boolean DEBUG = false;
|
||||||
|
|
||||||
|
|
@ -40,6 +48,19 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
/**
 * Builds the exact allele-frequency calculation model.
 *
 * All four arguments are forwarded unchanged to the superclass constructor. In addition,
 * when {@code UAC.exactCallsLog} is non-null, the per-call log file is opened through
 * {@link #initializeOutputFile(File)}; otherwise no log is set up.
 *
 * @param UAC           argument collection; only {@code exactCallsLog} is read directly here
 * @param N             passed through to the superclass
 * @param logger        passed through to the superclass
 * @param verboseWriter passed through to the superclass
 */
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||||
super(UAC, N, logger, verboseWriter);
|
super(UAC, N, logger, verboseWriter);
|
||||||
|
// logging is opt-in: a null exactCallsLog leaves callReport unset
if ( UAC.exactCallsLog != null )
|
||||||
|
initializeOutputFile(UAC.exactCallsLog);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Opens the exact-call log and writes its header row.
 *
 * When {@code outputFile} is non-null, a new {@link PrintStream} over that file is stored in
 * {@code callReport} and a single header line with the column names
 * {@code loc, variable, key, value} (joined with a tab via {@code Utils.join}) is printed.
 * When {@code outputFile} is null the method does nothing.
 *
 * NOTE(review): nothing in the visible code closes {@code callReport}, and calling this method
 * a second time replaces the field without closing the previous stream -- confirm the stream
 * is released elsewhere.
 *
 * @param outputFile file to write the call log to; may be null
 * @throws UserException.CouldNotCreateOutputFile if opening the file raises FileNotFoundException
 */
public void initializeOutputFile(final File outputFile) {
|
||||||
|
try {
|
||||||
|
if (outputFile != null) {
|
||||||
|
callReport = new PrintStream( new FileOutputStream(outputFile) );
|
||||||
|
// header row; the same four columns are emitted for every record by printCallElement
callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value")));
|
||||||
|
}
|
||||||
|
} catch ( FileNotFoundException e ) {
|
||||||
|
// rethrown as a user-facing error, keeping the original exception as the cause
throw new UserException.CouldNotCreateOutputFile(outputFile, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Allele> getLog10PNonRef(final VariantContext vc,
|
public List<Allele> getLog10PNonRef(final VariantContext vc,
|
||||||
|
|
@ -61,11 +82,44 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
GLs = VariantContextUtils.subsetDiploidAlleles(vc, alleles, false);
|
GLs = VariantContextUtils.subsetDiploidAlleles(vc, alleles, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
callTimer.start();
|
||||||
linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result);
|
linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result);
|
||||||
|
final long nanoTime = callTimer.getElapsedTimeNano();
|
||||||
|
|
||||||
|
if ( callReport != null )
|
||||||
|
printCallInfo(vc, alleles, GLs, log10AlleleFrequencyPriors, nanoTime, result.getLog10PosteriorOfAFzero());
|
||||||
|
|
||||||
return alleles;
|
return alleles;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Writes every logged detail of one exact-model call to {@code callReport}, one row per
 * element, then flushes the stream.
 *
 * Rows are emitted in this order, each through {@code printCallElement}:
 *   - "type"                   (key "ignore"): {@code vc.getType()}
 *   - "allele"                 (key = running index from 0): display string of each allele
 *   - "PL"                     (key = sample name): likelihoods string of each genotype
 *   - "priorI"                 (key = array index): each entry of the prior array
 *   - "runtime.nano"           (key "ignore"): {@code runtimeNano}
 *   - "log10PosteriorOfAFzero" (key "ignore"): {@code log10PosteriorOfAFzero}
 *
 * {@code callReport} is dereferenced without a null check; the visible call site only invokes
 * this method when {@code callReport != null}.
 *
 * @param vc                         variant context the call was made on; supplies type and location
 * @param alleles                    alleles to list, in iteration order
 * @param GLs                        genotypes whose sample names and likelihood strings are logged
 * @param log10AlleleFrequencyPriors priors to list, one row per array entry
 * @param runtimeNano                value logged under "runtime.nano"
 * @param log10PosteriorOfAFzero     value logged under "log10PosteriorOfAFzero"
 */
private void printCallInfo(final VariantContext vc,
|
||||||
|
final List<Allele> alleles,
|
||||||
|
final GenotypesContext GLs,
|
||||||
|
final double[] log10AlleleFrequencyPriors,
|
||||||
|
final long runtimeNano,
|
||||||
|
final double log10PosteriorOfAFzero) {
|
||||||
|
// "ignore" is the placeholder key used for rows that carry a single value
printCallElement(vc, "type", "ignore", vc.getType());
|
||||||
|
|
||||||
|
// running index used as the key of each allele row
int allelei = 0;
|
||||||
|
for ( final Allele a : alleles )
|
||||||
|
printCallElement(vc, "allele", allelei++, a.getDisplayString());
|
||||||
|
|
||||||
|
for ( final Genotype g : GLs )
|
||||||
|
printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString());
|
||||||
|
|
||||||
|
for ( int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++ )
|
||||||
|
printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]);
|
||||||
|
|
||||||
|
printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
|
||||||
|
printCallElement(vc, "log10PosteriorOfAFzero", "ignore", log10PosteriorOfAFzero);
|
||||||
|
|
||||||
|
// flush after each call's rows have been written
callReport.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Prints a single row to {@code callReport}.
 *
 * The row holds four fields joined with a tab via {@code Utils.join}: the location formatted
 * as {@code chr:start} from {@code vc}, followed by {@code variable}, {@code key} and
 * {@code value} in that order. This matches the header columns
 * {@code loc, variable, key, value}.
 *
 * @param vc       variant context supplying the chromosome and start position
 * @param variable name of the quantity being logged
 * @param key      sub-key for the quantity (an index, a sample name, or a placeholder)
 * @param value    value to log
 */
private void printCallElement(final VariantContext vc, final Object variable, final Object key, final Object value) {
|
||||||
|
final String loc = String.format("%s:%d", vc.getChr(), vc.getStart());
|
||||||
|
callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value)));
|
||||||
|
}
|
||||||
|
|
||||||
private static final int PL_INDEX_OF_HOM_REF = 0;
|
private static final int PL_INDEX_OF_HOM_REF = 0;
|
||||||
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
|
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
|
||||||
|
|
|
||||||
|
|
@ -186,7 +186,6 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
||||||
@Argument(shortName="ef", fullName="exclude_filtered_reference_sites", doc="Don't include in the analysis sites where the reference sample VCF is filtered. Default: false.", required=false)
|
@Argument(shortName="ef", fullName="exclude_filtered_reference_sites", doc="Don't include in the analysis sites where the reference sample VCF is filtered. Default: false.", required=false)
|
||||||
boolean EXCLUDE_FILTERED_REFERENCE_SITES = false;
|
boolean EXCLUDE_FILTERED_REFERENCE_SITES = false;
|
||||||
|
|
||||||
|
|
||||||
// Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value!
|
// Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value!
|
||||||
public UnifiedArgumentCollection clone() {
|
public UnifiedArgumentCollection clone() {
|
||||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||||
|
|
@ -224,6 +223,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
||||||
uac.minReferenceDepth = minReferenceDepth;
|
uac.minReferenceDepth = minReferenceDepth;
|
||||||
uac.EXCLUDE_FILTERED_REFERENCE_SITES = EXCLUDE_FILTERED_REFERENCE_SITES;
|
uac.EXCLUDE_FILTERED_REFERENCE_SITES = EXCLUDE_FILTERED_REFERENCE_SITES;
|
||||||
uac.IGNORE_LANE_INFO = IGNORE_LANE_INFO;
|
uac.IGNORE_LANE_INFO = IGNORE_LANE_INFO;
|
||||||
|
uac.exactCallsLog = exactCallsLog;
|
||||||
|
|
||||||
// todo- arguments to remove
|
// todo- arguments to remove
|
||||||
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
|
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
|
||||||
|
|
@ -242,5 +242,6 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
||||||
this.OutputMode = SCAC.OutputMode;
|
this.OutputMode = SCAC.OutputMode;
|
||||||
this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
|
this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
|
||||||
this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
|
this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
|
||||||
|
this.exactCallsLog = SCAC.exactCallsLog;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue