Merge pull request #1326 from broadinstitute/sl_issue_1293

Added maxNumPLValues argument to allow users to set maximum number of PL values in output.
This commit is contained in:
samuelklee 2016-04-28 10:52:27 -04:00
commit aa0c76a166
10 changed files with 281 additions and 120 deletions

View File

@ -51,6 +51,7 @@
package org.broadinstitute.gatk.engine.arguments; package org.broadinstitute.gatk.engine.arguments;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator;
import org.broadinstitute.gatk.utils.commandline.Advanced; import org.broadinstitute.gatk.utils.commandline.Advanced;
import org.broadinstitute.gatk.utils.commandline.Argument; import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants; import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
@ -113,7 +114,7 @@ public class GenotypeCalculationArgumentCollection implements Cloneable{
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0; public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
/** /**
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES), * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN_ALLELES),
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it * then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend * scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
* that you not play around with this parameter. * that you not play around with this parameter.
@ -124,6 +125,15 @@ public class GenotypeCalculationArgumentCollection implements Cloneable{
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
public int MAX_ALTERNATE_ALLELES = 6; public int MAX_ALTERNATE_ALLELES = 6;
/**
* Determines the maximum number of PL values that will be logged in the output. If the number of genotypes
* (which is determined by the ploidy and the number of alleles) exceeds the value provided by this argument,
* then output of all of the PL values will be suppressed.
*/
@Advanced
@Argument(fullName = "max_num_PL_values", shortName = "maxNumPLValues", doc = "Maximum number of PL values to output", required = false)
public int MAX_NUM_PL_VALUES = AFCalculator.MAX_NUM_PL_VALUES_DEFAULT;
/** /**
* By default, the prior specified with the argument --heterozygosity/-hets is used for variant discovery at a particular locus, using an infinite sites model, * By default, the prior specified with the argument --heterozygosity/-hets is used for variant discovery at a particular locus, using an infinite sites model,
* see e.g. Waterson (1975) or Tajima (1996). * see e.g. Waterson (1975) or Tajima (1996).

View File

@ -56,22 +56,22 @@ import com.google.java.contract.Requires;
import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFInfoHeaderLine; import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.SampleList;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.QualityUtils; import org.broadinstitute.gatk.utils.QualityUtils;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.genotyper.SampleList;
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils; import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants; import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines; import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
@ -85,7 +85,7 @@ import java.util.*;
*/ */
public abstract class GenotypingEngine<Config extends StandardCallerArgumentCollection> { public abstract class GenotypingEngine<Config extends StandardCallerArgumentCollection> {
protected final AFCalculatorProvider afCalculatorProvider ; protected final AFCalculatorProvider afCalculatorProvider;
protected Logger logger; protected Logger logger;
@ -104,6 +104,9 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
protected final GenomeLocParser genomeLocParser; protected final GenomeLocParser genomeLocParser;
protected static int maxNumPLValuesObserved = 0;
protected static int numTimesMaxNumPLValuesExceeded = 0;
/** /**
* Construct a new genotyper engine, on a specific subset of samples. * Construct a new genotyper engine, on a specific subset of samples.
* *
@ -226,7 +229,8 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
final int defaultPloidy = configuration.genotypeArgs.samplePloidy; final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES; final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
final AFCalculator afCalculator = afCalculatorProvider.getInstance(vc,defaultPloidy,maxAltAlleles); final int maxNumPLValues = configuration.genotypeArgs.MAX_NUM_PL_VALUES;
final AFCalculator afCalculator = afCalculatorProvider.getInstance(vc,defaultPloidy,maxAltAlleles).setMaxNumPLValues(maxNumPLValues);
final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(vc, defaultPloidy,maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model)); final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(vc, defaultPloidy,maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult); final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult);
@ -730,4 +734,15 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
return 1.0 - Math.pow(10.0, normalizedLog10ACeq0Posterior); return 1.0 - Math.pow(10.0, normalizedLog10ACeq0Posterior);
} }
} }
/**
* Logs the number of times the maximum allowed number of PLs was exceeded and the largest number of PLs observed. The corresponding counters are reset.
*/
public void printFinalMaxNumPLValuesWarning() {
final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
final int maxNumPLValues = configuration.genotypeArgs.MAX_NUM_PL_VALUES;
final AFCalculator afCalculator = afCalculatorProvider.getInstance(defaultPloidy,maxAltAlleles).setMaxNumPLValues(maxNumPLValues);
afCalculator.printFinalMaxNumPLValuesWarning();
}
} }

View File

@ -56,22 +56,22 @@ import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContext;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.genotyper.SampleList;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.utils.BaseUtils; import org.broadinstitute.gatk.utils.BaseUtils;
import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.baq.BAQ; import org.broadinstitute.gatk.utils.baq.BAQ;
import org.broadinstitute.gatk.utils.classloader.PluginManager; import org.broadinstitute.gatk.utils.classloader.PluginManager;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.genotyper.SampleList;
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils; import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
import org.broadinstitute.gatk.utils.pileup.PileupElement; import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.io.PrintStream; import java.io.PrintStream;
@ -470,7 +470,9 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
allAllelesToUse, false, model, perReadAlleleLikelihoodMap); allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
final int defaultPloidy = configuration.genotypeArgs.samplePloidy; final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES; final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
return afCalculatorProvider.getInstance(vc, defaultPloidy, maxAltAlleles).getLog10PNonRef(vc, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model)); final int maxNumPLValues = configuration.genotypeArgs.MAX_NUM_PL_VALUES;
return afCalculatorProvider.getInstance(vc, defaultPloidy, maxAltAlleles).setMaxNumPLValues(maxNumPLValues)
.getLog10PNonRef(vc, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
} }
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(final ReferenceContext refContext, private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(final ReferenceContext refContext,

View File

@ -53,11 +53,12 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import com.google.java.contract.Ensures; import com.google.java.contract.Ensures;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.SimpleTimer;
import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.GenotypesContext; import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContext;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.SimpleTimer;
import java.io.File; import java.io.File;
import java.util.List; import java.util.List;
@ -67,10 +68,13 @@ import java.util.List;
* Generic interface for calculating the probability of alleles segregating given priors and genotype likelihoods * Generic interface for calculating the probability of alleles segregating given priors and genotype likelihoods
*/ */
public abstract class AFCalculator implements Cloneable { public abstract class AFCalculator implements Cloneable {
private final static Logger defaultLogger = Logger.getLogger(AFCalculator.class); private static final Logger defaultLogger = Logger.getLogger(AFCalculator.class);
public static final int MAX_NUM_PL_VALUES_DEFAULT = 100;
protected Logger logger = defaultLogger; protected Logger logger = defaultLogger;
protected int maxNumPLValues = MAX_NUM_PL_VALUES_DEFAULT; // if PL vectors longer than this # of elements, don't log them
protected static int maxNumPLValuesObserved = 0;
protected static long numTimesMaxNumPLValuesExceeded = 0;
private SimpleTimer callTimer = new SimpleTimer(); private SimpleTimer callTimer = new SimpleTimer();
private StateTracker stateTracker; private StateTracker stateTracker;
@ -102,10 +106,19 @@ public abstract class AFCalculator implements Cloneable {
* *
* @param logger * @param logger
*/ */
public void setLogger(Logger logger) { public void setLogger(final Logger logger) {
this.logger = logger; this.logger = logger;
} }
/**
* Set the maximum number of PL values to log. If the number of PL values exceeds this, no PL values will be logged.
* @param maxNumPLValues maximum number of PL values to log
*/
public AFCalculator setMaxNumPLValues(final int maxNumPLValues) {
this.maxNumPLValues = maxNumPLValues;
return this;
}
/** /**
* Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc * Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc
* *
@ -242,4 +255,50 @@ public abstract class AFCalculator implements Cloneable {
return getStateTracker(false,allele + 1).getAlleleCountsOfMAP()[allele]; return getStateTracker(false,allele + 1).getAlleleCountsOfMAP()[allele];
} }
/**
* Strips PLs from the specified GenotypeBuilder if their number exceeds the maximum allowed. Corresponding counters are updated.
* @param gb the GenotypeBuilder to modify
* @param vc the VariantContext
* @param sampleName the sample name
* @param newLikelihoods the PL array
*/
protected void removePLsIfMaxNumPLValuesExceeded(final GenotypeBuilder gb, final VariantContext vc, final String sampleName, final double[] newLikelihoods) {
final int numPLValuesFound = newLikelihoods.length;
if (numPLValuesFound > maxNumPLValues) {
logMaxNumPLValuesWarning(vc, sampleName, numPLValuesFound);
numTimesMaxNumPLValuesExceeded++;
gb.noPL();
if (numPLValuesFound > maxNumPLValuesObserved) {
maxNumPLValuesObserved = numPLValuesFound;
}
}
}
private void logMaxNumPLValuesWarning(final VariantContext vc, final String sampleName, final int numPLValuesFound) {
final String message = String.format("Maximum allowed number of PLs (%d) exceeded for sample %s at %s:%d-%d with %d possible genotypes. " +
"No PLs will be output for these genotypes (which may cause incorrect results in subsequent analyses) " +
"unless the --max_num_PL_values argument is increased accordingly",
maxNumPLValues, sampleName, vc.getContig(), vc.getStart(), vc.getEnd(), numPLValuesFound);
if ( numTimesMaxNumPLValuesExceeded == 0 ) {
logger.warn(message + ". Unless the DEBUG logging level is used, this warning message is output just once per run and further warnings are suppressed.");
} else {
logger.debug(message);
}
}
/**
* Logs the number of times the maximum allowed number of PLs was exceeded and the largest number of PLs observed. The corresponding counters are reset.
*/
public void printFinalMaxNumPLValuesWarning() {
if ( numTimesMaxNumPLValuesExceeded > 0 ) {
final String message = String.format("Maximum allowed number of PLs (%d) was exceeded %d time(s); the largest number of PLs found was %d. " +
"No PLs will be output for these genotypes (which may cause incorrect results in subsequent analyses) " +
"unless the --max_num_PL_values argument is increased accordingly",
maxNumPLValues, numTimesMaxNumPLValuesExceeded, maxNumPLValuesObserved);
logger.warn(message);
}
maxNumPLValuesObserved = 0;
numTimesMaxNumPLValuesExceeded = 0;
}
} }

View File

@ -51,10 +51,10 @@
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants; import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.*;
import java.util.*; import java.util.*;
@ -65,8 +65,8 @@ abstract class ExactAFCalculator extends AFCalculator {
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
// useful so that we don't keep printing out the same warning message // useful so that we don't keep printing out the same warning messages
protected static boolean printedWarning = false; protected static boolean printedMaxAltAllelesWarning = false;
/** /**
* Sorts {@link ExactAFCalculator.LikelihoodSum} instances where those with higher likelihood are first. * Sorts {@link ExactAFCalculator.LikelihoodSum} instances where those with higher likelihood are first.
@ -168,15 +168,14 @@ abstract class ExactAFCalculator extends AFCalculator {
if (altAlleleReduction == 0) if (altAlleleReduction == 0)
return vc; return vc;
String message = "this tool is currently set to genotype at most " + maximumAlternativeAlleles final String message = String.format("This tool is currently set to genotype at most %d " +
+ " alternate alleles in a given context, but the context at " + vc.getContig() + ":" + vc.getStart() "alternate alleles in a given context, but the context at %s: %d has %d " +
+ " has " + (vc.getAlternateAlleles().size()) "alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument",
+ " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument"; maximumAlternativeAlleles, vc.getContig(), vc.getStart(), vc.getAlternateAlleles().size());
if ( !printedWarning ) { if ( !printedMaxAltAllelesWarning ) {
printedWarning = true; printedMaxAltAllelesWarning = true;
message += ". This warning message is output just once per run and further warnings will be suppressed unless the DEBUG logging level is used."; logger.warn(message + ". Unless the DEBUG logging level is used, this warning message is output just once per run and further warnings are suppressed.");
logger.warn(message);
} else { } else {
logger.debug(message); logger.debug(message);
} }

View File

@ -71,6 +71,8 @@ public class FixedAFCalculatorProvider extends AFCalculatorProvider {
private final int maximumAltAlleleCount; private final int maximumAltAlleleCount;
private final int maximumNumPLValues;
private final int ploidy; private final int ploidy;
/** /**
@ -125,17 +127,18 @@ public class FixedAFCalculatorProvider extends AFCalculatorProvider {
final Logger logger, final boolean verifyRequests) { final Logger logger, final boolean verifyRequests) {
if (configuration == null) if (configuration == null)
throw new IllegalArgumentException("null configuration"); throw new IllegalArgumentException("null genotype-arguments configuration");
if (configuration == null)
throw new IllegalArgumentException("null configuration genotype arguments");
if (configuration.samplePloidy < 1) if (configuration.samplePloidy < 1)
throw new IllegalArgumentException("invalid sample ploidy " + configuration.samplePloidy); throw new IllegalArgumentException("invalid sample ploidy " + configuration.samplePloidy);
if (configuration.MAX_ALTERNATE_ALLELES < 0) if (configuration.MAX_ALTERNATE_ALLELES < 0)
throw new IllegalArgumentException("invalid maximum number of alleles " + (configuration.MAX_ALTERNATE_ALLELES + 1)); throw new IllegalArgumentException("invalid maximum number of alleles " + (configuration.MAX_ALTERNATE_ALLELES + 1));
if (configuration.MAX_NUM_PL_VALUES < 0)
throw new IllegalArgumentException("invalid maximum number of PL values " + configuration.MAX_NUM_PL_VALUES);
ploidy = configuration.samplePloidy; ploidy = configuration.samplePloidy;
maximumAltAlleleCount = configuration.MAX_ALTERNATE_ALLELES; maximumAltAlleleCount = configuration.MAX_ALTERNATE_ALLELES;
singleton = AFCalculatorImplementation.bestValue(ploidy,maximumAltAlleleCount,preferred).newInstance(); maximumNumPLValues = configuration.MAX_NUM_PL_VALUES;
singleton = AFCalculatorImplementation.bestValue(ploidy,maximumAltAlleleCount,preferred).newInstance().setMaxNumPLValues(maximumNumPLValues);
singleton.setLogger(logger); singleton.setLogger(logger);
this.verifyRequests = verifyRequests; this.verifyRequests = verifyRequests;
} }

View File

@ -58,15 +58,16 @@ import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalcula
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculators; import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculators;
import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants; import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.*; import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
public class GeneralPloidyExactAFCalculator extends ExactAFCalculator { public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them
protected GeneralPloidyExactAFCalculator() { protected GeneralPloidyExactAFCalculator() {
} }
@ -491,6 +492,7 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
final boolean assignGenotypes, final double[] newLikelihoods) { final boolean assignGenotypes, final double[] newLikelihoods) {
final GenotypeBuilder gb = new GenotypeBuilder(g); final GenotypeBuilder gb = new GenotypeBuilder(g);
final String sampleName = g.getSampleName();
// add likelihoods // add likelihoods
gb.PL(newLikelihoods); gb.PL(newLikelihoods);
@ -500,7 +502,7 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
if (newSACs != null) if (newSACs != null)
gb.attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, newSACs); gb.attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, newSACs);
if (assignGenotypes) if (assignGenotypes)
assignGenotype(gb, newLikelihoods, allelesToUse, ploidy); assignGenotype(gb, vc, sampleName, newLikelihoods, allelesToUse, ploidy);
else else
gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy)); gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy));
@ -528,13 +530,18 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
} }
/** /**
* Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs * Assign genotypes (GTs) to the samples in the VariantContext greedily based on the PLs
* *
* @param gb the GenotypeBuilder to modify
* @param vc the VariantContext
* @param sampleName the sample name
* @param newLikelihoods the PL array * @param newLikelihoods the PL array
* @param allelesToUse the list of alleles to choose from (corresponding to the PLs) * @param allelesToUse the list of alleles to choose from (corresponding to the PLs)
* @param numChromosomes Number of chromosomes per pool * @param numChromosomes Number of chromosomes per pool
*/ */
private void assignGenotype(final GenotypeBuilder gb, private void assignGenotype(final GenotypeBuilder gb,
final VariantContext vc,
final String sampleName,
final double[] newLikelihoods, final double[] newLikelihoods,
final List<Allele> allelesToUse, final List<Allele> allelesToUse,
final int numChromosomes) { final int numChromosomes) {
@ -547,13 +554,10 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
gb.alleles(alleleCounts.asAlleleList(allelesToUse)); gb.alleles(alleleCounts.asAlleleList(allelesToUse));
// remove PLs if necessary removePLsIfMaxNumPLValuesExceeded(gb, vc, sampleName, newLikelihoods);
if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING)
gb.noPL();
// TODO - deprecated so what is the appropriate method to call? // TODO - deprecated so what is the appropriate method to call?
if ( numNewAltAlleles > 0 ) if ( numNewAltAlleles > 0 )
gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
} }
} }

View File

@ -75,8 +75,6 @@ import java.util.*;
*/ */
public class IndependentAllelesExactAFCalculator extends ExactAFCalculator { public class IndependentAllelesExactAFCalculator extends ExactAFCalculator {
private static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them
/** /**
* Array that caches the allele list that corresponds to the ith ploidy. * Array that caches the allele list that corresponds to the ith ploidy.
* *
@ -460,78 +458,83 @@ public class IndependentAllelesExactAFCalculator extends ExactAFCalculator {
@Override @Override
@Requires("vc != null && allelesToUse != null") @Requires("vc != null && allelesToUse != null")
public GenotypesContext subsetAlleles(VariantContext vc, int defaultPloidy, List<Allele> allelesToUse, boolean assignGenotypes) { public GenotypesContext subsetAlleles(VariantContext vc, int defaultPloidy, List<Allele> allelesToUse, boolean assignGenotypes) {
// the genotypes with PLs // the genotypes with PLs
final GenotypesContext oldGTs = vc.getGenotypes(); final GenotypesContext oldGTs = vc.getGenotypes();
// samples // samples
final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName(); final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();
// the new genotypes to create // the new genotypes to create
final GenotypesContext newGTs = GenotypesContext.create(); final GenotypesContext newGTs = GenotypesContext.create();
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
final int numNewAltAlleles = allelesToUse.size() - 1; final int numNewAltAlleles = allelesToUse.size() - 1;
// create the new genotypes // create the new genotypes
for ( int k = 0; k < oldGTs.size(); k++ ) { for ( int k = 0; k < oldGTs.size(); k++ ) {
final Genotype g = oldGTs.get(sampleIndices.get(k)); final Genotype g = oldGTs.get(sampleIndices.get(k));
final int declaredPloidy = g.getPloidy(); final int declaredPloidy = g.getPloidy();
final int ploidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy; final int ploidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy;
if ( !g.hasLikelihoods() ) { if ( !g.hasLikelihoods() ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(),GATKVariantContextUtils.noCallAlleles(ploidy))); newGTs.add(GenotypeBuilder.create(g.getSampleName(),GATKVariantContextUtils.noCallAlleles(ploidy)));
continue; continue;
}
// create the new likelihoods array from the alleles we are allowed to use
final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
double[] newLikelihoods;
// Optimization: if # of new alt alleles = 0 (pure ref call), keep original likelihoods so we skip normalization
// and subsetting
if ( numOriginalAltAlleles == numNewAltAlleles || numNewAltAlleles == 0) {
newLikelihoods = originalLikelihoods;
} else {
newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse);
// might need to re-normalize
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
}
// if there is no mass on the (new) likelihoods, then just no-call the sample
if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), GATKVariantContextUtils.noCallAlleles(ploidy)));
}
else {
final GenotypeBuilder gb = new GenotypeBuilder(g);
if ( numNewAltAlleles == 0 )
gb.noPL();
else
gb.PL(newLikelihoods);
// if we weren't asked to assign a genotype, then just no-call the sample
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy));
else
assignGenotype(gb, newLikelihoods, allelesToUse, ploidy);
newGTs.add(gb.make());
}
} }
return GATKVariantContextUtils.fixADFromSubsettedAlleles(newGTs, vc, allelesToUse); // create the new likelihoods array from the alleles we are allowed to use
final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
double[] newLikelihoods;
// Optimization: if # of new alt alleles = 0 (pure ref call), keep original likelihoods so we skip normalization
// and subsetting
if ( numOriginalAltAlleles == numNewAltAlleles || numNewAltAlleles == 0) {
newLikelihoods = originalLikelihoods;
} else {
newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse);
// might need to re-normalize
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
}
// if there is no mass on the (new) likelihoods, then just no-call the sample
if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), GATKVariantContextUtils.noCallAlleles(ploidy)));
} else {
final GenotypeBuilder gb = new GenotypeBuilder(g);
final String sampleName = g.getSampleName();
if ( numNewAltAlleles == 0 )
gb.noPL();
else
gb.PL(newLikelihoods);
// if we weren't asked to assign a genotype, then just no-call the sample
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy));
else
assignGenotype(gb, vc, sampleName, newLikelihoods, allelesToUse, ploidy);
newGTs.add(gb.make());
}
}
return GATKVariantContextUtils.fixADFromSubsettedAlleles(newGTs, vc, allelesToUse);
} }
/** /**
* Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs * Assign genotypes (GTs) to the samples in the VariantContext greedily based on the PLs
* *
* @param gb the GenotypeBuilder to modify
* @param vc the VariantContext
* @param sampleName the sample name
* @param newLikelihoods the PL array * @param newLikelihoods the PL array
* @param allelesToUse the list of alleles to choose from (corresponding to the PLs) * @param allelesToUse the list of alleles to choose from (corresponding to the PLs)
* @param numChromosomes Number of chromosomes per pool * @param numChromosomes Number of chromosomes per pool
*/ */
private void assignGenotype(final GenotypeBuilder gb, private void assignGenotype(final GenotypeBuilder gb,
final VariantContext vc,
final String sampleName,
final double[] newLikelihoods, final double[] newLikelihoods,
final List<Allele> allelesToUse, final List<Allele> allelesToUse,
final int numChromosomes) { final int numChromosomes) {
@ -544,9 +547,7 @@ public class IndependentAllelesExactAFCalculator extends ExactAFCalculator {
gb.alleles(alleleCounts.asAlleleList(allelesToUse)); gb.alleles(alleleCounts.asAlleleList(allelesToUse));
// remove PLs if necessary removePLsIfMaxNumPLValuesExceeded(gb, vc, sampleName, newLikelihoods);
if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING)
gb.noPL();
if ( numNewAltAlleles > 0 ) if ( numNewAltAlleles > 0 )
gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));

View File

@ -55,30 +55,24 @@ import com.google.java.contract.Ensures;
import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriter;
import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*; import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFStandardHeaderLines;
import org.broadinstitute.gatk.engine.CommandLineGATK; import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.GATKVCFUtils; import org.broadinstitute.gatk.engine.GATKVCFUtils;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection; import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
import org.broadinstitute.gatk.engine.filters.BadMateFilter;
import org.broadinstitute.gatk.engine.io.DirectOutputTracker; import org.broadinstitute.gatk.engine.io.DirectOutputTracker;
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub; import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardHCAnnotation;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.downsampling.AlleleBiasedDownsamplingUtils;
import org.broadinstitute.gatk.utils.downsampling.DownsampleType;
import org.broadinstitute.gatk.utils.downsampling.DownsamplingUtils;
import org.broadinstitute.gatk.engine.filters.BadMateFilter;
import org.broadinstitute.gatk.utils.genotyper.*;
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
import org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub; import org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub;
import org.broadinstitute.gatk.utils.haplotypeBAMWriter.DroppedReadsTracker; import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.walkers.*; import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardHCAnnotation;
import org.broadinstitute.gatk.tools.walkers.genotyper.*; import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
@ -91,21 +85,33 @@ import org.broadinstitute.gatk.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.gatk.utils.activeregion.ActivityProfileState; import org.broadinstitute.gatk.utils.activeregion.ActivityProfileState;
import org.broadinstitute.gatk.utils.clipping.ReadClipper; import org.broadinstitute.gatk.utils.clipping.ReadClipper;
import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.downsampling.AlleleBiasedDownsamplingUtils;
import org.broadinstitute.gatk.utils.downsampling.DownsampleType;
import org.broadinstitute.gatk.utils.downsampling.DownsamplingUtils;
import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.gatk.utils.fragments.FragmentCollection; import org.broadinstitute.gatk.utils.fragments.FragmentCollection;
import org.broadinstitute.gatk.utils.fragments.FragmentUtils; import org.broadinstitute.gatk.utils.fragments.FragmentUtils;
import org.broadinstitute.gatk.utils.genotyper.*;
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils; import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
import org.broadinstitute.gatk.utils.gvcf.GVCFWriter; import org.broadinstitute.gatk.utils.gvcf.GVCFWriter;
import org.broadinstitute.gatk.utils.haplotype.Haplotype; import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.haplotypeBAMWriter.DroppedReadsTracker;
import org.broadinstitute.gatk.utils.haplotypeBAMWriter.HaplotypeBAMWriter; import org.broadinstitute.gatk.utils.haplotypeBAMWriter.HaplotypeBAMWriter;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import org.broadinstitute.gatk.utils.help.HelpConstants; import org.broadinstitute.gatk.utils.help.HelpConstants;
import org.broadinstitute.gatk.utils.pairhmm.PairHMM; import org.broadinstitute.gatk.utils.pairhmm.PairHMM;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.sam.AlignmentUtils; import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.sam.ReadUtils; import org.broadinstitute.gatk.utils.sam.ReadUtils;
import org.broadinstitute.gatk.utils.variant.*; import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
@ -1154,6 +1160,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
@Override @Override
public void onTraversalDone(Integer result) { public void onTraversalDone(Integer result) {
genotypingEngine.printFinalMaxNumPLValuesWarning();
if ( HCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) ((GVCFWriter)vcfWriter).close(false); // GROSS -- engine forces us to close our own VCF writer since we wrapped it if ( HCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) ((GVCFWriter)vcfWriter).close(false); // GROSS -- engine forces us to close our own VCF writer since we wrapped it
referenceConfidenceModel.close(); referenceConfidenceModel.close();
//TODO remove the need to call close here for debugging, the likelihood output stream should be managed //TODO remove the need to call close here for debugging, the likelihood output stream should be managed

View File

@ -51,13 +51,18 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller; package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Level;
import org.broadinstitute.gatk.engine.GATKVCFUtils; import org.broadinstitute.gatk.engine.GATKVCFUtils;
import org.broadinstitute.gatk.engine.walkers.WalkerTest; import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType; import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType;
import org.testng.Assert;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
@ -352,4 +357,60 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
executeTest(" testHaplotypeCallerMultiAllelicNonRef", spec); executeTest(" testHaplotypeCallerMultiAllelicNonRef", spec);
} }
@Test
public void testHaplotypeCallerMaxNumPLValues() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 4 -maxNumPLValues 70",
b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("7ea93210277fa4b590790a81c4b3994b"));
spec.disableShadowBCF();
executeTest("testHaplotypeCallerMaxNumPLValues", spec);
}
@Test
public void testHaplotypeCallerMaxNumPLValuesExceededWithWarnLogLevel() throws IOException {
// Need to see log WARN messages
final Level level = logger.getLevel();
logger.setLevel(Level.WARN);
final File logFile = createTempFile("testMaxNumPLValuesExceededWithWarnLogLevel.log", ".tmp");
final String logFileName = logFile.getAbsolutePath();
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 4 -maxNumPLValues 30 -log %s",
b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals",
GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER, logFileName);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9d69cb9dc67e0d0ee9863767428e6841"));
spec.disableShadowBCF();
executeTest("testHaplotypeCallerMaxNumPLValuesExceededWithWarnLogLevel", spec);
// Make sure the "Maximum allowed number of PLs exceeded" messages are in the log
Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) exceeded for sample NA12878 at 20:10097101-10097101 with 35 possible genotypes. " +
"No PLs will be output for these genotypes (which may cause incorrect results in subsequent analyses) unless the --max_num_PL_values argument is increased accordingly. " +
"Unless the DEBUG logging level is used, this warning message is output just once per run and further warnings are suppressed."));
Assert.assertFalse(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) exceeded for sample NA12878 at 20:10316239-10316241 with 70 possible genotypes."));
Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) was exceeded 2 time(s); the largest number of PLs found was 70."));
// Set the log level back
logger.setLevel(level);
}
@Test
public void testHaplotypeCallerMaxNumPLValuesExceededWithDebugLogLevel() throws IOException {
// Need to see log DEBUG messages
final Level level = logger.getLevel();
logger.setLevel(Level.DEBUG);
final File logFile = createTempFile("testMaxNumPLValuesExceededWithDebugLogLevel.log", ".tmp");
final String logFileName = logFile.getAbsolutePath();
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 4 -maxNumPLValues 30 -log %s",
b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals",
GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER, logFileName);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9d69cb9dc67e0d0ee9863767428e6841"));
spec.disableShadowBCF();
executeTest("testHaplotypeCallerMaxNumPLValuesExceededWithDebugLogLevel", spec);
// Make sure the "Maximum allowed number of PLs exceeded" messages are in the log
Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) exceeded for sample NA12878 at 20:10097101-10097101 with 35 possible genotypes."));
Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) exceeded for sample NA12878 at 20:10316239-10316241 with 70 possible genotypes."));
Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) was exceeded 2 time(s); the largest number of PLs found was 70."));
// Set the log level back
logger.setLevel(level);
}
} }