Added maxNumPLValues argument to allow users to set maximum number of PL values in output.
This commit is contained in:
parent
46d41255ba
commit
e08940a5a8
|
|
@ -51,6 +51,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.arguments;
|
package org.broadinstitute.gatk.engine.arguments;
|
||||||
|
|
||||||
|
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Advanced;
|
import org.broadinstitute.gatk.utils.commandline.Advanced;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Argument;
|
import org.broadinstitute.gatk.utils.commandline.Argument;
|
||||||
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
|
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
|
||||||
|
|
@ -113,7 +114,7 @@ public class GenotypeCalculationArgumentCollection implements Cloneable{
|
||||||
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
|
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
|
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN_ALLELES),
|
||||||
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
|
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
|
||||||
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
|
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
|
||||||
* that you not play around with this parameter.
|
* that you not play around with this parameter.
|
||||||
|
|
@ -124,6 +125,15 @@ public class GenotypeCalculationArgumentCollection implements Cloneable{
|
||||||
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
||||||
public int MAX_ALTERNATE_ALLELES = 6;
|
public int MAX_ALTERNATE_ALLELES = 6;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines the maximum number of PL values that will be logged in the output. If the number of genotypes
|
||||||
|
* (which is determined by the ploidy and the number of alleles) exceeds the value provided by this argument,
|
||||||
|
* then output of all of the PL values will be suppressed.
|
||||||
|
*/
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName = "max_num_PL_values", shortName = "maxNumPLValues", doc = "Maximum number of PL values to output", required = false)
|
||||||
|
public int MAX_NUM_PL_VALUES = AFCalculator.MAX_NUM_PL_VALUES_DEFAULT;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* By default, the prior specified with the argument --heterozygosity/-hets is used for variant discovery at a particular locus, using an infinite sites model,
|
* By default, the prior specified with the argument --heterozygosity/-hets is used for variant discovery at a particular locus, using an infinite sites model,
|
||||||
* see e.g. Waterson (1975) or Tajima (1996).
|
* see e.g. Waterson (1975) or Tajima (1996).
|
||||||
|
|
|
||||||
|
|
@ -56,22 +56,22 @@ import com.google.java.contract.Requires;
|
||||||
import htsjdk.variant.variantcontext.*;
|
import htsjdk.variant.variantcontext.*;
|
||||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
|
||||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
|
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
|
||||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator;
|
|
||||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult;
|
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult;
|
||||||
|
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator;
|
||||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
|
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.gatk.utils.MathUtils;
|
import org.broadinstitute.gatk.utils.MathUtils;
|
||||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||||
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
|
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
|
||||||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||||
|
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||||
|
|
@ -85,7 +85,7 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public abstract class GenotypingEngine<Config extends StandardCallerArgumentCollection> {
|
public abstract class GenotypingEngine<Config extends StandardCallerArgumentCollection> {
|
||||||
|
|
||||||
protected final AFCalculatorProvider afCalculatorProvider ;
|
protected final AFCalculatorProvider afCalculatorProvider;
|
||||||
|
|
||||||
protected Logger logger;
|
protected Logger logger;
|
||||||
|
|
||||||
|
|
@ -104,6 +104,9 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
||||||
|
|
||||||
protected final GenomeLocParser genomeLocParser;
|
protected final GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
|
protected static int maxNumPLValuesObserved = 0;
|
||||||
|
protected static int numTimesMaxNumPLValuesExceeded = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a new genotyper engine, on a specific subset of samples.
|
* Construct a new genotyper engine, on a specific subset of samples.
|
||||||
*
|
*
|
||||||
|
|
@ -226,7 +229,8 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
||||||
|
|
||||||
final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
|
final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
|
||||||
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
|
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
|
||||||
final AFCalculator afCalculator = afCalculatorProvider.getInstance(vc,defaultPloidy,maxAltAlleles);
|
final int maxNumPLValues = configuration.genotypeArgs.MAX_NUM_PL_VALUES;
|
||||||
|
final AFCalculator afCalculator = afCalculatorProvider.getInstance(vc,defaultPloidy,maxAltAlleles).setMaxNumPLValues(maxNumPLValues);
|
||||||
final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(vc, defaultPloidy,maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
|
final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(vc, defaultPloidy,maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
|
||||||
|
|
||||||
final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult);
|
final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult);
|
||||||
|
|
@ -724,4 +728,15 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
||||||
return 1.0 - Math.pow(10.0, normalizedLog10ACeq0Posterior);
|
return 1.0 - Math.pow(10.0, normalizedLog10ACeq0Posterior);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Logs the number of times the maximum allowed number of PLs was exceeded and the largest number of PLs observed. The corresponding counters are reset.
|
||||||
|
*/
|
||||||
|
public void printFinalMaxNumPLValuesWarning() {
|
||||||
|
final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
|
||||||
|
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
|
||||||
|
final int maxNumPLValues = configuration.genotypeArgs.MAX_NUM_PL_VALUES;
|
||||||
|
final AFCalculator afCalculator = afCalculatorProvider.getInstance(defaultPloidy,maxAltAlleles).setMaxNumPLValues(maxNumPLValues);
|
||||||
|
afCalculator.printFinalMaxNumPLValuesWarning();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,22 +56,22 @@ import htsjdk.variant.variantcontext.GenotypesContext;
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
|
||||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult;
|
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult;
|
||||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
|
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
|
||||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.gatk.utils.baq.BAQ;
|
import org.broadinstitute.gatk.utils.baq.BAQ;
|
||||||
import org.broadinstitute.gatk.utils.classloader.PluginManager;
|
import org.broadinstitute.gatk.utils.classloader.PluginManager;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
|
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||||
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
|
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
|
||||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||||
|
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
@ -469,7 +469,9 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
||||||
allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||||
final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
|
final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
|
||||||
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
|
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
|
||||||
return afCalculatorProvider.getInstance(vc, defaultPloidy, maxAltAlleles).getLog10PNonRef(vc, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
|
final int maxNumPLValues = configuration.genotypeArgs.MAX_NUM_PL_VALUES;
|
||||||
|
return afCalculatorProvider.getInstance(vc, defaultPloidy, maxAltAlleles).setMaxNumPLValues(maxNumPLValues)
|
||||||
|
.getLog10PNonRef(vc, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
|
||||||
}
|
}
|
||||||
|
|
||||||
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(final ReferenceContext refContext,
|
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(final ReferenceContext refContext,
|
||||||
|
|
|
||||||
|
|
@ -53,11 +53,12 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import org.broadinstitute.gatk.utils.SimpleTimer;
|
|
||||||
import htsjdk.variant.variantcontext.Allele;
|
import htsjdk.variant.variantcontext.Allele;
|
||||||
|
import htsjdk.variant.variantcontext.GenotypeBuilder;
|
||||||
import htsjdk.variant.variantcontext.GenotypesContext;
|
import htsjdk.variant.variantcontext.GenotypesContext;
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.gatk.utils.SimpleTimer;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -67,10 +68,13 @@ import java.util.List;
|
||||||
* Generic interface for calculating the probability of alleles segregating given priors and genotype likelihoods
|
* Generic interface for calculating the probability of alleles segregating given priors and genotype likelihoods
|
||||||
*/
|
*/
|
||||||
public abstract class AFCalculator implements Cloneable {
|
public abstract class AFCalculator implements Cloneable {
|
||||||
private final static Logger defaultLogger = Logger.getLogger(AFCalculator.class);
|
private static final Logger defaultLogger = Logger.getLogger(AFCalculator.class);
|
||||||
|
public static final int MAX_NUM_PL_VALUES_DEFAULT = 100;
|
||||||
|
|
||||||
protected Logger logger = defaultLogger;
|
protected Logger logger = defaultLogger;
|
||||||
|
protected int maxNumPLValues = MAX_NUM_PL_VALUES_DEFAULT; // if PL vectors longer than this # of elements, don't log them
|
||||||
|
protected static int maxNumPLValuesObserved = 0;
|
||||||
|
protected static long numTimesMaxNumPLValuesExceeded = 0;
|
||||||
|
|
||||||
private SimpleTimer callTimer = new SimpleTimer();
|
private SimpleTimer callTimer = new SimpleTimer();
|
||||||
private StateTracker stateTracker;
|
private StateTracker stateTracker;
|
||||||
|
|
@ -102,10 +106,19 @@ public abstract class AFCalculator implements Cloneable {
|
||||||
*
|
*
|
||||||
* @param logger
|
* @param logger
|
||||||
*/
|
*/
|
||||||
public void setLogger(Logger logger) {
|
public void setLogger(final Logger logger) {
|
||||||
this.logger = logger;
|
this.logger = logger;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the maximum number of PL values to log. If the number of PL values exceeds this, no PL values will be logged.
|
||||||
|
* @param maxNumPLValues maximum number of PL values to log
|
||||||
|
*/
|
||||||
|
public AFCalculator setMaxNumPLValues(final int maxNumPLValues) {
|
||||||
|
this.maxNumPLValues = maxNumPLValues;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc
|
* Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc
|
||||||
*
|
*
|
||||||
|
|
@ -242,4 +255,50 @@ public abstract class AFCalculator implements Cloneable {
|
||||||
return getStateTracker(false,allele + 1).getAlleleCountsOfMAP()[allele];
|
return getStateTracker(false,allele + 1).getAlleleCountsOfMAP()[allele];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Strips PLs from the specified GenotypeBuilder if their number exceeds the maximum allowed. Corresponding counters are updated.
|
||||||
|
* @param gb the GenotypeBuilder to modify
|
||||||
|
* @param vc the VariantContext
|
||||||
|
* @param sampleName the sample name
|
||||||
|
* @param newLikelihoods the PL array
|
||||||
|
*/
|
||||||
|
protected void removePLsIfMaxNumPLValuesExceeded(final GenotypeBuilder gb, final VariantContext vc, final String sampleName, final double[] newLikelihoods) {
|
||||||
|
final int numPLValuesFound = newLikelihoods.length;
|
||||||
|
if (numPLValuesFound > maxNumPLValues) {
|
||||||
|
logMaxNumPLValuesWarning(vc, sampleName, numPLValuesFound);
|
||||||
|
numTimesMaxNumPLValuesExceeded++;
|
||||||
|
gb.noPL();
|
||||||
|
if (numPLValuesFound > maxNumPLValuesObserved) {
|
||||||
|
maxNumPLValuesObserved = numPLValuesFound;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void logMaxNumPLValuesWarning(final VariantContext vc, final String sampleName, final int numPLValuesFound) {
|
||||||
|
final String message = String.format("Maximum allowed number of PLs (%d) exceeded for sample %s at %s:%d-%d with %d possible genotypes. " +
|
||||||
|
"No PLs will be output for these genotypes (which may cause incorrect results in subsequent analyses) " +
|
||||||
|
"unless the --max_num_PL_values argument is increased accordingly",
|
||||||
|
maxNumPLValues, sampleName, vc.getContig(), vc.getStart(), vc.getEnd(), numPLValuesFound);
|
||||||
|
|
||||||
|
if ( numTimesMaxNumPLValuesExceeded == 0 ) {
|
||||||
|
logger.warn(message + ". Unless the DEBUG logging level is used, this warning message is output just once per run and further warnings are suppressed.");
|
||||||
|
} else {
|
||||||
|
logger.debug(message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Logs the number of times the maximum allowed number of PLs was exceeded and the largest number of PLs observed. The corresponding counters are reset.
|
||||||
|
*/
|
||||||
|
public void printFinalMaxNumPLValuesWarning() {
|
||||||
|
if ( numTimesMaxNumPLValuesExceeded > 0 ) {
|
||||||
|
final String message = String.format("Maximum allowed number of PLs (%d) was exceeded %d time(s); the largest number of PLs found was %d. " +
|
||||||
|
"No PLs will be output for these genotypes (which may cause incorrect results in subsequent analyses) " +
|
||||||
|
"unless the --max_num_PL_values argument is increased accordingly",
|
||||||
|
maxNumPLValues, numTimesMaxNumPLValuesExceeded, maxNumPLValuesObserved);
|
||||||
|
logger.warn(message);
|
||||||
|
}
|
||||||
|
maxNumPLValuesObserved = 0;
|
||||||
|
numTimesMaxNumPLValuesExceeded = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -51,10 +51,10 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
|
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
|
import htsjdk.variant.variantcontext.*;
|
||||||
import org.broadinstitute.gatk.utils.MathUtils;
|
import org.broadinstitute.gatk.utils.MathUtils;
|
||||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||||
import htsjdk.variant.variantcontext.*;
|
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -65,8 +65,8 @@ abstract class ExactAFCalculator extends AFCalculator {
|
||||||
|
|
||||||
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
||||||
|
|
||||||
// useful so that we don't keep printing out the same warning message
|
// useful so that we don't keep printing out the same warning messages
|
||||||
protected static boolean printedWarning = false;
|
protected static boolean printedMaxAltAllelesWarning = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sorts {@link ExactAFCalculator.LikelihoodSum} instances where those with higher likelihood are first.
|
* Sorts {@link ExactAFCalculator.LikelihoodSum} instances where those with higher likelihood are first.
|
||||||
|
|
@ -157,15 +157,14 @@ abstract class ExactAFCalculator extends AFCalculator {
|
||||||
if (altAlleleReduction == 0)
|
if (altAlleleReduction == 0)
|
||||||
return vc;
|
return vc;
|
||||||
|
|
||||||
String message = "this tool is currently set to genotype at most " + maximumAlternativeAlleles
|
final String message = String.format("This tool is currently set to genotype at most %d " +
|
||||||
+ " alternate alleles in a given context, but the context at " + vc.getContig() + ":" + vc.getStart()
|
"alternate alleles in a given context, but the context at %s: %d has %d " +
|
||||||
+ " has " + (vc.getAlternateAlleles().size())
|
"alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument",
|
||||||
+ " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument";
|
maximumAlternativeAlleles, vc.getContig(), vc.getStart(), vc.getAlternateAlleles().size());
|
||||||
|
|
||||||
if ( !printedWarning ) {
|
if ( !printedMaxAltAllelesWarning ) {
|
||||||
printedWarning = true;
|
printedMaxAltAllelesWarning = true;
|
||||||
message += ". This warning message is output just once per run and further warnings will be suppressed unless the DEBUG logging level is used.";
|
logger.warn(message + ". Unless the DEBUG logging level is used, this warning message is output just once per run and further warnings are suppressed.");
|
||||||
logger.warn(message);
|
|
||||||
} else {
|
} else {
|
||||||
logger.debug(message);
|
logger.debug(message);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -71,6 +71,8 @@ public class FixedAFCalculatorProvider extends AFCalculatorProvider {
|
||||||
|
|
||||||
private final int maximumAltAlleleCount;
|
private final int maximumAltAlleleCount;
|
||||||
|
|
||||||
|
private final int maximumNumPLValues;
|
||||||
|
|
||||||
private final int ploidy;
|
private final int ploidy;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -125,17 +127,18 @@ public class FixedAFCalculatorProvider extends AFCalculatorProvider {
|
||||||
final Logger logger, final boolean verifyRequests) {
|
final Logger logger, final boolean verifyRequests) {
|
||||||
|
|
||||||
if (configuration == null)
|
if (configuration == null)
|
||||||
throw new IllegalArgumentException("null configuration");
|
throw new IllegalArgumentException("null genotype-arguments configuration");
|
||||||
if (configuration == null)
|
|
||||||
throw new IllegalArgumentException("null configuration genotype arguments");
|
|
||||||
if (configuration.samplePloidy < 1)
|
if (configuration.samplePloidy < 1)
|
||||||
throw new IllegalArgumentException("invalid sample ploidy " + configuration.samplePloidy);
|
throw new IllegalArgumentException("invalid sample ploidy " + configuration.samplePloidy);
|
||||||
if (configuration.MAX_ALTERNATE_ALLELES < 0)
|
if (configuration.MAX_ALTERNATE_ALLELES < 0)
|
||||||
throw new IllegalArgumentException("invalid maximum number of alleles " + (configuration.MAX_ALTERNATE_ALLELES + 1));
|
throw new IllegalArgumentException("invalid maximum number of alleles " + (configuration.MAX_ALTERNATE_ALLELES + 1));
|
||||||
|
if (configuration.MAX_NUM_PL_VALUES < 0)
|
||||||
|
throw new IllegalArgumentException("invalid maximum number of PL values " + configuration.MAX_NUM_PL_VALUES);
|
||||||
|
|
||||||
ploidy = configuration.samplePloidy;
|
ploidy = configuration.samplePloidy;
|
||||||
maximumAltAlleleCount = configuration.MAX_ALTERNATE_ALLELES;
|
maximumAltAlleleCount = configuration.MAX_ALTERNATE_ALLELES;
|
||||||
singleton = AFCalculatorImplementation.bestValue(ploidy,maximumAltAlleleCount,preferred).newInstance();
|
maximumNumPLValues = configuration.MAX_NUM_PL_VALUES;
|
||||||
|
singleton = AFCalculatorImplementation.bestValue(ploidy,maximumAltAlleleCount,preferred).newInstance().setMaxNumPLValues(maximumNumPLValues);
|
||||||
singleton.setLogger(logger);
|
singleton.setLogger(logger);
|
||||||
this.verifyRequests = verifyRequests;
|
this.verifyRequests = verifyRequests;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -58,15 +58,16 @@ import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalcula
|
||||||
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculators;
|
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculators;
|
||||||
import org.broadinstitute.gatk.utils.MathUtils;
|
import org.broadinstitute.gatk.utils.MathUtils;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
|
||||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||||
|
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
|
public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
|
||||||
|
|
||||||
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them
|
|
||||||
|
|
||||||
protected GeneralPloidyExactAFCalculator() {
|
protected GeneralPloidyExactAFCalculator() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -491,6 +492,7 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
|
||||||
final boolean assignGenotypes, final double[] newLikelihoods) {
|
final boolean assignGenotypes, final double[] newLikelihoods) {
|
||||||
|
|
||||||
final GenotypeBuilder gb = new GenotypeBuilder(g);
|
final GenotypeBuilder gb = new GenotypeBuilder(g);
|
||||||
|
final String sampleName = g.getSampleName();
|
||||||
|
|
||||||
// add likelihoods
|
// add likelihoods
|
||||||
gb.PL(newLikelihoods);
|
gb.PL(newLikelihoods);
|
||||||
|
|
@ -500,7 +502,7 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
|
||||||
if (newSACs != null)
|
if (newSACs != null)
|
||||||
gb.attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, newSACs);
|
gb.attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, newSACs);
|
||||||
if (assignGenotypes)
|
if (assignGenotypes)
|
||||||
assignGenotype(gb, newLikelihoods, allelesToUse, ploidy);
|
assignGenotype(gb, vc, sampleName, newLikelihoods, allelesToUse, ploidy);
|
||||||
else
|
else
|
||||||
gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy));
|
gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy));
|
||||||
|
|
||||||
|
|
@ -528,13 +530,18 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs
|
* Assign genotypes (GTs) to the samples in the VariantContext greedily based on the PLs
|
||||||
*
|
*
|
||||||
|
* @param gb the GenotypeBuilder to modify
|
||||||
|
* @param vc the VariantContext
|
||||||
|
* @param sampleName the sample name
|
||||||
* @param newLikelihoods the PL array
|
* @param newLikelihoods the PL array
|
||||||
* @param allelesToUse the list of alleles to choose from (corresponding to the PLs)
|
* @param allelesToUse the list of alleles to choose from (corresponding to the PLs)
|
||||||
* @param numChromosomes Number of chromosomes per pool
|
* @param numChromosomes Number of chromosomes per pool
|
||||||
*/
|
*/
|
||||||
private void assignGenotype(final GenotypeBuilder gb,
|
private void assignGenotype(final GenotypeBuilder gb,
|
||||||
|
final VariantContext vc,
|
||||||
|
final String sampleName,
|
||||||
final double[] newLikelihoods,
|
final double[] newLikelihoods,
|
||||||
final List<Allele> allelesToUse,
|
final List<Allele> allelesToUse,
|
||||||
final int numChromosomes) {
|
final int numChromosomes) {
|
||||||
|
|
@ -547,13 +554,10 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
|
||||||
|
|
||||||
gb.alleles(alleleCounts.asAlleleList(allelesToUse));
|
gb.alleles(alleleCounts.asAlleleList(allelesToUse));
|
||||||
|
|
||||||
// remove PLs if necessary
|
removePLsIfMaxNumPLValuesExceeded(gb, vc, sampleName, newLikelihoods);
|
||||||
if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING)
|
|
||||||
gb.noPL();
|
|
||||||
|
|
||||||
// TODO - deprecated so what is the appropriate method to call?
|
// TODO - deprecated so what is the appropriate method to call?
|
||||||
if ( numNewAltAlleles > 0 )
|
if ( numNewAltAlleles > 0 )
|
||||||
gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
|
gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -75,8 +75,6 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public class IndependentAllelesExactAFCalculator extends ExactAFCalculator {
|
public class IndependentAllelesExactAFCalculator extends ExactAFCalculator {
|
||||||
|
|
||||||
private static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Array that caches the allele list that corresponds to the ith ploidy.
|
* Array that caches the allele list that corresponds to the ith ploidy.
|
||||||
*
|
*
|
||||||
|
|
@ -502,9 +500,9 @@ public class IndependentAllelesExactAFCalculator extends ExactAFCalculator {
|
||||||
// if there is no mass on the (new) likelihoods, then just no-call the sample
|
// if there is no mass on the (new) likelihoods, then just no-call the sample
|
||||||
if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) {
|
if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) {
|
||||||
newGTs.add(GenotypeBuilder.create(g.getSampleName(), GATKVariantContextUtils.noCallAlleles(ploidy)));
|
newGTs.add(GenotypeBuilder.create(g.getSampleName(), GATKVariantContextUtils.noCallAlleles(ploidy)));
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
final GenotypeBuilder gb = new GenotypeBuilder(g);
|
final GenotypeBuilder gb = new GenotypeBuilder(g);
|
||||||
|
final String sampleName = g.getSampleName();
|
||||||
|
|
||||||
if ( numNewAltAlleles == 0 )
|
if ( numNewAltAlleles == 0 )
|
||||||
gb.noPL();
|
gb.noPL();
|
||||||
|
|
@ -515,7 +513,7 @@ public class IndependentAllelesExactAFCalculator extends ExactAFCalculator {
|
||||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
|
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||||
gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy));
|
gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy));
|
||||||
else
|
else
|
||||||
assignGenotype(gb, newLikelihoods, allelesToUse, ploidy);
|
assignGenotype(gb, vc, sampleName, newLikelihoods, allelesToUse, ploidy);
|
||||||
newGTs.add(gb.make());
|
newGTs.add(gb.make());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -525,13 +523,18 @@ public class IndependentAllelesExactAFCalculator extends ExactAFCalculator {
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs
|
* Assign genotypes (GTs) to the samples in the VariantContext greedily based on the PLs
|
||||||
*
|
*
|
||||||
|
* @param gb the GenotypeBuilder to modify
|
||||||
|
* @param vc the VariantContext
|
||||||
|
* @param sampleName the sample name
|
||||||
* @param newLikelihoods the PL array
|
* @param newLikelihoods the PL array
|
||||||
* @param allelesToUse the list of alleles to choose from (corresponding to the PLs)
|
* @param allelesToUse the list of alleles to choose from (corresponding to the PLs)
|
||||||
* @param numChromosomes Number of chromosomes per pool
|
* @param numChromosomes Number of chromosomes per pool
|
||||||
*/
|
*/
|
||||||
private void assignGenotype(final GenotypeBuilder gb,
|
private void assignGenotype(final GenotypeBuilder gb,
|
||||||
|
final VariantContext vc,
|
||||||
|
final String sampleName,
|
||||||
final double[] newLikelihoods,
|
final double[] newLikelihoods,
|
||||||
final List<Allele> allelesToUse,
|
final List<Allele> allelesToUse,
|
||||||
final int numChromosomes) {
|
final int numChromosomes) {
|
||||||
|
|
@ -544,9 +547,7 @@ public class IndependentAllelesExactAFCalculator extends ExactAFCalculator {
|
||||||
|
|
||||||
gb.alleles(alleleCounts.asAlleleList(allelesToUse));
|
gb.alleles(alleleCounts.asAlleleList(allelesToUse));
|
||||||
|
|
||||||
// remove PLs if necessary
|
removePLsIfMaxNumPLValuesExceeded(gb, vc, sampleName, newLikelihoods);
|
||||||
if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING)
|
|
||||||
gb.noPL();
|
|
||||||
|
|
||||||
if ( numNewAltAlleles > 0 )
|
if ( numNewAltAlleles > 0 )
|
||||||
gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
|
gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
|
||||||
|
|
|
||||||
|
|
@ -55,30 +55,24 @@ import com.google.java.contract.Ensures;
|
||||||
import htsjdk.samtools.SAMFileWriter;
|
import htsjdk.samtools.SAMFileWriter;
|
||||||
import htsjdk.variant.variantcontext.*;
|
import htsjdk.variant.variantcontext.*;
|
||||||
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
||||||
import htsjdk.variant.vcf.*;
|
import htsjdk.variant.vcf.VCFConstants;
|
||||||
|
import htsjdk.variant.vcf.VCFHeader;
|
||||||
|
import htsjdk.variant.vcf.VCFHeaderLine;
|
||||||
|
import htsjdk.variant.vcf.VCFStandardHeaderLines;
|
||||||
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
||||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
|
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
|
||||||
|
import org.broadinstitute.gatk.engine.filters.BadMateFilter;
|
||||||
import org.broadinstitute.gatk.engine.io.DirectOutputTracker;
|
import org.broadinstitute.gatk.engine.io.DirectOutputTracker;
|
||||||
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub;
|
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardHCAnnotation;
|
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.gatk.utils.downsampling.AlleleBiasedDownsamplingUtils;
|
|
||||||
import org.broadinstitute.gatk.utils.downsampling.DownsampleType;
|
|
||||||
import org.broadinstitute.gatk.utils.downsampling.DownsamplingUtils;
|
|
||||||
import org.broadinstitute.gatk.engine.filters.BadMateFilter;
|
|
||||||
import org.broadinstitute.gatk.utils.genotyper.*;
|
|
||||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
|
||||||
import org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub;
|
import org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub;
|
||||||
import org.broadinstitute.gatk.utils.haplotypeBAMWriter.DroppedReadsTracker;
|
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.gatk.engine.walkers.*;
|
import org.broadinstitute.gatk.engine.walkers.*;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
|
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardHCAnnotation;
|
||||||
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
|
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
|
||||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
|
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
|
||||||
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
|
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
|
||||||
|
|
@ -91,21 +85,33 @@ import org.broadinstitute.gatk.utils.activeregion.ActiveRegionReadState;
|
||||||
import org.broadinstitute.gatk.utils.activeregion.ActivityProfileState;
|
import org.broadinstitute.gatk.utils.activeregion.ActivityProfileState;
|
||||||
import org.broadinstitute.gatk.utils.clipping.ReadClipper;
|
import org.broadinstitute.gatk.utils.clipping.ReadClipper;
|
||||||
import org.broadinstitute.gatk.utils.commandline.*;
|
import org.broadinstitute.gatk.utils.commandline.*;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.gatk.utils.downsampling.AlleleBiasedDownsamplingUtils;
|
||||||
|
import org.broadinstitute.gatk.utils.downsampling.DownsampleType;
|
||||||
|
import org.broadinstitute.gatk.utils.downsampling.DownsamplingUtils;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.fragments.FragmentCollection;
|
import org.broadinstitute.gatk.utils.fragments.FragmentCollection;
|
||||||
import org.broadinstitute.gatk.utils.fragments.FragmentUtils;
|
import org.broadinstitute.gatk.utils.fragments.FragmentUtils;
|
||||||
|
import org.broadinstitute.gatk.utils.genotyper.*;
|
||||||
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
|
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
|
||||||
import org.broadinstitute.gatk.utils.gvcf.GVCFWriter;
|
import org.broadinstitute.gatk.utils.gvcf.GVCFWriter;
|
||||||
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
|
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
|
||||||
|
import org.broadinstitute.gatk.utils.haplotypeBAMWriter.DroppedReadsTracker;
|
||||||
import org.broadinstitute.gatk.utils.haplotypeBAMWriter.HaplotypeBAMWriter;
|
import org.broadinstitute.gatk.utils.haplotypeBAMWriter.HaplotypeBAMWriter;
|
||||||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||||
import org.broadinstitute.gatk.utils.pairhmm.PairHMM;
|
import org.broadinstitute.gatk.utils.pairhmm.PairHMM;
|
||||||
|
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
|
import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.gatk.utils.sam.ReadUtils;
|
import org.broadinstitute.gatk.utils.sam.ReadUtils;
|
||||||
import org.broadinstitute.gatk.utils.variant.*;
|
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||||
|
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||||
|
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||||
|
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
|
@ -1154,6 +1160,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onTraversalDone(Integer result) {
|
public void onTraversalDone(Integer result) {
|
||||||
|
genotypingEngine.printFinalMaxNumPLValuesWarning();
|
||||||
if ( HCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) ((GVCFWriter)vcfWriter).close(false); // GROSS -- engine forces us to close our own VCF writer since we wrapped it
|
if ( HCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) ((GVCFWriter)vcfWriter).close(false); // GROSS -- engine forces us to close our own VCF writer since we wrapped it
|
||||||
referenceConfidenceModel.close();
|
referenceConfidenceModel.close();
|
||||||
//TODO remove the need to call close here for debugging, the likelihood output stream should be managed
|
//TODO remove the need to call close here for debugging, the likelihood output stream should be managed
|
||||||
|
|
|
||||||
|
|
@ -51,13 +51,18 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.log4j.Level;
|
||||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||||
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType;
|
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType;
|
||||||
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -352,4 +357,60 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
||||||
executeTest(" testHaplotypeCallerMultiAllelicNonRef", spec);
|
executeTest(" testHaplotypeCallerMultiAllelicNonRef", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHaplotypeCallerMaxNumPLValues() {
|
||||||
|
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 4 -maxNumPLValues 70",
|
||||||
|
b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||||
|
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("7ea93210277fa4b590790a81c4b3994b"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testHaplotypeCallerMaxNumPLValues", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHaplotypeCallerMaxNumPLValuesExceededWithWarnLogLevel() throws IOException {
|
||||||
|
// Need to see log WARN messages
|
||||||
|
final Level level = logger.getLevel();
|
||||||
|
logger.setLevel(Level.WARN);
|
||||||
|
|
||||||
|
final File logFile = createTempFile("testMaxNumPLValuesExceededWithWarnLogLevel.log", ".tmp");
|
||||||
|
final String logFileName = logFile.getAbsolutePath();
|
||||||
|
|
||||||
|
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 4 -maxNumPLValues 30 -log %s",
|
||||||
|
b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals",
|
||||||
|
GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER, logFileName);
|
||||||
|
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9d69cb9dc67e0d0ee9863767428e6841"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testHaplotypeCallerMaxNumPLValuesExceededWithWarnLogLevel", spec);
|
||||||
|
// Make sure the "Maximum allowed number of PLs exceeded" messages are in the log
|
||||||
|
Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) exceeded for sample NA12878 at 20:10097101-10097101 with 35 possible genotypes. " +
|
||||||
|
"No PLs will be output for these genotypes (which may cause incorrect results in subsequent analyses) unless the --max_num_PL_values argument is increased accordingly. " +
|
||||||
|
"Unless the DEBUG logging level is used, this warning message is output just once per run and further warnings are suppressed."));
|
||||||
|
Assert.assertFalse(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) exceeded for sample NA12878 at 20:10316239-10316241 with 70 possible genotypes."));
|
||||||
|
Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) was exceeded 2 time(s); the largest number of PLs found was 70."));
|
||||||
|
// Set the log level back
|
||||||
|
logger.setLevel(level);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHaplotypeCallerMaxNumPLValuesExceededWithDebugLogLevel() throws IOException {
|
||||||
|
// Need to see log DEBUG messages
|
||||||
|
final Level level = logger.getLevel();
|
||||||
|
logger.setLevel(Level.DEBUG);
|
||||||
|
|
||||||
|
final File logFile = createTempFile("testMaxNumPLValuesExceededWithDebugLogLevel.log", ".tmp");
|
||||||
|
final String logFileName = logFile.getAbsolutePath();
|
||||||
|
|
||||||
|
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 4 -maxNumPLValues 30 -log %s",
|
||||||
|
b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals",
|
||||||
|
GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER, logFileName);
|
||||||
|
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9d69cb9dc67e0d0ee9863767428e6841"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testHaplotypeCallerMaxNumPLValuesExceededWithDebugLogLevel", spec);
|
||||||
|
// Make sure the "Maximum allowed number of PLs exceeded" messages are in the log
|
||||||
|
Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) exceeded for sample NA12878 at 20:10097101-10097101 with 35 possible genotypes."));
|
||||||
|
Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) exceeded for sample NA12878 at 20:10316239-10316241 with 70 possible genotypes."));
|
||||||
|
Assert.assertTrue(FileUtils.readFileToString(logFile).contains("Maximum allowed number of PLs (30) was exceeded 2 time(s); the largest number of PLs found was 70."));
|
||||||
|
// Set the log level back
|
||||||
|
logger.setLevel(level);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue