From c66ef17cd0e546f17eb296433b4b5d3b9c90c509 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 4 Oct 2012 13:52:14 -0400 Subject: [PATCH] Add a separate max alt alleles argument for indels that defaults to 2 instead of 3. PLEASE TAKE NOTE. --- .../genotyper/GeneralPloidyExactAFCalculation.java | 2 +- .../arguments/StandardCallerArgumentCollection.java | 10 ++++++++++ .../walkers/genotyper/AlleleFrequencyCalculation.java | 8 ++++---- .../walkers/genotyper/DiploidExactAFCalculation.java | 4 ++-- .../gatk/walkers/genotyper/ExactAFCalculation.java | 4 ++-- .../genotyper/OptimizedDiploidExactAFCalculation.java | 4 ++-- .../walkers/genotyper/UnifiedArgumentCollection.java | 7 ++----- 7 files changed, 23 insertions(+), 16 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculation.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculation.java index 903d553da..da3ed2a02 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculation.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculation.java @@ -47,7 +47,7 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation { } public GeneralPloidyExactAFCalculation(final int nSamples, final int maxAltAlleles, final int ploidy) { - super(nSamples, maxAltAlleles, false, null, null, null); + super(nSamples, maxAltAlleles, maxAltAlleles, null, null, null); this.ploidy = ploidy; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java index b2e1a12c6..085a60191 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java @@ -59,6 +59,16 @@ public class StandardCallerArgumentCollection { @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) public int MAX_ALTERNATE_ALLELES = 3; + /** + * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES), + * then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it + * scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend + * that you not play around with this parameter. + */ + @Advanced + @Argument(fullName = "max_alternate_alleles_for_indels", shortName = "maxAltAllelesForIndels", doc = "Maximum number of alternate alleles to genotype for indels only", required = false) + public int MAX_ALTERNATE_ALLELES_FOR_INDELS = 2; + @Hidden @Argument(shortName = "logExactCalls", doc="x", required=false) public File exactCallsLog = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculation.java index 4189dbd6d..fc578a5bd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculation.java @@ -63,7 +63,7 @@ public abstract class AlleleFrequencyCalculation implements Cloneable { protected int nSamples; protected int MAX_ALTERNATE_ALLELES_TO_GENOTYPE; - protected boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS; + protected int MAX_ALTERNATE_ALLELES_FOR_INDELS; protected Logger logger; protected PrintStream verboseWriter; @@ -74,12 +74,12 @@ public abstract class AlleleFrequencyCalculation implements Cloneable { private PrintStream callReport = null; protected AlleleFrequencyCalculation(final UnifiedArgumentCollection UAC, final int nSamples, final Logger logger, final PrintStream verboseWriter) { - this(nSamples, UAC.MAX_ALTERNATE_ALLELES, UAC.CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS, UAC.exactCallsLog, logger, verboseWriter); + this(nSamples, UAC.MAX_ALTERNATE_ALLELES, UAC.MAX_ALTERNATE_ALLELES_FOR_INDELS, UAC.exactCallsLog, logger, verboseWriter); } protected AlleleFrequencyCalculation(final int nSamples, final int maxAltAlleles, - final boolean capMaxAltsForIndels, + final int maxAltAllelesForIndels, final File exactCallsLog, final Logger logger, final PrintStream verboseWriter) { @@ -88,7 +88,7 @@ public abstract class AlleleFrequencyCalculation implements Cloneable { this.nSamples = nSamples; this.MAX_ALTERNATE_ALLELES_TO_GENOTYPE = maxAltAlleles; - this.CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = capMaxAltsForIndels; + this.MAX_ALTERNATE_ALLELES_FOR_INDELS = maxAltAllelesForIndels; this.logger = logger == null ? defaultLogger : logger; this.verboseWriter = verboseWriter; if ( exactCallsLog != null ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidExactAFCalculation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidExactAFCalculation.java index 4e449a8bb..40a30b710 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidExactAFCalculation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidExactAFCalculation.java @@ -38,7 +38,7 @@ public class DiploidExactAFCalculation extends ExactAFCalculation { private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 public DiploidExactAFCalculation(final int nSamples, final int maxAltAlleles) { - super(nSamples, maxAltAlleles, false, null, null, null); + super(nSamples, maxAltAlleles, maxAltAlleles, null, null, null); } /** @@ -62,7 +62,7 @@ public class DiploidExactAFCalculation extends ExactAFCalculation { @Override protected VariantContext reduceScope(final VariantContext vc) { - final int myMaxAltAllelesToGenotype = CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS && vc.getType().equals(VariantContext.Type.INDEL) ? 2 : MAX_ALTERNATE_ALLELES_TO_GENOTYPE; + final int myMaxAltAllelesToGenotype = vc.getType().equals(VariantContext.Type.INDEL) ? MAX_ALTERNATE_ALLELES_FOR_INDELS : MAX_ALTERNATE_ALLELES_TO_GENOTYPE; // don't try to genotype too many alternate alleles if ( vc.getAlternateAlleles().size() > myMaxAltAllelesToGenotype ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculation.java index 2dea9e951..b70309ed5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculation.java @@ -45,8 +45,8 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation { super(UAC, nSamples, logger, verboseWriter); } - protected ExactAFCalculation(final int nSamples, int maxAltAlleles, boolean capMaxAltsForIndels, File exactCallsLog, Logger logger, PrintStream verboseWriter) { - super(nSamples, maxAltAlleles, capMaxAltsForIndels, exactCallsLog, logger, verboseWriter); + protected ExactAFCalculation(final int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, File exactCallsLog, Logger logger, PrintStream verboseWriter) { + super(nSamples, maxAltAlleles, maxAltAllelesForIndels, exactCallsLog, logger, verboseWriter); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OptimizedDiploidExactAFCalculation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OptimizedDiploidExactAFCalculation.java index 2b3b517ce..71f0a675d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OptimizedDiploidExactAFCalculation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OptimizedDiploidExactAFCalculation.java @@ -38,7 +38,7 @@ public class OptimizedDiploidExactAFCalculation extends ExactAFCalculation { private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 public OptimizedDiploidExactAFCalculation(final int nSamples, final int maxAltAlleles) { - super(nSamples, maxAltAlleles, false, null, null, null); + super(nSamples, maxAltAlleles, maxAltAlleles, null, null, null); } /** @@ -62,7 +62,7 @@ public class OptimizedDiploidExactAFCalculation extends ExactAFCalculation { @Override protected VariantContext reduceScope(final VariantContext vc) { - final int myMaxAltAllelesToGenotype = CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS && vc.getType().equals(VariantContext.Type.INDEL) ? 2 : MAX_ALTERNATE_ALLELES_TO_GENOTYPE; + final int myMaxAltAllelesToGenotype = vc.getType().equals(VariantContext.Type.INDEL) ? MAX_ALTERNATE_ALLELES_FOR_INDELS : MAX_ALTERNATE_ALLELES_TO_GENOTYPE; // don't try to genotype too many alternate alleles if ( vc.getAlternateAlleles().size() > myMaxAltAllelesToGenotype ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 9b80d6266..842ec876a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -75,10 +75,6 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection @Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false) public Double MAX_DELETION_FRACTION = 0.05; - @Hidden - @Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAltAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false) - public boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = false; - // indel-related arguments /** * A candidate indel is genotyped (and potentially called) if there are this number of reads with a consensus indel at a site. @@ -211,7 +207,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE; uac.alleles = alleles; uac.MAX_ALTERNATE_ALLELES = MAX_ALTERNATE_ALLELES; - uac.CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS; + uac.MAX_ALTERNATE_ALLELES_FOR_INDELS = MAX_ALTERNATE_ALLELES_FOR_INDELS; uac.GLmodel = GLmodel; uac.TREAT_ALL_READS_AS_SINGLE_POOL = TREAT_ALL_READS_AS_SINGLE_POOL; uac.referenceSampleRod = referenceSampleRod; @@ -239,6 +235,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection this.GenotypingMode = SCAC.GenotypingMode; this.heterozygosity = SCAC.heterozygosity; this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES; + this.MAX_ALTERNATE_ALLELES_FOR_INDELS = SCAC.MAX_ALTERNATE_ALLELES_FOR_INDELS; this.OutputMode = SCAC.OutputMode; this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING; this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;