From 2c624f76c83f8cfec61001f89928ebe39ab8d713 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sat, 20 Oct 2012 20:35:54 -0400 Subject: [PATCH] Refactoring the Unified (and Standard) Argument Collections because it was really ugly that the subclass had to do all the cloning for the super class. The clone() method is really not recommended best practice in Java anyways, so I changed it so that we use standard overloaded constructors. Confirmed that the Haplotype Caller --help docs do not include UG-specific arguments. --- .../haplotypecaller/HaplotypeCaller.java | 4 +- .../StandardCallerArgumentCollection.java | 26 ++++++ .../genotyper/UnifiedArgumentCollection.java | 90 +++++++------------ 3 files changed, 62 insertions(+), 58 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 5f2b5775c..6d6351fc5 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -241,14 +241,14 @@ public class HaplotypeCaller extends ActiveRegionWalker implem samplesList.addAll( samples ); // initialize the UnifiedGenotyper Engine which is used to call into the exact model final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user - UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); + UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling 
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING ); UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested - UnifiedArgumentCollection simpleUAC = UAC.clone(); + UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC); simpleUAC.exactCallsLog = null; UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java index 085a60191..9b9f04228 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java @@ -69,7 +69,33 @@ public class StandardCallerArgumentCollection { @Argument(fullName = "max_alternate_alleles_for_indels", shortName = "maxAltAllelesForIndels", doc = "Maximum number of alternate alleles to genotype for indels only", required = false) public int MAX_ALTERNATE_ALLELES_FOR_INDELS = 2; + /** + * If this fraction is greater than zero, the caller will aggressively attempt to remove contamination through biased down-sampling of reads. + * Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we + * will try to remove (N * contamination fraction) bases for each alternate allele. 
+ */ + @Hidden + @Argument(fullName = "contamination_percentage_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false) + public double CONTAMINATION_PERCENTAGE = 0.0; + @Hidden @Argument(shortName = "logExactCalls", doc="x", required=false) public File exactCallsLog = null; + + + public StandardCallerArgumentCollection() { } + + // Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value! + public StandardCallerArgumentCollection(final StandardCallerArgumentCollection SCAC) { + this.alleles = SCAC.alleles; + this.GenotypingMode = SCAC.GenotypingMode; + this.heterozygosity = SCAC.heterozygosity; + this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES; + this.MAX_ALTERNATE_ALLELES_FOR_INDELS = SCAC.MAX_ALTERNATE_ALLELES_FOR_INDELS; + this.OutputMode = SCAC.OutputMode; + this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING; + this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING; + this.CONTAMINATION_PERCENTAGE = SCAC.CONTAMINATION_PERCENTAGE; + this.exactCallsLog = SCAC.exactCallsLog; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 3eda2017c..17137c5e9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -186,63 +186,41 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection @Argument(shortName="ef", fullName="exclude_filtered_reference_sites", doc="Don't include in the analysis sites where the reference sample VCF is filtered. 
Default: false.", required=false) boolean EXCLUDE_FILTERED_REFERENCE_SITES = false; - // Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value! - public UnifiedArgumentCollection clone() { - UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); - - uac.GLmodel = GLmodel; - uac.AFmodel = AFmodel; - uac.heterozygosity = heterozygosity; - uac.PCR_error = PCR_error; - uac.GenotypingMode = GenotypingMode; - uac.OutputMode = OutputMode; - uac.NO_SLOD = NO_SLOD; - uac.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED; - uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING; - uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING; - uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE; - uac.MAX_DELETION_FRACTION = MAX_DELETION_FRACTION; - uac.MIN_INDEL_COUNT_FOR_GENOTYPING = MIN_INDEL_COUNT_FOR_GENOTYPING; - uac.MIN_INDEL_FRACTION_PER_SAMPLE = MIN_INDEL_FRACTION_PER_SAMPLE; - uac.INDEL_HETEROZYGOSITY = INDEL_HETEROZYGOSITY; - uac.INDEL_GAP_OPEN_PENALTY = INDEL_GAP_OPEN_PENALTY; - uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY; - uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO; - uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE; - uac.alleles = alleles; - uac.MAX_ALTERNATE_ALLELES = MAX_ALTERNATE_ALLELES; - uac.MAX_ALTERNATE_ALLELES_FOR_INDELS = MAX_ALTERNATE_ALLELES_FOR_INDELS; - uac.GLmodel = GLmodel; - uac.TREAT_ALL_READS_AS_SINGLE_POOL = TREAT_ALL_READS_AS_SINGLE_POOL; - uac.referenceSampleRod = referenceSampleRod; - uac.referenceSampleName = referenceSampleName; - uac.samplePloidy = samplePloidy; - uac.maxQualityScore = minQualityScore; - uac.phredScaledPrior = phredScaledPrior; - uac.minPower = minPower; - uac.minReferenceDepth = minReferenceDepth; - uac.EXCLUDE_FILTERED_REFERENCE_SITES = EXCLUDE_FILTERED_REFERENCE_SITES; - uac.IGNORE_LANE_INFO = IGNORE_LANE_INFO; - uac.exactCallsLog = 
exactCallsLog; - uac.pairHMM = pairHMM; - - // todo- arguments to remove - uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES; - return uac; - } - public UnifiedArgumentCollection() { } - public UnifiedArgumentCollection( final StandardCallerArgumentCollection SCAC ) { - super(); - this.alleles = SCAC.alleles; - this.GenotypingMode = SCAC.GenotypingMode; - this.heterozygosity = SCAC.heterozygosity; - this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES; - this.MAX_ALTERNATE_ALLELES_FOR_INDELS = SCAC.MAX_ALTERNATE_ALLELES_FOR_INDELS; - this.OutputMode = SCAC.OutputMode; - this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING; - this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING; - this.exactCallsLog = SCAC.exactCallsLog; + public UnifiedArgumentCollection(final StandardCallerArgumentCollection SCAC) { + super(SCAC); + } + + // Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value! 
+    /**
+     * Copy constructor: builds a collection whose argument values match those of {@code uac}.
+     *
+     * The arguments declared on StandardCallerArgumentCollection are copied by the superclass
+     * copy constructor; the assignments below copy the arguments read from {@code uac} in this
+     * class. All copies are plain field assignments, so object-typed values are shared with
+     * {@code uac} rather than duplicated.
+     *
+     * @param uac the collection to copy argument values from
+     */
+    public UnifiedArgumentCollection(final UnifiedArgumentCollection uac) {
+        // Delegate explicitly: without this the implicit no-arg super() runs and the inherited
+        // arguments (alleles, GenotypingMode, OutputMode, the confidence thresholds,
+        // exactCallsLog, ...) silently fall back to their defaults in the copy.
+        super(uac);
+
+        this.GLmodel = uac.GLmodel;
+        this.AFmodel = uac.AFmodel;
+        this.PCR_error = uac.PCR_error;
+        this.NO_SLOD = uac.NO_SLOD;
+        this.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = uac.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED;
+        this.MIN_BASE_QUALTY_SCORE = uac.MIN_BASE_QUALTY_SCORE;
+        this.MAX_DELETION_FRACTION = uac.MAX_DELETION_FRACTION;
+        this.MIN_INDEL_COUNT_FOR_GENOTYPING = uac.MIN_INDEL_COUNT_FOR_GENOTYPING;
+        this.MIN_INDEL_FRACTION_PER_SAMPLE = uac.MIN_INDEL_FRACTION_PER_SAMPLE;
+        this.INDEL_HETEROZYGOSITY = uac.INDEL_HETEROZYGOSITY;
+        this.INDEL_GAP_OPEN_PENALTY = uac.INDEL_GAP_OPEN_PENALTY;
+        this.INDEL_GAP_CONTINUATION_PENALTY = uac.INDEL_GAP_CONTINUATION_PENALTY;
+        this.OUTPUT_DEBUG_INDEL_INFO = uac.OUTPUT_DEBUG_INDEL_INFO;
+        this.INDEL_HAPLOTYPE_SIZE = uac.INDEL_HAPLOTYPE_SIZE;
+        this.TREAT_ALL_READS_AS_SINGLE_POOL = uac.TREAT_ALL_READS_AS_SINGLE_POOL;
+        this.referenceSampleRod = uac.referenceSampleRod;
+        this.referenceSampleName = uac.referenceSampleName;
+        this.samplePloidy = uac.samplePloidy;
+        // Copy each quality bound from its own source field; the earlier code assigned
+        // maxQualityScore from minQualityScore and never copied minQualityScore at all.
+        this.minQualityScore = uac.minQualityScore;
+        this.maxQualityScore = uac.maxQualityScore;
+        this.phredScaledPrior = uac.phredScaledPrior;
+        this.minPower = uac.minPower;
+        this.minReferenceDepth = uac.minReferenceDepth;
+        this.EXCLUDE_FILTERED_REFERENCE_SITES = uac.EXCLUDE_FILTERED_REFERENCE_SITES;
+        this.IGNORE_LANE_INFO = uac.IGNORE_LANE_INFO;
+        this.pairHMM = uac.pairHMM;
+
+        // todo- arguments to remove
+        this.IGNORE_SNP_ALLELES = uac.IGNORE_SNP_ALLELES;
     }
 }