Pulling out common caller arguments into their own StandardCallerArgumentCollection base class so that every caller isn't exposed to the unused arguments from every other caller.
This commit is contained in:
parent
c67d708c51
commit
464d49509a
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.commandline.*;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
|
|
@ -189,7 +190,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
protected String[] annotationClassesToUse = { "Standard" };
|
protected String[] annotationClassesToUse = { "Standard" };
|
||||||
|
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection();
|
||||||
|
|
||||||
// the calculation arguments
|
// the calculation arguments
|
||||||
private UnifiedGenotyperEngine UG_engine = null;
|
private UnifiedGenotyperEngine UG_engine = null;
|
||||||
|
|
@ -240,7 +241,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||||
samplesList.addAll( samples );
|
samplesList.addAll( samples );
|
||||||
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
|
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
|
||||||
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument
|
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
|
||||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||||
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||||
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,62 @@
|
||||||
|
package org.broadinstitute.sting.gatk.arguments;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Advanced;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created with IntelliJ IDEA.
|
||||||
|
* User: rpoplin
|
||||||
|
* Date: 8/20/12
|
||||||
|
* A collection of arguments that are common to the various callers.
|
||||||
|
* This is pulled out so that every caller isn't exposed to the arguments from every other caller.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class StandardCallerArgumentCollection {
|
||||||
|
/**
|
||||||
|
* The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
|
||||||
|
* het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
||||||
|
public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;
|
||||||
|
|
||||||
|
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
|
||||||
|
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
|
||||||
|
|
||||||
|
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
|
||||||
|
public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
|
||||||
|
* confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
|
||||||
|
* is the default).
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false)
|
||||||
|
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This argument allows you to emit low quality calls as filtered records.
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false)
|
||||||
|
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provided in this rod binding
|
||||||
|
*/
|
||||||
|
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
|
||||||
|
public RodBinding<VariantContext> alleles;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN_ALLELES),
|
||||||
|
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
|
||||||
|
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
|
||||||
|
* that you not play around with this parameter.
|
||||||
|
*/
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
||||||
|
public int MAX_ALTERNATE_ALLELES = 3;
|
||||||
|
}
|
||||||
|
|
@ -26,11 +26,12 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.*;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
|
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||||
|
|
||||||
|
|
||||||
public class UnifiedArgumentCollection {
|
public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
|
||||||
|
|
||||||
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
|
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
|
||||||
public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
|
public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||||
|
|
@ -42,13 +43,6 @@ public class UnifiedArgumentCollection {
|
||||||
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
|
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
|
||||||
protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
|
protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
|
||||||
|
|
||||||
/**
|
|
||||||
* The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
|
|
||||||
* het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
|
|
||||||
*/
|
|
||||||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
|
||||||
public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily
|
* The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily
|
||||||
* distinguish between PCR errors vs. sequencing errors. The practical implication for this value is that it
|
* distinguish between PCR errors vs. sequencing errors. The practical implication for this value is that it
|
||||||
|
|
@ -57,26 +51,6 @@ public class UnifiedArgumentCollection {
|
||||||
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
|
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
|
||||||
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
|
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
|
||||||
|
|
||||||
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
|
|
||||||
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
|
|
||||||
|
|
||||||
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
|
|
||||||
public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
|
|
||||||
* confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
|
|
||||||
* is the default).
|
|
||||||
*/
|
|
||||||
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false)
|
|
||||||
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This argument allows you to emit low quality calls as filtered records.
|
|
||||||
*/
|
|
||||||
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false)
|
|
||||||
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Note that calculating the SLOD increases the runtime by an appreciable amount.
|
* Note that calculating the SLOD increases the runtime by an appreciable amount.
|
||||||
*/
|
*/
|
||||||
|
|
@ -90,12 +64,6 @@ public class UnifiedArgumentCollection {
|
||||||
@Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false)
|
@Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false)
|
||||||
public boolean ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = false;
|
public boolean ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = false;
|
||||||
|
|
||||||
/**
|
|
||||||
* When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
|
|
||||||
*/
|
|
||||||
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
|
|
||||||
public RodBinding<VariantContext> alleles;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base
|
* The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base
|
||||||
* is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value.
|
* is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value.
|
||||||
|
|
@ -107,16 +75,6 @@ public class UnifiedArgumentCollection {
|
||||||
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
|
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
|
||||||
public Double MAX_DELETION_FRACTION = 0.05;
|
public Double MAX_DELETION_FRACTION = 0.05;
|
||||||
|
|
||||||
/**
|
|
||||||
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
|
|
||||||
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
|
|
||||||
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
|
|
||||||
* that you not play around with this parameter.
|
|
||||||
*/
|
|
||||||
@Advanced
|
|
||||||
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
|
||||||
public int MAX_ALTERNATE_ALLELES = 3;
|
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAltAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false)
|
@Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAltAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false)
|
||||||
public boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = false;
|
public boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = false;
|
||||||
|
|
@ -139,7 +97,6 @@ public class UnifiedArgumentCollection {
|
||||||
@Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false)
|
@Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false)
|
||||||
public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25;
|
public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This argument informs the prior probability of having an indel at a site.
|
* This argument informs the prior probability of having an indel at a site.
|
||||||
*/
|
*/
|
||||||
|
|
@ -274,5 +231,16 @@ public class UnifiedArgumentCollection {
|
||||||
return uac;
|
return uac;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public UnifiedArgumentCollection() { }
|
||||||
|
|
||||||
|
public UnifiedArgumentCollection( final StandardCallerArgumentCollection SCAC ) {
|
||||||
|
super();
|
||||||
|
this.alleles = SCAC.alleles;
|
||||||
|
this.GenotypingMode = SCAC.GenotypingMode;
|
||||||
|
this.heterozygosity = SCAC.heterozygosity;
|
||||||
|
this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES;
|
||||||
|
this.OutputMode = SCAC.OutputMode;
|
||||||
|
this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
|
||||||
|
this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue