diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java index 5559db77b..25db11101 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java @@ -47,10 +47,11 @@ package org.broadinstitute.sting.gatk.arguments; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingOutputMode; import org.broadinstitute.sting.gatk.walkers.genotyper.OutputMode; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory; import org.broadinstitute.sting.utils.collections.DefaultHashMap; -import org.broadinstitute.sting.utils.variant.HomoSapiens; +import org.broadinstitute.sting.utils.variant.HomoSapiensConstants; import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.File; @@ -98,16 +99,16 @@ public class StandardCallerArgumentCollection implements Cloneable { * which determines how many chromosomes each individual in the species carries. */ @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus. See the GATKDocs for full details on the meaning of this population genetics concept", required = false) - public Double snpHeterozygosity = HomoSapiens.SNP_HETEROZYGOSITY; + public Double snpHeterozygosity = HomoSapiensConstants.SNP_HETEROZYGOSITY; /** * This argument informs the prior probability of having an indel at a site. */ @Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling. See the GATKDocs for heterozygosity for full details on the meaning of this population genetics concept", required = false) - public double indelHeterozygosity = HomoSapiens.INDEL_HETEROZYGOSITY; + public double indelHeterozygosity = HomoSapiensConstants.INDEL_HETEROZYGOSITY; @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false) - public org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingMode genotypingMode = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingMode.DISCOVERY; + public GenotypingOutputMode genotypingOutputMode = GenotypingOutputMode.DISCOVERY; /** * The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with @@ -215,7 +216,7 @@ public class StandardCallerArgumentCollection implements Cloneable { * Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy */ @Argument(shortName="ploidy", fullName="sample_ploidy", doc="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false) - public int samplePloidy = HomoSapiens.DEFAULT_PLOIDY; + public int samplePloidy = HomoSapiensConstants.DEFAULT_PLOIDY; @Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false) public OutputMode outputMode = OutputMode.EMIT_VARIANTS_ONLY; @@ -232,7 +233,6 @@ public class StandardCallerArgumentCollection implements Cloneable { @Argument(fullName = "allSitePLs", shortName = "allSitePLs", doc = "Annotate all sites with PLs", required = false) public boolean annotateAllSitesWithPLs = false; - /** * Creates a Standard caller argument collection with default values. */ @@ -257,12 +257,7 @@ public class StandardCallerArgumentCollection implements Cloneable { if (!field.getDeclaringClass().isAssignableFrom(clazz)) continue; final int fieldModifiers = field.getModifiers(); - if (Modifier.isPrivate((fieldModifiers))) - continue; - if (Modifier.isFinal(fieldModifiers)) - continue; - if (Modifier.isStatic(fieldModifiers)) - continue; + if ((fieldModifiers & UNCOPYABLE_MODIFIER_MASK) != 0) continue; field.set(result,field.get(this)); } return result; @@ -283,4 +278,10 @@ public class StandardCallerArgumentCollection implements Cloneable { throw new IllegalStateException("unreachable code"); } } + + /** + * Holds a modifiers mask that identifies those fields that cannot be copied between + * StandardCallerArgumentCollections. + */ + private final int UNCOPYABLE_MODIFIER_MASK = Modifier.PRIVATE | Modifier.STATIC | Modifier.FINAL; } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java index c3412e2bc..cadc905dc 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java @@ -134,7 +134,7 @@ public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends General final List alleles = new ArrayList(); if ( allAllelesToUse != null ) { alleles.addAll(allAllelesToUse); - } else if ( UAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ) { + } else if ( UAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) { final VariantContext vc = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(tracker, ref.getLocus(), true, logger, UAC.alleles); // ignore places where we don't have a SNP diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index dc5220e6a..2b9953782 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -71,7 +71,7 @@ public abstract class GenotypeLikelihoodsCalculationModel { public static final String DUMMY_LANE = "Lane1"; public static final String DUMMY_SAMPLE_NAME = "DummySample1"; - public enum Name { + public enum Model { SNP, INDEL, GENERALPLOIDYSNP, diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypingEngine.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypingEngine.java index c0dcfd54e..ce616377d 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypingEngine.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypingEngine.java @@ -141,6 +141,7 @@ public abstract class GenotypingEngine stratifiedContexts, - final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Name model, + final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model, final boolean inheritAttributesFromInputVC, final Map perReadAlleleLikelihoodMap) { @@ -207,7 +212,7 @@ public abstract class GenotypingEngine -0.0 which isn't nice double log10Confidence = ! outputAlternativeAlleles.siteIsMonomorphic || - configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs + configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs ? AFresult.getLog10PosteriorOfAFEq0() + 0.0 : AFresult.getLog10PosteriorOfAFGT0() + 0.0; @@ -227,8 +232,9 @@ public abstract class GenotypingEngine= configuration.STANDARD_CONFIDENCE_FOR_CALLING || - (configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES + (configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && QualityUtils.phredScaleErrorRate(PofF) >= configuration.STANDARD_CONFIDENCE_FOR_CALLING); } @@ -358,7 +364,7 @@ public abstract class GenotypingEngine= configuration.STANDARD_CONFIDENCE_FOR_CALLING, false); } - protected final double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Name model ) { + protected final double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) { switch (model) { case SNP: case GENERALPLOIDYSNP: @@ -488,16 +494,30 @@ public abstract class GenotypingEngine= 0", "theta >= 0.0 && theta <= 1.0"}) @Ensures("MathUtils.goodLog10Probability(result)") protected final double estimateLog10ReferenceConfidenceForOneSample(final int depth, final double theta) { + if (theta <= 0 || theta >= 1) throw new IllegalArgumentException("theta must be greater than 0 and less than 1"); final double log10PofNonRef = Math.log10(theta / 2.0) + getRefBinomialProbLog10(depth); return MathUtils.log10OneMinusX(Math.pow(10.0, log10PofNonRef)); } + /** + * Calculates the hom-reference binomial log10 probability given the depth. + * + * @param depth the query depth. + * + * @throws IllegalArgumentException if {@code depth} is less than 0. + * + * @return a valid log10 probability between 0 and {@link Double#NEGATIVE_INFINITY}. + */ protected final double getRefBinomialProbLog10(final int depth) { + if (depth < 0) + throw new IllegalArgumentException("depth cannot be less than 0"); return MathUtils.log10BinomialProbability(depth, 0); } @@ -601,37 +621,39 @@ public abstract class GenotypingEngine composeCallAttributes(final boolean inheritAttributesFromInputVC, final VariantContext vc, final AlignmentContext rawContext, final Map stratifiedContexts, final RefMetaDataTracker tracker, final ReferenceContext refContext, final List alleleCountsofMLE, final boolean bestGuessIsRef, final AFCalcResult AFresult, final List allAllelesToUse, final GenotypesContext genotypes, - final GenotypeLikelihoodsCalculationModel.Name model, final Map perReadAlleleLikelihoodMap) { + final GenotypeLikelihoodsCalculationModel.Model model, final Map perReadAlleleLikelihoodMap) { final HashMap attributes = new HashMap<>(); - final boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null; - - // inherit attributed from input vc if requested + // inherit attributes from input vc if requested if (inheritAttributesFromInputVC) attributes.putAll(vc.getAttributes()); - // if the site was downsampled, record that fact + // if the site was down-sampled, record that fact if ( !limitedContext && rawContext.hasPileupBeenDownsampled() ) attributes.put(VCFConstants.DOWNSAMPLED_KEY, true); - // add the MLE AC and AF annotations if ( alleleCountsofMLE.size() > 0 ) { attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE); - int AN = 0; - for (final Genotype g : genotypes) { - for (final Allele a : g.getAlleles()) - if (!a.isNoCall()) AN++; - } - final ArrayList MLEfrequencies = new ArrayList(alleleCountsofMLE.size()); - // the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1) - for ( int AC : alleleCountsofMLE ) - MLEfrequencies.add(Math.min(1.0, (double)AC / (double)AN)); + final ArrayList MLEfrequencies = calculateMLEAlleleFrequencies(alleleCountsofMLE, genotypes); attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies); } return attributes; } + private ArrayList calculateMLEAlleleFrequencies(List alleleCountsofMLE, GenotypesContext genotypes) { + int AN = 0; + for (final Genotype g : genotypes) + for (final Allele a : g.getAlleles()) + if (!a.isNoCall()) AN++; + + final ArrayList MLEfrequencies = new ArrayList(alleleCountsofMLE.size()); + // the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1) + for (final int AC : alleleCountsofMLE ) + MLEfrequencies.add(Math.min(1.0, (double)AC / (double)AN)); + return MLEfrequencies; + } + } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypingMode.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypingOutputMode.java similarity index 99% rename from protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypingMode.java rename to protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypingOutputMode.java index f6fc87c5a..f8860ac5a 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypingMode.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypingOutputMode.java @@ -54,7 +54,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; * * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> */ -public enum GenotypingMode { +public enum GenotypingOutputMode { /** * The genotyper will choose the most likely alternate allele diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 4a0bf371e..8fe7cc10a 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -215,7 +215,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood final boolean ignoreSNPAllelesWhenGenotypingIndels) { List alleles = new ArrayList(); - if (UAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES) { + if (UAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES) { VariantContext vc = null; for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) { if (vc_input != null && diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 5ec3ac494..91679fe6f 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -77,7 +77,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { super(UAC, logger); - useAlleleFromVCF = UAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES; + useAlleleFromVCF = UAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES; perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap(); } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 64960200a..bdfaae22b 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -54,7 +54,7 @@ import org.broadinstitute.variant.variantcontext.VariantContext; public class UnifiedArgumentCollection extends StandardCallerArgumentCollection { @Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false) - public GenotypeLikelihoodsCalculationModel.Name GLmodel = GenotypeLikelihoodsCalculationModel.Name.SNP; + public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; /** * The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily @@ -187,14 +187,6 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection @Argument(shortName = "min_call_power", fullName = "min_power_threshold_for_calling", doc="The minimum confidence in the error model to make a call. Number should be between 0 (no power requirement) and 1 (maximum power required).", required = false) double minPower = 0.95; - @Hidden - @Argument(shortName = "min_depth", fullName = "min_reference_depth", doc="The minimum depth required in the reference sample in order to make a call.", required = false) - int minReferenceDepth = 100; - - @Hidden - @Argument(shortName="ef", fullName="exclude_filtered_reference_sites", doc="Don't include in the analysis sites where the reference sample VCF is filtered. Default: false.", required=false) - boolean EXCLUDE_FILTERED_REFERENCE_SITES = false; - /** * Create a new UAC with defaults for all UAC arguments */ diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 227fd3f20..2bdeff5db 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -274,8 +274,8 @@ public class UnifiedGenotyper extends LocusWalker, Unif // warn the user for misusing EMIT_ALL_SITES if ( UAC.outputMode == OutputMode.EMIT_ALL_SITES && - UAC.genotypingMode == GenotypingMode.DISCOVERY && - UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Name.SNP ) + UAC.genotypingOutputMode == GenotypingOutputMode.DISCOVERY && + UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP ) logger.warn("WARNING: note that the EMIT_ALL_SITES option is intended only for point mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by no means produce a comprehensive set of indels in DISCOVERY mode"); // initialize the verbose writer diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotypingEngine.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotypingEngine.java index abf3721f3..ba7b76e8e 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotypingEngine.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotypingEngine.java @@ -58,10 +58,10 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.gga.GenotypingGivenAllelesUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.GenotypesContext; import org.broadinstitute.variant.variantcontext.VariantContext; @@ -86,7 +86,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine> glcm; - private final List modelsToUse = new ArrayList<>(2); + private final List modelsToUse = new ArrayList<>(2); // the various loggers and writers @@ -129,23 +129,25 @@ public class UnifiedGenotypingEngine extends GenotypingEngine sampleNames, final PrintStream verboseWriter) { + super(toolkit,configuration,annotationEngine,sampleNames); + this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF; genomeLocParser = toolkit.getGenomeLocParser(); - // note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ this.verboseWriter = verboseWriter; determineGLModelsToUse(); - // do argument checking - if (configuration.annotateAllSitesWithPLs) { - if (!modelsToUse.contains(GenotypeLikelihoodsCalculationModel.Name.SNP)) - throw new IllegalArgumentException("Invalid genotype likelihood model specification: " + - "only diploid SNP model can be used in conjunction with option allSitePLs"); - } + initializeGenotypeLikelihoodsCalculationModels(); + } + + /** + * Initialize {@link #glcm}. + */ + private void initializeGenotypeLikelihoodsCalculationModels() { + glcm = new ThreadLocal>() { - glcm = new ThreadLocal>() { @Override protected Map initialValue() { return getGenotypeLikelihoodsCalculationObject(logger,UnifiedGenotypingEngine.this.configuration); @@ -166,20 +168,23 @@ public class UnifiedGenotypingEngine extends GenotypingEngine results = new ArrayList<>(2); - final List models = getGLModelsToUse(tracker, rawContext); + final List models = getGLModelsToUse(tracker, rawContext); final Map perReadAlleleLikelihoodMap = new HashMap<>(); - if ( models.isEmpty() ) { - results.add(configuration.outputMode == OutputMode.EMIT_ALL_SITES && configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ? emptyCallContext(tracker, refContext, rawContext) : null); - } + final VariantCallContext defaultResult = configuration.outputMode == OutputMode.EMIT_ALL_SITES + && configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES + ? emptyCallContext(tracker, refContext, rawContext) + : null; + + if ( models.isEmpty() ) + results.add(defaultResult); else { - for ( final GenotypeLikelihoodsCalculationModel.Name model : models ) { + for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) { perReadAlleleLikelihoodMap.clear(); final Map stratifiedContexts = getFilteredAndStratifiedContexts(refContext, rawContext, model); - if ( stratifiedContexts == null ) { - results.add(configuration.outputMode == OutputMode.EMIT_ALL_SITES && configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ? emptyCallContext(tracker, refContext, rawContext) : null); - } + if ( stratifiedContexts == null ) + results.add(defaultResult); else { final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap); if ( vc != null ) @@ -195,6 +200,28 @@ public class UnifiedGenotypingEngine extends GenotypingEngine getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) { + + final Map glcm = new HashMap<>(); + final List> glmClasses = new PluginManager(GenotypeLikelihoodsCalculationModel.class).getPlugins(); + + for (final Class glmClass : glmClasses) { + final String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase(); + try { + final Object args[] = new Object[]{UAC,logger}; + final Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class); + glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args)); + } + catch (Exception e) { + throw new UserException("The likelihoods model provided for the -glm argument (" + UAC.GLmodel + ") is not a valid option: " + e.getMessage()); + } + } + + return glcm; + } + /** * Compute GLs at a given locus. Entry point for engine calls from UGCalcLikelihoods. * @@ -208,12 +235,12 @@ public class UnifiedGenotypingEngine extends GenotypingEngine perReadAlleleLikelihoodMap) { - final List models = getGLModelsToUse(tracker, rawContext); + final List models = getGLModelsToUse(tracker, rawContext); if ( models.isEmpty() ) { return null; } - for ( final GenotypeLikelihoodsCalculationModel.Name model : models ) { + for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) { final Map stratifiedContexts = getFilteredAndStratifiedContexts(refContext, rawContext, model); // return the first valid one we encounter if ( stratifiedContexts != null ) @@ -237,13 +264,13 @@ public class UnifiedGenotypingEngine extends GenotypingEngine models = getGLModelsToUse(tracker, rawContext); + final List models = getGLModelsToUse(tracker, rawContext); if ( models.isEmpty() ) { return null; } // return the first one - final GenotypeLikelihoodsCalculationModel.Name model = models.get(0); + final GenotypeLikelihoodsCalculationModel.Model model = models.get(0); final Map stratifiedContexts = getFilteredAndStratifiedContexts(refContext, rawContext, model); return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, null); } @@ -255,7 +282,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine alternateAllelesToUse, final boolean useBAQedPileup, - final GenotypeLikelihoodsCalculationModel.Name model, + final GenotypeLikelihoodsCalculationModel.Model model, final Map perReadAlleleLikelihoodMap) { return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap); } - public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Name model) { + public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) { return calculateGenotypes(null, null, null, null, vc, model, null); } @@ -288,20 +315,20 @@ public class UnifiedGenotypingEngine extends GenotypingEngine stratifiedContexts, final VariantContext vc, - final GenotypeLikelihoodsCalculationModel.Name model, + final GenotypeLikelihoodsCalculationModel.Model model, final Map perReadAlleleLikelihoodMap) { return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, false, perReadAlleleLikelihoodMap); } @Override protected boolean forceKeepAllele(final Allele allele) { - return configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs; + return configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs; } @Override public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker, final ReferenceContext refContext, final AlignmentContext rawContext, Map stratifiedContexts, - final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Name model, + final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model, final boolean inheritAttributesFromInputVC, final Map perReadAlleleLikelihoodMap) { boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null; @@ -326,7 +353,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine composeCallAttributes(final boolean inheritAttributesFromInputVC, final VariantContext vc, final AlignmentContext rawContext, final Map stratifiedContexts, final RefMetaDataTracker tracker, final ReferenceContext refContext, final List alleleCountsofMLE, final boolean bestGuessIsRef, final AFCalcResult AFresult, final List allAllelesToUse, final GenotypesContext genotypes, - final GenotypeLikelihoodsCalculationModel.Name model, final Map perReadAlleleLikelihoodMap) { + final GenotypeLikelihoodsCalculationModel.Model model, final Map perReadAlleleLikelihoodMap) { final Map result = super.composeCallAttributes(inheritAttributesFromInputVC, vc,rawContext,stratifiedContexts,tracker,refContext,alleleCountsofMLE,bestGuessIsRef, AFresult,allAllelesToUse,genotypes,model,perReadAlleleLikelihoodMap); @@ -336,45 +363,62 @@ public class UnifiedGenotypingEngine extends GenotypingEngine getFilteredAndStratifiedContexts(ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Name model) { + private double calculateSLOD(final Map stratifiedContexts, + final RefMetaDataTracker tracker, + final ReferenceContext refContext, final AFCalcResult AFresult, + final List allAllelesToUse, + final GenotypeLikelihoodsCalculationModel.Model model, + final Map perReadAlleleLikelihoodMap) { + // the overall lod + //double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0]; + final double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0(); + //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); + + // the forward lod + final AFCalcResult forwardAFresult = getDirectionalAfCalcResult(AlignmentContextUtils.ReadOrientation.FORWARD,stratifiedContexts, tracker, refContext, allAllelesToUse, model, perReadAlleleLikelihoodMap); + final double forwardLog10PofNull = forwardAFresult.getLog10LikelihoodOfAFEq0(); + final double forwardLog10PofF = forwardAFresult.getLog10LikelihoodOfAFGT0(); + //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); + + // the reverse lod + final AFCalcResult reverseAFresult = getDirectionalAfCalcResult(AlignmentContextUtils.ReadOrientation.REVERSE,stratifiedContexts, tracker, refContext, allAllelesToUse, model, perReadAlleleLikelihoodMap); + final double reverseLog10PofNull = reverseAFresult.getLog10LikelihoodOfAFEq0(); + final double reverseLog10PofF = reverseAFresult.getLog10LikelihoodOfAFGT0(); + //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); + + final double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF; + final double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF; + //if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); + + // strand score is max bias between forward and reverse strands + double strandScore = Math.max(forwardLod, reverseLod); + // rescale by a factor of 10 + strandScore *= 10.0; + //logger.debug(String.format("SLOD=%f", strandScore)); + return strandScore; + } + + private AFCalcResult getDirectionalAfCalcResult(final AlignmentContextUtils.ReadOrientation orientation, + final Map stratifiedContexts, + final RefMetaDataTracker tracker, + final ReferenceContext refContext, List allAllelesToUse, + final GenotypeLikelihoodsCalculationModel.Model model, + final Map perReadAlleleLikelihoodMap) { + final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts,orientation, + allAllelesToUse, false, model, perReadAlleleLikelihoodMap); + return afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model)); + } + + private Map getFilteredAndStratifiedContexts(final ReferenceContext refContext, + final AlignmentContext rawContext, + final GenotypeLikelihoodsCalculationModel.Model model) { if ( !BaseUtils.isRegularBase(refContext.getBase()) ) return null; @@ -394,7 +438,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine + * Whether to use the general ones or the concrete diploid ones need to depend on what the user has said + * in its parameter glm. Iff he selected GENERALPLOIDYINDEL or GENERALPLOIDYSNP is the general set, otherwise + *

+ * + * the standard set (SNP and INDEL). + *

+ * Even if the user did not select to use both models, GGA force the inclusion of both: snp and indels. + *

+ *

+ * Also, we must fail + *

+ * + * The models are added to the field {@link #modelsToUse}. + */ private void determineGLModelsToUse() { - String modelPrefix = ""; - if ( !configuration.GLmodel.name().contains(GPSTRING) && configuration.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY ) - modelPrefix = GPSTRING; - // GGA mode => must initialize both the SNP and indel models - if ( configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES || - configuration.GLmodel.name().toUpperCase().contains("BOTH") ) { - modelsToUse.add(GenotypeLikelihoodsCalculationModel.Name.valueOf(modelPrefix + "SNP")); - modelsToUse.add(GenotypeLikelihoodsCalculationModel.Name.valueOf(modelPrefix + "INDEL")); - } - else { - modelsToUse.add(GenotypeLikelihoodsCalculationModel.Name.valueOf(modelPrefix + configuration.GLmodel.name().toUpperCase())); + modelsToUse.clear(); + + boolean useGeneral = false; + boolean useSNP = false; + boolean useINDEL = false; + + switch (configuration.GLmodel) { + case BOTH: + useSNP = useINDEL = true; break; + case SNP: + useSNP = true; break; + case INDEL: + useINDEL = true; break; + case GENERALPLOIDYINDEL: + useINDEL = useGeneral = true; break; + case GENERALPLOIDYSNP: + useSNP = useGeneral = true; break; + default: //Paranoia + throw new IllegalStateException("unexpected genotype likelihood model " + configuration.GLmodel); } + + // Force the use of both models in GGA: + if (configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES) + useSNP = useINDEL = true; + + // If annotateAllSitesWithPLs requested , SNP model must be used. + if (!useSNP && configuration.annotateAllSitesWithPLs) + throw new UserException.BadArgumentValue("glm","Invalid genotype likelihood model specification: " + + "only diploid SNP model can be used in conjunction with option allSitePLs"); + + // Finally add the relevant model + if (useSNP) + modelsToUse.add(useGeneral ? GenotypeLikelihoodsCalculationModel.Model.GENERALPLOIDYSNP : + GenotypeLikelihoodsCalculationModel.Model.SNP); + if (useINDEL) + modelsToUse.add(useGeneral ? GenotypeLikelihoodsCalculationModel.Model.GENERALPLOIDYINDEL : + GenotypeLikelihoodsCalculationModel.Model.INDEL); } // decide whether we are currently processing SNPs, indels, neither, or both - private List getGLModelsToUse(final RefMetaDataTracker tracker, + private List getGLModelsToUse(final RefMetaDataTracker tracker, final AlignmentContext rawContext) { - if ( configuration.genotypingMode != GenotypingMode.GENOTYPE_GIVEN_ALLELES ) + if ( configuration.genotypingOutputMode != GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) return modelsToUse; if ( modelsToUse.size() != 2 ) @@ -480,24 +567,5 @@ public class UnifiedGenotypingEngine extends GenotypingEngine getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) { - - final Map glcm = new HashMap<>(); - final List> glmClasses = new PluginManager(GenotypeLikelihoodsCalculationModel.class).getPlugins(); - - for (final Class glmClass : glmClasses) { - final String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase(); - try { - final Object args[] = new Object[]{UAC,logger}; - final Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class); - glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args)); - } - catch (Exception e) { - throw new UserException("The likelihoods model provided for the -glm argument (" + UAC.GLmodel + ") is not a valid option: " + e.getMessage()); - } - } - - return glcm; - } } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 45a2ffda9..3e3b76d71 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -130,6 +130,7 @@ import java.util.*; *

Caveats

*
    *
  • The system is under active and continuous development. All outputs, the underlying likelihood model, and command line arguments are likely to change often.
  • + *
  • Currently the -ploidy parameter only support the default 2 (diploid). Eventually one will be able to change its value for haploid and polyploid analyses.
  • *
* * @author rpoplin @@ -532,7 +533,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In if ( emitReferenceConfidence() ) { - if (SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES) + if (SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES) throw new UserException.BadArgumentValue("ERC/gt_mode","you cannot request reference confidence output and Genotyping Giving Alleles at the same time"); SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0; @@ -562,7 +563,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class); simpleUAC.outputMode = OutputMode.EMIT_VARIANTS_ONLY; - simpleUAC.genotypingMode = GenotypingMode.DISCOVERY; + simpleUAC.genotypingOutputMode = GenotypingOutputMode.DISCOVERY; simpleUAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.min( 4.0, SCAC.STANDARD_CONFIDENCE_FOR_CALLING ); // low values used for isActive determination only, default/user-specified values used for actual calling simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, SCAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling simpleUAC.CONTAMINATION_FRACTION = 0.0; @@ -574,7 +575,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In if( SCAC.CONTAMINATION_FRACTION_FILE != null ) SCAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(SCAC.CONTAMINATION_FRACTION_FILE, SCAC.CONTAMINATION_FRACTION, samplesSet, logger)); - if( SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES && consensusMode ) + if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && consensusMode ) throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other."); // initialize the output VCF header @@ -653,7 +654,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } trimmer.initialize(getToolkit().getGenomeLocParser(), SCAC.DEBUG, - SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES,emitReferenceConfidence()); + SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES,emitReferenceConfidence()); } private void initializeReferenceConfidenceModel(final Set samples, final Set headerInfo) { @@ -723,7 +724,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"}) public ActivityProfileState isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) { - if( SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ) { + if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) { final VariantContext vcFromAllelesRod = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(tracker, ref.getLocus(), false, logger, SCAC.alleles); if( vcFromAllelesRod != null ) { return new ActivityProfileState(ref.getLocus(), 1.0); @@ -748,7 +749,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } final List alleles = Arrays.asList(FAKE_REF_ALLELE , FAKE_ALT_ALLELE); - final VariantCallContext vcOut = activeRegionEvaluationGenotyperEngine.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Name.SNP); + final VariantCallContext vcOut = activeRegionEvaluationGenotyperEngine.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.SNP); final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ); return new ActivityProfileState( ref.getLocus(), isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileState.Type.NONE, averageHQSoftClips.mean() ); @@ -772,7 +773,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In return referenceModelForNoVariation(originalActiveRegion, true); final List givenAlleles = new ArrayList<>(); - if( SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ) { + if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) { for ( final VariantContext vc : metaDataTracker.getValues(SCAC.alleles) ) { if ( vc.isNotFiltered() ) { givenAlleles.add(vc); // do something with these VCs during GGA mode diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index 8daf9c3df..fa743813a 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -54,7 +54,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingEngine; -import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingMode; +import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingOutputMode; import org.broadinstitute.sting.gatk.walkers.genotyper.OutputMode; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -96,13 +96,13 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine alleleList = new ArrayList<>(); diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java index 6c49fe076..50687378c 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java @@ -339,7 +339,7 @@ public class GenotypeAndValidate extends RodWalker= 0) uac.MIN_BASE_QUALTY_SCORE = mbq; if (deletions >= 0) @@ -349,12 +349,12 @@ public class GenotypeAndValidate extends RodWalker= 0) uac.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf; if (callConf >= 0) uac.STANDARD_CONFIDENCE_FOR_CALLING = callConf; - uac.GLmodel = GenotypeLikelihoodsCalculationModel.Name.SNP; + uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; snpEngine = new UnifiedGenotypingEngine(getToolkit(), uac); // Adding the INDEL calling arguments for UG UnifiedArgumentCollection uac_indel = uac.clone(); - uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Name.INDEL; + uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL; indelEngine = new UnifiedGenotypingEngine(getToolkit(), uac_indel); // make sure we have callConf set to the threshold set by the UAC so we can use it later. diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriors.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriors.java index 7a7addab3..6f6d15806 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriors.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriors.java @@ -59,7 +59,7 @@ import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.HelpConstants; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; -import org.broadinstitute.sting.utils.variant.HomoSapiens; +import org.broadinstitute.sting.utils.variant.HomoSapiensConstants; import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.vcf.*; @@ -170,7 +170,7 @@ public class CalculateGenotypePosteriors extends RodWalker { * */ @Argument(fullName="globalPrior",shortName="G",doc="The global Dirichlet prior parameters for the allele frequency",required=false) - public double globalPrior = HomoSapiens.SNP_HETEROZYGOSITY; + public double globalPrior = HomoSapiensConstants.SNP_HETEROZYGOSITY; /** * When a variant is not seen in a panel, whether to infer (and with what effective strength) that only reference diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariants.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariants.java index 10a37d3db..2cb12878b 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariants.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariants.java @@ -115,9 +115,9 @@ public class RegenotypeVariants extends RodWalker implements T public void initialize() { final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); - UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Name.BOTH; + UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH; UAC.outputMode = OutputMode.EMIT_ALL_SITES; - UAC.genotypingMode = GenotypingMode.GENOTYPE_GIVEN_ALLELES; + UAC.genotypingOutputMode = GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES; String trackName = variantCollection.variants.getName(); Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/BaseUtils.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/BaseUtils.java index 0fa277b8b..8edd872ee 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/BaseUtils.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/BaseUtils.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.Arrays; import java.util.Comparator; +import java.util.Random; /** * BaseUtils contains some basic utilities for manipulating nucleotides. @@ -553,6 +554,62 @@ public class BaseUtils { return getRandomBaseIndex(-1); } + /** + * Return random bases. + * + * @param length base count and length of returned array. + * + * @throws IllegalArgumentException if {@code length} is less than 0. + * + * @return never {@code null} + */ + @SuppressWarnings("unused") + public static byte[] getRandomBases(final int length) { + if (length < 0) + throw new IllegalArgumentException("length must zero or greater"); + final byte[] result = new byte[length]; + fillWithRandomBases(result); + return result; + } + + /** + * Fills an array with random bases. + * + * @param dest the array to fill. + * + * @throws IllegalArgumentException if {@code result} is {@code null}. + */ + public static void fillWithRandomBases(final byte[] dest) { + fillWithRandomBases(dest,0,dest.length); + } + + /** + * Fill an array section with random bases. + * + * @param dest array to fill. + * @param fromIndex first index to be filled (inclusive). + * @param toIndex index after last to be filled (exclusive). + * + * @throws IllegalArgumentException if {@code dest} is {@code null}, + * {@code fromIndex} or {@code toIndex} is negative, + * {@code fromIndex} or {@code toIndex} are greater than {@code dest} length, + * or {@code fromIndex} greater than {@code toIndex}. + */ + public static void fillWithRandomBases(final byte[] dest, final int fromIndex, final int toIndex) { + final Random rnd = GenomeAnalysisEngine.getRandomGenerator(); + if (dest == null) + throw new IllegalArgumentException("the dest array cannot be null"); + if (fromIndex > toIndex) + throw new IllegalArgumentException("fromIndex cannot be larger than toIndex"); + if (fromIndex < 0) + throw new IllegalArgumentException("both indexes must be positive"); + if (toIndex > dest.length) + throw new IllegalArgumentException("both indexes must be less or equal to the destination array length"); + + for (int i = fromIndex; i < toIndex; i++) + dest[i] = baseIndexToSimpleBase(rnd.nextInt(4)); + } + /** * Return a random base index, excluding some base index. * diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java index d72e1f67d..57f7b4175 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java @@ -44,7 +44,7 @@ public class GATKVariantContextUtils { private static Logger logger = Logger.getLogger(GATKVariantContextUtils.class); - public static final int DEFAULT_PLOIDY = HomoSapiens.DEFAULT_PLOIDY; + public static final int DEFAULT_PLOIDY = HomoSapiensConstants.DEFAULT_PLOIDY; public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/HomoSapiens.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/HomoSapiensConstants.java similarity index 91% rename from public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/HomoSapiens.java rename to public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/HomoSapiensConstants.java index 816c06f35..4418ca104 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/HomoSapiens.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/HomoSapiensConstants.java @@ -28,11 +28,11 @@ package org.broadinstitute.sting.utils.variant; /** * Homo sapiens genome constants. * - *

NOTE: reference to this constants is a indication that your code is (human) species assumption dependant.

+ *

NOTE: reference to these constants is an indication that your code is (human) species assumption dependant.

* * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> */ -public class HomoSapiens { +public class HomoSapiensConstants { /** * Standard heterozygous rate for SNP variation. diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java index f8b7124f5..0e0e52dba 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java @@ -52,9 +52,6 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { private GenomeLocParser genomeLocParser; - - - @BeforeSuite public void setup() throws IOException { // alleles @@ -1765,28 +1762,29 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(GATKVariantContextUtils.overlapsRegion(vc, genomeLoc), expected); } + + private final String[] OVERLAP_WITH_CHROMOSOMES = { "chr1", "chr20" }; + private final int[] OVERLAP_WITH_EVENT_SIZES = { -10, -1, 0, 1, 10 }; // 0 == SNP , -X xbp deletion, +X xbp insertion. + private final int[] OVERLAP_WITH_EVENT_STARTS = { 10000000, 10000001, + 10000005, 10000010, + 10000009, 10000011, + 20000000 }; + @DataProvider(name="overlapWithData") public Object[][] overlapWithData() { - final String[] chrs = new String[] { "chr1", "chr20" }; - final int[] eventSizes = new int[] { -10 , -1 , 0 , 1 , 10 }; // 0 == SNP. - final int[] eventStarts = new int[] { 10000000, 10000001, 10000005, 10000010, 10000009, 10000011, 20000000}; - final int totalLocations = chrs.length * eventSizes.length * eventStarts.length + 1; - final int totalEvents = chrs.length * eventSizes.length * eventStarts.length; + final int totalLocations = OVERLAP_WITH_CHROMOSOMES.length * OVERLAP_WITH_EVENT_SIZES.length * OVERLAP_WITH_EVENT_STARTS.length + 1; + final int totalEvents = OVERLAP_WITH_CHROMOSOMES.length * OVERLAP_WITH_EVENT_SIZES.length * OVERLAP_WITH_EVENT_STARTS.length; final GenomeLoc[] locs = new GenomeLoc[totalLocations]; final VariantContext[] events = new VariantContext[totalEvents]; - int nextIndex = 0; - for (final String chr : chrs ) - for (final int size : eventSizes ) - for (final int starts : eventStarts ) { - locs[nextIndex] = genomeLocParser.createGenomeLoc(chr,starts,starts + Math.max(0,size)); - events[nextIndex++] = new VariantContextBuilder().source("test").loc(chr,starts,starts + Math.max(0,size)).alleles(Arrays.asList( - Allele.create(randomBases(size <= 0 ? 1 : size + 1,true),true),Allele.create(randomBases(size < 0 ? - size + 1 : 1,false),false))).make(); - } + generateAllLocationsAndVariantContextCombinations(OVERLAP_WITH_CHROMOSOMES, OVERLAP_WITH_EVENT_SIZES, + OVERLAP_WITH_EVENT_STARTS, locs, events); - locs[nextIndex++] = GenomeLoc.UNMAPPED; + return generateAllParameterCombinationsForOverlapWithData(locs, events); + } + private Object[][] generateAllParameterCombinationsForOverlapWithData(GenomeLoc[] locs, VariantContext[] events) { final List result = new LinkedList<>(); for (final GenomeLoc loc : locs) for (final VariantContext event : events) @@ -1795,17 +1793,25 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { return result.toArray(new Object[result.size()][]); } + private void generateAllLocationsAndVariantContextCombinations(final String[] chrs, final int[] eventSizes, + final int[] eventStarts, final GenomeLoc[] locs, + final VariantContext[] events) { + int nextIndex = 0; + for (final String chr : chrs ) + for (final int size : eventSizes ) + for (final int starts : eventStarts ) { + locs[nextIndex] = genomeLocParser.createGenomeLoc(chr,starts,starts + Math.max(0,size)); + events[nextIndex++] = new VariantContextBuilder().source("test").loc(chr,starts,starts + Math.max(0,size)).alleles(Arrays.asList( + Allele.create(randomBases(size <= 0 ? 1 : size + 1, true), true), Allele.create(randomBases(size < 0 ? -size + 1 : 1, false), false))).make(); + } + + locs[nextIndex++] = GenomeLoc.UNMAPPED; + } + private byte[] randomBases(final int length, final boolean reference) { final byte[] bases = new byte[length]; bases[0] = (byte) (reference ? 'A' : 'C'); - for (int i = 1; i < bases.length; i++) - switch (GenomeAnalysisEngine.getRandomGenerator().nextInt(4)) { - case 0: bases[i] = 'A'; break; - case 1: bases[i] = 'C'; break; - case 2: bases[i] = 'G'; break; - default: - bases[i] = 'T'; - } + BaseUtils.fillWithRandomBases(bases, 1, bases.length); return bases; } }