diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/StandardCallerArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/StandardCallerArgumentCollection.java index 3f7794b24..731895b70 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/StandardCallerArgumentCollection.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/StandardCallerArgumentCollection.java @@ -46,10 +46,10 @@ package org.broadinstitute.gatk.engine.arguments; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorImplementation; import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode; import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode; -import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory; import org.broadinstitute.gatk.utils.collections.DefaultHashMap; import htsjdk.variant.variantcontext.VariantContext; @@ -145,7 +145,7 @@ public class StandardCallerArgumentCollection implements Cloneable { */ @Hidden @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false) - public AFCalcFactory.Calculation requestedAlleleFrequencyCalculationModel; + public AFCalculatorImplementation requestedAlleleFrequencyCalculationModel; @Hidden @Argument(shortName = "logExactCalls", doc="x", required=false) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalc.java index ade6de0d6..b32e38ce2 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalc.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalc.java @@ -64,32 +64,23 @@ import java.util.List; public abstract class AFCalc implements Cloneable { private final static Logger defaultLogger = Logger.getLogger(AFCalc.class); - protected final int nSamples; - protected final int maxAlternateAllelesToGenotype; protected Logger logger = defaultLogger; private SimpleTimer callTimer = new SimpleTimer(); - private final StateTracker stateTracker; + private StateTracker stateTracker; private ExactCallLogger exactCallLogger = null; /** * Create a new AFCalc object capable of calculating the prob. that alleles are - * segregating among nSamples with up to maxAltAlleles for SNPs and maxAltAllelesForIndels - * for indels for samples with ploidy + * segregating among many samples. * - * @param nSamples number of samples, must be > 0 - * @param maxAltAlleles maxAltAlleles for SNPs - * @param ploidy the ploidy, must be > 0 + *

+ * Restrictions in ploidy and number of alternative alleles that a instance can handle will be determined + * by its implementation class {@link AFCalculatorImplementation} + *

*/ - protected AFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) { - if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples); - if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles); - if ( ploidy < 1 ) throw new IllegalArgumentException("ploidy must be > 0 but got " + ploidy); - - this.nSamples = nSamples; - this.maxAlternateAllelesToGenotype = maxAltAlleles; - this.stateTracker = new StateTracker(maxAltAlleles); + protected AFCalc() { } /** @@ -118,19 +109,18 @@ public abstract class AFCalc implements Cloneable { * @param log10AlleleFrequencyPriors a prior vector nSamples x 2 in length indicating the Pr(AF = i) * @return result (for programming convenience) */ - public AFCalcResult getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { + public AFCalcResult getLog10PNonRef(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles, final double[] log10AlleleFrequencyPriors) { if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null"); if ( vc.getNAlleles() == 1 ) throw new IllegalArgumentException("VariantContext has only a single reference allele, but getLog10PNonRef requires at least one at all " + vc); if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null"); - if ( stateTracker == null ) throw new IllegalArgumentException("Results object cannot be null"); // reset the result, so we can store our new result there - stateTracker.reset(); + final StateTracker stateTracker = getStateTracker(true,maximumAlternativeAlleles); - final VariantContext vcWorking = reduceScope(vc); + final VariantContext vcWorking = reduceScope(vc,defaultPloidy, maximumAlternativeAlleles); callTimer.start(); - final AFCalcResult result = computeLog10PNonRef(vcWorking, log10AlleleFrequencyPriors); + final AFCalcResult result = computeLog10PNonRef(vcWorking, defaultPloidy, log10AlleleFrequencyPriors, stateTracker); final long nanoTime = callTimer.getElapsedTimeNano(); if ( exactCallLogger != null ) @@ -150,7 +140,7 @@ public abstract class AFCalc implements Cloneable { */ @Requires("stateTracker.getnEvaluations() >= 0") @Ensures("result != null") - protected AFCalcResult getResultFromFinalState(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors) { + protected AFCalcResult getResultFromFinalState(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) { stateTracker.setAllelesUsedInGenotyping(vcWorking.getAlleles()); return stateTracker.toAFCalcResult(log10AlleleFrequencyPriors); } @@ -174,7 +164,7 @@ public abstract class AFCalc implements Cloneable { */ @Requires({"vc != null", "vc.getNAlleles() > 1"}) @Ensures("result != null") - protected abstract VariantContext reduceScope(final VariantContext vc); + protected abstract VariantContext reduceScope(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles); /** * Actually carry out the log10PNonRef calculation on vc, storing results in results @@ -185,8 +175,8 @@ public abstract class AFCalc implements Cloneable { * @return a AFCalcResult object describing the results of this calculation */ @Requires({"vc != null", "log10AlleleFrequencyPriors != null", "vc.getNAlleles() > 1"}) - protected abstract AFCalcResult computeLog10PNonRef(final VariantContext vc, - final double[] log10AlleleFrequencyPriors); + protected abstract AFCalcResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy, + final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker); /** * Subset VC to the just allelesToUse, updating genotype likelihoods @@ -194,15 +184,15 @@ public abstract class AFCalc implements Cloneable { * Must be overridden by concrete subclasses * * @param vc variant context with alleles and genotype likelihoods + * @param defaultPloidy default ploidy to assume in case {@code vc} does not indicate it for a sample. * @param allelesToUse alleles to subset * @param assignGenotypes - * @param ploidy * @return GenotypesContext object */ public abstract GenotypesContext subsetAlleles(final VariantContext vc, + final int defaultPloidy, final List allelesToUse, - final boolean assignGenotypes, - final int ploidy); + final boolean assignGenotypes); // --------------------------------------------------------------------------- // @@ -210,12 +200,41 @@ public abstract class AFCalc implements Cloneable { // // --------------------------------------------------------------------------- - public int getMaxAltAlleles() { - return maxAlternateAllelesToGenotype; - } - protected StateTracker getStateTracker() { + /** + * Retrieves the state tracker. + * + *

+ * The tracker will be reset if so requested or if it needs to be resized due to an increase in the + * maximum number of alleles is must be able to handle. + *

+ * + * @param reset make sure the tracker is reset. + * @param maximumAlternativeAlleleCount the maximum alternative allele count it must be able to handle. Has no effect if + * the current tracker is able to handle that number. + * + * @return never {@code null} + */ + protected StateTracker getStateTracker(final boolean reset, final int maximumAlternativeAlleleCount) { + if (stateTracker == null) + stateTracker = new StateTracker(maximumAlternativeAlleleCount); + else if (reset) + stateTracker.reset(maximumAlternativeAlleleCount); + else + stateTracker.ensureMaximumAlleleCapacity(maximumAlternativeAlleleCount); return stateTracker; } + /** + * Used by testing code. + * + * Please don't use this method in production. + * + * @deprecated + */ + @Deprecated + protected int getAltAlleleCountOfMAP(final int allele) { + return getStateTracker(false,allele + 1).getAlleleCountsOfMAP()[allele]; + } + } \ No newline at end of file diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcFactory.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcFactory.java index c291890ff..61882fe0f 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcFactory.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcFactory.java @@ -46,147 +46,38 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; -import org.apache.log4j.Logger; -import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection; -import org.broadinstitute.gatk.utils.classloader.PluginManager; -import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; - -import java.lang.reflect.Constructor; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - /** * Factory to make AFCalculations */ public class AFCalcFactory { - /** - * Enumeration of usable AF calculation, their constraints (i.e. ploidy). - * - * Note that the order these occur in the enum is the order of preference, so - * the first value is taken over the second when multiple calculations satisfy - * the needs of the request (i.e., considering ploidy). - */ - public enum Calculation { - /** default implementation */ - EXACT_INDEPENDENT(IndependentAllelesDiploidExactAFCalc.class, 2, -1), - /** reference implementation of multi-allelic EXACT model. Extremely slow for many alternate alleles */ - EXACT_REFERENCE(ReferenceDiploidExactAFCalc.class, 2, -1), - /** original biallelic exact model, for testing only */ - EXACT_ORIGINAL(OriginalDiploidExactAFCalc.class, 2, 2), - - /** implementation that supports any sample ploidy. Currently not available for the HaplotypeCaller */ - EXACT_GENERAL_PLOIDY("GeneralPloidyExactAFCalc", -1, -1); - - /** - * Must be a name because we look this up dynamically - */ - public final String className; - public final int maxAltAlleles; - public final int requiredPloidy; - - private Calculation(final String className, final int requiredPloidy, final int maxAltAlleles) { - this.className = className; - this.requiredPloidy = requiredPloidy; - this.maxAltAlleles = maxAltAlleles; - } - - private Calculation(final Class clazz, final int requiredPloidy, final int maxAltAlleles) { - this(clazz.getSimpleName(), requiredPloidy, maxAltAlleles); - } - - public boolean usableForParams(final int requestedPloidy, final int requestedMaxAltAlleles) { - return (requiredPloidy == -1 || requiredPloidy == requestedPloidy) - && (maxAltAlleles == -1 || maxAltAlleles >= requestedMaxAltAlleles); - } - - public static Calculation getDefaultModel() { return EXACT_INDEPENDENT; } - - /** - * Returns the best (fastest) model give the required ploidy and alternative allele count. - * @param requiredPloidy required ploidy - * @param requiredAlternativeAlleleCount required alternative allele count. - * @param preferredModel a preferred mode if any. A {@code null} indicate that we should be try to use the default instead. - * @return never {@code null} - */ - public static Calculation getBestModel(final int requiredPloidy, final int requiredAlternativeAlleleCount, final Calculation preferredModel) { - final Calculation preferred = preferredModel == null ? getDefaultModel() : preferredModel; - if (preferred.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) - return preferred; - if (EXACT_INDEPENDENT.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) - return EXACT_INDEPENDENT; - if (EXACT_REFERENCE.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) - return EXACT_REFERENCE; - return EXACT_GENERAL_PLOIDY; - } - } - - private static final Map> afClasses; - static { - afClasses = new PluginManager(AFCalc.class).getPluginsByName(); - } - - private AFCalcFactory() { - - } - - private static Class getClassByName(final String name) { - for ( final Class clazz : afClasses.values() ) { - if ( clazz.getSimpleName().contains(name) ) { - return clazz; - } - } - - return null; - } - - /** - * Create a new AFCalc based on the parameters in the UAC - * - * @param UAC the UnifiedArgumentCollection containing the command-line parameters for the caller - * @param nSamples the number of samples we will be using - * @param logger an optional (can be null) logger to override the default in the model - * @return an initialized AFCalc - */ - public static AFCalc createAFCalc(final StandardCallerArgumentCollection UAC, - final int nSamples, final boolean emitConfModel, - final Logger logger) { - final Calculation afCalculationModel = Calculation.getBestModel(UAC.genotypeArgs.samplePloidy,UAC.genotypeArgs.MAX_ALTERNATE_ALLELES, - UAC.requestedAlleleFrequencyCalculationModel); - - final AFCalc calc = createAFCalc(afCalculationModel, nSamples, UAC.genotypeArgs.MAX_ALTERNATE_ALLELES, UAC.genotypeArgs.samplePloidy); - - if ( logger != null ) calc.setLogger(logger); - if ( UAC.exactCallsLog != null ) calc.enableProcessLog(UAC.exactCallsLog); - - return calc; - } /** * Create a new AFCalc, choosing the best implementation based on the given parameters, assuming * that we will only be requesting bi-allelic variants to diploid genotypes * - * @param nSamples the number of samples we'll be using - * * @return an initialized AFCalc + * + * @deprecated hardcoded special reference to diploid analysis... eventually using code must explicitly indicate the + * ploidy as this is a sign of a hidden diploid assumption which is bad. */ - public static AFCalc createAFCalc(final int nSamples) { - return createAFCalc(chooseBestCalculation(nSamples, 2, 1), nSamples, 2, 2); + @Deprecated + public static AFCalc createCalculatorForDiploidBiAllelicAnalysis() { + return AFCalculatorImplementation.bestValue(2,1,null).newInstance(); } /** * Create a new AFCalc that supports maxAltAlleles for all variants and diploid genotypes * - * @param calc the calculation we'd like to use - * @param nSamples the number of samples we'll be using - * @param maxAltAlleles the max. alt alleles for both SNPs and indels * + * @deprecated hardcoded special reference to diploid analysis... eventually using code must explicitly indicate the + * ploidy as this is a sign of a hidden diploid assumption which is bad. * @return an initialized AFCalc */ - public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles) { - return createAFCalc(calc, nSamples, maxAltAlleles, 2); + @Deprecated + public static AFCalc createCalculatorForDiploidAnalysis() { + return AFCalculatorImplementation.bestValue(2,AFCalculatorImplementation.UNBOUND_ALTERNATIVE_ALLELE_COUNT,null).newInstance(); } /** @@ -198,66 +89,47 @@ public class AFCalcFactory { * * @return an initialized AFCalc */ - public static AFCalc createAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) { - return createAFCalc(chooseBestCalculation(nSamples, ploidy, maxAltAlleles), nSamples, maxAltAlleles, ploidy); + public static AFCalc createCalculator(final int nSamples, final int maxAltAlleles, final int ploidy) { + return createCalculator(chooseBestImplementation(ploidy, maxAltAlleles), nSamples, maxAltAlleles, ploidy); } /** * Choose the best calculation for nSamples and ploidy * - * @param nSamples * @param ploidy * @param maxAltAlleles * @return */ - private static Calculation chooseBestCalculation(final int nSamples, final int ploidy, final int maxAltAlleles) { - for ( final Calculation calc : Calculation.values() ) { + private static AFCalculatorImplementation chooseBestImplementation(final int ploidy, final int maxAltAlleles) { + for ( final AFCalculatorImplementation calc : AFCalculatorImplementation.values() ) { if ( calc.usableForParams(ploidy, maxAltAlleles) ) { return calc; } } - throw new IllegalStateException("no calculation found that supports nSamples " + nSamples + " ploidy " + ploidy + " and maxAltAlleles " + maxAltAlleles); + throw new IllegalStateException("no calculation found that supports nSamples " + ploidy + " and maxAltAlleles " + maxAltAlleles); } /** * Create a new AFCalc * - * @param calc the calculation to use + * @param implementation the calculation to use * @param nSamples the number of samples we'll be using * @param maxAltAlleles the max. alt alleles to consider for SNPs - * @param ploidy the sample ploidy. Must be consistent with the calc + * @param ploidy the sample ploidy. Must be consistent with the implementation * * @return an initialized AFCalc */ - public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles, final int ploidy) { - if ( calc == null ) throw new IllegalArgumentException("Calculation cannot be null"); + public static AFCalc createCalculator(final AFCalculatorImplementation implementation, final int nSamples, final int maxAltAlleles, final int ploidy) { + if ( implementation == null ) throw new IllegalArgumentException("Calculation cannot be null"); if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples); if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles); if ( ploidy < 1 ) throw new IllegalArgumentException("sample ploidy must be greater than zero " + ploidy); - if ( ! calc.usableForParams(ploidy, maxAltAlleles) ) - throw new IllegalArgumentException("AFCalc " + calc + " does not support requested ploidy " + ploidy); + if ( ! implementation.usableForParams(ploidy, maxAltAlleles) ) + throw new IllegalArgumentException("AFCalc " + implementation + " does not support requested ploidy " + ploidy); - final Class afClass = getClassByName(calc.className); - if ( afClass == null ) - throw new IllegalArgumentException("Unexpected AFCalc " + calc); - - try { - Object args[] = new Object[]{nSamples, maxAltAlleles, ploidy}; - Constructor c = afClass.getDeclaredConstructor(int.class, int.class, int.class); - return (AFCalc)c.newInstance(args); - } catch (Exception e) { - throw new ReviewedGATKException("Could not instantiate AFCalc " + calc, e); - } + return implementation.newInstance(); } - protected static List createAFCalcs(final List calcs, final int nSamples, final int maxAltAlleles, final int ploidy) { - final List AFCalcs = new LinkedList(); - - for ( final Calculation calc : calcs ) - AFCalcs.add(createAFCalc(calc, nSamples, maxAltAlleles, ploidy)); - - return AFCalcs; - } } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorImplementation.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorImplementation.java new file mode 100644 index 000000000..662f37263 --- /dev/null +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorImplementation.java @@ -0,0 +1,236 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Modifier; +import java.util.HashMap; +import java.util.Map; + +/** + * Enumeration of usable AF calculation, their constraints (i.e. ploidy). + * + * Note that the order these occur in the enum is the order of preference, so + * the first value is taken over the second when multiple calculations satisfy + * the needs of the request (i.e., considering ploidy). + */ +public enum AFCalculatorImplementation { + + /** default implementation */ + EXACT_INDEPENDENT(IndependentAllelesDiploidExactAFCalc.class, 2), + + /** reference implementation of multi-allelic EXACT model. Extremely slow for many alternate alleles */ + EXACT_REFERENCE(ReferenceDiploidExactAFCalc.class, 2), + + /** original biallelic exact model, for testing only */ + EXACT_ORIGINAL(OriginalDiploidExactAFCalc.class, 2, 2), + + /** implementation that supports any sample ploidy. Currently not available for the HaplotypeCaller */ + EXACT_GENERAL_PLOIDY(GeneralPloidyExactAFCalc.class); + + /** + * Special max alt allele count indicating that this maximum is in fact unbound (can be anything). + */ + public final static int UNBOUND_ALTERNATIVE_ALLELE_COUNT = -1; + + /** + * Special ploidy constant that indicates that in fact the ploidy is unbound (can be anything). + */ + public final static int UNBOUND_PLOIDY = -1; + + private static Map,AFCalculatorImplementation> calculatorClassToValue = buildCalculatorClassToValueMap(); + + /** + * Reference to the calculator class. + */ + public final Class calculatorClass; + + /** + * Maximum number of supported alternative alleles. + */ + public final int maxAltAlleles; + + /** + * Reference to the constructor to instantiate a calculator for this implementation. + */ + protected final Constructor constructor; + + /** + * Supported ploidy. + * + * This is equal to {@link #UNBOUND_PLOIDY} if the class can handle any ploidy. + */ + public final int requiredPloidy; + + /** + * Reference to the default implementation. + */ + public final static AFCalculatorImplementation DEFAULT = EXACT_INDEPENDENT; + + + /** + * Constructs a new instance given all its properties + * @param clazz the calculator class that realizes this implementation. + * @param requiredPloidy the required ploidy; zero or greater or {@link #UNBOUND_PLOIDY} to indicate that any ploidy is supported. + * @param maxAltAlleles the maximum alternative alleles; zero or greater or {@link #UNBOUND_ALTERNATIVE_ALLELE_COUNT} to indicate that any maximum number of alternative alleles is supported. + */ + AFCalculatorImplementation(final Class clazz, final int requiredPloidy, final int maxAltAlleles) { + calculatorClass = clazz; + this.requiredPloidy = requiredPloidy; + this.maxAltAlleles = maxAltAlleles; + this.constructor = findInstantiationConstructor(calculatorClass); + } + + /** + * Constructs a new instance leaving ploidy and max-allele count unbound. + * @param clazz the calculator class that realizes this implementation. + */ + AFCalculatorImplementation(final Class clazz) { + this(clazz,UNBOUND_PLOIDY, UNBOUND_ALTERNATIVE_ALLELE_COUNT); + } + + /** Constructs a new instance leaving max-allele count unbound. + * @param clazz the calculator class that realizes this implementation. + * @param requiredPloidy the required ploidy; zero or greater or {@link #UNBOUND_PLOIDY} to indicate that any ploidy is supported. + */ + AFCalculatorImplementation(final Class clazz, final int requiredPloidy) { + this(clazz,requiredPloidy,UNBOUND_PLOIDY); + } + + /** + * Checks whether a given ploidy and max alternative alleles combination is supported or not. + * @param requestedPloidy the targeted ploidy. + * @param requestedMaxAltAlleles the targeted max alternative alleles. + * @return {@code true} iff this calculator implementation satisfies both requirements. + */ + public boolean usableForParams(final int requestedPloidy, final int requestedMaxAltAlleles) { + return (requiredPloidy == UNBOUND_PLOIDY || requiredPloidy == requestedPloidy) + && (maxAltAlleles == UNBOUND_ALTERNATIVE_ALLELE_COUNT || maxAltAlleles >= requestedMaxAltAlleles); + } + + /** + * Resolve the constructor to use to instantiate calculators. + * + * @param clazz target class. Assume not to be {@code null}. + */ + private Constructor findInstantiationConstructor(final Class clazz) { + if (Modifier.isAbstract(clazz.getModifiers())) + throw new IllegalStateException("AF calculator implementation class cannot be abstract"); + + final Constructor result; + try { + result = clazz.getDeclaredConstructor(); + } catch (final NoSuchMethodException e) { + throw new IllegalStateException("cannot find a suitable (int,int) constructor for the AFCalculator implementation " + this + " class " + clazz.getName()); + } + + // Check whether there will be issue calling the constructor just due to protections: + if (Modifier.isPrivate(result.getModifiers()) || (!Modifier.isPublic(result.getModifiers()) && !clazz.getPackage().equals(getClass().getPackage()))) + throw new IllegalStateException("triple int constructor for AFCalculator implementation " + this + " class " + clazz.getName() + " is not public "); + return result; + } + + /** + * Creates new instance + * + * @throws IllegalStateException if the instance could not be create due to some exception. The {@link Exception#getCause() cause} will hold a reference to the actual exception. + * @return never {@code null}. + */ + public AFCalc newInstance() { + try { + return constructor.newInstance(); + } catch (final Throwable e) { + throw new IllegalStateException("could not instantiate AFCalculator for implementation " + this + " class " + calculatorClass.getName()); + } + } + + /** + * Returns the best (fastest) model give the required ploidy and alternative allele count. + * + * @param requiredPloidy required ploidy + * @param requiredAlternativeAlleleCount required alternative allele count. + * @param preferred a preferred mode if any. A {@code null} indicate that we should be try to use the default instead. + * @return never {@code null} + */ + public static AFCalculatorImplementation bestValue(final int requiredPloidy, final int requiredAlternativeAlleleCount, final AFCalculatorImplementation preferred) { + final AFCalculatorImplementation preferredValue = preferred == null ? DEFAULT : preferred; + if (preferredValue.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) + return preferredValue; + if (EXACT_INDEPENDENT.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) + return EXACT_INDEPENDENT; + if (EXACT_REFERENCE.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) + return EXACT_REFERENCE; + return EXACT_GENERAL_PLOIDY; + } + + /** + * Returns the value that corresponds to a given implementation calculator class. + * + * @param clazz the target class. + * + * @throws IllegalArgumentException if {@code clazz} is {@code null} or if it is abstract. + * @throws IllegalStateException if + * + * @return never {@code null}. + */ + public static AFCalculatorImplementation fromCalculatorClass(final Class clazz) { + if (clazz == null) + throw new IllegalArgumentException("input class cannot be null"); + final AFCalculatorImplementation result = calculatorClassToValue.get(clazz); + if (result == null) + throw new IllegalStateException("Attempt to retrieve AFCalculatorImplementation instance from a non-registered calculator class " + clazz.getName()); + return result; + } + + // Initializes the content of the class to value map. + private static Map, AFCalculatorImplementation> buildCalculatorClassToValueMap() { + final Map,AFCalculatorImplementation> result = new HashMap<>(values().length); + for (final AFCalculatorImplementation value : values()) + if (result.put(value.calculatorClass,value) != null) + throw new IllegalStateException("more than one value associated with class " + value.calculatorClass.getName()); + return result; + } +} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/DiploidExactAFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/DiploidExactAFCalc.java index 3399d8ace..14d236986 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/DiploidExactAFCalc.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/DiploidExactAFCalc.java @@ -59,15 +59,14 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { private static final double LOG10_OF_2 = MathUtils.Log10Cache.get(2); - public DiploidExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) { - super(nSamples, maxAltAlleles, ploidy); - if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy); + public DiploidExactAFCalc() { } @Override - protected AFCalcResult computeLog10PNonRef(final VariantContext vc, - final double[] log10AlleleFrequencyPriors) { + protected AFCalcResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy, + final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) { final int numAlternateAlleles = vc.getNAlleles() - 1; + final ArrayList genotypeLikelihoods = getGLs(vc.getGenotypes(), true); final int numSamples = genotypeLikelihoods.size()-1; final int numChr = 2*numSamples; @@ -85,12 +84,13 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { indexesToACset.put(zeroSet.getACcounts(), zeroSet); while ( !ACqueue.isEmpty() ) { - getStateTracker().incNEvaluations(); // keep track of the number of evaluations + stateTracker.incNEvaluations(); // keep track of the number of evaluations // compute log10Likelihoods final ExactACset set = ACqueue.remove(); - calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors); + calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, + indexesToACset, log10AlleleFrequencyPriors,stateTracker); // clean up memory indexesToACset.remove(set.getACcounts()); @@ -98,17 +98,17 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { // System.out.printf(" *** removing used set=%s%n", set.ACcounts); } - return getResultFromFinalState(vc, log10AlleleFrequencyPriors); + return getResultFromFinalState(vc, log10AlleleFrequencyPriors, stateTracker); } @Override - protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List allelesToUse) { + protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List allelesToUse) { return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL); } @Override - protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums) { + protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final int defaultPloidy, final LikelihoodSum[] likelihoodSums) { final ArrayList GLs = getGLs(vc.getGenotypes(), true); for ( final double[] likelihoods : GLs ) { final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods); @@ -140,18 +140,19 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { final int numChr, final LinkedList ACqueue, final HashMap indexesToACset, - final double[] log10AlleleFrequencyPriors) { + final double[] log10AlleleFrequencyPriors, + final StateTracker stateTracker) { //if ( DEBUG ) // System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts); // compute the log10Likelihoods - computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors); + computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors, stateTracker); final double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1]; // can we abort early because the log10Likelihoods are so small? - if ( getStateTracker().abort(log10LofK, set.getACcounts(), true) ) { + if ( stateTracker.abort(log10LofK, set.getACcounts(), true) ) { //if ( DEBUG ) // System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); return log10LofK; @@ -227,7 +228,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { private void computeLofK(final ExactACset set, final ArrayList genotypeLikelihoods, - final double[] log10AlleleFrequencyPriors) { + final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) { set.getLog10Likelihoods()[0] = 0.0; // the zero case final int totalK = set.getACsum(); @@ -238,8 +239,8 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { set.getLog10Likelihoods()[j] = set.getLog10Likelihoods()[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX]; final double log10Lof0 = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1]; - getStateTracker().setLog10LikelihoodOfAFzero(log10Lof0); - getStateTracker().setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); + stateTracker.setLog10LikelihoodOfAFzero(log10Lof0); + stateTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); return; } @@ -261,7 +262,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1]; // update the MLE if necessary - getStateTracker().updateMLEifNeeded(log10LofK, set.getACcounts().getCounts()); + stateTracker.updateMLEifNeeded(log10LofK, set.getACcounts().getCounts()); // apply the priors over each alternate allele for ( final int ACcount : set.getACcounts().getCounts() ) { @@ -269,7 +270,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { log10LofK += log10AlleleFrequencyPriors[ACcount]; } - getStateTracker().updateMAPifNeeded(log10LofK, set.getACcounts().getCounts()); + stateTracker.updateMAPifNeeded(log10LofK, set.getACcounts().getCounts()); } private void pushData(final ExactACset targetSet, @@ -324,12 +325,15 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { return coeff; } + @Override public GenotypesContext subsetAlleles(final VariantContext vc, + final int defaultPloidy, final List allelesToUse, - final boolean assignGenotypes, - final int ploidy) { + final boolean assignGenotypes) { + if (defaultPloidy != 2) + throw new IllegalArgumentException("cannot support ploidy different than 2 and the default ploidy is " + defaultPloidy); return allelesToUse.size() == 1 - ? GATKVariantContextUtils.subsetToRefOnly(vc, ploidy) + ? GATKVariantContextUtils.subsetToRefOnly(vc, 2) : GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes ? GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN : GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL); } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalc.java index d1aa86bfe..10a669e63 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalc.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalc.java @@ -56,9 +56,10 @@ import java.util.*; * Uses the Exact calculation of Heng Li */ abstract class ExactAFCalc extends AFCalc { + protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first /** - * Sorts {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first. + * Sorts {@link ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first. */ protected static final Comparator LIKELIHOOD_SUM_COMPARATOR = new Comparator() { @@ -68,7 +69,7 @@ abstract class ExactAFCalc extends AFCalc { } }; /** - * Sorts {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first but make sure that + * Sorts {@link ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first but make sure that * NON_REF alleles are place are last. */ protected static final Comparator LIKELIHOOD_NON_REF_THEN_SUM_COMPARATOR = new Comparator() { @@ -83,7 +84,7 @@ abstract class ExactAFCalc extends AFCalc { } }; /** - * Sorts {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with lower alternative allele index are first regardless of + * Sorts {@link ExactAFCalc.LikelihoodSum} instances where those with lower alternative allele index are first regardless of * the likelihood sum. */ protected static final Comparator LIKELIHOOD_INDEX_COMPARATOR = new Comparator() { @@ -93,8 +94,8 @@ abstract class ExactAFCalc extends AFCalc { } }; - protected ExactAFCalc(final int nSamples, int maxAltAlleles, final int ploidy) { - super(nSamples, maxAltAlleles, ploidy); + protected ExactAFCalc() { + } /** @@ -135,10 +136,10 @@ abstract class ExactAFCalc extends AFCalc { } @Override - protected VariantContext reduceScope(final VariantContext vc) { + protected VariantContext reduceScope(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles) { // don't try to genotype too many alternate alleles final List inputAltAlleles = vc.getAlternateAlleles(); - final List outputAltAlleles = reduceScopeAlleles(vc,getMaxAltAlleles()); + final List outputAltAlleles = reduceScopeAlleles(vc, defaultPloidy, maximumAlternativeAlleles); // only if output allele has reduced from the input alt allele set size we should care. final int altAlleleReduction = inputAltAlleles.size() - outputAltAlleles.size(); @@ -146,17 +147,17 @@ abstract class ExactAFCalc extends AFCalc { if (altAlleleReduction == 0) return vc; else if (altAlleleReduction != 0) { - logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles() + logger.warn("this tool is currently set to genotype at most " + maximumAlternativeAlleles + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument"); - final List alleles = new ArrayList<>(getMaxAltAlleles() + 1); + final List alleles = new ArrayList<>(maximumAlternativeAlleles + 1); alleles.add(vc.getReference()); - alleles.addAll(reduceScopeAlleles(vc, getMaxAltAlleles())); + alleles.addAll(reduceScopeAlleles(vc, defaultPloidy, maximumAlternativeAlleles)); final VariantContextBuilder builder = new VariantContextBuilder(vc); builder.alleles(alleles); - builder.genotypes(reduceScopeGenotypes(vc, alleles)); + builder.genotypes(reduceScopeGenotypes(vc, defaultPloidy, alleles)); if (altAlleleReduction < 0) throw new IllegalStateException("unexpected: reduction increased the number of alt. alleles!: " + - altAlleleReduction + " " + vc + " " + builder.make()); return builder.make(); @@ -170,7 +171,7 @@ abstract class ExactAFCalc extends AFCalc { * @param numAllelesToChoose number of alleles to keep. * @return the list of alternative allele to keep. */ - protected List reduceScopeAlleles(final VariantContext vc, final int numAllelesToChoose) { + protected List reduceScopeAlleles(final VariantContext vc, final int defaultPloidy, final int numAllelesToChoose) { // Look for the allele to exclude it from the pruning if present. final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); @@ -194,7 +195,7 @@ abstract class ExactAFCalc extends AFCalc { } // Calculate the allele likelihood sums. - reduceScopeCalculateLikelihoodSums(vc, likelihoodSums); + reduceScopeCalculateLikelihoodSums(vc, defaultPloidy, likelihoodSums); // sort them by probability mass and choose the best ones // Make sure that the allele is last if present. @@ -227,7 +228,7 @@ abstract class ExactAFCalc extends AFCalc { * @param vc source variant context. * @param likelihoodSums where to update the likelihood sums. */ - protected abstract void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums); + protected abstract void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final int defaultPloidy, final LikelihoodSum[] likelihoodSums); /** * Transforms the genotypes of the variant context according to the new subset of possible alleles. @@ -236,5 +237,5 @@ abstract class ExactAFCalc extends AFCalc { * @param allelesToUse possible alleles. * @return never {@code null}, the new set of genotype calls for the reduced scope. */ - protected abstract GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List allelesToUse); + protected abstract GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List allelesToUse); } \ No newline at end of file diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java index 6b28a9fae..73d4eca47 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java @@ -58,26 +58,24 @@ import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import java.util.*; public class GeneralPloidyExactAFCalc extends ExactAFCalc { + static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them - private final int ploidy; private final static boolean VERBOSE = false; - protected GeneralPloidyExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) { - super(nSamples, maxAltAlleles, ploidy); - this.ploidy = ploidy; + protected GeneralPloidyExactAFCalc() { } @Override - protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List allelesToUse) { - return subsetAlleles(vc,allelesToUse,false,ploidy); + protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List allelesToUse) { + return subsetAlleles(vc,defaultPloidy,allelesToUse,false); } @Override - public AFCalcResult computeLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { - combineSinglePools(vc.getGenotypes(), vc.getNAlleles(), ploidy, log10AlleleFrequencyPriors); - return getResultFromFinalState(vc, log10AlleleFrequencyPriors); + protected AFCalcResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy, final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) { + combineSinglePools(vc.getGenotypes(), defaultPloidy, vc.getNAlleles(), log10AlleleFrequencyPriors); + return getResultFromFinalState(vc, log10AlleleFrequencyPriors, stateTracker); } /** @@ -126,17 +124,29 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { } } + @Override - protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums) { + protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final int defaultPloidy, final LikelihoodSum[] likelihoodSums) { final int numOriginalAltAlleles = likelihoodSums.length; - final ArrayList GLs = getGLs(vc.getGenotypes(), false); - for ( final double[] likelihoods : GLs ) { - final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods); + final GenotypesContext genotypes = vc.getGenotypes(); + for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) { + if (!genotype.hasPL()) + continue; + final double[] gls = genotype.getLikelihoods().getAsVector(); + if (MathUtils.sum(gls) >= GATKVariantContextUtils.SUM_GL_THRESH_NOCALL) + continue; + + final int PLindexOfBestGL = MathUtils.maxElementIndex(gls); + + final double bestToHomRefDiffGL = PLindexOfBestGL == PL_INDEX_OF_HOM_REF ? 0.0 : gls[PLindexOfBestGL] - gls[PL_INDEX_OF_HOM_REF]; + final int declaredPloidy = genotype.getPloidy(); + final int ploidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy; + final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL); // by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele for (int k=1; k < acCount.length;k++) if (acCount[k] > 0 ) - likelihoodSums[k-1].sum += acCount[k] * (likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]); + likelihoodSums[k-1].sum += acCount[k] * bestToHomRefDiffGL; } } @@ -144,51 +154,51 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { * Simple non-optimized version that combines GLs from several pools and produces global AF distribution. * @param GLs Inputs genotypes context with per-pool GLs * @param numAlleles Number of alternate alleles - * @param ploidyPerPool Number of samples per pool * @param log10AlleleFrequencyPriors Frequency priors */ protected void combineSinglePools(final GenotypesContext GLs, + final int defaultPloidy, final int numAlleles, - final int ploidyPerPool, final double[] log10AlleleFrequencyPriors) { - final ArrayList genotypeLikelihoods = getGLs(GLs, true); - - - int combinedPloidy = 0; - // Combine each pool incrementally - likelihoods will be renormalized at each step - CombinedPoolLikelihoods combinedPoolLikelihoods = new CombinedPoolLikelihoods(); // first element: zero ploidy, e.g. trivial degenerate distribution + final int numAltAlleles = numAlleles - 1; final int[] zeroCounts = new int[numAlleles]; final ExactACset set = new ExactACset(1, new ExactACcounts(zeroCounts)); set.getLog10Likelihoods()[0] = 0.0; - - final int genotypeCount = genotypeLikelihoods.size(); + final StateTracker stateTracker = getStateTracker(false,numAltAlleles); + int combinedPloidy = 0; + CombinedPoolLikelihoods combinedPoolLikelihoods = new CombinedPoolLikelihoods(); combinedPoolLikelihoods.add(set); - getStateTracker().reset(numAlleles - 1); - if ( genotypeCount <= 1 ) { - // no meaningful GLs at all, just set the tracker to non poly values - // just mimic-ing call below - getStateTracker().setLog10LikelihoodOfAFzero(0.0); - } else { - for (int p=1; p= GATKVariantContextUtils.SUM_GL_THRESH_NOCALL) + continue; + stateTracker.reset(); + final int declaredPloidy = genotype.getPloidy(); + final int ploidy = declaredPloidy < 1 ? defaultPloidy : declaredPloidy; + // they do qualify so we proceed. + combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, gls, + combinedPloidy, ploidy, numAlleles, log10AlleleFrequencyPriors, stateTracker); + combinedPloidy = ploidy + combinedPloidy; // total number of chromosomes in combinedLikelihoods } + if (combinedPloidy == 0) + stateTracker.setLog10LikelihoodOfAFzero(0.0); } - public CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool, + private CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool, double[] newGL, int originalPloidy, int newGLPloidy, int numAlleles, - final double[] log10AlleleFrequencyPriors) { + final double[] log10AlleleFrequencyPriors, + final StateTracker stateTracker) { final LinkedList ACqueue = new LinkedList<>(); // mapping of ExactACset indexes to the objects final HashMap indexesToACset = new HashMap<>(); @@ -206,11 +216,11 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { // keep processing while we have AC conformations that need to be calculated while ( !ACqueue.isEmpty() ) { - getStateTracker().incNEvaluations(); + stateTracker.incNEvaluations(); // compute log10Likelihoods final ExactACset ACset = ACqueue.remove(); - calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset); + calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset, stateTracker); // clean up memory indexesToACset.remove(ACset.getACcounts()); @@ -243,12 +253,13 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { final int originalPloidy, final int newGLPloidy, final LinkedList ACqueue, - final HashMap indexesToACset) { + final HashMap indexesToACset, + final StateTracker stateTracker) { // compute likelihood in "set" of new set based on original likelihoods final int numAlleles = set.getACcounts().getCounts().length; final int newPloidy = set.getACsum(); - final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy); + final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy, stateTracker); // add to new pool @@ -256,7 +267,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { newPool.add(set); // TODO -- change false to true this correct line when the implementation of this model is optimized (it's too slow now to handle this fix) - if ( getStateTracker().abort(log10LofK, set.getACcounts(), false) ) { + if ( stateTracker.abort(log10LofK, set.getACcounts(), false) ) { return log10LofK; } @@ -364,7 +375,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { final CombinedPoolLikelihoods firstGLs, final double[] secondGL, final double[] log10AlleleFrequencyPriors, - final int numAlleles, final int ploidy1, final int ploidy2) { + final int numAlleles, final int ploidy1, final int ploidy2, final StateTracker stateTracker) { final int newPloidy = ploidy1 + ploidy2; @@ -381,9 +392,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { if ( totalAltK == 0 ) { // all-ref case final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX]; set.getLog10Likelihoods()[0] = log10Lof0; - - getStateTracker().setLog10LikelihoodOfAFzero(log10Lof0); - getStateTracker().setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); + stateTracker.setLog10LikelihoodOfAFzero(log10Lof0); + stateTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); return log10Lof0; } else { @@ -427,7 +437,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { // update the MLE if necessary final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length); // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY - getStateTracker().updateMLEifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts); + stateTracker.updateMLEifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts); // apply the priors over each alternate allele for (final int ACcount : altCounts ) { @@ -435,7 +445,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { log10LofK += log10AlleleFrequencyPriors[ACcount]; } // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY - getStateTracker().updateMAPifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts); + stateTracker.updateMAPifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts); return log10LofK; } @@ -462,21 +472,17 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { * From a given variant context, extract a given subset of alleles, and update genotype context accordingly, * including updating the PL's, and assign genotypes accordingly * @param vc variant context with alleles and genotype likelihoods + * @param defaultPloidy ploidy to assume in case that {@code vc} does not contain that information + * for a sample. * @param allelesToUse alleles to subset * @param assignGenotypes true: assign hard genotypes, false: leave as no-call - * @param ploidy number of chromosomes per sample (pool) * @return GenotypesContext with new PLs */ - public GenotypesContext subsetAlleles(final VariantContext vc, + public GenotypesContext subsetAlleles(final VariantContext vc, final int defaultPloidy, final List allelesToUse, - final boolean assignGenotypes, - final int ploidy) { + final boolean assignGenotypes) { // the genotypes with PLs final GenotypesContext oldGTs = vc.getGenotypes(); - List NO_CALL_ALLELES = new ArrayList<>(ploidy); - - for (int k=0; k < ploidy; k++) - NO_CALL_ALLELES.add(Allele.NO_CALL); // samples final List sampleIndices = oldGTs.getSampleNamesOrderedByName(); @@ -492,8 +498,10 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { // create the new genotypes for ( int k = 0; k < oldGTs.size(); k++ ) { final Genotype g = oldGTs.get(sampleIndices.get(k)); + final int declaredPloidy = g.getPloidy(); + final int ploidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy; if ( !g.hasLikelihoods() ) { - newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); + newGTs.add(GenotypeBuilder.create(g.getSampleName(),GATKVariantContextUtils.noCallAlleles(ploidy))); continue; } @@ -514,7 +522,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { // if there is no mass on the (new) likelihoods, then just no-call the sample if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) { - newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); + newGTs.add(GenotypeBuilder.create(g.getSampleName(), GATKVariantContextUtils.noCallAlleles(ploidy))); } else { final GenotypeBuilder gb = new GenotypeBuilder(g); @@ -526,7 +534,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { // if we weren't asked to assign a genotype, then just no-call the sample if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) - gb.alleles(NO_CALL_ALLELES); + gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy)); else assignGenotype(gb, newLikelihoods, allelesToUse, ploidy); newGTs.add(gb.make()); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java index 6a740164f..a4de291ea 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java @@ -116,47 +116,49 @@ import java.util.*; private final static int[] BIALLELIC_NON_INFORMATIVE_PLS = new int[]{0,0,0}; private final static List BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + + /** * Sorts AFCalcResults by their posteriors of AF > 0, so the */ - private final static class CompareAFCalcResultsByPNonRef implements Comparator { + private final static class CompareAFCalculatorResultsByPNonRef implements Comparator { @Override public int compare(AFCalcResult o1, AFCalcResult o2) { return -1 * Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0()); } } - private final static CompareAFCalcResultsByPNonRef compareAFCalcResultsByPNonRef = new CompareAFCalcResultsByPNonRef(); + private final static CompareAFCalculatorResultsByPNonRef compareAFCalcResultsByPNonRef = new CompareAFCalculatorResultsByPNonRef(); /** * The AFCalc model we are using to do the bi-allelic computation */ final AFCalc biAlleleExactModel; - protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) { - super(nSamples, maxAltAlleles, ploidy); - biAlleleExactModel = new ReferenceDiploidExactAFCalc(nSamples, 1, ploidy); + protected IndependentAllelesDiploidExactAFCalc() { + super(); + biAlleleExactModel = new ReferenceDiploidExactAFCalc(); } /** * Trivial subclass that helps with debugging by keeping track of the supporting information for this joint call */ - private static class MyAFCalcResult extends AFCalcResult { + private static class MyAFCalculationResult extends AFCalcResult { /** * List of the supporting bi-allelic AFCalcResults that went into making this multi-allelic joint call */ final List supporting; - private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map log10pRefByAllele, List supporting) { + private MyAFCalculationResult(int[] alleleCountsOfMLE, int nEvaluations, List allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map log10pRefByAllele, List supporting) { super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele); this.supporting = supporting; } } @Override - public AFCalcResult computeLog10PNonRef(final VariantContext vc, - final double[] log10AlleleFrequencyPriors) { - final List independentResultTrackers = computeAlleleIndependentExact(vc, log10AlleleFrequencyPriors); + public AFCalcResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy, + final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) { + final List independentResultTrackers = computeAlleleIndependentExact(vc, defaultPloidy, log10AlleleFrequencyPriors); if ( independentResultTrackers.size() == 0 ) throw new IllegalStateException("Independent alleles model returned an empty list of results at VC " + vc); @@ -181,12 +183,12 @@ import java.util.*; */ @Requires({"vc != null", "log10AlleleFrequencyPriors != null"}) @Ensures("goodIndependentResult(vc, result)") - protected final List computeAlleleIndependentExact(final VariantContext vc, + protected final List computeAlleleIndependentExact(final VariantContext vc, final int defaultPloidy, final double[] log10AlleleFrequencyPriors) { final List results = new LinkedList(); for ( final VariantContext subvc : makeAlleleConditionalContexts(vc) ) { - final AFCalcResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors); + final AFCalcResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, defaultPloidy, vc.getNAlleles() - 1, log10AlleleFrequencyPriors); results.add(resultTracker); } @@ -486,7 +488,7 @@ import java.util.*; log10PosteriorOfACGt0 - log10PriorsOfAC[1] }; - return new MyAFCalcResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(), + return new MyAFCalculationResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(), // necessary to ensure all values < 0 MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true), // priors incorporate multiple alt alleles, must be normalized diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java index 20ef6d370..407eb2dce 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java @@ -59,12 +59,15 @@ import java.util.Map; * Original bi-allelic ~O(N) implementation. Kept here for posterity and reference */ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc { - protected OriginalDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) { - super(nSamples, maxAltAlleles, ploidy); + protected OriginalDiploidExactAFCalc() { } @Override - protected AFCalcResult computeLog10PNonRef(VariantContext vc, double[] log10AlleleFrequencyPriors) { + protected AFCalcResult computeLog10PNonRef(final VariantContext vc, + @SuppressWarnings("unused") + final int defaultPloidy, + final double[] log10AlleleFrequencyPriors, + final StateTracker stateTracker) { final double[] log10AlleleFrequencyLikelihoods = new double[log10AlleleFrequencyPriors.length]; final double[] log10AlleleFrequencyPosteriors = new double[log10AlleleFrequencyPriors.length]; final Pair result = linearExact(vc, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); @@ -122,7 +125,7 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc { } } - public Pair linearExact(final VariantContext vc, + private Pair linearExact(final VariantContext vc, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyLikelihoods, double[] log10AlleleFrequencyPosteriors) { @@ -194,6 +197,6 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc { logY.rotate(); } - return new Pair(lastK, mleK); + return new Pair<>(lastK, mleK); } } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java index 955fa86f2..63d071484 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java @@ -47,7 +47,6 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc { - protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) { - super(nSamples, maxAltAlleles, ploidy); + protected ReferenceDiploidExactAFCalc() { } } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/StateTracker.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/StateTracker.java index 27ae69512..20f7e57c2 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/StateTracker.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/StateTracker.java @@ -57,9 +57,15 @@ import java.util.List; import java.util.Map; /** + * TODO this class (+AFCalculator) is a bit messy... it seems that it combines "debugging" (unnecessarily adding CPU cost in production) + * TODO but it also contains important part of the AF calculation state... why mix both!!!? It seems that the second + * TODO part could be just blend into AFCalculator ... one one hand you want to reduce classes code size ... but these + * TODO two classes code seems to be quite intertwine and makes it difficult to understand what is going on. + * in the production setting without much need + * * Keeps track of the state information during the exact model AF calculation. * - * Tracks things like the MLE and MAP AC values, their corresponding likelhood and posterior + * Tracks things like the MLE and MAP AC values, their corresponding likelihood and posterior * values, the likelihood of the AF == 0 state, and the number of evaluations needed * by the calculation to compute the P(AF == 0) */ @@ -120,7 +126,7 @@ final class StateTracker { * @param maxAltAlleles an integer >= 1 */ public StateTracker(final int maxAltAlleles) { - if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles); + if ( maxAltAlleles < 0 ) throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles); alleleCountsOfMLE = new int[maxAltAlleles]; alleleCountsOfMAP = new int[maxAltAlleles]; @@ -170,7 +176,7 @@ final class StateTracker { } @Ensures("result >= 0") - protected int getnEvaluations() { + protected int getNEvaluations() { return nEvaluations; } @@ -351,4 +357,9 @@ final class StateTracker { this.allelesUsedInGenotyping = allelesUsedInGenotyping; } + + public void ensureMaximumAlleleCapacity(final int capacity) { + if (this.alleleCountsOfMAP.length < capacity) + reset(capacity); + } }