Refactored AFCalc to remove unecessary capability limits allowing to deal

with mixed ploidies and max-alt-allele number changes dynamically.

Changes:
--------

* Moved the AFCalcFactory.Calculation enum in a top level class
    AFCalculatorImplementation.
* Given more reponsabilities to the enum like resolving the constructor
    method once per implementation and the best-model selection algorithm.
* Removed test-code only fields and methods from AFCalc; just used to perform
    unit-testing and not any actual functionality of this component.
* Removed the fixed ploidy constraint of GeneralPloidyExactAFCalc
    implementation... now can deal with mixed ploidies that may change
    per site and sample.
* Removed the fixed maxAltAllele restriction by allowing resizing of
    the stateTracker structures.
* Due to previous two points now call the the AFCalc object are passed
    the default-ploidy to assume in case some genotype in the input
    VC does not have it and the max-alt-allele.
* Also due to those changes, removed the now totally useless 3 int
    parameters from all AFCalc constructors.
* Cleaned the code a bit from no further used components and methods.
This commit is contained in:
Valentin Ruano-Rubio 2014-09-10 12:51:39 -04:00
parent 7f6e526a87
commit 31e58ae4ec
11 changed files with 459 additions and 304 deletions

View File

@ -46,10 +46,10 @@
package org.broadinstitute.gatk.engine.arguments;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorImplementation;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode;
import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.utils.collections.DefaultHashMap;
import htsjdk.variant.variantcontext.VariantContext;
@ -145,7 +145,7 @@ public class StandardCallerArgumentCollection implements Cloneable {
*/
@Hidden
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
public AFCalcFactory.Calculation requestedAlleleFrequencyCalculationModel;
public AFCalculatorImplementation requestedAlleleFrequencyCalculationModel;
@Hidden
@Argument(shortName = "logExactCalls", doc="x", required=false)

View File

@ -64,32 +64,23 @@ import java.util.List;
public abstract class AFCalc implements Cloneable {
private final static Logger defaultLogger = Logger.getLogger(AFCalc.class);
protected final int nSamples;
protected final int maxAlternateAllelesToGenotype;
protected Logger logger = defaultLogger;
private SimpleTimer callTimer = new SimpleTimer();
private final StateTracker stateTracker;
private StateTracker stateTracker;
private ExactCallLogger exactCallLogger = null;
/**
* Create a new AFCalc object capable of calculating the prob. that alleles are
* segregating among nSamples with up to maxAltAlleles for SNPs and maxAltAllelesForIndels
* for indels for samples with ploidy
* segregating among many samples.
*
* @param nSamples number of samples, must be > 0
* @param maxAltAlleles maxAltAlleles for SNPs
* @param ploidy the ploidy, must be > 0
* <p>
* Restrictions in ploidy and number of alternative alleles that a instance can handle will be determined
* by its implementation class {@link AFCalculatorImplementation}
* </p>
*/
protected AFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles);
if ( ploidy < 1 ) throw new IllegalArgumentException("ploidy must be > 0 but got " + ploidy);
this.nSamples = nSamples;
this.maxAlternateAllelesToGenotype = maxAltAlleles;
this.stateTracker = new StateTracker(maxAltAlleles);
protected AFCalc() {
}
/**
@ -118,19 +109,18 @@ public abstract class AFCalc implements Cloneable {
* @param log10AlleleFrequencyPriors a prior vector nSamples x 2 in length indicating the Pr(AF = i)
* @return result (for programming convenience)
*/
public AFCalcResult getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) {
public AFCalcResult getLog10PNonRef(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles, final double[] log10AlleleFrequencyPriors) {
if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null");
if ( vc.getNAlleles() == 1 ) throw new IllegalArgumentException("VariantContext has only a single reference allele, but getLog10PNonRef requires at least one at all " + vc);
if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null");
if ( stateTracker == null ) throw new IllegalArgumentException("Results object cannot be null");
// reset the result, so we can store our new result there
stateTracker.reset();
final StateTracker stateTracker = getStateTracker(true,maximumAlternativeAlleles);
final VariantContext vcWorking = reduceScope(vc);
final VariantContext vcWorking = reduceScope(vc,defaultPloidy, maximumAlternativeAlleles);
callTimer.start();
final AFCalcResult result = computeLog10PNonRef(vcWorking, log10AlleleFrequencyPriors);
final AFCalcResult result = computeLog10PNonRef(vcWorking, defaultPloidy, log10AlleleFrequencyPriors, stateTracker);
final long nanoTime = callTimer.getElapsedTimeNano();
if ( exactCallLogger != null )
@ -150,7 +140,7 @@ public abstract class AFCalc implements Cloneable {
*/
@Requires("stateTracker.getnEvaluations() >= 0")
@Ensures("result != null")
protected AFCalcResult getResultFromFinalState(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors) {
protected AFCalcResult getResultFromFinalState(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) {
stateTracker.setAllelesUsedInGenotyping(vcWorking.getAlleles());
return stateTracker.toAFCalcResult(log10AlleleFrequencyPriors);
}
@ -174,7 +164,7 @@ public abstract class AFCalc implements Cloneable {
*/
@Requires({"vc != null", "vc.getNAlleles() > 1"})
@Ensures("result != null")
protected abstract VariantContext reduceScope(final VariantContext vc);
protected abstract VariantContext reduceScope(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles);
/**
* Actually carry out the log10PNonRef calculation on vc, storing results in results
@ -185,8 +175,8 @@ public abstract class AFCalc implements Cloneable {
* @return a AFCalcResult object describing the results of this calculation
*/
@Requires({"vc != null", "log10AlleleFrequencyPriors != null", "vc.getNAlleles() > 1"})
protected abstract AFCalcResult computeLog10PNonRef(final VariantContext vc,
final double[] log10AlleleFrequencyPriors);
protected abstract AFCalcResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy,
final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker);
/**
* Subset VC to the just allelesToUse, updating genotype likelihoods
@ -194,15 +184,15 @@ public abstract class AFCalc implements Cloneable {
* Must be overridden by concrete subclasses
*
* @param vc variant context with alleles and genotype likelihoods
* @param defaultPloidy default ploidy to assume in case {@code vc} does not indicate it for a sample.
* @param allelesToUse alleles to subset
* @param assignGenotypes
* @param ploidy
* @return GenotypesContext object
*/
public abstract GenotypesContext subsetAlleles(final VariantContext vc,
final int defaultPloidy,
final List<Allele> allelesToUse,
final boolean assignGenotypes,
final int ploidy);
final boolean assignGenotypes);
// ---------------------------------------------------------------------------
//
@ -210,12 +200,41 @@ public abstract class AFCalc implements Cloneable {
//
// ---------------------------------------------------------------------------
public int getMaxAltAlleles() {
return maxAlternateAllelesToGenotype;
}
protected StateTracker getStateTracker() {
/**
* Retrieves the state tracker.
*
* <p>
* The tracker will be reset if so requested or if it needs to be resized due to an increase in the
* maximum number of alleles is must be able to handle.
* </p>
*
* @param reset make sure the tracker is reset.
* @param maximumAlternativeAlleleCount the maximum alternative allele count it must be able to handle. Has no effect if
* the current tracker is able to handle that number.
*
* @return never {@code null}
*/
protected StateTracker getStateTracker(final boolean reset, final int maximumAlternativeAlleleCount) {
if (stateTracker == null)
stateTracker = new StateTracker(maximumAlternativeAlleleCount);
else if (reset)
stateTracker.reset(maximumAlternativeAlleleCount);
else
stateTracker.ensureMaximumAlleleCapacity(maximumAlternativeAlleleCount);
return stateTracker;
}
/**
* Used by testing code.
*
* Please don't use this method in production.
*
* @deprecated
*/
@Deprecated
protected int getAltAlleleCountOfMAP(final int allele) {
return getStateTracker(false,allele + 1).getAlleleCountsOfMAP()[allele];
}
}

View File

@ -46,147 +46,38 @@
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.gatk.utils.classloader.PluginManager;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import java.lang.reflect.Constructor;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
* Factory to make AFCalculations
*/
public class AFCalcFactory {
/**
* Enumeration of usable AF calculation, their constraints (i.e. ploidy).
*
* Note that the order these occur in the enum is the order of preference, so
* the first value is taken over the second when multiple calculations satisfy
* the needs of the request (i.e., considering ploidy).
*/
public enum Calculation {
/** default implementation */
EXACT_INDEPENDENT(IndependentAllelesDiploidExactAFCalc.class, 2, -1),
/** reference implementation of multi-allelic EXACT model. Extremely slow for many alternate alleles */
EXACT_REFERENCE(ReferenceDiploidExactAFCalc.class, 2, -1),
/** original biallelic exact model, for testing only */
EXACT_ORIGINAL(OriginalDiploidExactAFCalc.class, 2, 2),
/** implementation that supports any sample ploidy. Currently not available for the HaplotypeCaller */
EXACT_GENERAL_PLOIDY("GeneralPloidyExactAFCalc", -1, -1);
/**
* Must be a name because we look this up dynamically
*/
public final String className;
public final int maxAltAlleles;
public final int requiredPloidy;
private Calculation(final String className, final int requiredPloidy, final int maxAltAlleles) {
this.className = className;
this.requiredPloidy = requiredPloidy;
this.maxAltAlleles = maxAltAlleles;
}
private Calculation(final Class clazz, final int requiredPloidy, final int maxAltAlleles) {
this(clazz.getSimpleName(), requiredPloidy, maxAltAlleles);
}
public boolean usableForParams(final int requestedPloidy, final int requestedMaxAltAlleles) {
return (requiredPloidy == -1 || requiredPloidy == requestedPloidy)
&& (maxAltAlleles == -1 || maxAltAlleles >= requestedMaxAltAlleles);
}
public static Calculation getDefaultModel() { return EXACT_INDEPENDENT; }
/**
* Returns the best (fastest) model give the required ploidy and alternative allele count.
* @param requiredPloidy required ploidy
* @param requiredAlternativeAlleleCount required alternative allele count.
* @param preferredModel a preferred mode if any. A {@code null} indicate that we should be try to use the default instead.
* @return never {@code null}
*/
public static Calculation getBestModel(final int requiredPloidy, final int requiredAlternativeAlleleCount, final Calculation preferredModel) {
final Calculation preferred = preferredModel == null ? getDefaultModel() : preferredModel;
if (preferred.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return preferred;
if (EXACT_INDEPENDENT.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return EXACT_INDEPENDENT;
if (EXACT_REFERENCE.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return EXACT_REFERENCE;
return EXACT_GENERAL_PLOIDY;
}
}
private static final Map<String, Class<? extends AFCalc>> afClasses;
static {
afClasses = new PluginManager<AFCalc>(AFCalc.class).getPluginsByName();
}
private AFCalcFactory() {
}
private static Class<? extends AFCalc> getClassByName(final String name) {
for ( final Class<? extends AFCalc> clazz : afClasses.values() ) {
if ( clazz.getSimpleName().contains(name) ) {
return clazz;
}
}
return null;
}
/**
* Create a new AFCalc based on the parameters in the UAC
*
* @param UAC the UnifiedArgumentCollection containing the command-line parameters for the caller
* @param nSamples the number of samples we will be using
* @param logger an optional (can be null) logger to override the default in the model
* @return an initialized AFCalc
*/
public static AFCalc createAFCalc(final StandardCallerArgumentCollection UAC,
final int nSamples, final boolean emitConfModel,
final Logger logger) {
final Calculation afCalculationModel = Calculation.getBestModel(UAC.genotypeArgs.samplePloidy,UAC.genotypeArgs.MAX_ALTERNATE_ALLELES,
UAC.requestedAlleleFrequencyCalculationModel);
final AFCalc calc = createAFCalc(afCalculationModel, nSamples, UAC.genotypeArgs.MAX_ALTERNATE_ALLELES, UAC.genotypeArgs.samplePloidy);
if ( logger != null ) calc.setLogger(logger);
if ( UAC.exactCallsLog != null ) calc.enableProcessLog(UAC.exactCallsLog);
return calc;
}
/**
* Create a new AFCalc, choosing the best implementation based on the given parameters, assuming
* that we will only be requesting bi-allelic variants to diploid genotypes
*
* @param nSamples the number of samples we'll be using
*
* @return an initialized AFCalc
*
* @deprecated hardcoded special reference to diploid analysis... eventually using code must explicitly indicate the
* ploidy as this is a sign of a hidden diploid assumption which is bad.
*/
public static AFCalc createAFCalc(final int nSamples) {
return createAFCalc(chooseBestCalculation(nSamples, 2, 1), nSamples, 2, 2);
@Deprecated
public static AFCalc createCalculatorForDiploidBiAllelicAnalysis() {
return AFCalculatorImplementation.bestValue(2,1,null).newInstance();
}
/**
* Create a new AFCalc that supports maxAltAlleles for all variants and diploid genotypes
*
* @param calc the calculation we'd like to use
* @param nSamples the number of samples we'll be using
* @param maxAltAlleles the max. alt alleles for both SNPs and indels
*
* @deprecated hardcoded special reference to diploid analysis... eventually using code must explicitly indicate the
* ploidy as this is a sign of a hidden diploid assumption which is bad.
* @return an initialized AFCalc
*/
public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles) {
return createAFCalc(calc, nSamples, maxAltAlleles, 2);
@Deprecated
public static AFCalc createCalculatorForDiploidAnalysis() {
return AFCalculatorImplementation.bestValue(2,AFCalculatorImplementation.UNBOUND_ALTERNATIVE_ALLELE_COUNT,null).newInstance();
}
/**
@ -198,66 +89,47 @@ public class AFCalcFactory {
*
* @return an initialized AFCalc
*/
public static AFCalc createAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
return createAFCalc(chooseBestCalculation(nSamples, ploidy, maxAltAlleles), nSamples, maxAltAlleles, ploidy);
public static AFCalc createCalculator(final int nSamples, final int maxAltAlleles, final int ploidy) {
return createCalculator(chooseBestImplementation(ploidy, maxAltAlleles), nSamples, maxAltAlleles, ploidy);
}
/**
* Choose the best calculation for nSamples and ploidy
*
* @param nSamples
* @param ploidy
* @param maxAltAlleles
* @return
*/
private static Calculation chooseBestCalculation(final int nSamples, final int ploidy, final int maxAltAlleles) {
for ( final Calculation calc : Calculation.values() ) {
private static AFCalculatorImplementation chooseBestImplementation(final int ploidy, final int maxAltAlleles) {
for ( final AFCalculatorImplementation calc : AFCalculatorImplementation.values() ) {
if ( calc.usableForParams(ploidy, maxAltAlleles) ) {
return calc;
}
}
throw new IllegalStateException("no calculation found that supports nSamples " + nSamples + " ploidy " + ploidy + " and maxAltAlleles " + maxAltAlleles);
throw new IllegalStateException("no calculation found that supports nSamples " + ploidy + " and maxAltAlleles " + maxAltAlleles);
}
/**
* Create a new AFCalc
*
* @param calc the calculation to use
* @param implementation the calculation to use
* @param nSamples the number of samples we'll be using
* @param maxAltAlleles the max. alt alleles to consider for SNPs
* @param ploidy the sample ploidy. Must be consistent with the calc
* @param ploidy the sample ploidy. Must be consistent with the implementation
*
* @return an initialized AFCalc
*/
public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles, final int ploidy) {
if ( calc == null ) throw new IllegalArgumentException("Calculation cannot be null");
public static AFCalc createCalculator(final AFCalculatorImplementation implementation, final int nSamples, final int maxAltAlleles, final int ploidy) {
if ( implementation == null ) throw new IllegalArgumentException("Calculation cannot be null");
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles);
if ( ploidy < 1 ) throw new IllegalArgumentException("sample ploidy must be greater than zero " + ploidy);
if ( ! calc.usableForParams(ploidy, maxAltAlleles) )
throw new IllegalArgumentException("AFCalc " + calc + " does not support requested ploidy " + ploidy);
if ( ! implementation.usableForParams(ploidy, maxAltAlleles) )
throw new IllegalArgumentException("AFCalc " + implementation + " does not support requested ploidy " + ploidy);
final Class<? extends AFCalc> afClass = getClassByName(calc.className);
if ( afClass == null )
throw new IllegalArgumentException("Unexpected AFCalc " + calc);
try {
Object args[] = new Object[]{nSamples, maxAltAlleles, ploidy};
Constructor c = afClass.getDeclaredConstructor(int.class, int.class, int.class);
return (AFCalc)c.newInstance(args);
} catch (Exception e) {
throw new ReviewedGATKException("Could not instantiate AFCalc " + calc, e);
}
return implementation.newInstance();
}
protected static List<AFCalc> createAFCalcs(final List<Calculation> calcs, final int nSamples, final int maxAltAlleles, final int ploidy) {
final List<AFCalc> AFCalcs = new LinkedList<AFCalc>();
for ( final Calculation calc : calcs )
AFCalcs.add(createAFCalc(calc, nSamples, maxAltAlleles, ploidy));
return AFCalcs;
}
}

View File

@ -0,0 +1,236 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import java.lang.reflect.Constructor;
import java.lang.reflect.Modifier;
import java.util.HashMap;
import java.util.Map;
/**
* Enumeration of usable AF calculation, their constraints (i.e. ploidy).
*
* Note that the order these occur in the enum is the order of preference, so
* the first value is taken over the second when multiple calculations satisfy
* the needs of the request (i.e., considering ploidy).
*/
public enum AFCalculatorImplementation {
/** default implementation */
EXACT_INDEPENDENT(IndependentAllelesDiploidExactAFCalc.class, 2),
/** reference implementation of multi-allelic EXACT model. Extremely slow for many alternate alleles */
EXACT_REFERENCE(ReferenceDiploidExactAFCalc.class, 2),
/** original biallelic exact model, for testing only */
EXACT_ORIGINAL(OriginalDiploidExactAFCalc.class, 2, 2),
/** implementation that supports any sample ploidy. Currently not available for the HaplotypeCaller */
EXACT_GENERAL_PLOIDY(GeneralPloidyExactAFCalc.class);
/**
* Special max alt allele count indicating that this maximum is in fact unbound (can be anything).
*/
public final static int UNBOUND_ALTERNATIVE_ALLELE_COUNT = -1;
/**
* Special ploidy constant that indicates that in fact the ploidy is unbound (can be anything).
*/
public final static int UNBOUND_PLOIDY = -1;
private static Map<Class<? extends AFCalc>,AFCalculatorImplementation> calculatorClassToValue = buildCalculatorClassToValueMap();
/**
* Reference to the calculator class.
*/
public final Class<? extends AFCalc> calculatorClass;
/**
* Maximum number of supported alternative alleles.
*/
public final int maxAltAlleles;
/**
* Reference to the constructor to instantiate a calculator for this implementation.
*/
protected final Constructor<? extends AFCalc> constructor;
/**
* Supported ploidy.
*
* This is equal to {@link #UNBOUND_PLOIDY} if the class can handle any ploidy.
*/
public final int requiredPloidy;
/**
* Reference to the default implementation.
*/
public final static AFCalculatorImplementation DEFAULT = EXACT_INDEPENDENT;
/**
* Constructs a new instance given all its properties
* @param clazz the calculator class that realizes this implementation.
* @param requiredPloidy the required ploidy; zero or greater or {@link #UNBOUND_PLOIDY} to indicate that any ploidy is supported.
* @param maxAltAlleles the maximum alternative alleles; zero or greater or {@link #UNBOUND_ALTERNATIVE_ALLELE_COUNT} to indicate that any maximum number of alternative alleles is supported.
*/
AFCalculatorImplementation(final Class<? extends AFCalc> clazz, final int requiredPloidy, final int maxAltAlleles) {
calculatorClass = clazz;
this.requiredPloidy = requiredPloidy;
this.maxAltAlleles = maxAltAlleles;
this.constructor = findInstantiationConstructor(calculatorClass);
}
/**
* Constructs a new instance leaving ploidy and max-allele count unbound.
* @param clazz the calculator class that realizes this implementation.
*/
AFCalculatorImplementation(final Class<? extends AFCalc> clazz) {
this(clazz,UNBOUND_PLOIDY, UNBOUND_ALTERNATIVE_ALLELE_COUNT);
}
/** Constructs a new instance leaving max-allele count unbound.
* @param clazz the calculator class that realizes this implementation.
* @param requiredPloidy the required ploidy; zero or greater or {@link #UNBOUND_PLOIDY} to indicate that any ploidy is supported.
*/
AFCalculatorImplementation(final Class<? extends AFCalc> clazz, final int requiredPloidy) {
this(clazz,requiredPloidy,UNBOUND_PLOIDY);
}
/**
* Checks whether a given ploidy and max alternative alleles combination is supported or not.
* @param requestedPloidy the targeted ploidy.
* @param requestedMaxAltAlleles the targeted max alternative alleles.
* @return {@code true} iff this calculator implementation satisfies both requirements.
*/
public boolean usableForParams(final int requestedPloidy, final int requestedMaxAltAlleles) {
return (requiredPloidy == UNBOUND_PLOIDY || requiredPloidy == requestedPloidy)
&& (maxAltAlleles == UNBOUND_ALTERNATIVE_ALLELE_COUNT || maxAltAlleles >= requestedMaxAltAlleles);
}
/**
* Resolve the constructor to use to instantiate calculators.
*
* @param clazz target class. Assume not to be {@code null}.
*/
private Constructor<? extends AFCalc> findInstantiationConstructor(final Class<? extends AFCalc> clazz) {
if (Modifier.isAbstract(clazz.getModifiers()))
throw new IllegalStateException("AF calculator implementation class cannot be abstract");
final Constructor<? extends AFCalc> result;
try {
result = clazz.getDeclaredConstructor();
} catch (final NoSuchMethodException e) {
throw new IllegalStateException("cannot find a suitable (int,int) constructor for the AFCalculator implementation " + this + " class " + clazz.getName());
}
// Check whether there will be issue calling the constructor just due to protections:
if (Modifier.isPrivate(result.getModifiers()) || (!Modifier.isPublic(result.getModifiers()) && !clazz.getPackage().equals(getClass().getPackage())))
throw new IllegalStateException("triple int constructor for AFCalculator implementation " + this + " class " + clazz.getName() + " is not public ");
return result;
}
/**
* Creates new instance
*
* @throws IllegalStateException if the instance could not be create due to some exception. The {@link Exception#getCause() cause} will hold a reference to the actual exception.
* @return never {@code null}.
*/
public AFCalc newInstance() {
try {
return constructor.newInstance();
} catch (final Throwable e) {
throw new IllegalStateException("could not instantiate AFCalculator for implementation " + this + " class " + calculatorClass.getName());
}
}
/**
* Returns the best (fastest) model give the required ploidy and alternative allele count.
*
* @param requiredPloidy required ploidy
* @param requiredAlternativeAlleleCount required alternative allele count.
* @param preferred a preferred mode if any. A {@code null} indicate that we should be try to use the default instead.
* @return never {@code null}
*/
public static AFCalculatorImplementation bestValue(final int requiredPloidy, final int requiredAlternativeAlleleCount, final AFCalculatorImplementation preferred) {
final AFCalculatorImplementation preferredValue = preferred == null ? DEFAULT : preferred;
if (preferredValue.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return preferredValue;
if (EXACT_INDEPENDENT.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return EXACT_INDEPENDENT;
if (EXACT_REFERENCE.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return EXACT_REFERENCE;
return EXACT_GENERAL_PLOIDY;
}
/**
* Returns the value that corresponds to a given implementation calculator class.
*
* @param clazz the target class.
*
* @throws IllegalArgumentException if {@code clazz} is {@code null} or if it is abstract.
* @throws IllegalStateException if
*
* @return never {@code null}.
*/
public static AFCalculatorImplementation fromCalculatorClass(final Class<? extends AFCalc> clazz) {
if (clazz == null)
throw new IllegalArgumentException("input class cannot be null");
final AFCalculatorImplementation result = calculatorClassToValue.get(clazz);
if (result == null)
throw new IllegalStateException("Attempt to retrieve AFCalculatorImplementation instance from a non-registered calculator class " + clazz.getName());
return result;
}
// Initializes the content of the class to value map.
private static Map<Class<? extends AFCalc>, AFCalculatorImplementation> buildCalculatorClassToValueMap() {
final Map<Class<? extends AFCalc>,AFCalculatorImplementation> result = new HashMap<>(values().length);
for (final AFCalculatorImplementation value : values())
if (result.put(value.calculatorClass,value) != null)
throw new IllegalStateException("more than one value associated with class " + value.calculatorClass.getName());
return result;
}
}

View File

@ -59,15 +59,14 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
private static final double LOG10_OF_2 = MathUtils.Log10Cache.get(2);
public DiploidExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy);
if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy);
public DiploidExactAFCalc() {
}
@Override
protected AFCalcResult computeLog10PNonRef(final VariantContext vc,
final double[] log10AlleleFrequencyPriors) {
protected AFCalcResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy,
final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) {
final int numAlternateAlleles = vc.getNAlleles() - 1;
final ArrayList<double[]> genotypeLikelihoods = getGLs(vc.getGenotypes(), true);
final int numSamples = genotypeLikelihoods.size()-1;
final int numChr = 2*numSamples;
@ -85,12 +84,13 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
indexesToACset.put(zeroSet.getACcounts(), zeroSet);
while ( !ACqueue.isEmpty() ) {
getStateTracker().incNEvaluations(); // keep track of the number of evaluations
stateTracker.incNEvaluations(); // keep track of the number of evaluations
// compute log10Likelihoods
final ExactACset set = ACqueue.remove();
calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors);
calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue,
indexesToACset, log10AlleleFrequencyPriors,stateTracker);
// clean up memory
indexesToACset.remove(set.getACcounts());
@ -98,17 +98,17 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
// System.out.printf(" *** removing used set=%s%n", set.ACcounts);
}
return getResultFromFinalState(vc, log10AlleleFrequencyPriors);
return getResultFromFinalState(vc, log10AlleleFrequencyPriors, stateTracker);
}
@Override
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse) {
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List<Allele> allelesToUse) {
return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL);
}
@Override
protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums) {
protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final int defaultPloidy, final LikelihoodSum[] likelihoodSums) {
final ArrayList<double[]> GLs = getGLs(vc.getGenotypes(), true);
for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
@ -140,18 +140,19 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
final int numChr,
final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset,
final double[] log10AlleleFrequencyPriors) {
final double[] log10AlleleFrequencyPriors,
final StateTracker stateTracker) {
//if ( DEBUG )
// System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
// compute the log10Likelihoods
computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors);
computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors, stateTracker);
final double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
// can we abort early because the log10Likelihoods are so small?
if ( getStateTracker().abort(log10LofK, set.getACcounts(), true) ) {
if ( stateTracker.abort(log10LofK, set.getACcounts(), true) ) {
//if ( DEBUG )
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
return log10LofK;
@ -227,7 +228,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
private void computeLofK(final ExactACset set,
final ArrayList<double[]> genotypeLikelihoods,
final double[] log10AlleleFrequencyPriors) {
final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) {
set.getLog10Likelihoods()[0] = 0.0; // the zero case
final int totalK = set.getACsum();
@ -238,8 +239,8 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
set.getLog10Likelihoods()[j] = set.getLog10Likelihoods()[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX];
final double log10Lof0 = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
getStateTracker().setLog10LikelihoodOfAFzero(log10Lof0);
getStateTracker().setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
stateTracker.setLog10LikelihoodOfAFzero(log10Lof0);
stateTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
return;
}
@ -261,7 +262,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
// update the MLE if necessary
getStateTracker().updateMLEifNeeded(log10LofK, set.getACcounts().getCounts());
stateTracker.updateMLEifNeeded(log10LofK, set.getACcounts().getCounts());
// apply the priors over each alternate allele
for ( final int ACcount : set.getACcounts().getCounts() ) {
@ -269,7 +270,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
log10LofK += log10AlleleFrequencyPriors[ACcount];
}
getStateTracker().updateMAPifNeeded(log10LofK, set.getACcounts().getCounts());
stateTracker.updateMAPifNeeded(log10LofK, set.getACcounts().getCounts());
}
private void pushData(final ExactACset targetSet,
@ -324,12 +325,15 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
return coeff;
}
@Override
public GenotypesContext subsetAlleles(final VariantContext vc,
final int defaultPloidy,
final List<Allele> allelesToUse,
final boolean assignGenotypes,
final int ploidy) {
final boolean assignGenotypes) {
if (defaultPloidy != 2)
throw new IllegalArgumentException("cannot support ploidy different than 2 and the default ploidy is " + defaultPloidy);
return allelesToUse.size() == 1
? GATKVariantContextUtils.subsetToRefOnly(vc, ploidy)
? GATKVariantContextUtils.subsetToRefOnly(vc, 2)
: GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse,
assignGenotypes ? GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN : GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL);
}

View File

@ -56,9 +56,10 @@ import java.util.*;
* Uses the Exact calculation of Heng Li
*/
abstract class ExactAFCalc extends AFCalc {
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
/**
* Sorts {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first.
* Sorts {@link ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first.
*/
protected static final Comparator<LikelihoodSum> LIKELIHOOD_SUM_COMPARATOR = new Comparator<LikelihoodSum>() {
@ -68,7 +69,7 @@ abstract class ExactAFCalc extends AFCalc {
}
};
/**
* Sorts {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first but make sure that
* Sorts {@link ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first but make sure that
* NON_REF alleles are place are last.
*/
protected static final Comparator<LikelihoodSum> LIKELIHOOD_NON_REF_THEN_SUM_COMPARATOR = new Comparator<LikelihoodSum>() {
@ -83,7 +84,7 @@ abstract class ExactAFCalc extends AFCalc {
}
};
/**
* Sorts {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with lower alternative allele index are first regardless of
* Sorts {@link ExactAFCalc.LikelihoodSum} instances where those with lower alternative allele index are first regardless of
* the likelihood sum.
*/
protected static final Comparator<LikelihoodSum> LIKELIHOOD_INDEX_COMPARATOR = new Comparator<LikelihoodSum>() {
@ -93,8 +94,8 @@ abstract class ExactAFCalc extends AFCalc {
}
};
protected ExactAFCalc(final int nSamples, int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy);
protected ExactAFCalc() {
}
/**
@ -135,10 +136,10 @@ abstract class ExactAFCalc extends AFCalc {
}
@Override
protected VariantContext reduceScope(final VariantContext vc) {
protected VariantContext reduceScope(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles) {
// don't try to genotype too many alternate alleles
final List<Allele> inputAltAlleles = vc.getAlternateAlleles();
final List<Allele> outputAltAlleles = reduceScopeAlleles(vc,getMaxAltAlleles());
final List<Allele> outputAltAlleles = reduceScopeAlleles(vc, defaultPloidy, maximumAlternativeAlleles);
// only if output allele has reduced from the input alt allele set size we should care.
final int altAlleleReduction = inputAltAlleles.size() - outputAltAlleles.size();
@ -146,17 +147,17 @@ abstract class ExactAFCalc extends AFCalc {
if (altAlleleReduction == 0)
return vc;
else if (altAlleleReduction != 0) {
logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles()
logger.warn("this tool is currently set to genotype at most " + maximumAlternativeAlleles
+ " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart()
+ " has " + (vc.getAlternateAlleles().size())
+ " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
final List<Allele> alleles = new ArrayList<>(getMaxAltAlleles() + 1);
final List<Allele> alleles = new ArrayList<>(maximumAlternativeAlleles + 1);
alleles.add(vc.getReference());
alleles.addAll(reduceScopeAlleles(vc, getMaxAltAlleles()));
alleles.addAll(reduceScopeAlleles(vc, defaultPloidy, maximumAlternativeAlleles));
final VariantContextBuilder builder = new VariantContextBuilder(vc);
builder.alleles(alleles);
builder.genotypes(reduceScopeGenotypes(vc, alleles));
builder.genotypes(reduceScopeGenotypes(vc, defaultPloidy, alleles));
if (altAlleleReduction < 0)
throw new IllegalStateException("unexpected: reduction increased the number of alt. alleles!: " + - altAlleleReduction + " " + vc + " " + builder.make());
return builder.make();
@ -170,7 +171,7 @@ abstract class ExactAFCalc extends AFCalc {
* @param numAllelesToChoose number of alleles to keep.
* @return the list of alternative allele to keep.
*/
protected List<Allele> reduceScopeAlleles(final VariantContext vc, final int numAllelesToChoose) {
protected List<Allele> reduceScopeAlleles(final VariantContext vc, final int defaultPloidy, final int numAllelesToChoose) {
// Look for the <NON_REF> allele to exclude it from the pruning if present.
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
@ -194,7 +195,7 @@ abstract class ExactAFCalc extends AFCalc {
}
// Calculate the allele likelihood sums.
reduceScopeCalculateLikelihoodSums(vc, likelihoodSums);
reduceScopeCalculateLikelihoodSums(vc, defaultPloidy, likelihoodSums);
// sort them by probability mass and choose the best ones
// Make sure that the <NON_REF> allele is last if present.
@ -227,7 +228,7 @@ abstract class ExactAFCalc extends AFCalc {
* @param vc source variant context.
* @param likelihoodSums where to update the likelihood sums.
*/
protected abstract void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums);
protected abstract void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final int defaultPloidy, final LikelihoodSum[] likelihoodSums);
/**
* Transforms the genotypes of the variant context according to the new subset of possible alleles.
@ -236,5 +237,5 @@ abstract class ExactAFCalc extends AFCalc {
* @param allelesToUse possible alleles.
* @return never {@code null}, the new set of genotype calls for the reduced scope.
*/
protected abstract GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse);
protected abstract GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List<Allele> allelesToUse);
}

View File

@ -58,26 +58,24 @@ import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.*;
public class GeneralPloidyExactAFCalc extends ExactAFCalc {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them
private final int ploidy;
private final static boolean VERBOSE = false;
protected GeneralPloidyExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy);
this.ploidy = ploidy;
protected GeneralPloidyExactAFCalc() {
}
@Override
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse) {
return subsetAlleles(vc,allelesToUse,false,ploidy);
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List<Allele> allelesToUse) {
return subsetAlleles(vc,defaultPloidy,allelesToUse,false);
}
@Override
public AFCalcResult computeLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) {
combineSinglePools(vc.getGenotypes(), vc.getNAlleles(), ploidy, log10AlleleFrequencyPriors);
return getResultFromFinalState(vc, log10AlleleFrequencyPriors);
protected AFCalcResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy, final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) {
combineSinglePools(vc.getGenotypes(), defaultPloidy, vc.getNAlleles(), log10AlleleFrequencyPriors);
return getResultFromFinalState(vc, log10AlleleFrequencyPriors, stateTracker);
}
/**
@ -126,17 +124,29 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
}
}
@Override
protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums) {
protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final int defaultPloidy, final LikelihoodSum[] likelihoodSums) {
final int numOriginalAltAlleles = likelihoodSums.length;
final ArrayList<double[]> GLs = getGLs(vc.getGenotypes(), false);
for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
final GenotypesContext genotypes = vc.getGenotypes();
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
if (!genotype.hasPL())
continue;
final double[] gls = genotype.getLikelihoods().getAsVector();
if (MathUtils.sum(gls) >= GATKVariantContextUtils.SUM_GL_THRESH_NOCALL)
continue;
final int PLindexOfBestGL = MathUtils.maxElementIndex(gls);
final double bestToHomRefDiffGL = PLindexOfBestGL == PL_INDEX_OF_HOM_REF ? 0.0 : gls[PLindexOfBestGL] - gls[PL_INDEX_OF_HOM_REF];
final int declaredPloidy = genotype.getPloidy();
final int ploidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy;
final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
// by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
for (int k=1; k < acCount.length;k++)
if (acCount[k] > 0 )
likelihoodSums[k-1].sum += acCount[k] * (likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]);
likelihoodSums[k-1].sum += acCount[k] * bestToHomRefDiffGL;
}
}
@ -144,51 +154,51 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
* Simple non-optimized version that combines GLs from several pools and produces global AF distribution.
* @param GLs Inputs genotypes context with per-pool GLs
* @param numAlleles Number of alternate alleles
* @param ploidyPerPool Number of samples per pool
* @param log10AlleleFrequencyPriors Frequency priors
*/
protected void combineSinglePools(final GenotypesContext GLs,
final int defaultPloidy,
final int numAlleles,
final int ploidyPerPool,
final double[] log10AlleleFrequencyPriors) {
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs, true);
int combinedPloidy = 0;
// Combine each pool incrementally - likelihoods will be renormalized at each step
CombinedPoolLikelihoods combinedPoolLikelihoods = new CombinedPoolLikelihoods();
// first element: zero ploidy, e.g. trivial degenerate distribution
final int numAltAlleles = numAlleles - 1;
final int[] zeroCounts = new int[numAlleles];
final ExactACset set = new ExactACset(1, new ExactACcounts(zeroCounts));
set.getLog10Likelihoods()[0] = 0.0;
final int genotypeCount = genotypeLikelihoods.size();
final StateTracker stateTracker = getStateTracker(false,numAltAlleles);
int combinedPloidy = 0;
CombinedPoolLikelihoods combinedPoolLikelihoods = new CombinedPoolLikelihoods();
combinedPoolLikelihoods.add(set);
getStateTracker().reset(numAlleles - 1);
if ( genotypeCount <= 1 ) {
// no meaningful GLs at all, just set the tracker to non poly values
// just mimic-ing call below
getStateTracker().setLog10LikelihoodOfAFzero(0.0);
} else {
for (int p=1; p<genotypeCount; p++) {
combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, genotypeLikelihoods.get(p),
combinedPloidy, ploidyPerPool, numAlleles, log10AlleleFrequencyPriors);
combinedPloidy = ploidyPerPool + combinedPloidy; // total number of chromosomes in combinedLikelihoods
if (p < genotypeCount - 1) getStateTracker().reset(); // TODO -- why is this here? It makes it hard to track the n evaluation
}
for (final Genotype genotype : GLs.iterateInSampleNameOrder()) {
// recover gls and check if they qualify.
if (!genotype.hasPL())
continue;
final double[] gls = genotype.getLikelihoods().getAsVector();
if (MathUtils.sum(gls) >= GATKVariantContextUtils.SUM_GL_THRESH_NOCALL)
continue;
stateTracker.reset();
final int declaredPloidy = genotype.getPloidy();
final int ploidy = declaredPloidy < 1 ? defaultPloidy : declaredPloidy;
// they do qualify so we proceed.
combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, gls,
combinedPloidy, ploidy, numAlleles, log10AlleleFrequencyPriors, stateTracker);
combinedPloidy = ploidy + combinedPloidy; // total number of chromosomes in combinedLikelihoods
}
if (combinedPloidy == 0)
stateTracker.setLog10LikelihoodOfAFzero(0.0);
}
public CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool,
private CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool,
double[] newGL,
int originalPloidy,
int newGLPloidy,
int numAlleles,
final double[] log10AlleleFrequencyPriors) {
final double[] log10AlleleFrequencyPriors,
final StateTracker stateTracker) {
final LinkedList<ExactACset> ACqueue = new LinkedList<>();
// mapping of ExactACset indexes to the objects
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<>();
@ -206,11 +216,11 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
// keep processing while we have AC conformations that need to be calculated
while ( !ACqueue.isEmpty() ) {
getStateTracker().incNEvaluations();
stateTracker.incNEvaluations();
// compute log10Likelihoods
final ExactACset ACset = ACqueue.remove();
calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset);
calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset, stateTracker);
// clean up memory
indexesToACset.remove(ACset.getACcounts());
@ -243,12 +253,13 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
final int originalPloidy,
final int newGLPloidy,
final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
final HashMap<ExactACcounts, ExactACset> indexesToACset,
final StateTracker stateTracker) {
// compute likelihood in "set" of new set based on original likelihoods
final int numAlleles = set.getACcounts().getCounts().length;
final int newPloidy = set.getACsum();
final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy);
final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy, stateTracker);
// add to new pool
@ -256,7 +267,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
newPool.add(set);
// TODO -- change false to true this correct line when the implementation of this model is optimized (it's too slow now to handle this fix)
if ( getStateTracker().abort(log10LofK, set.getACcounts(), false) ) {
if ( stateTracker.abort(log10LofK, set.getACcounts(), false) ) {
return log10LofK;
}
@ -364,7 +375,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
final CombinedPoolLikelihoods firstGLs,
final double[] secondGL,
final double[] log10AlleleFrequencyPriors,
final int numAlleles, final int ploidy1, final int ploidy2) {
final int numAlleles, final int ploidy1, final int ploidy2, final StateTracker stateTracker) {
final int newPloidy = ploidy1 + ploidy2;
@ -381,9 +392,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
if ( totalAltK == 0 ) { // all-ref case
final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX];
set.getLog10Likelihoods()[0] = log10Lof0;
getStateTracker().setLog10LikelihoodOfAFzero(log10Lof0);
getStateTracker().setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
stateTracker.setLog10LikelihoodOfAFzero(log10Lof0);
stateTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
return log10Lof0;
} else {
@ -427,7 +437,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
// update the MLE if necessary
final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length);
// TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
getStateTracker().updateMLEifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts);
stateTracker.updateMLEifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts);
// apply the priors over each alternate allele
for (final int ACcount : altCounts ) {
@ -435,7 +445,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
log10LofK += log10AlleleFrequencyPriors[ACcount];
}
// TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
getStateTracker().updateMAPifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts);
stateTracker.updateMAPifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts);
return log10LofK;
}
@ -462,21 +472,17 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
* From a given variant context, extract a given subset of alleles, and update genotype context accordingly,
* including updating the PL's, and assign genotypes accordingly
* @param vc variant context with alleles and genotype likelihoods
* @param defaultPloidy ploidy to assume in case that {@code vc} does not contain that information
* for a sample.
* @param allelesToUse alleles to subset
* @param assignGenotypes true: assign hard genotypes, false: leave as no-call
* @param ploidy number of chromosomes per sample (pool)
* @return GenotypesContext with new PLs
*/
public GenotypesContext subsetAlleles(final VariantContext vc,
public GenotypesContext subsetAlleles(final VariantContext vc, final int defaultPloidy,
final List<Allele> allelesToUse,
final boolean assignGenotypes,
final int ploidy) {
final boolean assignGenotypes) {
// the genotypes with PLs
final GenotypesContext oldGTs = vc.getGenotypes();
List<Allele> NO_CALL_ALLELES = new ArrayList<>(ploidy);
for (int k=0; k < ploidy; k++)
NO_CALL_ALLELES.add(Allele.NO_CALL);
// samples
final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();
@ -492,8 +498,10 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
// create the new genotypes
for ( int k = 0; k < oldGTs.size(); k++ ) {
final Genotype g = oldGTs.get(sampleIndices.get(k));
final int declaredPloidy = g.getPloidy();
final int ploidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy;
if ( !g.hasLikelihoods() ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
newGTs.add(GenotypeBuilder.create(g.getSampleName(),GATKVariantContextUtils.noCallAlleles(ploidy)));
continue;
}
@ -514,7 +522,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
// if there is no mass on the (new) likelihoods, then just no-call the sample
if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
newGTs.add(GenotypeBuilder.create(g.getSampleName(), GATKVariantContextUtils.noCallAlleles(ploidy)));
}
else {
final GenotypeBuilder gb = new GenotypeBuilder(g);
@ -526,7 +534,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
// if we weren't asked to assign a genotype, then just no-call the sample
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
gb.alleles(NO_CALL_ALLELES);
gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy));
else
assignGenotype(gb, newLikelihoods, allelesToUse, ploidy);
newGTs.add(gb.make());

View File

@ -116,47 +116,49 @@ import java.util.*;
private final static int[] BIALLELIC_NON_INFORMATIVE_PLS = new int[]{0,0,0};
private final static List<Allele> BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
/**
* Sorts AFCalcResults by their posteriors of AF > 0, so the
*/
private final static class CompareAFCalcResultsByPNonRef implements Comparator<AFCalcResult> {
private final static class CompareAFCalculatorResultsByPNonRef implements Comparator<AFCalcResult> {
@Override
public int compare(AFCalcResult o1, AFCalcResult o2) {
return -1 * Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0());
}
}
private final static CompareAFCalcResultsByPNonRef compareAFCalcResultsByPNonRef = new CompareAFCalcResultsByPNonRef();
private final static CompareAFCalculatorResultsByPNonRef compareAFCalcResultsByPNonRef = new CompareAFCalculatorResultsByPNonRef();
/**
* The AFCalc model we are using to do the bi-allelic computation
*/
final AFCalc biAlleleExactModel;
protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy);
biAlleleExactModel = new ReferenceDiploidExactAFCalc(nSamples, 1, ploidy);
protected IndependentAllelesDiploidExactAFCalc() {
super();
biAlleleExactModel = new ReferenceDiploidExactAFCalc();
}
/**
* Trivial subclass that helps with debugging by keeping track of the supporting information for this joint call
*/
private static class MyAFCalcResult extends AFCalcResult {
private static class MyAFCalculationResult extends AFCalcResult {
/**
* List of the supporting bi-allelic AFCalcResults that went into making this multi-allelic joint call
*/
final List<AFCalcResult> supporting;
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pRefByAllele, List<AFCalcResult> supporting) {
private MyAFCalculationResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pRefByAllele, List<AFCalcResult> supporting) {
super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
this.supporting = supporting;
}
}
@Override
public AFCalcResult computeLog10PNonRef(final VariantContext vc,
final double[] log10AlleleFrequencyPriors) {
final List<AFCalcResult> independentResultTrackers = computeAlleleIndependentExact(vc, log10AlleleFrequencyPriors);
public AFCalcResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy,
final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) {
final List<AFCalcResult> independentResultTrackers = computeAlleleIndependentExact(vc, defaultPloidy, log10AlleleFrequencyPriors);
if ( independentResultTrackers.size() == 0 )
throw new IllegalStateException("Independent alleles model returned an empty list of results at VC " + vc);
@ -181,12 +183,12 @@ import java.util.*;
*/
@Requires({"vc != null", "log10AlleleFrequencyPriors != null"})
@Ensures("goodIndependentResult(vc, result)")
protected final List<AFCalcResult> computeAlleleIndependentExact(final VariantContext vc,
protected final List<AFCalcResult> computeAlleleIndependentExact(final VariantContext vc, final int defaultPloidy,
final double[] log10AlleleFrequencyPriors) {
final List<AFCalcResult> results = new LinkedList<AFCalcResult>();
for ( final VariantContext subvc : makeAlleleConditionalContexts(vc) ) {
final AFCalcResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors);
final AFCalcResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, defaultPloidy, vc.getNAlleles() - 1, log10AlleleFrequencyPriors);
results.add(resultTracker);
}
@ -486,7 +488,7 @@ import java.util.*;
log10PosteriorOfACGt0 - log10PriorsOfAC[1]
};
return new MyAFCalcResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(),
return new MyAFCalculationResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(),
// necessary to ensure all values < 0
MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true),
// priors incorporate multiple alt alleles, must be normalized

View File

@ -59,12 +59,15 @@ import java.util.Map;
* Original bi-allelic ~O(N) implementation. Kept here for posterity and reference
*/
class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
protected OriginalDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy);
protected OriginalDiploidExactAFCalc() {
}
@Override
protected AFCalcResult computeLog10PNonRef(VariantContext vc, double[] log10AlleleFrequencyPriors) {
protected AFCalcResult computeLog10PNonRef(final VariantContext vc,
@SuppressWarnings("unused")
final int defaultPloidy,
final double[] log10AlleleFrequencyPriors,
final StateTracker stateTracker) {
final double[] log10AlleleFrequencyLikelihoods = new double[log10AlleleFrequencyPriors.length];
final double[] log10AlleleFrequencyPosteriors = new double[log10AlleleFrequencyPriors.length];
final Pair<Integer, Integer> result = linearExact(vc, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
@ -122,7 +125,7 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
}
}
public Pair<Integer, Integer> linearExact(final VariantContext vc,
private Pair<Integer, Integer> linearExact(final VariantContext vc,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyLikelihoods,
double[] log10AlleleFrequencyPosteriors) {
@ -194,6 +197,6 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
logY.rotate();
}
return new Pair<Integer, Integer>(lastK, mleK);
return new Pair<>(lastK, mleK);
}
}

View File

@ -47,7 +47,6 @@
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc {
protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy);
protected ReferenceDiploidExactAFCalc() {
}
}

View File

@ -57,9 +57,15 @@ import java.util.List;
import java.util.Map;
/**
* TODO this class (+AFCalculator) is a bit messy... it seems that it combines "debugging" (unnecessarily adding CPU cost in production)
* TODO but it also contains important part of the AF calculation state... why mix both!!!? It seems that the second
* TODO part could be just blend into AFCalculator ... one one hand you want to reduce classes code size ... but these
* TODO two classes code seems to be quite intertwine and makes it difficult to understand what is going on.
* in the production setting without much need
*
* Keeps track of the state information during the exact model AF calculation.
*
* Tracks things like the MLE and MAP AC values, their corresponding likelhood and posterior
* Tracks things like the MLE and MAP AC values, their corresponding likelihood and posterior
* values, the likelihood of the AF == 0 state, and the number of evaluations needed
* by the calculation to compute the P(AF == 0)
*/
@ -120,7 +126,7 @@ final class StateTracker {
* @param maxAltAlleles an integer >= 1
*/
public StateTracker(final int maxAltAlleles) {
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles);
if ( maxAltAlleles < 0 ) throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles);
alleleCountsOfMLE = new int[maxAltAlleles];
alleleCountsOfMAP = new int[maxAltAlleles];
@ -170,7 +176,7 @@ final class StateTracker {
}
@Ensures("result >= 0")
protected int getnEvaluations() {
protected int getNEvaluations() {
return nEvaluations;
}
@ -351,4 +357,9 @@ final class StateTracker {
this.allelesUsedInGenotyping = allelesUsedInGenotyping;
}
public void ensureMaximumAlleleCapacity(final int capacity) {
if (this.alleleCountsOfMAP.length < capacity)
reset(capacity);
}
}