Merge pull request #731 from broadinstitute/vrr_omniploidy_afcalc_base

Support for mixed ploidy in GenotypeGVCFs and CombineGVCFs

Story:

* https://www.pivotaltracker.com/story/show/77891194
This commit is contained in:
Valentin Ruano Rubio 2014-09-12 16:30:31 -04:00
commit 7cbb773c8f
54 changed files with 2341 additions and 880 deletions

View File

@ -46,10 +46,10 @@
package org.broadinstitute.gatk.engine.arguments;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorImplementation;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode;
import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.utils.collections.DefaultHashMap;
import htsjdk.variant.variantcontext.VariantContext;
@ -145,7 +145,7 @@ public class StandardCallerArgumentCollection implements Cloneable {
*/
@Hidden
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
public AFCalcFactory.Calculation requestedAlleleFrequencyCalculationModel;
public AFCalculatorImplementation requestedAlleleFrequencyCalculationModel;
@Hidden
@Argument(shortName = "logExactCalls", doc="x", required=false)

View File

@ -0,0 +1,112 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import java.util.Arrays;
/**
* Class that produces allele-frequency priors.
*
* <p>
* This class is thread-unsafe for performance and so should be used accordingly.
* </p>
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public abstract class AFPriorProvider {
private double[][] priorByTotalPloidy;
protected AFPriorProvider() {
}
/**
* Returns the priors given a total-ploidy (the total number of genome copies across all samples).
*
* <p>
* For performance sake the returned value is a direct reference ot the cached prior, so the client code
* must not modify its content.
* </p>
*
* @param totalPloidy the requested total ploidy.
*
* @return never {@code null}, please do not modify its content. An array of {@code totalPloidy + 1} positions where
* the ith position is the log10(prior) probability of the an alternative allele AC to be exactly <i>i</i> copies in
* a draw of {@code totalPloidy} elements.
*/
public double[] forTotalPloidy(final int totalPloidy) {
if (totalPloidy < 0)
throw new IllegalArgumentException("the total-ploidy cannot be negative");
ensureCapacity(totalPloidy);
final double[] cachedResult = priorByTotalPloidy[totalPloidy];
if (cachedResult == null)
return priorByTotalPloidy[totalPloidy] = buildPriors(totalPloidy);
else
return cachedResult;
}
/**
* Make sure that structures have enough capacity to hold the information up to the given total-ploidy.
* @param totalPloidy
*/
protected void ensureCapacity(final int totalPloidy) {
if (priorByTotalPloidy == null)
priorByTotalPloidy = new double[totalPloidy + 1][]; // just enough for those cases in where we have a fix total-ploidy.
else if (priorByTotalPloidy.length - 1 < totalPloidy)
priorByTotalPloidy = Arrays.copyOf(priorByTotalPloidy,Math.max(priorByTotalPloidy.length << 1,totalPloidy + 1));
}
/**
* Given a total ploidy construct the allele prior probabilities array.
* @param totalPloidy the target total-ploidy. Code can assume that is a non-negative number.
*
* @return never {@code null}, an array of exactly {@code totalPloidy + 1} positions that satisifed the
* contract {@link #forTotalPloidy(int) forTotalPloidy(totalPloidy)}.
*/
protected abstract double[] buildPriors(final int totalPloidy);
}

View File

@ -0,0 +1,87 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import java.util.List;
/**
* Custom priors provided as an array.
*/
public class CustomAFPriorProvider extends AFPriorProvider {
private final double[] priors;
/**
*
* @param priorValues must exactly the number of genomes in the samples (the total ploidy).
*/
public CustomAFPriorProvider(final List<Double> priorValues) {
if (priorValues == null)
throw new IllegalArgumentException("the input prior values cannot be null");
priors = new double[priorValues.size() + 1];
int i = 1;
double sum = 0;
for (double value : priorValues) {
if (value <= 0 || value >= 1)
throw new IllegalArgumentException("the AF prior value "+ value + " is out of the valid interval (0,1)");
if (Double.isNaN(value))
throw new IllegalArgumentException("NaN is not a valid prior AF value");
priors[i++] = Math.log10(value);
sum += value;
}
if (sum >= 1)
throw new IllegalArgumentException("the AF prior value sum must be less than 1: " + sum);
priors[0] = Math.log10(1 - sum);
}
@Override
protected double[] buildPriors(final int totalPloidy) {
if (totalPloidy != priors.length - 1)
throw new IllegalStateException("requesting an invalid prior total ploidy " + totalPloidy + " != " + (priors.length - 1));
return priors;
}
}

View File

@ -60,6 +60,7 @@ import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.*;
@ -286,8 +287,8 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
builder.attributes(attributes);
// create the genotypes; no-call everyone for now
final GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
final int ploidy = UAC.genotypeArgs.samplePloidy;
final List<Allele> noCall = GATKVariantContextUtils.noCallAlleles(ploidy);
for ( PoolGenotypeData sampleData : GLs ) {
// extract from multidimensional array

View File

@ -59,9 +59,9 @@ import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.MathUtils;
@ -81,8 +81,11 @@ import java.util.*;
public abstract class GenotypingEngine<Config extends StandardCallerArgumentCollection> {
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
protected final AFCalculatorProvider afCalculatorProvider ;
protected Logger logger;
protected final Config configuration;
@ -94,26 +97,12 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
protected final SampleList samples;
private final double[] log10AlleleFrequencyPriorsSNPs;
private final AFPriorProvider log10AlleleFrequencyPriorsSNPs;
private final double[] log10AlleleFrequencyPriorsIndels;
private final AFPriorProvider log10AlleleFrequencyPriorsIndels;
protected final GenomeLocParser genomeLocParser;
// the model used for calculating p(non-ref)
protected ThreadLocal<AFCalc> afcm = getAlleleFrequencyCalculatorThreadLocal();
protected ThreadLocal<AFCalc> getAlleleFrequencyCalculatorThreadLocal() {
return new ThreadLocal<AFCalc>() {
@Override
public AFCalc initialValue() {
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, false, logger);
}
};
}
/**
* Construct a new genotyper engine, on a specific subset of samples.
*
@ -124,19 +113,26 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
*
* @throws IllegalArgumentException if any of {@code samples}, {@code configuration} or {@code genomeLocParser} is {@code null}.
*/
protected GenotypingEngine(final Config configuration,final SampleList samples, final GenomeLocParser genomeLocParser) {
protected GenotypingEngine(final Config configuration, final SampleList samples,
final GenomeLocParser genomeLocParser, final AFCalculatorProvider afCalculatorProvider) {
if (configuration == null)
throw new IllegalArgumentException("the configuration cannot be null");
if (samples == null)
throw new IllegalArgumentException("the sample list provided cannot be null");
if (afCalculatorProvider == null)
throw new IllegalArgumentException("the AF calculator provider cannot be null");
this.afCalculatorProvider = afCalculatorProvider;
this.configuration = configuration;
logger = Logger.getLogger(getClass());
this.samples = samples;
numberOfGenomes = this.samples.sampleCount() * configuration.genotypeArgs.samplePloidy;
MathUtils.Log10Cache.ensureCacheContains(numberOfGenomes * 2);
log10AlleleFrequencyPriorsSNPs = computeAlleleFrequencyPriors(numberOfGenomes,
configuration.genotypeArgs.snpHeterozygosity,configuration.genotypeArgs.inputPrior);
log10AlleleFrequencyPriorsIndels = computeAlleleFrequencyPriors(numberOfGenomes,
configuration.genotypeArgs.indelHeterozygosity,configuration.genotypeArgs.inputPrior);
log10AlleleFrequencyPriorsSNPs = composeAlleleFrequencyPriorProvider(numberOfGenomes,
configuration.genotypeArgs.snpHeterozygosity, configuration.genotypeArgs.inputPrior);
log10AlleleFrequencyPriorsIndels = composeAlleleFrequencyPriorProvider(numberOfGenomes,
configuration.genotypeArgs.indelHeterozygosity, configuration.genotypeArgs.inputPrior);
this.genomeLocParser = genomeLocParser;
}
@ -220,7 +216,10 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
if (hasTooManyAlternativeAlleles(vc) || vc.getNSamples() == 0)
return emptyCallContext(tracker,refContext,rawContext);
final AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));
final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
final AFCalculator afCalculator = afCalculatorProvider.getInstance(vc,defaultPloidy,maxAltAlleles);
final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(vc, defaultPloidy,maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult);
@ -260,7 +259,8 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
builder.filter(LOW_QUAL_FILTER_NAME);
// create the genotypes
final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, outputAlleles, true,configuration.genotypeArgs.samplePloidy);
final GenotypesContext genotypes = afCalculator.subsetAlleles(vc, defaultPloidy, outputAlleles, true);
builder.genotypes(genotypes);
// *** note that calculating strand bias involves overwriting data structures, so we do that last
@ -333,7 +333,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
* @param afcr the exact model calcualtion result.
* @return never {@code null}.
*/
private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalcResult afcr) {
private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult afcr) {
final List<Allele> alleles = afcr.getAllelesUsedInGenotyping();
final int alternativeAlleleCount = alleles.size() - 1;
@ -496,14 +496,25 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
return new VariantCallContext(vc, QualityUtils.phredScaleLog10CorrectRate(log10POfRef) >= configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING, false);
}
protected final double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
/**
* Returns the log10 prior probability for all possible allele counts from 0 to N where N is the total number of
* genomes (total-ploidy).
*
* @param vc the target variant-context, use to determine the total ploidy thus the possible ACs.
* @param defaultPloidy default ploidy to be assume if we do not have the ploidy for some sample in {@code vc}.
* @param model the calculation model (SNP,INDEL or MIXED) whose priors are to be retrieved.
* @throws java.lang.NullPointerException if either {@code vc} or {@code model} is {@code null}
* @return never {@code null}, an array with exactly <code>total-ploidy(vc) + 1</code> positions.
*/
protected final double[] getAlleleFrequencyPriors( final VariantContext vc, final int defaultPloidy, final GenotypeLikelihoodsCalculationModel.Model model ) {
final int totalPloidy = GATKVariantContextUtils.totalPloidy(vc,defaultPloidy);
switch (model) {
case SNP:
case GENERALPLOIDYSNP:
return log10AlleleFrequencyPriorsSNPs;
return log10AlleleFrequencyPriorsSNPs.forTotalPloidy(totalPloidy);
case INDEL:
case GENERALPLOIDYINDEL:
return log10AlleleFrequencyPriorsIndels;
return log10AlleleFrequencyPriorsIndels.forTotalPloidy(totalPloidy);
default:
throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
}
@ -552,38 +563,20 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
*
* @return never {@code null}.
*/
public static double[] computeAlleleFrequencyPriors(final int N, final double heterozygosity, final List<Double> inputPriors) {
private static AFPriorProvider composeAlleleFrequencyPriorProvider(final int N, final double heterozygosity, final List<Double> inputPriors) {
final double[] priors = new double[N + 1];
double sum = 0.0;
final AFPriorProvider result;
if (!inputPriors.isEmpty()) {
// user-specified priors
if (inputPriors.size() != N)
throw new UserException.BadArgumentValue("inputPrior","Invalid length of inputPrior vector: vector length must be equal to # samples +1 ");
int idx = 1;
for (final double prior: inputPriors) {
if (prior < 0.0)
throw new UserException.BadArgumentValue("Bad argument: negative values not allowed","inputPrior");
priors[idx++] = Math.log10(prior);
sum += prior;
}
return new CustomAFPriorProvider(inputPriors);
}
else
// for each i
for (int i = 1; i <= N; i++) {
final double value = heterozygosity / (double)i;
priors[i] = Math.log10(value);
sum += value;
}
// protection against the case of heterozygosity too high or an excessive number of samples (which break population genetics assumptions)
if (sum > 1.0)
throw new UserException.BadArgumentValue("heterozygosity","The heterozygosity value is set too high relative to the number of samples to be processed, or invalid values specified if input priors were provided - try reducing heterozygosity value or correct input priors.");
// null frequency for AF=0 is (1 - sum(all other frequencies))
priors[0] = Math.log10(1.0 - sum);
return priors;
return new HeterozygosityAFPriorProvider(heterozygosity);
}
/**
@ -642,7 +635,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
protected Map<String,Object> composeCallAttributes(final boolean inheritAttributesFromInputVC, final VariantContext vc,
final AlignmentContext rawContext, final Map<String, AlignmentContext> stratifiedContexts, final RefMetaDataTracker tracker, final ReferenceContext refContext, final List<Integer> alleleCountsofMLE, final boolean bestGuessIsRef,
final AFCalcResult AFresult, final List<Allele> allAllelesToUse, final GenotypesContext genotypes,
final AFCalculationResult AFresult, final List<Allele> allAllelesToUse, final GenotypesContext genotypes,
final GenotypeLikelihoodsCalculationModel.Model model, final Map<String, org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final HashMap<String, Object> attributes = new HashMap<>();

View File

@ -0,0 +1,92 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import org.broadinstitute.gatk.utils.MathUtils;
import java.util.Arrays;
/**
* Allele frequency prior provider based on heterozygosity.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class HeterozygosityAFPriorProvider extends AFPriorProvider {
private final double heterozygosity;
private final double log10Heterozygosity;
/**
* Construct a new provider given the heterozygosity value.
* @param heterozygosity must be a valid heterozygosity between larger than 0 and smaller than 1.
* @throws IllegalArgumentException if {@code heterozygosity} is not valid one in the interval (0,1).
*/
public HeterozygosityAFPriorProvider(final double heterozygosity) {
if (heterozygosity <= 0)
throw new IllegalArgumentException("the heterozygosity must be greater than 0");
if (heterozygosity >= 1)
throw new IllegalArgumentException("the heterozygosity must be less than 1");
if (Double.isNaN(heterozygosity))
throw new IllegalArgumentException("the heterozygosity cannot be a NaN");
this.heterozygosity = heterozygosity;
this.log10Heterozygosity = Math.log10(heterozygosity);
}
@Override
protected double[] buildPriors(final int totalPloidy) {
final double[] result = new double [totalPloidy + 1];
Arrays.fill(result, log10Heterozygosity);
result[0] = Double.NEGATIVE_INFINITY;
MathUtils.Log10Cache.ensureCacheContains(totalPloidy);
for (int i = 1; i <= totalPloidy; i++)
result[i] -= MathUtils.Log10Cache.get(i);
final double log10Sum = MathUtils.approximateLog10SumLog10(result);
if (log10Sum >= 0)
throw new IllegalArgumentException("heterosygosity " + heterozygosity + " is too large of total ploidy " + totalPloidy);
result[0] = MathUtils.log10OneMinusPow10(log10Sum);
return result;
}
}

View File

@ -60,6 +60,7 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.*;
@ -130,8 +131,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
// create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
final int ploidy = UAC.genotypeArgs.samplePloidy;
final List<Allele> noCall = GATKVariantContextUtils.noCallAlleles(ploidy);
// For each sample, get genotype likelihoods based on pileup
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
@ -148,6 +149,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
final GenotypeBuilder b = new GenotypeBuilder(sample.getKey());
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()), UAC.getSampleContamination().get(sample.getKey()));
b.PL(genotypeLikelihoods);
b.alleles(noCall);
b.DP(getFilteredDepth(pileup));
genotypes.add(b.make());

View File

@ -64,6 +64,7 @@ import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.*;
@ -180,7 +181,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
// create the genotypes; no-call everyone for now
final GenotypesContext genotypes = GenotypesContext.create();
final int ploidy = UAC.genotypeArgs.samplePloidy;
final List<Allele> noCall = GATKVariantContextUtils.noCallAlleles(ploidy);
for ( SampleGenotypeData sampleData : GLs ) {
final double[] allLikelihoods = sampleData.GL.getLikelihoods();
final double[] myLikelihoods = new double[numLikelihoods];
@ -193,6 +195,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
final double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(myLikelihoods, false, true);
gb.PL(genotypeLikelihoods);
gb.DP(sampleData.depth);
gb.alleles(noCall);
if (UAC.annotateAllSitesWithPLs)
gb.attribute(UnifiedGenotypingEngine.PL_FOR_ALL_SNP_ALLELES_KEY,GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(allLikelihoods, false, true)));
genotypes.add(gb.make());

View File

@ -46,10 +46,12 @@
package org.broadinstitute.gatk.tools.walkers.genotyper;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.utils.commandline.*;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
@ -59,18 +61,18 @@ import org.broadinstitute.gatk.engine.filters.BadMateFilter;
import org.broadinstitute.gatk.engine.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
import org.broadinstitute.gatk.utils.SampleUtils;
import org.broadinstitute.gatk.utils.baq.BAQ;
import org.broadinstitute.gatk.utils.help.HelpConstants;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.vcf.*;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.gatk.utils.help.HelpConstants;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.io.PrintStream;
import java.util.*;
@ -284,7 +286,9 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
genotypingEngine = new UnifiedGenotypingEngine(UAC, samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode);
final AFCalculatorProvider afCalcAFCalculatorProvider = FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),UAC,logger);
genotypingEngine = new UnifiedGenotypingEngine(UAC, samples, toolkit.getGenomeLocParser(), afCalcAFCalculatorProvider, toolkit.getArguments().BAQMode);
genotypingEngine.setVerboseWriter(verboseWriter);
genotypingEngine.setAnnotationEngine(annotationEngine);
@ -308,6 +312,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
writer.writeHeader(new VCFHeader(headerInfo, samplesForHeader));
}
public static Set<VCFHeaderLine> getHeaderInfo(final UnifiedArgumentCollection UAC,
final VariantAnnotatorEngine annotationEngine,
final DbsnpArgumentCollection dbsnp) {

View File

@ -54,7 +54,8 @@ import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.utils.BaseUtils;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.baq.BAQ;
@ -104,9 +105,9 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
*
* @throws NullPointerException if either {@code configuration} or {@code toolkit} is {@code null}.
*/
public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration,
public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration, final AFCalculatorProvider afCalculatorProvider,
final GenomeAnalysisEngine toolkit) {
this(configuration,toolkit.getSampleList(),toolkit.getGenomeLocParser(),toolkit.getArguments().BAQMode);
this(configuration,toolkit.getSampleList(),toolkit.getGenomeLocParser(),afCalculatorProvider,toolkit.getArguments().BAQMode);
}
@ -123,10 +124,10 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
* @throws IllegalArgumentException if {@code baqCalculationMode} is {@code null}.
*/
public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration,
final SampleList samples, final GenomeLocParser genomeLocParser,
final SampleList samples, final GenomeLocParser genomeLocParser, final AFCalculatorProvider afCalculatorProvider,
final BAQ.CalculationMode baqCalculationMode) {
super(configuration,samples,genomeLocParser);
super(configuration,samples,genomeLocParser,afCalculatorProvider);
if (baqCalculationMode == null)
throw new IllegalArgumentException("the BAQ calculation mode cannot be null");
@ -358,7 +359,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
@Override
protected Map<String,Object> composeCallAttributes(final boolean inheritAttributesFromInputVC, final VariantContext vc,
final AlignmentContext rawContext, final Map<String, AlignmentContext> stratifiedContexts, final RefMetaDataTracker tracker, final ReferenceContext refContext, final List<Integer> alleleCountsofMLE, final boolean bestGuessIsRef,
final AFCalcResult AFresult, final List<Allele> allAllelesToUse, final GenotypesContext genotypes,
final AFCalculationResult AFresult, final List<Allele> allAllelesToUse, final GenotypesContext genotypes,
final GenotypeLikelihoodsCalculationModel.Model model, final Map<String, org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final Map<String,Object> result = super.composeCallAttributes(inheritAttributesFromInputVC, vc,rawContext,stratifiedContexts,tracker,refContext,alleleCountsofMLE,bestGuessIsRef,
AFresult,allAllelesToUse,genotypes,model,perReadAlleleLikelihoodMap);
@ -375,7 +376,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
private double calculateSLOD(final Map<String, AlignmentContext> stratifiedContexts,
final RefMetaDataTracker tracker,
final ReferenceContext refContext, final AFCalcResult AFresult,
final ReferenceContext refContext, final AFCalculationResult AFresult,
final List<Allele> allAllelesToUse,
final GenotypeLikelihoodsCalculationModel.Model model,
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
@ -385,13 +386,13 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
// the forward lod
final AFCalcResult forwardAFresult = getDirectionalAfCalcResult(AlignmentContextUtils.ReadOrientation.FORWARD,stratifiedContexts, tracker, refContext, allAllelesToUse, model, perReadAlleleLikelihoodMap);
final AFCalculationResult forwardAFresult = getDirectionalAfCalcResult(AlignmentContextUtils.ReadOrientation.FORWARD,stratifiedContexts, tracker, refContext, allAllelesToUse, model, perReadAlleleLikelihoodMap);
final double forwardLog10PofNull = forwardAFresult.getLog10LikelihoodOfAFEq0();
final double forwardLog10PofF = forwardAFresult.getLog10LikelihoodOfAFGT0();
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
// the reverse lod
final AFCalcResult reverseAFresult = getDirectionalAfCalcResult(AlignmentContextUtils.ReadOrientation.REVERSE,stratifiedContexts, tracker, refContext, allAllelesToUse, model, perReadAlleleLikelihoodMap);
final AFCalculationResult reverseAFresult = getDirectionalAfCalcResult(AlignmentContextUtils.ReadOrientation.REVERSE,stratifiedContexts, tracker, refContext, allAllelesToUse, model, perReadAlleleLikelihoodMap);
final double reverseLog10PofNull = reverseAFresult.getLog10LikelihoodOfAFEq0();
final double reverseLog10PofF = reverseAFresult.getLog10LikelihoodOfAFGT0();
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
@ -408,7 +409,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
return strandScore;
}
private AFCalcResult getDirectionalAfCalcResult(final AlignmentContextUtils.ReadOrientation orientation,
private AFCalculationResult getDirectionalAfCalcResult(final AlignmentContextUtils.ReadOrientation orientation,
final Map<String, AlignmentContext> stratifiedContexts,
final RefMetaDataTracker tracker,
final ReferenceContext refContext, List<Allele> allAllelesToUse,
@ -416,7 +417,9 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts,orientation,
allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
return afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));
final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
return afCalculatorProvider.getInstance(vc, defaultPloidy, maxAltAlleles).getLog10PNonRef(vc, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
}
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(final ReferenceContext refContext,
@ -480,7 +483,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
AFline.append('\t');
AFline.append(i).append('/').append(numberOfGenomes).append('\t');
AFline.append(String.format("%.2f\t", ((float) i) / numberOfGenomes));
AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i]));
AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(vc,configuration.genotypeArgs.samplePloidy,model)[i]));
verboseWriter.println(AFline.toString());
}

View File

@ -1,263 +0,0 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.gatk.utils.classloader.PluginManager;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import java.lang.reflect.Constructor;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
* Factory to make AFCalculations
*/
public class AFCalcFactory {
/**
* Enumeration of usable AF calculation, their constraints (i.e. ploidy).
*
* Note that the order these occur in the enum is the order of preference, so
* the first value is taken over the second when multiple calculations satisfy
* the needs of the request (i.e., considering ploidy).
*/
public enum Calculation {
/** default implementation */
EXACT_INDEPENDENT(IndependentAllelesDiploidExactAFCalc.class, 2, -1),
/** reference implementation of multi-allelic EXACT model. Extremely slow for many alternate alleles */
EXACT_REFERENCE(ReferenceDiploidExactAFCalc.class, 2, -1),
/** original biallelic exact model, for testing only */
EXACT_ORIGINAL(OriginalDiploidExactAFCalc.class, 2, 2),
/** implementation that supports any sample ploidy. Currently not available for the HaplotypeCaller */
EXACT_GENERAL_PLOIDY("GeneralPloidyExactAFCalc", -1, -1);
/**
* Must be a name because we look this up dynamically
*/
public final String className;
public final int maxAltAlleles;
public final int requiredPloidy;
private Calculation(final String className, final int requiredPloidy, final int maxAltAlleles) {
this.className = className;
this.requiredPloidy = requiredPloidy;
this.maxAltAlleles = maxAltAlleles;
}
private Calculation(final Class clazz, final int requiredPloidy, final int maxAltAlleles) {
this(clazz.getSimpleName(), requiredPloidy, maxAltAlleles);
}
public boolean usableForParams(final int requestedPloidy, final int requestedMaxAltAlleles) {
return (requiredPloidy == -1 || requiredPloidy == requestedPloidy)
&& (maxAltAlleles == -1 || maxAltAlleles >= requestedMaxAltAlleles);
}
public static Calculation getDefaultModel() { return EXACT_INDEPENDENT; }
/**
* Returns the best (fastest) model give the required ploidy and alternative allele count.
* @param requiredPloidy required ploidy
* @param requiredAlternativeAlleleCount required alternative allele count.
* @param preferredModel a preferred mode if any. A {@code null} indicate that we should be try to use the default instead.
* @return never {@code null}
*/
public static Calculation getBestModel(final int requiredPloidy, final int requiredAlternativeAlleleCount, final Calculation preferredModel) {
final Calculation preferred = preferredModel == null ? getDefaultModel() : preferredModel;
if (preferred.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return preferred;
if (EXACT_INDEPENDENT.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return EXACT_INDEPENDENT;
if (EXACT_REFERENCE.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return EXACT_REFERENCE;
return EXACT_GENERAL_PLOIDY;
}
}
private static final Map<String, Class<? extends AFCalc>> afClasses;
static {
afClasses = new PluginManager<AFCalc>(AFCalc.class).getPluginsByName();
}
private AFCalcFactory() {
}
private static Class<? extends AFCalc> getClassByName(final String name) {
for ( final Class<? extends AFCalc> clazz : afClasses.values() ) {
if ( clazz.getSimpleName().contains(name) ) {
return clazz;
}
}
return null;
}
/**
* Create a new AFCalc based on the parameters in the UAC
*
* @param UAC the UnifiedArgumentCollection containing the command-line parameters for the caller
* @param nSamples the number of samples we will be using
* @param logger an optional (can be null) logger to override the default in the model
* @return an initialized AFCalc
*/
public static AFCalc createAFCalc(final StandardCallerArgumentCollection UAC,
final int nSamples, final boolean emitConfModel,
final Logger logger) {
final Calculation afCalculationModel = Calculation.getBestModel(UAC.genotypeArgs.samplePloidy,UAC.genotypeArgs.MAX_ALTERNATE_ALLELES,
UAC.requestedAlleleFrequencyCalculationModel);
final AFCalc calc = createAFCalc(afCalculationModel, nSamples, UAC.genotypeArgs.MAX_ALTERNATE_ALLELES, UAC.genotypeArgs.samplePloidy);
if ( logger != null ) calc.setLogger(logger);
if ( UAC.exactCallsLog != null ) calc.enableProcessLog(UAC.exactCallsLog);
return calc;
}
/**
* Create a new AFCalc, choosing the best implementation based on the given parameters, assuming
* that we will only be requesting bi-allelic variants to diploid genotypes
*
* @param nSamples the number of samples we'll be using
*
* @return an initialized AFCalc
*/
public static AFCalc createAFCalc(final int nSamples) {
return createAFCalc(chooseBestCalculation(nSamples, 2, 1), nSamples, 2, 2);
}
/**
* Create a new AFCalc that supports maxAltAlleles for all variants and diploid genotypes
*
* @param calc the calculation we'd like to use
* @param nSamples the number of samples we'll be using
* @param maxAltAlleles the max. alt alleles for both SNPs and indels
*
* @return an initialized AFCalc
*/
public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles) {
return createAFCalc(calc, nSamples, maxAltAlleles, 2);
}
/**
* Create a new AFCalc, choosing the best implementation based on the given parameters
*
* @param nSamples the number of samples we'll be using
* @param maxAltAlleles the max. alt alleles to consider for SNPs
* @param ploidy the sample ploidy. Must be consistent with the calc
*
* @return an initialized AFCalc
*/
public static AFCalc createAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
return createAFCalc(chooseBestCalculation(nSamples, ploidy, maxAltAlleles), nSamples, maxAltAlleles, ploidy);
}
/**
* Choose the best calculation for nSamples and ploidy
*
* @param nSamples
* @param ploidy
* @param maxAltAlleles
* @return
*/
private static Calculation chooseBestCalculation(final int nSamples, final int ploidy, final int maxAltAlleles) {
for ( final Calculation calc : Calculation.values() ) {
if ( calc.usableForParams(ploidy, maxAltAlleles) ) {
return calc;
}
}
throw new IllegalStateException("no calculation found that supports nSamples " + nSamples + " ploidy " + ploidy + " and maxAltAlleles " + maxAltAlleles);
}
/**
* Create a new AFCalc
*
* @param calc the calculation to use
* @param nSamples the number of samples we'll be using
* @param maxAltAlleles the max. alt alleles to consider for SNPs
* @param ploidy the sample ploidy. Must be consistent with the calc
*
* @return an initialized AFCalc
*/
public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles, final int ploidy) {
if ( calc == null ) throw new IllegalArgumentException("Calculation cannot be null");
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles);
if ( ploidy < 1 ) throw new IllegalArgumentException("sample ploidy must be greater than zero " + ploidy);
if ( ! calc.usableForParams(ploidy, maxAltAlleles) )
throw new IllegalArgumentException("AFCalc " + calc + " does not support requested ploidy " + ploidy);
final Class<? extends AFCalc> afClass = getClassByName(calc.className);
if ( afClass == null )
throw new IllegalArgumentException("Unexpected AFCalc " + calc);
try {
Object args[] = new Object[]{nSamples, maxAltAlleles, ploidy};
Constructor c = afClass.getDeclaredConstructor(int.class, int.class, int.class);
return (AFCalc)c.newInstance(args);
} catch (Exception e) {
throw new ReviewedGATKException("Could not instantiate AFCalc " + calc, e);
}
}
protected static List<AFCalc> createAFCalcs(final List<Calculation> calcs, final int nSamples, final int maxAltAlleles, final int ploidy) {
final List<AFCalc> AFCalcs = new LinkedList<AFCalc>();
for ( final Calculation calc : calcs )
AFCalcs.add(createAFCalc(calc, nSamples, maxAltAlleles, ploidy));
return AFCalcs;
}
}

View File

@ -63,7 +63,7 @@ import java.util.*;
* Note that all of the values -- i.e. priors -- are checked now that they are meaningful, which means
* that users of this code can rely on the values coming out of these functions.
*/
public class AFCalcResult {
public class AFCalculationResult {
private final static int AF0 = 0;
private final static int AF1p = 1;
private final static int LOG_10_ARRAY_SIZES = 2;
@ -89,12 +89,12 @@ public class AFCalcResult {
/**
* Create a results object capability of storing results for calls with up to maxAltAlleles
*/
public AFCalcResult(final int[] alleleCountsOfMLE,
final int nEvaluations,
final List<Allele> allelesUsedInGenotyping,
final double[] log10LikelihoodsOfAC,
final double[] log10PriorsOfAC,
final Map<Allele, Double> log10pRefByAllele) {
public AFCalculationResult(final int[] alleleCountsOfMLE,
final int nEvaluations,
final List<Allele> allelesUsedInGenotyping,
final double[] log10LikelihoodsOfAC,
final double[] log10PriorsOfAC,
final Map<Allele, Double> log10pRefByAllele) {
if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.size() < 1 ) throw new IllegalArgumentException("allelesUsedInGenotyping must be non-null list of at least 1 value " + allelesUsedInGenotyping);
if ( alleleCountsOfMLE == null ) throw new IllegalArgumentException("alleleCountsOfMLE cannot be null");
if ( alleleCountsOfMLE.length != allelesUsedInGenotyping.size() - 1) throw new IllegalArgumentException("alleleCountsOfMLE.length " + alleleCountsOfMLE.length + " != allelesUsedInGenotyping.size() " + allelesUsedInGenotyping.size());
@ -123,8 +123,8 @@ public class AFCalcResult {
* @param log10PriorsOfAC
* @return
*/
public AFCalcResult withNewPriors(final double[] log10PriorsOfAC) {
return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
public AFCalculationResult withNewPriors(final double[] log10PriorsOfAC) {
return new AFCalculationResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
}
/**

View File

@ -61,35 +61,26 @@ import java.util.List;
/**
* Generic interface for calculating the probability of alleles segregating given priors and genotype likelihoods
*/
public abstract class AFCalc implements Cloneable {
private final static Logger defaultLogger = Logger.getLogger(AFCalc.class);
public abstract class AFCalculator implements Cloneable {
private final static Logger defaultLogger = Logger.getLogger(AFCalculator.class);
protected final int nSamples;
protected final int maxAlternateAllelesToGenotype;
protected Logger logger = defaultLogger;
private SimpleTimer callTimer = new SimpleTimer();
private final StateTracker stateTracker;
private StateTracker stateTracker;
private ExactCallLogger exactCallLogger = null;
/**
* Create a new AFCalc object capable of calculating the prob. that alleles are
* segregating among nSamples with up to maxAltAlleles for SNPs and maxAltAllelesForIndels
* for indels for samples with ploidy
* segregating among many samples.
*
* @param nSamples number of samples, must be > 0
* @param maxAltAlleles maxAltAlleles for SNPs
* @param ploidy the ploidy, must be > 0
* <p>
* Restrictions in ploidy and number of alternative alleles that a instance can handle will be determined
* by its implementation class {@link AFCalculatorImplementation}
* </p>
*/
protected AFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles);
if ( ploidy < 1 ) throw new IllegalArgumentException("ploidy must be > 0 but got " + ploidy);
this.nSamples = nSamples;
this.maxAlternateAllelesToGenotype = maxAltAlleles;
this.stateTracker = new StateTracker(maxAltAlleles);
protected AFCalculator() {
}
/**
@ -118,19 +109,18 @@ public abstract class AFCalc implements Cloneable {
* @param log10AlleleFrequencyPriors a prior vector nSamples x 2 in length indicating the Pr(AF = i)
* @return result (for programming convenience)
*/
public AFCalcResult getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) {
public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles, final double[] log10AlleleFrequencyPriors) {
if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null");
if ( vc.getNAlleles() == 1 ) throw new IllegalArgumentException("VariantContext has only a single reference allele, but getLog10PNonRef requires at least one at all " + vc);
if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null");
if ( stateTracker == null ) throw new IllegalArgumentException("Results object cannot be null");
// reset the result, so we can store our new result there
stateTracker.reset();
final StateTracker stateTracker = getStateTracker(true,maximumAlternativeAlleles);
final VariantContext vcWorking = reduceScope(vc);
final VariantContext vcWorking = reduceScope(vc,defaultPloidy, maximumAlternativeAlleles);
callTimer.start();
final AFCalcResult result = computeLog10PNonRef(vcWorking, log10AlleleFrequencyPriors);
final AFCalculationResult result = computeLog10PNonRef(vcWorking, defaultPloidy, log10AlleleFrequencyPriors, stateTracker);
final long nanoTime = callTimer.getElapsedTimeNano();
if ( exactCallLogger != null )
@ -140,19 +130,19 @@ public abstract class AFCalc implements Cloneable {
}
/**
* Convert the final state of the state tracker into our result as an AFCalcResult
* Convert the final state of the state tracker into our result as an AFCalculationResult
*
* Assumes that stateTracker has been updated accordingly
*
* @param vcWorking the VariantContext we actually used as input to the calc model (after reduction)
* @param log10AlleleFrequencyPriors the priors by AC vector
* @return a AFCalcResult describing the result of this calculation
* @return a AFCalculationResult describing the result of this calculation
*/
@Requires("stateTracker.getnEvaluations() >= 0")
@Ensures("result != null")
protected AFCalcResult getResultFromFinalState(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors) {
protected AFCalculationResult getResultFromFinalState(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) {
stateTracker.setAllelesUsedInGenotyping(vcWorking.getAlleles());
return stateTracker.toAFCalcResult(log10AlleleFrequencyPriors);
return stateTracker.toAFCalculationResult(log10AlleleFrequencyPriors);
}
// ---------------------------------------------------------------------------
@ -174,7 +164,7 @@ public abstract class AFCalc implements Cloneable {
*/
@Requires({"vc != null", "vc.getNAlleles() > 1"})
@Ensures("result != null")
protected abstract VariantContext reduceScope(final VariantContext vc);
protected abstract VariantContext reduceScope(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles);
/**
* Actually carry out the log10PNonRef calculation on vc, storing results in results
@ -185,8 +175,8 @@ public abstract class AFCalc implements Cloneable {
* @return a AFCalcResult object describing the results of this calculation
*/
@Requires({"vc != null", "log10AlleleFrequencyPriors != null", "vc.getNAlleles() > 1"})
protected abstract AFCalcResult computeLog10PNonRef(final VariantContext vc,
final double[] log10AlleleFrequencyPriors);
protected abstract AFCalculationResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy,
final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker);
/**
* Subset VC to the just allelesToUse, updating genotype likelihoods
@ -194,15 +184,15 @@ public abstract class AFCalc implements Cloneable {
* Must be overridden by concrete subclasses
*
* @param vc variant context with alleles and genotype likelihoods
* @param defaultPloidy default ploidy to assume in case {@code vc} does not indicate it for a sample.
* @param allelesToUse alleles to subset
* @param assignGenotypes
* @param ploidy
* @return GenotypesContext object
*/
public abstract GenotypesContext subsetAlleles(final VariantContext vc,
final int defaultPloidy,
final List<Allele> allelesToUse,
final boolean assignGenotypes,
final int ploidy);
final boolean assignGenotypes);
// ---------------------------------------------------------------------------
//
@ -210,12 +200,41 @@ public abstract class AFCalc implements Cloneable {
//
// ---------------------------------------------------------------------------
public int getMaxAltAlleles() {
return maxAlternateAllelesToGenotype;
}
protected StateTracker getStateTracker() {
/**
* Retrieves the state tracker.
*
* <p>
* The tracker will be reset if so requested or if it needs to be resized due to an increase in the
* maximum number of alleles is must be able to handle.
* </p>
*
* @param reset make sure the tracker is reset.
* @param maximumAlternativeAlleleCount the maximum alternative allele count it must be able to handle. Has no effect if
* the current tracker is able to handle that number.
*
* @return never {@code null}
*/
protected StateTracker getStateTracker(final boolean reset, final int maximumAlternativeAlleleCount) {
if (stateTracker == null)
stateTracker = new StateTracker(maximumAlternativeAlleleCount);
else if (reset)
stateTracker.reset(maximumAlternativeAlleleCount);
else
stateTracker.ensureMaximumAlleleCapacity(maximumAlternativeAlleleCount);
return stateTracker;
}
/**
* Used by testing code.
*
* Please don't use this method in production.
*
* @deprecated
*/
@Deprecated
protected int getAltAlleleCountOfMAP(final int allele) {
return getStateTracker(false,allele + 1).getAlleleCountsOfMAP()[allele];
}
}

View File

@ -0,0 +1,135 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
/**
* Factory to make AFCalculations
*/
public class AFCalculatorFactory {
/**
* Create a new AFCalc, choosing the best implementation based on the given parameters, assuming
* that we will only be requesting bi-allelic variants to diploid genotypes
*
* @return an initialized AFCalc
*
* @deprecated hardcoded special reference to diploid analysis... eventually using code must explicitly indicate the
* ploidy as this is a sign of a hidden diploid assumption which is bad.
*/
@Deprecated
public static AFCalculator createCalculatorForDiploidBiAllelicAnalysis() {
return AFCalculatorImplementation.bestValue(2,1,null).newInstance();
}
/**
* Create a new AFCalc that supports maxAltAlleles for all variants and diploid genotypes
*
*
* @deprecated hardcoded special reference to diploid analysis... eventually using code must explicitly indicate the
* ploidy as this is a sign of a hidden diploid assumption which is bad.
* @return an initialized AFCalc
*/
@Deprecated
public static AFCalculator createCalculatorForDiploidAnalysis() {
return AFCalculatorImplementation.bestValue(2,AFCalculatorImplementation.UNBOUND_ALTERNATIVE_ALLELE_COUNT,null).newInstance();
}
/**
* Create a new AFCalc, choosing the best implementation based on the given parameters
*
* @param nSamples the number of samples we'll be using
* @param maxAltAlleles the max. alt alleles to consider for SNPs
* @param ploidy the sample ploidy. Must be consistent with the calc
*
* @return an initialized AFCalc
*/
public static AFCalculator createCalculator(final int nSamples, final int maxAltAlleles, final int ploidy) {
return createCalculator(chooseBestImplementation(ploidy, maxAltAlleles), nSamples, maxAltAlleles, ploidy);
}
/**
* Choose the best calculation for nSamples and ploidy
*
* @param ploidy
* @param maxAltAlleles
* @return
*/
private static AFCalculatorImplementation chooseBestImplementation(final int ploidy, final int maxAltAlleles) {
for ( final AFCalculatorImplementation calc : AFCalculatorImplementation.values() ) {
if ( calc.usableForParams(ploidy, maxAltAlleles) ) {
return calc;
}
}
throw new IllegalStateException("no calculation found that supports nSamples " + ploidy + " and maxAltAlleles " + maxAltAlleles);
}
/**
* Create a new AFCalc
*
* @param implementation the calculation to use
* @param nSamples the number of samples we'll be using
* @param maxAltAlleles the max. alt alleles to consider for SNPs
* @param ploidy the sample ploidy. Must be consistent with the implementation
*
* @return an initialized AFCalc
*/
public static AFCalculator createCalculator(final AFCalculatorImplementation implementation, final int nSamples, final int maxAltAlleles, final int ploidy) {
if ( implementation == null ) throw new IllegalArgumentException("Calculation cannot be null");
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles);
if ( ploidy < 1 ) throw new IllegalArgumentException("sample ploidy must be greater than zero " + ploidy);
if ( ! implementation.usableForParams(ploidy, maxAltAlleles) )
throw new IllegalArgumentException("AFCalc " + implementation + " does not support requested ploidy " + ploidy);
return implementation.newInstance();
}
}

View File

@ -0,0 +1,236 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import java.lang.reflect.Constructor;
import java.lang.reflect.Modifier;
import java.util.HashMap;
import java.util.Map;
/**
* Enumeration of usable AF calculation, their constraints (i.e. ploidy).
*
* Note that the order these occur in the enum is the order of preference, so
* the first value is taken over the second when multiple calculations satisfy
* the needs of the request (i.e., considering ploidy).
*/
public enum AFCalculatorImplementation {
/** default implementation */
EXACT_INDEPENDENT(IndependentAllelesDiploidExactAFCalculator.class, 2),
/** reference implementation of multi-allelic EXACT model. Extremely slow for many alternate alleles */
EXACT_REFERENCE(ReferenceDiploidExactAFCalculator.class, 2),
/** original biallelic exact model, for testing only */
EXACT_ORIGINAL(OriginalDiploidExactAFCalculator.class, 2, 2),
/** implementation that supports any sample ploidy. Currently not available for the HaplotypeCaller */
EXACT_GENERAL_PLOIDY(GeneralPloidyExactAFCalculator.class);
/**
* Special max alt allele count indicating that this maximum is in fact unbound (can be anything).
*/
public final static int UNBOUND_ALTERNATIVE_ALLELE_COUNT = -1;
/**
* Special ploidy constant that indicates that in fact the ploidy is unbound (can be anything).
*/
public final static int UNBOUND_PLOIDY = -1;
private static Map<Class<? extends AFCalculator>,AFCalculatorImplementation> calculatorClassToValue = buildCalculatorClassToValueMap();
/**
* Reference to the calculator class.
*/
public final Class<? extends AFCalculator> calculatorClass;
/**
* Maximum number of supported alternative alleles.
*/
public final int maxAltAlleles;
/**
* Reference to the constructor to instantiate a calculator for this implementation.
*/
protected final Constructor<? extends AFCalculator> constructor;
/**
* Supported ploidy.
*
* This is equal to {@link #UNBOUND_PLOIDY} if the class can handle any ploidy.
*/
public final int requiredPloidy;
/**
* Reference to the default implementation.
*/
public final static AFCalculatorImplementation DEFAULT = EXACT_INDEPENDENT;
/**
* Constructs a new instance given all its properties
* @param clazz the calculator class that realizes this implementation.
* @param requiredPloidy the required ploidy; zero or greater or {@link #UNBOUND_PLOIDY} to indicate that any ploidy is supported.
* @param maxAltAlleles the maximum alternative alleles; zero or greater or {@link #UNBOUND_ALTERNATIVE_ALLELE_COUNT} to indicate that any maximum number of alternative alleles is supported.
*/
AFCalculatorImplementation(final Class<? extends AFCalculator> clazz, final int requiredPloidy, final int maxAltAlleles) {
calculatorClass = clazz;
this.requiredPloidy = requiredPloidy;
this.maxAltAlleles = maxAltAlleles;
this.constructor = findInstantiationConstructor(calculatorClass);
}
/**
* Constructs a new instance leaving ploidy and max-allele count unbound.
* @param clazz the calculator class that realizes this implementation.
*/
AFCalculatorImplementation(final Class<? extends AFCalculator> clazz) {
this(clazz,UNBOUND_PLOIDY, UNBOUND_ALTERNATIVE_ALLELE_COUNT);
}
/** Constructs a new instance leaving max-allele count unbound.
* @param clazz the calculator class that realizes this implementation.
* @param requiredPloidy the required ploidy; zero or greater or {@link #UNBOUND_PLOIDY} to indicate that any ploidy is supported.
*/
AFCalculatorImplementation(final Class<? extends AFCalculator> clazz, final int requiredPloidy) {
this(clazz,requiredPloidy,UNBOUND_PLOIDY);
}
/**
* Checks whether a given ploidy and max alternative alleles combination is supported or not.
* @param requestedPloidy the targeted ploidy.
* @param requestedMaxAltAlleles the targeted max alternative alleles.
* @return {@code true} iff this calculator implementation satisfies both requirements.
*/
public boolean usableForParams(final int requestedPloidy, final int requestedMaxAltAlleles) {
return (requiredPloidy == UNBOUND_PLOIDY || requiredPloidy == requestedPloidy)
&& (maxAltAlleles == UNBOUND_ALTERNATIVE_ALLELE_COUNT || maxAltAlleles >= requestedMaxAltAlleles);
}
/**
* Resolve the constructor to use to instantiate calculators.
*
* @param clazz target class. Assume not to be {@code null}.
*/
private Constructor<? extends AFCalculator> findInstantiationConstructor(final Class<? extends AFCalculator> clazz) {
if (Modifier.isAbstract(clazz.getModifiers()))
throw new IllegalStateException("AF calculator implementation class cannot be abstract");
final Constructor<? extends AFCalculator> result;
try {
result = clazz.getDeclaredConstructor();
} catch (final NoSuchMethodException e) {
throw new IllegalStateException("cannot find a suitable (int,int) constructor for the AFCalculator implementation " + this + " class " + clazz.getName());
}
// Check whether there will be issue calling the constructor just due to protections:
if (Modifier.isPrivate(result.getModifiers()) || (!Modifier.isPublic(result.getModifiers()) && !clazz.getPackage().equals(getClass().getPackage())))
throw new IllegalStateException("triple int constructor for AFCalculator implementation " + this + " class " + clazz.getName() + " is not public ");
return result;
}
/**
* Creates new instance
*
* @throws IllegalStateException if the instance could not be create due to some exception. The {@link Exception#getCause() cause} will hold a reference to the actual exception.
* @return never {@code null}.
*/
public AFCalculator newInstance() {
try {
return constructor.newInstance();
} catch (final Throwable e) {
throw new IllegalStateException("could not instantiate AFCalculator for implementation " + this + " class " + calculatorClass.getName());
}
}
/**
* Returns the best (fastest) model give the required ploidy and alternative allele count.
*
* @param requiredPloidy required ploidy
* @param requiredAlternativeAlleleCount required alternative allele count.
* @param preferred a preferred mode if any. A {@code null} indicate that we should be try to use the default instead.
* @return never {@code null}
*/
public static AFCalculatorImplementation bestValue(final int requiredPloidy, final int requiredAlternativeAlleleCount, final AFCalculatorImplementation preferred) {
final AFCalculatorImplementation preferredValue = preferred == null ? DEFAULT : preferred;
if (preferredValue.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return preferredValue;
if (EXACT_INDEPENDENT.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return EXACT_INDEPENDENT;
if (EXACT_REFERENCE.usableForParams(requiredPloidy,requiredAlternativeAlleleCount))
return EXACT_REFERENCE;
return EXACT_GENERAL_PLOIDY;
}
/**
* Returns the value that corresponds to a given implementation calculator class.
*
* @param clazz the target class.
*
* @throws IllegalArgumentException if {@code clazz} is {@code null} or if it is abstract.
* @throws IllegalStateException if
*
* @return never {@code null}.
*/
public static AFCalculatorImplementation fromCalculatorClass(final Class<? extends AFCalculator> clazz) {
if (clazz == null)
throw new IllegalArgumentException("input class cannot be null");
final AFCalculatorImplementation result = calculatorClassToValue.get(clazz);
if (result == null)
throw new IllegalStateException("Attempt to retrieve AFCalculatorImplementation instance from a non-registered calculator class " + clazz.getName());
return result;
}
// Initializes the content of the class to value map.
private static Map<Class<? extends AFCalculator>, AFCalculatorImplementation> buildCalculatorClassToValueMap() {
final Map<Class<? extends AFCalculator>,AFCalculatorImplementation> result = new HashMap<>(values().length);
for (final AFCalculatorImplementation value : values())
if (result.put(value.calculatorClass,value) != null)
throw new IllegalStateException("more than one value associated with class " + value.calculatorClass.getName());
return result;
}
}

View File

@ -60,6 +60,7 @@ import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import java.io.*;
import java.util.*;
@ -68,8 +69,8 @@ import java.util.*;
* A simple GATK utility (i.e, runs from command-line) for assessing the performance of
* the exact model
*/
public class AFCalcPerformanceTest {
final static Logger logger = Logger.getLogger(AFCalcPerformanceTest.class);
public class AFCalculatorPerformanceTest {
final static Logger logger = Logger.getLogger(AFCalculatorPerformanceTest.class);
private static abstract class Analysis {
final GATKReport report;
@ -78,7 +79,7 @@ public class AFCalcPerformanceTest {
report = GATKReport.newSimpleReport(name, columns);
}
public abstract void run(final AFCalcTestBuilder testBuilder,
public abstract void run(final AFCalculatorTestBuilder testBuilder,
final List<Object> coreColumns);
public String getName() {
@ -95,18 +96,18 @@ public class AFCalcPerformanceTest {
super("AnalyzeByACAndPL", Utils.append(columns, "non.type.pls", "ac", "n.alt.seg", "other.ac"));
}
public void run(final AFCalcTestBuilder testBuilder, final List<Object> coreValues) {
public void run(final AFCalculatorTestBuilder testBuilder, final List<Object> coreValues) {
final SimpleTimer timer = new SimpleTimer();
for ( final int nonTypePL : Arrays.asList(100) ) {
final AFCalc calc = testBuilder.makeModel();
final AFCalculator calc = testBuilder.makeModel();
final double[] priors = testBuilder.makePriors();
for ( int[] ACs : makeACs(testBuilder.numAltAlleles, testBuilder.nSamples*2) ) {
final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL);
timer.start();
final AFCalcResult resultTracker = calc.getLog10PNonRef(vc, priors);
final AFCalculationResult resultTracker = calc.getLog10PNonRef(vc, HomoSapiensConstants.DEFAULT_PLOIDY, testBuilder.numAltAlleles, priors);
final long runtime = timer.getElapsedTimeNano();
int otherAC = 0;
@ -154,11 +155,11 @@ public class AFCalcPerformanceTest {
super("AnalyzeBySingletonPosition", Utils.append(columns, "non.type.pls", "position.of.singleton"));
}
public void run(final AFCalcTestBuilder testBuilder, final List<Object> coreValues) {
public void run(final AFCalculatorTestBuilder testBuilder, final List<Object> coreValues) {
final SimpleTimer timer = new SimpleTimer();
for ( final int nonTypePL : Arrays.asList(100) ) {
final AFCalc calc = testBuilder.makeModel();
final AFCalculator calc = testBuilder.makeModel();
final double[] priors = testBuilder.makePriors();
final int[] ac = new int[testBuilder.numAltAlleles];
@ -172,7 +173,7 @@ public class AFCalcPerformanceTest {
vcb.genotypes(genotypes);
timer.start();
final AFCalcResult resultTracker = calc.getLog10PNonRef(vcb.make(), priors);
final AFCalculationResult resultTracker = calc.getLog10PNonRef(vcb.make(), HomoSapiensConstants.DEFAULT_PLOIDY, testBuilder.numAltAlleles, priors);
final long runtime = timer.getElapsedTimeNano();
final List<Object> columns = new LinkedList<Object>(coreValues);
@ -188,11 +189,11 @@ public class AFCalcPerformanceTest {
super("AnalyzeByNonInformative", Utils.append(columns, "non.type.pls", "n.non.informative"));
}
public void run(final AFCalcTestBuilder testBuilder, final List<Object> coreValues) {
public void run(final AFCalculatorTestBuilder testBuilder, final List<Object> coreValues) {
final SimpleTimer timer = new SimpleTimer();
for ( final int nonTypePL : Arrays.asList(100) ) {
final AFCalc calc = testBuilder.makeModel();
final AFCalculator calc = testBuilder.makeModel();
final double[] priors = testBuilder.makePriors();
final int[] ac = new int[testBuilder.numAltAlleles];
@ -202,7 +203,7 @@ public class AFCalcPerformanceTest {
final VariantContext vc = testBuilder.makeACTest(ac, nNonInformative, nonTypePL);
timer.start();
final AFCalcResult resultTracker = calc.getLog10PNonRef(vc, priors);
final AFCalculationResult resultTracker = calc.getLog10PNonRef(vc, HomoSapiensConstants.DEFAULT_PLOIDY, testBuilder.numAltAlleles, priors);
final long runtime = timer.getElapsedTimeNano();
final List<Object> columns = new LinkedList<Object>(coreValues);
@ -214,10 +215,10 @@ public class AFCalcPerformanceTest {
}
private static class ModelParams {
final AFCalcFactory.Calculation modelType;
final AFCalculatorImplementation modelType;
final int maxBiNSamples, maxTriNSamples;
private ModelParams(AFCalcFactory.Calculation modelType, int maxBiNSamples, int maxTriNSamples) {
private ModelParams(AFCalculatorImplementation modelType, int maxBiNSamples, int maxTriNSamples) {
this.modelType = modelType;
this.maxBiNSamples = maxBiNSamples;
this.maxTriNSamples = maxTriNSamples;
@ -238,6 +239,7 @@ public class AFCalcPerformanceTest {
SINGLE,
EXACT_LOG
}
public static void main(final String[] args) throws Exception {
final TTCCLayout layout = new TTCCLayout();
layout.setThreadPrinting(false);
@ -269,12 +271,12 @@ public class AFCalcPerformanceTest {
final List<ExactCallLogger.ExactCall> loggedCalls = ExactCallLogger.readExactLog(reader, startsToUse, parser);
for ( final ExactCallLogger.ExactCall call : loggedCalls ) {
final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1,
AFCalcFactory.Calculation.EXACT_INDEPENDENT,
AFCalcTestBuilder.PriorType.human);
final AFCalculatorTestBuilder testBuilder = new AFCalculatorTestBuilder(call.vc.getNSamples(), 1,
AFCalculatorImplementation.EXACT_INDEPENDENT,
AFCalculatorTestBuilder.PriorType.human);
logger.info(call);
final SimpleTimer timer = new SimpleTimer().start();
final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, testBuilder.makePriors());
final AFCalculationResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, HomoSapiensConstants.DEFAULT_PLOIDY, testBuilder.numAltAlleles,testBuilder.makePriors());
final long newNanoTime = timer.getElapsedTimeNano();
if ( call.originalCall.anyPolymorphic(-1) || result.anyPolymorphic(-1) ) {
logger.info("**** ONE IS POLY");
@ -297,14 +299,14 @@ public class AFCalcPerformanceTest {
final int nSamples = Integer.valueOf(args[1]);
final int ac = Integer.valueOf(args[2]);
final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(nSamples, 1,
AFCalcFactory.Calculation.EXACT_INDEPENDENT,
AFCalcTestBuilder.PriorType.human);
final AFCalculatorTestBuilder testBuilder = new AFCalculatorTestBuilder(nSamples, 1,
AFCalculatorImplementation.EXACT_INDEPENDENT,
AFCalculatorTestBuilder.PriorType.human);
final VariantContext vc = testBuilder.makeACTest(new int[]{ac}, 0, 100);
final SimpleTimer timer = new SimpleTimer().start();
final AFCalcResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vc, testBuilder.makePriors());
final AFCalculationResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vc, HomoSapiensConstants.DEFAULT_PLOIDY, testBuilder.numAltAlleles, testBuilder.makePriors());
final long runtime = timer.getElapsedTimeNano();
logger.info("result " + resultTracker.getLog10PosteriorOfAFGT0());
logger.info("runtime " + runtime);
@ -317,14 +319,14 @@ public class AFCalcPerformanceTest {
final PrintStream out = new PrintStream(new FileOutputStream(args[1]));
final List<ModelParams> modelParams = Arrays.asList(
new ModelParams(AFCalcFactory.Calculation.EXACT_REFERENCE, 10000, 10),
new ModelParams(AFCalculatorImplementation.EXACT_REFERENCE, 10000, 10),
// new ModelParams(AFCalcTestBuilder.ModelType.GeneralExact, 100, 10),
new ModelParams(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 10000, 1000));
new ModelParams(AFCalculatorImplementation.EXACT_INDEPENDENT, 10000, 1000));
final boolean ONLY_HUMAN_PRIORS = false;
final List<AFCalcTestBuilder.PriorType> priorTypes = ONLY_HUMAN_PRIORS
? Arrays.asList(AFCalcTestBuilder.PriorType.values())
: Arrays.asList(AFCalcTestBuilder.PriorType.human);
final List<AFCalculatorTestBuilder.PriorType> priorTypes = ONLY_HUMAN_PRIORS
? Arrays.asList(AFCalculatorTestBuilder.PriorType.values())
: Arrays.asList(AFCalculatorTestBuilder.PriorType.human);
final List<Analysis> analyzes = new ArrayList<Analysis>();
analyzes.add(new AnalyzeByACAndPL(coreColumns));
@ -336,9 +338,9 @@ public class AFCalcPerformanceTest {
for ( final int nSamples : Arrays.asList(1, 10, 100, 1000, 10000) ) {
for ( final ModelParams modelToRun : modelParams) {
if ( modelToRun.meetsConstraints(nAltAlleles, nSamples) ) {
for ( final AFCalcTestBuilder.PriorType priorType : priorTypes ) {
final AFCalcTestBuilder testBuilder
= new AFCalcTestBuilder(nSamples, nAltAlleles, modelToRun.modelType, priorType);
for ( final AFCalculatorTestBuilder.PriorType priorType : priorTypes ) {
final AFCalculatorTestBuilder testBuilder
= new AFCalculatorTestBuilder(nSamples, nAltAlleles, modelToRun.modelType, priorType);
for ( final Analysis analysis : analyzes ) {
logger.info(Utils.join("\t", Arrays.asList(iteration, nAltAlleles, nSamples, modelToRun.modelType, priorType, analysis.getName())));

View File

@ -0,0 +1,101 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
/**
* Instantiates Exact AF calculators given the required ploidy specs.
*
* <p>Unless you know better implementations of this class might return the same instance several times
* and so the client code might need to make sure that there are no collisions or race conditions.</p>
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public abstract class AFCalculatorProvider {
/**
* Returns a AF calculator capable to handle a particular variant-context.
* @param variantContext the target context build.
* @param defaultPloidy the assumed ploidy in case that there is no a GT call present to determine it.
* @return never {@code null}
*/
public AFCalculator getInstance(final VariantContext variantContext, final int defaultPloidy, final int maximumAltAlleles) {
if (variantContext == null)
throw new IllegalArgumentException("variant context cannot be null");
final int sampleCount = variantContext.getNSamples();
if (sampleCount == 0)
return getInstance(defaultPloidy,maximumAltAlleles);
final GenotypesContext genotypes = variantContext.getGenotypes();
final Genotype firstGenotype = genotypes.get(0);
int ploidy = firstGenotype.getPloidy();
if (ploidy <= 0) ploidy = defaultPloidy;
for (int i = 1 ; i < sampleCount; i++) {
final Genotype genotype = genotypes.get(i);
final int declaredPloidy = genotype.getPloidy();
final int actualPloidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy;
if (actualPloidy != ploidy) {
ploidy = AFCalculatorImplementation.UNBOUND_PLOIDY;
break;
}
}
return getInstance(ploidy,Math.min(variantContext.getNAlleles() - 1, maximumAltAlleles));
}
/**
* Returns a AF calculator given the required homogeneous ploidy and allele count (including the reference).
* @param ploidy the required ploidy.
* @param maximumAltAlleles the allele count.
* @return never {@code null}
*/
public abstract AFCalculator getInstance(final int ploidy, final int maximumAltAlleles);
}

View File

@ -51,13 +51,14 @@ import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingEngine;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.Utils;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
public class AFCalcTestBuilder {
public class AFCalculatorTestBuilder {
final static Allele A = Allele.create("A", true);
final static Allele C = Allele.create("C");
final static Allele G = Allele.create("G");
@ -70,11 +71,11 @@ public class AFCalcTestBuilder {
final int nSamples;
final int numAltAlleles;
final AFCalcFactory.Calculation modelType;
final AFCalculatorImplementation modelType;
final PriorType priorType;
public AFCalcTestBuilder(final int nSamples, final int numAltAlleles,
final AFCalcFactory.Calculation modelType, final PriorType priorType) {
public AFCalculatorTestBuilder(final int nSamples, final int numAltAlleles,
final AFCalculatorImplementation modelType, final PriorType priorType) {
this.nSamples = nSamples;
this.numAltAlleles = numAltAlleles;
this.modelType = modelType;
@ -99,8 +100,8 @@ public class AFCalcTestBuilder {
return nSamples;
}
public AFCalc makeModel() {
return AFCalcFactory.createAFCalc(modelType, nSamples, getNumAltAlleles(), 2);
public AFCalculator makeModel() {
return AFCalculatorFactory.createCalculator(modelType, nSamples, getNumAltAlleles(), HomoSapiensConstants.DEFAULT_PLOIDY);
}
public double[] makePriors() {

View File

@ -0,0 +1,83 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import htsjdk.variant.variantcontext.VariantContext;
/**
* Produces independent AF calculators per thread.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public abstract class ConcurrentAFCalculatorProvider extends AFCalculatorProvider {
private final ThreadLocal<AFCalculatorProvider> threadLocal;
/**
* Create a new concurrent af-calculator provider instance.
*/
public ConcurrentAFCalculatorProvider() {
threadLocal = new ThreadLocal<AFCalculatorProvider>() {
public AFCalculatorProvider initialValue() {
return createProvider();
}
};
}
@Override
public AFCalculator getInstance(final VariantContext vc, final int defaultPloidy, final int maxAltAlleleCount) {
return threadLocal.get().getInstance(vc,defaultPloidy,maxAltAlleleCount);
}
@Override
public AFCalculator getInstance(final int ploidy, final int maxAltAlleleCount) {
return threadLocal.get().getInstance(ploidy, maxAltAlleleCount);
}
protected abstract AFCalculatorProvider createProvider();
}

View File

@ -55,19 +55,18 @@ import htsjdk.variant.variantcontext.VariantContext;
import java.util.*;
public abstract class DiploidExactAFCalc extends ExactAFCalc {
public abstract class DiploidExactAFCalculator extends ExactAFCalculator {
private static final double LOG10_OF_2 = MathUtils.Log10Cache.get(2);
public DiploidExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy);
if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy);
public DiploidExactAFCalculator() {
}
@Override
protected AFCalcResult computeLog10PNonRef(final VariantContext vc,
final double[] log10AlleleFrequencyPriors) {
protected AFCalculationResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy,
final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) {
final int numAlternateAlleles = vc.getNAlleles() - 1;
final ArrayList<double[]> genotypeLikelihoods = getGLs(vc.getGenotypes(), true);
final int numSamples = genotypeLikelihoods.size()-1;
final int numChr = 2*numSamples;
@ -85,12 +84,13 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
indexesToACset.put(zeroSet.getACcounts(), zeroSet);
while ( !ACqueue.isEmpty() ) {
getStateTracker().incNEvaluations(); // keep track of the number of evaluations
stateTracker.incNEvaluations(); // keep track of the number of evaluations
// compute log10Likelihoods
final ExactACset set = ACqueue.remove();
calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors);
calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue,
indexesToACset, log10AlleleFrequencyPriors,stateTracker);
// clean up memory
indexesToACset.remove(set.getACcounts());
@ -98,17 +98,17 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
// System.out.printf(" *** removing used set=%s%n", set.ACcounts);
}
return getResultFromFinalState(vc, log10AlleleFrequencyPriors);
return getResultFromFinalState(vc, log10AlleleFrequencyPriors, stateTracker);
}
@Override
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse) {
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List<Allele> allelesToUse) {
return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL);
}
@Override
protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums) {
protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final int defaultPloidy, final LikelihoodSum[] likelihoodSums) {
final ArrayList<double[]> GLs = getGLs(vc.getGenotypes(), true);
for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
@ -140,18 +140,19 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
final int numChr,
final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset,
final double[] log10AlleleFrequencyPriors) {
final double[] log10AlleleFrequencyPriors,
final StateTracker stateTracker) {
//if ( DEBUG )
// System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
// compute the log10Likelihoods
computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors);
computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors, stateTracker);
final double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
// can we abort early because the log10Likelihoods are so small?
if ( getStateTracker().abort(log10LofK, set.getACcounts(), true) ) {
if ( stateTracker.abort(log10LofK, set.getACcounts(), true) ) {
//if ( DEBUG )
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
return log10LofK;
@ -227,7 +228,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
private void computeLofK(final ExactACset set,
final ArrayList<double[]> genotypeLikelihoods,
final double[] log10AlleleFrequencyPriors) {
final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) {
set.getLog10Likelihoods()[0] = 0.0; // the zero case
final int totalK = set.getACsum();
@ -238,8 +239,8 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
set.getLog10Likelihoods()[j] = set.getLog10Likelihoods()[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX];
final double log10Lof0 = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
getStateTracker().setLog10LikelihoodOfAFzero(log10Lof0);
getStateTracker().setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
stateTracker.setLog10LikelihoodOfAFzero(log10Lof0);
stateTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
return;
}
@ -261,7 +262,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
// update the MLE if necessary
getStateTracker().updateMLEifNeeded(log10LofK, set.getACcounts().getCounts());
stateTracker.updateMLEifNeeded(log10LofK, set.getACcounts().getCounts());
// apply the priors over each alternate allele
for ( final int ACcount : set.getACcounts().getCounts() ) {
@ -269,7 +270,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
log10LofK += log10AlleleFrequencyPriors[ACcount];
}
getStateTracker().updateMAPifNeeded(log10LofK, set.getACcounts().getCounts());
stateTracker.updateMAPifNeeded(log10LofK, set.getACcounts().getCounts());
}
private void pushData(final ExactACset targetSet,
@ -324,12 +325,15 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
return coeff;
}
@Override
public GenotypesContext subsetAlleles(final VariantContext vc,
final int defaultPloidy,
final List<Allele> allelesToUse,
final boolean assignGenotypes,
final int ploidy) {
final boolean assignGenotypes) {
if (defaultPloidy != 2)
throw new IllegalArgumentException("cannot support ploidy different than 2 and the default ploidy is " + defaultPloidy);
return allelesToUse.size() == 1
? GATKVariantContextUtils.subsetToRefOnly(vc, ploidy)
? GATKVariantContextUtils.subsetToRefOnly(vc, 2)
: GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse,
assignGenotypes ? GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN : GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL);
}

View File

@ -55,10 +55,11 @@ import java.util.*;
/**
* Uses the Exact calculation of Heng Li
*/
abstract class ExactAFCalc extends AFCalc {
abstract class ExactAFCalculator extends AFCalculator {
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
/**
* Sorts {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first.
* Sorts {@link ExactAFCalculator.LikelihoodSum} instances where those with higher likelihood are first.
*/
protected static final Comparator<LikelihoodSum> LIKELIHOOD_SUM_COMPARATOR = new Comparator<LikelihoodSum>() {
@ -68,7 +69,7 @@ abstract class ExactAFCalc extends AFCalc {
}
};
/**
* Sorts {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first but make sure that
* Sorts {@link ExactAFCalculator.LikelihoodSum} instances where those with higher likelihood are first but make sure that
* NON_REF alleles are place are last.
*/
protected static final Comparator<LikelihoodSum> LIKELIHOOD_NON_REF_THEN_SUM_COMPARATOR = new Comparator<LikelihoodSum>() {
@ -83,7 +84,7 @@ abstract class ExactAFCalc extends AFCalc {
}
};
/**
* Sorts {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with lower alternative allele index are first regardless of
* Sorts {@link ExactAFCalculator.LikelihoodSum} instances where those with lower alternative allele index are first regardless of
* the likelihood sum.
*/
protected static final Comparator<LikelihoodSum> LIKELIHOOD_INDEX_COMPARATOR = new Comparator<LikelihoodSum>() {
@ -93,8 +94,8 @@ abstract class ExactAFCalc extends AFCalc {
}
};
protected ExactAFCalc(final int nSamples, int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy);
protected ExactAFCalculator() {
}
/**
@ -135,10 +136,10 @@ abstract class ExactAFCalc extends AFCalc {
}
@Override
protected VariantContext reduceScope(final VariantContext vc) {
protected VariantContext reduceScope(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles) {
// don't try to genotype too many alternate alleles
final List<Allele> inputAltAlleles = vc.getAlternateAlleles();
final List<Allele> outputAltAlleles = reduceScopeAlleles(vc,getMaxAltAlleles());
final List<Allele> outputAltAlleles = reduceScopeAlleles(vc, defaultPloidy, maximumAlternativeAlleles);
// only if output allele has reduced from the input alt allele set size we should care.
final int altAlleleReduction = inputAltAlleles.size() - outputAltAlleles.size();
@ -146,17 +147,17 @@ abstract class ExactAFCalc extends AFCalc {
if (altAlleleReduction == 0)
return vc;
else if (altAlleleReduction != 0) {
logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles()
logger.warn("this tool is currently set to genotype at most " + maximumAlternativeAlleles
+ " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart()
+ " has " + (vc.getAlternateAlleles().size())
+ " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
final List<Allele> alleles = new ArrayList<>(getMaxAltAlleles() + 1);
final List<Allele> alleles = new ArrayList<>(maximumAlternativeAlleles + 1);
alleles.add(vc.getReference());
alleles.addAll(reduceScopeAlleles(vc, getMaxAltAlleles()));
alleles.addAll(reduceScopeAlleles(vc, defaultPloidy, maximumAlternativeAlleles));
final VariantContextBuilder builder = new VariantContextBuilder(vc);
builder.alleles(alleles);
builder.genotypes(reduceScopeGenotypes(vc, alleles));
builder.genotypes(reduceScopeGenotypes(vc, defaultPloidy, alleles));
if (altAlleleReduction < 0)
throw new IllegalStateException("unexpected: reduction increased the number of alt. alleles!: " + - altAlleleReduction + " " + vc + " " + builder.make());
return builder.make();
@ -170,7 +171,7 @@ abstract class ExactAFCalc extends AFCalc {
* @param numAllelesToChoose number of alleles to keep.
* @return the list of alternative allele to keep.
*/
protected List<Allele> reduceScopeAlleles(final VariantContext vc, final int numAllelesToChoose) {
protected List<Allele> reduceScopeAlleles(final VariantContext vc, final int defaultPloidy, final int numAllelesToChoose) {
// Look for the <NON_REF> allele to exclude it from the pruning if present.
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
@ -194,7 +195,7 @@ abstract class ExactAFCalc extends AFCalc {
}
// Calculate the allele likelihood sums.
reduceScopeCalculateLikelihoodSums(vc, likelihoodSums);
reduceScopeCalculateLikelihoodSums(vc, defaultPloidy, likelihoodSums);
// sort them by probability mass and choose the best ones
// Make sure that the <NON_REF> allele is last if present.
@ -227,7 +228,7 @@ abstract class ExactAFCalc extends AFCalc {
* @param vc source variant context.
* @param likelihoodSums where to update the likelihood sums.
*/
protected abstract void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums);
protected abstract void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final int defaultPloidy, final LikelihoodSum[] likelihoodSums);
/**
* Transforms the genotypes of the variant context according to the new subset of possible alleles.
@ -236,5 +237,5 @@ abstract class ExactAFCalc extends AFCalc {
* @param allelesToUse possible alleles.
* @return never {@code null}, the new set of genotype calls for the reduced scope.
*/
protected abstract GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse);
protected abstract GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List<Allele> allelesToUse);
}

View File

@ -83,9 +83,9 @@ public class ExactCallLogger implements Cloneable {
public static class ExactCall {
final VariantContext vc;
final long runtime;
final AFCalcResult originalCall;
final AFCalculationResult originalCall;
public ExactCall(VariantContext vc, final long runtime, final AFCalcResult originalCall) {
public ExactCall(VariantContext vc, final long runtime, final AFCalculationResult originalCall) {
this.vc = vc;
this.runtime = runtime;
this.originalCall = originalCall;
@ -103,7 +103,7 @@ public class ExactCallLogger implements Cloneable {
protected final void printCallInfo(final VariantContext vc,
final double[] log10AlleleFrequencyPriors,
final long runtimeNano,
final AFCalcResult result) {
final AFCalculationResult result) {
printCallElement(vc, "type", "ignore", vc.getType());
int allelei = 0;
@ -194,7 +194,7 @@ public class ExactCallLogger implements Cloneable {
builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
builder.genotypes(genotypes);
final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{}));
final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pRefByAllele);
final AFCalculationResult result = new AFCalculationResult(mleInts, 1, alleles, posteriors, priors, log10pRefByAllele);
calls.add(new ExactCall(builder.make(), runtimeNano, result));
}
break;

View File

@ -0,0 +1,238 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
/**
* A single fixed instance AF calculator provider.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class FixedAFCalculatorProvider extends AFCalculatorProvider {
private final AFCalculator singleton;
private final boolean verifyRequests;
private final int maximumAltAlleleCount;
private final int ploidy;
/**
* Constructs a fixed AF Calculator provider.
* @param configuration the called configuration. This is the source of the fixed ploidy and maximum number of
* supported alleles.
* @param logger the logger to be used by the AF calculator instance.
* @param verifyRequests whether this provider will verify that each request for the AF calculator meets the
* initial parameter values (ploidy, sample-count and maximum number of alleles.
*
* @throws NullPointerException if {@code configuration} is {@code null}, or it contains invalid values for
* sample ploidy and maximum number of alternative alleles, or {@code sampleCount} is less than 0.
*/
public FixedAFCalculatorProvider(final StandardCallerArgumentCollection configuration,
final Logger logger, final boolean verifyRequests) {
this(configuration.requestedAlleleFrequencyCalculationModel,configuration.genotypeArgs,logger,verifyRequests);
}
/**
* Constructs a fixed AF Calculator provider.
*
* @param configuration the called configuration. This is the source of the fixed ploidy and maximum number of
* supported alleles.
* @param logger the logger to be used by the AF calculator instance.
* @param verifyRequests whether this provider will verify that each request for the AF calculator meets the
* initial parameter values (ploidy, sample-count and maximum number of alleles.
*
* @throws IllegalArgumentException if {@code configuration} is {@code null}, or it contains invalid values for
* sample ploidy and maximum number of alternative alleles, or {@code sampleCount} is less than 0.
*/
public FixedAFCalculatorProvider(final GenotypeCalculationArgumentCollection configuration,
final Logger logger, final boolean verifyRequests) {
this(null,configuration,logger,verifyRequests);
}
/**
* Constructs a fixed AF Calculator provider.
*
* @param preferred preferred implementation.
* @param configuration the called configuration. This is the source of the fixed ploidy and maximum number of
* supported alleles.
* @param logger the logger to be used by the AF calculator instance.
* @param verifyRequests whether this provider will verify that each request for the AF calculator meets the
* initial parameter values (ploidy, sample-count and maximum number of alleles.
*
* @throws IllegalArgumentException if {@code configuration} is {@code null}, or it contains invalid values for
* sample ploidy and maximum number of alternative alleles, or {@code sampleCount} is less than 0.
*/
public FixedAFCalculatorProvider(final AFCalculatorImplementation preferred, final GenotypeCalculationArgumentCollection configuration,
final Logger logger, final boolean verifyRequests) {
if (configuration == null)
throw new IllegalArgumentException("null configuration");
if (configuration == null)
throw new IllegalArgumentException("null configuration genotype arguments");
if (configuration.samplePloidy < 1)
throw new IllegalArgumentException("invalid sample ploidy " + configuration.samplePloidy);
if (configuration.MAX_ALTERNATE_ALLELES < 0)
throw new IllegalArgumentException("invalid maximum number of alleles " + (configuration.MAX_ALTERNATE_ALLELES + 1));
ploidy = configuration.samplePloidy;
maximumAltAlleleCount = configuration.MAX_ALTERNATE_ALLELES;
singleton = AFCalculatorImplementation.bestValue(ploidy,maximumAltAlleleCount,preferred).newInstance();
singleton.setLogger(logger);
this.verifyRequests = verifyRequests;
}
@Override
public AFCalculator getInstance(final VariantContext vc, final int defaultPloidy, final int maximumAlleleCount) {
if (verifyRequests)
// supers implementation will call eventually one of the other methods, so no need to verify anything here.
return super.getInstance(vc,defaultPloidy,maximumAlleleCount);
return singleton;
}
@Override
public AFCalculator getInstance(final int ploidy, final int maxAltAlleleCount) {
if (verifyRequests) {
if (this.ploidy != AFCalculatorImplementation.UNBOUND_PLOIDY && ploidy != this.ploidy)
throw new IllegalStateException("non-supported ploidy");
if (maximumAltAlleleCount != AFCalculatorImplementation.UNBOUND_ALTERNATIVE_ALLELE_COUNT && maxAltAlleleCount > maximumAltAlleleCount)
throw new IllegalStateException("non-supported alleleCount");
}
return singleton;
}
/**
* Creates a fixed AF calculator provider that is thread safe given the engine configuration.
*
* @param toolkit the engine.
* @param config the caller configuration.
* @param logger reference to the logger for the AF calculator to dump messages to.
*
* @throws IllegalArgumentException if any of the input argument is {@code null} or contain invalid configuration
* like zero-samples, zero or negative ploidy or negative-zero maximum number of alleles.
*
* @return never {@code null}
*/
public static AFCalculatorProvider createThreadSafeProvider(final GenomeAnalysisEngine toolkit,
final StandardCallerArgumentCollection config,
final Logger logger) {
final GATKArgumentCollection arguments = toolkit.getArguments();
final boolean isMultithread = arguments.numberOfCPUThreadsPerDataThread > 1 || arguments.numberOfDataThreads > 1;
return !isMultithread
? new FixedAFCalculatorProvider(config,logger,false) :
new ConcurrentAFCalculatorProvider() {
@Override
protected AFCalculatorProvider createProvider() {
return new FixedAFCalculatorProvider(config,logger,false);
}
};
}
/**
* Creates a fixed AF calculator provider that is thread safe given the engine configuration.
*
* @param toolkit the engine.
* @param config the caller configuration.
* @param logger reference to the logger for the AF calculator to dump messages to.
* @param verifyRequests whether each request should be check for a compatible ploidy and max-alt-allele count.
* A non-compliant request would result in an exception when requesting the AF calculator instance.
*
* @throws IllegalArgumentException if any of the input argument is {@code null} or contain invalid configuration
* like zero-samples, zero or negative ploidy or negative-zero maximum number of alleles.
*
* @return never {@code null}.
*/
@SuppressWarnings("unused")
public static AFCalculatorProvider createThreadSafeProvider(final GenomeAnalysisEngine toolkit,
final StandardCallerArgumentCollection config,
final Logger logger, final boolean verifyRequests) {
final GATKArgumentCollection arguments = toolkit.getArguments();
final boolean isMultithread = arguments.numberOfCPUThreadsPerDataThread > 1 || arguments.numberOfDataThreads > 1;
return !isMultithread
? new FixedAFCalculatorProvider(config,logger,verifyRequests) :
new ConcurrentAFCalculatorProvider() {
@Override
protected AFCalculatorProvider createProvider() {
return new FixedAFCalculatorProvider(config,logger,verifyRequests);
}
};
}
/**
* Creates a fixed AF calculator provider that is thread safe given the engine configuration.
*
* @param toolkit the engine.
* @param config the caller configuration.
* @param logger reference to the logger for the AF calculator to dump messages to.
*
* @throws IllegalArgumentException if any of the input argument is {@code null} or contain invalid configuration
* like zero-samples, zero or negative ploidy or negative-zero maximum number of alleles.
*
* @return never {@code null}
*/
public static AFCalculatorProvider createThreadSafeProvider(final GenomeAnalysisEngine toolkit,
final GenotypeCalculationArgumentCollection config,
final Logger logger, final boolean verifyRequests) {
return toolkit.getArguments().numberOfCPUThreadsPerDataThread <= 1
? new FixedAFCalculatorProvider(config,logger,false) :
new ConcurrentAFCalculatorProvider() {
@Override
protected AFCalculatorProvider createProvider() {
return new FixedAFCalculatorProvider(config,logger,verifyRequests);
}
};
}
}

View File

@ -57,27 +57,25 @@ import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.*;
public class GeneralPloidyExactAFCalc extends ExactAFCalc {
public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them
private final int ploidy;
private final static boolean VERBOSE = false;
protected GeneralPloidyExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy);
this.ploidy = ploidy;
protected GeneralPloidyExactAFCalculator() {
}
@Override
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse) {
return subsetAlleles(vc,allelesToUse,false,ploidy);
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List<Allele> allelesToUse) {
return subsetAlleles(vc,defaultPloidy,allelesToUse,false);
}
@Override
public AFCalcResult computeLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) {
combineSinglePools(vc.getGenotypes(), vc.getNAlleles(), ploidy, log10AlleleFrequencyPriors);
return getResultFromFinalState(vc, log10AlleleFrequencyPriors);
protected AFCalculationResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy, final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) {
combineSinglePools(vc.getGenotypes(), defaultPloidy, vc.getNAlleles(), log10AlleleFrequencyPriors);
return getResultFromFinalState(vc, log10AlleleFrequencyPriors, stateTracker);
}
/**
@ -126,17 +124,29 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
}
}
@Override
protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums) {
protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final int defaultPloidy, final LikelihoodSum[] likelihoodSums) {
final int numOriginalAltAlleles = likelihoodSums.length;
final ArrayList<double[]> GLs = getGLs(vc.getGenotypes(), false);
for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
final GenotypesContext genotypes = vc.getGenotypes();
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
if (!genotype.hasPL())
continue;
final double[] gls = genotype.getLikelihoods().getAsVector();
if (MathUtils.sum(gls) >= GATKVariantContextUtils.SUM_GL_THRESH_NOCALL)
continue;
final int PLindexOfBestGL = MathUtils.maxElementIndex(gls);
final double bestToHomRefDiffGL = PLindexOfBestGL == PL_INDEX_OF_HOM_REF ? 0.0 : gls[PLindexOfBestGL] - gls[PL_INDEX_OF_HOM_REF];
final int declaredPloidy = genotype.getPloidy();
final int ploidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy;
final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
// by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
for (int k=1; k < acCount.length;k++)
if (acCount[k] > 0 )
likelihoodSums[k-1].sum += acCount[k] * (likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]);
likelihoodSums[k-1].sum += acCount[k] * bestToHomRefDiffGL;
}
}
@ -144,51 +154,51 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
* Simple non-optimized version that combines GLs from several pools and produces global AF distribution.
* @param GLs Inputs genotypes context with per-pool GLs
* @param numAlleles Number of alternate alleles
* @param ploidyPerPool Number of samples per pool
* @param log10AlleleFrequencyPriors Frequency priors
*/
protected void combineSinglePools(final GenotypesContext GLs,
final int defaultPloidy,
final int numAlleles,
final int ploidyPerPool,
final double[] log10AlleleFrequencyPriors) {
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs, true);
int combinedPloidy = 0;
// Combine each pool incrementally - likelihoods will be renormalized at each step
CombinedPoolLikelihoods combinedPoolLikelihoods = new CombinedPoolLikelihoods();
// first element: zero ploidy, e.g. trivial degenerate distribution
final int numAltAlleles = numAlleles - 1;
final int[] zeroCounts = new int[numAlleles];
final ExactACset set = new ExactACset(1, new ExactACcounts(zeroCounts));
set.getLog10Likelihoods()[0] = 0.0;
final int genotypeCount = genotypeLikelihoods.size();
final StateTracker stateTracker = getStateTracker(false,numAltAlleles);
int combinedPloidy = 0;
CombinedPoolLikelihoods combinedPoolLikelihoods = new CombinedPoolLikelihoods();
combinedPoolLikelihoods.add(set);
getStateTracker().reset(numAlleles - 1);
if ( genotypeCount <= 1 ) {
// no meaningful GLs at all, just set the tracker to non poly values
// just mimic-ing call below
getStateTracker().setLog10LikelihoodOfAFzero(0.0);
} else {
for (int p=1; p<genotypeCount; p++) {
combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, genotypeLikelihoods.get(p),
combinedPloidy, ploidyPerPool, numAlleles, log10AlleleFrequencyPriors);
combinedPloidy = ploidyPerPool + combinedPloidy; // total number of chromosomes in combinedLikelihoods
if (p < genotypeCount - 1) getStateTracker().reset(); // TODO -- why is this here? It makes it hard to track the n evaluation
}
for (final Genotype genotype : GLs.iterateInSampleNameOrder()) {
// recover gls and check if they qualify.
if (!genotype.hasPL())
continue;
final double[] gls = genotype.getLikelihoods().getAsVector();
if (MathUtils.sum(gls) >= GATKVariantContextUtils.SUM_GL_THRESH_NOCALL)
continue;
stateTracker.reset();
final int declaredPloidy = genotype.getPloidy();
final int ploidy = declaredPloidy < 1 ? defaultPloidy : declaredPloidy;
// they do qualify so we proceed.
combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, gls,
combinedPloidy, ploidy, numAlleles, log10AlleleFrequencyPriors, stateTracker);
combinedPloidy = ploidy + combinedPloidy; // total number of chromosomes in combinedLikelihoods
}
if (combinedPloidy == 0)
stateTracker.setLog10LikelihoodOfAFzero(0.0);
}
public CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool,
private CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool,
double[] newGL,
int originalPloidy,
int newGLPloidy,
int numAlleles,
final double[] log10AlleleFrequencyPriors) {
final double[] log10AlleleFrequencyPriors,
final StateTracker stateTracker) {
final LinkedList<ExactACset> ACqueue = new LinkedList<>();
// mapping of ExactACset indexes to the objects
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<>();
@ -206,11 +216,11 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
// keep processing while we have AC conformations that need to be calculated
while ( !ACqueue.isEmpty() ) {
getStateTracker().incNEvaluations();
stateTracker.incNEvaluations();
// compute log10Likelihoods
final ExactACset ACset = ACqueue.remove();
calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset);
calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset, stateTracker);
// clean up memory
indexesToACset.remove(ACset.getACcounts());
@ -243,12 +253,13 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
final int originalPloidy,
final int newGLPloidy,
final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
final HashMap<ExactACcounts, ExactACset> indexesToACset,
final StateTracker stateTracker) {
// compute likelihood in "set" of new set based on original likelihoods
final int numAlleles = set.getACcounts().getCounts().length;
final int newPloidy = set.getACsum();
final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy);
final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy, stateTracker);
// add to new pool
@ -256,7 +267,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
newPool.add(set);
// TODO -- change false to true this correct line when the implementation of this model is optimized (it's too slow now to handle this fix)
if ( getStateTracker().abort(log10LofK, set.getACcounts(), false) ) {
if ( stateTracker.abort(log10LofK, set.getACcounts(), false) ) {
return log10LofK;
}
@ -364,7 +375,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
final CombinedPoolLikelihoods firstGLs,
final double[] secondGL,
final double[] log10AlleleFrequencyPriors,
final int numAlleles, final int ploidy1, final int ploidy2) {
final int numAlleles, final int ploidy1, final int ploidy2, final StateTracker stateTracker) {
final int newPloidy = ploidy1 + ploidy2;
@ -381,9 +392,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
if ( totalAltK == 0 ) { // all-ref case
final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX];
set.getLog10Likelihoods()[0] = log10Lof0;
getStateTracker().setLog10LikelihoodOfAFzero(log10Lof0);
getStateTracker().setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
stateTracker.setLog10LikelihoodOfAFzero(log10Lof0);
stateTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
return log10Lof0;
} else {
@ -427,7 +437,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
// update the MLE if necessary
final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length);
// TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
getStateTracker().updateMLEifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts);
stateTracker.updateMLEifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts);
// apply the priors over each alternate allele
for (final int ACcount : altCounts ) {
@ -435,7 +445,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
log10LofK += log10AlleleFrequencyPriors[ACcount];
}
// TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
getStateTracker().updateMAPifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts);
stateTracker.updateMAPifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts);
return log10LofK;
}
@ -462,21 +472,17 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
* From a given variant context, extract a given subset of alleles, and update genotype context accordingly,
* including updating the PL's, and assign genotypes accordingly
* @param vc variant context with alleles and genotype likelihoods
* @param defaultPloidy ploidy to assume in case that {@code vc} does not contain that information
* for a sample.
* @param allelesToUse alleles to subset
* @param assignGenotypes true: assign hard genotypes, false: leave as no-call
* @param ploidy number of chromosomes per sample (pool)
* @return GenotypesContext with new PLs
*/
public GenotypesContext subsetAlleles(final VariantContext vc,
public GenotypesContext subsetAlleles(final VariantContext vc, final int defaultPloidy,
final List<Allele> allelesToUse,
final boolean assignGenotypes,
final int ploidy) {
final boolean assignGenotypes) {
// the genotypes with PLs
final GenotypesContext oldGTs = vc.getGenotypes();
List<Allele> NO_CALL_ALLELES = new ArrayList<>(ploidy);
for (int k=0; k < ploidy; k++)
NO_CALL_ALLELES.add(Allele.NO_CALL);
// samples
final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();
@ -492,8 +498,10 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
// create the new genotypes
for ( int k = 0; k < oldGTs.size(); k++ ) {
final Genotype g = oldGTs.get(sampleIndices.get(k));
final int declaredPloidy = g.getPloidy();
final int ploidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy;
if ( !g.hasLikelihoods() ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
newGTs.add(GenotypeBuilder.create(g.getSampleName(),GATKVariantContextUtils.noCallAlleles(ploidy)));
continue;
}
@ -514,7 +522,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
// if there is no mass on the (new) likelihoods, then just no-call the sample
if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
newGTs.add(GenotypeBuilder.create(g.getSampleName(), GATKVariantContextUtils.noCallAlleles(ploidy)));
}
else {
final GenotypeBuilder gb = new GenotypeBuilder(g);
@ -526,7 +534,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
// if we weren't asked to assign a genotype, then just no-call the sample
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
gb.alleles(NO_CALL_ALLELES);
gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy));
else
assignGenotype(gb, newLikelihoods, allelesToUse, ploidy);
newGTs.add(gb.make());

View File

@ -0,0 +1,121 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
/**
* Provider that defaults to the general ploidy implementation when the preferred one does not handle the required
* ploidy.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class GeneralPloidyFailOverAFCalculatorProvider extends AFCalculatorProvider {
private final AFCalculator preferred;
private final AFCalculatorImplementation preferredImplementation;
private final AFCalculator failOver;
/**
* Creates a new AF calculator provider given the genotyping arguments and logger reference.
* @param genotypeArgs genotyping parameter collection.
* @param logger where the AF calculator logging messages go. If {@code null}, logging message will not be emitted.
*
* @throws IllegalStateException if {@code genotypeArgs} is {@code null}.
*/
public GeneralPloidyFailOverAFCalculatorProvider(final GenotypeCalculationArgumentCollection genotypeArgs, final Logger logger) {
if (genotypeArgs == null)
throw new IllegalArgumentException("genotype arguments object cannot be null");
preferredImplementation = AFCalculatorImplementation.bestValue(genotypeArgs.samplePloidy,genotypeArgs.MAX_ALTERNATE_ALLELES, null);
preferred = preferredImplementation.newInstance();
preferred.setLogger(logger);
failOver = AFCalculatorImplementation.EXACT_GENERAL_PLOIDY.newInstance();
failOver.setLogger(logger);
}
/**
* {@inheritDoc}
* @param ploidy {@inheritDoc}
* @param maximumAlternativeAlleles {@inheritDoc}
* @return {@inheritDoc}
*/
@Override
public AFCalculator getInstance(final int ploidy, final int maximumAlternativeAlleles) {
return preferredImplementation.usableForParams(ploidy,maximumAlternativeAlleles) ? preferred : failOver;
}
/**
* Creates a AF calculator provider that complies to the contract of this class and that make sures that is
* safe to use in multi-thread walker runs.
*
* <p>
* Whether this is part of a multi-thread run is determined by looking into the engine configuration in {@code toolkit}.
* </p>
*
* @param toolkit enclosing engine.
* @param genotypeArgs the genotyping arguments.
* @param logger where to output logging messages for instantiated AF calculators.
* @return never {@code null}.
*/
public static AFCalculatorProvider createThreadSafeProvider(final GenomeAnalysisEngine toolkit,
final GenotypeCalculationArgumentCollection genotypeArgs,
final Logger logger) {
if (genotypeArgs == null)
throw new IllegalArgumentException("genotype arguments object cannot be null");
final GATKArgumentCollection arguments = toolkit.getArguments();
final boolean isMultithread = arguments.numberOfDataThreads > 1 || arguments.numberOfCPUThreadsPerDataThread > 1;
return isMultithread ? new ConcurrentAFCalculatorProvider() {
@Override
protected AFCalculatorProvider createProvider() {
return new GeneralPloidyFailOverAFCalculatorProvider(genotypeArgs,logger);
}
} : new GeneralPloidyFailOverAFCalculatorProvider(genotypeArgs,logger);
}
}

View File

@ -106,7 +106,7 @@ import java.util.*;
* as P(D | AF_* == 0) = prod_i P (D | AF_i == 0), after applying the theta^i
* prior for the ith least likely allele.
*/
public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
public class IndependentAllelesDiploidExactAFCalculator extends DiploidExactAFCalculator {
/**
* The min. confidence of an allele to be included in the joint posterior.
@ -116,47 +116,49 @@ import java.util.*;
private final static int[] BIALLELIC_NON_INFORMATIVE_PLS = new int[]{0,0,0};
private final static List<Allele> BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
/**
* Sorts AFCalcResults by their posteriors of AF > 0, so the
*/
private final static class CompareAFCalcResultsByPNonRef implements Comparator<AFCalcResult> {
private final static class CompareAFCalculatorResultsByPNonRef implements Comparator<AFCalculationResult> {
@Override
public int compare(AFCalcResult o1, AFCalcResult o2) {
public int compare(AFCalculationResult o1, AFCalculationResult o2) {
return -1 * Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0());
}
}
private final static CompareAFCalcResultsByPNonRef compareAFCalcResultsByPNonRef = new CompareAFCalcResultsByPNonRef();
private final static CompareAFCalculatorResultsByPNonRef compareAFCalcResultsByPNonRef = new CompareAFCalculatorResultsByPNonRef();
/**
* The AFCalc model we are using to do the bi-allelic computation
*/
final AFCalc biAlleleExactModel;
final AFCalculator biAlleleExactModel;
protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy);
biAlleleExactModel = new ReferenceDiploidExactAFCalc(nSamples, 1, ploidy);
protected IndependentAllelesDiploidExactAFCalculator() {
super();
biAlleleExactModel = new ReferenceDiploidExactAFCalculator();
}
/**
* Trivial subclass that helps with debugging by keeping track of the supporting information for this joint call
*/
private static class MyAFCalcResult extends AFCalcResult {
private static class MyAFCalculationResult extends AFCalculationResult {
/**
* List of the supporting bi-allelic AFCalcResults that went into making this multi-allelic joint call
*/
final List<AFCalcResult> supporting;
final List<AFCalculationResult> supporting;
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pRefByAllele, List<AFCalcResult> supporting) {
private MyAFCalculationResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pRefByAllele, List<AFCalculationResult> supporting) {
super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
this.supporting = supporting;
}
}
@Override
public AFCalcResult computeLog10PNonRef(final VariantContext vc,
final double[] log10AlleleFrequencyPriors) {
final List<AFCalcResult> independentResultTrackers = computeAlleleIndependentExact(vc, log10AlleleFrequencyPriors);
public AFCalculationResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy,
final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) {
final List<AFCalculationResult> independentResultTrackers = computeAlleleIndependentExact(vc, defaultPloidy, log10AlleleFrequencyPriors);
if ( independentResultTrackers.size() == 0 )
throw new IllegalStateException("Independent alleles model returned an empty list of results at VC " + vc);
@ -166,7 +168,7 @@ import java.util.*;
return independentResultTrackers.get(0);
} else {
// we are a multi-allelic, so we need to actually combine the results
final List<AFCalcResult> withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers);
final List<AFCalculationResult> withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers);
return combineIndependentPNonRefs(vc, withMultiAllelicPriors);
}
}
@ -181,12 +183,12 @@ import java.util.*;
*/
@Requires({"vc != null", "log10AlleleFrequencyPriors != null"})
@Ensures("goodIndependentResult(vc, result)")
protected final List<AFCalcResult> computeAlleleIndependentExact(final VariantContext vc,
protected final List<AFCalculationResult> computeAlleleIndependentExact(final VariantContext vc, final int defaultPloidy,
final double[] log10AlleleFrequencyPriors) {
final List<AFCalcResult> results = new LinkedList<AFCalcResult>();
final List<AFCalculationResult> results = new LinkedList<AFCalculationResult>();
for ( final VariantContext subvc : makeAlleleConditionalContexts(vc) ) {
final AFCalcResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors);
final AFCalculationResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, defaultPloidy, vc.getNAlleles() - 1, log10AlleleFrequencyPriors);
results.add(resultTracker);
}
@ -196,7 +198,7 @@ import java.util.*;
/**
* Helper function to ensure that the computeAlleleIndependentExact is returning reasonable results
*/
private static boolean goodIndependentResult(final VariantContext vc, final List<AFCalcResult> results) {
private static boolean goodIndependentResult(final VariantContext vc, final List<AFCalculationResult> results) {
if ( results.size() != vc.getNAlleles() - 1) return false;
for ( int i = 0; i < results.size(); i++ ) {
if ( results.get(i).getAllelesUsedInGenotyping().size() != 2 )
@ -387,8 +389,8 @@ import java.util.*;
return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make();
}
protected final List<AFCalcResult> applyMultiAllelicPriors(final List<AFCalcResult> conditionalPNonRefResults) {
final ArrayList<AFCalcResult> sorted = new ArrayList<AFCalcResult>(conditionalPNonRefResults);
protected final List<AFCalculationResult> applyMultiAllelicPriors(final List<AFCalculationResult> conditionalPNonRefResults) {
final ArrayList<AFCalculationResult> sorted = new ArrayList<AFCalculationResult>(conditionalPNonRefResults);
// sort the results, so the most likely allele is first
Collections.sort(sorted, compareAFCalcResultsByPNonRef);
@ -424,8 +426,8 @@ import java.util.*;
*
* @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently
*/
protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc,
final List<AFCalcResult> sortedResultsWithThetaNPriors) {
protected AFCalculationResult combineIndependentPNonRefs(final VariantContext vc,
final List<AFCalculationResult> sortedResultsWithThetaNPriors) {
int nEvaluations = 0;
final int nAltAlleles = sortedResultsWithThetaNPriors.size();
final int[] alleleCountsOfMLE = new int[nAltAlleles];
@ -437,7 +439,7 @@ import java.util.*;
double log10PosteriorOfACGt0Sum = 0.0;
boolean anyPoly = false;
for ( final AFCalcResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) {
for ( final AFCalculationResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) {
final Allele altAllele = sortedResultWithThetaNPriors.getAllelesUsedInGenotyping().get(1);
final int altI = vc.getAlleles().indexOf(altAllele) - 1;
@ -486,7 +488,7 @@ import java.util.*;
log10PosteriorOfACGt0 - log10PriorsOfAC[1]
};
return new MyAFCalcResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(),
return new MyAFCalculationResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(),
// necessary to ensure all values < 0
MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true),
// priors incorporate multiple alt alleles, must be normalized

View File

@ -58,13 +58,16 @@ import java.util.Map;
/**
* Original bi-allelic ~O(N) implementation. Kept here for posterity and reference
*/
class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
protected OriginalDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy);
class OriginalDiploidExactAFCalculator extends DiploidExactAFCalculator {
protected OriginalDiploidExactAFCalculator() {
}
@Override
protected AFCalcResult computeLog10PNonRef(VariantContext vc, double[] log10AlleleFrequencyPriors) {
protected AFCalculationResult computeLog10PNonRef(final VariantContext vc,
@SuppressWarnings("unused")
final int defaultPloidy,
final double[] log10AlleleFrequencyPriors,
final StateTracker stateTracker) {
final double[] log10AlleleFrequencyLikelihoods = new double[log10AlleleFrequencyPriors.length];
final double[] log10AlleleFrequencyPosteriors = new double[log10AlleleFrequencyPriors.length];
final Pair<Integer, Integer> result = linearExact(vc, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
@ -79,7 +82,7 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
final double log10PRef = log10Posteriors[1] > log10Posteriors[0] ? MathUtils.LOG10_P_OF_ZERO : 0.0;
final Map<Allele, Double> log10pRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PRef);
return new AFCalcResult(new int[]{mleK}, 0, vc.getAlleles(),
return new AFCalculationResult(new int[]{mleK}, 0, vc.getAlleles(),
MathUtils.normalizeFromLog10(log10Likelihoods, true),
MathUtils.normalizeFromLog10(log10Priors, true),
log10pRefByAllele);
@ -122,7 +125,7 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
}
}
public Pair<Integer, Integer> linearExact(final VariantContext vc,
private Pair<Integer, Integer> linearExact(final VariantContext vc,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyLikelihoods,
double[] log10AlleleFrequencyPosteriors) {
@ -194,6 +197,6 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
logY.rotate();
}
return new Pair<Integer, Integer>(lastK, mleK);
return new Pair<>(lastK, mleK);
}
}

View File

@ -46,8 +46,7 @@
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc {
protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy);
public class ReferenceDiploidExactAFCalculator extends DiploidExactAFCalculator {
protected ReferenceDiploidExactAFCalculator() {
}
}

View File

@ -57,9 +57,15 @@ import java.util.List;
import java.util.Map;
/**
* TODO this class (+AFCalculator) is a bit messy... it seems that it combines "debugging" (unnecessarily adding CPU cost in production)
* TODO but it also contains important part of the AF calculation state... why mix both!!!? It seems that the second
* TODO part could be just blend into AFCalculator ... one one hand you want to reduce classes code size ... but these
* TODO two classes code seems to be quite intertwine and makes it difficult to understand what is going on.
* in the production setting without much need
*
* Keeps track of the state information during the exact model AF calculation.
*
* Tracks things like the MLE and MAP AC values, their corresponding likelhood and posterior
* Tracks things like the MLE and MAP AC values, their corresponding likelihood and posterior
* values, the likelihood of the AF == 0 state, and the number of evaluations needed
* by the calculation to compute the P(AF == 0)
*/
@ -120,7 +126,7 @@ final class StateTracker {
* @param maxAltAlleles an integer >= 1
*/
public StateTracker(final int maxAltAlleles) {
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles);
if ( maxAltAlleles < 0 ) throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles);
alleleCountsOfMLE = new int[maxAltAlleles];
alleleCountsOfMAP = new int[maxAltAlleles];
@ -170,7 +176,7 @@ final class StateTracker {
}
@Ensures("result >= 0")
protected int getnEvaluations() {
protected int getNEvaluations() {
return nEvaluations;
}
@ -206,7 +212,7 @@ final class StateTracker {
* @return an AFCalcResult summarizing the final results of this calculation
*/
@Requires("allelesUsedInGenotyping != null")
protected AFCalcResult toAFCalcResult(final double[] log10PriorsByAC) {
protected AFCalculationResult toAFCalculationResult(final double[] log10PriorsByAC) {
final int [] subACOfMLE = Arrays.copyOf(alleleCountsOfMLE, allelesUsedInGenotyping.size() - 1);
final double[] log10Likelihoods = MathUtils.normalizeFromLog10(new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()}, true);
final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true);
@ -218,7 +224,7 @@ final class StateTracker {
log10pRefByAllele.put(allele, log10PRef);
}
return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pRefByAllele);
return new AFCalculationResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pRefByAllele);
}
// --------------------------------------------------------------------------------
@ -351,4 +357,9 @@ final class StateTracker {
this.allelesUsedInGenotyping = allelesUsedInGenotyping;
}
public void ensureMaximumAlleleCapacity(final int capacity) {
if (this.alleleCountsOfMAP.length < capacity)
reset(capacity);
}
}

View File

@ -68,6 +68,7 @@ import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
@ -96,6 +97,7 @@ import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.sam.ReadUtils;
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import java.io.FileNotFoundException;
@ -697,7 +699,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// Seems that at least with some test data we can lose genuine haploid variation if we use
// UGs engine with ploidy == 1
simpleUAC.genotypeArgs.samplePloidy = Math.max(2,SCAC.genotypeArgs.samplePloidy);
activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(simpleUAC, toolkit);
activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(simpleUAC,
FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),simpleUAC,logger), toolkit);
activeRegionEvaluationGenotyperEngine.setLogger(logger);
if( SCAC.CONTAMINATION_FRACTION_FILE != null )
@ -707,7 +711,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other.");
final GenomeLocParser genomeLocParser = toolkit.getGenomeLocParser();
genotypingEngine = new HaplotypeCallerGenotypingEngine( SCAC, samplesList, genomeLocParser, !doNotRunPhysicalPhasing);
genotypingEngine = new HaplotypeCallerGenotypingEngine( SCAC, samplesList, genomeLocParser, FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),SCAC,logger), !doNotRunPhysicalPhasing);
// initialize the output VCF header
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
@ -880,7 +885,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// if we don't have any data, just abort early
return new ActivityProfileState(ref.getLocus(), 0.0);
final List<Allele> noCall = Collections.singletonList(Allele.NO_CALL); // used to noCall all genotypes until the exact model is applied
final int ploidy = activeRegionEvaluationGenotyperEngine.getConfiguration().genotypeArgs.samplePloidy;
final List<Allele> noCall = GATKVariantContextUtils.noCallAlleles(ploidy); // used to noCall all genotypes until the exact model is applied
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();

View File

@ -51,8 +51,7 @@ import com.google.java.contract.Requires;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.Utils;
@ -71,7 +70,6 @@ import java.util.*;
*/
public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeCallerArgumentCollection> {
private final static List<Allele> NO_CALL = Collections.singletonList(Allele.NO_CALL);
private static final int ALLELE_EXTENSION = 2;
private static final String phase01 = "0|1";
private static final String phase10 = "1|0";
@ -91,8 +89,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
* @param genomeLocParser {@inheritDoc}
* @param doPhysicalPhasing whether to try physical phasing.
*/
public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser, final boolean doPhysicalPhasing) {
super(configuration,samples,genomeLocParser);
public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser, final AFCalculatorProvider afCalculatorProvider, final boolean doPhysicalPhasing) {
super(configuration,samples,genomeLocParser,afCalculatorProvider);
if (genomeLocParser == null)
throw new IllegalArgumentException("the genome location parser provided cannot be null");
this.doPhysicalPhasing= doPhysicalPhasing;
@ -100,25 +98,6 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
genotypingModel = new InfiniteRandomMatingPopulationModel();
}
/**
* {@inheritDoc}
*/
public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser) {
this(configuration,samples,genomeLocParser,false);
}
@Override
protected ThreadLocal<AFCalc> getAlleleFrequencyCalculatorThreadLocal() {
return new ThreadLocal<AFCalc>() {
@Override
public AFCalc initialValue() {
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes,
configuration.emitReferenceConfidence != ReferenceConfidenceMode.NONE , logger);
}
};
}
/**
* Change the merge variant across haplotypes for this engine.
*
@ -226,6 +205,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
// Walk along each position in the key set and create each event to be outputted
final Set<Haplotype> calledHaplotypes = new HashSet<>();
final List<VariantContext> returnCalls = new ArrayList<>();
final int ploidy = configuration.genotypeArgs.samplePloidy;
final List<Allele> noCallAlleles = GATKVariantContextUtils.noCallAlleles(ploidy);
for( final int loc : startPosKeySet ) {
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region
@ -276,7 +257,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
if (emitReferenceConfidence)
readAlleleLikelihoods.addNonReferenceAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
final GenotypesContext genotypes = calculateGLsForThisEvent( readAlleleLikelihoods, mergedVC );
final GenotypesContext genotypes = calculateGLsForThisEvent( readAlleleLikelihoods, mergedVC, noCallAlleles );
final VariantContext call = calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel);
if( call != null ) {
@ -698,14 +679,14 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
*/
@Requires({"readLikelihoods!= null", "mergedVC != null"})
@Ensures("result != null")
private GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods<Allele> readLikelihoods, final VariantContext mergedVC ) {
private GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods<Allele> readLikelihoods, final VariantContext mergedVC, final List<Allele> noCallAlleles ) {
final List<Allele> vcAlleles = mergedVC.getAlleles();
final AlleleList<Allele> alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles);
final GenotypingLikelihoods<Allele> likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods));
final int sampleCount = samples.sampleCount();
final GenotypesContext result = GenotypesContext.create(sampleCount);
for (int s = 0; s < sampleCount; s++)
result.add(new GenotypeBuilder(samples.sampleAt(s)).alleles(NO_CALL).PL(likelihoods.sampleLikelihoods(s).getAsPLs()).make());
result.add(new GenotypeBuilder(samples.sampleAt(s)).alleles(noCallAlleles).PL(likelihoods.sampleLikelihoods(s).getAsPLs()).make());
return result;
}
@ -769,23 +750,6 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
return eventMapper;
}
@Ensures({"result.size() == haplotypeAllelesForSample.size()"})
protected static List<Allele> findEventAllelesInSample( final List<Allele> eventAlleles, final List<Allele> haplotypeAlleles, final List<Allele> haplotypeAllelesForSample, final List<List<Haplotype>> alleleMapper, final List<Haplotype> haplotypes ) {
if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return NO_CALL; }
final List<Allele> eventAllelesForSample = new ArrayList<>();
for( final Allele a : haplotypeAllelesForSample ) {
final Haplotype haplotype = haplotypes.get(haplotypeAlleles.indexOf(a));
for( int iii = 0; iii < alleleMapper.size(); iii++ ) {
final List<Haplotype> mappedHaplotypes = alleleMapper.get(iii);
if( mappedHaplotypes.contains(haplotype) ) {
eventAllelesForSample.add(eventAlleles.get(iii));
break;
}
}
}
return eventAllelesForSample;
}
@Deprecated
protected static Map<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final byte[] ref, final GenomeLoc refLoc, final String sourceNameToAdd ) {
return new EventMap(haplotype, ref, refLoc, sourceNameToAdd);

View File

@ -48,6 +48,7 @@ package org.broadinstitute.gatk.tools.walkers.validation;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
@ -352,12 +353,15 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
final GenomeAnalysisEngine toolkit = getToolkit();
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
snpEngine = new UnifiedGenotypingEngine(uac,toolkit);
snpEngine = new UnifiedGenotypingEngine(uac,
FixedAFCalculatorProvider.createThreadSafeProvider(toolkit, uac, logger),toolkit);
// Adding the INDEL calling arguments for UG
UnifiedArgumentCollection uac_indel = uac.clone();
uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
indelEngine = new UnifiedGenotypingEngine(uac_indel,toolkit);
indelEngine = new UnifiedGenotypingEngine(uac_indel,
FixedAFCalculatorProvider.createThreadSafeProvider(toolkit, uac, logger),toolkit);
// make sure we have callConf set to the threshold set by the UAC so we can use it later.
callConf = uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING;

View File

@ -46,18 +46,20 @@
package org.broadinstitute.gatk.tools.walkers.validation.validationsiteselector;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorFactory;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import java.util.TreeSet;
public class GLBasedSampleSelector extends SampleSelector {
private static final int MAX_ALT_ALLELES = 4;
double[] flatPriors = null;
final double referenceLikelihood;
AFCalc AFCalculator;
AFCalculator afCalculator;
public GLBasedSampleSelector(TreeSet<String> sm, double refLik) {
super(sm);
@ -78,9 +80,9 @@ public class GLBasedSampleSelector extends SampleSelector {
// do we want to apply a prior? maybe user-spec?
if ( flatPriors == null ) {
flatPriors = new double[1+2*samples.size()];
AFCalculator = AFCalcFactory.createAFCalc(samples.size(), 4, 2);
afCalculator = AFCalculatorFactory.createCalculator(samples.size(), MAX_ALT_ALLELES, HomoSapiensConstants.DEFAULT_PLOIDY);
}
final AFCalcResult result = AFCalculator.getLog10PNonRef(subContext, flatPriors);
final AFCalculationResult result = afCalculator.getLog10PNonRef(subContext, HomoSapiensConstants.DEFAULT_PLOIDY, MAX_ALT_ALLELES, flatPriors);
// do we want to let this qual go up or down?
if ( result.getLog10LikelihoodOfAFEq0() < referenceLikelihood ) {
return true;

View File

@ -63,11 +63,11 @@ import org.broadinstitute.gatk.engine.walkers.Window;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.GeneralPloidyFailOverAFCalculatorProvider;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.SampleUtils;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import org.broadinstitute.gatk.utils.help.HelpConstants;
import org.broadinstitute.gatk.utils.variant.GATKVCFUtils;
@ -166,7 +166,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(toolkit, variants);
final SampleList samples = new IndexedSampleList(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
// create the genotyping engine
genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode);
genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), GeneralPloidyFailOverAFCalculatorProvider.createThreadSafeProvider(toolkit, genotypeArgs, logger),
toolkit.getArguments().BAQMode);
// create the annotation engine
annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.<String>emptyList(), this, toolkit);
@ -182,6 +183,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
final Set<String> sampleNameSet = SampleListUtils.asSet(samples);
final VCFHeader vcfHeader = new VCFHeader(headerLines, sampleNameSet);
vcfWriter.writeHeader(vcfHeader);
logger.info("Notice that the -ploidy parameter is ignored in " + getClass().getSimpleName() + " tool as this is automatically determined by the input variant files");
}
public VariantContext map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
@ -192,23 +195,9 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
final VariantContext combinedVC = ReferenceConfidenceVariantContextMerger.merge(tracker.getPrioritizedValue(variants, loc), loc, INCLUDE_NON_VARIANTS ? ref.getBase() : null, true);
if ( combinedVC == null )
return null;
checkPloidy(combinedVC);
return regenotypeVC(tracker, ref, combinedVC);
}
private void checkPloidy(final VariantContext combinedVC) {
final int requiredPloidy = genotypeArgs.samplePloidy;
for (final Genotype g : combinedVC.getGenotypes()) {
if (g.getPloidy() != requiredPloidy) {
throw new UserException.BadArgumentValue("ploidy",
"the input variant data contains calls with a different ploidy than the one specified (" + requiredPloidy
+ "). For example sample (" + g.getSampleName() + ") at (" + combinedVC.getChr() + ":" + combinedVC.getStart() + ")");
}
}
}
/**
* Re-genotype (and re-annotate) a combined genomic VC
*

View File

@ -50,6 +50,7 @@ import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedSampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleListUtils;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
import org.broadinstitute.gatk.utils.commandline.ArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.Output;
import org.broadinstitute.gatk.engine.CommandLineGATK;
@ -129,7 +130,9 @@ public class RegenotypeVariants extends RodWalker<Integer, Integer> implements T
final SampleList samples =
new IndexedSampleList(SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)));
final Set<String> sampleNameSet = SampleListUtils.asSet(samples);
UG_engine = new UnifiedGenotypingEngine(UAC, samples,toolkit.getGenomeLocParser(),toolkit.getArguments().BAQMode);
UG_engine = new UnifiedGenotypingEngine(UAC, samples,toolkit.getGenomeLocParser(),
FixedAFCalculatorProvider.createThreadSafeProvider(toolkit,UAC,logger),
toolkit.getArguments().BAQMode);
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName)));

View File

@ -56,6 +56,7 @@ import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.Utils;
@ -78,7 +79,8 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest {
final UnifiedArgumentCollection args = new UnifiedArgumentCollection();
final SampleList fakeSamples = SampleListUtils.singletonList("fake");
ugEngine = new UnifiedGenotypingEngine(args,fakeSamples,engine.getGenomeLocParser(),engine.getArguments().BAQMode);
ugEngine = new UnifiedGenotypingEngine(args,fakeSamples,engine.getGenomeLocParser(),
new FixedAFCalculatorProvider(args,null,true),engine.getArguments().BAQMode);
}
private UnifiedGenotypingEngine getEngine() {

View File

@ -69,16 +69,16 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "39cccae4a26c64e09631e28f17db2792");
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "c731cd0ddb0fa153bffa7bc19586bfa6");
}
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "5e4013183ca26b0d7c48ea2ec147d9d7");
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "b6ed1729a25670d81aff61099fb13998");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "af69cf83a27d082d76b18fede7ac2699");
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "54d718401c413fdbc1f801061bf8e7d1");
}
}

View File

@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","68a2aa0b213fa5a53e925542e340fbe8");
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","1426e311501a7bd96ecb75ed839f7c64");
}
@Test(enabled = true)

View File

@ -135,7 +135,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
Arrays.asList("8084abd2460e934e483403aff76a3698"));
Arrays.asList("34a92dd832bbb8ed53abf21ba88c6faa"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}

View File

@ -65,7 +65,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("9824ea30340fd3632bad672fc6ade114"));
Arrays.asList("a15a28d854789c71ef2879dd4c606b1a"));
executeTest("test MultiSample Pilot1", spec);
}
@ -97,7 +97,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("2474c60302fc79f28bb0c721d305c09b"));
Arrays.asList("1a36b5c036e0452f526dc1a5fdd60929"));
executeTest("test Multiple SNP alleles", spec);
}

View File

@ -46,13 +46,14 @@
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import htsjdk.variant.variantcontext.*;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedGenotypingEngine;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.QualityUtils;
import org.broadinstitute.gatk.utils.Utils;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
@ -61,7 +62,7 @@ import org.testng.annotations.Test;
import java.util.*;
public class AFCalcUnitTest extends BaseTest {
public class AFCalculationUnitTest extends BaseTest {
static Allele A = Allele.create("A", true);
static Allele C = Allele.create("C");
static Allele G = Allele.create("G");
@ -76,6 +77,19 @@ public class AFCalcUnitTest extends BaseTest {
final private static boolean Guillermo_FIXME = false; // TODO -- can only be enabled when GdA fixes bug
final private static boolean DEBUG_ONLY = false;
protected static List<AFCalculator> createAFCalculators(final List<AFCalculatorImplementation> calcs, final int maxAltAlleles, final int ploidy) {
final List<AFCalculator> AFCalculators = new LinkedList<>();
for ( final AFCalculatorImplementation calc : calcs ) {
if (calc.usableForParams(ploidy,maxAltAlleles))
AFCalculators.add(calc.newInstance());
else
throw new IllegalStateException("cannot use " + calc + " calculator instance with combination " + maxAltAlleles + " " + ploidy);
}
return AFCalculators;
}
@BeforeSuite
public void before() {
AA1 = makePL(Arrays.asList(A, A), 0, 20, 20);
@ -102,14 +116,14 @@ public class AFCalcUnitTest extends BaseTest {
private class GetGLsTest extends TestDataProvider {
GenotypesContext GLs;
int numAltAlleles;
final AFCalc calc;
final AFCalculator calc;
final int[] expectedACs;
final double[] priors;
final String priorName;
private GetGLsTest(final AFCalc calc, int numAltAlleles, List<Genotype> arg, final double[] priors, final String priorName) {
private GetGLsTest(final AFCalculator calc, int numAltAlleles, List<Genotype> arg, final double[] priors, final String priorName) {
super(GetGLsTest.class);
GLs = GenotypesContext.create(new ArrayList<Genotype>(arg));
GLs = GenotypesContext.create(new ArrayList<>(arg));
this.numAltAlleles = numAltAlleles;
this.calc = calc;
this.priors = priors;
@ -125,20 +139,20 @@ public class AFCalcUnitTest extends BaseTest {
}
}
public AFCalcResult execute() {
return getCalc().getLog10PNonRef(getVC(), getPriors());
public AFCalculationResult execute() {
return getCalc().getLog10PNonRef(getVC(), HomoSapiensConstants.DEFAULT_PLOIDY, numAltAlleles, getPriors());
}
public AFCalcResult executeRef() {
final AFCalc ref = AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_REFERENCE, getCalc().nSamples, getCalc().getMaxAltAlleles());
return ref.getLog10PNonRef(getVC(), getPriors());
public AFCalculationResult executeRef() {
final AFCalculator ref = AFCalculatorImplementation.EXACT_REFERENCE.newInstance();
return ref.getLog10PNonRef(getVC(), HomoSapiensConstants.DEFAULT_PLOIDY, numAltAlleles, getPriors());
}
public double[] getPriors() {
return priors;
}
public AFCalc getCalc() {
public AFCalculator getCalc() {
return calc;
}
@ -165,13 +179,18 @@ public class AFCalcUnitTest extends BaseTest {
}
}
private static final int MAX_ALT_ALLELES = 2;
private static final int PLOIDY = 2;
@DataProvider(name = "wellFormedGLs")
public Object[][] createSimpleGLsData() {
final List<Genotype> biAllelicSamples = Arrays.asList(AA1, AB1, BB1);
final List<Genotype> triAllelicSamples = Arrays.asList(AA2, AB2, BB2, AC2, BC2, CC2);
for ( final int nSamples : Arrays.asList(1, 2, 3, 4) ) {
List<AFCalc> calcs = AFCalcFactory.createAFCalcs( Arrays.asList( AFCalcFactory.Calculation.values() ), 4, 2, 2);
List<AFCalculator> calcs = createAFCalculators(Arrays.asList(AFCalculatorImplementation.values()), MAX_ALT_ALLELES, PLOIDY);
final int nPriorValues = 2*nSamples+1;
final double[] flatPriors = MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
@ -179,7 +198,7 @@ public class AFCalcUnitTest extends BaseTest {
UnifiedGenotypingEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001, new ArrayList<Double>());
for ( final double[] priors : Arrays.asList(flatPriors, humanPriors) ) { // , humanPriors) ) {
for ( AFCalc model : calcs ) {
for ( AFCalculator model : calcs ) {
final String priorName = priors == humanPriors ? "human" : "flat";
// bi-allelic
@ -188,7 +207,7 @@ public class AFCalcUnitTest extends BaseTest {
new GetGLsTest(model, 1, genotypes, priors, priorName);
// tri-allelic
if ( INCLUDE_TRIALLELIC && ( ! priorName.equals("human") || Guillermo_FIXME ) && ! ( model instanceof OriginalDiploidExactAFCalc) ) // || model != generalCalc ) )
if ( INCLUDE_TRIALLELIC && ( ! priorName.equals("human") || Guillermo_FIXME ) && ! ( model instanceof OriginalDiploidExactAFCalculator) ) // || model != generalCalc ) )
for ( List<Genotype> genotypes : Utils.makePermutations(triAllelicSamples, nSamples, true) )
new GetGLsTest(model, 2, genotypes, priors, priorName);
}
@ -261,12 +280,12 @@ public class AFCalcUnitTest extends BaseTest {
samples.addAll(Collections.nCopies(nNonInformative, testData.nonInformative));
final int nSamples = samples.size();
List<AFCalc> calcs = AFCalcFactory.createAFCalcs(Arrays.asList(AFCalcFactory.Calculation.values()), 4, 2, 2);
List<AFCalculator> calcs = createAFCalculators(Arrays.asList(AFCalculatorImplementation.values()), MAX_ALT_ALLELES, PLOIDY);
final double[] priors = MathUtils.normalizeFromLog10(new double[2*nSamples+1], true); // flat priors
for ( AFCalc model : calcs ) {
if ( testData.nAltAlleles > 1 && model instanceof OriginalDiploidExactAFCalc )
for ( AFCalculator model : calcs ) {
if ( testData.nAltAlleles > 1 && model instanceof OriginalDiploidExactAFCalculator)
continue;
final GetGLsTest onlyInformative = new GetGLsTest(model, testData.nAltAlleles, testData.called, priors, "flat");
@ -285,19 +304,22 @@ public class AFCalcUnitTest extends BaseTest {
@Test(enabled = true && ! DEBUG_ONLY, dataProvider = "GLsWithNonInformative", dependsOnMethods = {"testBiallelicGLs", "testTriallelicGLs"})
public void testGLsWithNonInformative(GetGLsTest onlyInformative, GetGLsTest withNonInformative) {
final AFCalcResult expected = onlyInformative.execute();
final AFCalcResult actual = withNonInformative.execute();
final AFCalculationResult expected = onlyInformative.execute();
final AFCalculationResult actual = withNonInformative.execute();
testResultSimple(withNonInformative);
compareAFCalcResults(actual, expected, onlyInformative.getCalc(), true);
compareAFCalcResults(actual, expected, onlyInformative.getCalc(), onlyInformative.numAltAlleles, true);
}
private void testResultSimple(final GetGLsTest cfg) {
final AFCalcResult refResultTracker = cfg.executeRef();
final AFCalcResult resultTracker = cfg.execute();
compareAFCalcResults(resultTracker, refResultTracker, cfg.getCalc(), true);
final AFCalculationResult refResultTracker = cfg.executeRef();
final AFCalculationResult resultTracker = cfg.execute();
try {
compareAFCalcResults(resultTracker, refResultTracker, cfg.getCalc(), cfg.numAltAlleles, true);
} catch (Throwable t) {
cfg.execute();
throw new RuntimeException(t);
}
Assert.assertNotNull(resultTracker.getAllelesUsedInGenotyping());
Assert.assertTrue(cfg.getAlleles().containsAll(resultTracker.getAllelesUsedInGenotyping()), "Result object has alleles not in our initial allele list");
@ -310,9 +332,9 @@ public class AFCalcUnitTest extends BaseTest {
}
}
private void compareAFCalcResults(final AFCalcResult actual, final AFCalcResult expected, final AFCalc calc, final boolean onlyPosteriorsShouldBeEqual) {
private void compareAFCalcResults(final AFCalculationResult actual, final AFCalculationResult expected, final AFCalculator calc, final int maxAltAlleles, final boolean onlyPosteriorsShouldBeEqual) {
// note we cannot really test the multi-allelic case because we actually meaningfully differ among the models here
final double TOLERANCE = calc.getMaxAltAlleles() > 1 ? 1000 : 0.1; // much tighter constraints on bi-allelic results
final double TOLERANCE = maxAltAlleles > 1 ? 1000 : 0.1; // much tighter constraints on bi-allelic results
if ( ! onlyPosteriorsShouldBeEqual ) {
Assert.assertEquals(actual.getLog10PriorOfAFEq0(), expected.getLog10PriorOfAFEq0(), TOLERANCE, "Priors AF == 0");
@ -322,7 +344,7 @@ public class AFCalcUnitTest extends BaseTest {
}
Assert.assertEquals(actual.getLog10PosteriorOfAFEq0(), expected.getLog10PosteriorOfAFEq0(), TOLERANCE, "Posteriors AF == 0");
Assert.assertEquals(actual.getLog10PosteriorOfAFGT0(), expected.getLog10PosteriorOfAFGT0(), TOLERANCE, "Posteriors AF > 0");
Assert.assertEquals(actual.getAlleleCountsOfMLE(), expected.getAlleleCountsOfMLE(), "MLE ACs");
Assert.assertTrue(Arrays.equals(actual.getAlleleCountsOfMLE(), expected.getAlleleCountsOfMLE()), "MLE ACs ");
Assert.assertEquals(actual.getAllelesUsedInGenotyping(), expected.getAllelesUsedInGenotyping(), "Alleles used in genotyping");
for ( final Allele a : expected.getAllelesUsedInGenotyping() ) {
@ -337,23 +359,23 @@ public class AFCalcUnitTest extends BaseTest {
}
@Test(enabled = true && ! DEBUG_ONLY, dataProvider = "Models")
public void testLargeGLs(final ExactAFCalc calc) {
public void testLargeGLs(final ExactAFCalculator calc) {
final Genotype BB = makePL(Arrays.asList(C, C), 20000000, 20000000, 0);
GetGLsTest cfg = new GetGLsTest(calc, 1, Arrays.asList(BB, BB, BB), FLAT_3SAMPLE_PRIORS, "flat");
final AFCalcResult resultTracker = cfg.execute();
final AFCalculationResult resultTracker = cfg.execute();
int calculatedAlleleCount = resultTracker.getAlleleCountsOfMLE()[0];
Assert.assertEquals(calculatedAlleleCount, 6);
}
@Test(enabled = true && ! DEBUG_ONLY, dataProvider = "Models")
public void testMismatchedGLs(final ExactAFCalc calc) {
public void testMismatchedGLs(final ExactAFCalculator calc) {
final Genotype AB = makePL(Arrays.asList(A, C), 2000, 0, 2000, 2000, 2000, 2000);
final Genotype AC = makePL(Arrays.asList(A, G), 100, 100, 100, 0, 100, 100);
GetGLsTest cfg = new GetGLsTest(calc, 2, Arrays.asList(AB, AC), FLAT_3SAMPLE_PRIORS, "flat");
final AFCalcResult resultTracker = cfg.execute();
final AFCalculationResult resultTracker = cfg.execute();
Assert.assertEquals(resultTracker.getAlleleCountsOfMLE()[0], 1);
Assert.assertEquals(resultTracker.getAlleleCountsOfMLE()[1], 1);
@ -369,14 +391,14 @@ public class AFCalcUnitTest extends BaseTest {
final Genotype g;
final double pNonRef, tolerance;
final boolean canScale;
final List<AFCalcFactory.Calculation> badModels;
final List<AFCalculatorImplementation> badModels;
final VariantContext vc;
private PNonRefData(final VariantContext vc, Genotype g, double pNonRef, double tolerance, final boolean canScale) {
this(vc, g, pNonRef, tolerance, canScale, Collections.<AFCalcFactory.Calculation>emptyList());
this(vc, g, pNonRef, tolerance, canScale, Collections.<AFCalculatorImplementation>emptyList());
}
private PNonRefData(final VariantContext vc, Genotype g, double pNonRef, double tolerance, final boolean canScale, final List<AFCalcFactory.Calculation> badModels) {
private PNonRefData(final VariantContext vc, Genotype g, double pNonRef, double tolerance, final boolean canScale, final List<AFCalculatorImplementation> badModels) {
this.g = g;
this.pNonRef = pNonRef;
this.tolerance = tolerance;
@ -411,7 +433,7 @@ public class AFCalcUnitTest extends BaseTest {
final VariantContext vc2 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C)).make();
final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make();
final AFCalcTestBuilder.PriorType priorType = AFCalcTestBuilder.PriorType.flat;
final AFCalculatorTestBuilder.PriorType priorType = AFCalculatorTestBuilder.PriorType.flat;
final double TOLERANCE = 0.5;
@ -433,7 +455,7 @@ public class AFCalcUnitTest extends BaseTest {
new PNonRefData(vc3, makePL(GG, 10, 10, 10, 10, 10, 0), 0.9166667, TOLERANCE, false)
);
for ( AFCalcFactory.Calculation modelType : Arrays.asList(AFCalcFactory.Calculation.EXACT_REFERENCE, AFCalcFactory.Calculation.EXACT_INDEPENDENT) ) {
for ( AFCalculatorImplementation modelType : Arrays.asList(AFCalculatorImplementation.EXACT_REFERENCE, AFCalculatorImplementation.EXACT_INDEPENDENT) ) {
for ( int nNonInformative = 0; nNonInformative < 3; nNonInformative++ ) {
for ( final PNonRefData rootData : initialPNonRefData ) {
for ( int plScale = 1; plScale <= 100000; plScale *= 10 ) {
@ -451,19 +473,19 @@ public class AFCalcUnitTest extends BaseTest {
@Test(enabled = true && ! DEBUG_ONLY, dataProvider = "PNonRef")
private void testPNonRef(final VariantContext vcRoot,
AFCalcFactory.Calculation modelType,
AFCalcTestBuilder.PriorType priorType,
AFCalculatorImplementation modelType,
AFCalculatorTestBuilder.PriorType priorType,
final List<Genotype> genotypes,
final double expectedPNonRef,
final double tolerance,
final int nNonInformative) {
final AFCalcTestBuilder testBuilder
= new AFCalcTestBuilder(1, vcRoot.getNAlleles()-1, modelType, priorType);
final AFCalculatorTestBuilder testBuilder
= new AFCalculatorTestBuilder(1, vcRoot.getNAlleles()-1, modelType, priorType);
final VariantContextBuilder vcb = new VariantContextBuilder(vcRoot);
vcb.genotypes(genotypes);
final AFCalcResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vcb.make(), testBuilder.makePriors());
final AFCalculationResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vcb.make(), PLOIDY, MAX_ALT_ALLELES, testBuilder.makePriors());
Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), Math.log10(expectedPNonRef), tolerance,
"Actual pNonRef not within tolerance " + tolerance + " of expected");
@ -476,12 +498,12 @@ public class AFCalcUnitTest extends BaseTest {
final List<Integer> bigNonRefPLs = Arrays.asList(0, 1, 2, 3, 4, 5, 10, 15, 20, 25, 50, 100, 1000);
final List<List<Integer>> bigDiploidPLs = removeBadPLs(Utils.makePermutations(bigNonRefPLs, 3, true));
for ( AFCalcFactory.Calculation modelType : AFCalcFactory.Calculation.values() ) {
for ( AFCalculatorImplementation modelType : AFCalculatorImplementation.values() ) {
if ( false ) { // for testing only
tests.add(new Object[]{modelType, toGenotypes(Arrays.asList(Arrays.asList(0,100,0)))});
} else {
if ( modelType == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY ) continue; // TODO -- GENERAL_PLOIDY DOESN'T WORK
if ( modelType == AFCalculatorImplementation.EXACT_GENERAL_PLOIDY ) continue; // TODO -- GENERAL_PLOIDY DOESN'T WORK
// test all combinations of PLs for 1 sample
for ( final List<List<Integer>> PLsPerSample : Utils.makePermutations(bigDiploidPLs, 1, true) ) {
@ -539,16 +561,16 @@ public class AFCalcUnitTest extends BaseTest {
}
@Test(enabled = true && ! DEBUG_ONLY, dataProvider = "PNonRefBiallelicSystematic")
private void PNonRefBiallelicSystematic(AFCalcFactory.Calculation modelType, final List<Genotype> genotypes) {
private void PNonRefBiallelicSystematic(AFCalculatorImplementation modelType, final List<Genotype> genotypes) {
//logger.warn("Running " + modelType + " with " + genotypes);
final AFCalcTestBuilder refBuilder = new AFCalcTestBuilder(genotypes.size(), 1, AFCalcFactory.Calculation.EXACT_REFERENCE, AFCalcTestBuilder.PriorType.human);
final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(genotypes.size(), 1, modelType, AFCalcTestBuilder.PriorType.human);
final AFCalculatorTestBuilder refBuilder = new AFCalculatorTestBuilder(genotypes.size(), 1, AFCalculatorImplementation.EXACT_REFERENCE, AFCalculatorTestBuilder.PriorType.human);
final AFCalculatorTestBuilder testBuilder = new AFCalculatorTestBuilder(genotypes.size(), 1, modelType, AFCalculatorTestBuilder.PriorType.human);
final VariantContextBuilder vcb = new VariantContextBuilder("x", "1", 1, 1, Arrays.asList(A, C));
vcb.genotypes(genotypes);
final AFCalcResult refResult = refBuilder.makeModel().getLog10PNonRef(vcb.make(), testBuilder.makePriors());
final AFCalcResult testResult = testBuilder.makeModel().getLog10PNonRef(vcb.make(), testBuilder.makePriors());
final AFCalculationResult refResult = refBuilder.makeModel().getLog10PNonRef(vcb.make(), PLOIDY, MAX_ALT_ALLELES, testBuilder.makePriors());
final AFCalculationResult testResult = testBuilder.makeModel().getLog10PNonRef(vcb.make(), PLOIDY, MAX_ALT_ALLELES, testBuilder.makePriors());
final double tolerance = 1e-3;
Assert.assertEquals(testResult.getLog10PosteriorOfAFGT0(), refResult.getLog10PosteriorOfAFGT0(), tolerance,
@ -567,9 +589,9 @@ public class AFCalcUnitTest extends BaseTest {
public Object[][] makeModels() {
List<Object[]> tests = new ArrayList<Object[]>();
for ( final AFCalcFactory.Calculation calc : AFCalcFactory.Calculation.values() ) {
for ( final AFCalculatorImplementation calc : AFCalculatorImplementation.values() ) {
if ( calc.usableForParams(2, 4) )
tests.add(new Object[]{AFCalcFactory.createAFCalc(calc, 2, 4)});
tests.add(new Object[]{AFCalculatorFactory.createCalculatorForDiploidAnalysis()});
}
return tests.toArray(new Object[][]{});
@ -577,7 +599,7 @@ public class AFCalcUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "Models")
public void testNoPrior(final AFCalc model) {
public void testNoPrior(final AFCalculator model) {
for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) {
final Genotype AB = makePL(Arrays.asList(A,C), REF_PL, 0, 10000);
@ -592,8 +614,8 @@ public class AFCalcUnitTest extends BaseTest {
GetGLsTest cfgFlatPrior = new GetGLsTest(model, 1, Arrays.asList(AB), flatPriors, "flatPrior");
GetGLsTest cfgNoPrior = new GetGLsTest(model, 1, Arrays.asList(AB), flatPriors, "noPrior");
final AFCalcResult resultTrackerFlat = cfgFlatPrior.execute();
final AFCalcResult resultTrackerNoPrior = cfgNoPrior.execute();
final AFCalculationResult resultTrackerFlat = cfgFlatPrior.execute();
final AFCalculationResult resultTrackerNoPrior = cfgNoPrior.execute();
final double pRefWithNoPrior = AB.getLikelihoods().getAsVector()[0];
final double pHetWithNoPrior = AB.getLikelihoods().getAsVector()[1] - Math.log10(0.5);
@ -609,7 +631,7 @@ public class AFCalcUnitTest extends BaseTest {
}
@Test(enabled = true && !DEBUG_ONLY, dataProvider = "Models")
public void testBiallelicPriors(final AFCalc model) {
public void testBiallelicPriors(final AFCalculator model) {
for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) {
final Genotype AB = makePL(Arrays.asList(A,C), REF_PL, 0, 10000);
@ -620,7 +642,7 @@ public class AFCalcUnitTest extends BaseTest {
final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior}), true);
if ( ! Double.isInfinite(priors[1]) ) {
GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior);
final AFCalcResult resultTracker = cfg.execute();
final AFCalculationResult resultTracker = cfg.execute();
final int actualAC = resultTracker.getAlleleCountsOfMLE()[0];
final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0];
@ -656,14 +678,14 @@ public class AFCalcUnitTest extends BaseTest {
List<Object[]> tests = new ArrayList<Object[]>();
// list of all high-quality models in the system
final List<AFCalcFactory.Calculation> models = Arrays.asList(
AFCalcFactory.Calculation.getDefaultModel(),
AFCalcFactory.Calculation.EXACT_REFERENCE,
AFCalcFactory.Calculation.EXACT_INDEPENDENT);
final List<AFCalculatorImplementation> models = Arrays.asList(
AFCalculatorImplementation.DEFAULT,
AFCalculatorImplementation.EXACT_REFERENCE,
AFCalculatorImplementation.EXACT_INDEPENDENT);
// note that we cannot use small PLs here or the thresholds are hard to set
for ( final int nonTypePLs : Arrays.asList(100, 1000) ) {
for ( final AFCalcFactory.Calculation model : models ) {
for ( final AFCalculatorImplementation model : models ) {
for ( final int allele1AC : Arrays.asList(0, 1, 2, 10, 100, 1000, 10000) ) {
for ( final int nSamples : Arrays.asList(1, 10, 100, 1000, 10000) ) {
// for ( final int nonTypePLs : Arrays.asList(10) ) {
@ -678,8 +700,8 @@ public class AFCalcUnitTest extends BaseTest {
// bi-allelic tests
{
final AFCalcTestBuilder testBuilder
= new AFCalcTestBuilder(nSamples, 1, model, AFCalcTestBuilder.PriorType.human);
final AFCalculatorTestBuilder testBuilder
= new AFCalculatorTestBuilder(nSamples, 1, model, AFCalculatorTestBuilder.PriorType.human);
final List<Integer> ACs = Arrays.asList(allele1AC);
tests.add(new Object[]{testBuilder, ACs, nonTypePLs, Arrays.asList(poly1)});
}
@ -689,8 +711,8 @@ public class AFCalcUnitTest extends BaseTest {
if ( nSamples < allele2AC || allele1AC + allele2AC > nSamples || nSamples > 100 || nSamples == 1)
continue;
final AFCalcTestBuilder testBuilder
= new AFCalcTestBuilder(nSamples, 2, model, AFCalcTestBuilder.PriorType.human);
final AFCalculatorTestBuilder testBuilder
= new AFCalculatorTestBuilder(nSamples, 2, model, AFCalculatorTestBuilder.PriorType.human);
final List<Integer> ACs = Arrays.asList(allele1AC, allele2AC);
final boolean poly2 = allele2AC > errorFreq && (nonTypePLs * allele2AC) > 90;
tests.add(new Object[]{testBuilder, ACs, nonTypePLs, Arrays.asList(poly1, poly2)});
@ -704,7 +726,7 @@ public class AFCalcUnitTest extends BaseTest {
}
@Test(enabled = true && ! DEBUG_ONLY, dataProvider = "polyTestProvider")
public void testCallingGeneral(final AFCalcTestBuilder testBuilder, final List<Integer> ACs, final int nonTypePL, final List<Boolean> expectedPoly ) {
public void testCallingGeneral(final AFCalculatorTestBuilder testBuilder, final List<Integer> ACs, final int nonTypePL, final List<Boolean> expectedPoly ) {
testCalling(testBuilder, ACs, nonTypePL, expectedPoly);
}
@ -713,13 +735,13 @@ public class AFCalcUnitTest extends BaseTest {
List<Object[]> tests = new ArrayList<Object[]>();
// list of all high-quality models in the system
final List<AFCalcFactory.Calculation> models = Arrays.asList(AFCalcFactory.Calculation.EXACT_INDEPENDENT);
final List<AFCalculatorImplementation> models = Arrays.asList(AFCalculatorImplementation.EXACT_INDEPENDENT);
final List<Integer> alleleCounts = Arrays.asList(0, 1, 2, 3, 4, 5, 10, 20);
final int nonTypePLs = 1000;
final int nAlleles = 4;
for ( final AFCalcFactory.Calculation model : models ) {
for ( final AFCalculatorImplementation model : models ) {
for ( final List<Integer> ACs : Utils.makePermutations(alleleCounts, nAlleles, true) ) {
final List<Boolean> isPoly = new ArrayList<Boolean>(ACs.size());
for ( final int ac : ACs ) isPoly.add(ac > 0);
@ -727,8 +749,8 @@ public class AFCalcUnitTest extends BaseTest {
final double acSum = MathUtils.sum(ACs);
for ( final int nSamples : Arrays.asList(1, 10, 100) ) {
if ( nSamples < acSum ) continue;
final AFCalcTestBuilder testBuilder
= new AFCalcTestBuilder(nSamples, nAlleles, model, AFCalcTestBuilder.PriorType.human);
final AFCalculatorTestBuilder testBuilder
= new AFCalculatorTestBuilder(nSamples, nAlleles, model, AFCalculatorTestBuilder.PriorType.human);
tests.add(new Object[]{testBuilder, ACs, nonTypePLs, isPoly});
}
}
@ -738,15 +760,15 @@ public class AFCalcUnitTest extends BaseTest {
}
@Test(enabled = true && ! DEBUG_ONLY, dataProvider = "polyTestProviderLotsOfAlleles")
public void testCallingLotsOfAlleles(final AFCalcTestBuilder testBuilder, final List<Integer> ACs, final int nonTypePL, final List<Boolean> expectedPoly ) {
public void testCallingLotsOfAlleles(final AFCalculatorTestBuilder testBuilder, final List<Integer> ACs, final int nonTypePL, final List<Boolean> expectedPoly ) {
testCalling(testBuilder, ACs, nonTypePL, expectedPoly);
}
private void testCalling(final AFCalcTestBuilder testBuilder, final List<Integer> ACs, final int nonTypePL, final List<Boolean> expectedPoly) {
final AFCalc calc = testBuilder.makeModel();
private void testCalling(final AFCalculatorTestBuilder testBuilder, final List<Integer> ACs, final int nonTypePL, final List<Boolean> expectedPoly) {
final AFCalculator calc = testBuilder.makeModel();
final double[] priors = testBuilder.makePriors();
final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL);
final AFCalcResult result = calc.getLog10PNonRef(vc, priors);
final AFCalculationResult result = calc.getLog10PNonRef(vc, PLOIDY, testBuilder.numAltAlleles, priors);
boolean anyPoly = false;
for ( final boolean onePoly : expectedPoly ) anyPoly = anyPoly || onePoly;

View File

@ -52,6 +52,7 @@ import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.collections.Pair;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@ -60,18 +61,18 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class AFCalcPerformanceUnitTest extends BaseTest {
public class AFCalculatorPerformanceUnitTest extends BaseTest {
@DataProvider(name = "ScalingTests")
public Object[][] makepolyTestProviderLotsOfAlleles() {
List<Object[]> tests = new ArrayList<Object[]>();
// list of all high-quality models in the system
final List<AFCalcFactory.Calculation> biAllelicModels = Arrays.asList(
AFCalcFactory.Calculation.EXACT_INDEPENDENT,
AFCalcFactory.Calculation.EXACT_REFERENCE);
final List<AFCalculatorImplementation> biAllelicModels = Arrays.asList(
AFCalculatorImplementation.EXACT_INDEPENDENT,
AFCalculatorImplementation.EXACT_REFERENCE);
final List<AFCalcFactory.Calculation> multiAllelicModels = Arrays.asList(
AFCalcFactory.Calculation.EXACT_INDEPENDENT);
final List<AFCalculatorImplementation> multiAllelicModels = Arrays.asList(
AFCalculatorImplementation.EXACT_INDEPENDENT);
// for ( final int nonTypePLs : Arrays.asList(100) ) {
// for ( final int nSamples : Arrays.asList(10000) ) {
@ -81,12 +82,12 @@ public class AFCalcPerformanceUnitTest extends BaseTest {
for ( final int nSamples : Arrays.asList(100, 1000) ) {
final List<Integer> alleleCounts = Arrays.asList(0, 1, 2, 3, 4, 5, 10, 50, 500);
for ( final int nAltAlleles : Arrays.asList(1, 2, 3) ) {
final List<AFCalcFactory.Calculation> models = nAltAlleles > 1 ? multiAllelicModels : biAllelicModels;
for ( final AFCalcFactory.Calculation model : models ) {
final List<AFCalculatorImplementation> models = nAltAlleles > 1 ? multiAllelicModels : biAllelicModels;
for ( final AFCalculatorImplementation model : models ) {
for ( final List<Integer> ACs : Utils.makePermutations(alleleCounts, nAltAlleles, true) ) {
if ( MathUtils.sum(ACs) < nSamples * 2 ) {
final AFCalcTestBuilder testBuilder
= new AFCalcTestBuilder(nSamples, nAltAlleles, model, AFCalcTestBuilder.PriorType.human);
final AFCalculatorTestBuilder testBuilder
= new AFCalculatorTestBuilder(nSamples, nAltAlleles, model, AFCalculatorTestBuilder.PriorType.human);
tests.add(new Object[]{testBuilder, ACs, nonTypePLs});
}
}
@ -98,7 +99,7 @@ public class AFCalcPerformanceUnitTest extends BaseTest {
return tests.toArray(new Object[][]{});
}
private Pair<Integer, Integer> estNumberOfEvaluations(final AFCalcTestBuilder testBuilder, final VariantContext vc, final int nonTypePL) {
private Pair<Integer, Integer> estNumberOfEvaluations(final AFCalculatorTestBuilder testBuilder, final VariantContext vc, final int nonTypePL) {
final int evalOverhead = 2; // 2
final int maxEvalsPerSamplePerAC = 3;
@ -114,11 +115,11 @@ public class AFCalcPerformanceUnitTest extends BaseTest {
}
@Test(dataProvider = "ScalingTests")
private void testScaling(final AFCalcTestBuilder testBuilder, final List<Integer> ACs, final int nonTypePL) {
final AFCalc calc = testBuilder.makeModel();
private void testScaling(final AFCalculatorTestBuilder testBuilder, final List<Integer> ACs, final int nonTypePL) {
final AFCalculator calc = testBuilder.makeModel();
final double[] priors = testBuilder.makePriors();
final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL);
final AFCalcResult result = calc.getLog10PNonRef(vc, priors);
final AFCalculationResult result = calc.getLog10PNonRef(vc, HomoSapiensConstants.DEFAULT_PLOIDY, testBuilder.numAltAlleles, priors);
final Pair<Integer, Integer> expectedNEvaluation = estNumberOfEvaluations(testBuilder, vc, nonTypePL);
final int minEvals = expectedNEvaluation.getFirst();
final int maxEvals = expectedNEvaluation.getSecond();

View File

@ -57,7 +57,7 @@ import org.testng.annotations.Test;
import java.util.*;
public class AFCalcResultUnitTest extends BaseTest {
public class AFCalculatorResultUnitTest extends BaseTest {
private static class MyTest {
final double[] Ls, expectedPosteriors;
@ -115,7 +115,7 @@ public class AFCalcResultUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "TestComputePosteriors")
private void testComputingPosteriors(final MyTest data) {
final AFCalcResult result = new AFCalcResult(new int[]{0}, 1, alleles, data.Ls, log10Even, Collections.singletonMap(C, -1.0));
final AFCalculationResult result = new AFCalculationResult(new int[]{0}, 1, alleles, data.Ls, log10Even, Collections.singletonMap(C, -1.0));
Assert.assertEquals(result.getLog10PosteriorOfAFEq0(), data.expectedPosteriors[0], 1e-3, "AF = 0 not expected");
Assert.assertEquals(result.getLog10PosteriorOfAFGT0(), data.expectedPosteriors[1], 1e-3, "AF > 0 not expected");
@ -145,8 +145,8 @@ public class AFCalcResultUnitTest extends BaseTest {
return tests.toArray(new Object[][]{});
}
private AFCalcResult makePolymorphicTestData(final double pNonRef) {
return new AFCalcResult(
private AFCalculationResult makePolymorphicTestData(final double pNonRef) {
return new AFCalculationResult(
new int[]{0},
1,
alleles,
@ -157,7 +157,7 @@ public class AFCalcResultUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "TestIsPolymorphic")
private void testIsPolymorphic(final double pNonRef, final double pThreshold, final boolean shouldBePoly) {
final AFCalcResult result = makePolymorphicTestData(pNonRef);
final AFCalculationResult result = makePolymorphicTestData(pNonRef);
final boolean actualIsPoly = result.isPolymorphic(C, Math.log10(1 - pThreshold));
Assert.assertEquals(actualIsPoly, shouldBePoly,
"isPolymorphic with pNonRef " + pNonRef + " and threshold " + pThreshold + " returned "
@ -166,7 +166,7 @@ public class AFCalcResultUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "TestIsPolymorphic")
private void testIsPolymorphicQual(final double pNonRef, final double pThreshold, final boolean shouldBePoly) {
final AFCalcResult result = makePolymorphicTestData(pNonRef);
final AFCalculationResult result = makePolymorphicTestData(pNonRef);
final double qual = QualityUtils.phredScaleCorrectRate(pThreshold);
final boolean actualIsPoly = result.isPolymorphicPhredScaledQual(C, qual);
Assert.assertEquals(actualIsPoly, shouldBePoly,

View File

@ -0,0 +1,166 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
* Tests {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider}
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class FixedAFCalculatorProviderUnitTest {
@Test(dataProvider="nonThreadSafeConstructorsData")
public void testNonThreadSafeConstructors(final int ploidy, final int maxAltAlleles, final AFCalculatorImplementation preferred) {
final GenotypeCalculationArgumentCollection args = new GenotypeCalculationArgumentCollection();
args.MAX_ALTERNATE_ALLELES = maxAltAlleles;
args.samplePloidy = ploidy;
final StandardCallerArgumentCollection callerArgs = new StandardCallerArgumentCollection();
if (preferred != null ) callerArgs.requestedAlleleFrequencyCalculationModel = preferred;
callerArgs.genotypeArgs = args;
final FixedAFCalculatorProvider providerCallerArgs = new FixedAFCalculatorProvider(callerArgs,null,true);
final FixedAFCalculatorProvider providerGenotypingArgs = new FixedAFCalculatorProvider(args,null,true);
Assert.assertNotNull(providerCallerArgs.getInstance(ploidy,maxAltAlleles));
Assert.assertTrue(AFCalculatorImplementation.fromCalculatorClass(providerCallerArgs.getInstance(ploidy,maxAltAlleles).getClass()).usableForParams(ploidy,maxAltAlleles));
Assert.assertNotNull(providerGenotypingArgs.getInstance(ploidy,maxAltAlleles));
Assert.assertTrue(AFCalculatorImplementation.fromCalculatorClass(providerGenotypingArgs.getInstance(ploidy,maxAltAlleles).getClass()).usableForParams(ploidy,maxAltAlleles));
if (preferred != null && preferred.usableForParams(ploidy,maxAltAlleles)) {
Assert.assertEquals(AFCalculatorImplementation.fromCalculatorClass(providerCallerArgs.getInstance(ploidy, maxAltAlleles).getClass()), preferred);
}
}
@Test(dataProvider="threadSafeFactoryData")
public void testThreadSafeConstructors(final int ploidy, final int maxAltAlleles, final AFCalculatorImplementation impl, final int cpuThreadCount, final int dataThreadCount) {
final GenomeAnalysisEngine toolkit = new GenomeAnalysisEngine();
final GATKArgumentCollection gatkArguments = new GATKArgumentCollection();
gatkArguments.numberOfCPUThreadsPerDataThread =cpuThreadCount;
gatkArguments.numberOfDataThreads = dataThreadCount;
toolkit.setArguments(gatkArguments);
final StandardCallerArgumentCollection callerArgs = new StandardCallerArgumentCollection();
if (impl != null) callerArgs.requestedAlleleFrequencyCalculationModel = impl;
final GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection();
callerArgs.genotypeArgs = genotypeArgs;
genotypeArgs.samplePloidy = ploidy;
genotypeArgs.MAX_ALTERNATE_ALLELES = maxAltAlleles;
final AFCalculatorProvider provider = FixedAFCalculatorProvider.createThreadSafeProvider(toolkit,callerArgs,null);
final Hashtable<Thread,AFCalculator> perThreadProvider = new Hashtable(cpuThreadCount * dataThreadCount);
final List<Thread> threads = new ArrayList<>();
// execute different threads.
for (int i = 0; i < cpuThreadCount; i++)
for (int j = 0; j < cpuThreadCount; j++) {
final Thread thread = new Thread(new Runnable() {
@Override
public void run() {
synchronized (perThreadProvider) {
perThreadProvider.put(Thread.currentThread(), provider.getInstance(ploidy, maxAltAlleles));
}
}
});
thread.start();
threads.add(thread);
}
// wait all threads to have finished.
for (final Thread thread : threads)
try {
thread.join();
} catch (InterruptedException e) {
Assert.fail();
}
// check that each thread gave a different calculator.
final Set<AFCalculator> calculators = new HashSet<>(perThreadProvider.values());
Assert.assertEquals(calculators.size(),threads.size());
}
private final static int[] PLOIDIES = new int[] { 1,2,3,4,10 };
private final static int[] MAX_ALT_ALLELES = new int[] { 1,2,3,4,10};
private final static int[] CPU_THREAD_COUNT = new int[] { 1, 2, 3, 4, 10};
private final static int[] DATA_THREAD_COUNT = new int[] { 1, 2, 3, 4, 10};
@DataProvider(name="nonThreadSafeConstructorsData")
public Object[][] nonThreadSafeConstructorsData() {
final Object[][] result = new Object[PLOIDIES.length * MAX_ALT_ALLELES.length * (AFCalculatorImplementation.values().length + 1)][];
int idx = 0;
for (int i = 0; i < PLOIDIES.length; i++) {
for (int j = 0; j < MAX_ALT_ALLELES.length; j++) {
result[idx++] = new Object[] { PLOIDIES[i], MAX_ALT_ALLELES[j], null };
for (final AFCalculatorImplementation impl : AFCalculatorImplementation.values()) {
result[idx++] = new Object[]{PLOIDIES[i], MAX_ALT_ALLELES[j], impl};
}
}
}
return result;
}
@DataProvider(name="threadSafeFactoryData")
public Object[][] threadSafeFactoryData() {
final Object[][] result = new Object[DATA_THREAD_COUNT.length * CPU_THREAD_COUNT.length * PLOIDIES.length * MAX_ALT_ALLELES.length * (AFCalculatorImplementation.values().length + 1)][];
int idx = 0;
for (int i = 0; i < PLOIDIES.length; i++) {
for (int j = 0; j < MAX_ALT_ALLELES.length; j++) {
for (int k = 0; k < CPU_THREAD_COUNT.length; k++) {
for (int l = 0; l < DATA_THREAD_COUNT.length; l++) {
result[idx++] = new Object[]{PLOIDIES[i], MAX_ALT_ALLELES[j], null, CPU_THREAD_COUNT[k], DATA_THREAD_COUNT[l]};
for (final AFCalculatorImplementation impl : AFCalculatorImplementation.values()) {
result[idx++] = new Object[]{PLOIDIES[i], MAX_ALT_ALLELES[j], impl, CPU_THREAD_COUNT[k], DATA_THREAD_COUNT[l]};
}
}
}
}
}
return result;
}
}

View File

@ -186,12 +186,13 @@ public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
double[] priors = new double[len]; // flat priors
final GeneralPloidyExactAFCalc calc = new GeneralPloidyExactAFCalc(cfg.GLs.size(), 1 + cfg.numAltAlleles, cfg.ploidy);
calc.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors);
final GeneralPloidyExactAFCalculator calc = new GeneralPloidyExactAFCalculator();
calc.combineSinglePools(cfg.GLs, cfg.ploidy,cfg.numAltAlleles + 1, priors);
int nameIndex = 1;
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
int calculatedAlleleCount = calc.getStateTracker().getAlleleCountsOfMAP()[allele];
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex + 1));
int calculatedAlleleCount = calc.getAltAlleleCountOfMAP(allele);
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
}
}

View File

@ -0,0 +1,162 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
* Tests {@link GeneralPloidyFailOverAFCalculatorProvider}
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class GeneralPloidyFailOverAFCalculatorProviderUnitTest {
@Test(dataProvider="nonThreadSafeConstructorsData")
public void testNonThreadSafeConstructors(final int ploidy, final int maxAltAlleles) {
final GenotypeCalculationArgumentCollection args = new GenotypeCalculationArgumentCollection();
args.MAX_ALTERNATE_ALLELES = maxAltAlleles;
args.samplePloidy = ploidy;
final GeneralPloidyFailOverAFCalculatorProvider provider = new GeneralPloidyFailOverAFCalculatorProvider(args,null);
final AFCalculator calculator = provider.getInstance(ploidy,maxAltAlleles);
Assert.assertNotNull(calculator);
final AFCalculatorImplementation implementation = AFCalculatorImplementation.fromCalculatorClass(calculator.getClass());
Assert.assertTrue(implementation.usableForParams(ploidy,maxAltAlleles));
for (int i = 0; i < PLOIDIES.length; i++) {
for (int j = 0; j < MAX_ALT_ALLELES.length; j++) {
if (implementation.usableForParams(PLOIDIES[i],MAX_ALT_ALLELES[j]))
Assert.assertSame(provider.getInstance(PLOIDIES[i],MAX_ALT_ALLELES[j]),calculator);
else {
final AFCalculator failOver = provider.getInstance(PLOIDIES[i],MAX_ALT_ALLELES[j]);
Assert.assertNotNull(failOver);
final AFCalculatorImplementation failOverImplementation = AFCalculatorImplementation.fromCalculatorClass(failOver.getClass());
Assert.assertTrue(failOverImplementation.usableForParams(PLOIDIES[i],MAX_ALT_ALLELES[j]));
Assert.assertEquals(failOverImplementation, AFCalculatorImplementation.EXACT_GENERAL_PLOIDY);
}
}
}
}
@Test(dataProvider="threadSafeFactoryData")
public void testThreadSafeConstructors(final int ploidy, final int maxAltAlleles, final int cpuThreadCount, final int dataThreadCount) {
final GenomeAnalysisEngine toolkit = new GenomeAnalysisEngine();
final GATKArgumentCollection gatkArguments = new GATKArgumentCollection();
gatkArguments.numberOfCPUThreadsPerDataThread =cpuThreadCount;
gatkArguments.numberOfDataThreads = dataThreadCount;
toolkit.setArguments(gatkArguments);
final GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection();
genotypeArgs.samplePloidy = ploidy;
genotypeArgs.MAX_ALTERNATE_ALLELES = maxAltAlleles;
final AFCalculatorProvider provider = GeneralPloidyFailOverAFCalculatorProvider.createThreadSafeProvider(toolkit,genotypeArgs,null);
final Hashtable<Thread,AFCalculator> perThreadProvider = new Hashtable(cpuThreadCount * dataThreadCount);
final List<Thread> threads = new ArrayList<>();
// execute different threads.
for (int i = 0; i < cpuThreadCount; i++)
for (int j = 0; j < cpuThreadCount; j++) {
final Thread thread = new Thread(new Runnable() {
@Override
public void run() {
synchronized (perThreadProvider) {
perThreadProvider.put(Thread.currentThread(), provider.getInstance(ploidy, maxAltAlleles));
}
}
});
thread.start();
threads.add(thread);
}
// wait all threads to have finished.
for (final Thread thread : threads)
try {
thread.join();
} catch (InterruptedException e) {
Assert.fail();
}
// check that each thread gave a different calculator.
final Set<AFCalculator> calculators = new HashSet<>(perThreadProvider.values());
Assert.assertEquals(calculators.size(),threads.size());
}
private final static int[] PLOIDIES = new int[] { AFCalculatorImplementation.UNBOUND_PLOIDY,1,2,3,4,10 };
private final static int[] MAX_ALT_ALLELES = new int[] { AFCalculatorImplementation.UNBOUND_ALTERNATIVE_ALLELE_COUNT,1,2,3,4,10};
private final static int[] CPU_THREAD_COUNT = new int[] { 1, 2, 3, 4, 10};
private final static int[] DATA_THREAD_COUNT = new int[] { 1, 2, 3, 4, 10};
@DataProvider(name="nonThreadSafeConstructorsData")
public Object[][] nonThreadSafeConstructorsData() {
final Object[][] result = new Object[PLOIDIES.length * MAX_ALT_ALLELES.length][];
int idx = 0;
for (int i = 0; i < PLOIDIES.length; i++) {
for (int j = 0; j < MAX_ALT_ALLELES.length; j++) {
result[idx++] = new Object[] { PLOIDIES[i], MAX_ALT_ALLELES[j]};
}
}
return result;
}
@DataProvider(name="threadSafeFactoryData")
public Object[][] threadSafeFactoryData() {
final Object[][] result = new Object[DATA_THREAD_COUNT.length * CPU_THREAD_COUNT.length * PLOIDIES.length * MAX_ALT_ALLELES.length][];
int idx = 0;
for (int i = 0; i < PLOIDIES.length; i++) {
for (int j = 0; j < MAX_ALT_ALLELES.length; j++) {
for (int k = 0; k < CPU_THREAD_COUNT.length; k++) {
for (int l = 0; l < DATA_THREAD_COUNT.length; l++) {
result[idx++] = new Object[]{PLOIDIES[i], MAX_ALT_ALLELES[j], CPU_THREAD_COUNT[k], DATA_THREAD_COUNT[l]};
}
}
}
}
return result;
}
}

View File

@ -61,7 +61,7 @@ import java.util.*;
// SEE private/R/pls.R if you want the truth output for these tests
public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
public class IndependentAllelesDiploidExactAFCalculatorUnitTest extends BaseTest {
@DataProvider(name = "TestCombineGLs")
public Object[][] makeTestCombineGLs() {
List<Object[]> tests = new ArrayList<Object[]>();
@ -102,12 +102,12 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
}
private Genotype makePL(final int ... PLs) {
return AFCalcUnitTest.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), PLs);
return AFCalculationUnitTest.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), PLs);
}
@Test(enabled = true, dataProvider = "TestCombineGLs")
public void testCombineGLsPrecise(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
final IndependentAllelesDiploidExactAFCalculator calc = (IndependentAllelesDiploidExactAFCalculator) AFCalculatorImplementation.EXACT_INDEPENDENT.newInstance();
final Genotype combined = calc.combineGLsPrecise(testg, altIndex, nAlts);
Assert.assertEquals(combined.getPL(), expected.getPL(),
@ -116,7 +116,7 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "TestCombineGLs")
public void testCombinePrecise(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
final IndependentAllelesDiploidExactAFCalculator calc = (IndependentAllelesDiploidExactAFCalculator) AFCalculatorImplementation.EXACT_INDEPENDENT.newInstance();
final Genotype combined = calc.combineGLsPrecise(testg, altIndex, nAlts);
Assert.assertEquals(combined.getPL(), expected.getPL(),
@ -156,7 +156,7 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "TestMakeAlleleConditionalContexts")
private void testMakeAlleleConditionalContexts(final VariantContext vc, final List<VariantContext> expectedVCs) {
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
final IndependentAllelesDiploidExactAFCalculator calc = (IndependentAllelesDiploidExactAFCalculator) AFCalculatorImplementation.EXACT_INDEPENDENT.newInstance();
final List<VariantContext> biAllelicVCs = calc.makeAlleleConditionalContexts(vc);
Assert.assertEquals(biAllelicVCs.size(), expectedVCs.size());
@ -197,24 +197,24 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
final double log10pNonRef = Math.log10(1-pRef);
final List<AFCalcResult> originalPriors = new LinkedList<AFCalcResult>();
final List<AFCalculationResult> originalPriors = new LinkedList<AFCalculationResult>();
final List<Double> pNonRefN = new LinkedList<Double>();
for ( int i = 0; i < log10LAlleles.size(); i++ ) {
final double log10LAllele1 = log10LAlleles.get(i);
final double[] L1 = MathUtils.normalizeFromLog10(new double[]{log10LAllele1, 0.0}, true);
final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, -10000.0));
final AFCalculationResult result1 = new AFCalculationResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, -10000.0));
originalPriors.add(result1);
pNonRefN.add(log10pNonRef*(i+1));
}
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 2);
final List<AFCalcResult> thetaNPriors = calc.applyMultiAllelicPriors(originalPriors);
final IndependentAllelesDiploidExactAFCalculator calc = (IndependentAllelesDiploidExactAFCalculator) AFCalculatorImplementation.EXACT_INDEPENDENT.newInstance();
final List<AFCalculationResult> thetaNPriors = calc.applyMultiAllelicPriors(originalPriors);
double prevPosterior = 0.0;
for ( int i = 0; i < log10LAlleles.size(); i++ ) {
final AFCalcResult thetaN = thetaNPriors.get(i);
AFCalcResult orig = null;
for ( final AFCalcResult x : originalPriors )
final AFCalculationResult thetaN = thetaNPriors.get(i);
AFCalculationResult orig = null;
for ( final AFCalculationResult x : originalPriors )
if ( x.getAllelesUsedInGenotyping().equals(thetaN.getAllelesUsedInGenotyping()))
orig = x;

View File

@ -0,0 +1,143 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.AFPriorProvider;
import org.broadinstitute.gatk.tools.walkers.genotyper.CustomAFPriorProvider;
import org.broadinstitute.gatk.tools.walkers.genotyper.HeterozygosityAFPriorProvider;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.MathUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
* TODO document this.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class AFPriorProviderUnitTest extends BaseTest {
private static final double TOLERANCE = 0.0001;
@Test(dataProvider="HeterozygosityProviderData")
public void testHeterozygosityProvider(final double h, final int useCount, final int minPloidy, final int maxPloidy) {
final double het = h / maxPloidy;
final Random rdn = GenomeAnalysisEngine.getRandomGenerator();
final int[] plodies = new int[useCount];
for (int i = 0; i < useCount; i++)
plodies[i] = rdn.nextInt(maxPloidy - minPloidy + 1) + minPloidy;
final AFPriorProvider provider = new HeterozygosityAFPriorProvider(het);
for (int i = 0; i < useCount; i++) {
final int ploidy = plodies[i];
double[] priors = provider.forTotalPloidy(ploidy);
Assert.assertNotNull(priors);
Assert.assertEquals(priors.length, ploidy + 1);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(priors),0,TOLERANCE);
for (int j = 0; j < priors.length; j++) {
Assert.assertTrue(!Double.isNaN(priors[j]));
Assert.assertTrue(priors[j] < 0);
if (j > 0) Assert.assertEquals(priors[j], Math.log10(het) - Math.log10(j));
}
}
}
@Test(dataProvider="CustomProviderData")
public void testCustomProvider(final int ploidy) {
final double[] priors = new double[ploidy];
final Random rdn = GenomeAnalysisEngine.getRandomGenerator();
double remaining = 1;
final List<Double> priorsList = new ArrayList<Double>();
for (int i = 0; i < priors.length; i++) {
priors[i] = remaining * rdn.nextDouble() * (.1 / ploidy );
remaining -= priors[i];
priorsList.add(priors[i]);
}
final AFPriorProvider provider = new CustomAFPriorProvider(priorsList);
final double[] providedPriors = provider.forTotalPloidy(ploidy);
Assert.assertNotNull(providedPriors);
Assert.assertEquals(providedPriors.length, priors.length + 1);
for (int i = 0; i < priors.length; i++)
Assert.assertEquals(providedPriors[i+1],Math.log10(priors[i]),TOLERANCE);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(providedPriors),0,TOLERANCE);
}
private double[] hets = new double[] { 0.00001, 0.001, 0.1, 0.5, 0.99, 0.999 };
private int[] useCounts = new int[] { 10, 100, 1000 };
private int[] ploidy = new int[] { 1 , 2, 3, 10, 100, 200, 500};
@DataProvider(name="CustomProviderData")
public Object[][] customProviderData() {
final Object[][] result = new Object[ploidy.length][];
for (int i = 0; i < result.length; i++)
result[i] = new Object[] { ploidy[i] };
return result;
}
@DataProvider(name="HeterozygosityProviderData")
public Object[][] heterozygosityProviderData() {
final Object[][] result = new Object[hets.length * useCounts.length * ((ploidy.length + 1) * (ploidy.length) / 2)][];
int idx = 0;
for (double h : hets)
for (int sc : useCounts)
for (int i = 0; i < ploidy.length; i++)
for (int j = i; j < ploidy.length; j++)
result[idx++] = new Object[] { h, sc, ploidy[i], ploidy[j]};
return result;
}
}

View File

@ -91,107 +91,6 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
genomeLocParser = new GenomeLocParser(seq);
}
@Test
public void testFindHomVarEventAllelesInSample() {
final List<Allele> eventAlleles = new ArrayList<Allele>();
eventAlleles.add( Allele.create("A", true) );
eventAlleles.add( Allele.create("C", false) );
final List<Allele> haplotypeAlleles = new ArrayList<Allele>();
haplotypeAlleles.add( Allele.create("AATA", true) );
haplotypeAlleles.add( Allele.create("AACA", false) );
haplotypeAlleles.add( Allele.create("CATA", false) );
haplotypeAlleles.add( Allele.create("CACA", false) );
final List<Haplotype> haplotypes = new ArrayList<Haplotype>();
haplotypes.add(new Haplotype("AATA".getBytes()));
haplotypes.add(new Haplotype("AACA".getBytes()));
haplotypes.add(new Haplotype("CATA".getBytes()));
haplotypes.add(new Haplotype("CACA".getBytes()));
final List<Allele> haplotypeAllelesForSample = new ArrayList<Allele>();
haplotypeAllelesForSample.add( Allele.create("CATA", false) );
haplotypeAllelesForSample.add( Allele.create("CACA", false) );
final List<List<Haplotype>> alleleMapper = new ArrayList<List<Haplotype>>();
List<Haplotype> Aallele = new ArrayList<Haplotype>();
Aallele.add(haplotypes.get(0));
Aallele.add(haplotypes.get(1));
List<Haplotype> Callele = new ArrayList<Haplotype>();
Callele.add(haplotypes.get(2));
Callele.add(haplotypes.get(3));
alleleMapper.add(Aallele);
alleleMapper.add(Callele);
final List<Allele> eventAllelesForSample = new ArrayList<Allele>();
eventAllelesForSample.add( Allele.create("C", false) );
eventAllelesForSample.add( Allele.create("C", false) );
if(!compareAlleleLists(eventAllelesForSample, HaplotypeCallerGenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes))) {
logger.warn("calc alleles = " + HaplotypeCallerGenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes));
logger.warn("expected alleles = " + eventAllelesForSample);
}
Assert.assertTrue(compareAlleleLists(eventAllelesForSample, HaplotypeCallerGenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes)));
}
@Test
public void testFindHetEventAllelesInSample() {
final List<Allele> eventAlleles = new ArrayList<Allele>();
eventAlleles.add( Allele.create("A", true) );
eventAlleles.add( Allele.create("C", false) );
eventAlleles.add( Allele.create("T", false) );
final List<Allele> haplotypeAlleles = new ArrayList<Allele>();
haplotypeAlleles.add( Allele.create("AATA", true) );
haplotypeAlleles.add( Allele.create("AACA", false) );
haplotypeAlleles.add( Allele.create("CATA", false) );
haplotypeAlleles.add( Allele.create("CACA", false) );
haplotypeAlleles.add( Allele.create("TACA", false) );
haplotypeAlleles.add( Allele.create("TTCA", false) );
haplotypeAlleles.add( Allele.create("TTTA", false) );
final List<Haplotype> haplotypes = new ArrayList<Haplotype>();
haplotypes.add(new Haplotype("AATA".getBytes()));
haplotypes.add(new Haplotype("AACA".getBytes()));
haplotypes.add(new Haplotype("CATA".getBytes()));
haplotypes.add(new Haplotype("CACA".getBytes()));
haplotypes.add(new Haplotype("TACA".getBytes()));
haplotypes.add(new Haplotype("TTCA".getBytes()));
haplotypes.add(new Haplotype("TTTA".getBytes()));
final List<Allele> haplotypeAllelesForSample = new ArrayList<Allele>();
haplotypeAllelesForSample.add( Allele.create("TTTA", false) );
haplotypeAllelesForSample.add( Allele.create("AATA", true) );
final List<List<Haplotype>> alleleMapper = new ArrayList<List<Haplotype>>();
List<Haplotype> Aallele = new ArrayList<Haplotype>();
Aallele.add(haplotypes.get(0));
Aallele.add(haplotypes.get(1));
List<Haplotype> Callele = new ArrayList<Haplotype>();
Callele.add(haplotypes.get(2));
Callele.add(haplotypes.get(3));
List<Haplotype> Tallele = new ArrayList<Haplotype>();
Tallele.add(haplotypes.get(4));
Tallele.add(haplotypes.get(5));
Tallele.add(haplotypes.get(6));
alleleMapper.add(Aallele);
alleleMapper.add(Callele);
alleleMapper.add(Tallele);
final List<Allele> eventAllelesForSample = new ArrayList<Allele>();
eventAllelesForSample.add( Allele.create("A", true) );
eventAllelesForSample.add( Allele.create("T", false) );
if(!compareAlleleLists(eventAllelesForSample, HaplotypeCallerGenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes))) {
logger.warn("calc alleles = " + HaplotypeCallerGenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes));
logger.warn("expected alleles = " + eventAllelesForSample);
}
Assert.assertTrue(compareAlleleLists(eventAllelesForSample, HaplotypeCallerGenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes)));
}
private boolean compareAlleleLists(List<Allele> l1, List<Allele> l2) {
if( l1.size() != l2.size() ) {
return false; // sanity check
}
for( int i=0; i < l1.size(); i++ ){
if ( !l2.contains(l1.get(i)) )
return false;
}
return true;
}
private class BasicGenotypingTestProvider extends TestDataProvider {
byte[] ref;
byte[] hap;

View File

@ -90,12 +90,24 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testTetraploidRun() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V:sample1 " + privateTestDir + "tetraploid-gvcf-1.vcf" +
"-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V:sample1 " + privateTestDir + "tetraploid-gvcf-1.vcf" +
" -V:sample2 " + privateTestDir + "tetraploid-gvcf-2.vcf" +
" -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" +
" -L " + privateTestDir + "tetraploid-gvcfs.intervals"),
" -L " + privateTestDir + "tetraploid-gvcfs.intervals",
1,
Arrays.asList("41de0808e029ebefd8b28d31ce10109c"));
Arrays.asList("20f55be01d01bed48bf66f354fa72e5b"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@Test(enabled= true)
public void testMixedPloidyRun() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V:sample1 " + privateTestDir + "haploid-gvcf-1.vcf" +
" -V:sample2 " + privateTestDir + "tetraploid-gvcf-2.vcf" +
" -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" +
" -L " + privateTestDir + "tetraploid-gvcfs.intervals",
1,
Arrays.asList("c8bf3da5eb641d0082bdd5f12ea58e1e"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}

View File

@ -85,7 +85,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testTetraploidRun() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -ploidy 4 -V:sample1 " + privateTestDir + "tetraploid-gvcf-1.vcf" +
baseTestString(" -V:sample1 " + privateTestDir + "tetraploid-gvcf-1.vcf" +
" -V:sample2 " + privateTestDir + "tetraploid-gvcf-2.vcf" +
" -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" +
" -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference),
@ -94,6 +94,18 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@Test(enabled= true)
public void testMixedPloidyRun() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V:sample1 " + privateTestDir + "haploid-gvcf-1.vcf" +
" -V:sample2 " + privateTestDir + "tetraploid-gvcf-2.vcf" +
" -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" +
" -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference),
1,
Arrays.asList("0ad7d784a15ad7f8b386ec7ca34032af"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@Test(enabled = true)
public void combineSingleSamplePipelineGVCF_includeNonVariants() {
WalkerTestSpec spec = new WalkerTestSpec(
@ -148,7 +160,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V " + privateTestDir + "gvcfExample1.vcf" +
" -V " + privateTestDir + "gvcfExample2.vcf",
1,
Arrays.asList("81c4cc8a6b72c24598ee899df838f1e8"));
Arrays.asList("ec63a629cc707554d3dd2ba7254b3b8d"));
executeTest("testSamplesWithDifferentLs", spec);
}

View File

@ -293,11 +293,12 @@ public class MathUtils {
public static double log10sumLog10(final double[] log10p, final int start, final int finish) {
if (start >= finish)
return Double.NEGATIVE_INFINITY;
final int maxElementIndex = MathUtils.maxElementIndex(log10p, start, finish);
double maxValue = log10p[maxElementIndex];
if(maxValue == Double.NEGATIVE_INFINITY) {
final double maxValue = log10p[maxElementIndex];
if(maxValue == Double.NEGATIVE_INFINITY)
return maxValue;
}
double sum = 1.0;
for (int i = start; i < finish; i++) {
double curVal = log10p[i];
@ -888,6 +889,7 @@ public class MathUtils {
if (start > endIndex) {
throw new IllegalArgumentException("Start cannot be after end.");
}
int maxI = start;
for (int i = (start+1); i < endIndex; i++) {
if (array[i] > array[maxI])

View File

@ -47,7 +47,14 @@ public class GATKVariantContextUtils {
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
/**
* Diploid NO_CALL allele list...
*
* @deprecated you should use {@link #noCallAlleles(int)} instead. It indicates the presence of a hardcoded diploid assumption which is bad.
*/
@Deprecated
public final static List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
public final static String NON_REF_SYMBOLIC_ALLELE_NAME = "NON_REF";
public final static Allele NON_REF_SYMBOLIC_ALLELE = Allele.create("<"+NON_REF_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible non-ref allele at this site
@ -143,6 +150,26 @@ public class GATKVariantContextUtils {
return ref;
}
/**
* Calculates the total ploidy of a variant context as the sum of all plodies across genotypes.
* @param vc the target variant context.
* @param defaultPloidy the default ploidy to be assume when there is no ploidy information for a genotype.
* @return never {@code null}.
*/
public static int totalPloidy(final VariantContext vc, final int defaultPloidy) {
if (vc == null)
throw new IllegalArgumentException("the vc provided cannot be null");
if (defaultPloidy < 0)
throw new IllegalArgumentException("the default ploidy must 0 or greater");
int result = 0;
for (final Genotype genotype : vc.getGenotypes()) {
final int declaredPloidy = genotype.getPloidy();
result += declaredPloidy <= 0 ? defaultPloidy : declaredPloidy;
}
return result;
}
public enum GenotypeMergeType {
/**
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
@ -1322,6 +1349,28 @@ public class GATKVariantContextUtils {
}
}
/**
* Cached NO_CALL immutable lists where the position ith contains the list with i elements.
*/
private static List<Allele>[] NOCALL_LISTS = new List[] {
Collections.emptyList(),
Collections.singletonList(Allele.NO_CALL),
Collections.nCopies(2,Allele.NO_CALL)
};
/**
* Synchronized code to ensure that {@link #NOCALL_LISTS} has enough entries beyod the requested ploidy
* @param capacity the requested ploidy.
*/
private static synchronized void ensureNoCallListsCapacity(final int capacity) {
final int currentCapacity = NOCALL_LISTS.length - 1;
if (currentCapacity >= capacity)
return;
NOCALL_LISTS = Arrays.copyOf(NOCALL_LISTS,Math.max(capacity,currentCapacity << 1) + 1);
for (int i = currentCapacity + 1; i < NOCALL_LISTS.length; i++)
NOCALL_LISTS[i] = Collections.nCopies(i,Allele.NO_CALL);
}
/**
* Returns a {@link Allele#NO_CALL NO_CALL} allele list provided the ploidy.
*
@ -1331,16 +1380,9 @@ public class GATKVariantContextUtils {
* might or might not be mutable.
*/
public static List<Allele> noCallAlleles(final int ploidy) {
if (ploidy <= 0)
return Collections.EMPTY_LIST;
else if (ploidy == 1)
return Collections.singletonList(Allele.NO_CALL);
else {
final List<Allele> result = new ArrayList<>(ploidy);
for (int i = 0; i < ploidy; i++)
result.add(Allele.NO_CALL);
return result;
}
if (NOCALL_LISTS.length <= ploidy)
ensureNoCallListsCapacity(ploidy);
return NOCALL_LISTS[ploidy];
}

View File

@ -1573,6 +1573,35 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
locs[nextIndex++] = GenomeLoc.UNMAPPED;
}
@Test(dataProvider = "totalPloidyData")
public void testTotalPloidy(final int[] ploidies, final int defaultPloidy, final int expected) {
final Genotype[] genotypes = new Genotype[ploidies.length];
final List<Allele> vcAlleles = Arrays.asList(Aref,C);
for (int i = 0; i < genotypes.length; i++)
genotypes[i] = new GenotypeBuilder().alleles(GATKVariantContextUtils.noCallAlleles(ploidies[i])).make();
final VariantContext vc = new VariantContextBuilder().chr("seq1").genotypes(genotypes).alleles(vcAlleles).make();
Assert.assertEquals(GATKVariantContextUtils.totalPloidy(vc,defaultPloidy),expected," " + defaultPloidy + " " + Arrays.toString(ploidies));
}
@DataProvider(name="totalPloidyData")
public Object[][] totalPloidyData() {
final Random rdn = GenomeAnalysisEngine.getRandomGenerator();
final List<Object[]> resultList = new ArrayList<>();
for (int i = 0; i < 100; i++) {
final int sampleCount = rdn.nextInt(10);
int expected = 0;
final int defaultPloidy = rdn.nextInt(10) + 1;
final int[] plodies = new int[sampleCount];
for (int j = 0; j < sampleCount; j++) {
plodies[j] = rdn.nextInt(10);
expected += plodies[j] == 0 ? defaultPloidy : plodies[j];
}
resultList.add(new Object[] { plodies, defaultPloidy, expected });
}
return resultList.toArray(new Object[100][]);
}
private byte[] randomBases(final int length, final boolean reference) {
final byte[] bases = new byte[length];
bases[0] = (byte) (reference ? 'A' : 'C');