diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/StandardCallerArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/StandardCallerArgumentCollection.java index 3f7794b24..731895b70 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/StandardCallerArgumentCollection.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/StandardCallerArgumentCollection.java @@ -46,10 +46,10 @@ package org.broadinstitute.gatk.engine.arguments; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorImplementation; import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode; import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode; -import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory; import org.broadinstitute.gatk.utils.collections.DefaultHashMap; import htsjdk.variant.variantcontext.VariantContext; @@ -145,7 +145,7 @@ public class StandardCallerArgumentCollection implements Cloneable { */ @Hidden @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false) - public AFCalcFactory.Calculation requestedAlleleFrequencyCalculationModel; + public AFCalculatorImplementation requestedAlleleFrequencyCalculationModel; @Hidden @Argument(shortName = "logExactCalls", doc="x", required=false) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/AFPriorProvider.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/AFPriorProvider.java new file mode 100644 index 000000000..d998d9241 --- /dev/null +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/AFPriorProvider.java @@ -0,0 +1,112 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.tools.walkers.genotyper; + +import java.util.Arrays; + +/** + * Class that produces allele-frequency priors. + * + *

+ * This class is thread-unsafe for performance and so should be used accordingly. + *

+ * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public abstract class AFPriorProvider { + + private double[][] priorByTotalPloidy; + + protected AFPriorProvider() { + + } + + /** + * Returns the priors given a total-ploidy (the total number of genome copies across all samples). + * + *

+ * For performance sake the returned value is a direct reference ot the cached prior, so the client code + * must not modify its content. + *

+ * + * @param totalPloidy the requested total ploidy. + * + * @return never {@code null}, please do not modify its content. An array of {@code totalPloidy + 1} positions where + * the ith position is the log10(prior) probability of the an alternative allele AC to be exactly i copies in + * a draw of {@code totalPloidy} elements. + */ + public double[] forTotalPloidy(final int totalPloidy) { + if (totalPloidy < 0) + throw new IllegalArgumentException("the total-ploidy cannot be negative"); + ensureCapacity(totalPloidy); + final double[] cachedResult = priorByTotalPloidy[totalPloidy]; + if (cachedResult == null) + return priorByTotalPloidy[totalPloidy] = buildPriors(totalPloidy); + else + return cachedResult; + } + + /** + * Make sure that structures have enough capacity to hold the information up to the given total-ploidy. + * @param totalPloidy + */ + protected void ensureCapacity(final int totalPloidy) { + if (priorByTotalPloidy == null) + priorByTotalPloidy = new double[totalPloidy + 1][]; // just enough for those cases in where we have a fix total-ploidy. + else if (priorByTotalPloidy.length - 1 < totalPloidy) + priorByTotalPloidy = Arrays.copyOf(priorByTotalPloidy,Math.max(priorByTotalPloidy.length << 1,totalPloidy + 1)); + } + + /** + * Given a total ploidy construct the allele prior probabilities array. + * @param totalPloidy the target total-ploidy. Code can assume that is a non-negative number. + * + * @return never {@code null}, an array of exactly {@code totalPloidy + 1} positions that satisifed the + * contract {@link #forTotalPloidy(int) forTotalPloidy(totalPloidy)}. + */ + protected abstract double[] buildPriors(final int totalPloidy); + +} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/CustomAFPriorProvider.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/CustomAFPriorProvider.java new file mode 100644 index 000000000..a2ea9e17e --- /dev/null +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/CustomAFPriorProvider.java @@ -0,0 +1,87 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.tools.walkers.genotyper; + +import java.util.List; + +/** + * Custom priors provided as an array. + */ +public class CustomAFPriorProvider extends AFPriorProvider { + + private final double[] priors; + + /** + * + * @param priorValues must exactly the number of genomes in the samples (the total ploidy). + */ + public CustomAFPriorProvider(final List priorValues) { + if (priorValues == null) + throw new IllegalArgumentException("the input prior values cannot be null"); + priors = new double[priorValues.size() + 1]; + + int i = 1; + double sum = 0; + for (double value : priorValues) { + if (value <= 0 || value >= 1) + throw new IllegalArgumentException("the AF prior value "+ value + " is out of the valid interval (0,1)"); + if (Double.isNaN(value)) + throw new IllegalArgumentException("NaN is not a valid prior AF value"); + priors[i++] = Math.log10(value); + sum += value; + } + if (sum >= 1) + throw new IllegalArgumentException("the AF prior value sum must be less than 1: " + sum); + priors[0] = Math.log10(1 - sum); + } + + @Override + protected double[] buildPriors(final int totalPloidy) { + if (totalPloidy != priors.length - 1) + throw new IllegalStateException("requesting an invalid prior total ploidy " + totalPloidy + " != " + (priors.length - 1)); + return priors; + } +} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java index 9206fbc37..d649dcc08 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java @@ -60,6 +60,7 @@ import org.broadinstitute.gatk.utils.collections.Pair; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import htsjdk.variant.variantcontext.*; +import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import java.util.*; @@ -286,8 +287,8 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G builder.attributes(attributes); // create the genotypes; no-call everyone for now final GenotypesContext genotypes = GenotypesContext.create(); - final List noCall = new ArrayList(); - noCall.add(Allele.NO_CALL); + final int ploidy = UAC.genotypeArgs.samplePloidy; + final List noCall = GATKVariantContextUtils.noCallAlleles(ploidy); for ( PoolGenotypeData sampleData : GLs ) { // extract from multidimensional array diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java index bf91b41c3..8bb0d1e76 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java @@ -59,9 +59,9 @@ import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils; import org.broadinstitute.gatk.engine.contexts.ReferenceContext; import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; -import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc; -import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory; -import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider; import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.MathUtils; @@ -81,8 +81,11 @@ import java.util.*; public abstract class GenotypingEngine { public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA"; + public static final String LOW_QUAL_FILTER_NAME = "LowQual"; + protected final AFCalculatorProvider afCalculatorProvider ; + protected Logger logger; protected final Config configuration; @@ -94,26 +97,12 @@ public abstract class GenotypingEngine afcm = getAlleleFrequencyCalculatorThreadLocal(); - - protected ThreadLocal getAlleleFrequencyCalculatorThreadLocal() { - return new ThreadLocal() { - - @Override - public AFCalc initialValue() { - return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, false, logger); - } - }; - } - - /** * Construct a new genotyper engine, on a specific subset of samples. * @@ -124,19 +113,26 @@ public abstract class GenotypingEngine alleles = afcr.getAllelesUsedInGenotyping(); final int alternativeAlleleCount = alleles.size() - 1; @@ -496,14 +496,25 @@ public abstract class GenotypingEngine= configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING, false); } - protected final double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) { + /** + * Returns the log10 prior probability for all possible allele counts from 0 to N where N is the total number of + * genomes (total-ploidy). + * + * @param vc the target variant-context, use to determine the total ploidy thus the possible ACs. + * @param defaultPloidy default ploidy to be assume if we do not have the ploidy for some sample in {@code vc}. + * @param model the calculation model (SNP,INDEL or MIXED) whose priors are to be retrieved. + * @throws java.lang.NullPointerException if either {@code vc} or {@code model} is {@code null} + * @return never {@code null}, an array with exactly total-ploidy(vc) + 1 positions. + */ + protected final double[] getAlleleFrequencyPriors( final VariantContext vc, final int defaultPloidy, final GenotypeLikelihoodsCalculationModel.Model model ) { + final int totalPloidy = GATKVariantContextUtils.totalPloidy(vc,defaultPloidy); switch (model) { case SNP: case GENERALPLOIDYSNP: - return log10AlleleFrequencyPriorsSNPs; + return log10AlleleFrequencyPriorsSNPs.forTotalPloidy(totalPloidy); case INDEL: case GENERALPLOIDYINDEL: - return log10AlleleFrequencyPriorsIndels; + return log10AlleleFrequencyPriorsIndels.forTotalPloidy(totalPloidy); default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); } @@ -552,38 +563,20 @@ public abstract class GenotypingEngine inputPriors) { + private static AFPriorProvider composeAlleleFrequencyPriorProvider(final int N, final double heterozygosity, final List inputPriors) { final double[] priors = new double[N + 1]; double sum = 0.0; + final AFPriorProvider result; if (!inputPriors.isEmpty()) { // user-specified priors if (inputPriors.size() != N) throw new UserException.BadArgumentValue("inputPrior","Invalid length of inputPrior vector: vector length must be equal to # samples +1 "); - - int idx = 1; - for (final double prior: inputPriors) { - if (prior < 0.0) - throw new UserException.BadArgumentValue("Bad argument: negative values not allowed","inputPrior"); - priors[idx++] = Math.log10(prior); - sum += prior; - } + return new CustomAFPriorProvider(inputPriors); } else - // for each i - for (int i = 1; i <= N; i++) { - final double value = heterozygosity / (double)i; - priors[i] = Math.log10(value); - sum += value; - } - - // protection against the case of heterozygosity too high or an excessive number of samples (which break population genetics assumptions) - if (sum > 1.0) - throw new UserException.BadArgumentValue("heterozygosity","The heterozygosity value is set too high relative to the number of samples to be processed, or invalid values specified if input priors were provided - try reducing heterozygosity value or correct input priors."); - // null frequency for AF=0 is (1 - sum(all other frequencies)) - priors[0] = Math.log10(1.0 - sum); - return priors; + return new HeterozygosityAFPriorProvider(heterozygosity); } /** @@ -642,7 +635,7 @@ public abstract class GenotypingEngine composeCallAttributes(final boolean inheritAttributesFromInputVC, final VariantContext vc, final AlignmentContext rawContext, final Map stratifiedContexts, final RefMetaDataTracker tracker, final ReferenceContext refContext, final List alleleCountsofMLE, final boolean bestGuessIsRef, - final AFCalcResult AFresult, final List allAllelesToUse, final GenotypesContext genotypes, + final AFCalculationResult AFresult, final List allAllelesToUse, final GenotypesContext genotypes, final GenotypeLikelihoodsCalculationModel.Model model, final Map perReadAlleleLikelihoodMap) { final HashMap attributes = new HashMap<>(); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/HeterozygosityAFPriorProvider.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/HeterozygosityAFPriorProvider.java new file mode 100644 index 000000000..28e3d4193 --- /dev/null +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/HeterozygosityAFPriorProvider.java @@ -0,0 +1,92 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.tools.walkers.genotyper; + +import org.broadinstitute.gatk.utils.MathUtils; + +import java.util.Arrays; + +/** + * Allele frequency prior provider based on heterozygosity. + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class HeterozygosityAFPriorProvider extends AFPriorProvider { + + private final double heterozygosity; + private final double log10Heterozygosity; + + /** + * Construct a new provider given the heterozygosity value. + * @param heterozygosity must be a valid heterozygosity between larger than 0 and smaller than 1. + * @throws IllegalArgumentException if {@code heterozygosity} is not valid one in the interval (0,1). + */ + public HeterozygosityAFPriorProvider(final double heterozygosity) { + if (heterozygosity <= 0) + throw new IllegalArgumentException("the heterozygosity must be greater than 0"); + if (heterozygosity >= 1) + throw new IllegalArgumentException("the heterozygosity must be less than 1"); + if (Double.isNaN(heterozygosity)) + throw new IllegalArgumentException("the heterozygosity cannot be a NaN"); + this.heterozygosity = heterozygosity; + this.log10Heterozygosity = Math.log10(heterozygosity); + } + + @Override + protected double[] buildPriors(final int totalPloidy) { + final double[] result = new double [totalPloidy + 1]; + Arrays.fill(result, log10Heterozygosity); + result[0] = Double.NEGATIVE_INFINITY; + MathUtils.Log10Cache.ensureCacheContains(totalPloidy); + for (int i = 1; i <= totalPloidy; i++) + result[i] -= MathUtils.Log10Cache.get(i); + final double log10Sum = MathUtils.approximateLog10SumLog10(result); + if (log10Sum >= 0) + throw new IllegalArgumentException("heterosygosity " + heterozygosity + " is too large of total ploidy " + totalPloidy); + result[0] = MathUtils.log10OneMinusPow10(log10Sum); + return result; + } +} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 73b2d5e99..aaf77d56c 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -60,6 +60,7 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.pileup.PileupElement; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import htsjdk.variant.variantcontext.*; +import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import java.util.*; @@ -130,8 +131,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood // create the genotypes; no-call everyone for now GenotypesContext genotypes = GenotypesContext.create(); - final List noCall = new ArrayList(); - noCall.add(Allele.NO_CALL); + final int ploidy = UAC.genotypeArgs.samplePloidy; + final List noCall = GATKVariantContextUtils.noCallAlleles(ploidy); // For each sample, get genotype likelihoods based on pileup // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them. @@ -148,6 +149,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood final GenotypeBuilder b = new GenotypeBuilder(sample.getKey()); final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()), UAC.getSampleContamination().get(sample.getKey())); b.PL(genotypeLikelihoods); + b.alleles(noCall); b.DP(getFilteredDepth(pileup)); genotypes.add(b.make()); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 5e8914c8e..dad2780cd 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -64,6 +64,7 @@ import org.broadinstitute.gatk.utils.pileup.PileupElement; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl; import htsjdk.variant.variantcontext.*; +import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import java.util.*; @@ -180,7 +181,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC // create the genotypes; no-call everyone for now final GenotypesContext genotypes = GenotypesContext.create(); - + final int ploidy = UAC.genotypeArgs.samplePloidy; + final List noCall = GATKVariantContextUtils.noCallAlleles(ploidy); for ( SampleGenotypeData sampleData : GLs ) { final double[] allLikelihoods = sampleData.GL.getLikelihoods(); final double[] myLikelihoods = new double[numLikelihoods]; @@ -193,6 +195,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC final double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(myLikelihoods, false, true); gb.PL(genotypeLikelihoods); gb.DP(sampleData.depth); + gb.alleles(noCall); if (UAC.annotateAllSitesWithPLs) gb.attribute(UnifiedGenotypingEngine.PL_FOR_ALL_SNP_ALLELES_KEY,GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(allLikelihoods, false, true))); genotypes.add(gb.make()); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyper.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyper.java index 9a2d123e5..f75c2be05 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyper.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyper.java @@ -46,10 +46,12 @@ package org.broadinstitute.gatk.tools.walkers.genotyper; -import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; -import org.broadinstitute.gatk.engine.walkers.*; -import org.broadinstitute.gatk.utils.commandline.*; +import htsjdk.variant.variantcontext.GenotypeLikelihoods; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import htsjdk.variant.vcf.*; import org.broadinstitute.gatk.engine.CommandLineGATK; +import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection; import org.broadinstitute.gatk.engine.contexts.AlignmentContext; import org.broadinstitute.gatk.engine.contexts.ReferenceContext; @@ -59,18 +61,18 @@ import org.broadinstitute.gatk.engine.filters.BadMateFilter; import org.broadinstitute.gatk.engine.filters.MappingQualityUnavailableFilter; import org.broadinstitute.gatk.engine.iterators.ReadTransformer; import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; +import org.broadinstitute.gatk.engine.walkers.*; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider; import org.broadinstitute.gatk.utils.SampleUtils; import org.broadinstitute.gatk.utils.baq.BAQ; -import org.broadinstitute.gatk.utils.help.HelpConstants; -import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; -import htsjdk.variant.vcf.*; +import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; -import htsjdk.variant.variantcontext.GenotypeLikelihoods; -import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.gatk.utils.help.HelpConstants; +import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import java.io.PrintStream; import java.util.*; @@ -284,7 +286,9 @@ public class UnifiedGenotyper extends LocusWalker, Unif verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP"); final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); - genotypingEngine = new UnifiedGenotypingEngine(UAC, samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode); + + final AFCalculatorProvider afCalcAFCalculatorProvider = FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),UAC,logger); + genotypingEngine = new UnifiedGenotypingEngine(UAC, samples, toolkit.getGenomeLocParser(), afCalcAFCalculatorProvider, toolkit.getArguments().BAQMode); genotypingEngine.setVerboseWriter(verboseWriter); genotypingEngine.setAnnotationEngine(annotationEngine); @@ -308,6 +312,8 @@ public class UnifiedGenotyper extends LocusWalker, Unif writer.writeHeader(new VCFHeader(headerInfo, samplesForHeader)); } + + public static Set getHeaderInfo(final UnifiedArgumentCollection UAC, final VariantAnnotatorEngine annotationEngine, final DbsnpArgumentCollection dbsnp) { diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java index f2bea114a..4522c2809 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java @@ -54,7 +54,8 @@ import org.broadinstitute.gatk.engine.contexts.AlignmentContext; import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils; import org.broadinstitute.gatk.engine.contexts.ReferenceContext; import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; -import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider; import org.broadinstitute.gatk.utils.BaseUtils; import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.baq.BAQ; @@ -104,9 +105,9 @@ public class UnifiedGenotypingEngine extends GenotypingEngine composeCallAttributes(final boolean inheritAttributesFromInputVC, final VariantContext vc, final AlignmentContext rawContext, final Map stratifiedContexts, final RefMetaDataTracker tracker, final ReferenceContext refContext, final List alleleCountsofMLE, final boolean bestGuessIsRef, - final AFCalcResult AFresult, final List allAllelesToUse, final GenotypesContext genotypes, + final AFCalculationResult AFresult, final List allAllelesToUse, final GenotypesContext genotypes, final GenotypeLikelihoodsCalculationModel.Model model, final Map perReadAlleleLikelihoodMap) { final Map result = super.composeCallAttributes(inheritAttributesFromInputVC, vc,rawContext,stratifiedContexts,tracker,refContext,alleleCountsofMLE,bestGuessIsRef, AFresult,allAllelesToUse,genotypes,model,perReadAlleleLikelihoodMap); @@ -375,7 +376,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine stratifiedContexts, final RefMetaDataTracker tracker, - final ReferenceContext refContext, final AFCalcResult AFresult, + final ReferenceContext refContext, final AFCalculationResult AFresult, final List allAllelesToUse, final GenotypeLikelihoodsCalculationModel.Model model, final Map perReadAlleleLikelihoodMap) { @@ -385,13 +386,13 @@ public class UnifiedGenotypingEngine extends GenotypingEngine= requestedMaxAltAlleles); - } - - public static Calculation getDefaultModel() { return EXACT_INDEPENDENT; } - - /** - * Returns the best (fastest) model give the required ploidy and alternative allele count. - * @param requiredPloidy required ploidy - * @param requiredAlternativeAlleleCount required alternative allele count. - * @param preferredModel a preferred mode if any. A {@code null} indicate that we should be try to use the default instead. - * @return never {@code null} - */ - public static Calculation getBestModel(final int requiredPloidy, final int requiredAlternativeAlleleCount, final Calculation preferredModel) { - final Calculation preferred = preferredModel == null ? getDefaultModel() : preferredModel; - if (preferred.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) - return preferred; - if (EXACT_INDEPENDENT.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) - return EXACT_INDEPENDENT; - if (EXACT_REFERENCE.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) - return EXACT_REFERENCE; - return EXACT_GENERAL_PLOIDY; - } - } - - private static final Map> afClasses; - static { - afClasses = new PluginManager(AFCalc.class).getPluginsByName(); - } - - private AFCalcFactory() { - - } - - private static Class getClassByName(final String name) { - for ( final Class clazz : afClasses.values() ) { - if ( clazz.getSimpleName().contains(name) ) { - return clazz; - } - } - - return null; - } - - /** - * Create a new AFCalc based on the parameters in the UAC - * - * @param UAC the UnifiedArgumentCollection containing the command-line parameters for the caller - * @param nSamples the number of samples we will be using - * @param logger an optional (can be null) logger to override the default in the model - * @return an initialized AFCalc - */ - public static AFCalc createAFCalc(final StandardCallerArgumentCollection UAC, - final int nSamples, final boolean emitConfModel, - final Logger logger) { - final Calculation afCalculationModel = Calculation.getBestModel(UAC.genotypeArgs.samplePloidy,UAC.genotypeArgs.MAX_ALTERNATE_ALLELES, - UAC.requestedAlleleFrequencyCalculationModel); - - final AFCalc calc = createAFCalc(afCalculationModel, nSamples, UAC.genotypeArgs.MAX_ALTERNATE_ALLELES, UAC.genotypeArgs.samplePloidy); - - if ( logger != null ) calc.setLogger(logger); - if ( UAC.exactCallsLog != null ) calc.enableProcessLog(UAC.exactCallsLog); - - return calc; - } - - /** - * Create a new AFCalc, choosing the best implementation based on the given parameters, assuming - * that we will only be requesting bi-allelic variants to diploid genotypes - * - * @param nSamples the number of samples we'll be using - * - * @return an initialized AFCalc - */ - public static AFCalc createAFCalc(final int nSamples) { - return createAFCalc(chooseBestCalculation(nSamples, 2, 1), nSamples, 2, 2); - } - - /** - * Create a new AFCalc that supports maxAltAlleles for all variants and diploid genotypes - * - * @param calc the calculation we'd like to use - * @param nSamples the number of samples we'll be using - * @param maxAltAlleles the max. alt alleles for both SNPs and indels - * - * @return an initialized AFCalc - */ - public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles) { - return createAFCalc(calc, nSamples, maxAltAlleles, 2); - } - - /** - * Create a new AFCalc, choosing the best implementation based on the given parameters - * - * @param nSamples the number of samples we'll be using - * @param maxAltAlleles the max. alt alleles to consider for SNPs - * @param ploidy the sample ploidy. Must be consistent with the calc - * - * @return an initialized AFCalc - */ - public static AFCalc createAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) { - return createAFCalc(chooseBestCalculation(nSamples, ploidy, maxAltAlleles), nSamples, maxAltAlleles, ploidy); - } - - /** - * Choose the best calculation for nSamples and ploidy - * - * @param nSamples - * @param ploidy - * @param maxAltAlleles - * @return - */ - private static Calculation chooseBestCalculation(final int nSamples, final int ploidy, final int maxAltAlleles) { - for ( final Calculation calc : Calculation.values() ) { - if ( calc.usableForParams(ploidy, maxAltAlleles) ) { - return calc; - } - } - - throw new IllegalStateException("no calculation found that supports nSamples " + nSamples + " ploidy " + ploidy + " and maxAltAlleles " + maxAltAlleles); - } - - /** - * Create a new AFCalc - * - * @param calc the calculation to use - * @param nSamples the number of samples we'll be using - * @param maxAltAlleles the max. alt alleles to consider for SNPs - * @param ploidy the sample ploidy. Must be consistent with the calc - * - * @return an initialized AFCalc - */ - public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles, final int ploidy) { - if ( calc == null ) throw new IllegalArgumentException("Calculation cannot be null"); - if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples); - if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles); - if ( ploidy < 1 ) throw new IllegalArgumentException("sample ploidy must be greater than zero " + ploidy); - - if ( ! calc.usableForParams(ploidy, maxAltAlleles) ) - throw new IllegalArgumentException("AFCalc " + calc + " does not support requested ploidy " + ploidy); - - final Class afClass = getClassByName(calc.className); - if ( afClass == null ) - throw new IllegalArgumentException("Unexpected AFCalc " + calc); - - try { - Object args[] = new Object[]{nSamples, maxAltAlleles, ploidy}; - Constructor c = afClass.getDeclaredConstructor(int.class, int.class, int.class); - return (AFCalc)c.newInstance(args); - } catch (Exception e) { - throw new ReviewedGATKException("Could not instantiate AFCalc " + calc, e); - } - } - - protected static List createAFCalcs(final List calcs, final int nSamples, final int maxAltAlleles, final int ploidy) { - final List AFCalcs = new LinkedList(); - - for ( final Calculation calc : calcs ) - AFCalcs.add(createAFCalc(calc, nSamples, maxAltAlleles, ploidy)); - - return AFCalcs; - } -} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcResult.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculationResult.java similarity index 97% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcResult.java rename to protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculationResult.java index b4d22592e..00cd9038a 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcResult.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculationResult.java @@ -63,7 +63,7 @@ import java.util.*; * Note that all of the values -- i.e. priors -- are checked now that they are meaningful, which means * that users of this code can rely on the values coming out of these functions. */ -public class AFCalcResult { +public class AFCalculationResult { private final static int AF0 = 0; private final static int AF1p = 1; private final static int LOG_10_ARRAY_SIZES = 2; @@ -89,12 +89,12 @@ public class AFCalcResult { /** * Create a results object capability of storing results for calls with up to maxAltAlleles */ - public AFCalcResult(final int[] alleleCountsOfMLE, - final int nEvaluations, - final List allelesUsedInGenotyping, - final double[] log10LikelihoodsOfAC, - final double[] log10PriorsOfAC, - final Map log10pRefByAllele) { + public AFCalculationResult(final int[] alleleCountsOfMLE, + final int nEvaluations, + final List allelesUsedInGenotyping, + final double[] log10LikelihoodsOfAC, + final double[] log10PriorsOfAC, + final Map log10pRefByAllele) { if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.size() < 1 ) throw new IllegalArgumentException("allelesUsedInGenotyping must be non-null list of at least 1 value " + allelesUsedInGenotyping); if ( alleleCountsOfMLE == null ) throw new IllegalArgumentException("alleleCountsOfMLE cannot be null"); if ( alleleCountsOfMLE.length != allelesUsedInGenotyping.size() - 1) throw new IllegalArgumentException("alleleCountsOfMLE.length " + alleleCountsOfMLE.length + " != allelesUsedInGenotyping.size() " + allelesUsedInGenotyping.size()); @@ -123,8 +123,8 @@ public class AFCalcResult { * @param log10PriorsOfAC * @return */ - public AFCalcResult withNewPriors(final double[] log10PriorsOfAC) { - return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele); + public AFCalculationResult withNewPriors(final double[] log10PriorsOfAC) { + return new AFCalculationResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele); } /** diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculator.java similarity index 82% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalc.java rename to protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculator.java index ade6de0d6..74249be9d 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalc.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculator.java @@ -61,35 +61,26 @@ import java.util.List; /** * Generic interface for calculating the probability of alleles segregating given priors and genotype likelihoods */ -public abstract class AFCalc implements Cloneable { - private final static Logger defaultLogger = Logger.getLogger(AFCalc.class); +public abstract class AFCalculator implements Cloneable { + private final static Logger defaultLogger = Logger.getLogger(AFCalculator.class); - protected final int nSamples; - protected final int maxAlternateAllelesToGenotype; protected Logger logger = defaultLogger; private SimpleTimer callTimer = new SimpleTimer(); - private final StateTracker stateTracker; + private StateTracker stateTracker; private ExactCallLogger exactCallLogger = null; /** * Create a new AFCalc object capable of calculating the prob. that alleles are - * segregating among nSamples with up to maxAltAlleles for SNPs and maxAltAllelesForIndels - * for indels for samples with ploidy + * segregating among many samples. * - * @param nSamples number of samples, must be > 0 - * @param maxAltAlleles maxAltAlleles for SNPs - * @param ploidy the ploidy, must be > 0 + *

+ * Restrictions in ploidy and number of alternative alleles that a instance can handle will be determined + * by its implementation class {@link AFCalculatorImplementation} + *

*/ - protected AFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) { - if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples); - if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles); - if ( ploidy < 1 ) throw new IllegalArgumentException("ploidy must be > 0 but got " + ploidy); - - this.nSamples = nSamples; - this.maxAlternateAllelesToGenotype = maxAltAlleles; - this.stateTracker = new StateTracker(maxAltAlleles); + protected AFCalculator() { } /** @@ -118,19 +109,18 @@ public abstract class AFCalc implements Cloneable { * @param log10AlleleFrequencyPriors a prior vector nSamples x 2 in length indicating the Pr(AF = i) * @return result (for programming convenience) */ - public AFCalcResult getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { + public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles, final double[] log10AlleleFrequencyPriors) { if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null"); if ( vc.getNAlleles() == 1 ) throw new IllegalArgumentException("VariantContext has only a single reference allele, but getLog10PNonRef requires at least one at all " + vc); if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null"); - if ( stateTracker == null ) throw new IllegalArgumentException("Results object cannot be null"); // reset the result, so we can store our new result there - stateTracker.reset(); + final StateTracker stateTracker = getStateTracker(true,maximumAlternativeAlleles); - final VariantContext vcWorking = reduceScope(vc); + final VariantContext vcWorking = reduceScope(vc,defaultPloidy, maximumAlternativeAlleles); callTimer.start(); - final AFCalcResult result = computeLog10PNonRef(vcWorking, log10AlleleFrequencyPriors); + final AFCalculationResult result = computeLog10PNonRef(vcWorking, defaultPloidy, log10AlleleFrequencyPriors, stateTracker); final long nanoTime = callTimer.getElapsedTimeNano(); if ( exactCallLogger != null ) @@ -140,19 +130,19 @@ public abstract class AFCalc implements Cloneable { } /** - * Convert the final state of the state tracker into our result as an AFCalcResult + * Convert the final state of the state tracker into our result as an AFCalculationResult * * Assumes that stateTracker has been updated accordingly * * @param vcWorking the VariantContext we actually used as input to the calc model (after reduction) * @param log10AlleleFrequencyPriors the priors by AC vector - * @return a AFCalcResult describing the result of this calculation + * @return a AFCalculationResult describing the result of this calculation */ @Requires("stateTracker.getnEvaluations() >= 0") @Ensures("result != null") - protected AFCalcResult getResultFromFinalState(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors) { + protected AFCalculationResult getResultFromFinalState(final VariantContext vcWorking, final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) { stateTracker.setAllelesUsedInGenotyping(vcWorking.getAlleles()); - return stateTracker.toAFCalcResult(log10AlleleFrequencyPriors); + return stateTracker.toAFCalculationResult(log10AlleleFrequencyPriors); } // --------------------------------------------------------------------------- @@ -174,7 +164,7 @@ public abstract class AFCalc implements Cloneable { */ @Requires({"vc != null", "vc.getNAlleles() > 1"}) @Ensures("result != null") - protected abstract VariantContext reduceScope(final VariantContext vc); + protected abstract VariantContext reduceScope(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles); /** * Actually carry out the log10PNonRef calculation on vc, storing results in results @@ -185,8 +175,8 @@ public abstract class AFCalc implements Cloneable { * @return a AFCalcResult object describing the results of this calculation */ @Requires({"vc != null", "log10AlleleFrequencyPriors != null", "vc.getNAlleles() > 1"}) - protected abstract AFCalcResult computeLog10PNonRef(final VariantContext vc, - final double[] log10AlleleFrequencyPriors); + protected abstract AFCalculationResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy, + final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker); /** * Subset VC to the just allelesToUse, updating genotype likelihoods @@ -194,15 +184,15 @@ public abstract class AFCalc implements Cloneable { * Must be overridden by concrete subclasses * * @param vc variant context with alleles and genotype likelihoods + * @param defaultPloidy default ploidy to assume in case {@code vc} does not indicate it for a sample. * @param allelesToUse alleles to subset * @param assignGenotypes - * @param ploidy * @return GenotypesContext object */ public abstract GenotypesContext subsetAlleles(final VariantContext vc, + final int defaultPloidy, final List allelesToUse, - final boolean assignGenotypes, - final int ploidy); + final boolean assignGenotypes); // --------------------------------------------------------------------------- // @@ -210,12 +200,41 @@ public abstract class AFCalc implements Cloneable { // // --------------------------------------------------------------------------- - public int getMaxAltAlleles() { - return maxAlternateAllelesToGenotype; - } - protected StateTracker getStateTracker() { + /** + * Retrieves the state tracker. + * + *

+ * The tracker will be reset if so requested or if it needs to be resized due to an increase in the + * maximum number of alleles is must be able to handle. + *

+ * + * @param reset make sure the tracker is reset. + * @param maximumAlternativeAlleleCount the maximum alternative allele count it must be able to handle. Has no effect if + * the current tracker is able to handle that number. + * + * @return never {@code null} + */ + protected StateTracker getStateTracker(final boolean reset, final int maximumAlternativeAlleleCount) { + if (stateTracker == null) + stateTracker = new StateTracker(maximumAlternativeAlleleCount); + else if (reset) + stateTracker.reset(maximumAlternativeAlleleCount); + else + stateTracker.ensureMaximumAlleleCapacity(maximumAlternativeAlleleCount); return stateTracker; } + /** + * Used by testing code. + * + * Please don't use this method in production. + * + * @deprecated + */ + @Deprecated + protected int getAltAlleleCountOfMAP(final int allele) { + return getStateTracker(false,allele + 1).getAlleleCountsOfMAP()[allele]; + } + } \ No newline at end of file diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorFactory.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorFactory.java new file mode 100644 index 000000000..7630c4aed --- /dev/null +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorFactory.java @@ -0,0 +1,135 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; + +/** + * Factory to make AFCalculations + */ +public class AFCalculatorFactory { + + + + /** + * Create a new AFCalc, choosing the best implementation based on the given parameters, assuming + * that we will only be requesting bi-allelic variants to diploid genotypes + * + * @return an initialized AFCalc + * + * @deprecated hardcoded special reference to diploid analysis... eventually using code must explicitly indicate the + * ploidy as this is a sign of a hidden diploid assumption which is bad. + */ + @Deprecated + public static AFCalculator createCalculatorForDiploidBiAllelicAnalysis() { + return AFCalculatorImplementation.bestValue(2,1,null).newInstance(); + } + + /** + * Create a new AFCalc that supports maxAltAlleles for all variants and diploid genotypes + * + * + * @deprecated hardcoded special reference to diploid analysis... eventually using code must explicitly indicate the + * ploidy as this is a sign of a hidden diploid assumption which is bad. + * @return an initialized AFCalc + */ + @Deprecated + public static AFCalculator createCalculatorForDiploidAnalysis() { + return AFCalculatorImplementation.bestValue(2,AFCalculatorImplementation.UNBOUND_ALTERNATIVE_ALLELE_COUNT,null).newInstance(); + } + + /** + * Create a new AFCalc, choosing the best implementation based on the given parameters + * + * @param nSamples the number of samples we'll be using + * @param maxAltAlleles the max. alt alleles to consider for SNPs + * @param ploidy the sample ploidy. Must be consistent with the calc + * + * @return an initialized AFCalc + */ + public static AFCalculator createCalculator(final int nSamples, final int maxAltAlleles, final int ploidy) { + return createCalculator(chooseBestImplementation(ploidy, maxAltAlleles), nSamples, maxAltAlleles, ploidy); + } + + /** + * Choose the best calculation for nSamples and ploidy + * + * @param ploidy + * @param maxAltAlleles + * @return + */ + private static AFCalculatorImplementation chooseBestImplementation(final int ploidy, final int maxAltAlleles) { + for ( final AFCalculatorImplementation calc : AFCalculatorImplementation.values() ) { + if ( calc.usableForParams(ploidy, maxAltAlleles) ) { + return calc; + } + } + + throw new IllegalStateException("no calculation found that supports nSamples " + ploidy + " and maxAltAlleles " + maxAltAlleles); + } + + /** + * Create a new AFCalc + * + * @param implementation the calculation to use + * @param nSamples the number of samples we'll be using + * @param maxAltAlleles the max. alt alleles to consider for SNPs + * @param ploidy the sample ploidy. Must be consistent with the implementation + * + * @return an initialized AFCalc + */ + public static AFCalculator createCalculator(final AFCalculatorImplementation implementation, final int nSamples, final int maxAltAlleles, final int ploidy) { + if ( implementation == null ) throw new IllegalArgumentException("Calculation cannot be null"); + if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples); + if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles); + if ( ploidy < 1 ) throw new IllegalArgumentException("sample ploidy must be greater than zero " + ploidy); + + if ( ! implementation.usableForParams(ploidy, maxAltAlleles) ) + throw new IllegalArgumentException("AFCalc " + implementation + " does not support requested ploidy " + ploidy); + + return implementation.newInstance(); + } + +} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorImplementation.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorImplementation.java new file mode 100644 index 000000000..b227189a0 --- /dev/null +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorImplementation.java @@ -0,0 +1,236 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Modifier; +import java.util.HashMap; +import java.util.Map; + +/** + * Enumeration of usable AF calculation, their constraints (i.e. ploidy). + * + * Note that the order these occur in the enum is the order of preference, so + * the first value is taken over the second when multiple calculations satisfy + * the needs of the request (i.e., considering ploidy). + */ +public enum AFCalculatorImplementation { + + /** default implementation */ + EXACT_INDEPENDENT(IndependentAllelesDiploidExactAFCalculator.class, 2), + + /** reference implementation of multi-allelic EXACT model. Extremely slow for many alternate alleles */ + EXACT_REFERENCE(ReferenceDiploidExactAFCalculator.class, 2), + + /** original biallelic exact model, for testing only */ + EXACT_ORIGINAL(OriginalDiploidExactAFCalculator.class, 2, 2), + + /** implementation that supports any sample ploidy. Currently not available for the HaplotypeCaller */ + EXACT_GENERAL_PLOIDY(GeneralPloidyExactAFCalculator.class); + + /** + * Special max alt allele count indicating that this maximum is in fact unbound (can be anything). + */ + public final static int UNBOUND_ALTERNATIVE_ALLELE_COUNT = -1; + + /** + * Special ploidy constant that indicates that in fact the ploidy is unbound (can be anything). + */ + public final static int UNBOUND_PLOIDY = -1; + + private static Map,AFCalculatorImplementation> calculatorClassToValue = buildCalculatorClassToValueMap(); + + /** + * Reference to the calculator class. + */ + public final Class calculatorClass; + + /** + * Maximum number of supported alternative alleles. + */ + public final int maxAltAlleles; + + /** + * Reference to the constructor to instantiate a calculator for this implementation. + */ + protected final Constructor constructor; + + /** + * Supported ploidy. + * + * This is equal to {@link #UNBOUND_PLOIDY} if the class can handle any ploidy. + */ + public final int requiredPloidy; + + /** + * Reference to the default implementation. + */ + public final static AFCalculatorImplementation DEFAULT = EXACT_INDEPENDENT; + + + /** + * Constructs a new instance given all its properties + * @param clazz the calculator class that realizes this implementation. + * @param requiredPloidy the required ploidy; zero or greater or {@link #UNBOUND_PLOIDY} to indicate that any ploidy is supported. + * @param maxAltAlleles the maximum alternative alleles; zero or greater or {@link #UNBOUND_ALTERNATIVE_ALLELE_COUNT} to indicate that any maximum number of alternative alleles is supported. + */ + AFCalculatorImplementation(final Class clazz, final int requiredPloidy, final int maxAltAlleles) { + calculatorClass = clazz; + this.requiredPloidy = requiredPloidy; + this.maxAltAlleles = maxAltAlleles; + this.constructor = findInstantiationConstructor(calculatorClass); + } + + /** + * Constructs a new instance leaving ploidy and max-allele count unbound. + * @param clazz the calculator class that realizes this implementation. + */ + AFCalculatorImplementation(final Class clazz) { + this(clazz,UNBOUND_PLOIDY, UNBOUND_ALTERNATIVE_ALLELE_COUNT); + } + + /** Constructs a new instance leaving max-allele count unbound. + * @param clazz the calculator class that realizes this implementation. + * @param requiredPloidy the required ploidy; zero or greater or {@link #UNBOUND_PLOIDY} to indicate that any ploidy is supported. + */ + AFCalculatorImplementation(final Class clazz, final int requiredPloidy) { + this(clazz,requiredPloidy,UNBOUND_PLOIDY); + } + + /** + * Checks whether a given ploidy and max alternative alleles combination is supported or not. + * @param requestedPloidy the targeted ploidy. + * @param requestedMaxAltAlleles the targeted max alternative alleles. + * @return {@code true} iff this calculator implementation satisfies both requirements. + */ + public boolean usableForParams(final int requestedPloidy, final int requestedMaxAltAlleles) { + return (requiredPloidy == UNBOUND_PLOIDY || requiredPloidy == requestedPloidy) + && (maxAltAlleles == UNBOUND_ALTERNATIVE_ALLELE_COUNT || maxAltAlleles >= requestedMaxAltAlleles); + } + + /** + * Resolve the constructor to use to instantiate calculators. + * + * @param clazz target class. Assume not to be {@code null}. + */ + private Constructor findInstantiationConstructor(final Class clazz) { + if (Modifier.isAbstract(clazz.getModifiers())) + throw new IllegalStateException("AF calculator implementation class cannot be abstract"); + + final Constructor result; + try { + result = clazz.getDeclaredConstructor(); + } catch (final NoSuchMethodException e) { + throw new IllegalStateException("cannot find a suitable (int,int) constructor for the AFCalculator implementation " + this + " class " + clazz.getName()); + } + + // Check whether there will be issue calling the constructor just due to protections: + if (Modifier.isPrivate(result.getModifiers()) || (!Modifier.isPublic(result.getModifiers()) && !clazz.getPackage().equals(getClass().getPackage()))) + throw new IllegalStateException("triple int constructor for AFCalculator implementation " + this + " class " + clazz.getName() + " is not public "); + return result; + } + + /** + * Creates new instance + * + * @throws IllegalStateException if the instance could not be create due to some exception. The {@link Exception#getCause() cause} will hold a reference to the actual exception. + * @return never {@code null}. + */ + public AFCalculator newInstance() { + try { + return constructor.newInstance(); + } catch (final Throwable e) { + throw new IllegalStateException("could not instantiate AFCalculator for implementation " + this + " class " + calculatorClass.getName()); + } + } + + /** + * Returns the best (fastest) model give the required ploidy and alternative allele count. + * + * @param requiredPloidy required ploidy + * @param requiredAlternativeAlleleCount required alternative allele count. + * @param preferred a preferred mode if any. A {@code null} indicate that we should be try to use the default instead. + * @return never {@code null} + */ + public static AFCalculatorImplementation bestValue(final int requiredPloidy, final int requiredAlternativeAlleleCount, final AFCalculatorImplementation preferred) { + final AFCalculatorImplementation preferredValue = preferred == null ? DEFAULT : preferred; + if (preferredValue.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) + return preferredValue; + if (EXACT_INDEPENDENT.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) + return EXACT_INDEPENDENT; + if (EXACT_REFERENCE.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) + return EXACT_REFERENCE; + return EXACT_GENERAL_PLOIDY; + } + + /** + * Returns the value that corresponds to a given implementation calculator class. + * + * @param clazz the target class. + * + * @throws IllegalArgumentException if {@code clazz} is {@code null} or if it is abstract. + * @throws IllegalStateException if + * + * @return never {@code null}. + */ + public static AFCalculatorImplementation fromCalculatorClass(final Class clazz) { + if (clazz == null) + throw new IllegalArgumentException("input class cannot be null"); + final AFCalculatorImplementation result = calculatorClassToValue.get(clazz); + if (result == null) + throw new IllegalStateException("Attempt to retrieve AFCalculatorImplementation instance from a non-registered calculator class " + clazz.getName()); + return result; + } + + // Initializes the content of the class to value map. + private static Map, AFCalculatorImplementation> buildCalculatorClassToValueMap() { + final Map,AFCalculatorImplementation> result = new HashMap<>(values().length); + for (final AFCalculatorImplementation value : values()) + if (result.put(value.calculatorClass,value) != null) + throw new IllegalStateException("more than one value associated with class " + value.calculatorClass.getName()); + return result; + } +} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcPerformanceTest.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorPerformanceTest.java similarity index 87% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcPerformanceTest.java rename to protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorPerformanceTest.java index 4895ab303..2b1f1b17e 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcPerformanceTest.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorPerformanceTest.java @@ -60,6 +60,7 @@ import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; +import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants; import java.io.*; import java.util.*; @@ -68,8 +69,8 @@ import java.util.*; * A simple GATK utility (i.e, runs from command-line) for assessing the performance of * the exact model */ -public class AFCalcPerformanceTest { - final static Logger logger = Logger.getLogger(AFCalcPerformanceTest.class); +public class AFCalculatorPerformanceTest { + final static Logger logger = Logger.getLogger(AFCalculatorPerformanceTest.class); private static abstract class Analysis { final GATKReport report; @@ -78,7 +79,7 @@ public class AFCalcPerformanceTest { report = GATKReport.newSimpleReport(name, columns); } - public abstract void run(final AFCalcTestBuilder testBuilder, + public abstract void run(final AFCalculatorTestBuilder testBuilder, final List coreColumns); public String getName() { @@ -95,18 +96,18 @@ public class AFCalcPerformanceTest { super("AnalyzeByACAndPL", Utils.append(columns, "non.type.pls", "ac", "n.alt.seg", "other.ac")); } - public void run(final AFCalcTestBuilder testBuilder, final List coreValues) { + public void run(final AFCalculatorTestBuilder testBuilder, final List coreValues) { final SimpleTimer timer = new SimpleTimer(); for ( final int nonTypePL : Arrays.asList(100) ) { - final AFCalc calc = testBuilder.makeModel(); + final AFCalculator calc = testBuilder.makeModel(); final double[] priors = testBuilder.makePriors(); for ( int[] ACs : makeACs(testBuilder.numAltAlleles, testBuilder.nSamples*2) ) { final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL); timer.start(); - final AFCalcResult resultTracker = calc.getLog10PNonRef(vc, priors); + final AFCalculationResult resultTracker = calc.getLog10PNonRef(vc, HomoSapiensConstants.DEFAULT_PLOIDY, testBuilder.numAltAlleles, priors); final long runtime = timer.getElapsedTimeNano(); int otherAC = 0; @@ -154,11 +155,11 @@ public class AFCalcPerformanceTest { super("AnalyzeBySingletonPosition", Utils.append(columns, "non.type.pls", "position.of.singleton")); } - public void run(final AFCalcTestBuilder testBuilder, final List coreValues) { + public void run(final AFCalculatorTestBuilder testBuilder, final List coreValues) { final SimpleTimer timer = new SimpleTimer(); for ( final int nonTypePL : Arrays.asList(100) ) { - final AFCalc calc = testBuilder.makeModel(); + final AFCalculator calc = testBuilder.makeModel(); final double[] priors = testBuilder.makePriors(); final int[] ac = new int[testBuilder.numAltAlleles]; @@ -172,7 +173,7 @@ public class AFCalcPerformanceTest { vcb.genotypes(genotypes); timer.start(); - final AFCalcResult resultTracker = calc.getLog10PNonRef(vcb.make(), priors); + final AFCalculationResult resultTracker = calc.getLog10PNonRef(vcb.make(), HomoSapiensConstants.DEFAULT_PLOIDY, testBuilder.numAltAlleles, priors); final long runtime = timer.getElapsedTimeNano(); final List columns = new LinkedList(coreValues); @@ -188,11 +189,11 @@ public class AFCalcPerformanceTest { super("AnalyzeByNonInformative", Utils.append(columns, "non.type.pls", "n.non.informative")); } - public void run(final AFCalcTestBuilder testBuilder, final List coreValues) { + public void run(final AFCalculatorTestBuilder testBuilder, final List coreValues) { final SimpleTimer timer = new SimpleTimer(); for ( final int nonTypePL : Arrays.asList(100) ) { - final AFCalc calc = testBuilder.makeModel(); + final AFCalculator calc = testBuilder.makeModel(); final double[] priors = testBuilder.makePriors(); final int[] ac = new int[testBuilder.numAltAlleles]; @@ -202,7 +203,7 @@ public class AFCalcPerformanceTest { final VariantContext vc = testBuilder.makeACTest(ac, nNonInformative, nonTypePL); timer.start(); - final AFCalcResult resultTracker = calc.getLog10PNonRef(vc, priors); + final AFCalculationResult resultTracker = calc.getLog10PNonRef(vc, HomoSapiensConstants.DEFAULT_PLOIDY, testBuilder.numAltAlleles, priors); final long runtime = timer.getElapsedTimeNano(); final List columns = new LinkedList(coreValues); @@ -214,10 +215,10 @@ public class AFCalcPerformanceTest { } private static class ModelParams { - final AFCalcFactory.Calculation modelType; + final AFCalculatorImplementation modelType; final int maxBiNSamples, maxTriNSamples; - private ModelParams(AFCalcFactory.Calculation modelType, int maxBiNSamples, int maxTriNSamples) { + private ModelParams(AFCalculatorImplementation modelType, int maxBiNSamples, int maxTriNSamples) { this.modelType = modelType; this.maxBiNSamples = maxBiNSamples; this.maxTriNSamples = maxTriNSamples; @@ -238,6 +239,7 @@ public class AFCalcPerformanceTest { SINGLE, EXACT_LOG } + public static void main(final String[] args) throws Exception { final TTCCLayout layout = new TTCCLayout(); layout.setThreadPrinting(false); @@ -269,12 +271,12 @@ public class AFCalcPerformanceTest { final List loggedCalls = ExactCallLogger.readExactLog(reader, startsToUse, parser); for ( final ExactCallLogger.ExactCall call : loggedCalls ) { - final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1, - AFCalcFactory.Calculation.EXACT_INDEPENDENT, - AFCalcTestBuilder.PriorType.human); + final AFCalculatorTestBuilder testBuilder = new AFCalculatorTestBuilder(call.vc.getNSamples(), 1, + AFCalculatorImplementation.EXACT_INDEPENDENT, + AFCalculatorTestBuilder.PriorType.human); logger.info(call); final SimpleTimer timer = new SimpleTimer().start(); - final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, testBuilder.makePriors()); + final AFCalculationResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, HomoSapiensConstants.DEFAULT_PLOIDY, testBuilder.numAltAlleles,testBuilder.makePriors()); final long newNanoTime = timer.getElapsedTimeNano(); if ( call.originalCall.anyPolymorphic(-1) || result.anyPolymorphic(-1) ) { logger.info("**** ONE IS POLY"); @@ -297,14 +299,14 @@ public class AFCalcPerformanceTest { final int nSamples = Integer.valueOf(args[1]); final int ac = Integer.valueOf(args[2]); - final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(nSamples, 1, - AFCalcFactory.Calculation.EXACT_INDEPENDENT, - AFCalcTestBuilder.PriorType.human); + final AFCalculatorTestBuilder testBuilder = new AFCalculatorTestBuilder(nSamples, 1, + AFCalculatorImplementation.EXACT_INDEPENDENT, + AFCalculatorTestBuilder.PriorType.human); final VariantContext vc = testBuilder.makeACTest(new int[]{ac}, 0, 100); final SimpleTimer timer = new SimpleTimer().start(); - final AFCalcResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vc, testBuilder.makePriors()); + final AFCalculationResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vc, HomoSapiensConstants.DEFAULT_PLOIDY, testBuilder.numAltAlleles, testBuilder.makePriors()); final long runtime = timer.getElapsedTimeNano(); logger.info("result " + resultTracker.getLog10PosteriorOfAFGT0()); logger.info("runtime " + runtime); @@ -317,14 +319,14 @@ public class AFCalcPerformanceTest { final PrintStream out = new PrintStream(new FileOutputStream(args[1])); final List modelParams = Arrays.asList( - new ModelParams(AFCalcFactory.Calculation.EXACT_REFERENCE, 10000, 10), + new ModelParams(AFCalculatorImplementation.EXACT_REFERENCE, 10000, 10), // new ModelParams(AFCalcTestBuilder.ModelType.GeneralExact, 100, 10), - new ModelParams(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 10000, 1000)); + new ModelParams(AFCalculatorImplementation.EXACT_INDEPENDENT, 10000, 1000)); final boolean ONLY_HUMAN_PRIORS = false; - final List priorTypes = ONLY_HUMAN_PRIORS - ? Arrays.asList(AFCalcTestBuilder.PriorType.values()) - : Arrays.asList(AFCalcTestBuilder.PriorType.human); + final List priorTypes = ONLY_HUMAN_PRIORS + ? Arrays.asList(AFCalculatorTestBuilder.PriorType.values()) + : Arrays.asList(AFCalculatorTestBuilder.PriorType.human); final List analyzes = new ArrayList(); analyzes.add(new AnalyzeByACAndPL(coreColumns)); @@ -336,9 +338,9 @@ public class AFCalcPerformanceTest { for ( final int nSamples : Arrays.asList(1, 10, 100, 1000, 10000) ) { for ( final ModelParams modelToRun : modelParams) { if ( modelToRun.meetsConstraints(nAltAlleles, nSamples) ) { - for ( final AFCalcTestBuilder.PriorType priorType : priorTypes ) { - final AFCalcTestBuilder testBuilder - = new AFCalcTestBuilder(nSamples, nAltAlleles, modelToRun.modelType, priorType); + for ( final AFCalculatorTestBuilder.PriorType priorType : priorTypes ) { + final AFCalculatorTestBuilder testBuilder + = new AFCalculatorTestBuilder(nSamples, nAltAlleles, modelToRun.modelType, priorType); for ( final Analysis analysis : analyzes ) { logger.info(Utils.join("\t", Arrays.asList(iteration, nAltAlleles, nSamples, modelToRun.modelType, priorType, analysis.getName()))); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorProvider.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorProvider.java new file mode 100644 index 000000000..57b710ba1 --- /dev/null +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorProvider.java @@ -0,0 +1,101 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; + +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.GenotypesContext; +import htsjdk.variant.variantcontext.VariantContext; + +/** + * Instantiates Exact AF calculators given the required ploidy specs. + * + *

Unless you know better implementations of this class might return the same instance several times + * and so the client code might need to make sure that there are no collisions or race conditions.

+ * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public abstract class AFCalculatorProvider { + + /** + * Returns a AF calculator capable to handle a particular variant-context. + * @param variantContext the target context build. + * @param defaultPloidy the assumed ploidy in case that there is no a GT call present to determine it. + * @return never {@code null} + */ + public AFCalculator getInstance(final VariantContext variantContext, final int defaultPloidy, final int maximumAltAlleles) { + if (variantContext == null) + throw new IllegalArgumentException("variant context cannot be null"); + + final int sampleCount = variantContext.getNSamples(); + if (sampleCount == 0) + return getInstance(defaultPloidy,maximumAltAlleles); + + final GenotypesContext genotypes = variantContext.getGenotypes(); + + final Genotype firstGenotype = genotypes.get(0); + int ploidy = firstGenotype.getPloidy(); + if (ploidy <= 0) ploidy = defaultPloidy; + for (int i = 1 ; i < sampleCount; i++) { + final Genotype genotype = genotypes.get(i); + final int declaredPloidy = genotype.getPloidy(); + final int actualPloidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy; + if (actualPloidy != ploidy) { + ploidy = AFCalculatorImplementation.UNBOUND_PLOIDY; + break; + } + } + return getInstance(ploidy,Math.min(variantContext.getNAlleles() - 1, maximumAltAlleles)); + } + + /** + * Returns a AF calculator given the required homogeneous ploidy and allele count (including the reference). + * @param ploidy the required ploidy. + * @param maximumAltAlleles the allele count. + * @return never {@code null} + */ + public abstract AFCalculator getInstance(final int ploidy, final int maximumAltAlleles); + +} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcTestBuilder.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorTestBuilder.java similarity index 96% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcTestBuilder.java rename to protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorTestBuilder.java index 9e8d18ddf..7a3ce5203 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcTestBuilder.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorTestBuilder.java @@ -51,13 +51,14 @@ import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingEngine; import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.Utils; import htsjdk.variant.variantcontext.*; +import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; -public class AFCalcTestBuilder { +public class AFCalculatorTestBuilder { final static Allele A = Allele.create("A", true); final static Allele C = Allele.create("C"); final static Allele G = Allele.create("G"); @@ -70,11 +71,11 @@ public class AFCalcTestBuilder { final int nSamples; final int numAltAlleles; - final AFCalcFactory.Calculation modelType; + final AFCalculatorImplementation modelType; final PriorType priorType; - public AFCalcTestBuilder(final int nSamples, final int numAltAlleles, - final AFCalcFactory.Calculation modelType, final PriorType priorType) { + public AFCalculatorTestBuilder(final int nSamples, final int numAltAlleles, + final AFCalculatorImplementation modelType, final PriorType priorType) { this.nSamples = nSamples; this.numAltAlleles = numAltAlleles; this.modelType = modelType; @@ -99,8 +100,8 @@ public class AFCalcTestBuilder { return nSamples; } - public AFCalc makeModel() { - return AFCalcFactory.createAFCalc(modelType, nSamples, getNumAltAlleles(), 2); + public AFCalculator makeModel() { + return AFCalculatorFactory.createCalculator(modelType, nSamples, getNumAltAlleles(), HomoSapiensConstants.DEFAULT_PLOIDY); } public double[] makePriors() { diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ConcurrentAFCalculatorProvider.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ConcurrentAFCalculatorProvider.java new file mode 100644 index 000000000..3c764f155 --- /dev/null +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ConcurrentAFCalculatorProvider.java @@ -0,0 +1,83 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; + + +import htsjdk.variant.variantcontext.VariantContext; + +/** + * Produces independent AF calculators per thread. + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public abstract class ConcurrentAFCalculatorProvider extends AFCalculatorProvider { + + private final ThreadLocal threadLocal; + + /** + * Create a new concurrent af-calculator provider instance. + */ + public ConcurrentAFCalculatorProvider() { + threadLocal = new ThreadLocal() { + public AFCalculatorProvider initialValue() { + return createProvider(); + } + }; + } + + @Override + public AFCalculator getInstance(final VariantContext vc, final int defaultPloidy, final int maxAltAlleleCount) { + return threadLocal.get().getInstance(vc,defaultPloidy,maxAltAlleleCount); + } + + + @Override + public AFCalculator getInstance(final int ploidy, final int maxAltAlleleCount) { + return threadLocal.get().getInstance(ploidy, maxAltAlleleCount); + } + + protected abstract AFCalculatorProvider createProvider(); +} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/DiploidExactAFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/DiploidExactAFCalculator.java similarity index 93% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/DiploidExactAFCalc.java rename to protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/DiploidExactAFCalculator.java index 3399d8ace..dc07fc2ae 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/DiploidExactAFCalc.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/DiploidExactAFCalculator.java @@ -55,19 +55,18 @@ import htsjdk.variant.variantcontext.VariantContext; import java.util.*; -public abstract class DiploidExactAFCalc extends ExactAFCalc { +public abstract class DiploidExactAFCalculator extends ExactAFCalculator { private static final double LOG10_OF_2 = MathUtils.Log10Cache.get(2); - public DiploidExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) { - super(nSamples, maxAltAlleles, ploidy); - if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy); + public DiploidExactAFCalculator() { } @Override - protected AFCalcResult computeLog10PNonRef(final VariantContext vc, - final double[] log10AlleleFrequencyPriors) { + protected AFCalculationResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy, + final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) { final int numAlternateAlleles = vc.getNAlleles() - 1; + final ArrayList genotypeLikelihoods = getGLs(vc.getGenotypes(), true); final int numSamples = genotypeLikelihoods.size()-1; final int numChr = 2*numSamples; @@ -85,12 +84,13 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { indexesToACset.put(zeroSet.getACcounts(), zeroSet); while ( !ACqueue.isEmpty() ) { - getStateTracker().incNEvaluations(); // keep track of the number of evaluations + stateTracker.incNEvaluations(); // keep track of the number of evaluations // compute log10Likelihoods final ExactACset set = ACqueue.remove(); - calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors); + calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, + indexesToACset, log10AlleleFrequencyPriors,stateTracker); // clean up memory indexesToACset.remove(set.getACcounts()); @@ -98,17 +98,17 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { // System.out.printf(" *** removing used set=%s%n", set.ACcounts); } - return getResultFromFinalState(vc, log10AlleleFrequencyPriors); + return getResultFromFinalState(vc, log10AlleleFrequencyPriors, stateTracker); } @Override - protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List allelesToUse) { + protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List allelesToUse) { return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL); } @Override - protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums) { + protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final int defaultPloidy, final LikelihoodSum[] likelihoodSums) { final ArrayList GLs = getGLs(vc.getGenotypes(), true); for ( final double[] likelihoods : GLs ) { final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods); @@ -140,18 +140,19 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { final int numChr, final LinkedList ACqueue, final HashMap indexesToACset, - final double[] log10AlleleFrequencyPriors) { + final double[] log10AlleleFrequencyPriors, + final StateTracker stateTracker) { //if ( DEBUG ) // System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts); // compute the log10Likelihoods - computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors); + computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors, stateTracker); final double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1]; // can we abort early because the log10Likelihoods are so small? - if ( getStateTracker().abort(log10LofK, set.getACcounts(), true) ) { + if ( stateTracker.abort(log10LofK, set.getACcounts(), true) ) { //if ( DEBUG ) // System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); return log10LofK; @@ -227,7 +228,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { private void computeLofK(final ExactACset set, final ArrayList genotypeLikelihoods, - final double[] log10AlleleFrequencyPriors) { + final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) { set.getLog10Likelihoods()[0] = 0.0; // the zero case final int totalK = set.getACsum(); @@ -238,8 +239,8 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { set.getLog10Likelihoods()[j] = set.getLog10Likelihoods()[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX]; final double log10Lof0 = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1]; - getStateTracker().setLog10LikelihoodOfAFzero(log10Lof0); - getStateTracker().setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); + stateTracker.setLog10LikelihoodOfAFzero(log10Lof0); + stateTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); return; } @@ -261,7 +262,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1]; // update the MLE if necessary - getStateTracker().updateMLEifNeeded(log10LofK, set.getACcounts().getCounts()); + stateTracker.updateMLEifNeeded(log10LofK, set.getACcounts().getCounts()); // apply the priors over each alternate allele for ( final int ACcount : set.getACcounts().getCounts() ) { @@ -269,7 +270,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { log10LofK += log10AlleleFrequencyPriors[ACcount]; } - getStateTracker().updateMAPifNeeded(log10LofK, set.getACcounts().getCounts()); + stateTracker.updateMAPifNeeded(log10LofK, set.getACcounts().getCounts()); } private void pushData(final ExactACset targetSet, @@ -324,12 +325,15 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { return coeff; } + @Override public GenotypesContext subsetAlleles(final VariantContext vc, + final int defaultPloidy, final List allelesToUse, - final boolean assignGenotypes, - final int ploidy) { + final boolean assignGenotypes) { + if (defaultPloidy != 2) + throw new IllegalArgumentException("cannot support ploidy different than 2 and the default ploidy is " + defaultPloidy); return allelesToUse.size() == 1 - ? GATKVariantContextUtils.subsetToRefOnly(vc, ploidy) + ? GATKVariantContextUtils.subsetToRefOnly(vc, 2) : GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes ? GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN : GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL); } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalculator.java similarity index 93% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalc.java rename to protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalculator.java index d1aa86bfe..d1ab3871d 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalc.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactAFCalculator.java @@ -55,10 +55,11 @@ import java.util.*; /** * Uses the Exact calculation of Heng Li */ -abstract class ExactAFCalc extends AFCalc { +abstract class ExactAFCalculator extends AFCalculator { + protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first /** - * Sorts {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first. + * Sorts {@link ExactAFCalculator.LikelihoodSum} instances where those with higher likelihood are first. */ protected static final Comparator LIKELIHOOD_SUM_COMPARATOR = new Comparator() { @@ -68,7 +69,7 @@ abstract class ExactAFCalc extends AFCalc { } }; /** - * Sorts {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first but make sure that + * Sorts {@link ExactAFCalculator.LikelihoodSum} instances where those with higher likelihood are first but make sure that * NON_REF alleles are place are last. */ protected static final Comparator LIKELIHOOD_NON_REF_THEN_SUM_COMPARATOR = new Comparator() { @@ -83,7 +84,7 @@ abstract class ExactAFCalc extends AFCalc { } }; /** - * Sorts {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with lower alternative allele index are first regardless of + * Sorts {@link ExactAFCalculator.LikelihoodSum} instances where those with lower alternative allele index are first regardless of * the likelihood sum. */ protected static final Comparator LIKELIHOOD_INDEX_COMPARATOR = new Comparator() { @@ -93,8 +94,8 @@ abstract class ExactAFCalc extends AFCalc { } }; - protected ExactAFCalc(final int nSamples, int maxAltAlleles, final int ploidy) { - super(nSamples, maxAltAlleles, ploidy); + protected ExactAFCalculator() { + } /** @@ -135,10 +136,10 @@ abstract class ExactAFCalc extends AFCalc { } @Override - protected VariantContext reduceScope(final VariantContext vc) { + protected VariantContext reduceScope(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles) { // don't try to genotype too many alternate alleles final List inputAltAlleles = vc.getAlternateAlleles(); - final List outputAltAlleles = reduceScopeAlleles(vc,getMaxAltAlleles()); + final List outputAltAlleles = reduceScopeAlleles(vc, defaultPloidy, maximumAlternativeAlleles); // only if output allele has reduced from the input alt allele set size we should care. final int altAlleleReduction = inputAltAlleles.size() - outputAltAlleles.size(); @@ -146,17 +147,17 @@ abstract class ExactAFCalc extends AFCalc { if (altAlleleReduction == 0) return vc; else if (altAlleleReduction != 0) { - logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles() + logger.warn("this tool is currently set to genotype at most " + maximumAlternativeAlleles + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument"); - final List alleles = new ArrayList<>(getMaxAltAlleles() + 1); + final List alleles = new ArrayList<>(maximumAlternativeAlleles + 1); alleles.add(vc.getReference()); - alleles.addAll(reduceScopeAlleles(vc, getMaxAltAlleles())); + alleles.addAll(reduceScopeAlleles(vc, defaultPloidy, maximumAlternativeAlleles)); final VariantContextBuilder builder = new VariantContextBuilder(vc); builder.alleles(alleles); - builder.genotypes(reduceScopeGenotypes(vc, alleles)); + builder.genotypes(reduceScopeGenotypes(vc, defaultPloidy, alleles)); if (altAlleleReduction < 0) throw new IllegalStateException("unexpected: reduction increased the number of alt. alleles!: " + - altAlleleReduction + " " + vc + " " + builder.make()); return builder.make(); @@ -170,7 +171,7 @@ abstract class ExactAFCalc extends AFCalc { * @param numAllelesToChoose number of alleles to keep. * @return the list of alternative allele to keep. */ - protected List reduceScopeAlleles(final VariantContext vc, final int numAllelesToChoose) { + protected List reduceScopeAlleles(final VariantContext vc, final int defaultPloidy, final int numAllelesToChoose) { // Look for the allele to exclude it from the pruning if present. final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); @@ -194,7 +195,7 @@ abstract class ExactAFCalc extends AFCalc { } // Calculate the allele likelihood sums. - reduceScopeCalculateLikelihoodSums(vc, likelihoodSums); + reduceScopeCalculateLikelihoodSums(vc, defaultPloidy, likelihoodSums); // sort them by probability mass and choose the best ones // Make sure that the allele is last if present. @@ -227,7 +228,7 @@ abstract class ExactAFCalc extends AFCalc { * @param vc source variant context. * @param likelihoodSums where to update the likelihood sums. */ - protected abstract void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums); + protected abstract void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final int defaultPloidy, final LikelihoodSum[] likelihoodSums); /** * Transforms the genotypes of the variant context according to the new subset of possible alleles. @@ -236,5 +237,5 @@ abstract class ExactAFCalc extends AFCalc { * @param allelesToUse possible alleles. * @return never {@code null}, the new set of genotype calls for the reduced scope. */ - protected abstract GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List allelesToUse); + protected abstract GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List allelesToUse); } \ No newline at end of file diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactCallLogger.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactCallLogger.java index a6b936e61..6ff430be9 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactCallLogger.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ExactCallLogger.java @@ -83,9 +83,9 @@ public class ExactCallLogger implements Cloneable { public static class ExactCall { final VariantContext vc; final long runtime; - final AFCalcResult originalCall; + final AFCalculationResult originalCall; - public ExactCall(VariantContext vc, final long runtime, final AFCalcResult originalCall) { + public ExactCall(VariantContext vc, final long runtime, final AFCalculationResult originalCall) { this.vc = vc; this.runtime = runtime; this.originalCall = originalCall; @@ -103,7 +103,7 @@ public class ExactCallLogger implements Cloneable { protected final void printCallInfo(final VariantContext vc, final double[] log10AlleleFrequencyPriors, final long runtimeNano, - final AFCalcResult result) { + final AFCalculationResult result) { printCallElement(vc, "type", "ignore", vc.getType()); int allelei = 0; @@ -194,7 +194,7 @@ public class ExactCallLogger implements Cloneable { builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); builder.genotypes(genotypes); final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{})); - final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pRefByAllele); + final AFCalculationResult result = new AFCalculationResult(mleInts, 1, alleles, posteriors, priors, log10pRefByAllele); calls.add(new ExactCall(builder.make(), runtimeNano, result)); } break; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/FixedAFCalculatorProvider.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/FixedAFCalculatorProvider.java new file mode 100644 index 000000000..a2cac9063 --- /dev/null +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/FixedAFCalculatorProvider.java @@ -0,0 +1,238 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; + +import htsjdk.variant.variantcontext.VariantContext; +import org.apache.log4j.Logger; +import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; +import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection; +import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection; +import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection; + +/** + * A single fixed instance AF calculator provider. + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class FixedAFCalculatorProvider extends AFCalculatorProvider { + + private final AFCalculator singleton; + + private final boolean verifyRequests; + + private final int maximumAltAlleleCount; + + private final int ploidy; + + /** + * Constructs a fixed AF Calculator provider. + * @param configuration the called configuration. This is the source of the fixed ploidy and maximum number of + * supported alleles. + * @param logger the logger to be used by the AF calculator instance. + * @param verifyRequests whether this provider will verify that each request for the AF calculator meets the + * initial parameter values (ploidy, sample-count and maximum number of alleles. + * + * @throws NullPointerException if {@code configuration} is {@code null}, or it contains invalid values for + * sample ploidy and maximum number of alternative alleles, or {@code sampleCount} is less than 0. + */ + public FixedAFCalculatorProvider(final StandardCallerArgumentCollection configuration, + final Logger logger, final boolean verifyRequests) { + this(configuration.requestedAlleleFrequencyCalculationModel,configuration.genotypeArgs,logger,verifyRequests); + + } + + + /** + * Constructs a fixed AF Calculator provider. + * + * @param configuration the called configuration. This is the source of the fixed ploidy and maximum number of + * supported alleles. + * @param logger the logger to be used by the AF calculator instance. + * @param verifyRequests whether this provider will verify that each request for the AF calculator meets the + * initial parameter values (ploidy, sample-count and maximum number of alleles. + * + * @throws IllegalArgumentException if {@code configuration} is {@code null}, or it contains invalid values for + * sample ploidy and maximum number of alternative alleles, or {@code sampleCount} is less than 0. + */ + public FixedAFCalculatorProvider(final GenotypeCalculationArgumentCollection configuration, + final Logger logger, final boolean verifyRequests) { + this(null,configuration,logger,verifyRequests); + } + + /** + * Constructs a fixed AF Calculator provider. + * + * @param preferred preferred implementation. + * @param configuration the called configuration. This is the source of the fixed ploidy and maximum number of + * supported alleles. + * @param logger the logger to be used by the AF calculator instance. + * @param verifyRequests whether this provider will verify that each request for the AF calculator meets the + * initial parameter values (ploidy, sample-count and maximum number of alleles. + * + * @throws IllegalArgumentException if {@code configuration} is {@code null}, or it contains invalid values for + * sample ploidy and maximum number of alternative alleles, or {@code sampleCount} is less than 0. + */ + public FixedAFCalculatorProvider(final AFCalculatorImplementation preferred, final GenotypeCalculationArgumentCollection configuration, + final Logger logger, final boolean verifyRequests) { + + if (configuration == null) + throw new IllegalArgumentException("null configuration"); + if (configuration == null) + throw new IllegalArgumentException("null configuration genotype arguments"); + if (configuration.samplePloidy < 1) + throw new IllegalArgumentException("invalid sample ploidy " + configuration.samplePloidy); + if (configuration.MAX_ALTERNATE_ALLELES < 0) + throw new IllegalArgumentException("invalid maximum number of alleles " + (configuration.MAX_ALTERNATE_ALLELES + 1)); + + ploidy = configuration.samplePloidy; + maximumAltAlleleCount = configuration.MAX_ALTERNATE_ALLELES; + singleton = AFCalculatorImplementation.bestValue(ploidy,maximumAltAlleleCount,preferred).newInstance(); + singleton.setLogger(logger); + this.verifyRequests = verifyRequests; + } + + @Override + public AFCalculator getInstance(final VariantContext vc, final int defaultPloidy, final int maximumAlleleCount) { + if (verifyRequests) + // supers implementation will call eventually one of the other methods, so no need to verify anything here. + return super.getInstance(vc,defaultPloidy,maximumAlleleCount); + return singleton; + } + + @Override + public AFCalculator getInstance(final int ploidy, final int maxAltAlleleCount) { + if (verifyRequests) { + if (this.ploidy != AFCalculatorImplementation.UNBOUND_PLOIDY && ploidy != this.ploidy) + throw new IllegalStateException("non-supported ploidy"); + if (maximumAltAlleleCount != AFCalculatorImplementation.UNBOUND_ALTERNATIVE_ALLELE_COUNT && maxAltAlleleCount > maximumAltAlleleCount) + throw new IllegalStateException("non-supported alleleCount"); + } + return singleton; + } + + /** + * Creates a fixed AF calculator provider that is thread safe given the engine configuration. + * + * @param toolkit the engine. + * @param config the caller configuration. + * @param logger reference to the logger for the AF calculator to dump messages to. + * + * @throws IllegalArgumentException if any of the input argument is {@code null} or contain invalid configuration + * like zero-samples, zero or negative ploidy or negative-zero maximum number of alleles. + * + * @return never {@code null} + */ + public static AFCalculatorProvider createThreadSafeProvider(final GenomeAnalysisEngine toolkit, + final StandardCallerArgumentCollection config, + final Logger logger) { + final GATKArgumentCollection arguments = toolkit.getArguments(); + final boolean isMultithread = arguments.numberOfCPUThreadsPerDataThread > 1 || arguments.numberOfDataThreads > 1; + return !isMultithread + ? new FixedAFCalculatorProvider(config,logger,false) : + new ConcurrentAFCalculatorProvider() { + @Override + protected AFCalculatorProvider createProvider() { + return new FixedAFCalculatorProvider(config,logger,false); + } + }; + } + + /** + * Creates a fixed AF calculator provider that is thread safe given the engine configuration. + * + * @param toolkit the engine. + * @param config the caller configuration. + * @param logger reference to the logger for the AF calculator to dump messages to. + * @param verifyRequests whether each request should be check for a compatible ploidy and max-alt-allele count. + * A non-compliant request would result in an exception when requesting the AF calculator instance. + * + * @throws IllegalArgumentException if any of the input argument is {@code null} or contain invalid configuration + * like zero-samples, zero or negative ploidy or negative-zero maximum number of alleles. + * + * @return never {@code null}. + */ + @SuppressWarnings("unused") + public static AFCalculatorProvider createThreadSafeProvider(final GenomeAnalysisEngine toolkit, + final StandardCallerArgumentCollection config, + final Logger logger, final boolean verifyRequests) { + final GATKArgumentCollection arguments = toolkit.getArguments(); + final boolean isMultithread = arguments.numberOfCPUThreadsPerDataThread > 1 || arguments.numberOfDataThreads > 1; + return !isMultithread + ? new FixedAFCalculatorProvider(config,logger,verifyRequests) : + new ConcurrentAFCalculatorProvider() { + @Override + protected AFCalculatorProvider createProvider() { + return new FixedAFCalculatorProvider(config,logger,verifyRequests); + } + }; + } + + + /** + * Creates a fixed AF calculator provider that is thread safe given the engine configuration. + * + * @param toolkit the engine. + * @param config the caller configuration. + * @param logger reference to the logger for the AF calculator to dump messages to. + * + * @throws IllegalArgumentException if any of the input argument is {@code null} or contain invalid configuration + * like zero-samples, zero or negative ploidy or negative-zero maximum number of alleles. + * + * @return never {@code null} + */ + public static AFCalculatorProvider createThreadSafeProvider(final GenomeAnalysisEngine toolkit, + final GenotypeCalculationArgumentCollection config, + final Logger logger, final boolean verifyRequests) { + return toolkit.getArguments().numberOfCPUThreadsPerDataThread <= 1 + ? new FixedAFCalculatorProvider(config,logger,false) : + new ConcurrentAFCalculatorProvider() { + @Override + protected AFCalculatorProvider createProvider() { + return new FixedAFCalculatorProvider(config,logger,verifyRequests); + } + }; + } +} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalculator.java similarity index 87% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java rename to protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalculator.java index 6b28a9fae..316f6212d 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalculator.java @@ -57,27 +57,25 @@ import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import java.util.*; -public class GeneralPloidyExactAFCalc extends ExactAFCalc { +public class GeneralPloidyExactAFCalculator extends ExactAFCalculator { + static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them - private final int ploidy; private final static boolean VERBOSE = false; - protected GeneralPloidyExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) { - super(nSamples, maxAltAlleles, ploidy); - this.ploidy = ploidy; + protected GeneralPloidyExactAFCalculator() { } @Override - protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List allelesToUse) { - return subsetAlleles(vc,allelesToUse,false,ploidy); + protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List allelesToUse) { + return subsetAlleles(vc,defaultPloidy,allelesToUse,false); } @Override - public AFCalcResult computeLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { - combineSinglePools(vc.getGenotypes(), vc.getNAlleles(), ploidy, log10AlleleFrequencyPriors); - return getResultFromFinalState(vc, log10AlleleFrequencyPriors); + protected AFCalculationResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy, final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) { + combineSinglePools(vc.getGenotypes(), defaultPloidy, vc.getNAlleles(), log10AlleleFrequencyPriors); + return getResultFromFinalState(vc, log10AlleleFrequencyPriors, stateTracker); } /** @@ -126,17 +124,29 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { } } + @Override - protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums) { + protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final int defaultPloidy, final LikelihoodSum[] likelihoodSums) { final int numOriginalAltAlleles = likelihoodSums.length; - final ArrayList GLs = getGLs(vc.getGenotypes(), false); - for ( final double[] likelihoods : GLs ) { - final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods); + final GenotypesContext genotypes = vc.getGenotypes(); + for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) { + if (!genotype.hasPL()) + continue; + final double[] gls = genotype.getLikelihoods().getAsVector(); + if (MathUtils.sum(gls) >= GATKVariantContextUtils.SUM_GL_THRESH_NOCALL) + continue; + + final int PLindexOfBestGL = MathUtils.maxElementIndex(gls); + + final double bestToHomRefDiffGL = PLindexOfBestGL == PL_INDEX_OF_HOM_REF ? 0.0 : gls[PLindexOfBestGL] - gls[PL_INDEX_OF_HOM_REF]; + final int declaredPloidy = genotype.getPloidy(); + final int ploidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy; + final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL); // by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele for (int k=1; k < acCount.length;k++) if (acCount[k] > 0 ) - likelihoodSums[k-1].sum += acCount[k] * (likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]); + likelihoodSums[k-1].sum += acCount[k] * bestToHomRefDiffGL; } } @@ -144,51 +154,51 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { * Simple non-optimized version that combines GLs from several pools and produces global AF distribution. * @param GLs Inputs genotypes context with per-pool GLs * @param numAlleles Number of alternate alleles - * @param ploidyPerPool Number of samples per pool * @param log10AlleleFrequencyPriors Frequency priors */ protected void combineSinglePools(final GenotypesContext GLs, + final int defaultPloidy, final int numAlleles, - final int ploidyPerPool, final double[] log10AlleleFrequencyPriors) { - final ArrayList genotypeLikelihoods = getGLs(GLs, true); - - - int combinedPloidy = 0; - // Combine each pool incrementally - likelihoods will be renormalized at each step - CombinedPoolLikelihoods combinedPoolLikelihoods = new CombinedPoolLikelihoods(); // first element: zero ploidy, e.g. trivial degenerate distribution + final int numAltAlleles = numAlleles - 1; final int[] zeroCounts = new int[numAlleles]; final ExactACset set = new ExactACset(1, new ExactACcounts(zeroCounts)); set.getLog10Likelihoods()[0] = 0.0; - - final int genotypeCount = genotypeLikelihoods.size(); + final StateTracker stateTracker = getStateTracker(false,numAltAlleles); + int combinedPloidy = 0; + CombinedPoolLikelihoods combinedPoolLikelihoods = new CombinedPoolLikelihoods(); combinedPoolLikelihoods.add(set); - getStateTracker().reset(numAlleles - 1); - if ( genotypeCount <= 1 ) { - // no meaningful GLs at all, just set the tracker to non poly values - // just mimic-ing call below - getStateTracker().setLog10LikelihoodOfAFzero(0.0); - } else { - for (int p=1; p= GATKVariantContextUtils.SUM_GL_THRESH_NOCALL) + continue; + stateTracker.reset(); + final int declaredPloidy = genotype.getPloidy(); + final int ploidy = declaredPloidy < 1 ? defaultPloidy : declaredPloidy; + // they do qualify so we proceed. + combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, gls, + combinedPloidy, ploidy, numAlleles, log10AlleleFrequencyPriors, stateTracker); + combinedPloidy = ploidy + combinedPloidy; // total number of chromosomes in combinedLikelihoods } + if (combinedPloidy == 0) + stateTracker.setLog10LikelihoodOfAFzero(0.0); } - public CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool, + private CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool, double[] newGL, int originalPloidy, int newGLPloidy, int numAlleles, - final double[] log10AlleleFrequencyPriors) { + final double[] log10AlleleFrequencyPriors, + final StateTracker stateTracker) { final LinkedList ACqueue = new LinkedList<>(); // mapping of ExactACset indexes to the objects final HashMap indexesToACset = new HashMap<>(); @@ -206,11 +216,11 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { // keep processing while we have AC conformations that need to be calculated while ( !ACqueue.isEmpty() ) { - getStateTracker().incNEvaluations(); + stateTracker.incNEvaluations(); // compute log10Likelihoods final ExactACset ACset = ACqueue.remove(); - calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset); + calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset, stateTracker); // clean up memory indexesToACset.remove(ACset.getACcounts()); @@ -243,12 +253,13 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { final int originalPloidy, final int newGLPloidy, final LinkedList ACqueue, - final HashMap indexesToACset) { + final HashMap indexesToACset, + final StateTracker stateTracker) { // compute likelihood in "set" of new set based on original likelihoods final int numAlleles = set.getACcounts().getCounts().length; final int newPloidy = set.getACsum(); - final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy); + final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy, stateTracker); // add to new pool @@ -256,7 +267,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { newPool.add(set); // TODO -- change false to true this correct line when the implementation of this model is optimized (it's too slow now to handle this fix) - if ( getStateTracker().abort(log10LofK, set.getACcounts(), false) ) { + if ( stateTracker.abort(log10LofK, set.getACcounts(), false) ) { return log10LofK; } @@ -364,7 +375,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { final CombinedPoolLikelihoods firstGLs, final double[] secondGL, final double[] log10AlleleFrequencyPriors, - final int numAlleles, final int ploidy1, final int ploidy2) { + final int numAlleles, final int ploidy1, final int ploidy2, final StateTracker stateTracker) { final int newPloidy = ploidy1 + ploidy2; @@ -381,9 +392,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { if ( totalAltK == 0 ) { // all-ref case final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX]; set.getLog10Likelihoods()[0] = log10Lof0; - - getStateTracker().setLog10LikelihoodOfAFzero(log10Lof0); - getStateTracker().setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); + stateTracker.setLog10LikelihoodOfAFzero(log10Lof0); + stateTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); return log10Lof0; } else { @@ -427,7 +437,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { // update the MLE if necessary final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length); // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY - getStateTracker().updateMLEifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts); + stateTracker.updateMLEifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts); // apply the priors over each alternate allele for (final int ACcount : altCounts ) { @@ -435,7 +445,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { log10LofK += log10AlleleFrequencyPriors[ACcount]; } // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY - getStateTracker().updateMAPifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts); + stateTracker.updateMAPifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts); return log10LofK; } @@ -462,21 +472,17 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { * From a given variant context, extract a given subset of alleles, and update genotype context accordingly, * including updating the PL's, and assign genotypes accordingly * @param vc variant context with alleles and genotype likelihoods + * @param defaultPloidy ploidy to assume in case that {@code vc} does not contain that information + * for a sample. * @param allelesToUse alleles to subset * @param assignGenotypes true: assign hard genotypes, false: leave as no-call - * @param ploidy number of chromosomes per sample (pool) * @return GenotypesContext with new PLs */ - public GenotypesContext subsetAlleles(final VariantContext vc, + public GenotypesContext subsetAlleles(final VariantContext vc, final int defaultPloidy, final List allelesToUse, - final boolean assignGenotypes, - final int ploidy) { + final boolean assignGenotypes) { // the genotypes with PLs final GenotypesContext oldGTs = vc.getGenotypes(); - List NO_CALL_ALLELES = new ArrayList<>(ploidy); - - for (int k=0; k < ploidy; k++) - NO_CALL_ALLELES.add(Allele.NO_CALL); // samples final List sampleIndices = oldGTs.getSampleNamesOrderedByName(); @@ -492,8 +498,10 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { // create the new genotypes for ( int k = 0; k < oldGTs.size(); k++ ) { final Genotype g = oldGTs.get(sampleIndices.get(k)); + final int declaredPloidy = g.getPloidy(); + final int ploidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy; if ( !g.hasLikelihoods() ) { - newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); + newGTs.add(GenotypeBuilder.create(g.getSampleName(),GATKVariantContextUtils.noCallAlleles(ploidy))); continue; } @@ -514,7 +522,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { // if there is no mass on the (new) likelihoods, then just no-call the sample if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) { - newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); + newGTs.add(GenotypeBuilder.create(g.getSampleName(), GATKVariantContextUtils.noCallAlleles(ploidy))); } else { final GenotypeBuilder gb = new GenotypeBuilder(g); @@ -526,7 +534,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { // if we weren't asked to assign a genotype, then just no-call the sample if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) - gb.alleles(NO_CALL_ALLELES); + gb.alleles(GATKVariantContextUtils.noCallAlleles(ploidy)); else assignGenotype(gb, newLikelihoods, allelesToUse, ploidy); newGTs.add(gb.make()); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyFailOverAFCalculatorProvider.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyFailOverAFCalculatorProvider.java new file mode 100644 index 000000000..12c5a1874 --- /dev/null +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyFailOverAFCalculatorProvider.java @@ -0,0 +1,121 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; + +import org.apache.log4j.Logger; +import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; +import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection; +import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection; + +/** + * Provider that defaults to the general ploidy implementation when the preferred one does not handle the required + * ploidy. + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class GeneralPloidyFailOverAFCalculatorProvider extends AFCalculatorProvider { + + private final AFCalculator preferred; + private final AFCalculatorImplementation preferredImplementation; + private final AFCalculator failOver; + + /** + * Creates a new AF calculator provider given the genotyping arguments and logger reference. + * @param genotypeArgs genotyping parameter collection. + * @param logger where the AF calculator logging messages go. If {@code null}, logging message will not be emitted. + * + * @throws IllegalStateException if {@code genotypeArgs} is {@code null}. + */ + public GeneralPloidyFailOverAFCalculatorProvider(final GenotypeCalculationArgumentCollection genotypeArgs, final Logger logger) { + if (genotypeArgs == null) + throw new IllegalArgumentException("genotype arguments object cannot be null"); + preferredImplementation = AFCalculatorImplementation.bestValue(genotypeArgs.samplePloidy,genotypeArgs.MAX_ALTERNATE_ALLELES, null); + preferred = preferredImplementation.newInstance(); + preferred.setLogger(logger); + failOver = AFCalculatorImplementation.EXACT_GENERAL_PLOIDY.newInstance(); + failOver.setLogger(logger); + } + + /** + * {@inheritDoc} + * @param ploidy {@inheritDoc} + * @param maximumAlternativeAlleles {@inheritDoc} + * @return {@inheritDoc} + */ + @Override + public AFCalculator getInstance(final int ploidy, final int maximumAlternativeAlleles) { + return preferredImplementation.usableForParams(ploidy,maximumAlternativeAlleles) ? preferred : failOver; + } + + /** + * Creates a AF calculator provider that complies to the contract of this class and that make sures that is + * safe to use in multi-thread walker runs. + * + *

+ * Whether this is part of a multi-thread run is determined by looking into the engine configuration in {@code toolkit}. + *

+ * + * @param toolkit enclosing engine. + * @param genotypeArgs the genotyping arguments. + * @param logger where to output logging messages for instantiated AF calculators. + * @return never {@code null}. + */ + public static AFCalculatorProvider createThreadSafeProvider(final GenomeAnalysisEngine toolkit, + final GenotypeCalculationArgumentCollection genotypeArgs, + final Logger logger) { + if (genotypeArgs == null) + throw new IllegalArgumentException("genotype arguments object cannot be null"); + final GATKArgumentCollection arguments = toolkit.getArguments(); + final boolean isMultithread = arguments.numberOfDataThreads > 1 || arguments.numberOfCPUThreadsPerDataThread > 1; + return isMultithread ? new ConcurrentAFCalculatorProvider() { + @Override + protected AFCalculatorProvider createProvider() { + return new GeneralPloidyFailOverAFCalculatorProvider(genotypeArgs,logger); + + } + } : new GeneralPloidyFailOverAFCalculatorProvider(genotypeArgs,logger); + } +} diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalculator.java similarity index 91% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java rename to protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalculator.java index 6a740164f..882627d36 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalculator.java @@ -106,7 +106,7 @@ import java.util.*; * as P(D | AF_* == 0) = prod_i P (D | AF_i == 0), after applying the theta^i * prior for the ith least likely allele. */ - public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { + public class IndependentAllelesDiploidExactAFCalculator extends DiploidExactAFCalculator { /** * The min. confidence of an allele to be included in the joint posterior. @@ -116,47 +116,49 @@ import java.util.*; private final static int[] BIALLELIC_NON_INFORMATIVE_PLS = new int[]{0,0,0}; private final static List BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + + /** * Sorts AFCalcResults by their posteriors of AF > 0, so the */ - private final static class CompareAFCalcResultsByPNonRef implements Comparator { + private final static class CompareAFCalculatorResultsByPNonRef implements Comparator { @Override - public int compare(AFCalcResult o1, AFCalcResult o2) { + public int compare(AFCalculationResult o1, AFCalculationResult o2) { return -1 * Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0()); } } - private final static CompareAFCalcResultsByPNonRef compareAFCalcResultsByPNonRef = new CompareAFCalcResultsByPNonRef(); + private final static CompareAFCalculatorResultsByPNonRef compareAFCalcResultsByPNonRef = new CompareAFCalculatorResultsByPNonRef(); /** * The AFCalc model we are using to do the bi-allelic computation */ - final AFCalc biAlleleExactModel; + final AFCalculator biAlleleExactModel; - protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) { - super(nSamples, maxAltAlleles, ploidy); - biAlleleExactModel = new ReferenceDiploidExactAFCalc(nSamples, 1, ploidy); + protected IndependentAllelesDiploidExactAFCalculator() { + super(); + biAlleleExactModel = new ReferenceDiploidExactAFCalculator(); } /** * Trivial subclass that helps with debugging by keeping track of the supporting information for this joint call */ - private static class MyAFCalcResult extends AFCalcResult { + private static class MyAFCalculationResult extends AFCalculationResult { /** * List of the supporting bi-allelic AFCalcResults that went into making this multi-allelic joint call */ - final List supporting; + final List supporting; - private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map log10pRefByAllele, List supporting) { + private MyAFCalculationResult(int[] alleleCountsOfMLE, int nEvaluations, List allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map log10pRefByAllele, List supporting) { super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele); this.supporting = supporting; } } @Override - public AFCalcResult computeLog10PNonRef(final VariantContext vc, - final double[] log10AlleleFrequencyPriors) { - final List independentResultTrackers = computeAlleleIndependentExact(vc, log10AlleleFrequencyPriors); + public AFCalculationResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy, + final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) { + final List independentResultTrackers = computeAlleleIndependentExact(vc, defaultPloidy, log10AlleleFrequencyPriors); if ( independentResultTrackers.size() == 0 ) throw new IllegalStateException("Independent alleles model returned an empty list of results at VC " + vc); @@ -166,7 +168,7 @@ import java.util.*; return independentResultTrackers.get(0); } else { // we are a multi-allelic, so we need to actually combine the results - final List withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers); + final List withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers); return combineIndependentPNonRefs(vc, withMultiAllelicPriors); } } @@ -181,12 +183,12 @@ import java.util.*; */ @Requires({"vc != null", "log10AlleleFrequencyPriors != null"}) @Ensures("goodIndependentResult(vc, result)") - protected final List computeAlleleIndependentExact(final VariantContext vc, + protected final List computeAlleleIndependentExact(final VariantContext vc, final int defaultPloidy, final double[] log10AlleleFrequencyPriors) { - final List results = new LinkedList(); + final List results = new LinkedList(); for ( final VariantContext subvc : makeAlleleConditionalContexts(vc) ) { - final AFCalcResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors); + final AFCalculationResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, defaultPloidy, vc.getNAlleles() - 1, log10AlleleFrequencyPriors); results.add(resultTracker); } @@ -196,7 +198,7 @@ import java.util.*; /** * Helper function to ensure that the computeAlleleIndependentExact is returning reasonable results */ - private static boolean goodIndependentResult(final VariantContext vc, final List results) { + private static boolean goodIndependentResult(final VariantContext vc, final List results) { if ( results.size() != vc.getNAlleles() - 1) return false; for ( int i = 0; i < results.size(); i++ ) { if ( results.get(i).getAllelesUsedInGenotyping().size() != 2 ) @@ -387,8 +389,8 @@ import java.util.*; return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make(); } - protected final List applyMultiAllelicPriors(final List conditionalPNonRefResults) { - final ArrayList sorted = new ArrayList(conditionalPNonRefResults); + protected final List applyMultiAllelicPriors(final List conditionalPNonRefResults) { + final ArrayList sorted = new ArrayList(conditionalPNonRefResults); // sort the results, so the most likely allele is first Collections.sort(sorted, compareAFCalcResultsByPNonRef); @@ -424,8 +426,8 @@ import java.util.*; * * @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently */ - protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc, - final List sortedResultsWithThetaNPriors) { + protected AFCalculationResult combineIndependentPNonRefs(final VariantContext vc, + final List sortedResultsWithThetaNPriors) { int nEvaluations = 0; final int nAltAlleles = sortedResultsWithThetaNPriors.size(); final int[] alleleCountsOfMLE = new int[nAltAlleles]; @@ -437,7 +439,7 @@ import java.util.*; double log10PosteriorOfACGt0Sum = 0.0; boolean anyPoly = false; - for ( final AFCalcResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) { + for ( final AFCalculationResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) { final Allele altAllele = sortedResultWithThetaNPriors.getAllelesUsedInGenotyping().get(1); final int altI = vc.getAlleles().indexOf(altAllele) - 1; @@ -486,7 +488,7 @@ import java.util.*; log10PosteriorOfACGt0 - log10PriorsOfAC[1] }; - return new MyAFCalcResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(), + return new MyAFCalculationResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(), // necessary to ensure all values < 0 MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true), // priors incorporate multiple alt alleles, must be normalized diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/OriginalDiploidExactAFCalculator.java similarity index 95% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java rename to protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/OriginalDiploidExactAFCalculator.java index 20ef6d370..0a98100c5 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/OriginalDiploidExactAFCalculator.java @@ -58,13 +58,16 @@ import java.util.Map; /** * Original bi-allelic ~O(N) implementation. Kept here for posterity and reference */ -class OriginalDiploidExactAFCalc extends DiploidExactAFCalc { - protected OriginalDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) { - super(nSamples, maxAltAlleles, ploidy); +class OriginalDiploidExactAFCalculator extends DiploidExactAFCalculator { + protected OriginalDiploidExactAFCalculator() { } @Override - protected AFCalcResult computeLog10PNonRef(VariantContext vc, double[] log10AlleleFrequencyPriors) { + protected AFCalculationResult computeLog10PNonRef(final VariantContext vc, + @SuppressWarnings("unused") + final int defaultPloidy, + final double[] log10AlleleFrequencyPriors, + final StateTracker stateTracker) { final double[] log10AlleleFrequencyLikelihoods = new double[log10AlleleFrequencyPriors.length]; final double[] log10AlleleFrequencyPosteriors = new double[log10AlleleFrequencyPriors.length]; final Pair result = linearExact(vc, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); @@ -79,7 +82,7 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc { final double log10PRef = log10Posteriors[1] > log10Posteriors[0] ? MathUtils.LOG10_P_OF_ZERO : 0.0; final Map log10pRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PRef); - return new AFCalcResult(new int[]{mleK}, 0, vc.getAlleles(), + return new AFCalculationResult(new int[]{mleK}, 0, vc.getAlleles(), MathUtils.normalizeFromLog10(log10Likelihoods, true), MathUtils.normalizeFromLog10(log10Priors, true), log10pRefByAllele); @@ -122,7 +125,7 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc { } } - public Pair linearExact(final VariantContext vc, + private Pair linearExact(final VariantContext vc, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyLikelihoods, double[] log10AlleleFrequencyPosteriors) { @@ -194,6 +197,6 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc { logY.rotate(); } - return new Pair(lastK, mleK); + return new Pair<>(lastK, mleK); } } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalculator.java similarity index 97% rename from protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java rename to protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalculator.java index 955fa86f2..91d5ad67a 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalculator.java @@ -46,8 +46,7 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; -public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc { - protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) { - super(nSamples, maxAltAlleles, ploidy); +public class ReferenceDiploidExactAFCalculator extends DiploidExactAFCalculator { + protected ReferenceDiploidExactAFCalculator() { } } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/StateTracker.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/StateTracker.java index 27ae69512..34df64d31 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/StateTracker.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/StateTracker.java @@ -57,9 +57,15 @@ import java.util.List; import java.util.Map; /** + * TODO this class (+AFCalculator) is a bit messy... it seems that it combines "debugging" (unnecessarily adding CPU cost in production) + * TODO but it also contains important part of the AF calculation state... why mix both!!!? It seems that the second + * TODO part could be just blend into AFCalculator ... one one hand you want to reduce classes code size ... but these + * TODO two classes code seems to be quite intertwine and makes it difficult to understand what is going on. + * in the production setting without much need + * * Keeps track of the state information during the exact model AF calculation. * - * Tracks things like the MLE and MAP AC values, their corresponding likelhood and posterior + * Tracks things like the MLE and MAP AC values, their corresponding likelihood and posterior * values, the likelihood of the AF == 0 state, and the number of evaluations needed * by the calculation to compute the P(AF == 0) */ @@ -120,7 +126,7 @@ final class StateTracker { * @param maxAltAlleles an integer >= 1 */ public StateTracker(final int maxAltAlleles) { - if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles); + if ( maxAltAlleles < 0 ) throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles); alleleCountsOfMLE = new int[maxAltAlleles]; alleleCountsOfMAP = new int[maxAltAlleles]; @@ -170,7 +176,7 @@ final class StateTracker { } @Ensures("result >= 0") - protected int getnEvaluations() { + protected int getNEvaluations() { return nEvaluations; } @@ -206,7 +212,7 @@ final class StateTracker { * @return an AFCalcResult summarizing the final results of this calculation */ @Requires("allelesUsedInGenotyping != null") - protected AFCalcResult toAFCalcResult(final double[] log10PriorsByAC) { + protected AFCalculationResult toAFCalculationResult(final double[] log10PriorsByAC) { final int [] subACOfMLE = Arrays.copyOf(alleleCountsOfMLE, allelesUsedInGenotyping.size() - 1); final double[] log10Likelihoods = MathUtils.normalizeFromLog10(new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()}, true); final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true); @@ -218,7 +224,7 @@ final class StateTracker { log10pRefByAllele.put(allele, log10PRef); } - return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pRefByAllele); + return new AFCalculationResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pRefByAllele); } // -------------------------------------------------------------------------------- @@ -351,4 +357,9 @@ final class StateTracker { this.allelesUsedInGenotyping = allelesUsedInGenotyping; } + + public void ensureMaximumAlleleCapacity(final int capacity) { + if (this.alleleCountsOfMAP.length < capacity) + reset(capacity); + } } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java index 86b790307..ee5c59fd8 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -68,6 +68,7 @@ import org.broadinstitute.gatk.engine.walkers.*; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.genotyper.*; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider; import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLocParser; @@ -96,6 +97,7 @@ import org.broadinstitute.gatk.utils.sam.AlignmentUtils; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import org.broadinstitute.gatk.utils.sam.ReadUtils; import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType; +import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants; import java.io.FileNotFoundException; @@ -697,7 +699,9 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // Seems that at least with some test data we can lose genuine haploid variation if we use // UGs engine with ploidy == 1 simpleUAC.genotypeArgs.samplePloidy = Math.max(2,SCAC.genotypeArgs.samplePloidy); - activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(simpleUAC, toolkit); + + activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(simpleUAC, + FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),simpleUAC,logger), toolkit); activeRegionEvaluationGenotyperEngine.setLogger(logger); if( SCAC.CONTAMINATION_FRACTION_FILE != null ) @@ -707,7 +711,8 @@ public class HaplotypeCaller extends ActiveRegionWalker, In throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other."); final GenomeLocParser genomeLocParser = toolkit.getGenomeLocParser(); - genotypingEngine = new HaplotypeCallerGenotypingEngine( SCAC, samplesList, genomeLocParser, !doNotRunPhysicalPhasing); + + genotypingEngine = new HaplotypeCallerGenotypingEngine( SCAC, samplesList, genomeLocParser, FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),SCAC,logger), !doNotRunPhysicalPhasing); // initialize the output VCF header final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); @@ -880,7 +885,8 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // if we don't have any data, just abort early return new ActivityProfileState(ref.getLocus(), 0.0); - final List noCall = Collections.singletonList(Allele.NO_CALL); // used to noCall all genotypes until the exact model is applied + final int ploidy = activeRegionEvaluationGenotyperEngine.getConfiguration().genotypeArgs.samplePloidy; + final List noCall = GATKVariantContextUtils.noCallAlleles(ploidy); // used to noCall all genotypes until the exact model is applied final Map splitContexts = AlignmentContextUtils.splitContextBySampleName(context); final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size()); final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage(); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index 7f19861bc..d1a569c8a 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -51,8 +51,7 @@ import com.google.java.contract.Requires; import htsjdk.variant.variantcontext.*; import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.tools.walkers.genotyper.*; -import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc; -import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider; import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.Utils; @@ -71,7 +70,6 @@ import java.util.*; */ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine { - private final static List NO_CALL = Collections.singletonList(Allele.NO_CALL); private static final int ALLELE_EXTENSION = 2; private static final String phase01 = "0|1"; private static final String phase10 = "1|0"; @@ -91,8 +89,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine getAlleleFrequencyCalculatorThreadLocal() { - return new ThreadLocal() { - - @Override - public AFCalc initialValue() { - return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, - configuration.emitReferenceConfidence != ReferenceConfidenceMode.NONE , logger); - } - }; - } - /** * Change the merge variant across haplotypes for this engine. * @@ -226,6 +205,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine calledHaplotypes = new HashSet<>(); final List returnCalls = new ArrayList<>(); + final int ploidy = configuration.genotypeArgs.samplePloidy; + final List noCallAlleles = GATKVariantContextUtils.noCallAlleles(ploidy); for( final int loc : startPosKeySet ) { if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region @@ -276,7 +257,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine readLikelihoods, final VariantContext mergedVC ) { + private GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods readLikelihoods, final VariantContext mergedVC, final List noCallAlleles ) { final List vcAlleles = mergedVC.getAlleles(); final AlleleList alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles); final GenotypingLikelihoods likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods)); final int sampleCount = samples.sampleCount(); final GenotypesContext result = GenotypesContext.create(sampleCount); for (int s = 0; s < sampleCount; s++) - result.add(new GenotypeBuilder(samples.sampleAt(s)).alleles(NO_CALL).PL(likelihoods.sampleLikelihoods(s).getAsPLs()).make()); + result.add(new GenotypeBuilder(samples.sampleAt(s)).alleles(noCallAlleles).PL(likelihoods.sampleLikelihoods(s).getAsPLs()).make()); return result; } @@ -769,23 +750,6 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine findEventAllelesInSample( final List eventAlleles, final List haplotypeAlleles, final List haplotypeAllelesForSample, final List> alleleMapper, final List haplotypes ) { - if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return NO_CALL; } - final List eventAllelesForSample = new ArrayList<>(); - for( final Allele a : haplotypeAllelesForSample ) { - final Haplotype haplotype = haplotypes.get(haplotypeAlleles.indexOf(a)); - for( int iii = 0; iii < alleleMapper.size(); iii++ ) { - final List mappedHaplotypes = alleleMapper.get(iii); - if( mappedHaplotypes.contains(haplotype) ) { - eventAllelesForSample.add(eventAlleles.get(iii)); - break; - } - } - } - return eventAllelesForSample; - } - @Deprecated protected static Map generateVCsFromAlignment( final Haplotype haplotype, final byte[] ref, final GenomeLoc refLoc, final String sourceNameToAdd ) { return new EventMap(haplotype, ref, refLoc, sourceNameToAdd); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java index f78519f3f..8e67691be 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java @@ -48,6 +48,7 @@ package org.broadinstitute.gatk.tools.walkers.validation; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.walkers.*; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider; import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.engine.CommandLineGATK; import org.broadinstitute.gatk.engine.contexts.AlignmentContext; @@ -352,12 +353,15 @@ public class GenotypeAndValidate extends RodWalker sm, double refLik) { super(sm); @@ -78,9 +80,9 @@ public class GLBasedSampleSelector extends SampleSelector { // do we want to apply a prior? maybe user-spec? if ( flatPriors == null ) { flatPriors = new double[1+2*samples.size()]; - AFCalculator = AFCalcFactory.createAFCalc(samples.size(), 4, 2); + afCalculator = AFCalculatorFactory.createCalculator(samples.size(), MAX_ALT_ALLELES, HomoSapiensConstants.DEFAULT_PLOIDY); } - final AFCalcResult result = AFCalculator.getLog10PNonRef(subContext, flatPriors); + final AFCalculationResult result = afCalculator.getLog10PNonRef(subContext, HomoSapiensConstants.DEFAULT_PLOIDY, MAX_ALT_ALLELES, flatPriors); // do we want to let this qual go up or down? if ( result.getLog10LikelihoodOfAFEq0() < referenceLikelihood ) { return true; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java index 80f5afa25..c54cce704 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java @@ -63,11 +63,11 @@ import org.broadinstitute.gatk.engine.walkers.Window; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.genotyper.*; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.GeneralPloidyFailOverAFCalculatorProvider; import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller; import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.SampleUtils; import org.broadinstitute.gatk.utils.commandline.*; -import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; import org.broadinstitute.gatk.utils.help.HelpConstants; import org.broadinstitute.gatk.utils.variant.GATKVCFUtils; @@ -166,7 +166,8 @@ public class GenotypeGVCFs extends RodWalker vcfRods = GATKVCFUtils.getVCFHeadersFromRods(toolkit, variants); final SampleList samples = new IndexedSampleList(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); // create the genotyping engine - genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode); + genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), GeneralPloidyFailOverAFCalculatorProvider.createThreadSafeProvider(toolkit, genotypeArgs, logger), + toolkit.getArguments().BAQMode); // create the annotation engine annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.emptyList(), this, toolkit); @@ -182,6 +183,8 @@ public class GenotypeGVCFs extends RodWalker sampleNameSet = SampleListUtils.asSet(samples); final VCFHeader vcfHeader = new VCFHeader(headerLines, sampleNameSet); vcfWriter.writeHeader(vcfHeader); + + logger.info("Notice that the -ploidy parameter is ignored in " + getClass().getSimpleName() + " tool as this is automatically determined by the input variant files"); } public VariantContext map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { @@ -192,23 +195,9 @@ public class GenotypeGVCFs extends RodWalker implements T final SampleList samples = new IndexedSampleList(SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName))); final Set sampleNameSet = SampleListUtils.asSet(samples); - UG_engine = new UnifiedGenotypingEngine(UAC, samples,toolkit.getGenomeLocParser(),toolkit.getArguments().BAQMode); + UG_engine = new UnifiedGenotypingEngine(UAC, samples,toolkit.getGenomeLocParser(), + FixedAFCalculatorProvider.createThreadSafeProvider(toolkit,UAC,logger), + toolkit.getArguments().BAQMode); final Set hInfo = new HashSet(); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName))); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java index 77c30eb19..62cafb898 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java @@ -56,6 +56,7 @@ import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider; import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.Utils; @@ -78,7 +79,8 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest { final UnifiedArgumentCollection args = new UnifiedArgumentCollection(); final SampleList fakeSamples = SampleListUtils.singletonList("fake"); - ugEngine = new UnifiedGenotypingEngine(args,fakeSamples,engine.getGenomeLocParser(),engine.getArguments().BAQMode); + ugEngine = new UnifiedGenotypingEngine(args,fakeSamples,engine.getGenomeLocParser(), + new FixedAFCalculatorProvider(args,null,true),engine.getArguments().BAQMode); } private UnifiedGenotypingEngine getEngine() { diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index 270d2c3c9..231e1512c 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -69,16 +69,16 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testBOTH_GGA_Pools() { - executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "39cccae4a26c64e09631e28f17db2792"); + executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "c731cd0ddb0fa153bffa7bc19586bfa6"); } @Test(enabled = true) public void testINDEL_GGA_Pools() { - executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "5e4013183ca26b0d7c48ea2ec147d9d7"); + executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "b6ed1729a25670d81aff61099fb13998"); } @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "af69cf83a27d082d76b18fede7ac2699"); + executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "54d718401c413fdbc1f801061bf8e7d1"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 5326eb5b2..54282b05c 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","68a2aa0b213fa5a53e925542e340fbe8"); + executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","1426e311501a7bd96ecb75ed839f7c64"); } @Test(enabled = true) diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index c7b510301..c1d43f328 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -135,7 +135,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("8084abd2460e934e483403aff76a3698")); + Arrays.asList("34a92dd832bbb8ed53abf21ba88c6faa")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 7fb019f10..ae34ca172 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -65,7 +65,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("9824ea30340fd3632bad672fc6ade114")); + Arrays.asList("a15a28d854789c71ef2879dd4c606b1a")); executeTest("test MultiSample Pilot1", spec); } @@ -97,7 +97,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("2474c60302fc79f28bb0c721d305c09b")); + Arrays.asList("1a36b5c036e0452f526dc1a5fdd60929")); executeTest("test Multiple SNP alleles", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculationUnitTest.java similarity index 84% rename from protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcUnitTest.java rename to protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculationUnitTest.java index 0bbc0ecfa..b36b2334b 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculationUnitTest.java @@ -46,13 +46,14 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; +import htsjdk.variant.variantcontext.*; import org.apache.commons.lang.ArrayUtils; -import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedGenotypingEngine; +import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.QualityUtils; import org.broadinstitute.gatk.utils.Utils; -import htsjdk.variant.variantcontext.*; +import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.DataProvider; @@ -61,7 +62,7 @@ import org.testng.annotations.Test; import java.util.*; -public class AFCalcUnitTest extends BaseTest { +public class AFCalculationUnitTest extends BaseTest { static Allele A = Allele.create("A", true); static Allele C = Allele.create("C"); static Allele G = Allele.create("G"); @@ -76,6 +77,19 @@ public class AFCalcUnitTest extends BaseTest { final private static boolean Guillermo_FIXME = false; // TODO -- can only be enabled when GdA fixes bug final private static boolean DEBUG_ONLY = false; + protected static List createAFCalculators(final List calcs, final int maxAltAlleles, final int ploidy) { + final List AFCalculators = new LinkedList<>(); + + for ( final AFCalculatorImplementation calc : calcs ) { + if (calc.usableForParams(ploidy,maxAltAlleles)) + AFCalculators.add(calc.newInstance()); + else + throw new IllegalStateException("cannot use " + calc + " calculator instance with combination " + maxAltAlleles + " " + ploidy); + } + + return AFCalculators; + } + @BeforeSuite public void before() { AA1 = makePL(Arrays.asList(A, A), 0, 20, 20); @@ -102,14 +116,14 @@ public class AFCalcUnitTest extends BaseTest { private class GetGLsTest extends TestDataProvider { GenotypesContext GLs; int numAltAlleles; - final AFCalc calc; + final AFCalculator calc; final int[] expectedACs; final double[] priors; final String priorName; - private GetGLsTest(final AFCalc calc, int numAltAlleles, List arg, final double[] priors, final String priorName) { + private GetGLsTest(final AFCalculator calc, int numAltAlleles, List arg, final double[] priors, final String priorName) { super(GetGLsTest.class); - GLs = GenotypesContext.create(new ArrayList(arg)); + GLs = GenotypesContext.create(new ArrayList<>(arg)); this.numAltAlleles = numAltAlleles; this.calc = calc; this.priors = priors; @@ -125,20 +139,20 @@ public class AFCalcUnitTest extends BaseTest { } } - public AFCalcResult execute() { - return getCalc().getLog10PNonRef(getVC(), getPriors()); + public AFCalculationResult execute() { + return getCalc().getLog10PNonRef(getVC(), HomoSapiensConstants.DEFAULT_PLOIDY, numAltAlleles, getPriors()); } - public AFCalcResult executeRef() { - final AFCalc ref = AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_REFERENCE, getCalc().nSamples, getCalc().getMaxAltAlleles()); - return ref.getLog10PNonRef(getVC(), getPriors()); + public AFCalculationResult executeRef() { + final AFCalculator ref = AFCalculatorImplementation.EXACT_REFERENCE.newInstance(); + return ref.getLog10PNonRef(getVC(), HomoSapiensConstants.DEFAULT_PLOIDY, numAltAlleles, getPriors()); } public double[] getPriors() { return priors; } - public AFCalc getCalc() { + public AFCalculator getCalc() { return calc; } @@ -165,13 +179,18 @@ public class AFCalcUnitTest extends BaseTest { } } + + private static final int MAX_ALT_ALLELES = 2; + private static final int PLOIDY = 2; + + @DataProvider(name = "wellFormedGLs") public Object[][] createSimpleGLsData() { final List biAllelicSamples = Arrays.asList(AA1, AB1, BB1); final List triAllelicSamples = Arrays.asList(AA2, AB2, BB2, AC2, BC2, CC2); for ( final int nSamples : Arrays.asList(1, 2, 3, 4) ) { - List calcs = AFCalcFactory.createAFCalcs( Arrays.asList( AFCalcFactory.Calculation.values() ), 4, 2, 2); + List calcs = createAFCalculators(Arrays.asList(AFCalculatorImplementation.values()), MAX_ALT_ALLELES, PLOIDY); final int nPriorValues = 2*nSamples+1; final double[] flatPriors = MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors @@ -179,7 +198,7 @@ public class AFCalcUnitTest extends BaseTest { UnifiedGenotypingEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001, new ArrayList()); for ( final double[] priors : Arrays.asList(flatPriors, humanPriors) ) { // , humanPriors) ) { - for ( AFCalc model : calcs ) { + for ( AFCalculator model : calcs ) { final String priorName = priors == humanPriors ? "human" : "flat"; // bi-allelic @@ -188,7 +207,7 @@ public class AFCalcUnitTest extends BaseTest { new GetGLsTest(model, 1, genotypes, priors, priorName); // tri-allelic - if ( INCLUDE_TRIALLELIC && ( ! priorName.equals("human") || Guillermo_FIXME ) && ! ( model instanceof OriginalDiploidExactAFCalc) ) // || model != generalCalc ) ) + if ( INCLUDE_TRIALLELIC && ( ! priorName.equals("human") || Guillermo_FIXME ) && ! ( model instanceof OriginalDiploidExactAFCalculator) ) // || model != generalCalc ) ) for ( List genotypes : Utils.makePermutations(triAllelicSamples, nSamples, true) ) new GetGLsTest(model, 2, genotypes, priors, priorName); } @@ -261,12 +280,12 @@ public class AFCalcUnitTest extends BaseTest { samples.addAll(Collections.nCopies(nNonInformative, testData.nonInformative)); final int nSamples = samples.size(); - List calcs = AFCalcFactory.createAFCalcs(Arrays.asList(AFCalcFactory.Calculation.values()), 4, 2, 2); + List calcs = createAFCalculators(Arrays.asList(AFCalculatorImplementation.values()), MAX_ALT_ALLELES, PLOIDY); final double[] priors = MathUtils.normalizeFromLog10(new double[2*nSamples+1], true); // flat priors - for ( AFCalc model : calcs ) { - if ( testData.nAltAlleles > 1 && model instanceof OriginalDiploidExactAFCalc ) + for ( AFCalculator model : calcs ) { + if ( testData.nAltAlleles > 1 && model instanceof OriginalDiploidExactAFCalculator) continue; final GetGLsTest onlyInformative = new GetGLsTest(model, testData.nAltAlleles, testData.called, priors, "flat"); @@ -285,19 +304,22 @@ public class AFCalcUnitTest extends BaseTest { @Test(enabled = true && ! DEBUG_ONLY, dataProvider = "GLsWithNonInformative", dependsOnMethods = {"testBiallelicGLs", "testTriallelicGLs"}) public void testGLsWithNonInformative(GetGLsTest onlyInformative, GetGLsTest withNonInformative) { - final AFCalcResult expected = onlyInformative.execute(); - final AFCalcResult actual = withNonInformative.execute(); + final AFCalculationResult expected = onlyInformative.execute(); + final AFCalculationResult actual = withNonInformative.execute(); testResultSimple(withNonInformative); - compareAFCalcResults(actual, expected, onlyInformative.getCalc(), true); + compareAFCalcResults(actual, expected, onlyInformative.getCalc(), onlyInformative.numAltAlleles, true); } private void testResultSimple(final GetGLsTest cfg) { - final AFCalcResult refResultTracker = cfg.executeRef(); - final AFCalcResult resultTracker = cfg.execute(); - - compareAFCalcResults(resultTracker, refResultTracker, cfg.getCalc(), true); - + final AFCalculationResult refResultTracker = cfg.executeRef(); + final AFCalculationResult resultTracker = cfg.execute(); + try { + compareAFCalcResults(resultTracker, refResultTracker, cfg.getCalc(), cfg.numAltAlleles, true); + } catch (Throwable t) { + cfg.execute(); + throw new RuntimeException(t); + } Assert.assertNotNull(resultTracker.getAllelesUsedInGenotyping()); Assert.assertTrue(cfg.getAlleles().containsAll(resultTracker.getAllelesUsedInGenotyping()), "Result object has alleles not in our initial allele list"); @@ -310,9 +332,9 @@ public class AFCalcUnitTest extends BaseTest { } } - private void compareAFCalcResults(final AFCalcResult actual, final AFCalcResult expected, final AFCalc calc, final boolean onlyPosteriorsShouldBeEqual) { + private void compareAFCalcResults(final AFCalculationResult actual, final AFCalculationResult expected, final AFCalculator calc, final int maxAltAlleles, final boolean onlyPosteriorsShouldBeEqual) { // note we cannot really test the multi-allelic case because we actually meaningfully differ among the models here - final double TOLERANCE = calc.getMaxAltAlleles() > 1 ? 1000 : 0.1; // much tighter constraints on bi-allelic results + final double TOLERANCE = maxAltAlleles > 1 ? 1000 : 0.1; // much tighter constraints on bi-allelic results if ( ! onlyPosteriorsShouldBeEqual ) { Assert.assertEquals(actual.getLog10PriorOfAFEq0(), expected.getLog10PriorOfAFEq0(), TOLERANCE, "Priors AF == 0"); @@ -322,7 +344,7 @@ public class AFCalcUnitTest extends BaseTest { } Assert.assertEquals(actual.getLog10PosteriorOfAFEq0(), expected.getLog10PosteriorOfAFEq0(), TOLERANCE, "Posteriors AF == 0"); Assert.assertEquals(actual.getLog10PosteriorOfAFGT0(), expected.getLog10PosteriorOfAFGT0(), TOLERANCE, "Posteriors AF > 0"); - Assert.assertEquals(actual.getAlleleCountsOfMLE(), expected.getAlleleCountsOfMLE(), "MLE ACs"); + Assert.assertTrue(Arrays.equals(actual.getAlleleCountsOfMLE(), expected.getAlleleCountsOfMLE()), "MLE ACs "); Assert.assertEquals(actual.getAllelesUsedInGenotyping(), expected.getAllelesUsedInGenotyping(), "Alleles used in genotyping"); for ( final Allele a : expected.getAllelesUsedInGenotyping() ) { @@ -337,23 +359,23 @@ public class AFCalcUnitTest extends BaseTest { } @Test(enabled = true && ! DEBUG_ONLY, dataProvider = "Models") - public void testLargeGLs(final ExactAFCalc calc) { + public void testLargeGLs(final ExactAFCalculator calc) { final Genotype BB = makePL(Arrays.asList(C, C), 20000000, 20000000, 0); GetGLsTest cfg = new GetGLsTest(calc, 1, Arrays.asList(BB, BB, BB), FLAT_3SAMPLE_PRIORS, "flat"); - final AFCalcResult resultTracker = cfg.execute(); + final AFCalculationResult resultTracker = cfg.execute(); int calculatedAlleleCount = resultTracker.getAlleleCountsOfMLE()[0]; Assert.assertEquals(calculatedAlleleCount, 6); } @Test(enabled = true && ! DEBUG_ONLY, dataProvider = "Models") - public void testMismatchedGLs(final ExactAFCalc calc) { + public void testMismatchedGLs(final ExactAFCalculator calc) { final Genotype AB = makePL(Arrays.asList(A, C), 2000, 0, 2000, 2000, 2000, 2000); final Genotype AC = makePL(Arrays.asList(A, G), 100, 100, 100, 0, 100, 100); GetGLsTest cfg = new GetGLsTest(calc, 2, Arrays.asList(AB, AC), FLAT_3SAMPLE_PRIORS, "flat"); - final AFCalcResult resultTracker = cfg.execute(); + final AFCalculationResult resultTracker = cfg.execute(); Assert.assertEquals(resultTracker.getAlleleCountsOfMLE()[0], 1); Assert.assertEquals(resultTracker.getAlleleCountsOfMLE()[1], 1); @@ -369,14 +391,14 @@ public class AFCalcUnitTest extends BaseTest { final Genotype g; final double pNonRef, tolerance; final boolean canScale; - final List badModels; + final List badModels; final VariantContext vc; private PNonRefData(final VariantContext vc, Genotype g, double pNonRef, double tolerance, final boolean canScale) { - this(vc, g, pNonRef, tolerance, canScale, Collections.emptyList()); + this(vc, g, pNonRef, tolerance, canScale, Collections.emptyList()); } - private PNonRefData(final VariantContext vc, Genotype g, double pNonRef, double tolerance, final boolean canScale, final List badModels) { + private PNonRefData(final VariantContext vc, Genotype g, double pNonRef, double tolerance, final boolean canScale, final List badModels) { this.g = g; this.pNonRef = pNonRef; this.tolerance = tolerance; @@ -411,7 +433,7 @@ public class AFCalcUnitTest extends BaseTest { final VariantContext vc2 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C)).make(); final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make(); - final AFCalcTestBuilder.PriorType priorType = AFCalcTestBuilder.PriorType.flat; + final AFCalculatorTestBuilder.PriorType priorType = AFCalculatorTestBuilder.PriorType.flat; final double TOLERANCE = 0.5; @@ -433,7 +455,7 @@ public class AFCalcUnitTest extends BaseTest { new PNonRefData(vc3, makePL(GG, 10, 10, 10, 10, 10, 0), 0.9166667, TOLERANCE, false) ); - for ( AFCalcFactory.Calculation modelType : Arrays.asList(AFCalcFactory.Calculation.EXACT_REFERENCE, AFCalcFactory.Calculation.EXACT_INDEPENDENT) ) { + for ( AFCalculatorImplementation modelType : Arrays.asList(AFCalculatorImplementation.EXACT_REFERENCE, AFCalculatorImplementation.EXACT_INDEPENDENT) ) { for ( int nNonInformative = 0; nNonInformative < 3; nNonInformative++ ) { for ( final PNonRefData rootData : initialPNonRefData ) { for ( int plScale = 1; plScale <= 100000; plScale *= 10 ) { @@ -451,19 +473,19 @@ public class AFCalcUnitTest extends BaseTest { @Test(enabled = true && ! DEBUG_ONLY, dataProvider = "PNonRef") private void testPNonRef(final VariantContext vcRoot, - AFCalcFactory.Calculation modelType, - AFCalcTestBuilder.PriorType priorType, + AFCalculatorImplementation modelType, + AFCalculatorTestBuilder.PriorType priorType, final List genotypes, final double expectedPNonRef, final double tolerance, final int nNonInformative) { - final AFCalcTestBuilder testBuilder - = new AFCalcTestBuilder(1, vcRoot.getNAlleles()-1, modelType, priorType); + final AFCalculatorTestBuilder testBuilder + = new AFCalculatorTestBuilder(1, vcRoot.getNAlleles()-1, modelType, priorType); final VariantContextBuilder vcb = new VariantContextBuilder(vcRoot); vcb.genotypes(genotypes); - final AFCalcResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vcb.make(), testBuilder.makePriors()); + final AFCalculationResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vcb.make(), PLOIDY, MAX_ALT_ALLELES, testBuilder.makePriors()); Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), Math.log10(expectedPNonRef), tolerance, "Actual pNonRef not within tolerance " + tolerance + " of expected"); @@ -476,12 +498,12 @@ public class AFCalcUnitTest extends BaseTest { final List bigNonRefPLs = Arrays.asList(0, 1, 2, 3, 4, 5, 10, 15, 20, 25, 50, 100, 1000); final List> bigDiploidPLs = removeBadPLs(Utils.makePermutations(bigNonRefPLs, 3, true)); - for ( AFCalcFactory.Calculation modelType : AFCalcFactory.Calculation.values() ) { + for ( AFCalculatorImplementation modelType : AFCalculatorImplementation.values() ) { if ( false ) { // for testing only tests.add(new Object[]{modelType, toGenotypes(Arrays.asList(Arrays.asList(0,100,0)))}); } else { - if ( modelType == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY ) continue; // TODO -- GENERAL_PLOIDY DOESN'T WORK + if ( modelType == AFCalculatorImplementation.EXACT_GENERAL_PLOIDY ) continue; // TODO -- GENERAL_PLOIDY DOESN'T WORK // test all combinations of PLs for 1 sample for ( final List> PLsPerSample : Utils.makePermutations(bigDiploidPLs, 1, true) ) { @@ -539,16 +561,16 @@ public class AFCalcUnitTest extends BaseTest { } @Test(enabled = true && ! DEBUG_ONLY, dataProvider = "PNonRefBiallelicSystematic") - private void PNonRefBiallelicSystematic(AFCalcFactory.Calculation modelType, final List genotypes) { + private void PNonRefBiallelicSystematic(AFCalculatorImplementation modelType, final List genotypes) { //logger.warn("Running " + modelType + " with " + genotypes); - final AFCalcTestBuilder refBuilder = new AFCalcTestBuilder(genotypes.size(), 1, AFCalcFactory.Calculation.EXACT_REFERENCE, AFCalcTestBuilder.PriorType.human); - final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(genotypes.size(), 1, modelType, AFCalcTestBuilder.PriorType.human); + final AFCalculatorTestBuilder refBuilder = new AFCalculatorTestBuilder(genotypes.size(), 1, AFCalculatorImplementation.EXACT_REFERENCE, AFCalculatorTestBuilder.PriorType.human); + final AFCalculatorTestBuilder testBuilder = new AFCalculatorTestBuilder(genotypes.size(), 1, modelType, AFCalculatorTestBuilder.PriorType.human); final VariantContextBuilder vcb = new VariantContextBuilder("x", "1", 1, 1, Arrays.asList(A, C)); vcb.genotypes(genotypes); - final AFCalcResult refResult = refBuilder.makeModel().getLog10PNonRef(vcb.make(), testBuilder.makePriors()); - final AFCalcResult testResult = testBuilder.makeModel().getLog10PNonRef(vcb.make(), testBuilder.makePriors()); + final AFCalculationResult refResult = refBuilder.makeModel().getLog10PNonRef(vcb.make(), PLOIDY, MAX_ALT_ALLELES, testBuilder.makePriors()); + final AFCalculationResult testResult = testBuilder.makeModel().getLog10PNonRef(vcb.make(), PLOIDY, MAX_ALT_ALLELES, testBuilder.makePriors()); final double tolerance = 1e-3; Assert.assertEquals(testResult.getLog10PosteriorOfAFGT0(), refResult.getLog10PosteriorOfAFGT0(), tolerance, @@ -567,9 +589,9 @@ public class AFCalcUnitTest extends BaseTest { public Object[][] makeModels() { List tests = new ArrayList(); - for ( final AFCalcFactory.Calculation calc : AFCalcFactory.Calculation.values() ) { + for ( final AFCalculatorImplementation calc : AFCalculatorImplementation.values() ) { if ( calc.usableForParams(2, 4) ) - tests.add(new Object[]{AFCalcFactory.createAFCalc(calc, 2, 4)}); + tests.add(new Object[]{AFCalculatorFactory.createCalculatorForDiploidAnalysis()}); } return tests.toArray(new Object[][]{}); @@ -577,7 +599,7 @@ public class AFCalcUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "Models") - public void testNoPrior(final AFCalc model) { + public void testNoPrior(final AFCalculator model) { for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) { final Genotype AB = makePL(Arrays.asList(A,C), REF_PL, 0, 10000); @@ -592,8 +614,8 @@ public class AFCalcUnitTest extends BaseTest { GetGLsTest cfgFlatPrior = new GetGLsTest(model, 1, Arrays.asList(AB), flatPriors, "flatPrior"); GetGLsTest cfgNoPrior = new GetGLsTest(model, 1, Arrays.asList(AB), flatPriors, "noPrior"); - final AFCalcResult resultTrackerFlat = cfgFlatPrior.execute(); - final AFCalcResult resultTrackerNoPrior = cfgNoPrior.execute(); + final AFCalculationResult resultTrackerFlat = cfgFlatPrior.execute(); + final AFCalculationResult resultTrackerNoPrior = cfgNoPrior.execute(); final double pRefWithNoPrior = AB.getLikelihoods().getAsVector()[0]; final double pHetWithNoPrior = AB.getLikelihoods().getAsVector()[1] - Math.log10(0.5); @@ -609,7 +631,7 @@ public class AFCalcUnitTest extends BaseTest { } @Test(enabled = true && !DEBUG_ONLY, dataProvider = "Models") - public void testBiallelicPriors(final AFCalc model) { + public void testBiallelicPriors(final AFCalculator model) { for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) { final Genotype AB = makePL(Arrays.asList(A,C), REF_PL, 0, 10000); @@ -620,7 +642,7 @@ public class AFCalcUnitTest extends BaseTest { final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior}), true); if ( ! Double.isInfinite(priors[1]) ) { GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior); - final AFCalcResult resultTracker = cfg.execute(); + final AFCalculationResult resultTracker = cfg.execute(); final int actualAC = resultTracker.getAlleleCountsOfMLE()[0]; final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0]; @@ -656,14 +678,14 @@ public class AFCalcUnitTest extends BaseTest { List tests = new ArrayList(); // list of all high-quality models in the system - final List models = Arrays.asList( - AFCalcFactory.Calculation.getDefaultModel(), - AFCalcFactory.Calculation.EXACT_REFERENCE, - AFCalcFactory.Calculation.EXACT_INDEPENDENT); + final List models = Arrays.asList( + AFCalculatorImplementation.DEFAULT, + AFCalculatorImplementation.EXACT_REFERENCE, + AFCalculatorImplementation.EXACT_INDEPENDENT); // note that we cannot use small PLs here or the thresholds are hard to set for ( final int nonTypePLs : Arrays.asList(100, 1000) ) { - for ( final AFCalcFactory.Calculation model : models ) { + for ( final AFCalculatorImplementation model : models ) { for ( final int allele1AC : Arrays.asList(0, 1, 2, 10, 100, 1000, 10000) ) { for ( final int nSamples : Arrays.asList(1, 10, 100, 1000, 10000) ) { // for ( final int nonTypePLs : Arrays.asList(10) ) { @@ -678,8 +700,8 @@ public class AFCalcUnitTest extends BaseTest { // bi-allelic tests { - final AFCalcTestBuilder testBuilder - = new AFCalcTestBuilder(nSamples, 1, model, AFCalcTestBuilder.PriorType.human); + final AFCalculatorTestBuilder testBuilder + = new AFCalculatorTestBuilder(nSamples, 1, model, AFCalculatorTestBuilder.PriorType.human); final List ACs = Arrays.asList(allele1AC); tests.add(new Object[]{testBuilder, ACs, nonTypePLs, Arrays.asList(poly1)}); } @@ -689,8 +711,8 @@ public class AFCalcUnitTest extends BaseTest { if ( nSamples < allele2AC || allele1AC + allele2AC > nSamples || nSamples > 100 || nSamples == 1) continue; - final AFCalcTestBuilder testBuilder - = new AFCalcTestBuilder(nSamples, 2, model, AFCalcTestBuilder.PriorType.human); + final AFCalculatorTestBuilder testBuilder + = new AFCalculatorTestBuilder(nSamples, 2, model, AFCalculatorTestBuilder.PriorType.human); final List ACs = Arrays.asList(allele1AC, allele2AC); final boolean poly2 = allele2AC > errorFreq && (nonTypePLs * allele2AC) > 90; tests.add(new Object[]{testBuilder, ACs, nonTypePLs, Arrays.asList(poly1, poly2)}); @@ -704,7 +726,7 @@ public class AFCalcUnitTest extends BaseTest { } @Test(enabled = true && ! DEBUG_ONLY, dataProvider = "polyTestProvider") - public void testCallingGeneral(final AFCalcTestBuilder testBuilder, final List ACs, final int nonTypePL, final List expectedPoly ) { + public void testCallingGeneral(final AFCalculatorTestBuilder testBuilder, final List ACs, final int nonTypePL, final List expectedPoly ) { testCalling(testBuilder, ACs, nonTypePL, expectedPoly); } @@ -713,13 +735,13 @@ public class AFCalcUnitTest extends BaseTest { List tests = new ArrayList(); // list of all high-quality models in the system - final List models = Arrays.asList(AFCalcFactory.Calculation.EXACT_INDEPENDENT); + final List models = Arrays.asList(AFCalculatorImplementation.EXACT_INDEPENDENT); final List alleleCounts = Arrays.asList(0, 1, 2, 3, 4, 5, 10, 20); final int nonTypePLs = 1000; final int nAlleles = 4; - for ( final AFCalcFactory.Calculation model : models ) { + for ( final AFCalculatorImplementation model : models ) { for ( final List ACs : Utils.makePermutations(alleleCounts, nAlleles, true) ) { final List isPoly = new ArrayList(ACs.size()); for ( final int ac : ACs ) isPoly.add(ac > 0); @@ -727,8 +749,8 @@ public class AFCalcUnitTest extends BaseTest { final double acSum = MathUtils.sum(ACs); for ( final int nSamples : Arrays.asList(1, 10, 100) ) { if ( nSamples < acSum ) continue; - final AFCalcTestBuilder testBuilder - = new AFCalcTestBuilder(nSamples, nAlleles, model, AFCalcTestBuilder.PriorType.human); + final AFCalculatorTestBuilder testBuilder + = new AFCalculatorTestBuilder(nSamples, nAlleles, model, AFCalculatorTestBuilder.PriorType.human); tests.add(new Object[]{testBuilder, ACs, nonTypePLs, isPoly}); } } @@ -738,15 +760,15 @@ public class AFCalcUnitTest extends BaseTest { } @Test(enabled = true && ! DEBUG_ONLY, dataProvider = "polyTestProviderLotsOfAlleles") - public void testCallingLotsOfAlleles(final AFCalcTestBuilder testBuilder, final List ACs, final int nonTypePL, final List expectedPoly ) { + public void testCallingLotsOfAlleles(final AFCalculatorTestBuilder testBuilder, final List ACs, final int nonTypePL, final List expectedPoly ) { testCalling(testBuilder, ACs, nonTypePL, expectedPoly); } - private void testCalling(final AFCalcTestBuilder testBuilder, final List ACs, final int nonTypePL, final List expectedPoly) { - final AFCalc calc = testBuilder.makeModel(); + private void testCalling(final AFCalculatorTestBuilder testBuilder, final List ACs, final int nonTypePL, final List expectedPoly) { + final AFCalculator calc = testBuilder.makeModel(); final double[] priors = testBuilder.makePriors(); final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL); - final AFCalcResult result = calc.getLog10PNonRef(vc, priors); + final AFCalculationResult result = calc.getLog10PNonRef(vc, PLOIDY, testBuilder.numAltAlleles, priors); boolean anyPoly = false; for ( final boolean onePoly : expectedPoly ) anyPoly = anyPoly || onePoly; diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorPerformanceUnitTest.java similarity index 89% rename from protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java rename to protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorPerformanceUnitTest.java index bdbc475a6..1714b5fa9 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorPerformanceUnitTest.java @@ -52,6 +52,7 @@ import org.broadinstitute.gatk.utils.Utils; import org.broadinstitute.gatk.utils.collections.Pair; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; +import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -60,18 +61,18 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -public class AFCalcPerformanceUnitTest extends BaseTest { +public class AFCalculatorPerformanceUnitTest extends BaseTest { @DataProvider(name = "ScalingTests") public Object[][] makepolyTestProviderLotsOfAlleles() { List tests = new ArrayList(); // list of all high-quality models in the system - final List biAllelicModels = Arrays.asList( - AFCalcFactory.Calculation.EXACT_INDEPENDENT, - AFCalcFactory.Calculation.EXACT_REFERENCE); + final List biAllelicModels = Arrays.asList( + AFCalculatorImplementation.EXACT_INDEPENDENT, + AFCalculatorImplementation.EXACT_REFERENCE); - final List multiAllelicModels = Arrays.asList( - AFCalcFactory.Calculation.EXACT_INDEPENDENT); + final List multiAllelicModels = Arrays.asList( + AFCalculatorImplementation.EXACT_INDEPENDENT); // for ( final int nonTypePLs : Arrays.asList(100) ) { // for ( final int nSamples : Arrays.asList(10000) ) { @@ -81,12 +82,12 @@ public class AFCalcPerformanceUnitTest extends BaseTest { for ( final int nSamples : Arrays.asList(100, 1000) ) { final List alleleCounts = Arrays.asList(0, 1, 2, 3, 4, 5, 10, 50, 500); for ( final int nAltAlleles : Arrays.asList(1, 2, 3) ) { - final List models = nAltAlleles > 1 ? multiAllelicModels : biAllelicModels; - for ( final AFCalcFactory.Calculation model : models ) { + final List models = nAltAlleles > 1 ? multiAllelicModels : biAllelicModels; + for ( final AFCalculatorImplementation model : models ) { for ( final List ACs : Utils.makePermutations(alleleCounts, nAltAlleles, true) ) { if ( MathUtils.sum(ACs) < nSamples * 2 ) { - final AFCalcTestBuilder testBuilder - = new AFCalcTestBuilder(nSamples, nAltAlleles, model, AFCalcTestBuilder.PriorType.human); + final AFCalculatorTestBuilder testBuilder + = new AFCalculatorTestBuilder(nSamples, nAltAlleles, model, AFCalculatorTestBuilder.PriorType.human); tests.add(new Object[]{testBuilder, ACs, nonTypePLs}); } } @@ -98,7 +99,7 @@ public class AFCalcPerformanceUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - private Pair estNumberOfEvaluations(final AFCalcTestBuilder testBuilder, final VariantContext vc, final int nonTypePL) { + private Pair estNumberOfEvaluations(final AFCalculatorTestBuilder testBuilder, final VariantContext vc, final int nonTypePL) { final int evalOverhead = 2; // 2 final int maxEvalsPerSamplePerAC = 3; @@ -114,11 +115,11 @@ public class AFCalcPerformanceUnitTest extends BaseTest { } @Test(dataProvider = "ScalingTests") - private void testScaling(final AFCalcTestBuilder testBuilder, final List ACs, final int nonTypePL) { - final AFCalc calc = testBuilder.makeModel(); + private void testScaling(final AFCalculatorTestBuilder testBuilder, final List ACs, final int nonTypePL) { + final AFCalculator calc = testBuilder.makeModel(); final double[] priors = testBuilder.makePriors(); final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL); - final AFCalcResult result = calc.getLog10PNonRef(vc, priors); + final AFCalculationResult result = calc.getLog10PNonRef(vc, HomoSapiensConstants.DEFAULT_PLOIDY, testBuilder.numAltAlleles, priors); final Pair expectedNEvaluation = estNumberOfEvaluations(testBuilder, vc, nonTypePL); final int minEvals = expectedNEvaluation.getFirst(); final int maxEvals = expectedNEvaluation.getSecond(); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcResultUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorResultUnitTest.java similarity index 96% rename from protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcResultUnitTest.java rename to protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorResultUnitTest.java index 644df8ea6..178f8cc9c 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalcResultUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/AFCalculatorResultUnitTest.java @@ -57,7 +57,7 @@ import org.testng.annotations.Test; import java.util.*; -public class AFCalcResultUnitTest extends BaseTest { +public class AFCalculatorResultUnitTest extends BaseTest { private static class MyTest { final double[] Ls, expectedPosteriors; @@ -115,7 +115,7 @@ public class AFCalcResultUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "TestComputePosteriors") private void testComputingPosteriors(final MyTest data) { - final AFCalcResult result = new AFCalcResult(new int[]{0}, 1, alleles, data.Ls, log10Even, Collections.singletonMap(C, -1.0)); + final AFCalculationResult result = new AFCalculationResult(new int[]{0}, 1, alleles, data.Ls, log10Even, Collections.singletonMap(C, -1.0)); Assert.assertEquals(result.getLog10PosteriorOfAFEq0(), data.expectedPosteriors[0], 1e-3, "AF = 0 not expected"); Assert.assertEquals(result.getLog10PosteriorOfAFGT0(), data.expectedPosteriors[1], 1e-3, "AF > 0 not expected"); @@ -145,8 +145,8 @@ public class AFCalcResultUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - private AFCalcResult makePolymorphicTestData(final double pNonRef) { - return new AFCalcResult( + private AFCalculationResult makePolymorphicTestData(final double pNonRef) { + return new AFCalculationResult( new int[]{0}, 1, alleles, @@ -157,7 +157,7 @@ public class AFCalcResultUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "TestIsPolymorphic") private void testIsPolymorphic(final double pNonRef, final double pThreshold, final boolean shouldBePoly) { - final AFCalcResult result = makePolymorphicTestData(pNonRef); + final AFCalculationResult result = makePolymorphicTestData(pNonRef); final boolean actualIsPoly = result.isPolymorphic(C, Math.log10(1 - pThreshold)); Assert.assertEquals(actualIsPoly, shouldBePoly, "isPolymorphic with pNonRef " + pNonRef + " and threshold " + pThreshold + " returned " @@ -166,7 +166,7 @@ public class AFCalcResultUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "TestIsPolymorphic") private void testIsPolymorphicQual(final double pNonRef, final double pThreshold, final boolean shouldBePoly) { - final AFCalcResult result = makePolymorphicTestData(pNonRef); + final AFCalculationResult result = makePolymorphicTestData(pNonRef); final double qual = QualityUtils.phredScaleCorrectRate(pThreshold); final boolean actualIsPoly = result.isPolymorphicPhredScaledQual(C, qual); Assert.assertEquals(actualIsPoly, shouldBePoly, diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/FixedAFCalculatorProviderUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/FixedAFCalculatorProviderUnitTest.java new file mode 100644 index 000000000..fd137940e --- /dev/null +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/FixedAFCalculatorProviderUnitTest.java @@ -0,0 +1,166 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; + +import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; +import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection; +import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection; +import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Tests {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider} + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class FixedAFCalculatorProviderUnitTest { + + @Test(dataProvider="nonThreadSafeConstructorsData") + public void testNonThreadSafeConstructors(final int ploidy, final int maxAltAlleles, final AFCalculatorImplementation preferred) { + final GenotypeCalculationArgumentCollection args = new GenotypeCalculationArgumentCollection(); + args.MAX_ALTERNATE_ALLELES = maxAltAlleles; + args.samplePloidy = ploidy; + final StandardCallerArgumentCollection callerArgs = new StandardCallerArgumentCollection(); + if (preferred != null ) callerArgs.requestedAlleleFrequencyCalculationModel = preferred; + callerArgs.genotypeArgs = args; + final FixedAFCalculatorProvider providerCallerArgs = new FixedAFCalculatorProvider(callerArgs,null,true); + final FixedAFCalculatorProvider providerGenotypingArgs = new FixedAFCalculatorProvider(args,null,true); + + Assert.assertNotNull(providerCallerArgs.getInstance(ploidy,maxAltAlleles)); + Assert.assertTrue(AFCalculatorImplementation.fromCalculatorClass(providerCallerArgs.getInstance(ploidy,maxAltAlleles).getClass()).usableForParams(ploidy,maxAltAlleles)); + Assert.assertNotNull(providerGenotypingArgs.getInstance(ploidy,maxAltAlleles)); + Assert.assertTrue(AFCalculatorImplementation.fromCalculatorClass(providerGenotypingArgs.getInstance(ploidy,maxAltAlleles).getClass()).usableForParams(ploidy,maxAltAlleles)); + + if (preferred != null && preferred.usableForParams(ploidy,maxAltAlleles)) { + Assert.assertEquals(AFCalculatorImplementation.fromCalculatorClass(providerCallerArgs.getInstance(ploidy, maxAltAlleles).getClass()), preferred); + } + } + + @Test(dataProvider="threadSafeFactoryData") + public void testThreadSafeConstructors(final int ploidy, final int maxAltAlleles, final AFCalculatorImplementation impl, final int cpuThreadCount, final int dataThreadCount) { + final GenomeAnalysisEngine toolkit = new GenomeAnalysisEngine(); + final GATKArgumentCollection gatkArguments = new GATKArgumentCollection(); + gatkArguments.numberOfCPUThreadsPerDataThread =cpuThreadCount; + gatkArguments.numberOfDataThreads = dataThreadCount; + toolkit.setArguments(gatkArguments); + final StandardCallerArgumentCollection callerArgs = new StandardCallerArgumentCollection(); + if (impl != null) callerArgs.requestedAlleleFrequencyCalculationModel = impl; + final GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection(); + callerArgs.genotypeArgs = genotypeArgs; + genotypeArgs.samplePloidy = ploidy; + genotypeArgs.MAX_ALTERNATE_ALLELES = maxAltAlleles; + final AFCalculatorProvider provider = FixedAFCalculatorProvider.createThreadSafeProvider(toolkit,callerArgs,null); + final Hashtable perThreadProvider = new Hashtable(cpuThreadCount * dataThreadCount); + final List threads = new ArrayList<>(); + // execute different threads. + for (int i = 0; i < cpuThreadCount; i++) + for (int j = 0; j < cpuThreadCount; j++) { + final Thread thread = new Thread(new Runnable() { + @Override + public void run() { + synchronized (perThreadProvider) { + perThreadProvider.put(Thread.currentThread(), provider.getInstance(ploidy, maxAltAlleles)); + } + } + }); + thread.start(); + threads.add(thread); + } + // wait all threads to have finished. + for (final Thread thread : threads) + try { + thread.join(); + } catch (InterruptedException e) { + Assert.fail(); + } + // check that each thread gave a different calculator. + final Set calculators = new HashSet<>(perThreadProvider.values()); + Assert.assertEquals(calculators.size(),threads.size()); + } + + private final static int[] PLOIDIES = new int[] { 1,2,3,4,10 }; + private final static int[] MAX_ALT_ALLELES = new int[] { 1,2,3,4,10}; + private final static int[] CPU_THREAD_COUNT = new int[] { 1, 2, 3, 4, 10}; + private final static int[] DATA_THREAD_COUNT = new int[] { 1, 2, 3, 4, 10}; + + @DataProvider(name="nonThreadSafeConstructorsData") + public Object[][] nonThreadSafeConstructorsData() { + final Object[][] result = new Object[PLOIDIES.length * MAX_ALT_ALLELES.length * (AFCalculatorImplementation.values().length + 1)][]; + int idx = 0; + for (int i = 0; i < PLOIDIES.length; i++) { + for (int j = 0; j < MAX_ALT_ALLELES.length; j++) { + result[idx++] = new Object[] { PLOIDIES[i], MAX_ALT_ALLELES[j], null }; + for (final AFCalculatorImplementation impl : AFCalculatorImplementation.values()) { + result[idx++] = new Object[]{PLOIDIES[i], MAX_ALT_ALLELES[j], impl}; + } + } + } + return result; + } + + @DataProvider(name="threadSafeFactoryData") + public Object[][] threadSafeFactoryData() { + final Object[][] result = new Object[DATA_THREAD_COUNT.length * CPU_THREAD_COUNT.length * PLOIDIES.length * MAX_ALT_ALLELES.length * (AFCalculatorImplementation.values().length + 1)][]; + int idx = 0; + for (int i = 0; i < PLOIDIES.length; i++) { + for (int j = 0; j < MAX_ALT_ALLELES.length; j++) { + for (int k = 0; k < CPU_THREAD_COUNT.length; k++) { + for (int l = 0; l < DATA_THREAD_COUNT.length; l++) { + result[idx++] = new Object[]{PLOIDIES[i], MAX_ALT_ALLELES[j], null, CPU_THREAD_COUNT[k], DATA_THREAD_COUNT[l]}; + for (final AFCalculatorImplementation impl : AFCalculatorImplementation.values()) { + result[idx++] = new Object[]{PLOIDIES[i], MAX_ALT_ALLELES[j], impl, CPU_THREAD_COUNT[k], DATA_THREAD_COUNT[l]}; + } + } + } + } + } + return result; + } +} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java index f2658a4c5..fd75cd551 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java @@ -186,12 +186,13 @@ public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest { final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size()); double[] priors = new double[len]; // flat priors - final GeneralPloidyExactAFCalc calc = new GeneralPloidyExactAFCalc(cfg.GLs.size(), 1 + cfg.numAltAlleles, cfg.ploidy); - calc.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors); + final GeneralPloidyExactAFCalculator calc = new GeneralPloidyExactAFCalculator(); + calc.combineSinglePools(cfg.GLs, cfg.ploidy,cfg.numAltAlleles + 1, priors); int nameIndex = 1; + for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) { - int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1)); - int calculatedAlleleCount = calc.getStateTracker().getAlleleCountsOfMAP()[allele]; + int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex + 1)); + int calculatedAlleleCount = calc.getAltAlleleCountOfMAP(allele); Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyFailOverAFCalculatorProviderUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyFailOverAFCalculatorProviderUnitTest.java new file mode 100644 index 000000000..4214f408a --- /dev/null +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/GeneralPloidyFailOverAFCalculatorProviderUnitTest.java @@ -0,0 +1,162 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc; + +import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; +import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection; +import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Tests {@link GeneralPloidyFailOverAFCalculatorProvider} + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class GeneralPloidyFailOverAFCalculatorProviderUnitTest { + + @Test(dataProvider="nonThreadSafeConstructorsData") + public void testNonThreadSafeConstructors(final int ploidy, final int maxAltAlleles) { + final GenotypeCalculationArgumentCollection args = new GenotypeCalculationArgumentCollection(); + args.MAX_ALTERNATE_ALLELES = maxAltAlleles; + args.samplePloidy = ploidy; + + final GeneralPloidyFailOverAFCalculatorProvider provider = new GeneralPloidyFailOverAFCalculatorProvider(args,null); + + final AFCalculator calculator = provider.getInstance(ploidy,maxAltAlleles); + Assert.assertNotNull(calculator); + final AFCalculatorImplementation implementation = AFCalculatorImplementation.fromCalculatorClass(calculator.getClass()); + Assert.assertTrue(implementation.usableForParams(ploidy,maxAltAlleles)); + for (int i = 0; i < PLOIDIES.length; i++) { + for (int j = 0; j < MAX_ALT_ALLELES.length; j++) { + if (implementation.usableForParams(PLOIDIES[i],MAX_ALT_ALLELES[j])) + Assert.assertSame(provider.getInstance(PLOIDIES[i],MAX_ALT_ALLELES[j]),calculator); + else { + final AFCalculator failOver = provider.getInstance(PLOIDIES[i],MAX_ALT_ALLELES[j]); + Assert.assertNotNull(failOver); + final AFCalculatorImplementation failOverImplementation = AFCalculatorImplementation.fromCalculatorClass(failOver.getClass()); + Assert.assertTrue(failOverImplementation.usableForParams(PLOIDIES[i],MAX_ALT_ALLELES[j])); + Assert.assertEquals(failOverImplementation, AFCalculatorImplementation.EXACT_GENERAL_PLOIDY); + } + } + } + } + + @Test(dataProvider="threadSafeFactoryData") + public void testThreadSafeConstructors(final int ploidy, final int maxAltAlleles, final int cpuThreadCount, final int dataThreadCount) { + final GenomeAnalysisEngine toolkit = new GenomeAnalysisEngine(); + final GATKArgumentCollection gatkArguments = new GATKArgumentCollection(); + gatkArguments.numberOfCPUThreadsPerDataThread =cpuThreadCount; + gatkArguments.numberOfDataThreads = dataThreadCount; + toolkit.setArguments(gatkArguments); + final GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection(); + genotypeArgs.samplePloidy = ploidy; + genotypeArgs.MAX_ALTERNATE_ALLELES = maxAltAlleles; + final AFCalculatorProvider provider = GeneralPloidyFailOverAFCalculatorProvider.createThreadSafeProvider(toolkit,genotypeArgs,null); + final Hashtable perThreadProvider = new Hashtable(cpuThreadCount * dataThreadCount); + final List threads = new ArrayList<>(); + // execute different threads. + for (int i = 0; i < cpuThreadCount; i++) + for (int j = 0; j < cpuThreadCount; j++) { + final Thread thread = new Thread(new Runnable() { + @Override + public void run() { + synchronized (perThreadProvider) { + perThreadProvider.put(Thread.currentThread(), provider.getInstance(ploidy, maxAltAlleles)); + } + } + }); + thread.start(); + threads.add(thread); + } + // wait all threads to have finished. + for (final Thread thread : threads) + try { + thread.join(); + } catch (InterruptedException e) { + Assert.fail(); + } + // check that each thread gave a different calculator. + final Set calculators = new HashSet<>(perThreadProvider.values()); + Assert.assertEquals(calculators.size(),threads.size()); + } + + private final static int[] PLOIDIES = new int[] { AFCalculatorImplementation.UNBOUND_PLOIDY,1,2,3,4,10 }; + private final static int[] MAX_ALT_ALLELES = new int[] { AFCalculatorImplementation.UNBOUND_ALTERNATIVE_ALLELE_COUNT,1,2,3,4,10}; + private final static int[] CPU_THREAD_COUNT = new int[] { 1, 2, 3, 4, 10}; + private final static int[] DATA_THREAD_COUNT = new int[] { 1, 2, 3, 4, 10}; + + @DataProvider(name="nonThreadSafeConstructorsData") + public Object[][] nonThreadSafeConstructorsData() { + final Object[][] result = new Object[PLOIDIES.length * MAX_ALT_ALLELES.length][]; + int idx = 0; + for (int i = 0; i < PLOIDIES.length; i++) { + for (int j = 0; j < MAX_ALT_ALLELES.length; j++) { + result[idx++] = new Object[] { PLOIDIES[i], MAX_ALT_ALLELES[j]}; + } + } + return result; + } + + @DataProvider(name="threadSafeFactoryData") + public Object[][] threadSafeFactoryData() { + final Object[][] result = new Object[DATA_THREAD_COUNT.length * CPU_THREAD_COUNT.length * PLOIDIES.length * MAX_ALT_ALLELES.length][]; + int idx = 0; + for (int i = 0; i < PLOIDIES.length; i++) { + for (int j = 0; j < MAX_ALT_ALLELES.length; j++) { + for (int k = 0; k < CPU_THREAD_COUNT.length; k++) { + for (int l = 0; l < DATA_THREAD_COUNT.length; l++) { + result[idx++] = new Object[]{PLOIDIES[i], MAX_ALT_ALLELES[j], CPU_THREAD_COUNT[k], DATA_THREAD_COUNT[l]}; + } + } + } + } + return result; + } +} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalculatorUnitTest.java similarity index 92% rename from protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java rename to protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalculatorUnitTest.java index 756b6dce3..03b7b08c7 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalculatorUnitTest.java @@ -61,7 +61,7 @@ import java.util.*; // SEE private/R/pls.R if you want the truth output for these tests -public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest { +public class IndependentAllelesDiploidExactAFCalculatorUnitTest extends BaseTest { @DataProvider(name = "TestCombineGLs") public Object[][] makeTestCombineGLs() { List tests = new ArrayList(); @@ -102,12 +102,12 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest { } private Genotype makePL(final int ... PLs) { - return AFCalcUnitTest.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), PLs); + return AFCalculationUnitTest.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), PLs); } @Test(enabled = true, dataProvider = "TestCombineGLs") public void testCombineGLsPrecise(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) { - final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4); + final IndependentAllelesDiploidExactAFCalculator calc = (IndependentAllelesDiploidExactAFCalculator) AFCalculatorImplementation.EXACT_INDEPENDENT.newInstance(); final Genotype combined = calc.combineGLsPrecise(testg, altIndex, nAlts); Assert.assertEquals(combined.getPL(), expected.getPL(), @@ -116,7 +116,7 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "TestCombineGLs") public void testCombinePrecise(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) { - final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4); + final IndependentAllelesDiploidExactAFCalculator calc = (IndependentAllelesDiploidExactAFCalculator) AFCalculatorImplementation.EXACT_INDEPENDENT.newInstance(); final Genotype combined = calc.combineGLsPrecise(testg, altIndex, nAlts); Assert.assertEquals(combined.getPL(), expected.getPL(), @@ -156,7 +156,7 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "TestMakeAlleleConditionalContexts") private void testMakeAlleleConditionalContexts(final VariantContext vc, final List expectedVCs) { - final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4); + final IndependentAllelesDiploidExactAFCalculator calc = (IndependentAllelesDiploidExactAFCalculator) AFCalculatorImplementation.EXACT_INDEPENDENT.newInstance(); final List biAllelicVCs = calc.makeAlleleConditionalContexts(vc); Assert.assertEquals(biAllelicVCs.size(), expectedVCs.size()); @@ -197,24 +197,24 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest { final double log10pNonRef = Math.log10(1-pRef); - final List originalPriors = new LinkedList(); + final List originalPriors = new LinkedList(); final List pNonRefN = new LinkedList(); for ( int i = 0; i < log10LAlleles.size(); i++ ) { final double log10LAllele1 = log10LAlleles.get(i); final double[] L1 = MathUtils.normalizeFromLog10(new double[]{log10LAllele1, 0.0}, true); - final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, -10000.0)); + final AFCalculationResult result1 = new AFCalculationResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, -10000.0)); originalPriors.add(result1); pNonRefN.add(log10pNonRef*(i+1)); } - final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 2); - final List thetaNPriors = calc.applyMultiAllelicPriors(originalPriors); + final IndependentAllelesDiploidExactAFCalculator calc = (IndependentAllelesDiploidExactAFCalculator) AFCalculatorImplementation.EXACT_INDEPENDENT.newInstance(); + final List thetaNPriors = calc.applyMultiAllelicPriors(originalPriors); double prevPosterior = 0.0; for ( int i = 0; i < log10LAlleles.size(); i++ ) { - final AFCalcResult thetaN = thetaNPriors.get(i); - AFCalcResult orig = null; - for ( final AFCalcResult x : originalPriors ) + final AFCalculationResult thetaN = thetaNPriors.get(i); + AFCalculationResult orig = null; + for ( final AFCalculationResult x : originalPriors ) if ( x.getAllelesUsedInGenotyping().equals(thetaN.getAllelesUsedInGenotyping())) orig = x; diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/AFPriorProviderUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/AFPriorProviderUnitTest.java new file mode 100644 index 000000000..97854e9e2 --- /dev/null +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/AFPriorProviderUnitTest.java @@ -0,0 +1,143 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.tools.walkers.haplotypecaller; + +import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; +import org.broadinstitute.gatk.tools.walkers.genotyper.AFPriorProvider; +import org.broadinstitute.gatk.tools.walkers.genotyper.CustomAFPriorProvider; +import org.broadinstitute.gatk.tools.walkers.genotyper.HeterozygosityAFPriorProvider; +import org.broadinstitute.gatk.utils.BaseTest; +import org.broadinstitute.gatk.utils.MathUtils; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +/** + * TODO document this. + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class AFPriorProviderUnitTest extends BaseTest { + + private static final double TOLERANCE = 0.0001; + + @Test(dataProvider="HeterozygosityProviderData") + public void testHeterozygosityProvider(final double h, final int useCount, final int minPloidy, final int maxPloidy) { + final double het = h / maxPloidy; + final Random rdn = GenomeAnalysisEngine.getRandomGenerator(); + final int[] plodies = new int[useCount]; + for (int i = 0; i < useCount; i++) + plodies[i] = rdn.nextInt(maxPloidy - minPloidy + 1) + minPloidy; + + final AFPriorProvider provider = new HeterozygosityAFPriorProvider(het); + for (int i = 0; i < useCount; i++) { + final int ploidy = plodies[i]; + double[] priors = provider.forTotalPloidy(ploidy); + Assert.assertNotNull(priors); + Assert.assertEquals(priors.length, ploidy + 1); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(priors),0,TOLERANCE); + for (int j = 0; j < priors.length; j++) { + Assert.assertTrue(!Double.isNaN(priors[j])); + Assert.assertTrue(priors[j] < 0); + if (j > 0) Assert.assertEquals(priors[j], Math.log10(het) - Math.log10(j)); + } + } + } + + @Test(dataProvider="CustomProviderData") + public void testCustomProvider(final int ploidy) { + final double[] priors = new double[ploidy]; + final Random rdn = GenomeAnalysisEngine.getRandomGenerator(); + double remaining = 1; + final List priorsList = new ArrayList(); + for (int i = 0; i < priors.length; i++) { + priors[i] = remaining * rdn.nextDouble() * (.1 / ploidy ); + remaining -= priors[i]; + priorsList.add(priors[i]); + } + + final AFPriorProvider provider = new CustomAFPriorProvider(priorsList); + + final double[] providedPriors = provider.forTotalPloidy(ploidy); + Assert.assertNotNull(providedPriors); + Assert.assertEquals(providedPriors.length, priors.length + 1); + for (int i = 0; i < priors.length; i++) + Assert.assertEquals(providedPriors[i+1],Math.log10(priors[i]),TOLERANCE); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(providedPriors),0,TOLERANCE); + } + + + private double[] hets = new double[] { 0.00001, 0.001, 0.1, 0.5, 0.99, 0.999 }; + private int[] useCounts = new int[] { 10, 100, 1000 }; + + private int[] ploidy = new int[] { 1 , 2, 3, 10, 100, 200, 500}; + + @DataProvider(name="CustomProviderData") + public Object[][] customProviderData() { + final Object[][] result = new Object[ploidy.length][]; + for (int i = 0; i < result.length; i++) + result[i] = new Object[] { ploidy[i] }; + return result; + } + + @DataProvider(name="HeterozygosityProviderData") + public Object[][] heterozygosityProviderData() { + final Object[][] result = new Object[hets.length * useCounts.length * ((ploidy.length + 1) * (ploidy.length) / 2)][]; + int idx = 0; + for (double h : hets) + for (int sc : useCounts) + for (int i = 0; i < ploidy.length; i++) + for (int j = i; j < ploidy.length; j++) + result[idx++] = new Object[] { h, sc, ploidy[i], ploidy[j]}; + return result; + } + + +} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java index e4e189611..456dddb7d 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java @@ -91,107 +91,6 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest { genomeLocParser = new GenomeLocParser(seq); } - @Test - public void testFindHomVarEventAllelesInSample() { - final List eventAlleles = new ArrayList(); - eventAlleles.add( Allele.create("A", true) ); - eventAlleles.add( Allele.create("C", false) ); - final List haplotypeAlleles = new ArrayList(); - haplotypeAlleles.add( Allele.create("AATA", true) ); - haplotypeAlleles.add( Allele.create("AACA", false) ); - haplotypeAlleles.add( Allele.create("CATA", false) ); - haplotypeAlleles.add( Allele.create("CACA", false) ); - final List haplotypes = new ArrayList(); - haplotypes.add(new Haplotype("AATA".getBytes())); - haplotypes.add(new Haplotype("AACA".getBytes())); - haplotypes.add(new Haplotype("CATA".getBytes())); - haplotypes.add(new Haplotype("CACA".getBytes())); - final List haplotypeAllelesForSample = new ArrayList(); - haplotypeAllelesForSample.add( Allele.create("CATA", false) ); - haplotypeAllelesForSample.add( Allele.create("CACA", false) ); - final List> alleleMapper = new ArrayList>(); - List Aallele = new ArrayList(); - Aallele.add(haplotypes.get(0)); - Aallele.add(haplotypes.get(1)); - List Callele = new ArrayList(); - Callele.add(haplotypes.get(2)); - Callele.add(haplotypes.get(3)); - alleleMapper.add(Aallele); - alleleMapper.add(Callele); - final List eventAllelesForSample = new ArrayList(); - eventAllelesForSample.add( Allele.create("C", false) ); - eventAllelesForSample.add( Allele.create("C", false) ); - - if(!compareAlleleLists(eventAllelesForSample, HaplotypeCallerGenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes))) { - logger.warn("calc alleles = " + HaplotypeCallerGenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes)); - logger.warn("expected alleles = " + eventAllelesForSample); - } - Assert.assertTrue(compareAlleleLists(eventAllelesForSample, HaplotypeCallerGenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes))); - } - - @Test - public void testFindHetEventAllelesInSample() { - final List eventAlleles = new ArrayList(); - eventAlleles.add( Allele.create("A", true) ); - eventAlleles.add( Allele.create("C", false) ); - eventAlleles.add( Allele.create("T", false) ); - final List haplotypeAlleles = new ArrayList(); - haplotypeAlleles.add( Allele.create("AATA", true) ); - haplotypeAlleles.add( Allele.create("AACA", false) ); - haplotypeAlleles.add( Allele.create("CATA", false) ); - haplotypeAlleles.add( Allele.create("CACA", false) ); - haplotypeAlleles.add( Allele.create("TACA", false) ); - haplotypeAlleles.add( Allele.create("TTCA", false) ); - haplotypeAlleles.add( Allele.create("TTTA", false) ); - final List haplotypes = new ArrayList(); - haplotypes.add(new Haplotype("AATA".getBytes())); - haplotypes.add(new Haplotype("AACA".getBytes())); - haplotypes.add(new Haplotype("CATA".getBytes())); - haplotypes.add(new Haplotype("CACA".getBytes())); - haplotypes.add(new Haplotype("TACA".getBytes())); - haplotypes.add(new Haplotype("TTCA".getBytes())); - haplotypes.add(new Haplotype("TTTA".getBytes())); - final List haplotypeAllelesForSample = new ArrayList(); - haplotypeAllelesForSample.add( Allele.create("TTTA", false) ); - haplotypeAllelesForSample.add( Allele.create("AATA", true) ); - final List> alleleMapper = new ArrayList>(); - List Aallele = new ArrayList(); - Aallele.add(haplotypes.get(0)); - Aallele.add(haplotypes.get(1)); - List Callele = new ArrayList(); - Callele.add(haplotypes.get(2)); - Callele.add(haplotypes.get(3)); - List Tallele = new ArrayList(); - Tallele.add(haplotypes.get(4)); - Tallele.add(haplotypes.get(5)); - Tallele.add(haplotypes.get(6)); - alleleMapper.add(Aallele); - alleleMapper.add(Callele); - alleleMapper.add(Tallele); - final List eventAllelesForSample = new ArrayList(); - eventAllelesForSample.add( Allele.create("A", true) ); - eventAllelesForSample.add( Allele.create("T", false) ); - - if(!compareAlleleLists(eventAllelesForSample, HaplotypeCallerGenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes))) { - logger.warn("calc alleles = " + HaplotypeCallerGenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes)); - logger.warn("expected alleles = " + eventAllelesForSample); - } - Assert.assertTrue(compareAlleleLists(eventAllelesForSample, HaplotypeCallerGenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes))); - } - - private boolean compareAlleleLists(List l1, List l2) { - if( l1.size() != l2.size() ) { - return false; // sanity check - } - - for( int i=0; i < l1.size(); i++ ){ - if ( !l2.contains(l1.get(i)) ) - return false; - } - return true; - } - - private class BasicGenotypingTestProvider extends TestDataProvider { byte[] ref; byte[] hap; diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java index e540185ee..16b15544c 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java @@ -90,12 +90,24 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testTetraploidRun() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -V:sample1 " + privateTestDir + "tetraploid-gvcf-1.vcf" + + "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V:sample1 " + privateTestDir + "tetraploid-gvcf-1.vcf" + " -V:sample2 " + privateTestDir + "tetraploid-gvcf-2.vcf" + " -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" + - " -L " + privateTestDir + "tetraploid-gvcfs.intervals"), + " -L " + privateTestDir + "tetraploid-gvcfs.intervals", 1, - Arrays.asList("41de0808e029ebefd8b28d31ce10109c")); + Arrays.asList("20f55be01d01bed48bf66f354fa72e5b")); + executeTest("combineSingleSamplePipelineGVCF", spec); + } + + @Test(enabled= true) + public void testMixedPloidyRun() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V:sample1 " + privateTestDir + "haploid-gvcf-1.vcf" + + " -V:sample2 " + privateTestDir + "tetraploid-gvcf-2.vcf" + + " -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" + + " -L " + privateTestDir + "tetraploid-gvcfs.intervals", + 1, + Arrays.asList("c8bf3da5eb641d0082bdd5f12ea58e1e")); executeTest("combineSingleSamplePipelineGVCF", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java index 6424b9ae1..f0a0f669e 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java @@ -85,7 +85,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testTetraploidRun() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -ploidy 4 -V:sample1 " + privateTestDir + "tetraploid-gvcf-1.vcf" + + baseTestString(" -V:sample1 " + privateTestDir + "tetraploid-gvcf-1.vcf" + " -V:sample2 " + privateTestDir + "tetraploid-gvcf-2.vcf" + " -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" + " -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference), @@ -94,6 +94,18 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { executeTest("combineSingleSamplePipelineGVCF", spec); } + @Test(enabled= true) + public void testMixedPloidyRun() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(" -V:sample1 " + privateTestDir + "haploid-gvcf-1.vcf" + + " -V:sample2 " + privateTestDir + "tetraploid-gvcf-2.vcf" + + " -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" + + " -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference), + 1, + Arrays.asList("0ad7d784a15ad7f8b386ec7ca34032af")); + executeTest("combineSingleSamplePipelineGVCF", spec); + } + @Test(enabled = true) public void combineSingleSamplePipelineGVCF_includeNonVariants() { WalkerTestSpec spec = new WalkerTestSpec( @@ -148,7 +160,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V " + privateTestDir + "gvcfExample1.vcf" + " -V " + privateTestDir + "gvcfExample2.vcf", 1, - Arrays.asList("81c4cc8a6b72c24598ee899df838f1e8")); + Arrays.asList("ec63a629cc707554d3dd2ba7254b3b8d")); executeTest("testSamplesWithDifferentLs", spec); } diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/MathUtils.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/MathUtils.java index 4e47da387..01aa13354 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/MathUtils.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/MathUtils.java @@ -293,11 +293,12 @@ public class MathUtils { public static double log10sumLog10(final double[] log10p, final int start, final int finish) { + if (start >= finish) + return Double.NEGATIVE_INFINITY; final int maxElementIndex = MathUtils.maxElementIndex(log10p, start, finish); - double maxValue = log10p[maxElementIndex]; - if(maxValue == Double.NEGATIVE_INFINITY) { + final double maxValue = log10p[maxElementIndex]; + if(maxValue == Double.NEGATIVE_INFINITY) return maxValue; - } double sum = 1.0; for (int i = start; i < finish; i++) { double curVal = log10p[i]; @@ -888,6 +889,7 @@ public class MathUtils { if (start > endIndex) { throw new IllegalArgumentException("Start cannot be after end."); } + int maxI = start; for (int i = (start+1); i < endIndex; i++) { if (array[i] > array[maxI]) diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java index 337f64345..f818d8d33 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java @@ -47,7 +47,14 @@ public class GATKVariantContextUtils { public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. + /** + * Diploid NO_CALL allele list... + * + * @deprecated you should use {@link #noCallAlleles(int)} instead. It indicates the presence of a hardcoded diploid assumption which is bad. + */ + @Deprecated public final static List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + public final static String NON_REF_SYMBOLIC_ALLELE_NAME = "NON_REF"; public final static Allele NON_REF_SYMBOLIC_ALLELE = Allele.create("<"+NON_REF_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible non-ref allele at this site @@ -143,6 +150,26 @@ public class GATKVariantContextUtils { return ref; } + /** + * Calculates the total ploidy of a variant context as the sum of all plodies across genotypes. + * @param vc the target variant context. + * @param defaultPloidy the default ploidy to be assume when there is no ploidy information for a genotype. + * @return never {@code null}. + */ + public static int totalPloidy(final VariantContext vc, final int defaultPloidy) { + if (vc == null) + throw new IllegalArgumentException("the vc provided cannot be null"); + if (defaultPloidy < 0) + throw new IllegalArgumentException("the default ploidy must 0 or greater"); + int result = 0; + for (final Genotype genotype : vc.getGenotypes()) { + final int declaredPloidy = genotype.getPloidy(); + result += declaredPloidy <= 0 ? defaultPloidy : declaredPloidy; + } + + return result; + } + public enum GenotypeMergeType { /** * Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD. @@ -1322,6 +1349,28 @@ public class GATKVariantContextUtils { } } + /** + * Cached NO_CALL immutable lists where the position ith contains the list with i elements. + */ + private static List[] NOCALL_LISTS = new List[] { + Collections.emptyList(), + Collections.singletonList(Allele.NO_CALL), + Collections.nCopies(2,Allele.NO_CALL) + }; + + /** + * Synchronized code to ensure that {@link #NOCALL_LISTS} has enough entries beyod the requested ploidy + * @param capacity the requested ploidy. + */ + private static synchronized void ensureNoCallListsCapacity(final int capacity) { + final int currentCapacity = NOCALL_LISTS.length - 1; + if (currentCapacity >= capacity) + return; + NOCALL_LISTS = Arrays.copyOf(NOCALL_LISTS,Math.max(capacity,currentCapacity << 1) + 1); + for (int i = currentCapacity + 1; i < NOCALL_LISTS.length; i++) + NOCALL_LISTS[i] = Collections.nCopies(i,Allele.NO_CALL); + } + /** * Returns a {@link Allele#NO_CALL NO_CALL} allele list provided the ploidy. * @@ -1331,16 +1380,9 @@ public class GATKVariantContextUtils { * might or might not be mutable. */ public static List noCallAlleles(final int ploidy) { - if (ploidy <= 0) - return Collections.EMPTY_LIST; - else if (ploidy == 1) - return Collections.singletonList(Allele.NO_CALL); - else { - final List result = new ArrayList<>(ploidy); - for (int i = 0; i < ploidy; i++) - result.add(Allele.NO_CALL); - return result; - } + if (NOCALL_LISTS.length <= ploidy) + ensureNoCallListsCapacity(ploidy); + return NOCALL_LISTS[ploidy]; } diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java index 13de6b4ed..feb10a7d4 100644 --- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java @@ -1573,6 +1573,35 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { locs[nextIndex++] = GenomeLoc.UNMAPPED; } + @Test(dataProvider = "totalPloidyData") + public void testTotalPloidy(final int[] ploidies, final int defaultPloidy, final int expected) { + final Genotype[] genotypes = new Genotype[ploidies.length]; + final List vcAlleles = Arrays.asList(Aref,C); + for (int i = 0; i < genotypes.length; i++) + genotypes[i] = new GenotypeBuilder().alleles(GATKVariantContextUtils.noCallAlleles(ploidies[i])).make(); + final VariantContext vc = new VariantContextBuilder().chr("seq1").genotypes(genotypes).alleles(vcAlleles).make(); + Assert.assertEquals(GATKVariantContextUtils.totalPloidy(vc,defaultPloidy),expected," " + defaultPloidy + " " + Arrays.toString(ploidies)); + } + + @DataProvider(name="totalPloidyData") + public Object[][] totalPloidyData() { + final Random rdn = GenomeAnalysisEngine.getRandomGenerator(); + final List resultList = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + final int sampleCount = rdn.nextInt(10); + + int expected = 0; + final int defaultPloidy = rdn.nextInt(10) + 1; + final int[] plodies = new int[sampleCount]; + for (int j = 0; j < sampleCount; j++) { + plodies[j] = rdn.nextInt(10); + expected += plodies[j] == 0 ? defaultPloidy : plodies[j]; + } + resultList.add(new Object[] { plodies, defaultPloidy, expected }); + } + return resultList.toArray(new Object[100][]); + } + private byte[] randomBases(final int length, final boolean reference) { final byte[] bases = new byte[length]; bases[0] = (byte) (reference ? 'A' : 'C');