Backport new AFCalculator
This commit is contained in:
parent
7b74dc1634
commit
318bee2269
|
|
@ -71,6 +71,12 @@ public class GenotypeCalculationArgumentCollection implements Cloneable{
|
|||
@Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false)
|
||||
public boolean ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = false;
|
||||
|
||||
/**
|
||||
* Use the new allele frequency / QUAL score model
|
||||
*/
|
||||
@Argument(fullName = "useNewAFCalculator", shortName = "newQual", doc = "If provided, we will use the new AF model instead of the so-called exact model", required = false)
|
||||
public boolean USE_NEW_AF_CALCULATOR = false;
|
||||
|
||||
/**
|
||||
* The expected heterozygosity value used to compute prior probability that a locus is non-reference.
|
||||
*
|
||||
|
|
@ -102,6 +108,13 @@ public class GenotypeCalculationArgumentCollection implements Cloneable{
|
|||
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
|
||||
public double indelHeterozygosity = HomoSapiensConstants.INDEL_HETEROZYGOSITY;
|
||||
|
||||
/**
|
||||
* The standard deviation of the distribution of alt allele fractions. The above heterozygosity parameters give the
|
||||
* *mean* of this distribution; this parameter gives its spread.
|
||||
*/
|
||||
@Argument(fullName = "heterozygosity_stdev", shortName = "heterozygosityStandardDeviation", doc = "Standard deviation of eterozygosity for SNP and indel calling.", required = false)
|
||||
public double heterozygosityStandardDeviation = 0.01;
|
||||
|
||||
/**
|
||||
* The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
|
||||
* confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.gatk.utils.IndexRange;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -108,7 +109,18 @@ import java.util.List;
|
|||
*/
|
||||
public class GenotypeAlleleCounts implements Comparable<GenotypeAlleleCounts>, Cloneable {
|
||||
|
||||
private double log10CombinationCount;
|
||||
private static final double UNCOMPUTED_LOG_10_COMBINATION_COUNT = -1;
|
||||
|
||||
/**
|
||||
* The log10 number of phased genotypes corresponding to this unphased genotype. For example,
|
||||
* [0, 1, 1, 1] = AB: log10(2)
|
||||
* [0, 2] = AA: log10(1)
|
||||
* [0, 1, 1, 1, 2, 1] = ABC: log10(6)
|
||||
* [0, 2, 1, 2] = AABB: log10(4!/(2!2!))
|
||||
* This is evaluated lazily i.e. it is initialized to {@link GenotypeAlleleCounts::UNCOMPUTED_LOG_10_COMBINATION_COUNT}
|
||||
* and only calculated if its getter is invoked.
|
||||
*/
|
||||
private double log10CombinationCount = UNCOMPUTED_LOG_10_COMBINATION_COUNT;
|
||||
|
||||
/**
|
||||
* The ploidy of the genotype.
|
||||
|
|
@ -156,38 +168,30 @@ public class GenotypeAlleleCounts implements Comparable<GenotypeAlleleCounts>, C
|
|||
* @param index the genotype index.
|
||||
*/
|
||||
private GenotypeAlleleCounts(final int ploidy, final int index, final int... sortedAlleleCounts) {
|
||||
this(ploidy, index, sortedAlleleCounts, sortedAlleleCounts.length >> 1);
|
||||
}
|
||||
|
||||
private GenotypeAlleleCounts(final int ploidy, final int index, final int[] sortedAlleleCounts, final int distinctAlleleCount){
|
||||
this.ploidy = ploidy;
|
||||
this.sortedAlleleCounts = sortedAlleleCounts;
|
||||
distinctAlleleCount = sortedAlleleCounts.length >> 1;
|
||||
log10CombinationCount = -1;
|
||||
this.index = index;
|
||||
this.sortedAlleleCounts = sortedAlleleCounts;
|
||||
this.distinctAlleleCount = distinctAlleleCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the log10 of the number of possible allele combinations that would give raise to this allele count.
|
||||
* @return 0 or less.
|
||||
* Gets the log10 combination count, computing it if uninitialized. Note that the invoked MathUtils method uses fast cached
|
||||
* log10 values of integers for any reasonable ploidy.
|
||||
*
|
||||
* This method should be invoked on instances of {@link GenotypeAlleleCounts} cached in {@link GenotypeLikelihoodCalculators::genotypeTableByPloidy}.
|
||||
* Such usage allows the result of this computation to be cached once for an entire run of HaplotypeCaller.
|
||||
* @return
|
||||
*/
|
||||
public double log10CombinationCount() {
|
||||
if (log10CombinationCount == -1)
|
||||
return log10CombinationCount = calculateLog10CombinationCount();
|
||||
else
|
||||
return log10CombinationCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates log10 combination count.
|
||||
*
|
||||
* @return 0 or less.
|
||||
*/
|
||||
private double calculateLog10CombinationCount() {
|
||||
if (ploidy <= 1)
|
||||
return 0;
|
||||
else {
|
||||
final int[] counts = new int[distinctAlleleCount];
|
||||
for (int i = 0, j = 1; i < distinctAlleleCount; i++, j+=2)
|
||||
counts[i] = sortedAlleleCounts[j];
|
||||
return MathUtils.log10MultinomialCoefficient(ploidy, counts);
|
||||
if (log10CombinationCount == UNCOMPUTED_LOG_10_COMBINATION_COUNT) {
|
||||
log10CombinationCount = MathUtils.log10Factorial(ploidy)
|
||||
- new IndexRange(0, distinctAlleleCount).sum(n -> MathUtils.log10Factorial(sortedAlleleCounts[2*n+1]));
|
||||
}
|
||||
return log10CombinationCount;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -785,4 +789,22 @@ public class GenotypeAlleleCounts implements Comparable<GenotypeAlleleCounts>, C
|
|||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@FunctionalInterface
|
||||
public interface IntBiConsumer {
|
||||
void accept(final int alleleIndex, final int alleleCount);
|
||||
}
|
||||
|
||||
@FunctionalInterface
|
||||
public interface IntToDoubleBiFunction {
|
||||
double apply(final int alleleIndex, final int alleleCount);
|
||||
}
|
||||
|
||||
public void forEachAlleleIndexAndCount(final IntBiConsumer action) {
|
||||
new IndexRange(0, distinctAlleleCount).forEach(n -> action.accept(sortedAlleleCounts[2*n], sortedAlleleCounts[2*n+1]));
|
||||
}
|
||||
|
||||
public double sumOverAlleleIndicesAndCounts(final IntToDoubleBiFunction func) {
|
||||
return new IndexRange(0, distinctAlleleCount).sum(n -> func.apply(sortedAlleleCounts[2*n], sortedAlleleCounts[2*n+1]));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@ import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
|
|||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AlleleFrequencyCalculator;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
|
|
@ -107,6 +108,8 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
|
||||
private final List<GenomeLoc> upstreamDeletionsLoc = new LinkedList<>();
|
||||
|
||||
protected final AFCalculator newAFCalculator;
|
||||
|
||||
/**
|
||||
* Construct a new genotyper engine, on a specific subset of samples.
|
||||
*
|
||||
|
|
@ -139,6 +142,11 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
log10AlleleFrequencyPriorsIndels = composeAlleleFrequencyPriorProvider(numberOfGenomes,
|
||||
configuration.genotypeArgs.indelHeterozygosity, configuration.genotypeArgs.inputPrior);
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
|
||||
final double refPseudocount = configuration.genotypeArgs.snpHeterozygosity / Math.pow(configuration.genotypeArgs.heterozygosityStandardDeviation,2);
|
||||
final double snpPseudocount = configuration.genotypeArgs.snpHeterozygosity * refPseudocount;
|
||||
final double indelPseudocount = configuration.genotypeArgs.indelHeterozygosity * refPseudocount;
|
||||
newAFCalculator = new AlleleFrequencyCalculator(refPseudocount, snpPseudocount, indelPseudocount, configuration.genotypeArgs.samplePloidy);
|
||||
}
|
||||
|
||||
protected GenotypingEngine(final Config configuration, final SampleList samples,
|
||||
|
|
@ -230,8 +238,15 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
|
||||
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
|
||||
final int maxNumPLValues = configuration.genotypeArgs.MAX_NUM_PL_VALUES;
|
||||
final AFCalculator afCalculator = afCalculatorProvider.getInstance(vc,defaultPloidy,maxAltAlleles).setMaxNumPLValues(maxNumPLValues);
|
||||
final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(vc, defaultPloidy,maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
|
||||
|
||||
// NOTE: in GATK4, allele subsetting has been extracted out of the AFCalculator into a utils class
|
||||
// The new AFCalculator (AlleleFrequencyCalculator) of GATK4 therefore does not implement this subsetting,
|
||||
// which *includes attaching a genotype call to a VariantContext*. In order to backport the new AFCalculator
|
||||
// it is necessary to use it only for the qual score calculation and not for any other duties.
|
||||
final AFCalculator afCalculatorForAlleleSubsetting = afCalculatorProvider.getInstance(vc,defaultPloidy,maxAltAlleles).setMaxNumPLValues(maxNumPLValues);
|
||||
final AFCalculator afCalculatorForQualScore = configuration.genotypeArgs.USE_NEW_AF_CALCULATOR ? newAFCalculator : afCalculatorForAlleleSubsetting;
|
||||
|
||||
final AFCalculationResult AFresult = afCalculatorForQualScore.getLog10PNonRef(vc, defaultPloidy,maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
|
||||
|
||||
final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc);
|
||||
|
||||
|
|
@ -274,7 +289,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
|
||||
// create the genotypes
|
||||
|
||||
final GenotypesContext genotypes = afCalculator.subsetAlleles(vc, defaultPloidy, outputAlleles, true);
|
||||
final GenotypesContext genotypes = afCalculatorForAlleleSubsetting.subsetAlleles(vc, defaultPloidy, outputAlleles, true);
|
||||
builder.genotypes(genotypes);
|
||||
|
||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ public class AFCalculationResult {
|
|||
if ( log10pRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pRefByAllele has the wrong number of elements: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
|
||||
if ( ! allelesUsedInGenotyping.containsAll(log10pRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pRefByAllele doesn't contain all of the alleles used in genotyping: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
|
||||
if ( ! MathUtils.goodLog10ProbVector(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES, false) ) throw new IllegalArgumentException("log10LikelihoodsOfAC are bad " + Utils.join(",", log10LikelihoodsOfAC));
|
||||
if ( ! MathUtils.goodLog10ProbVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, true) ) throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC));
|
||||
if ( ! MathUtils.goodLog10ProbVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, false) ) throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC));
|
||||
|
||||
this.alleleCountsOfMLE = alleleCountsOfMLE;
|
||||
this.nEvaluations = nEvaluations;
|
||||
|
|
|
|||
|
|
@ -238,7 +238,7 @@ public abstract class AFCalculator implements Cloneable {
|
|||
* @param maximumAlternativeAlleleCount the maximum alternative allele count it must be able to handle. Has no effect if
|
||||
* the current tracker is able to handle that number.
|
||||
*
|
||||
* @return never {@code null}
|
||||
* @return {@code null} iff this calculator implementation does not use a state tracker.
|
||||
*/
|
||||
protected StateTracker getStateTracker(final boolean reset, final int maximumAlternativeAlleleCount) {
|
||||
if (stateTracker == null)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,236 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE").
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2016 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
|
||||
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.GenotypesContext;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.apache.commons.math3.util.MathArrays;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeAlleleCounts;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculator;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculators;
|
||||
import org.broadinstitute.gatk.utils.Dirichlet;
|
||||
import org.broadinstitute.gatk.utils.IndexRange;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
|
||||
* @author David Benjamin <davidben@broadinstitute.org>
|
||||
*/
|
||||
public final class AlleleFrequencyCalculator extends AFCalculator {
|
||||
private static final GenotypeLikelihoodCalculators GL_CALCS = new GenotypeLikelihoodCalculators();
|
||||
private static final double THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE = 0.1;
|
||||
private static final int HOM_REF_GENOTYPE_INDEX = 0;
|
||||
|
||||
private final double refPseudocount;
|
||||
private final double snpPseudocount;
|
||||
private final double indelPseudocount;
|
||||
private final int defaultPloidy;
|
||||
|
||||
// this doesn't use the exact model, so number of evaluations is irrelevant
|
||||
private static final int DUMMY_N_EVALUATIONS = 1;
|
||||
|
||||
|
||||
public AlleleFrequencyCalculator(final double refPseudocount, final double snpPseudocount, final double indelPseudocount, final int defaultPloidy) {
|
||||
this.refPseudocount = refPseudocount;
|
||||
this.snpPseudocount = snpPseudocount;
|
||||
this.indelPseudocount = indelPseudocount;
|
||||
this.defaultPloidy = defaultPloidy;
|
||||
}
|
||||
|
||||
public AFCalculationResult getLog10PNonRef(final VariantContext vc) {
|
||||
// maxAltAlleles is not used by getLog10PNonRef, so don't worry about the 0
|
||||
return getLog10PNonRef(vc, defaultPloidy, 0, null);
|
||||
}
|
||||
//TODO: this should be a class of static methods once the old AFCalculator is gone.
|
||||
/**
|
||||
* Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc
|
||||
*
|
||||
* @param vc the VariantContext holding the alleles and sample information. The VariantContext
|
||||
* must have at least 1 alternative allele
|
||||
* @param refSnpIndelPseudocounts a total hack. A length-3 vector containing Dirichlet prior pseudocounts to
|
||||
* be given to ref, alt SNP, and alt indel alleles. Hack won't be necessary when we destroy the old AF calculators
|
||||
* @return result (for programming convenience)
|
||||
*/
|
||||
@Override
|
||||
public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles, final double[] refSnpIndelPseudocounts) {
|
||||
Utils.nonNull(vc, "vc is null");
|
||||
final int numAlleles = vc.getNAlleles();
|
||||
final List<Allele> alleles = vc.getAlleles();
|
||||
Utils.validateArg( numAlleles > 1, "VariantContext has only a single reference allele, but getLog10PNonRef requires at least one at all " + vc);
|
||||
|
||||
final double[] priorPseudocounts = alleles.stream()
|
||||
.mapToDouble(a -> a.isReference() ? refPseudocount : (a.length() > 1 ? snpPseudocount : indelPseudocount)).toArray();
|
||||
|
||||
double[] alleleCounts = new double[numAlleles];
|
||||
final double flatLog10AlleleFrequency = -MathUtils.Log10Cache.get(numAlleles); // log10(1/numAlleles)
|
||||
double[] log10AlleleFrequencies = new IndexRange(0, numAlleles).mapToDouble(n -> flatLog10AlleleFrequency);
|
||||
double alleleCountsMaximumDifference = Double.POSITIVE_INFINITY;
|
||||
|
||||
while (alleleCountsMaximumDifference > THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE) {
|
||||
final double[] newAlleleCounts = effectiveAlleleCounts(vc, log10AlleleFrequencies);
|
||||
alleleCountsMaximumDifference = Arrays.stream(MathArrays.ebeSubtract(alleleCounts, newAlleleCounts)).map(Math::abs).max().getAsDouble();
|
||||
alleleCounts = newAlleleCounts;
|
||||
final double[] posteriorPseudocounts = MathArrays.ebeAdd(priorPseudocounts, alleleCounts);
|
||||
|
||||
// first iteration uses flat prior in order to avoid local minimum where the prior + no pseudocounts gives such a low
|
||||
// effective allele frequency that it overwhelms the genotype likelihood of a real variant
|
||||
// basically, we want a chance to get non-zero pseudocounts before using a prior that's biased against a variant
|
||||
log10AlleleFrequencies = new Dirichlet(posteriorPseudocounts).log10MeanWeights();
|
||||
}
|
||||
|
||||
double[] log10POfZeroCountsByAllele = new double[numAlleles];
|
||||
double log10PNoVariant = 0;
|
||||
|
||||
for (final Genotype g : vc.getGenotypes()) {
|
||||
if (!g.hasLikelihoods()) {
|
||||
continue;
|
||||
}
|
||||
final int ploidy = g.getPloidy() == 0 ? defaultPloidy : g.getPloidy();
|
||||
final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, numAlleles);
|
||||
final double[] genotypePosteriors = normalizedGenotypePosteriors(g, glCalc, log10AlleleFrequencies);
|
||||
|
||||
//the total probability
|
||||
log10PNoVariant += Math.log10(genotypePosteriors[HOM_REF_GENOTYPE_INDEX]);
|
||||
|
||||
// per allele non-log space probabilities of zero counts for this sample
|
||||
// for each allele calculate the total probability of genotypes containing at least one copy of the allele
|
||||
final double[] pOfNonZeroAltAlleles = new double[numAlleles];
|
||||
|
||||
for (int genotype = 0; genotype < glCalc.genotypeCount(); genotype++) {
|
||||
final double genotypePosterior = genotypePosteriors[genotype];
|
||||
glCalc.genotypeAlleleCountsAt(genotype).forEachAlleleIndexAndCount((alleleIndex, count) ->
|
||||
pOfNonZeroAltAlleles[alleleIndex] += genotypePosterior);
|
||||
}
|
||||
|
||||
for (int allele = 0; allele < numAlleles; allele++) {
|
||||
log10POfZeroCountsByAllele[allele] += Math.log10(1 - pOfNonZeroAltAlleles[allele]);
|
||||
}
|
||||
}
|
||||
|
||||
// unfortunately AFCalculationResult expects integers for the MLE. We really should emit the EM no-integer values
|
||||
// which are valuable (eg in CombineGVCFs) as the sufficient statistics of the Dirichlet posterior on allele frequencies
|
||||
final int[] integerAlleleCounts = Arrays.stream(alleleCounts).mapToInt(x -> (int) Math.round(x)).toArray();
|
||||
final int[] integerAltAlleleCounts = Arrays.copyOfRange(integerAlleleCounts, 1, numAlleles);
|
||||
|
||||
//skip the ref allele (index 0)
|
||||
final Map<Allele, Double> log10PRefByAllele = IntStream.range(1, numAlleles).boxed()
|
||||
.collect(Collectors.toMap(alleles::get, a -> log10POfZeroCountsByAllele[a]));
|
||||
|
||||
// we compute posteriors here and don't have the same prior that AFCalculationResult expects. Therefore, we
|
||||
// give it our posterior as its "likelihood" along with a flat dummy prior
|
||||
final double[] dummyFlatPrior = {-1e-10, -1e-10}; //TODO: HACK must be negative for AFCalcResult
|
||||
final double[] log10PosteriorOfNoVariantYesVariant = {log10PNoVariant, Math.log10(1 - Math.pow(10, log10PNoVariant))};
|
||||
|
||||
return new AFCalculationResult(integerAltAlleleCounts, DUMMY_N_EVALUATIONS, alleles, log10PosteriorOfNoVariantYesVariant, dummyFlatPrior, log10PRefByAllele);
|
||||
}
|
||||
|
||||
private double[] effectiveAlleleCounts(final VariantContext vc, final double[] log10AlleleFrequencies) {
|
||||
final int numAlleles = vc.getNAlleles();
|
||||
Utils.validateArg(numAlleles == log10AlleleFrequencies.length, "number of alleles inconsistent");
|
||||
final double[] result = new double[numAlleles];
|
||||
for (final Genotype g : vc.getGenotypes()) {
|
||||
if (!g.hasLikelihoods()) {
|
||||
continue;
|
||||
}
|
||||
final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(g.getPloidy(), numAlleles);
|
||||
final double[] genotypePosteriors = normalizedGenotypePosteriors(g, glCalc, log10AlleleFrequencies);
|
||||
|
||||
new IndexRange(0, glCalc.genotypeCount()).forEach(genotypeIndex ->
|
||||
glCalc.genotypeAlleleCountsAt(genotypeIndex).forEachAlleleIndexAndCount((alleleIndex, count) ->
|
||||
result[alleleIndex] += genotypePosteriors[genotypeIndex] * count));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static double[] normalizedGenotypePosteriors(final Genotype g, final GenotypeLikelihoodCalculator glCalc, final double[] log10AlleleFrequencies) {
|
||||
final double[] log10Likelihoods = g.getLikelihoods().getAsVector();
|
||||
final double[] unnormalizedLog10Likelihoods = new IndexRange(0, glCalc.genotypeCount()).mapToDouble(genotypeIndex -> {
|
||||
final GenotypeAlleleCounts gac = glCalc.genotypeAlleleCountsAt(genotypeIndex);
|
||||
return gac.log10CombinationCount() + log10Likelihoods[genotypeIndex]
|
||||
+ gac.sumOverAlleleIndicesAndCounts((index, count) -> count * log10AlleleFrequencies[index]);
|
||||
});
|
||||
return MathUtils.normalizeFromLog10(unnormalizedLog10Likelihoods);
|
||||
}
|
||||
|
||||
@Override //Note: unused
|
||||
protected AFCalculationResult getResultFromFinalState(final VariantContext vc, final double[] priors, final StateTracker st) { return null; }
|
||||
|
||||
@Override//Note: unused
|
||||
protected AFCalculationResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy,
|
||||
final double[] priors, final StateTracker st) { return null; }
|
||||
|
||||
@Override //Note: unused
|
||||
protected StateTracker getStateTracker(final boolean reset, final int maximumAlternativeAlleleCount) { return null; }
|
||||
|
||||
@Override //trivial implementation -- new AFCalculator can handle multiallelics so we're not afraid
|
||||
protected VariantContext reduceScope(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles) {
|
||||
return vc;
|
||||
}
|
||||
|
||||
@Override //also trivial
|
||||
public GenotypesContext subsetAlleles(final VariantContext vc,
|
||||
final int defaultPloidy,
|
||||
final List<Allele> allelesToUse,
|
||||
final boolean assignGenotypes) {
|
||||
return vc.getGenotypes();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,121 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE").
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2016 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.utils;
|
||||
|
||||
import org.apache.commons.math3.special.Gamma;
|
||||
import org.apache.commons.math3.util.MathArrays;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
|
||||
* The Dirichlet distribution is a distribution on multinomial distributions: if pi is a vector of positive multinomial weights
|
||||
* such that sum_i pi[i] = 1, the Dirichlet pdf is P(pi) = [prod_i Gamma(alpha[i]) / Gamma(sum_i alpha[i])] * prod_i pi[i]^(alpha[i] - 1)
|
||||
*
|
||||
* The vector alpha comprises the sufficient statistics for the Dirichlet distribution.
|
||||
*
|
||||
* Since the Dirichlet is the conjugate prior to the multinomial, if one has a Dirichlet prior with concentration alpha
|
||||
* and observes each category i n_i times (assuming categories are drawn from a multinomial distribution pi)
|
||||
* the posterior is alpha_i -> alpha_i + n_i
|
||||
*
|
||||
*
|
||||
* @author David Benjamin <davidben@broadinstitute.org>
|
||||
*/
|
||||
public class Dirichlet {
|
||||
final double[] alpha;
|
||||
|
||||
public Dirichlet(final double... alpha) {
|
||||
Utils.nonNull(alpha);
|
||||
Utils.validateArg(alpha.length >= 1, "Dirichlet parameters must have at least one element");
|
||||
Utils.validateArg(MathUtils.allMatch(alpha, x -> x >= 0), "Dirichlet parameters may not be negative");
|
||||
Utils.validateArg(MathUtils.allMatch(alpha, Double::isFinite), "Dirichlet parameters must be finite");
|
||||
this.alpha = alpha.clone();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a symmetric distribution Dir(a/K, a/K, a/K . . .) where K is the number of states and
|
||||
* a is the concentration.
|
||||
*/
|
||||
public static Dirichlet symmetricDirichlet(final int numStates, final double concentration) {
|
||||
Utils.validateArg(numStates > 0, "Must have at leat one state");
|
||||
Utils.validateArg(concentration > 0, "concentration must be positive");
|
||||
return new Dirichlet(Collections.nCopies(numStates, concentration/numStates).stream().mapToDouble(x->x).toArray());
|
||||
}
|
||||
|
||||
// in variational Bayes one often needs the effective point estimate of a multinomial distribution with a
|
||||
// Dirichlet prior. This value is not the mode or mean of the Dirichlet but rather the exp of the expected log weights.
|
||||
// note that these effective weights do not add up to 1. This is fine because in any probabilistic model scaling all weights
|
||||
// amounts to an arbitrary normalization constant, but it's important to keep in mind because some classes may expect
|
||||
// normalized weights. In that case the calling code must normalize the weights.
|
||||
public double[] effectiveMultinomialWeights() {
|
||||
final double digammaOfSum = Gamma.digamma(MathUtils.sum(alpha));
|
||||
return MathUtils.applyToArray(alpha, a -> Math.exp(Gamma.digamma(a) - digammaOfSum));
|
||||
}
|
||||
|
||||
public double[] effectiveLog10MultinomialWeights() {
|
||||
final double digammaOfSum = Gamma.digamma(MathUtils.sum(alpha));
|
||||
return MathUtils.applyToArray(alpha, a -> (Gamma.digamma(a) - digammaOfSum) * MathUtils.LOG10_OF_E);
|
||||
}
|
||||
|
||||
public double[] meanWeights() {
|
||||
final double sum = MathUtils.sum(alpha);
|
||||
return MathUtils.applyToArray(alpha, x -> x / sum);
|
||||
}
|
||||
|
||||
public double[] log10MeanWeights() {
|
||||
final double sum = MathUtils.sum(alpha);
|
||||
return MathUtils.applyToArray(alpha, x -> Math.log10(x / sum));
|
||||
}
|
||||
|
||||
public int size() { return alpha.length; }
|
||||
}
|
||||
|
|
@ -0,0 +1,248 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE").
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2016 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.utils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.function.*;
|
||||
|
||||
/**
|
||||
* Represents 0-based integer index range.
|
||||
*
|
||||
* <p>
|
||||
* It represents an integer index range as the pair values:
|
||||
* <dl>
|
||||
* <dt>{@link #from}</dt>
|
||||
* <dd>- index of the first element in range (i.e. inclusive).</dd>
|
||||
* <dt>{@link #to}</dt>
|
||||
* <dd>- index of the element following the last element in range (i.e. exclusive).</dd>
|
||||
* </dl>
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* This class is intended to specify a valid index range in arrays or ordered collections.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* All instances are constraint so that neither <code>from</code> nor <code>to</code> can
|
||||
* be negative nor <code>from</code> can be larger than <code>to</code>.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* You can use {@link #isValidLength(int) isValidFor(length)} to verify that a range instance represents a valid
|
||||
* range for an 0-based indexed object with {@code length} elements.
|
||||
* </p>
|
||||
*/
|
||||
public final class IndexRange {
|
||||
|
||||
/**
|
||||
* First index in the range.
|
||||
* <p>
|
||||
* It won't ever be negative nor greater than {@link #to}.
|
||||
* </p>
|
||||
*/
|
||||
public final int from;
|
||||
|
||||
/**
|
||||
* Index following the last index included in the range.
|
||||
*
|
||||
* <p>
|
||||
* It won't ever be negative nor less than {@link #from}.
|
||||
* </p>
|
||||
*/
|
||||
public final int to;
|
||||
|
||||
/**
|
||||
* Creates a new range given its {@code from} and {@code to} indices.
|
||||
*
|
||||
* @param fromIndex the {@code from} index value.
|
||||
* @param toIndex the {@code to} index value.
|
||||
* @throws IllegalArgumentException if {@code fromIndex} is larger than {@code toIndex} or either is
|
||||
* negative.
|
||||
*/
|
||||
public IndexRange(final int fromIndex, final int toIndex) {
|
||||
Utils.validateArg(fromIndex <= toIndex, "the range size cannot be negative");
|
||||
Utils.validateArg(fromIndex >= 0, "the range cannot contain negative indices");
|
||||
from = fromIndex;
|
||||
to = toIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether this range is valid for a collection or array of a given size.
|
||||
*
|
||||
* <p>
|
||||
* It assume that 0 is the first valid index for target indexed object which is true
|
||||
* for Java Arrays and mainstream collections.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* If the input length is less than 0, thus incorrect, this method is guaranteed to return
|
||||
* {@code false}. No exception is thrown.
|
||||
* </p>
|
||||
*
|
||||
*
|
||||
* @param length the targeted collection or array length.
|
||||
* @return {@code true} if this range is valid for that {@code length}, {@code false} otherwise.
|
||||
*/
|
||||
public boolean isValidLength(final int length) {
|
||||
return to <= length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns number indexes expanded by this range.
|
||||
*
|
||||
* @return 0 or greater.
|
||||
*/
|
||||
public int size() {
|
||||
return to - from;
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate through all indexes in the range in ascending order to be processed by the
|
||||
* provided {@link IntConsumer integer consumer} lambda function.
|
||||
*
|
||||
* <p>
|
||||
* Exceptions thrown by the execution of the index consumer {@code lambda}
|
||||
* will be propagated to the caller immediately thus stopping early and preventing
|
||||
* further indexes to be processed.
|
||||
* </p>
|
||||
* @param lambda the index consumer lambda.
|
||||
* @throws IllegalArgumentException if {@code lambda} is {@code null}.
|
||||
* @throws RuntimeException if thrown by {@code lambda} for some index.
|
||||
* @throws Error if thrown by {@code lambda} for some index.
|
||||
*/
|
||||
public void forEach(final IntConsumer lambda) {
|
||||
Utils.nonNull(lambda, "the lambda function cannot be null");
|
||||
for (int i = from; i < to; i++) {
|
||||
lambda.accept(i);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply an int -> double function to this range, producing a double[]
|
||||
*
|
||||
* @param lambda the int -> double function
|
||||
*/
|
||||
public double[] mapToDouble(final IntToDoubleFunction lambda) {
|
||||
Utils.nonNull(lambda, "the lambda function cannot be null");
|
||||
final double[] result = new double[size()];
|
||||
for (int i = from; i < to; i++) {
|
||||
result[i - from] = lambda.applyAsDouble(i);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sums the values of an int -> double function applied to this range
|
||||
*
|
||||
* @param lambda the int -> double function
|
||||
*/
|
||||
public double sum(final IntToDoubleFunction lambda) {
|
||||
Utils.nonNull(lambda, "the lambda function cannot be null");
|
||||
double result = 0;
|
||||
for (int i = from; i < to; i++) {
|
||||
result += lambda.applyAsDouble(i);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply an int -> int function to this range, producing an int[]
|
||||
*
|
||||
* @param lambda the int -> int function
|
||||
*/
|
||||
public int[] mapToInteger(final IntUnaryOperator lambda) {
|
||||
Utils.nonNull(lambda, "the lambda function cannot be null");
|
||||
final int[] result = new int[size()];
|
||||
for (int i = from; i < to; i++) {
|
||||
result[i - from] = lambda.applyAsInt(i);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the elements of this range for which an int -> boolean predicate is true
|
||||
*
|
||||
* @param predicate the int -> boolean predicate
|
||||
* @return
|
||||
*/
|
||||
public List<Integer> filter(final IntPredicate predicate) {
|
||||
Utils.nonNull(predicate, "predicate may not be null");
|
||||
final List<Integer> result = new ArrayList<>();
|
||||
forEach(i -> {if (predicate.test(i)) result.add(i); } );
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object other) {
|
||||
if (other == this) {
|
||||
return true;
|
||||
} else if (!(other instanceof IndexRange)) {
|
||||
return false;
|
||||
} else {
|
||||
final IndexRange otherCasted = (IndexRange) other;
|
||||
return otherCasted.from == this.from && otherCasted.to == this.to;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
// Inspired on {@link Arrays#hashCode(Object[])}.
|
||||
return (( 31 + Integer.hashCode(from) ) * 31 ) + Integer.hashCode(to);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%d-%d",from,to);
|
||||
}
|
||||
}
|
||||
|
|
@ -52,6 +52,7 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -76,6 +77,7 @@ public class GenotypeAlleleCountsUnitTest {
|
|||
Assert.assertEquals(subject.distinctAlleleCount(),1);
|
||||
Assert.assertEquals(subject.alleleCountAt(0),ploidy);
|
||||
Assert.assertEquals(subject.alleleCountFor(0),ploidy);
|
||||
Assert.assertEquals(subject.log10CombinationCount(), 0.0);
|
||||
Assert.assertEquals(subject.alleleRankFor(0),0);
|
||||
Assert.assertEquals(subject.alleleRankFor(1),-2);
|
||||
Assert.assertTrue(subject.containsAllele(0));
|
||||
|
|
@ -175,6 +177,31 @@ public class GenotypeAlleleCountsUnitTest {
|
|||
|
||||
while (!current.containsAllele(MAXIMUM_ALLELE_INDEX + 1)) {
|
||||
final GenotypeAlleleCounts next = current.next();
|
||||
|
||||
// test log10CombinationCount
|
||||
if (ploidy == 2) {
|
||||
Assert.assertEquals(next.log10CombinationCount(), next.distinctAlleleCount() == 2 ? Math.log10(2) : 0.0);
|
||||
} else if (ploidy == 3) {
|
||||
Assert.assertEquals(next.log10CombinationCount(),
|
||||
next.distinctAlleleCount() == 3 ? Math.log10(6) : (next.distinctAlleleCount() == 2 ? Math.log10(6) - Math.log10(2) : 0.0));
|
||||
} else {
|
||||
if (next.distinctAlleleCount() == 1) {
|
||||
Assert.assertEquals(next.log10CombinationCount(), 0.0);
|
||||
} else if (next.distinctAlleleCount() == ploidy) {
|
||||
Assert.assertEquals(next.log10CombinationCount(), MathUtils.log10Factorial(ploidy));
|
||||
}
|
||||
}
|
||||
|
||||
//test forEach
|
||||
final List<Integer> alleleCountsAsList = new ArrayList<>(next.distinctAlleleCount()*2);
|
||||
next.forEachAlleleIndexAndCount((alleleIndex, alleleCount) -> {
|
||||
alleleCountsAsList.add(alleleIndex);
|
||||
alleleCountsAsList.add(alleleCount);});
|
||||
final int[] actualAlleleCounts = new int[next.distinctAlleleCount()*2];
|
||||
next.copyAlleleCounts(actualAlleleCounts, 0);
|
||||
|
||||
Assert.assertEquals(alleleCountsAsList.stream().mapToInt(n->n).toArray(), actualAlleleCounts);
|
||||
|
||||
if (current.distinctAlleleCount() == 1) {
|
||||
Assert.assertEquals(next.maximumAlleleIndex(),current.maximumAlleleIndex() + 1);
|
||||
Assert.assertEquals(next.distinctAlleleCount(), 2 );
|
||||
|
|
|
|||
|
|
@ -0,0 +1,250 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE").
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2016 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
|
||||
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import org.apache.commons.math3.util.Pair;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculator;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculators;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
|
||||
* Created by davidben on 7/28/16.
|
||||
*/
|
||||
public class AlleleFrequencyCalculatorUnitTest extends BaseTest {
|
||||
private static final double EPS = 1.0e-8;
|
||||
private static final GenotypeLikelihoodCalculators GL_CALCS = new GenotypeLikelihoodCalculators();
|
||||
|
||||
private static final Allele A = Allele.create("A", true);
|
||||
private static final Allele B = Allele.create("C");
|
||||
private static final Allele C = Allele.create("G");
|
||||
private static final Allele indel1 = Allele.create("AA");
|
||||
|
||||
private static final int HAPLOID = 1;
|
||||
private static final int DIPLOID = 2;
|
||||
private static final int TRIPLOID = 3;
|
||||
|
||||
private static final int BIALLELIC = 2;
|
||||
private static final int TRIALLELIC = 3;
|
||||
|
||||
private static final int EXTREMELY_CONFIDENT_PL = 1000;
|
||||
private static final int FAIRLY_CONFIDENT_PL = 20;
|
||||
private static final int LOW_CONFIDENCE_PL = 10;
|
||||
|
||||
private static final int DEFAULT_PLOIDY = 2;
|
||||
|
||||
private static int sampleNameCounter = 0;
|
||||
|
||||
@Test
|
||||
public void testSymmetries() {
|
||||
final AlleleFrequencyCalculator afCalc = new AlleleFrequencyCalculator(1, 0.1, 0.1, DEFAULT_PLOIDY);
|
||||
final List<Allele> alleles = Arrays.asList(A,B,C);
|
||||
final Genotype AA = genotypeWithObviousCall(DIPLOID, TRIALLELIC, new int[] {0,2}, FAIRLY_CONFIDENT_PL);
|
||||
final Genotype BB = genotypeWithObviousCall(DIPLOID, TRIALLELIC, new int[] {1,2}, FAIRLY_CONFIDENT_PL);
|
||||
final Genotype CC = genotypeWithObviousCall(DIPLOID, TRIALLELIC, new int[] {2,2}, FAIRLY_CONFIDENT_PL);
|
||||
final Genotype AB = genotypeWithObviousCall(DIPLOID, TRIALLELIC, new int[] {0,1,1,1}, FAIRLY_CONFIDENT_PL);
|
||||
final Genotype AC = genotypeWithObviousCall(DIPLOID, TRIALLELIC, new int[] {0,1,2,1}, FAIRLY_CONFIDENT_PL);
|
||||
|
||||
final Genotype BBB = genotypeWithObviousCall(TRIPLOID, TRIALLELIC, new int[] {1,3}, FAIRLY_CONFIDENT_PL);
|
||||
final Genotype CCC = genotypeWithObviousCall(TRIPLOID, TRIALLELIC, new int[] {2,3}, FAIRLY_CONFIDENT_PL);
|
||||
|
||||
// make pairs of VCs tht differ only by B <--> C
|
||||
final List<Pair<VariantContext, VariantContext>> switchBWithCPairs = Arrays.asList(
|
||||
new Pair<>(makeVC(alleles, AA, BB), makeVC(alleles, AA, CC)),
|
||||
new Pair<>(makeVC(alleles, AA, AB), makeVC(alleles, AA, AC)),
|
||||
new Pair<>(makeVC(alleles, AB, AB), makeVC(alleles, AC, AC)),
|
||||
new Pair<>(makeVC(alleles, AA, AA, BB), makeVC(alleles, AA, AA, CC)),
|
||||
new Pair<>(makeVC(alleles, AA, AB, AB), makeVC(alleles, AA, AC, AC)),
|
||||
new Pair<>(makeVC(alleles, AA, BBB), makeVC(alleles, AA, CCC))
|
||||
);
|
||||
for (final Pair<VariantContext, VariantContext> pair : switchBWithCPairs) {
|
||||
final VariantContext vc1 = pair.getFirst();
|
||||
final VariantContext vc2 = pair.getSecond();
|
||||
final AFCalculationResult result1 = afCalc.getLog10PNonRef(vc1);
|
||||
final AFCalculationResult result2 = afCalc.getLog10PNonRef(vc2);
|
||||
Assert.assertEquals(result1.getLog10PosteriorOfAFEq0(), result2.getLog10PosteriorOfAFEq0(), EPS);
|
||||
Assert.assertEquals(result1.getLog10PosteriorOfAFEq0ForAllele(B), result2.getLog10PosteriorOfAFEq0ForAllele(C), EPS);
|
||||
Assert.assertEquals(result1.getLog10PosteriorOfAFEq0ForAllele(C), result2.getLog10PosteriorOfAFEq0ForAllele(B), EPS);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMLECounts() {
|
||||
final AlleleFrequencyCalculator afCalc = new AlleleFrequencyCalculator(1, 1, 1, DEFAULT_PLOIDY);
|
||||
final List<Allele> alleles = Arrays.asList(A,B,C);
|
||||
final Genotype AA = genotypeWithObviousCall(DIPLOID, TRIALLELIC, new int[] {0,2}, FAIRLY_CONFIDENT_PL);
|
||||
final Genotype BB = genotypeWithObviousCall(DIPLOID, TRIALLELIC, new int[] {1,2}, FAIRLY_CONFIDENT_PL);
|
||||
final Genotype AB = genotypeWithObviousCall(DIPLOID, TRIALLELIC, new int[] {0,1,1,1}, FAIRLY_CONFIDENT_PL);
|
||||
final Genotype AC = genotypeWithObviousCall(DIPLOID, TRIALLELIC, new int[] {0,1,2,1}, FAIRLY_CONFIDENT_PL);
|
||||
|
||||
final Genotype BBB = genotypeWithObviousCall(TRIPLOID, TRIALLELIC, new int[] {1,3}, FAIRLY_CONFIDENT_PL);
|
||||
final Genotype CCC = genotypeWithObviousCall(TRIPLOID, TRIALLELIC, new int[] {2,3}, FAIRLY_CONFIDENT_PL);
|
||||
|
||||
final List<Pair<VariantContext, int[]>> vcWithExpectedCounts = Arrays.asList(
|
||||
new Pair<>(makeVC(alleles, AA, BB), new int[] {2,0}),
|
||||
new Pair<>(makeVC(alleles, AA, AB), new int[] {1,0}),
|
||||
new Pair<>(makeVC(alleles, AB, AB), new int[] {2,0}),
|
||||
new Pair<>(makeVC(alleles, AA, AA, BB), new int[] {2,0}),
|
||||
new Pair<>(makeVC(alleles, AA, AB, AB), new int[] {2,0}),
|
||||
new Pair<>(makeVC(alleles, AA, BBB), new int[] {3,0}),
|
||||
new Pair<>(makeVC(alleles, AA, BBB, CCC), new int[] {3,3}),
|
||||
new Pair<>(makeVC(alleles, AA, AB, AC), new int[] {1,1}),
|
||||
new Pair<>(makeVC(alleles, AA, AB, AC, BBB, CCC), new int[] {4,4})
|
||||
|
||||
);
|
||||
for (final Pair<VariantContext, int[]> pair : vcWithExpectedCounts) {
|
||||
final VariantContext vc = pair.getFirst();
|
||||
final int[] expected = pair.getSecond();
|
||||
final int[] actual = afCalc.getLog10PNonRef(vc).getAlleleCountsOfMLE();
|
||||
Assert.assertEquals(Arrays.asList(expected), Arrays.asList(actual));
|
||||
}
|
||||
}
|
||||
|
||||
// many samples with low confidence should yield a non-zero MLE, in contrast to the old exact model
|
||||
@Test
|
||||
public void testManySamplesWithLowConfidence() {
|
||||
// prior corresponding to 1000 observations of ref, 1 of a SNP
|
||||
// for this test, we want many pseudocounts in the prior because the new AF calculator learns the allele frequency
|
||||
// and we don't want the complication of the posterior being differetn from the prior
|
||||
final AlleleFrequencyCalculator afCalc = new AlleleFrequencyCalculator(1000, 1, 1, DEFAULT_PLOIDY); //prior corresponding to 1000 observations of ref, 1 of a SNP
|
||||
final List<Allele> alleles = Arrays.asList(A,B);
|
||||
|
||||
// for FAIRLY_CONFIDENT_PL = 20, this genotype has about 100 times greater likelihood to be het than hom ref
|
||||
// with our prior giving 1000 times as much weight to ref, this implies a 1 in 5 chance of each sample having a copy of the alt allele
|
||||
// (that is, 100/1000 times the combinatorial factor of 2). Thus the MLE for up to 2 samples should be zero
|
||||
// for five samples we should have one
|
||||
// for ten samples we will have more than twice as many as for five since the counts fromt he samples start to influence
|
||||
// the estimated allele frequency
|
||||
final Genotype AB = genotypeWithObviousCall(DIPLOID, BIALLELIC, new int[] {0,1,1,1}, FAIRLY_CONFIDENT_PL);
|
||||
|
||||
final List<VariantContext> vcsWithDifferentNumbersOfSamples = IntStream.range(1, 11)
|
||||
.mapToObj(n -> makeVC(alleles, Collections.nCopies(n, AB))).collect(Collectors.toList());
|
||||
final int[] counts = vcsWithDifferentNumbersOfSamples.stream().mapToInt(vc -> afCalc.getLog10PNonRef(vc).getAlleleCountAtMLE(B)).toArray();
|
||||
Assert.assertEquals(counts[0],0); // one sample
|
||||
Assert.assertEquals(counts[1],0); // two samples
|
||||
Assert.assertEquals(counts[4],2); // five samples
|
||||
Assert.assertTrue(counts[8] >= 3); // ten samples
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testApproximateMultiplicativeConfidence() {
|
||||
final AlleleFrequencyCalculator afCalc = new AlleleFrequencyCalculator(1, 1, 1, DEFAULT_PLOIDY); //flat prior -- we will choose genotypes such that the posterior remains flat
|
||||
final List<Allele> alleles = Arrays.asList(A,B);
|
||||
|
||||
final Genotype AA = genotypeWithObviousCall(DIPLOID, BIALLELIC, new int[] {0,2}, FAIRLY_CONFIDENT_PL);
|
||||
final Genotype BB = genotypeWithObviousCall(DIPLOID, BIALLELIC, new int[] {1,2}, FAIRLY_CONFIDENT_PL);
|
||||
|
||||
final List<VariantContext> vcsWithDifferentNumbersOfSamples = new ArrayList<>();
|
||||
final List<Genotype> genotypeList = new ArrayList<>();
|
||||
|
||||
for (int n = 0; n < 10; n++) {
|
||||
genotypeList.add(AA);
|
||||
genotypeList.add(BB); //adding both keeps the flat prior. Thus the posterior will equal the likelihood
|
||||
vcsWithDifferentNumbersOfSamples.add(makeVC(alleles, genotypeList));
|
||||
}
|
||||
|
||||
// since we maintain a flat allele frequency distribution, the probability of being ref as each successive sample is added
|
||||
// is multiplied by the probability of any one. Thus we get an arithmetic series in log space
|
||||
final double[] log10PRefs = vcsWithDifferentNumbersOfSamples.stream()
|
||||
.mapToDouble(vc -> afCalc.getLog10PNonRef(vc).getLog10LikelihoodOfAFEq0()).toArray();
|
||||
|
||||
for (int n = 0; n < 9; n++) {
|
||||
Assert.assertEquals(log10PRefs[n+1] - log10PRefs[n], log10PRefs[0], 0.01);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testManyRefSamplesDontKillGoodVariant() {
|
||||
final AlleleFrequencyCalculator afCalc = new AlleleFrequencyCalculator(1, 0.1, 0.1, DEFAULT_PLOIDY);
|
||||
final List<Allele> alleles = Arrays.asList(A,B);
|
||||
final Genotype AA = genotypeWithObviousCall(DIPLOID, BIALLELIC, new int[] {0,2}, EXTREMELY_CONFIDENT_PL);
|
||||
final Genotype AB = genotypeWithObviousCall(DIPLOID, BIALLELIC, new int[] {0,1,1,1}, EXTREMELY_CONFIDENT_PL);
|
||||
for (final int numRef : new int[]{1, 10, 100, 1000, 10000, 100000}) {
|
||||
final List<Genotype> genotypeList = new ArrayList<>(Collections.nCopies(numRef, AA));
|
||||
genotypeList.add(AB);
|
||||
final VariantContext vc = makeVC(alleles, genotypeList);
|
||||
final double log10PRef = afCalc.getLog10PNonRef(vc).getLog10LikelihoodOfAFEq0();
|
||||
Assert.assertTrue(log10PRef < (-EXTREMELY_CONFIDENT_PL/10) + Math.log10(numRef) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
// make PLs that correspond to an obvious call i.e. one PL is relatively big and the rest are zero
|
||||
// alleleCounts is the GenotypeAlleleCounts format for the obvious genotype, with repeats but in no particular order
|
||||
private static int[] PLsForObviousCall(final int ploidy, final int numAlleles, final int[] alleleCounts, final int PL) {
|
||||
final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, numAlleles);
|
||||
final int[] result = Collections.nCopies(glCalc.genotypeCount(), PL).stream().mapToInt(n->n).toArray();
|
||||
result[glCalc.alleleCountsToIndex(alleleCounts)] = 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
private static Genotype genotypeWithObviousCall(final int ploidy, final int numAlleles, final int[] alleles, final int PL) {
|
||||
return makeGenotype(ploidy, PLsForObviousCall(ploidy, numAlleles, alleles, PL));
|
||||
}
|
||||
//note the call is irrelevant to the AFCalculator, which only looks at PLs
|
||||
private static Genotype makeGenotype(final int ploidy, int ... pls) {
|
||||
return new GenotypeBuilder("sample" + sampleNameCounter++).alleles(Collections.nCopies(ploidy, Allele.NO_CALL)).PL(pls).make();
|
||||
}
|
||||
|
||||
private static VariantContext makeVC(final List<Allele> alleles, final Genotype... genotypes) {
|
||||
return new VariantContextBuilder().chr("chr1").alleles(alleles).genotypes(genotypes).make();
|
||||
}
|
||||
|
||||
private static VariantContext makeVC(final List<Allele> alleles, final Collection<Genotype> genotypes) {
|
||||
return new VariantContextBuilder().chr("chr1").alleles(alleles).genotypes(genotypes).make();
|
||||
}
|
||||
}
|
||||
|
|
@ -33,6 +33,9 @@ import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
|||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.*;
|
||||
import java.util.function.DoublePredicate;
|
||||
import java.util.function.DoubleUnaryOperator;
|
||||
import java.util.function.IntPredicate;
|
||||
|
||||
/**
|
||||
* MathUtils is a static class (no instantiation allowed!) with some useful math methods.
|
||||
|
|
@ -57,6 +60,10 @@ public class MathUtils {
|
|||
public static final double LOG_ONE_THIRD = -Math.log10(3.0);
|
||||
private static final double NATURAL_LOG_OF_TEN = Math.log(10.0);
|
||||
private static final double SQUARE_ROOT_OF_TWO_TIMES_PI = Math.sqrt(2.0 * Math.PI);
|
||||
/**
|
||||
* Log10 of the e constant.
|
||||
*/
|
||||
public static final double LOG10_OF_E = Math.log10(Math.E);
|
||||
|
||||
/**
|
||||
* A helper class to maintain a cache of log10 values
|
||||
|
|
@ -1686,4 +1693,69 @@ public class MathUtils {
|
|||
public static ExponentialDistribution exponentialDistribution( final double mean ) {
|
||||
return new ExponentialDistributionImpl(mean);
|
||||
}
|
||||
|
||||
/**
|
||||
* The following method implements Arrays.stream(array).map(func).toArray(), which is concise but performs poorly due
|
||||
* to the overhead of creating a stream, especially with small arrays. Thus we wrap the wordy but fast array code
|
||||
* in the following method which permits concise Java 8 code.
|
||||
*
|
||||
* Returns a new array -- the original array in not modified.
|
||||
*
|
||||
* This method has been benchmarked and performs as well as array-only code.
|
||||
*/
|
||||
public static double[] applyToArray(final double[] array, final DoubleUnaryOperator func) {
|
||||
Utils.nonNull(func, "function may not be null");
|
||||
Utils.nonNull(array, "array may not be null");
|
||||
final double[] result = new double[array.length];
|
||||
for (int m = 0; m < result.length; m++) {
|
||||
result[m] = func.applyAsDouble(array[m]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* The following method implements Arrays.stream(array).map(func).toArray(), which is concise but performs poorly due
|
||||
* to the overhead of creating a stream, especially with small arrays. Thus we wrap the wordy but fast array code
|
||||
* in the following method which permits concise Java 8 code.
|
||||
*
|
||||
* The original array is modified in place.
|
||||
*
|
||||
* This method has been benchmarked and performs as well as array-only code.
|
||||
*/
|
||||
public static double[] applyToArrayInPlace(final double[] array, final DoubleUnaryOperator func) {
|
||||
Utils.nonNull(array, "array may not be null");
|
||||
Utils.nonNull(func, "function may not be null");
|
||||
for (int m = 0; m < array.length; m++) {
|
||||
array[m] = func.applyAsDouble(array[m]);
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
/**
|
||||
* Test whether all elements of a double[] array satisfy a double -> boolean predicate
|
||||
*/
|
||||
public static boolean allMatch(final double[] array, final DoublePredicate pred) {
|
||||
Utils.nonNull(array, "array may not be null");
|
||||
Utils.nonNull(pred, "predicate may not be null");
|
||||
for (final double x : array) {
|
||||
if (!pred.test(x)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Test whether all elements of an int[] array satisfy an int -> boolean predicate
|
||||
*/
|
||||
public static boolean allMatch(final int[] array, final IntPredicate pred) {
|
||||
Utils.nonNull(array, "array may not be null");
|
||||
Utils.nonNull(pred, "predicate may not be null");
|
||||
for (final int x : array) {
|
||||
if (!pred.test(x)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ import java.net.InetAddress;
|
|||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.*;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -1171,4 +1172,54 @@ public class Utils {
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that an Object {@code object} is not null and returns the same object or throws an {@link IllegalArgumentException}
|
||||
* @param object any Object
|
||||
* @return the same object
|
||||
* @throws IllegalArgumentException if a {@code o == null}
|
||||
*/
|
||||
public static <T> T nonNull(final T object) {
|
||||
return Utils.nonNull(object, "Null object is not allowed here.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that an {@link Object} is not {@code null} and returns the same object or throws an {@link IllegalArgumentException}
|
||||
* @param object any Object
|
||||
* @param message the text message that would be passed to the exception thrown when {@code o == null}.
|
||||
* @return the same object
|
||||
* @throws IllegalArgumentException if a {@code o == null}
|
||||
*/
|
||||
public static <T> T nonNull(final T object, final String message) {
|
||||
if (object == null) {
|
||||
throw new IllegalArgumentException(message);
|
||||
}
|
||||
return object;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that an {@link Object} is not {@code null} and returns the same object or throws an {@link IllegalArgumentException}
|
||||
* @param object any Object
|
||||
* @param message the text message that would be passed to the exception thrown when {@code o == null}.
|
||||
* @return the same object
|
||||
* @throws IllegalArgumentException if a {@code o == null}
|
||||
*/
|
||||
public static <T> T nonNull(final T object, final Supplier<String> message) {
|
||||
if (object == null) {
|
||||
throw new IllegalArgumentException(message.get());
|
||||
}
|
||||
return object;
|
||||
}
|
||||
|
||||
public static void validateArg(final boolean condition, final String msg){
|
||||
if (!condition){
|
||||
throw new IllegalArgumentException(msg);
|
||||
}
|
||||
}
|
||||
|
||||
public static void validateArg(final boolean condition, final Supplier<String> msg){
|
||||
if (!condition){
|
||||
throw new IllegalArgumentException(msg.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue