From 7666a58773f32161e7746dc804eee487ee1a5a40 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 4 Oct 2012 19:38:48 -0700 Subject: [PATCH] Function to compute the max achievable AC for each alt allele -- Additional minor cleanup of ExactAFCalculation --- .../ExactAFCalculationPerformanceTest.java | 18 +- .../ExactAFCalculationTestBuilder.java | 22 +- .../GeneralPloidyExactAFCalculation.java | 8 +- .../ExactAFCalculationModelUnitTest.java | 43 ++++ .../genotyper/AlleleFrequencyCalculation.java | 13 +- .../walkers/genotyper/ExactAFCalculation.java | 222 +++++++++++------- 6 files changed, 212 insertions(+), 114 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationPerformanceTest.java index b4d041061..5e18715c4 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationPerformanceTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationPerformanceTest.java @@ -7,7 +7,6 @@ import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; @@ -58,7 +57,7 @@ public class ExactAFCalculationPerformanceTest { final double[] priors = testBuilder.makePriors(); for ( int[] ACs : makeACs(testBuilder.numAltAlleles, testBuilder.nSamples*2) ) { - final VariantContext vc = testBuilder.makeACTest(ACs, nonTypePL); + final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL); timer.start(); final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vc, priors); @@ -115,7 +114,7 @@ public class ExactAFCalculationPerformanceTest { final int[] ac = new int[testBuilder.numAltAlleles]; ac[0] = 1; - final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL); + final VariantContext vc = testBuilder.makeACTest(ac, 0, nonTypePL); for ( int position = 0; position < vc.getNSamples(); position++ ) { final VariantContextBuilder vcb = new VariantContextBuilder(vc); @@ -149,19 +148,12 @@ public class ExactAFCalculationPerformanceTest { final int[] ac = new int[testBuilder.numAltAlleles]; ac[0] = 1; - final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL); - final Genotype nonInformative = testBuilder.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 0, 0, 0); - for ( int nNonInformative = 0; nNonInformative < vc.getNSamples(); nNonInformative++ ) { - final VariantContextBuilder vcb = new VariantContextBuilder(vc); - - final List genotypes = new ArrayList(); - genotypes.addAll(vc.getGenotypes().subList(0, nNonInformative + 1)); - genotypes.addAll(Collections.nCopies(vc.getNSamples() - nNonInformative, nonInformative)); - vcb.genotypes(genotypes); + for ( int nNonInformative = 0; nNonInformative < testBuilder.nSamples; nNonInformative++ ) { + final VariantContext vc = testBuilder.makeACTest(ac, nNonInformative, nonTypePL); timer.start(); - final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vcb.make(), priors); + final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vc, priors); final long runtime = timer.getElapsedTimeNano(); final List columns = new LinkedList(coreValues); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationTestBuilder.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationTestBuilder.java index f472a1140..4f8669a23 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationTestBuilder.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationTestBuilder.java @@ -1,11 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.variantcontext.*; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; public class ExactAFCalculationTestBuilder { @@ -68,7 +70,11 @@ public class ExactAFCalculationTestBuilder { } } - public VariantContext makeACTest(final int[] ACs, final int nonTypePL) { + public VariantContext makeACTest(final List ACs, final int nNonInformative, final int nonTypePL) { + return makeACTest(ArrayUtils.toPrimitive(ACs.toArray(new Integer[]{})), nNonInformative, nonTypePL); + } + + public VariantContext makeACTest(final int[] ACs, final int nNonInformative, final int nonTypePL) { final int nChrom = nSamples * 2; final int[] nhet = new int[numAltAlleles]; @@ -76,7 +82,7 @@ public class ExactAFCalculationTestBuilder { for ( int i = 0; i < ACs.length; i++ ) { final double p = ACs[i] / (1.0 * nChrom); - nhomvar[i] = (int)Math.floor(nSamples * p * p); + nhomvar[i] = (int)Math.floor((nSamples - nNonInformative) * p * p); nhet[i] = ACs[i] - 2 * nhomvar[i]; if ( nhet[i] < 0 ) @@ -87,10 +93,10 @@ public class ExactAFCalculationTestBuilder { if ( calcAC != MathUtils.sum(ACs) ) throw new IllegalStateException("calculated AC " + calcAC + " not equal to desired AC " + Utils.join(",", ACs)); - return makeACTest(nhet, nhomvar, nonTypePL); + return makeACTest(nhet, nhomvar, nNonInformative, nonTypePL); } - public VariantContext makeACTest(final int[] nhet, final int[] nhomvar, final int nonTypePL) { + public VariantContext makeACTest(final int[] nhet, final int[] nhomvar, final int nNonInformative, final int nonTypePL) { List samples = new ArrayList(nSamples); for ( int altI = 0; altI < nhet.length; altI++ ) { @@ -100,8 +106,12 @@ public class ExactAFCalculationTestBuilder { samples.add(makePL(GenotypeType.HOM_VAR, nonTypePL, altI+1)); } - final int nRef = (int)(nSamples - MathUtils.sum(nhet) - MathUtils.sum(nhomvar)); - for ( int i = 0; i < nRef; i++ ) samples.add(makePL(GenotypeType.HOM_REF, nonTypePL, 0)); + final int[] nonInformativePLs = new int[GenotypeLikelihoods.numLikelihoods(numAltAlleles, 2)]; + final Genotype nonInformative = makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), nonInformativePLs); + samples.addAll(Collections.nCopies(nNonInformative, nonInformative)); + + final int nRef = Math.max((int) (nSamples - nNonInformative - MathUtils.sum(nhet) - MathUtils.sum(nhomvar)), 0); + samples.addAll(Collections.nCopies(nRef, makePL(GenotypeType.HOM_REF, nonTypePL, 0))); samples = samples.subList(0, nSamples); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculation.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculation.java index b0452f9ea..4ef8612b7 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculation.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculation.java @@ -54,12 +54,12 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation { @Override protected VariantContext reduceScope(VariantContext vc) { // don't try to genotype too many alternate alleles - if ( vc.getAlternateAlleles().size() > MAX_ALTERNATE_ALLELES_TO_GENOTYPE ) { - logger.warn("this tool is currently set to genotype at most " + MAX_ALTERNATE_ALLELES_TO_GENOTYPE + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument"); + if ( vc.getAlternateAlleles().size() > maxAltAlleles) { + logger.warn("this tool is currently set to genotype at most " + maxAltAlleles + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument"); - final List alleles = new ArrayList(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1); + final List alleles = new ArrayList(maxAltAlleles + 1); alleles.add(vc.getReference()); - alleles.addAll(chooseMostLikelyAlternateAlleles(vc, MAX_ALTERNATE_ALLELES_TO_GENOTYPE, ploidy)); + alleles.addAll(chooseMostLikelyAlternateAlleles(vc, maxAltAlleles, ploidy)); VariantContextBuilder builder = new VariantContextBuilder(vc); builder.alleles(alleles); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java index 602009654..c1c2ae57e 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java @@ -345,4 +345,47 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { + expectedAC_AC + " priors " + Utils.join(",", priors)); } } + + @DataProvider(name = "MaxACsToVisit") + public Object[][] makeMaxACsToVisit() { + List tests = new ArrayList(); + + final int nSamples = 10; + final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.DiploidExact; + + for (int nNonInformative = 0; nNonInformative < nSamples - 1; nNonInformative++ ) { + final int nChrom = (nSamples - nNonInformative) * 2; + for ( int i = 0; i < nChrom; i++ ) { + // bi-allelic + tests.add(new Object[]{nSamples, Arrays.asList(i), nNonInformative, modelType}); + + // tri-allelic + for ( int j = 0; j < (nChrom - i); j++) + tests.add(new Object[]{nSamples, Arrays.asList(i, j), nNonInformative, modelType}); + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(enabled = true, dataProvider = "MaxACsToVisit") + public void testMaxACsToVisit(final int nSamples, final List requestedACs, final int nNonInformative, final ExactAFCalculationTestBuilder.ModelType modelType) { + final int nAlts = requestedACs.size(); + final ExactAFCalculationTestBuilder testBuilder + = new ExactAFCalculationTestBuilder(nSamples, nAlts, modelType, + ExactAFCalculationTestBuilder.PriorType.human); + + final VariantContext vc = testBuilder.makeACTest(requestedACs, nNonInformative, 100); + final int[] maxACsToVisit = testBuilder.makeModel().computeMaxACs(vc); + + // this is necessary because cannot ensure that the tester gives us back the requested ACs due + // to rounding errors + final List ACs = new ArrayList(); + for ( final Allele a : vc.getAlternateAlleles() ) + ACs.add(vc.getCalledChrCount(a)); + + for ( int i = 0; i < nAlts; i++ ) { + Assert.assertEquals(maxACsToVisit[i], (int)ACs.get(i), "Maximum AC computed wasn't equal to the max possible in the construction for alt allele " + i); + } + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculation.java index fc578a5bd..138b3d403 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculation.java @@ -102,7 +102,7 @@ public abstract class AlleleFrequencyCalculation implements Cloneable { */ public final AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { - return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(MAX_ALTERNATE_ALLELES_TO_GENOTYPE)); + return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(getMaxAltAlleles())); } /** @@ -183,6 +183,17 @@ public abstract class AlleleFrequencyCalculation implements Cloneable { final boolean assignGenotypes, final int ploidy); + // --------------------------------------------------------------------------- + // + // accessors + // + // --------------------------------------------------------------------------- + + public int getMaxAltAlleles() { + return Math.max(MAX_ALTERNATE_ALLELES_TO_GENOTYPE, MAX_ALTERNATE_ALLELES_FOR_INDELS); + } + + // --------------------------------------------------------------------------- // // Print information about the call to the calls log diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculation.java index b70309ed5..a42e3fd7d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculation.java @@ -27,9 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; +import org.broadinstitute.sting.utils.variantcontext.*; import java.io.File; import java.io.PrintStream; @@ -85,105 +83,149 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation { return genotypeLikelihoods; } + protected int[] computeMaxACs(final VariantContext vc) { + final int nAlleles = vc.getNAlleles(); + final int[] maxACs = new int[nAlleles-1]; + + for ( int altI = 0; altI < nAlleles-1; altI++ ) { + maxACs[altI] = computeMaxAC(vc, altI+1, nAlleles); + } + + return maxACs; + } + + private int computeMaxAC(final VariantContext vc, final int altI, final int nAlleles) { + int maxAC = 0; + + for ( final Genotype g : vc.getGenotypes() ) { + final int gMaxAlt = computeAC(g, altI, nAlleles); + maxAC += gMaxAlt; + } + + return maxAC; + } + + private int computeAC(final Genotype g, final int altI, final int nAlleles) { + final int[] PLs = g.getLikelihoods().getAsPLs(); + + final int refPL = PLs[0]; + if ( refPL == 0 ) // if ref is most likely, return 0 + return 0; + + final int homPL = PLs[GenotypeLikelihoods.calculatePLindex(altI, altI)]; + if (homPL < refPL) // if hom-var is < ref, our max possible is 2 + return 2; + + for ( int i = 0; i < nAlleles; i++ ) { + final int one = i < altI ? i : altI; + final int two = i < altI ? altI : i; + final int hetPL = PLs[GenotypeLikelihoods.calculatePLindex(one, two)]; + if ( hetPL < refPL ) // if het has PL < ref, we must check AC = 1 + return 1; + } + + return 0; // in this case REF is the most likely but in fact another allele is best + } + // ------------------------------------------------------------------------------------- // // protected classes used to store exact model matrix columns // // ------------------------------------------------------------------------------------- - protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first +protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first - // a wrapper around the int array so that we can make it hashable - protected static final class ExactACcounts { +// a wrapper around the int array so that we can make it hashable +protected static final class ExactACcounts { - protected final int[] counts; - private int hashcode = -1; + protected final int[] counts; + private int hashcode = -1; - public ExactACcounts(final int[] counts) { - this.counts = counts; - } - - public int[] getCounts() { - return counts; - } - - @Override - public boolean equals(Object obj) { - return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts); - } - - @Override - public int hashCode() { - if ( hashcode == -1 ) - hashcode = Arrays.hashCode(counts); - return hashcode; - } - - @Override - public String toString() { - StringBuffer sb = new StringBuffer(); - sb.append(counts[0]); - for ( int i = 1; i < counts.length; i++ ) { - sb.append("/"); - sb.append(counts[i]); - } - return sb.toString(); - } + public ExactACcounts(final int[] counts) { + this.counts = counts; } - // This class represents a column in the Exact AC calculation matrix - protected static final class ExactACset { - - // the counts of the various alternate alleles which this column represents - final ExactACcounts ACcounts; - - // the column of the matrix - final double[] log10Likelihoods; - - int sum = -1; - - public ExactACset(final int size, final ExactACcounts ACcounts) { - this.ACcounts = ACcounts; - log10Likelihoods = new double[size]; - Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY); - } - - // sum of all the non-reference alleles - public int getACsum() { - if ( sum == -1 ) { - sum = 0; - for ( int count : ACcounts.getCounts() ) - sum += count; - } - return sum; - } - - public boolean equals(Object obj) { - return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts); - } + public int[] getCounts() { + return counts; } - protected static final class MaxLikelihoodSeen { - double maxLog10L = Double.NEGATIVE_INFINITY; - ExactACcounts ACs = null; - - public MaxLikelihoodSeen() {} - - public void update(final double maxLog10L, final ExactACcounts ACs) { - this.maxLog10L = maxLog10L; - this.ACs = ACs; - } - - // returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set - public boolean isLowerAC(final ExactACcounts otherACs) { - final int[] myACcounts = this.ACs.getCounts(); - final int[] otherACcounts = otherACs.getCounts(); - - for ( int i = 0; i < myACcounts.length; i++ ) { - if ( myACcounts[i] > otherACcounts[i] ) - return false; - } - return true; - } + @Override + public boolean equals(Object obj) { + return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts); } + + @Override + public int hashCode() { + if ( hashcode == -1 ) + hashcode = Arrays.hashCode(counts); + return hashcode; + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append(counts[0]); + for ( int i = 1; i < counts.length; i++ ) { + sb.append("/"); + sb.append(counts[i]); + } + return sb.toString(); + } +} + +// This class represents a column in the Exact AC calculation matrix +protected static final class ExactACset { + + // the counts of the various alternate alleles which this column represents + final ExactACcounts ACcounts; + + // the column of the matrix + final double[] log10Likelihoods; + + int sum = -1; + + public ExactACset(final int size, final ExactACcounts ACcounts) { + this.ACcounts = ACcounts; + log10Likelihoods = new double[size]; + Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY); + } + + // sum of all the non-reference alleles + public int getACsum() { + if ( sum == -1 ) { + sum = 0; + for ( int count : ACcounts.getCounts() ) + sum += count; + } + return sum; + } + + public boolean equals(Object obj) { + return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts); + } +} + +protected static final class MaxLikelihoodSeen { + double maxLog10L = Double.NEGATIVE_INFINITY; + ExactACcounts ACs = null; + + public MaxLikelihoodSeen() {} + + public void update(final double maxLog10L, final ExactACcounts ACs) { + this.maxLog10L = maxLog10L; + this.ACs = ACs; + } + + // returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set + public boolean isLowerAC(final ExactACcounts otherACs) { + final int[] myACcounts = this.ACs.getCounts(); + final int[] otherACcounts = otherACs.getCounts(); + + for ( int i = 0; i < myACcounts.length; i++ ) { + if ( myACcounts[i] > otherACcounts[i] ) + return false; + } + return true; + } +} } \ No newline at end of file