diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java index c1c2ae57e..d5b05489b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java @@ -378,14 +378,70 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final VariantContext vc = testBuilder.makeACTest(requestedACs, nNonInformative, 100); final int[] maxACsToVisit = testBuilder.makeModel().computeMaxACs(vc); - // this is necessary because cannot ensure that the tester gives us back the requested ACs due - // to rounding errors + testExpectedACs(vc, maxACsToVisit); + } + + private void testExpectedACs(final VariantContext vc, final int[] maxACsToVisit) { + // this is necessary because cannot ensure that the tester gives us back the + // requested ACs due to rounding errors final List ACs = new ArrayList(); for ( final Allele a : vc.getAlternateAlleles() ) ACs.add(vc.getCalledChrCount(a)); - for ( int i = 0; i < nAlts; i++ ) { + for ( int i = 0; i < maxACsToVisit.length; i++ ) { Assert.assertEquals(maxACsToVisit[i], (int)ACs.get(i), "Maximum AC computed wasn't equal to the max possible in the construction for alt allele " + i); } } + + @DataProvider(name = "MaxACsGenotypes") + public Object[][] makeMaxACsForGenotype() { + List tests = new ArrayList(); + + final List AA = Arrays.asList(A, A); + final List AC = Arrays.asList(A, C); + final List CC = Arrays.asList(C, C); + final List AG = Arrays.asList(A, G); + final List GG = Arrays.asList(G, G); + final List CG = Arrays.asList(C, G); + + final VariantContext vc2 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C)).make(); + final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make(); + + tests.add(new Object[]{vc2, makePL(AA, 0, 10, 10)}); + tests.add(new Object[]{vc2, makePL(AC, 10, 0, 10)}); + tests.add(new Object[]{vc2, makePL(CC, 10, 10, 0)}); + + // make sure non-informative => 0 + tests.add(new Object[]{vc2, makePL(AA, 0, 0, 0)}); + tests.add(new Object[]{vc3, makePL(AA, 0, 0, 0, 0, 0, 0)}); + + // multi-allelics + tests.add(new Object[]{vc3, makePL(AG, 10, 10, 10, 0, 10, 10)}); + tests.add(new Object[]{vc3, makePL(CG, 10, 10, 10, 10, 0, 10)}); + tests.add(new Object[]{vc3, makePL(GG, 10, 10, 10, 10, 10, 0)}); + + // deal with non-informatives third alleles + tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 0, 0, 10)}); + tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 10, 0, 10)}); + tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 10, 0, 0)}); + tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 0, 0, 0)}); + tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 0, 0, 10)}); + tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 10, 0, 10)}); + tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 10, 0, 0)}); + tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 0, 0, 0)}); + + return tests.toArray(new Object[][]{}); + } + + @Test(enabled = true, dataProvider = "MaxACsGenotypes") + private void testMakeACByGenotype(final VariantContext vcRoot, final Genotype g) { + final VariantContext vc = new VariantContextBuilder(vcRoot).genotypes(g).make(); + + final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.DiploidExact; + final ExactAFCalculationTestBuilder testBuilder + = new ExactAFCalculationTestBuilder(1, vc.getNAlleles()-1, modelType, + ExactAFCalculationTestBuilder.PriorType.human); + final int[] maxACsToVisit = testBuilder.makeModel().computeMaxACs(vc); + testExpectedACs(vc, maxACsToVisit); + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculation.java index a42e3fd7d..264de4812 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculation.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.*; @@ -83,48 +85,86 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation { return genotypeLikelihoods; } + /** + * Computes the maximum ACs we need to consider for each alt allele + * + * Walks over the genotypes in VC, and computes for each alt allele the maximum + * AC we need to consider in that alt allele dimension. Does the calculation + * based on the PLs in each genotype g, choosing to update the max AC for the + * alt alleles corresponding to that PL. Only takes the first lowest PL, + * if there are multiple genotype configurations with the same PL value. It + * takes values in the order of the alt alleles. + * + * @param vc the variant context we will compute max alt alleles for + * @return a vector of max alt alleles, indexed by alt allele, so result[0] is the AC of the + * first alt allele. + */ + @Ensures("result != null") protected int[] computeMaxACs(final VariantContext vc) { - final int nAlleles = vc.getNAlleles(); - final int[] maxACs = new int[nAlleles-1]; + final int[] maxACs = new int[vc.getNAlleles()-1]; - for ( int altI = 0; altI < nAlleles-1; altI++ ) { - maxACs[altI] = computeMaxAC(vc, altI+1, nAlleles); - } + for ( final Genotype g : vc.getGenotypes() ) + updateMaxACs(g, maxACs); return maxACs; } - private int computeMaxAC(final VariantContext vc, final int altI, final int nAlleles) { - int maxAC = 0; - - for ( final Genotype g : vc.getGenotypes() ) { - final int gMaxAlt = computeAC(g, altI, nAlleles); - maxAC += gMaxAlt; - } - - return maxAC; - } - - private int computeAC(final Genotype g, final int altI, final int nAlleles) { + /** + * Update the maximum achievable allele counts in maxAC according to the PLs in g + * + * Selects the maximum genotype configuration from the PLs in g, and updates + * the maxAC for this configure. For example, if the lowest PL is for 0/1, updates + * the maxAC for the alt allele 1 by 1. If it's 1/1, update is 2. Works for + * many number of alt alleles (determined by length of maxACs). + * + * If the max PL occurs at 0/0, updates nothing + * Note that this function greedily takes the first min PL, so that if 0/1 and 1/1 have + * the same PL value, then updates the first one. + * + * Also, only will update 1 alt allele, so if 0/1 and 0/2 both have the same PL, + * then only first one (1) will be updated + * + * @param g the genotype to update + * @param maxACs the max allele count vector for alt alleles (starting at 0 => first alt allele) + */ + @Requires({ + "g != null", + "maxACs != null", + "MathUtils.sum(maxACs) >= 0"}) + private void updateMaxACs(final Genotype g, final int[] maxACs) { final int[] PLs = g.getLikelihoods().getAsPLs(); - final int refPL = PLs[0]; - if ( refPL == 0 ) // if ref is most likely, return 0 - return 0; + int minPLi = 0; + int minPL = PLs[0]; - final int homPL = PLs[GenotypeLikelihoods.calculatePLindex(altI, altI)]; - if (homPL < refPL) // if hom-var is < ref, our max possible is 2 - return 2; - - for ( int i = 0; i < nAlleles; i++ ) { - final int one = i < altI ? i : altI; - final int two = i < altI ? altI : i; - final int hetPL = PLs[GenotypeLikelihoods.calculatePLindex(one, two)]; - if ( hetPL < refPL ) // if het has PL < ref, we must check AC = 1 - return 1; + for ( int i = 0; i < PLs.length; i++ ) { + if ( PLs[i] < minPL ) { + minPL = PLs[i]; + minPLi = i; + } } - return 0; // in this case REF is the most likely but in fact another allele is best + final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(minPLi); + updateMaxACs(maxACs, pair.alleleIndex1); + updateMaxACs(maxACs, pair.alleleIndex2); + } + + /** + * Simple helper. Update max alt alleles maxACs according to the allele index (where 0 == ref) + * + * If alleleI == 0 => doesn't update anything + * else maxACs[alleleI - 1]++ + * + * @param maxACs array of max alt allele ACs + * @param alleleI the index (relative to 0) to update a count of 1 in max alt alleles. + */ + @Requires({ + "alleleI >= 0", + "(alleleI - 1) < maxACs.length", + "MathUtils.sum(maxACs) >= 0"}) + private void updateMaxACs(final int[] maxACs, final int alleleI) { + if ( alleleI > 0 ) + maxACs[alleleI-1]++; } // ------------------------------------------------------------------------------------- @@ -133,99 +173,99 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation { // // ------------------------------------------------------------------------------------- -protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first + protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first -// a wrapper around the int array so that we can make it hashable -protected static final class ExactACcounts { + // a wrapper around the int array so that we can make it hashable + protected static final class ExactACcounts { - protected final int[] counts; - private int hashcode = -1; + protected final int[] counts; + private int hashcode = -1; - public ExactACcounts(final int[] counts) { - this.counts = counts; - } - - public int[] getCounts() { - return counts; - } - - @Override - public boolean equals(Object obj) { - return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts); - } - - @Override - public int hashCode() { - if ( hashcode == -1 ) - hashcode = Arrays.hashCode(counts); - return hashcode; - } - - @Override - public String toString() { - StringBuffer sb = new StringBuffer(); - sb.append(counts[0]); - for ( int i = 1; i < counts.length; i++ ) { - sb.append("/"); - sb.append(counts[i]); + public ExactACcounts(final int[] counts) { + this.counts = counts; } - return sb.toString(); - } -} -// This class represents a column in the Exact AC calculation matrix -protected static final class ExactACset { - - // the counts of the various alternate alleles which this column represents - final ExactACcounts ACcounts; - - // the column of the matrix - final double[] log10Likelihoods; - - int sum = -1; - - public ExactACset(final int size, final ExactACcounts ACcounts) { - this.ACcounts = ACcounts; - log10Likelihoods = new double[size]; - Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY); - } - - // sum of all the non-reference alleles - public int getACsum() { - if ( sum == -1 ) { - sum = 0; - for ( int count : ACcounts.getCounts() ) - sum += count; + public int[] getCounts() { + return counts; } - return sum; - } - public boolean equals(Object obj) { - return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts); - } -} - -protected static final class MaxLikelihoodSeen { - double maxLog10L = Double.NEGATIVE_INFINITY; - ExactACcounts ACs = null; - - public MaxLikelihoodSeen() {} - - public void update(final double maxLog10L, final ExactACcounts ACs) { - this.maxLog10L = maxLog10L; - this.ACs = ACs; - } - - // returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set - public boolean isLowerAC(final ExactACcounts otherACs) { - final int[] myACcounts = this.ACs.getCounts(); - final int[] otherACcounts = otherACs.getCounts(); - - for ( int i = 0; i < myACcounts.length; i++ ) { - if ( myACcounts[i] > otherACcounts[i] ) - return false; + @Override + public boolean equals(Object obj) { + return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts); + } + + @Override + public int hashCode() { + if ( hashcode == -1 ) + hashcode = Arrays.hashCode(counts); + return hashcode; + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append(counts[0]); + for ( int i = 1; i < counts.length; i++ ) { + sb.append("/"); + sb.append(counts[i]); + } + return sb.toString(); + } + } + + // This class represents a column in the Exact AC calculation matrix + protected static final class ExactACset { + + // the counts of the various alternate alleles which this column represents + final ExactACcounts ACcounts; + + // the column of the matrix + final double[] log10Likelihoods; + + int sum = -1; + + public ExactACset(final int size, final ExactACcounts ACcounts) { + this.ACcounts = ACcounts; + log10Likelihoods = new double[size]; + Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY); + } + + // sum of all the non-reference alleles + public int getACsum() { + if ( sum == -1 ) { + sum = 0; + for ( int count : ACcounts.getCounts() ) + sum += count; + } + return sum; + } + + public boolean equals(Object obj) { + return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts); + } + } + + protected static final class MaxLikelihoodSeen { + double maxLog10L = Double.NEGATIVE_INFINITY; + ExactACcounts ACs = null; + + public MaxLikelihoodSeen() {} + + public void update(final double maxLog10L, final ExactACcounts ACs) { + this.maxLog10L = maxLog10L; + this.ACs = ACs; + } + + // returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set + public boolean isLowerAC(final ExactACcounts otherACs) { + final int[] myACcounts = this.ACs.getCounts(); + final int[] otherACcounts = otherACs.getCounts(); + + for ( int i = 0; i < myACcounts.length; i++ ) { + if ( myACcounts[i] > otherACcounts[i] ) + return false; + } + return true; } - return true; } -} } \ No newline at end of file