From ccb65a03e80a2e7212d599c87778c66f6dd43aa6 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 20 Sep 2012 10:14:48 -0400 Subject: [PATCH 1/3] sorry, non-ASCII characters annoy some computers. --- .../sting/gatk/walkers/annotator/MVLikelihoodRatio.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index f644c4c6d..85f61c91c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -22,7 +22,7 @@ import java.util.*; * Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation * versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is * diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than - * the strict 1-∏(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios. + * the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios. */ public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation { From 4b7edc72d141ab45e8959048693d043653c199d7 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 20 Sep 2012 10:59:42 -0400 Subject: [PATCH 2/3] Fixing edge case bug in the Exact model (both standard and generalized) where we could abort prematurely in the special case of multiple polymorphic alleles and samples with widely different depths of coverage (e.g. exome and low-pass). In these cases it was possible to call the site bi-allelic when in fact it was multi-allelic (but it wouldn't cause it to create a monomorphic call). --- .../GeneralPloidyExactAFCalculationModel.java | 18 ++++++++------ .../AlleleFrequencyCalculationModel.java | 24 +++++++++++++++++++ .../genotyper/ExactAFCalculationModel.java | 13 +++++----- 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java index 93e118ce0..87572b804 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java @@ -224,12 +224,16 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula indexesToACset.put(zeroSet.ACcounts, zeroSet); // keep processing while we have AC conformations that need to be calculated - double maxLog10L = Double.NEGATIVE_INFINITY; + MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen(); while ( !ACqueue.isEmpty() ) { // compute log10Likelihoods final ExactACset ACset = ACqueue.remove(); - final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLog10L, ACqueue, indexesToACset); - maxLog10L = Math.max(maxLog10L, log10LofKs); + final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLikelihoodSeen, ACqueue, indexesToACset); + + // adjust max likelihood seen if needed + if ( log10LofKs > maxLikelihoodSeen.maxLog10L ) + maxLikelihoodSeen.update(log10LofKs, ACset.ACcounts); + // clean up memory indexesToACset.remove(ACset.ACcounts); if ( VERBOSE ) @@ -250,7 +254,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula * @param originalPloidy Total ploidy of original combined pool * @param newGLPloidy Ploidy of GL vector * @param result AFResult object - * @param maxLog10L max likelihood observed so far + * @param maxLikelihoodSeen max likelihood observed so far * @param ACqueue Queue of conformations to compute * @param indexesToACset AC indices of objects in queue * @return max log likelihood @@ -263,7 +267,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula final int originalPloidy, final int newGLPloidy, final AlleleFrequencyCalculationResult result, - final double maxLog10L, + final MaxLikelihoodSeen maxLikelihoodSeen, final LinkedList ACqueue, final HashMap indexesToACset) { @@ -277,9 +281,9 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula if (!Double.isInfinite(log10LofK)) newPool.add(set); - if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { + if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) { if ( VERBOSE ) - System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); + System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLikelihoodSeen.maxLog10L); return log10LofK; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 08a333486..569cd7072 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -204,4 +204,28 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts); } } + + protected static final class MaxLikelihoodSeen { + double maxLog10L = Double.NEGATIVE_INFINITY; + ExactACcounts ACs = null; + + public MaxLikelihoodSeen() {} + + public void update(final double maxLog10L, final ExactACcounts ACs) { + this.maxLog10L = maxLog10L; + this.ACs = ACs; + } + + // returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set + public boolean isLowerAC(final ExactACcounts otherACs) { + final int[] myACcounts = this.ACs.getCounts(); + final int[] otherACcounts = otherACs.getCounts(); + + for ( int i = 0; i < myACcounts.length; i++ ) { + if ( myACcounts[i] > otherACcounts[i] ) + return false; + } + return true; + } + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 77a39afc2..ba7f0f622 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -68,7 +68,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private static final int PL_INDEX_OF_HOM_REF = 0; - private static final List chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) { + private static List chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) { final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles]; for ( int i = 0; i < numOriginalAltAlleles; i++ ) @@ -132,14 +132,15 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { indexesToACset.put(zeroSet.ACcounts, zeroSet); // keep processing while we have AC conformations that need to be calculated - double maxLog10L = Double.NEGATIVE_INFINITY; + MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen(); while ( !ACqueue.isEmpty() ) { // compute log10Likelihoods final ExactACset set = ACqueue.remove(); - final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result); + final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLikelihoodSeen, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result); // adjust max likelihood seen if needed - maxLog10L = Math.max(maxLog10L, log10LofKs); + if ( log10LofKs > maxLikelihoodSeen.maxLog10L ) + maxLikelihoodSeen.update(log10LofKs, set.ACcounts); // clean up memory indexesToACset.remove(set.ACcounts); @@ -160,7 +161,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private static double calculateAlleleCountConformation(final ExactACset set, final ArrayList genotypeLikelihoods, - final double maxLog10L, + final MaxLikelihoodSeen maxLikelihoodSeen, final int numChr, final LinkedList ACqueue, final HashMap indexesToACset, @@ -176,7 +177,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; // can we abort early because the log10Likelihoods are so small? - if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { + if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) { //if ( DEBUG ) // System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); return log10LofK; From 2e6f5339961a50b2f66648d7636b3e9b7b2e0432 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 20 Sep 2012 11:55:28 -0400 Subject: [PATCH 3/3] Adding both unit and integration tests to cover the previous edge case of mismatched PLs --- .../ExactAFCalculationModelUnitTest.java | 15 +++++++++++++++ .../UnifiedGenotyperIntegrationTest.java | 14 +++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java index 306dddd65..0731d3fd8 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java @@ -109,4 +109,19 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { int calculatedAlleleCount = result.getAlleleCountsOfMAP()[0]; Assert.assertEquals(calculatedAlleleCount, 6); } + + @Test + public void testMismatchedGLs() { + + final double[] AB = new double[]{-2000.0, 0.0, -2000.0, -2000.0, -2000.0, -2000.0}; + final double[] AC = new double[]{-100.0, -100.0, -100.0, 0.0, -100.0, -100.0}; + GetGLsTest cfg = new GetGLsTest("B1C1", 2, createGenotype("1", AC), createGenotype("2", AB)); + + final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2); + + ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result); + + Assert.assertEquals(result.getAlleleCountsOfMAP()[0], 1); + Assert.assertEquals(result.getAlleleCountsOfMAP()[1], 1); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 2f0bfb507..99b62fa8d 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("8472b1ad2fe1060e732da9e29d10cf99")); + Arrays.asList("cceb34ffbd2dbc45b8821f86ea255284")); executeTest("test Multiple SNP alleles", spec); } @@ -76,10 +76,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("8a4ad38ec8015eea3461295148143428")); + Arrays.asList("00f54a0097e710c0f7b001444c237e32")); executeTest("test reverse trim", spec); } + @Test + public void testMismatchedPLs() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, + Arrays.asList("b3fae6bf4c620458f4259dbc93125e37")); + executeTest("test mismatched PLs", spec); + } + // -------------------------------------------------------------------------------------------------------------- // // testing compressed output @@ -335,7 +343,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("c3f786a5228346b43a80aa80d22b1490")); + Arrays.asList("af04b81f0548ca22b8d1f6bf223b336e")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(