diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java index 93e118ce0..87572b804 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java @@ -224,12 +224,16 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula indexesToACset.put(zeroSet.ACcounts, zeroSet); // keep processing while we have AC conformations that need to be calculated - double maxLog10L = Double.NEGATIVE_INFINITY; + MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen(); while ( !ACqueue.isEmpty() ) { // compute log10Likelihoods final ExactACset ACset = ACqueue.remove(); - final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLog10L, ACqueue, indexesToACset); - maxLog10L = Math.max(maxLog10L, log10LofKs); + final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLikelihoodSeen, ACqueue, indexesToACset); + + // adjust max likelihood seen if needed + if ( log10LofKs > maxLikelihoodSeen.maxLog10L ) + maxLikelihoodSeen.update(log10LofKs, ACset.ACcounts); + // clean up memory indexesToACset.remove(ACset.ACcounts); if ( VERBOSE ) @@ -250,7 +254,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula * @param originalPloidy Total ploidy of original combined pool * @param newGLPloidy Ploidy of GL vector * @param result AFResult object - * @param maxLog10L max likelihood observed so far + * @param maxLikelihoodSeen max likelihood observed so far * @param ACqueue Queue of conformations to compute * @param indexesToACset AC indices of objects in queue * @return max log likelihood @@ -263,7 +267,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula final int originalPloidy, final int newGLPloidy, final AlleleFrequencyCalculationResult result, - final double maxLog10L, + final MaxLikelihoodSeen maxLikelihoodSeen, final LinkedList ACqueue, final HashMap indexesToACset) { @@ -277,9 +281,9 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula if (!Double.isInfinite(log10LofK)) newPool.add(set); - if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { + if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) { if ( VERBOSE ) - System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); + System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLikelihoodSeen.maxLog10L); return log10LofK; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index f644c4c6d..85f61c91c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -22,7 +22,7 @@ import java.util.*; * Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation * versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is * diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than - * the strict 1-∏(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios. + * the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios. */ public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 08a333486..569cd7072 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -204,4 +204,28 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts); } } + + protected static final class MaxLikelihoodSeen { + double maxLog10L = Double.NEGATIVE_INFINITY; + ExactACcounts ACs = null; + + public MaxLikelihoodSeen() {} + + public void update(final double maxLog10L, final ExactACcounts ACs) { + this.maxLog10L = maxLog10L; + this.ACs = ACs; + } + + // returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set + public boolean isLowerAC(final ExactACcounts otherACs) { + final int[] myACcounts = this.ACs.getCounts(); + final int[] otherACcounts = otherACs.getCounts(); + + for ( int i = 0; i < myACcounts.length; i++ ) { + if ( myACcounts[i] > otherACcounts[i] ) + return false; + } + return true; + } + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 77a39afc2..ba7f0f622 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -68,7 +68,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private static final int PL_INDEX_OF_HOM_REF = 0; - private static final List chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) { + private static List chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) { final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles]; for ( int i = 0; i < numOriginalAltAlleles; i++ ) @@ -132,14 +132,15 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { indexesToACset.put(zeroSet.ACcounts, zeroSet); // keep processing while we have AC conformations that need to be calculated - double maxLog10L = Double.NEGATIVE_INFINITY; + MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen(); while ( !ACqueue.isEmpty() ) { // compute log10Likelihoods final ExactACset set = ACqueue.remove(); - final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result); + final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLikelihoodSeen, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result); // adjust max likelihood seen if needed - maxLog10L = Math.max(maxLog10L, log10LofKs); + if ( log10LofKs > maxLikelihoodSeen.maxLog10L ) + maxLikelihoodSeen.update(log10LofKs, set.ACcounts); // clean up memory indexesToACset.remove(set.ACcounts); @@ -160,7 +161,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private static double calculateAlleleCountConformation(final ExactACset set, final ArrayList genotypeLikelihoods, - final double maxLog10L, + final MaxLikelihoodSeen maxLikelihoodSeen, final int numChr, final LinkedList ACqueue, final HashMap indexesToACset, @@ -176,7 +177,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; // can we abort early because the log10Likelihoods are so small? - if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { + if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) { //if ( DEBUG ) // System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); return log10LofK; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java index 306dddd65..0731d3fd8 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java @@ -109,4 +109,19 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { int calculatedAlleleCount = result.getAlleleCountsOfMAP()[0]; Assert.assertEquals(calculatedAlleleCount, 6); } + + @Test + public void testMismatchedGLs() { + + final double[] AB = new double[]{-2000.0, 0.0, -2000.0, -2000.0, -2000.0, -2000.0}; + final double[] AC = new double[]{-100.0, -100.0, -100.0, 0.0, -100.0, -100.0}; + GetGLsTest cfg = new GetGLsTest("B1C1", 2, createGenotype("1", AC), createGenotype("2", AB)); + + final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2); + + ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result); + + Assert.assertEquals(result.getAlleleCountsOfMAP()[0], 1); + Assert.assertEquals(result.getAlleleCountsOfMAP()[1], 1); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 2f0bfb507..99b62fa8d 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("8472b1ad2fe1060e732da9e29d10cf99")); + Arrays.asList("cceb34ffbd2dbc45b8821f86ea255284")); executeTest("test Multiple SNP alleles", spec); } @@ -76,10 +76,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("8a4ad38ec8015eea3461295148143428")); + Arrays.asList("00f54a0097e710c0f7b001444c237e32")); executeTest("test reverse trim", spec); } + @Test + public void testMismatchedPLs() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, + Arrays.asList("b3fae6bf4c620458f4259dbc93125e37")); + executeTest("test mismatched PLs", spec); + } + // -------------------------------------------------------------------------------------------------------------- // // testing compressed output @@ -335,7 +343,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("c3f786a5228346b43a80aa80d22b1490")); + Arrays.asList("af04b81f0548ca22b8d1f6bf223b336e")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(