From 4b7edc72d141ab45e8959048693d043653c199d7 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 20 Sep 2012 10:59:42 -0400 Subject: [PATCH] Fixing edge case bug in the Exact model (both standard and generalized) where we could abort prematurely in the special case of multiple polymorphic alleles and samples with widely different depths of coverage (e.g. exome and low-pass). In these cases it was possible to call the site bi-allelic when in fact it was multi-allelic (but it wouldn't cause it to create a monomorphic call). --- .../GeneralPloidyExactAFCalculationModel.java | 18 ++++++++------ .../AlleleFrequencyCalculationModel.java | 24 +++++++++++++++++++ .../genotyper/ExactAFCalculationModel.java | 13 +++++----- 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java index 93e118ce0..87572b804 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java @@ -224,12 +224,16 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula indexesToACset.put(zeroSet.ACcounts, zeroSet); // keep processing while we have AC conformations that need to be calculated - double maxLog10L = Double.NEGATIVE_INFINITY; + MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen(); while ( !ACqueue.isEmpty() ) { // compute log10Likelihoods final ExactACset ACset = ACqueue.remove(); - final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLog10L, ACqueue, indexesToACset); - maxLog10L = Math.max(maxLog10L, log10LofKs); + final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLikelihoodSeen, ACqueue, indexesToACset); + + // adjust max likelihood seen if needed + if ( log10LofKs > maxLikelihoodSeen.maxLog10L ) + maxLikelihoodSeen.update(log10LofKs, ACset.ACcounts); + // clean up memory indexesToACset.remove(ACset.ACcounts); if ( VERBOSE ) @@ -250,7 +254,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula * @param originalPloidy Total ploidy of original combined pool * @param newGLPloidy Ploidy of GL vector * @param result AFResult object - * @param maxLog10L max likelihood observed so far + * @param maxLikelihoodSeen max likelihood observed so far * @param ACqueue Queue of conformations to compute * @param indexesToACset AC indices of objects in queue * @return max log likelihood @@ -263,7 +267,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula final int originalPloidy, final int newGLPloidy, final AlleleFrequencyCalculationResult result, - final double maxLog10L, + final MaxLikelihoodSeen maxLikelihoodSeen, final LinkedList ACqueue, final HashMap indexesToACset) { @@ -277,9 +281,9 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula if (!Double.isInfinite(log10LofK)) newPool.add(set); - if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { + if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) { if ( VERBOSE ) - System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); + System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLikelihoodSeen.maxLog10L); return log10LofK; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 08a333486..569cd7072 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -204,4 +204,28 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts); } } + + protected static final class MaxLikelihoodSeen { + double maxLog10L = Double.NEGATIVE_INFINITY; + ExactACcounts ACs = null; + + public MaxLikelihoodSeen() {} + + public void update(final double maxLog10L, final ExactACcounts ACs) { + this.maxLog10L = maxLog10L; + this.ACs = ACs; + } + + // returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set + public boolean isLowerAC(final ExactACcounts otherACs) { + final int[] myACcounts = this.ACs.getCounts(); + final int[] otherACcounts = otherACs.getCounts(); + + for ( int i = 0; i < myACcounts.length; i++ ) { + if ( myACcounts[i] > otherACcounts[i] ) + return false; + } + return true; + } + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 77a39afc2..ba7f0f622 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -68,7 +68,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private static final int PL_INDEX_OF_HOM_REF = 0; - private static final List chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) { + private static List chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) { final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles]; for ( int i = 0; i < numOriginalAltAlleles; i++ ) @@ -132,14 +132,15 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { indexesToACset.put(zeroSet.ACcounts, zeroSet); // keep processing while we have AC conformations that need to be calculated - double maxLog10L = Double.NEGATIVE_INFINITY; + MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen(); while ( !ACqueue.isEmpty() ) { // compute log10Likelihoods final ExactACset set = ACqueue.remove(); - final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result); + final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLikelihoodSeen, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result); // adjust max likelihood seen if needed - maxLog10L = Math.max(maxLog10L, log10LofKs); + if ( log10LofKs > maxLikelihoodSeen.maxLog10L ) + maxLikelihoodSeen.update(log10LofKs, set.ACcounts); // clean up memory indexesToACset.remove(set.ACcounts); @@ -160,7 +161,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private static double calculateAlleleCountConformation(final ExactACset set, final ArrayList genotypeLikelihoods, - final double maxLog10L, + final MaxLikelihoodSeen maxLikelihoodSeen, final int numChr, final LinkedList ACqueue, final HashMap indexesToACset, @@ -176,7 +177,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; // can we abort early because the log10Likelihoods are so small? - if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { + if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) { //if ( DEBUG ) // System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); return log10LofK;