From 893c9c85fa562ba38591fc653be66f135c4d91d9 Mon Sep 17 00:00:00 2001 From: ebanks Date: Mon, 28 Dec 2009 21:20:48 +0000 Subject: [PATCH] Added previous optimization to diploid (non-pool) model and shaved off 20% of runtime from it. Moved out some common functionality to joint estimate parent class. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2453 348d0f76-0448-11de-a6fe-93d51630548a --- .../genotyper/DiploidGenotypeCalculationModel.java | 11 +++++++++++ .../JointEstimateGenotypeCalculationModel.java | 13 +++++++++++++ .../walkers/genotyper/PooledCalculationModel.java | 5 +---- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index d2f579eb1..6cce8c4d0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -61,6 +61,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul // first, calculate for AF=0 (no change to matrix) log10PofDgivenAFi[baseIndex][0] = matrix.getLikelihoodsOfFrequency(); + double maxLikelihoodSeen = log10PofDgivenAFi[baseIndex][0]; // for each minor allele frequency, calculate log10PofDgivenAFi for (int i = 1; i <= numFrequencies; i++) { @@ -69,6 +70,16 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul // calculate new likelihoods log10PofDgivenAFi[baseIndex][i] = matrix.getLikelihoodsOfFrequency(); + + // an optimization to speed up the calculation: if we are beyond the local maximum such + // that subsequent likelihoods won't factor into the confidence score, just quit + if ( maxLikelihoodSeen - log10PofDgivenAFi[baseIndex][i] > LOG10_OPTIMIZATION_EPSILON ) { + ignoreAlleleFrequenciesAboveI(i, numFrequencies, baseIndex); + return; + } + + if ( log10PofDgivenAFi[baseIndex][i] > maxLikelihoodSeen ) + maxLikelihoodSeen = log10PofDgivenAFi[baseIndex][i]; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index a17ac618c..c7818c1d5 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -13,6 +13,10 @@ import java.io.PrintWriter; public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel { + // for use in optimizing the P(D|AF) calculations: + // how much off from the max likelihoods do we need to be before we can quit calculating? + protected static final Double LOG10_OPTIMIZATION_EPSILON = 8.0; + // because the null allele frequencies are constant for a given N, // we cache the results to avoid having to recompute everything private HashMap nullAlleleFrequencyCache = new HashMap(); @@ -205,6 +209,15 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc /********************************************************************************/ + /** + * @param freqI allele frequency I + * @param numFrequencies total number of allele frequencies + * @param altBaseIndex the index of the alternate allele + */ + protected void ignoreAlleleFrequenciesAboveI(int freqI, int numFrequencies, int altBaseIndex) { + while ( ++freqI <= numFrequencies ) + log10PofDgivenAFi[altBaseIndex][freqI] = -1.0 * Double.MAX_VALUE; + } /** * @param ref the ref base diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java index 7509ca273..e4df220cd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java @@ -13,8 +13,6 @@ import net.sf.samtools.SAMRecord; public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel { protected static final String POOL_SAMPLE_NAME = "POOL"; - private static final Double LOG10_OPTIMIZATION_EPSILON = 8.0; - private static FourBaseProbabilities fourBaseLikelihoods = null; private static boolean USE_CACHE = true; @@ -75,8 +73,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode // an optimization to speed up the calculation: if we are beyond the local maximum such // that subsequent likelihoods won't factor into the confidence score, just quit if ( frequency > 0 && maxLikelihoodSeen - log10PofDgivenAFi[altIndex][frequency] > LOG10_OPTIMIZATION_EPSILON ) { - while ( ++frequency <= nChromosomes ) - log10PofDgivenAFi[altIndex][frequency] = -1.0 * Double.MAX_VALUE; + ignoreAlleleFrequenciesAboveI(frequency, nChromosomes, altIndex); return; }