From 4a05757a2a71cef5dba08f29d147bd2b594adae1 Mon Sep 17 00:00:00 2001 From: ebanks Date: Thu, 11 Mar 2010 16:05:51 +0000 Subject: [PATCH] Fixed strand bias calculation because of -Infinity issues. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2980 348d0f76-0448-11de-a6fe-93d51630548a --- .../genotyper/DiploidGenotypeCalculationModel.java | 3 ++- .../JointEstimateGenotypeCalculationModel.java | 11 +++++++++++ .../walkers/genotyper/PooledCalculationModel.java | 3 ++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index e57eebe1d..156f79ab0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -66,6 +66,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul // first, calculate for AF=0 (no change to matrix) log10PofDgivenAFi[baseIndex][0] = matrix.getLikelihoodsOfFrequency(); double maxLikelihoodSeen = log10PofDgivenAFi[baseIndex][0]; + int minAlleleFrequencyToTest = getMinAlleleFrequencyToTest(); // for each minor allele frequency, calculate log10PofDgivenAFi for (int i = 1; i <= numFrequencies; i++) { @@ -77,7 +78,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul // an optimization to speed up the calculation: if we are beyond the local maximum such // that subsequent likelihoods won't factor into the confidence score, just quit - if ( maxLikelihoodSeen - log10PofDgivenAFi[baseIndex][i] > LOG10_OPTIMIZATION_EPSILON ) { + if ( i >= minAlleleFrequencyToTest && maxLikelihoodSeen - log10PofDgivenAFi[baseIndex][i] > LOG10_OPTIMIZATION_EPSILON ) { ignoreAlleleFrequenciesAboveI(i, numFrequencies, baseIndex); return; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index f0a8596c8..63272f3a4 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -16,6 +16,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc // how much off from the max likelihoods do we need to be before we can quit calculating? protected static final Double LOG10_OPTIMIZATION_EPSILON = 8.0; protected static final Double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE; + private int minAlleleFrequencyToTest; // because the null allele frequencies are constant for a given N, // we cache the results to avoid having to recompute everything @@ -47,6 +48,9 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc int numSamples = getNSamples(contexts); int frequencyEstimationPoints = (2 * numSamples) + 1; // (add 1 for allele frequency of zero) + // reset the optimization value + minAlleleFrequencyToTest = 0; + // find the alternate allele with the largest sum of quality scores initializeBestAlternateAllele(ref, contexts); @@ -82,6 +86,10 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc return vcc; } + protected int getMinAlleleFrequencyToTest() { + return minAlleleFrequencyToTest; + } + protected int getNSamples(Map contexts) { return contexts.size(); } @@ -376,6 +384,9 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc double overallLog10PofF = log10AlleleFrequencyPriors[bestAFguess] + log10PofDgivenAFi[indexOfMax][bestAFguess]; double lod = overallLog10PofF - overallLog10PofNull; + // set the optimization value for the subsequent strand calculations + minAlleleFrequencyToTest = bestAFguess; + // the forward lod initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD); calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java index 9cc6dd02b..a5f0af1a3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java @@ -54,6 +54,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode int altIndex = BaseUtils.simpleBaseToBaseIndex(alt); double maxLikelihoodSeen = -1.0 * Double.MAX_VALUE; + int minAlleleFrequencyToTest = getMinAlleleFrequencyToTest(); // for each minor allele frequency, calculate log10PofDgivenAFi for (int frequency = 0; frequency <= nChromosomes; frequency++) { @@ -72,7 +73,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode // an optimization to speed up the calculation: if we are beyond the local maximum such // that subsequent likelihoods won't factor into the confidence score, just quit - if ( frequency > 0 && maxLikelihoodSeen - log10PofDgivenAFi[altIndex][frequency] > LOG10_OPTIMIZATION_EPSILON ) { + if ( frequency > minAlleleFrequencyToTest && maxLikelihoodSeen - log10PofDgivenAFi[altIndex][frequency] > LOG10_OPTIMIZATION_EPSILON ) { ignoreAlleleFrequenciesAboveI(frequency, nChromosomes, altIndex); return; }