From 05816955aa7945bc0422c8d3d117691b9a43bb52 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 25 Jan 2012 14:28:21 -0500 Subject: [PATCH] It was possible that we'd clean up a matrix column too early when a dependent column aborted early (with not enough probability mass) because we weren't being smart about the order in which we created dependencies. Fixed. --- .../genotyper/ExactAFCalculationModel.java | 79 +++++++++++-------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 1594c92cb..24d7696b5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.*; import java.io.PrintStream; @@ -177,11 +176,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { ACqueue.add(zeroSet); indexesToACset.put(zeroSet.ACcounts, zeroSet); - // optimization: create the temporary storage for computing L(j,k) just once - final int maxPossibleDependencies = numAlternateAlleles + (numAlternateAlleles * (numAlternateAlleles + 1) / 2) + 1; - final double[][] tempLog10ConformationLikelihoods = new double[numSamples+1][maxPossibleDependencies]; - for ( int i = 0; i < maxPossibleDependencies; i++ ) - tempLog10ConformationLikelihoods[0][i] = Double.NEGATIVE_INFINITY; + // optimization: create the temporary storage for computing L(j,k) just once + final int maxPossibleDependencies = numAlternateAlleles + (numAlternateAlleles * (numAlternateAlleles + 1) / 2) + 1; + final double[][] tempLog10ConformationLikelihoods = new double[numSamples+1][maxPossibleDependencies]; + for ( int i = 0; i < maxPossibleDependencies; i++ ) + tempLog10ConformationLikelihoods[0][i] = Double.NEGATIVE_INFINITY; // keep processing while we have AC conformations that need to be calculated double maxLog10L = Double.NEGATIVE_INFINITY; @@ -204,7 +203,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final HashMap indexesToACset, final double[][] log10AlleleFrequencyPriors, final AlleleFrequencyCalculationResult result, - final double[][] tempLog10ConformationLikelihoods) { + final double[][] tempLog10ConformationLikelihoods) { //if ( DEBUG ) // System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts); @@ -256,14 +255,24 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different if ( ACwiggle > 1 ) { - for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) { - for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) { + // IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering + for ( int allele_i = 0; allele_i < numAltAlleles - 1; allele_i++ ) { + for ( int allele_j = allele_i + 1; allele_j < numAltAlleles; allele_j++ ) { + if ( allele_i == allele_j ) + continue; final int[] ACcountsClone = set.ACcounts.getCounts().clone(); ACcountsClone[allele_i]++; ACcountsClone[allele_j]++; lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex , ACqueue, indexesToACset); } } + + // now we can deal with the case where the 2 new alleles are the same + for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) { + final int[] ACcountsClone = set.ACcounts.getCounts().clone(); + ACcountsClone[allele_i] += 2; + lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex , ACqueue, indexesToACset); + } } // if the last dependent set was not at the back of the queue (i.e. not just added), then we need to iterate @@ -298,6 +307,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } // add the given dependency to the set + //if ( DEBUG ) + // System.out.println(" *** adding dependency from " + index + " to " + callingSet.ACcounts); final ExactACset set = indexesToACset.get(index); set.ACsetIndexToPLIndex.put(callingSet.ACcounts, PLsetIndex); return wasInQueue ? null : set; @@ -317,7 +328,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final HashMap indexesToACset, final double[][] log10AlleleFrequencyPriors, final AlleleFrequencyCalculationResult result, - final double[][] tempLog10ConformationLikelihoods) { + final double[][] tempLog10ConformationLikelihoods) { set.log10Likelihoods[0] = 0.0; // the zero case final int totalK = set.getACsum(); @@ -329,40 +340,40 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } // k > 0 for at least one k else { - // deal with the non-AA possible conformations - int conformationIndex = 1; - for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) { - //if ( DEBUG ) - // System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey()); + // deal with the non-AA possible conformations + int conformationIndex = 1; + for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) { + //if ( DEBUG ) + // System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey()); - ExactACset dependent = indexesToACset.get(mapping.getKey()); + ExactACset dependent = indexesToACset.get(mapping.getKey()); - for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { + for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { - if ( totalK <= 2*j ) { // skip impossible conformations - final double[] gl = genotypeLikelihoods.get(j); - tempLog10ConformationLikelihoods[j][conformationIndex] = - determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + dependent.log10Likelihoods[j-1] + gl[mapping.getValue()]; + if ( totalK <= 2*j ) { // skip impossible conformations + final double[] gl = genotypeLikelihoods.get(j); + tempLog10ConformationLikelihoods[j][conformationIndex] = + determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + dependent.log10Likelihoods[j-1] + gl[mapping.getValue()]; } else { - tempLog10ConformationLikelihoods[j][conformationIndex] = Double.NEGATIVE_INFINITY; - } + tempLog10ConformationLikelihoods[j][conformationIndex] = Double.NEGATIVE_INFINITY; + } } - conformationIndex++; - } + conformationIndex++; + } - // finally, deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value + // finally, deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value final int numPaths = set.ACsetIndexToPLIndex.size() + 1; - for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { + for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { - if ( totalK < 2*j-1 ) { - final double[] gl = genotypeLikelihoods.get(j); - tempLog10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX]; - } else { - tempLog10ConformationLikelihoods[j][0] = Double.NEGATIVE_INFINITY; - } + if ( totalK < 2*j-1 ) { + final double[] gl = genotypeLikelihoods.get(j); + tempLog10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX]; + } else { + tempLog10ConformationLikelihoods[j][0] = Double.NEGATIVE_INFINITY; + } - final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; + final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; final double log10Max = MathUtils.approximateLog10SumLog10(tempLog10ConformationLikelihoods[j], numPaths); set.log10Likelihoods[j] = log10Max - logDenominator; }