From 106bf13056969d13e01582a7088d168142d73519 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 14 Dec 2011 12:05:50 -0500 Subject: [PATCH] Use a thread local result object to collect the results of the exact calculation instead of passing in multiple pre-allocated arrays. --- .../AlleleFrequencyCalculationModel.java | 7 +-- .../genotyper/ExactAFCalculationModel.java | 22 +++---- .../genotyper/UnifiedGenotyperEngine.java | 58 +++++++++---------- 3 files changed, 39 insertions(+), 48 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 7d3e7047d..681cc1fa6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; import java.util.List; @@ -65,11 +64,9 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { * @param GLs genotype likelihoods * @param Alleles Alleles corresponding to GLs * @param log10AlleleFrequencyPriors priors - * @param log10AlleleFrequencyLikelihoods array (pre-allocated) to store likelihoods results - * @param log10AlleleFrequencyPosteriors array (pre-allocated) to store posteriors results + * @param result (pre-allocated) object to store likelihoods results */ protected abstract void getLog10PNonRef(GenotypesContext GLs, List Alleles, double[][] log10AlleleFrequencyPriors, - double[][] log10AlleleFrequencyLikelihoods, - double[][] log10AlleleFrequencyPosteriors); + AlleleFrequencyCalculationResult result); } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index ed86897f2..33634ce28 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -47,12 +47,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { public void getLog10PNonRef(final GenotypesContext GLs, final List alleles, final double[][] log10AlleleFrequencyPriors, - final double[][] log10AlleleFrequencyLikelihoods, - final double[][] log10AlleleFrequencyPosteriors) { + final AlleleFrequencyCalculationResult result) { final int numAlleles = alleles.size(); //linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); - linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false); + linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, result, false); } private static final ArrayList getGLs(GenotypesContext GLs) { @@ -196,8 +195,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { public static void linearExactMultiAllelic(final GenotypesContext GLs, final int numAlternateAlleles, final double[][] log10AlleleFrequencyPriors, - final double[][] log10AlleleFrequencyLikelihoods, - final double[][] log10AlleleFrequencyPosteriors, + final AlleleFrequencyCalculationResult result, final boolean preserveData) { final ArrayList genotypeLikelihoods = getGLs(GLs); @@ -221,7 +219,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { while ( !ACqueue.isEmpty() ) { // compute log10Likelihoods final ExactACset set = ACqueue.remove(); - final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); + final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result); // adjust max likelihood seen if needed maxLog10L = Math.max(maxLog10L, log10LofKs); @@ -236,14 +234,13 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final Queue ACqueue, final HashMap indexesToACset, final double[][] log10AlleleFrequencyPriors, - final double[][] log10AlleleFrequencyLikelihoods, - final double[][] log10AlleleFrequencyPosteriors) { + final AlleleFrequencyCalculationResult result) { if ( DEBUG ) System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts); // compute the log10Likelihoods - computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); + computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, result); // clean up memory if ( !preserveData ) { @@ -349,8 +346,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final ArrayList genotypeLikelihoods, final HashMap indexesToACset, final double[][] log10AlleleFrequencyPriors, - final double[][] log10AlleleFrequencyLikelihoods, - final double[][] log10AlleleFrequencyPosteriors) { + final AlleleFrequencyCalculationResult result) { set.log10Likelihoods[0] = 0.0; // the zero case final int totalK = set.getACsum(); @@ -410,11 +406,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) { int AC = set.ACcounts.getCounts()[i]; - log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyLikelihoods[i][AC], log10LofK); + result.log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK); // for k=0 we still want to use theta final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC]; - log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior); + result.log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 2308e1759..5d271cdb1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -75,14 +75,13 @@ public class UnifiedGenotyperEngine { // the model used for calculating p(non-ref) private ThreadLocal afcm = new ThreadLocal(); + // the allele frequency likelihoods (allocated once as an optimization) + private ThreadLocal alleleFrequencyCalculationResult = new ThreadLocal(); + // because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything private final double[][] log10AlleleFrequencyPriorsSNPs; private final double[][] log10AlleleFrequencyPriorsIndels; - // the allele frequency likelihoods (allocated once as an optimization) - private ThreadLocal log10AlleleFrequencyLikelihoods = new ThreadLocal(); - private ThreadLocal log10AlleleFrequencyPosteriors = new ThreadLocal(); - // the priors object private final GenotypePriors genotypePriorsSNPs; private final GenotypePriors genotypePriorsIndels; @@ -264,9 +263,8 @@ public class UnifiedGenotyperEngine { // initialize the data for this thread if that hasn't been done yet if ( afcm.get() == null ) { - log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); - log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); + alleleFrequencyCalculationResult.set(new AlleleFrequencyCalculationResult(UAC.MAX_ALTERNATE_ALLELES, N)); } // don't try to genotype too many alternate alleles @@ -285,9 +283,9 @@ public class UnifiedGenotyperEngine { } // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) - clearAFarray(log10AlleleFrequencyLikelihoods.get()); - clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); + clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods); + clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors); + afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get()); // is the most likely frequency conformation AC=0 for all alternate alleles? boolean bestGuessIsRef = true; @@ -299,7 +297,7 @@ public class UnifiedGenotyperEngine { // determine which alternate alleles have AF>0 boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()]; for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) { - int indexOfBestAC = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[i]); + int indexOfBestAC = MathUtils.maxElementIndex(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[i]); // if the most likely AC is not 0, then this is a good alternate allele to use if ( indexOfBestAC != 0 ) { @@ -320,7 +318,7 @@ public class UnifiedGenotyperEngine { // calculate p(f>0) // TODO -- right now we just calculate it for the alt allele with highest AF, but the likelihoods need to be combined correctly over all AFs - double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()[indexOfHighestAlt]); + double[] normalizedPosteriors = MathUtils.normalizeFromLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[indexOfHighestAlt]); double sum = 0.0; for (int i = 1; i <= N; i++) sum += normalizedPosteriors[i]; @@ -330,15 +328,15 @@ public class UnifiedGenotyperEngine { if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); if ( Double.isInfinite(phredScaledConfidence) ) - phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0][0]; + phredScaledConfidence = -10.0 * alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0]; } else { phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); if ( Double.isInfinite(phredScaledConfidence) ) { sum = 0.0; for (int i = 1; i <= N; i++) { - if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) + if ( alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) break; - sum += log10AlleleFrequencyPosteriors.get()[0][i]; + sum += alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i]; } phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); } @@ -396,31 +394,31 @@ public class UnifiedGenotyperEngine { // the overall lod VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model); - clearAFarray(log10AlleleFrequencyLikelihoods.get()); - clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); + clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods); + clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors); + afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get()); //double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; - double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); + double overallLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); // the forward lod VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model); - clearAFarray(log10AlleleFrequencyLikelihoods.get()); - clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); + clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods); + clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors); + afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get()); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); - double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0]; - double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); + double forwardLog10PofNull = alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0]; + double forwardLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1); //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); // the reverse lod VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model); - clearAFarray(log10AlleleFrequencyLikelihoods.get()); - clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); + clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods); + clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors); + afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get()); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); - double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0]; - double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); + double reverseLog10PofNull = alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0]; + double reverseLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1); //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF; @@ -587,10 +585,10 @@ public class UnifiedGenotyperEngine { AFline.append(i + "/" + N + "\t"); AFline.append(String.format("%.2f\t", ((float)i)/N)); AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i])); - if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED) + if ( alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED) AFline.append("0.00000000\t"); else - AFline.append(String.format("%.8f\t", log10AlleleFrequencyPosteriors.get()[i])); + AFline.append(String.format("%.8f\t", alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[i])); AFline.append(String.format("%.8f\t", normalizedPosteriors[i])); verboseWriter.println(AFline.toString()); }