Use a thread local result object to collect the results of the exact calculation instead of passing in multiple pre-allocated arrays.

This commit is contained in:
Eric Banks 2011-12-14 12:05:50 -05:00
parent 7648521718
commit 106bf13056
3 changed files with 39 additions and 48 deletions

View File

@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.List; import java.util.List;
@ -65,11 +64,9 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
* @param GLs genotype likelihoods * @param GLs genotype likelihoods
* @param Alleles Alleles corresponding to GLs * @param Alleles Alleles corresponding to GLs
* @param log10AlleleFrequencyPriors priors * @param log10AlleleFrequencyPriors priors
* @param log10AlleleFrequencyLikelihoods array (pre-allocated) to store likelihoods results * @param result (pre-allocated) object to store likelihoods results
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store posteriors results
*/ */
protected abstract void getLog10PNonRef(GenotypesContext GLs, List<Allele> Alleles, protected abstract void getLog10PNonRef(GenotypesContext GLs, List<Allele> Alleles,
double[][] log10AlleleFrequencyPriors, double[][] log10AlleleFrequencyPriors,
double[][] log10AlleleFrequencyLikelihoods, AlleleFrequencyCalculationResult result);
double[][] log10AlleleFrequencyPosteriors);
} }

View File

@ -47,12 +47,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
public void getLog10PNonRef(final GenotypesContext GLs, public void getLog10PNonRef(final GenotypesContext GLs,
final List<Allele> alleles, final List<Allele> alleles,
final double[][] log10AlleleFrequencyPriors, final double[][] log10AlleleFrequencyPriors,
final double[][] log10AlleleFrequencyLikelihoods, final AlleleFrequencyCalculationResult result) {
final double[][] log10AlleleFrequencyPosteriors) {
final int numAlleles = alleles.size(); final int numAlleles = alleles.size();
//linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); //linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false); linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, result, false);
} }
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) { private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
@ -196,8 +195,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
public static void linearExactMultiAllelic(final GenotypesContext GLs, public static void linearExactMultiAllelic(final GenotypesContext GLs,
final int numAlternateAlleles, final int numAlternateAlleles,
final double[][] log10AlleleFrequencyPriors, final double[][] log10AlleleFrequencyPriors,
final double[][] log10AlleleFrequencyLikelihoods, final AlleleFrequencyCalculationResult result,
final double[][] log10AlleleFrequencyPosteriors,
final boolean preserveData) { final boolean preserveData) {
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs); final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
@ -221,7 +219,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
while ( !ACqueue.isEmpty() ) { while ( !ACqueue.isEmpty() ) {
// compute log10Likelihoods // compute log10Likelihoods
final ExactACset set = ACqueue.remove(); final ExactACset set = ACqueue.remove();
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
// adjust max likelihood seen if needed // adjust max likelihood seen if needed
maxLog10L = Math.max(maxLog10L, log10LofKs); maxLog10L = Math.max(maxLog10L, log10LofKs);
@ -236,14 +234,13 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
final Queue<ExactACset> ACqueue, final Queue<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset, final HashMap<ExactACcounts, ExactACset> indexesToACset,
final double[][] log10AlleleFrequencyPriors, final double[][] log10AlleleFrequencyPriors,
final double[][] log10AlleleFrequencyLikelihoods, final AlleleFrequencyCalculationResult result) {
final double[][] log10AlleleFrequencyPosteriors) {
if ( DEBUG ) if ( DEBUG )
System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts); System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
// compute the log10Likelihoods // compute the log10Likelihoods
computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, result);
// clean up memory // clean up memory
if ( !preserveData ) { if ( !preserveData ) {
@ -349,8 +346,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
final ArrayList<double[]> genotypeLikelihoods, final ArrayList<double[]> genotypeLikelihoods,
final HashMap<ExactACcounts, ExactACset> indexesToACset, final HashMap<ExactACcounts, ExactACset> indexesToACset,
final double[][] log10AlleleFrequencyPriors, final double[][] log10AlleleFrequencyPriors,
final double[][] log10AlleleFrequencyLikelihoods, final AlleleFrequencyCalculationResult result) {
final double[][] log10AlleleFrequencyPosteriors) {
set.log10Likelihoods[0] = 0.0; // the zero case set.log10Likelihoods[0] = 0.0; // the zero case
final int totalK = set.getACsum(); final int totalK = set.getACsum();
@ -410,11 +406,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs // update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) { for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
int AC = set.ACcounts.getCounts()[i]; int AC = set.ACcounts.getCounts()[i];
log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyLikelihoods[i][AC], log10LofK); result.log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
// for k=0 we still want to use theta // for k=0 we still want to use theta
final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC]; final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC];
log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior); result.log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
} }
} }

View File

@ -75,14 +75,13 @@ public class UnifiedGenotyperEngine {
// the model used for calculating p(non-ref) // the model used for calculating p(non-ref)
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>(); private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
// the allele frequency likelihoods (allocated once as an optimization)
private ThreadLocal<AlleleFrequencyCalculationResult> alleleFrequencyCalculationResult = new ThreadLocal<AlleleFrequencyCalculationResult>();
// because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything // because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
private final double[][] log10AlleleFrequencyPriorsSNPs; private final double[][] log10AlleleFrequencyPriorsSNPs;
private final double[][] log10AlleleFrequencyPriorsIndels; private final double[][] log10AlleleFrequencyPriorsIndels;
// the allele frequency likelihoods (allocated once as an optimization)
private ThreadLocal<double[][]> log10AlleleFrequencyLikelihoods = new ThreadLocal<double[][]>();
private ThreadLocal<double[][]> log10AlleleFrequencyPosteriors = new ThreadLocal<double[][]>();
// the priors object // the priors object
private final GenotypePriors genotypePriorsSNPs; private final GenotypePriors genotypePriorsSNPs;
private final GenotypePriors genotypePriorsIndels; private final GenotypePriors genotypePriorsIndels;
@ -264,9 +263,8 @@ public class UnifiedGenotyperEngine {
// initialize the data for this thread if that hasn't been done yet // initialize the data for this thread if that hasn't been done yet
if ( afcm.get() == null ) { if ( afcm.get() == null ) {
log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
alleleFrequencyCalculationResult.set(new AlleleFrequencyCalculationResult(UAC.MAX_ALTERNATE_ALLELES, N));
} }
// don't try to genotype too many alternate alleles // don't try to genotype too many alternate alleles
@ -285,9 +283,9 @@ public class UnifiedGenotyperEngine {
} }
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
clearAFarray(log10AlleleFrequencyPosteriors.get()); clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
// is the most likely frequency conformation AC=0 for all alternate alleles? // is the most likely frequency conformation AC=0 for all alternate alleles?
boolean bestGuessIsRef = true; boolean bestGuessIsRef = true;
@ -299,7 +297,7 @@ public class UnifiedGenotyperEngine {
// determine which alternate alleles have AF>0 // determine which alternate alleles have AF>0
boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()]; boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()];
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) { for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
int indexOfBestAC = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[i]); int indexOfBestAC = MathUtils.maxElementIndex(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[i]);
// if the most likely AC is not 0, then this is a good alternate allele to use // if the most likely AC is not 0, then this is a good alternate allele to use
if ( indexOfBestAC != 0 ) { if ( indexOfBestAC != 0 ) {
@ -320,7 +318,7 @@ public class UnifiedGenotyperEngine {
// calculate p(f>0) // calculate p(f>0)
// TODO -- right now we just calculate it for the alt allele with highest AF, but the likelihoods need to be combined correctly over all AFs // TODO -- right now we just calculate it for the alt allele with highest AF, but the likelihoods need to be combined correctly over all AFs
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()[indexOfHighestAlt]); double[] normalizedPosteriors = MathUtils.normalizeFromLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[indexOfHighestAlt]);
double sum = 0.0; double sum = 0.0;
for (int i = 1; i <= N; i++) for (int i = 1; i <= N; i++)
sum += normalizedPosteriors[i]; sum += normalizedPosteriors[i];
@ -330,15 +328,15 @@ public class UnifiedGenotyperEngine {
if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
if ( Double.isInfinite(phredScaledConfidence) ) if ( Double.isInfinite(phredScaledConfidence) )
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0][0]; phredScaledConfidence = -10.0 * alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0];
} else { } else {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
if ( Double.isInfinite(phredScaledConfidence) ) { if ( Double.isInfinite(phredScaledConfidence) ) {
sum = 0.0; sum = 0.0;
for (int i = 1; i <= N; i++) { for (int i = 1; i <= N; i++) {
if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) if ( alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
break; break;
sum += log10AlleleFrequencyPosteriors.get()[0][i]; sum += alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i];
} }
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
} }
@ -396,31 +394,31 @@ public class UnifiedGenotyperEngine {
// the overall lod // the overall lod
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model); VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
clearAFarray(log10AlleleFrequencyPosteriors.get()); clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; //double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); double overallLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1);
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
// the forward lod // the forward lod
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model); VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
clearAFarray(log10AlleleFrequencyPosteriors.get()); clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0]; double forwardLog10PofNull = alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0];
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); double forwardLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1);
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
// the reverse lod // the reverse lod
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model); VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
clearAFarray(log10AlleleFrequencyPosteriors.get()); clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0]; double reverseLog10PofNull = alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0];
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); double reverseLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1);
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF; double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
@ -587,10 +585,10 @@ public class UnifiedGenotyperEngine {
AFline.append(i + "/" + N + "\t"); AFline.append(i + "/" + N + "\t");
AFline.append(String.format("%.2f\t", ((float)i)/N)); AFline.append(String.format("%.2f\t", ((float)i)/N));
AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i])); AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i]));
if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED) if ( alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED)
AFline.append("0.00000000\t"); AFline.append("0.00000000\t");
else else
AFline.append(String.format("%.8f\t", log10AlleleFrequencyPosteriors.get()[i])); AFline.append(String.format("%.8f\t", alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[i]));
AFline.append(String.format("%.8f\t", normalizedPosteriors[i])); AFline.append(String.format("%.8f\t", normalizedPosteriors[i]));
verboseWriter.println(AFline.toString()); verboseWriter.println(AFline.toString());
} }