diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 2808e6968..01e696237 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -62,24 +62,26 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { /** * Must be overridden by concrete subclasses - * @param GLs genotype likelihoods - * @param Alleles Alleles corresponding to GLs - * @param log10AlleleFrequencyPriors priors - * @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results + * @param GLs genotype likelihoods + * @param Alleles Alleles corresponding to GLs + * @param log10AlleleFrequencyPriors priors + * @param log10AlleleFrequencyLikelihoods array (pre-allocated) to store likelihoods results + * @param log10AlleleFrequencyPosteriors array (pre-allocated) to store posteriors results */ protected abstract void getLog10PNonRef(GenotypesContext GLs, List Alleles, double[][] log10AlleleFrequencyPriors, + double[][] log10AlleleFrequencyLikelihoods, double[][] log10AlleleFrequencyPosteriors); /** * Can be overridden by concrete subclasses * @param vc variant context with genotype likelihoods - * @param log10AlleleFrequencyPosteriors allele frequency results + * @param log10AlleleFrequencyLikelihoods allele frequency results * @param AFofMaxLikelihood allele frequency of max likelihood * * @return calls */ protected abstract GenotypesContext assignGenotypes(VariantContext vc, - double[][] log10AlleleFrequencyPosteriors, - int AFofMaxLikelihood); + double[][] log10AlleleFrequencyLikelihoods, + int AFofMaxLikelihood); } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 22017a1ee..f4af579e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -52,15 +52,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { USE_MULTI_ALLELIC_CALCULATION = UAC.MULTI_ALLELIC; } - public void getLog10PNonRef(GenotypesContext GLs, List alleles, - double[][] log10AlleleFrequencyPriors, - double[][] log10AlleleFrequencyPosteriors) { + public void getLog10PNonRef(final GenotypesContext GLs, + final List alleles, + final double[][] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyLikelihoods, + final double[][] log10AlleleFrequencyPosteriors) { final int numAlleles = alleles.size(); if ( USE_MULTI_ALLELIC_CALCULATION ) - linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, false); + linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false); else - linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyPosteriors); + linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); } private static final ArrayList getGLs(GenotypesContext GLs) { @@ -125,6 +127,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { public int linearExact(GenotypesContext GLs, double[] log10AlleleFrequencyPriors, + double[][] log10AlleleFrequencyLikelihoods, double[][] log10AlleleFrequencyPosteriors) { final ArrayList genotypeLikelihoods = getGLs(GLs); final int numSamples = genotypeLikelihoods.size()-1; @@ -176,6 +179,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // update the posteriors vector final double log10LofK = kMinus0[numSamples]; + log10AlleleFrequencyLikelihoods[0][k] = log10LofK; log10AlleleFrequencyPosteriors[0][k] = log10LofK + log10AlleleFrequencyPriors[k]; // can we abort early? @@ -320,6 +324,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { public static void linearExactMultiAllelic(final GenotypesContext GLs, final int numAlternateAlleles, final double[][] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyLikelihoods, final double[][] log10AlleleFrequencyPosteriors, final boolean preserveData) { @@ -344,7 +349,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { while ( !ACqueue.isEmpty() ) { // compute log10Likelihoods final ExactACset set = ACqueue.remove(); - final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors); + final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); // adjust max likelihood seen if needed maxLog10L = Math.max(maxLog10L, log10LofKs); @@ -359,13 +364,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final Queue ACqueue, final HashMap indexesToACset, final double[][] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyLikelihoods, final double[][] log10AlleleFrequencyPosteriors) { if ( DEBUG ) System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts); // compute the log10Likelihoods - computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors); + computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); // clean up memory if ( !preserveData ) { @@ -471,6 +477,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final ArrayList genotypeLikelihoods, final HashMap indexesToACset, final double[][] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyLikelihoods, final double[][] log10AlleleFrequencyPosteriors) { set.log10Likelihoods[0] = 0.0; // the zero case @@ -517,7 +524,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } } - // update the posteriors vector final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; // determine the power of theta to use @@ -527,11 +533,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { nonRefAlleles++; } - // update the posteriors vector which is a collapsed view of each of the various ACs + // update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) { + int AC = set.ACcounts.getCounts()[i]; + log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyLikelihoods[i][AC], log10LofK); + // for k=0 we still want to use theta - final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][set.ACcounts.getCounts()[i]]; - log10AlleleFrequencyPosteriors[i][set.ACcounts.getCounts()[i]] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][set.ACcounts.getCounts()[i]], log10LofK + prior); + final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC]; + log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 6e61790ed..606a0544c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -77,6 +77,7 @@ public class UnifiedGenotyperEngine { private final double[][] log10AlleleFrequencyPriorsIndels; // the allele frequency likelihoods (allocated once as an optimization) + private ThreadLocal log10AlleleFrequencyLikelihoods = new ThreadLocal(); private ThreadLocal log10AlleleFrequencyPosteriors = new ThreadLocal(); // the priors object @@ -294,6 +295,7 @@ public class UnifiedGenotyperEngine { // initialize the data for this thread if that hasn't been done yet if ( afcm.get() == null ) { + log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); } @@ -311,8 +313,9 @@ public class UnifiedGenotyperEngine { generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext)); // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) + clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); // find the most likely frequency int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[0]); @@ -350,7 +353,7 @@ public class UnifiedGenotyperEngine { } // create the genotypes - GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess); + GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyLikelihoods.get(), bestAFguess); // print out stats if we have a writer if ( verboseWriter != null ) @@ -369,16 +372,18 @@ public class UnifiedGenotyperEngine { // the overall lod VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model); + clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); //double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); // the forward lod VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model); + clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0]; double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); @@ -386,8 +391,9 @@ public class UnifiedGenotyperEngine { // the reverse lod VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model); + clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0]; double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); @@ -445,6 +451,7 @@ public class UnifiedGenotyperEngine { // initialize the data for this thread if that hasn't been done yet if ( afcm.get() == null ) { + log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); } @@ -460,8 +467,9 @@ public class UnifiedGenotyperEngine { return null; // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) + clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); // find the most likely frequency int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[0]); @@ -499,7 +507,7 @@ public class UnifiedGenotyperEngine { } // create the genotypes - GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess); + GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyLikelihoods.get(), bestAFguess); // *** note that calculating strand bias involves overwriting data structures, so we do that last HashMap attributes = new HashMap(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java index 00cfff4b3..9640a8963 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java @@ -83,14 +83,16 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { @Test(dataProvider = "getGLs") public void testGLs(GetGLsTest cfg) { + final double[][] log10AlleleFrequencyLikelihoods = new double[2][2*numSamples+1]; final double[][] log10AlleleFrequencyPosteriors = new double[2][2*numSamples+1]; for ( int i = 0; i < 2; i++ ) { for ( int j = 0; j < 2*numSamples+1; j++ ) { + log10AlleleFrequencyLikelihoods[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; log10AlleleFrequencyPosteriors[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; } } - ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, log10AlleleFrequencyPosteriors, false); + ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false); int nameIndex = 1; for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {