From 9b0ab4e9417c4cb61d56acac0a0cf0de1add078a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 15 Oct 2012 20:58:55 -0400 Subject: [PATCH] Cleanup IndependentAllelesDiploidExactAFCalc -- Remove capability to truncate genotype likelihoods -- this wasn't used and isn't really useful after all -- Added lots of contracts and docs, still more to come. -- Created a default makeMaxLikelihoods function in ReferenceDiploidExactAFCalc and DiploidExactAFCalc so that multiple subclasses don't just do the default thing -- Generalized reference bi-allelic model in IndependentAllelesDiploidExactAFCalc so that in principle any bi-allelic reference model can be used. --- ...dentAllelesDiploidExactAFCalcUnitTest.java | 48 ++----- .../genotyper/afcalc/DiploidExactAFCalc.java | 4 +- .../IndependentAllelesDiploidExactAFCalc.java | 118 ++++++++++-------- .../afcalc/ReferenceDiploidExactAFCalc.java | 6 - 4 files changed, 77 insertions(+), 99 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java index ed164f245..391c99990 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java @@ -55,48 +55,14 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @DataProvider(name = "TestCombineGLsWithDrops") - public Object[][] makeTestCombineGLsWithDrops() { - List tests = new ArrayList(); - - final Set noDrops = Collections.emptySet(); - final Set drop1 = Collections.singleton(1); - final Set drop2 = Collections.singleton(2); - - // AA AB BB AC BC CC - // drop1 (B): AA AC CC - // drop2 (C): AA AB BB - tests.add(new Object[]{1, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 2, 5), noDrops}); - tests.add(new Object[]{2, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 4, 9), noDrops}); - tests.add(new Object[]{1, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 1, 2), drop2}); - tests.add(new Object[]{2, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 3, 5), drop1}); - - tests.add(new Object[]{1, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(0, 2, 6), noDrops}); - tests.add(new Object[]{2, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(1, 0, 2), noDrops}); - tests.add(new Object[]{1, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(2, 1, 0), drop2}); - tests.add(new Object[]{2, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(5, 2, 0), drop1}); - - tests.add(new Object[]{1, 2, makePL(10,10,10,10,10, 0), makePL( 0, 8,11), noDrops}); - tests.add(new Object[]{2, 2, makePL(10,10,10,10,10, 0), makePL( 5, 7, 0), noDrops}); - tests.add(new Object[]{1, 2, makePL(10,10,10,10,10, 0), makePL( 0, 0, 0), drop2}); - tests.add(new Object[]{2, 2, makePL(10,10,10,10,10, 0), makePL(10,10, 0), drop1}); - - return tests.toArray(new Object[][]{}); - } - private Genotype makePL(final int ... PLs) { return AFCalcUnitTest.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), PLs); } @Test(enabled = true, dataProvider = "TestCombineGLs") private void testCombineGLs(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) { - testCombineGLsWithDrops(altIndex, nAlts, testg, expected, Collections.emptySet()); - } - - @Test(enabled = true, dataProvider = "TestCombineGLsWithDrops") - private void testCombineGLsWithDrops(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected, Set allelesToDrop) { final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4); - final Genotype combined = calc.combineGLs(testg, altIndex, allelesToDrop, nAlts); + final Genotype combined = calc.combineGLs(testg, altIndex, nAlts); Assert.assertEquals(combined.getPL(), expected.getPL(), "Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL())); @@ -120,22 +86,21 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest { final Genotype gACG = makePL( 0, 1, 2, 3, 4, 5); final Genotype gAGC = makePL( 0, 4, 5, 1, 3, 2); final Genotype gACcombined = makePL(0, 2, 5); + final Genotype gACcombined2 = makePL(0, 1, 4); final Genotype gAGcombined = makePL(0, 4, 9); - final Genotype gACdropped = makePL(0, 1, 2); - final Genotype gAGdropped = makePL(0, 3, 5); // biallelic tests.add(new Object[]{vcAC.genotypes(gACcombined).make(), Arrays.asList(vcAC.genotypes(gACcombined).make())}); // tri-allelic - tests.add(new Object[]{vcACG.genotypes(gACG).make(), Arrays.asList(vcAC.genotypes(gACcombined).make(), vcAG.genotypes(gAGdropped).make())}); - tests.add(new Object[]{vcAGC.genotypes(gAGC).make(), Arrays.asList(vcAG.genotypes(gAGcombined).make(), vcAC.genotypes(gACdropped).make())}); + tests.add(new Object[]{vcACG.genotypes(gACG).make(), Arrays.asList(vcAC.genotypes(gACcombined).make(), vcAG.genotypes(gAGcombined).make())}); + tests.add(new Object[]{vcAGC.genotypes(gAGC).make(), Arrays.asList(vcAG.genotypes(gAGcombined).make(), vcAC.genotypes(gACcombined2).make())}); return tests.toArray(new Object[][]{}); } - @Test(enabled = false, dataProvider = "TestMakeAlleleConditionalContexts") + @Test(enabled = true, dataProvider = "TestMakeAlleleConditionalContexts") private void testMakeAlleleConditionalContexts(final VariantContext vc, final List expectedVCs) { final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4); final List biAllelicVCs = calc.makeAlleleConditionalContexts(vc); @@ -148,7 +113,8 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest { Assert.assertEquals(actual.getAlleles(), expected.getAlleles()); for ( int j = 0; j < actual.getNSamples(); j++ ) - Assert.assertEquals(actual.getGenotype(j).getPL(), expected.getGenotype(j).getPL()); + Assert.assertEquals(actual.getGenotype(j).getPL(), expected.getGenotype(j).getPL(), + "expected PLs " + Utils.join(",", expected.getGenotype(j).getPL()) + " not equal to actual " + Utils.join(",", actual.getGenotype(j).getPL())); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java index 8b12dff61..49915c515 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java @@ -36,7 +36,9 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy); } - protected abstract StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker); + protected StateTracker makeMaxLikelihood(VariantContext vc, AFCalcResultTracker resultTracker) { + return new StateTracker(); + } @Override protected AFCalcResult computeLog10PNonRef(final VariantContext vc, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java index ba1e5bbb8..c0edee291 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java @@ -91,6 +91,7 @@ import java.util.*; */ private final static double MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR = Math.log10(1e-20); + private final static int[] BIALLELIC_NON_INFORMATIVE_PLS = new int[]{0,0,0}; private final static List BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); /** @@ -105,19 +106,23 @@ import java.util.*; private final static CompareAFCalcResultsByPNonRef compareAFCalcResultsByPNonRef = new CompareAFCalcResultsByPNonRef(); - final ReferenceDiploidExactAFCalc refModel; + /** + * The AFCalc model we are using to do the bi-allelic computation + */ + final AFCalc biAlleleExactModel; protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) { super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); - refModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy); - } - - @Override - protected StateTracker makeMaxLikelihood(VariantContext vc, AFCalcResultTracker resultTracker) { - return refModel.makeMaxLikelihood(vc, resultTracker); + biAlleleExactModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy); } + /** + * Trivial subclass that helps with debugging by keeping track of the supporting information for this joint call + */ private static class MyAFCalcResult extends AFCalcResult { + /** + * List of the supporting bi-allelic AFCalcResults that went into making this multi-allelic joint call + */ final List supporting; private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map log10pNonRefByAllele, List supporting) { @@ -129,58 +134,89 @@ import java.util.*; @Override public AFCalcResult computeLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { - final List independentResultTrackers = computeAlleleConditionalExact(vc, log10AlleleFrequencyPriors); + final List independentResultTrackers = computeAlleleIndependentExact(vc, log10AlleleFrequencyPriors); final List withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers); return combineIndependentPNonRefs(vc, withMultiAllelicPriors); } /** + * Compute the conditional exact AFCalcResult for each allele in vc independently, returning + * the result of each, in order of the alt alleles in VC * - * @param vc - * @param log10AlleleFrequencyPriors - * @return + * @param vc the VariantContext we want to analyze + * @param log10AlleleFrequencyPriors the priors + * @return a list of the AFCalcResults for each bi-allelic sub context of vc */ - protected List computeAlleleConditionalExact(final VariantContext vc, - final double[] log10AlleleFrequencyPriors) { + @Requires({"vc != null", "log10AlleleFrequencyPriors != null"}) + @Ensures("goodIndependentResult(vc, result)") + protected final List computeAlleleIndependentExact(final VariantContext vc, + final double[] log10AlleleFrequencyPriors) { final List results = new LinkedList(); for ( final VariantContext subvc : makeAlleleConditionalContexts(vc) ) { - final AFCalcResult resultTracker = refModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors); + final AFCalcResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors); results.add(resultTracker); } return results; } - protected List makeAlleleConditionalContexts(final VariantContext vc) { + /** + * Helper function to ensure that the computeAlleleIndependentExact is returning reasonable results + */ + private static boolean goodIndependentResult(final VariantContext vc, final List results) { + if ( results.size() != vc.getNAlleles() - 1) return false; + for ( int i = 0; i < results.size(); i++ ) { + if ( results.get(i).getAllelesUsedInGenotyping().size() != 2 ) + return false; + if ( ! results.get(i).getAllelesUsedInGenotyping().contains(vc.getAlternateAllele(i)) ) + return false; + } + + return true; + } + + /** + * Returns the bi-allelic variant context for each alt allele in vc with bi-allelic likelihoods, in order + * + * @param vc the variant context to split. Must have n.alt.alleles > 1 + * @return a bi-allelic variant context for each alt allele in vc + */ + @Requires({"vc != null", "vc.getNAlleles() > 1"}) + @Ensures("result.size() == vc.getNAlleles() - 1") + protected final List makeAlleleConditionalContexts(final VariantContext vc) { final int nAltAlleles = vc.getNAlleles() - 1; final List vcs = new LinkedList(); - final List afZeroAlleles = new LinkedList(); for ( int altI = 0; altI < nAltAlleles; altI++ ) { - final Allele altAllele = vc.getAlternateAllele(altI); - final List biallelic = Arrays.asList(vc.getReference(), altAllele); - vcs.add(biallelicCombinedGLs(vc, biallelic, afZeroAlleles, altI + 1)); - //afZeroAlleles.add(altAllele); + vcs.add(biallelicCombinedGLs(vc, altI + 1)); } return vcs; } - protected VariantContext biallelicCombinedGLs(final VariantContext rootVC, final List biallelic, final List afZeroAlleles, final int allele2) { + /** + * Create a single bi-allelic variant context from rootVC with alt allele with index altAlleleIndex + * + * @param rootVC the root (potentially multi-allelic) variant context + * @param altAlleleIndex index of the alt allele, from 0 == first alt allele + * @return a bi-allelic variant context based on rootVC + */ + @Requires({"rootVC.getNAlleles() > 1", "altAlleleIndex < rootVC.getNAlleles()"}) + @Ensures({"result.isBiallelic()"}) + protected final VariantContext biallelicCombinedGLs(final VariantContext rootVC, final int altAlleleIndex) { if ( rootVC.isBiallelic() ) { - if ( ! afZeroAlleles.isEmpty() ) throw new IllegalArgumentException("Root VariantContext is biallelic but afZeroAlleles wasn't empty: " + afZeroAlleles); return rootVC; } else { - final Set allelesToDiscard = new HashSet(rootVC.getAlleleIndices(afZeroAlleles)); final int nAlts = rootVC.getNAlleles() - 1; final List biallelicGenotypes = new ArrayList(rootVC.getNSamples()); for ( final Genotype g : rootVC.getGenotypes() ) - biallelicGenotypes.add(combineGLs(g, allele2, allelesToDiscard, nAlts)); + biallelicGenotypes.add(combineGLs(g, altAlleleIndex, nAlts)); final VariantContextBuilder vcb = new VariantContextBuilder(rootVC); - vcb.alleles(biallelic); + final Allele altAllele = rootVC.getAlternateAllele(altAlleleIndex - 1); + vcb.alleles(Arrays.asList(rootVC.getReference(), altAllele)); vcb.genotypes(biallelicGenotypes); return vcb.make(); } @@ -201,30 +237,16 @@ import java.util.*; * XB = AB + BC * BB = BB * - * Supports the additional mode of simply dropping GLs whose allele index occurs in allelesToDiscard. This is - * useful in the case where you want to drop alleles (not combine them), such as above: - * - * AA AB BB AC BC CC - * - * and we want to get the bi-allelic GLs for X/B, where X is everything not B, but dropping C (index 2) - * - * XX = AA (since X = A and C is dropped) - * XB = AB - * BB = BB - * - * This allows us to recover partial GLs the correspond to any allele in allelesToDiscard having strictly - * AF == 0. - * * @param original the original multi-allelic genotype * @param altIndex the index of the alt allele we wish to keep in the bialleic case -- with ref == 0 * @param nAlts the total number of alt alleles * @return a new biallelic genotype with appropriate PLs */ - @Requires({"original.hasLikelihoods()", "! allelesToDiscard.contains(altIndex)"}) + @Requires({"original.hasLikelihoods()"}) // TODO -- add ploidy == 2 test "original.getPLs() == null || original.getPLs().length == 3"}) @Ensures({"result.hasLikelihoods()", "result.getPL().length == 3"}) - protected Genotype combineGLs(final Genotype original, final int altIndex, final Set allelesToDiscard, final int nAlts ) { + protected Genotype combineGLs(final Genotype original, final int altIndex, final int nAlts ) { if ( original.isNonInformative() ) - return new GenotypeBuilder(original).PL(new int[]{0,0,0}).alleles(BIALLELIC_NOCALL).make(); + return new GenotypeBuilder(original).PL(BIALLELIC_NON_INFORMATIVE_PLS).alleles(BIALLELIC_NOCALL).make(); if ( altIndex < 1 || altIndex > nAlts ) throw new IllegalStateException("altIndex must be between 1 and nAlts " + nAlts); @@ -234,10 +256,6 @@ import java.util.*; for ( int index = 0; index < normalizedPr.length; index++ ) { final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(index); - // just continue if we shouldn't include the pair because it's in the discard set - if ( discardAllelePair(pair, allelesToDiscard) ) - continue; - if ( pair.alleleIndex1 == altIndex ) { if ( pair.alleleIndex2 == altIndex ) // hom-alt case @@ -261,11 +279,7 @@ import java.util.*; return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make(); } - protected boolean discardAllelePair(final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair, Set allelesToDiscard) { - return allelesToDiscard.contains(pair.alleleIndex1) || allelesToDiscard.contains(pair.alleleIndex2); - } - - protected List applyMultiAllelicPriors(final List conditionalPNonRefResults) { + protected final List applyMultiAllelicPriors(final List conditionalPNonRefResults) { final ArrayList sorted = new ArrayList(conditionalPNonRefResults); // sort the results, so the most likely allele is first @@ -289,6 +303,8 @@ import java.util.*; /** * Take the independent estimates of pNonRef for each alt allele and combine them into a single result * + * TODO -- add more docs + * * @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently */ protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java index 4de983508..b4e7b2ab1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java @@ -1,13 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc { protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) { super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); } - - protected StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker) { - return new StateTracker(); - } }