Cleanup IndependentAllelesDiploidExactAFCalc

-- Remove capability to truncate genotype likelihoods -- this wasn't used and isn't really useful after all
-- Added lots of contracts and docs, still more to come.
-- Created a default makeMaxLikelihoods function in ReferenceDiploidExactAFCalc and DiploidExactAFCalc so that multiple subclasses don't just do the default thing
-- Generalized reference bi-allelic model in IndependentAllelesDiploidExactAFCalc so that in principle any bi-allelic reference model can be used.
This commit is contained in:
Mark DePristo 2012-10-15 20:58:55 -04:00
parent 6bd0ec8de4
commit 9b0ab4e941
4 changed files with 77 additions and 99 deletions

View File

@ -55,48 +55,14 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
return tests.toArray(new Object[][]{});
}
@DataProvider(name = "TestCombineGLsWithDrops")
public Object[][] makeTestCombineGLsWithDrops() {
List<Object[]> tests = new ArrayList<Object[]>();
final Set<Integer> noDrops = Collections.emptySet();
final Set<Integer> drop1 = Collections.singleton(1);
final Set<Integer> drop2 = Collections.singleton(2);
// AA AB BB AC BC CC
// drop1 (B): AA AC CC
// drop2 (C): AA AB BB
tests.add(new Object[]{1, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 2, 5), noDrops});
tests.add(new Object[]{2, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 4, 9), noDrops});
tests.add(new Object[]{1, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 1, 2), drop2});
tests.add(new Object[]{2, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 3, 5), drop1});
tests.add(new Object[]{1, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(0, 2, 6), noDrops});
tests.add(new Object[]{2, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(1, 0, 2), noDrops});
tests.add(new Object[]{1, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(2, 1, 0), drop2});
tests.add(new Object[]{2, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(5, 2, 0), drop1});
tests.add(new Object[]{1, 2, makePL(10,10,10,10,10, 0), makePL( 0, 8,11), noDrops});
tests.add(new Object[]{2, 2, makePL(10,10,10,10,10, 0), makePL( 5, 7, 0), noDrops});
tests.add(new Object[]{1, 2, makePL(10,10,10,10,10, 0), makePL( 0, 0, 0), drop2});
tests.add(new Object[]{2, 2, makePL(10,10,10,10,10, 0), makePL(10,10, 0), drop1});
return tests.toArray(new Object[][]{});
}
private Genotype makePL(final int ... PLs) {
return AFCalcUnitTest.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), PLs);
}
@Test(enabled = true, dataProvider = "TestCombineGLs")
private void testCombineGLs(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
testCombineGLsWithDrops(altIndex, nAlts, testg, expected, Collections.<Integer>emptySet());
}
@Test(enabled = true, dataProvider = "TestCombineGLsWithDrops")
private void testCombineGLsWithDrops(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected, Set<Integer> allelesToDrop) {
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
final Genotype combined = calc.combineGLs(testg, altIndex, allelesToDrop, nAlts);
final Genotype combined = calc.combineGLs(testg, altIndex, nAlts);
Assert.assertEquals(combined.getPL(), expected.getPL(),
"Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL()));
@ -120,22 +86,21 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
final Genotype gACG = makePL( 0, 1, 2, 3, 4, 5);
final Genotype gAGC = makePL( 0, 4, 5, 1, 3, 2);
final Genotype gACcombined = makePL(0, 2, 5);
final Genotype gACcombined2 = makePL(0, 1, 4);
final Genotype gAGcombined = makePL(0, 4, 9);
final Genotype gACdropped = makePL(0, 1, 2);
final Genotype gAGdropped = makePL(0, 3, 5);
// biallelic
tests.add(new Object[]{vcAC.genotypes(gACcombined).make(), Arrays.asList(vcAC.genotypes(gACcombined).make())});
// tri-allelic
tests.add(new Object[]{vcACG.genotypes(gACG).make(), Arrays.asList(vcAC.genotypes(gACcombined).make(), vcAG.genotypes(gAGdropped).make())});
tests.add(new Object[]{vcAGC.genotypes(gAGC).make(), Arrays.asList(vcAG.genotypes(gAGcombined).make(), vcAC.genotypes(gACdropped).make())});
tests.add(new Object[]{vcACG.genotypes(gACG).make(), Arrays.asList(vcAC.genotypes(gACcombined).make(), vcAG.genotypes(gAGcombined).make())});
tests.add(new Object[]{vcAGC.genotypes(gAGC).make(), Arrays.asList(vcAG.genotypes(gAGcombined).make(), vcAC.genotypes(gACcombined2).make())});
return tests.toArray(new Object[][]{});
}
@Test(enabled = false, dataProvider = "TestMakeAlleleConditionalContexts")
@Test(enabled = true, dataProvider = "TestMakeAlleleConditionalContexts")
private void testMakeAlleleConditionalContexts(final VariantContext vc, final List<VariantContext> expectedVCs) {
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
final List<VariantContext> biAllelicVCs = calc.makeAlleleConditionalContexts(vc);
@ -148,7 +113,8 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
Assert.assertEquals(actual.getAlleles(), expected.getAlleles());
for ( int j = 0; j < actual.getNSamples(); j++ )
Assert.assertEquals(actual.getGenotype(j).getPL(), expected.getGenotype(j).getPL());
Assert.assertEquals(actual.getGenotype(j).getPL(), expected.getGenotype(j).getPL(),
"expected PLs " + Utils.join(",", expected.getGenotype(j).getPL()) + " not equal to actual " + Utils.join(",", actual.getGenotype(j).getPL()));
}
}

View File

@ -36,7 +36,9 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy);
}
protected abstract StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker);
protected StateTracker makeMaxLikelihood(VariantContext vc, AFCalcResultTracker resultTracker) {
return new StateTracker();
}
@Override
protected AFCalcResult computeLog10PNonRef(final VariantContext vc,

View File

@ -91,6 +91,7 @@ import java.util.*;
*/
private final static double MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR = Math.log10(1e-20);
private final static int[] BIALLELIC_NON_INFORMATIVE_PLS = new int[]{0,0,0};
private final static List<Allele> BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
/**
@ -105,19 +106,23 @@ import java.util.*;
private final static CompareAFCalcResultsByPNonRef compareAFCalcResultsByPNonRef = new CompareAFCalcResultsByPNonRef();
final ReferenceDiploidExactAFCalc refModel;
/**
* The AFCalc model we are using to do the bi-allelic computation
*/
final AFCalc biAlleleExactModel;
protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
refModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy);
}
@Override
protected StateTracker makeMaxLikelihood(VariantContext vc, AFCalcResultTracker resultTracker) {
return refModel.makeMaxLikelihood(vc, resultTracker);
biAlleleExactModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy);
}
/**
* Trivial subclass that helps with debugging by keeping track of the supporting information for this joint call
*/
private static class MyAFCalcResult extends AFCalcResult {
/**
* List of the supporting bi-allelic AFCalcResults that went into making this multi-allelic joint call
*/
final List<AFCalcResult> supporting;
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pNonRefByAllele, List<AFCalcResult> supporting) {
@ -129,58 +134,89 @@ import java.util.*;
@Override
public AFCalcResult computeLog10PNonRef(final VariantContext vc,
final double[] log10AlleleFrequencyPriors) {
final List<AFCalcResult> independentResultTrackers = computeAlleleConditionalExact(vc, log10AlleleFrequencyPriors);
final List<AFCalcResult> independentResultTrackers = computeAlleleIndependentExact(vc, log10AlleleFrequencyPriors);
final List<AFCalcResult> withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers);
return combineIndependentPNonRefs(vc, withMultiAllelicPriors);
}
/**
* Compute the conditional exact AFCalcResult for each allele in vc independently, returning
* the result of each, in order of the alt alleles in VC
*
* @param vc
* @param log10AlleleFrequencyPriors
* @return
* @param vc the VariantContext we want to analyze
* @param log10AlleleFrequencyPriors the priors
* @return a list of the AFCalcResults for each bi-allelic sub context of vc
*/
protected List<AFCalcResult> computeAlleleConditionalExact(final VariantContext vc,
final double[] log10AlleleFrequencyPriors) {
@Requires({"vc != null", "log10AlleleFrequencyPriors != null"})
@Ensures("goodIndependentResult(vc, result)")
protected final List<AFCalcResult> computeAlleleIndependentExact(final VariantContext vc,
final double[] log10AlleleFrequencyPriors) {
final List<AFCalcResult> results = new LinkedList<AFCalcResult>();
for ( final VariantContext subvc : makeAlleleConditionalContexts(vc) ) {
final AFCalcResult resultTracker = refModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors);
final AFCalcResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors);
results.add(resultTracker);
}
return results;
}
protected List<VariantContext> makeAlleleConditionalContexts(final VariantContext vc) {
/**
* Helper function to ensure that the computeAlleleIndependentExact is returning reasonable results
*/
private static boolean goodIndependentResult(final VariantContext vc, final List<AFCalcResult> results) {
if ( results.size() != vc.getNAlleles() - 1) return false;
for ( int i = 0; i < results.size(); i++ ) {
if ( results.get(i).getAllelesUsedInGenotyping().size() != 2 )
return false;
if ( ! results.get(i).getAllelesUsedInGenotyping().contains(vc.getAlternateAllele(i)) )
return false;
}
return true;
}
/**
* Returns the bi-allelic variant context for each alt allele in vc with bi-allelic likelihoods, in order
*
* @param vc the variant context to split. Must have n.alt.alleles > 1
* @return a bi-allelic variant context for each alt allele in vc
*/
@Requires({"vc != null", "vc.getNAlleles() > 1"})
@Ensures("result.size() == vc.getNAlleles() - 1")
protected final List<VariantContext> makeAlleleConditionalContexts(final VariantContext vc) {
final int nAltAlleles = vc.getNAlleles() - 1;
final List<VariantContext> vcs = new LinkedList<VariantContext>();
final List<Allele> afZeroAlleles = new LinkedList<Allele>();
for ( int altI = 0; altI < nAltAlleles; altI++ ) {
final Allele altAllele = vc.getAlternateAllele(altI);
final List<Allele> biallelic = Arrays.asList(vc.getReference(), altAllele);
vcs.add(biallelicCombinedGLs(vc, biallelic, afZeroAlleles, altI + 1));
//afZeroAlleles.add(altAllele);
vcs.add(biallelicCombinedGLs(vc, altI + 1));
}
return vcs;
}
protected VariantContext biallelicCombinedGLs(final VariantContext rootVC, final List<Allele> biallelic, final List<Allele> afZeroAlleles, final int allele2) {
/**
* Create a single bi-allelic variant context from rootVC with alt allele with index altAlleleIndex
*
* @param rootVC the root (potentially multi-allelic) variant context
* @param altAlleleIndex index of the alt allele, from 0 == first alt allele
* @return a bi-allelic variant context based on rootVC
*/
@Requires({"rootVC.getNAlleles() > 1", "altAlleleIndex < rootVC.getNAlleles()"})
@Ensures({"result.isBiallelic()"})
protected final VariantContext biallelicCombinedGLs(final VariantContext rootVC, final int altAlleleIndex) {
if ( rootVC.isBiallelic() ) {
if ( ! afZeroAlleles.isEmpty() ) throw new IllegalArgumentException("Root VariantContext is biallelic but afZeroAlleles wasn't empty: " + afZeroAlleles);
return rootVC;
} else {
final Set<Integer> allelesToDiscard = new HashSet<Integer>(rootVC.getAlleleIndices(afZeroAlleles));
final int nAlts = rootVC.getNAlleles() - 1;
final List<Genotype> biallelicGenotypes = new ArrayList<Genotype>(rootVC.getNSamples());
for ( final Genotype g : rootVC.getGenotypes() )
biallelicGenotypes.add(combineGLs(g, allele2, allelesToDiscard, nAlts));
biallelicGenotypes.add(combineGLs(g, altAlleleIndex, nAlts));
final VariantContextBuilder vcb = new VariantContextBuilder(rootVC);
vcb.alleles(biallelic);
final Allele altAllele = rootVC.getAlternateAllele(altAlleleIndex - 1);
vcb.alleles(Arrays.asList(rootVC.getReference(), altAllele));
vcb.genotypes(biallelicGenotypes);
return vcb.make();
}
@ -201,30 +237,16 @@ import java.util.*;
* XB = AB + BC
* BB = BB
*
* Supports the additional mode of simply dropping GLs whose allele index occurs in allelesToDiscard. This is
* useful in the case where you want to drop alleles (not combine them), such as above:
*
* AA AB BB AC BC CC
*
* and we want to get the bi-allelic GLs for X/B, where X is everything not B, but dropping C (index 2)
*
* XX = AA (since X = A and C is dropped)
* XB = AB
* BB = BB
*
* This allows us to recover partial GLs the correspond to any allele in allelesToDiscard having strictly
* AF == 0.
*
* @param original the original multi-allelic genotype
* @param altIndex the index of the alt allele we wish to keep in the bialleic case -- with ref == 0
* @param nAlts the total number of alt alleles
* @return a new biallelic genotype with appropriate PLs
*/
@Requires({"original.hasLikelihoods()", "! allelesToDiscard.contains(altIndex)"})
@Requires({"original.hasLikelihoods()"}) // TODO -- add ploidy == 2 test "original.getPLs() == null || original.getPLs().length == 3"})
@Ensures({"result.hasLikelihoods()", "result.getPL().length == 3"})
protected Genotype combineGLs(final Genotype original, final int altIndex, final Set<Integer> allelesToDiscard, final int nAlts ) {
protected Genotype combineGLs(final Genotype original, final int altIndex, final int nAlts ) {
if ( original.isNonInformative() )
return new GenotypeBuilder(original).PL(new int[]{0,0,0}).alleles(BIALLELIC_NOCALL).make();
return new GenotypeBuilder(original).PL(BIALLELIC_NON_INFORMATIVE_PLS).alleles(BIALLELIC_NOCALL).make();
if ( altIndex < 1 || altIndex > nAlts ) throw new IllegalStateException("altIndex must be between 1 and nAlts " + nAlts);
@ -234,10 +256,6 @@ import java.util.*;
for ( int index = 0; index < normalizedPr.length; index++ ) {
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(index);
// just continue if we shouldn't include the pair because it's in the discard set
if ( discardAllelePair(pair, allelesToDiscard) )
continue;
if ( pair.alleleIndex1 == altIndex ) {
if ( pair.alleleIndex2 == altIndex )
// hom-alt case
@ -261,11 +279,7 @@ import java.util.*;
return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make();
}
protected boolean discardAllelePair(final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair, Set<Integer> allelesToDiscard) {
return allelesToDiscard.contains(pair.alleleIndex1) || allelesToDiscard.contains(pair.alleleIndex2);
}
protected List<AFCalcResult> applyMultiAllelicPriors(final List<AFCalcResult> conditionalPNonRefResults) {
protected final List<AFCalcResult> applyMultiAllelicPriors(final List<AFCalcResult> conditionalPNonRefResults) {
final ArrayList<AFCalcResult> sorted = new ArrayList<AFCalcResult>(conditionalPNonRefResults);
// sort the results, so the most likely allele is first
@ -289,6 +303,8 @@ import java.util.*;
/**
* Take the independent estimates of pNonRef for each alt allele and combine them into a single result
*
* TODO -- add more docs
*
* @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently
*/
protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc,

View File

@ -1,13 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc {
protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
}
protected StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker) {
return new StateTracker();
}
}