Cleanup IndependentAllelesDiploidExactAFCalc
-- Removed the capability to truncate genotype likelihoods; it wasn't used and isn't really useful after all.
-- Added lots of contracts and docs; still more to come.
-- Created a default makeMaxLikelihood function in DiploidExactAFCalc (previously implemented separately in ReferenceDiploidExactAFCalc) so that multiple subclasses don't each have to re-implement the default behavior.
-- Generalized the reference bi-allelic model in IndependentAllelesDiploidExactAFCalc so that, in principle, any bi-allelic reference model can be used.
This commit is contained in:
parent
6bd0ec8de4
commit
9b0ab4e941
|
|
@ -55,48 +55,14 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
|
|||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@DataProvider(name = "TestCombineGLsWithDrops")
|
||||
public Object[][] makeTestCombineGLsWithDrops() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final Set<Integer> noDrops = Collections.emptySet();
|
||||
final Set<Integer> drop1 = Collections.singleton(1);
|
||||
final Set<Integer> drop2 = Collections.singleton(2);
|
||||
|
||||
// AA AB BB AC BC CC
|
||||
// drop1 (B): AA AC CC
|
||||
// drop2 (C): AA AB BB
|
||||
tests.add(new Object[]{1, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 2, 5), noDrops});
|
||||
tests.add(new Object[]{2, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 4, 9), noDrops});
|
||||
tests.add(new Object[]{1, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 1, 2), drop2});
|
||||
tests.add(new Object[]{2, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 3, 5), drop1});
|
||||
|
||||
tests.add(new Object[]{1, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(0, 2, 6), noDrops});
|
||||
tests.add(new Object[]{2, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(1, 0, 2), noDrops});
|
||||
tests.add(new Object[]{1, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(2, 1, 0), drop2});
|
||||
tests.add(new Object[]{2, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(5, 2, 0), drop1});
|
||||
|
||||
tests.add(new Object[]{1, 2, makePL(10,10,10,10,10, 0), makePL( 0, 8,11), noDrops});
|
||||
tests.add(new Object[]{2, 2, makePL(10,10,10,10,10, 0), makePL( 5, 7, 0), noDrops});
|
||||
tests.add(new Object[]{1, 2, makePL(10,10,10,10,10, 0), makePL( 0, 0, 0), drop2});
|
||||
tests.add(new Object[]{2, 2, makePL(10,10,10,10,10, 0), makePL(10,10, 0), drop1});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
private Genotype makePL(final int ... PLs) {
|
||||
return AFCalcUnitTest.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), PLs);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "TestCombineGLs")
|
||||
private void testCombineGLs(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
|
||||
testCombineGLsWithDrops(altIndex, nAlts, testg, expected, Collections.<Integer>emptySet());
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "TestCombineGLsWithDrops")
|
||||
private void testCombineGLsWithDrops(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected, Set<Integer> allelesToDrop) {
|
||||
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
|
||||
final Genotype combined = calc.combineGLs(testg, altIndex, allelesToDrop, nAlts);
|
||||
final Genotype combined = calc.combineGLs(testg, altIndex, nAlts);
|
||||
|
||||
Assert.assertEquals(combined.getPL(), expected.getPL(),
|
||||
"Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL()));
|
||||
|
|
@ -120,22 +86,21 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
|
|||
final Genotype gACG = makePL( 0, 1, 2, 3, 4, 5);
|
||||
final Genotype gAGC = makePL( 0, 4, 5, 1, 3, 2);
|
||||
final Genotype gACcombined = makePL(0, 2, 5);
|
||||
final Genotype gACcombined2 = makePL(0, 1, 4);
|
||||
final Genotype gAGcombined = makePL(0, 4, 9);
|
||||
final Genotype gACdropped = makePL(0, 1, 2);
|
||||
final Genotype gAGdropped = makePL(0, 3, 5);
|
||||
|
||||
// biallelic
|
||||
tests.add(new Object[]{vcAC.genotypes(gACcombined).make(), Arrays.asList(vcAC.genotypes(gACcombined).make())});
|
||||
|
||||
// tri-allelic
|
||||
tests.add(new Object[]{vcACG.genotypes(gACG).make(), Arrays.asList(vcAC.genotypes(gACcombined).make(), vcAG.genotypes(gAGdropped).make())});
|
||||
tests.add(new Object[]{vcAGC.genotypes(gAGC).make(), Arrays.asList(vcAG.genotypes(gAGcombined).make(), vcAC.genotypes(gACdropped).make())});
|
||||
tests.add(new Object[]{vcACG.genotypes(gACG).make(), Arrays.asList(vcAC.genotypes(gACcombined).make(), vcAG.genotypes(gAGcombined).make())});
|
||||
tests.add(new Object[]{vcAGC.genotypes(gAGC).make(), Arrays.asList(vcAG.genotypes(gAGcombined).make(), vcAC.genotypes(gACcombined2).make())});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
|
||||
@Test(enabled = false, dataProvider = "TestMakeAlleleConditionalContexts")
|
||||
@Test(enabled = true, dataProvider = "TestMakeAlleleConditionalContexts")
|
||||
private void testMakeAlleleConditionalContexts(final VariantContext vc, final List<VariantContext> expectedVCs) {
|
||||
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
|
||||
final List<VariantContext> biAllelicVCs = calc.makeAlleleConditionalContexts(vc);
|
||||
|
|
@ -148,7 +113,8 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
|
|||
Assert.assertEquals(actual.getAlleles(), expected.getAlleles());
|
||||
|
||||
for ( int j = 0; j < actual.getNSamples(); j++ )
|
||||
Assert.assertEquals(actual.getGenotype(j).getPL(), expected.getGenotype(j).getPL());
|
||||
Assert.assertEquals(actual.getGenotype(j).getPL(), expected.getGenotype(j).getPL(),
|
||||
"expected PLs " + Utils.join(",", expected.getGenotype(j).getPL()) + " not equal to actual " + Utils.join(",", actual.getGenotype(j).getPL()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,9 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy);
|
||||
}
|
||||
|
||||
protected abstract StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker);
|
||||
protected StateTracker makeMaxLikelihood(VariantContext vc, AFCalcResultTracker resultTracker) {
|
||||
return new StateTracker();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AFCalcResult computeLog10PNonRef(final VariantContext vc,
|
||||
|
|
|
|||
|
|
@ -91,6 +91,7 @@ import java.util.*;
|
|||
*/
|
||||
private final static double MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR = Math.log10(1e-20);
|
||||
|
||||
private final static int[] BIALLELIC_NON_INFORMATIVE_PLS = new int[]{0,0,0};
|
||||
private final static List<Allele> BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
|
||||
/**
|
||||
|
|
@ -105,19 +106,23 @@ import java.util.*;
|
|||
|
||||
private final static CompareAFCalcResultsByPNonRef compareAFCalcResultsByPNonRef = new CompareAFCalcResultsByPNonRef();
|
||||
|
||||
final ReferenceDiploidExactAFCalc refModel;
|
||||
/**
|
||||
* The AFCalc model we are using to do the bi-allelic computation
|
||||
*/
|
||||
final AFCalc biAlleleExactModel;
|
||||
|
||||
protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
||||
refModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected StateTracker makeMaxLikelihood(VariantContext vc, AFCalcResultTracker resultTracker) {
|
||||
return refModel.makeMaxLikelihood(vc, resultTracker);
|
||||
biAlleleExactModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy);
|
||||
}
|
||||
|
||||
/**
|
||||
* Trivial subclass that helps with debugging by keeping track of the supporting information for this joint call
|
||||
*/
|
||||
private static class MyAFCalcResult extends AFCalcResult {
|
||||
/**
|
||||
* List of the supporting bi-allelic AFCalcResults that went into making this multi-allelic joint call
|
||||
*/
|
||||
final List<AFCalcResult> supporting;
|
||||
|
||||
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pNonRefByAllele, List<AFCalcResult> supporting) {
|
||||
|
|
@ -129,58 +134,89 @@ import java.util.*;
|
|||
@Override
|
||||
public AFCalcResult computeLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
final List<AFCalcResult> independentResultTrackers = computeAlleleConditionalExact(vc, log10AlleleFrequencyPriors);
|
||||
final List<AFCalcResult> independentResultTrackers = computeAlleleIndependentExact(vc, log10AlleleFrequencyPriors);
|
||||
final List<AFCalcResult> withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers);
|
||||
return combineIndependentPNonRefs(vc, withMultiAllelicPriors);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compute the conditional exact AFCalcResult for each allele in vc independently, returning
|
||||
* the result of each, in order of the alt alleles in VC
|
||||
*
|
||||
* @param vc
|
||||
* @param log10AlleleFrequencyPriors
|
||||
* @return
|
||||
* @param vc the VariantContext we want to analyze
|
||||
* @param log10AlleleFrequencyPriors the priors
|
||||
* @return a list of the AFCalcResults for each bi-allelic sub context of vc
|
||||
*/
|
||||
protected List<AFCalcResult> computeAlleleConditionalExact(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
@Requires({"vc != null", "log10AlleleFrequencyPriors != null"})
|
||||
@Ensures("goodIndependentResult(vc, result)")
|
||||
protected final List<AFCalcResult> computeAlleleIndependentExact(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
final List<AFCalcResult> results = new LinkedList<AFCalcResult>();
|
||||
|
||||
for ( final VariantContext subvc : makeAlleleConditionalContexts(vc) ) {
|
||||
final AFCalcResult resultTracker = refModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors);
|
||||
final AFCalcResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors);
|
||||
results.add(resultTracker);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
protected List<VariantContext> makeAlleleConditionalContexts(final VariantContext vc) {
|
||||
/**
|
||||
* Helper function to ensure that the computeAlleleIndependentExact is returning reasonable results
|
||||
*/
|
||||
private static boolean goodIndependentResult(final VariantContext vc, final List<AFCalcResult> results) {
|
||||
if ( results.size() != vc.getNAlleles() - 1) return false;
|
||||
for ( int i = 0; i < results.size(); i++ ) {
|
||||
if ( results.get(i).getAllelesUsedInGenotyping().size() != 2 )
|
||||
return false;
|
||||
if ( ! results.get(i).getAllelesUsedInGenotyping().contains(vc.getAlternateAllele(i)) )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the bi-allelic variant context for each alt allele in vc with bi-allelic likelihoods, in order
|
||||
*
|
||||
* @param vc the variant context to split. Must have at least one alt allele (i.e., vc.getNAlleles() > 1)
|
||||
* @return a bi-allelic variant context for each alt allele in vc
|
||||
*/
|
||||
@Requires({"vc != null", "vc.getNAlleles() > 1"})
|
||||
@Ensures("result.size() == vc.getNAlleles() - 1")
|
||||
protected final List<VariantContext> makeAlleleConditionalContexts(final VariantContext vc) {
|
||||
final int nAltAlleles = vc.getNAlleles() - 1;
|
||||
final List<VariantContext> vcs = new LinkedList<VariantContext>();
|
||||
|
||||
final List<Allele> afZeroAlleles = new LinkedList<Allele>();
|
||||
for ( int altI = 0; altI < nAltAlleles; altI++ ) {
|
||||
final Allele altAllele = vc.getAlternateAllele(altI);
|
||||
final List<Allele> biallelic = Arrays.asList(vc.getReference(), altAllele);
|
||||
vcs.add(biallelicCombinedGLs(vc, biallelic, afZeroAlleles, altI + 1));
|
||||
//afZeroAlleles.add(altAllele);
|
||||
vcs.add(biallelicCombinedGLs(vc, altI + 1));
|
||||
}
|
||||
|
||||
return vcs;
|
||||
}
|
||||
|
||||
protected VariantContext biallelicCombinedGLs(final VariantContext rootVC, final List<Allele> biallelic, final List<Allele> afZeroAlleles, final int allele2) {
|
||||
/**
|
||||
* Create a single bi-allelic variant context from rootVC with alt allele with index altAlleleIndex
|
||||
*
|
||||
* @param rootVC the root (potentially multi-allelic) variant context
|
||||
* @param altAlleleIndex index of the alt allele, where 1 == first alt allele (0 is the reference allele)
|
||||
* @return a bi-allelic variant context based on rootVC
|
||||
*/
|
||||
@Requires({"rootVC.getNAlleles() > 1", "altAlleleIndex < rootVC.getNAlleles()"})
|
||||
@Ensures({"result.isBiallelic()"})
|
||||
protected final VariantContext biallelicCombinedGLs(final VariantContext rootVC, final int altAlleleIndex) {
|
||||
if ( rootVC.isBiallelic() ) {
|
||||
if ( ! afZeroAlleles.isEmpty() ) throw new IllegalArgumentException("Root VariantContext is biallelic but afZeroAlleles wasn't empty: " + afZeroAlleles);
|
||||
return rootVC;
|
||||
} else {
|
||||
final Set<Integer> allelesToDiscard = new HashSet<Integer>(rootVC.getAlleleIndices(afZeroAlleles));
|
||||
final int nAlts = rootVC.getNAlleles() - 1;
|
||||
final List<Genotype> biallelicGenotypes = new ArrayList<Genotype>(rootVC.getNSamples());
|
||||
for ( final Genotype g : rootVC.getGenotypes() )
|
||||
biallelicGenotypes.add(combineGLs(g, allele2, allelesToDiscard, nAlts));
|
||||
biallelicGenotypes.add(combineGLs(g, altAlleleIndex, nAlts));
|
||||
|
||||
final VariantContextBuilder vcb = new VariantContextBuilder(rootVC);
|
||||
vcb.alleles(biallelic);
|
||||
final Allele altAllele = rootVC.getAlternateAllele(altAlleleIndex - 1);
|
||||
vcb.alleles(Arrays.asList(rootVC.getReference(), altAllele));
|
||||
vcb.genotypes(biallelicGenotypes);
|
||||
return vcb.make();
|
||||
}
|
||||
|
|
@ -201,30 +237,16 @@ import java.util.*;
|
|||
* XB = AB + BC
|
||||
* BB = BB
|
||||
*
|
||||
* Supports the additional mode of simply dropping GLs whose allele index occurs in allelesToDiscard. This is
|
||||
* useful in the case where you want to drop alleles (not combine them), such as above:
|
||||
*
|
||||
* AA AB BB AC BC CC
|
||||
*
|
||||
* and we want to get the bi-allelic GLs for X/B, where X is everything not B, but dropping C (index 2)
|
||||
*
|
||||
* XX = AA (since X = A and C is dropped)
|
||||
* XB = AB
|
||||
* BB = BB
|
||||
*
|
||||
* This allows us to recover partial GLs that correspond to any allele in allelesToDiscard having strictly
|
||||
* AF == 0.
|
||||
*
|
||||
* @param original the original multi-allelic genotype
|
||||
* @param altIndex the index of the alt allele we wish to keep in the biallelic case -- with ref == 0
|
||||
* @param nAlts the total number of alt alleles
|
||||
* @return a new biallelic genotype with appropriate PLs
|
||||
*/
|
||||
@Requires({"original.hasLikelihoods()", "! allelesToDiscard.contains(altIndex)"})
|
||||
@Requires({"original.hasLikelihoods()"}) // TODO -- add ploidy == 2 test "original.getPLs() == null || original.getPLs().length == 3"})
|
||||
@Ensures({"result.hasLikelihoods()", "result.getPL().length == 3"})
|
||||
protected Genotype combineGLs(final Genotype original, final int altIndex, final Set<Integer> allelesToDiscard, final int nAlts ) {
|
||||
protected Genotype combineGLs(final Genotype original, final int altIndex, final int nAlts ) {
|
||||
if ( original.isNonInformative() )
|
||||
return new GenotypeBuilder(original).PL(new int[]{0,0,0}).alleles(BIALLELIC_NOCALL).make();
|
||||
return new GenotypeBuilder(original).PL(BIALLELIC_NON_INFORMATIVE_PLS).alleles(BIALLELIC_NOCALL).make();
|
||||
|
||||
if ( altIndex < 1 || altIndex > nAlts ) throw new IllegalStateException("altIndex must be between 1 and nAlts " + nAlts);
|
||||
|
||||
|
|
@ -234,10 +256,6 @@ import java.util.*;
|
|||
for ( int index = 0; index < normalizedPr.length; index++ ) {
|
||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(index);
|
||||
|
||||
// just continue if we shouldn't include the pair because it's in the discard set
|
||||
if ( discardAllelePair(pair, allelesToDiscard) )
|
||||
continue;
|
||||
|
||||
if ( pair.alleleIndex1 == altIndex ) {
|
||||
if ( pair.alleleIndex2 == altIndex )
|
||||
// hom-alt case
|
||||
|
|
@ -261,11 +279,7 @@ import java.util.*;
|
|||
return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make();
|
||||
}
|
||||
|
||||
protected boolean discardAllelePair(final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair, Set<Integer> allelesToDiscard) {
|
||||
return allelesToDiscard.contains(pair.alleleIndex1) || allelesToDiscard.contains(pair.alleleIndex2);
|
||||
}
|
||||
|
||||
protected List<AFCalcResult> applyMultiAllelicPriors(final List<AFCalcResult> conditionalPNonRefResults) {
|
||||
protected final List<AFCalcResult> applyMultiAllelicPriors(final List<AFCalcResult> conditionalPNonRefResults) {
|
||||
final ArrayList<AFCalcResult> sorted = new ArrayList<AFCalcResult>(conditionalPNonRefResults);
|
||||
|
||||
// sort the results, so the most likely allele is first
|
||||
|
|
@ -289,6 +303,8 @@ import java.util.*;
|
|||
/**
|
||||
* Take the independent estimates of pNonRef for each alt allele and combine them into a single result
|
||||
*
|
||||
* TODO -- add more docs
|
||||
*
|
||||
* @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently
|
||||
*/
|
||||
protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc,
|
||||
|
|
|
|||
|
|
@ -1,13 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||
protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
||||
}
|
||||
|
||||
protected StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker) {
|
||||
return new StateTracker();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue