Count the number of evaluations in AFResult; expand unit tests

-- AFResult now tracks the number of evaluations (turns through the model calculation) so we can now compute the scaling of exact model itself as a function of n samples
-- Added unittests for priors (flat and human)
-- Discovered nasty general ploidy bug (enabled with Guillermo_FIXME)
This commit is contained in:
Mark DePristo 2012-10-01 14:14:44 -05:00
parent 33c7841c4d
commit f8ef4332de
4 changed files with 71 additions and 25 deletions

View File

@ -198,7 +198,7 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
combinedPoolLikelihoods.add(set);
for (int p=1; p<genotypeLikelihoods.size(); p++) {
result.reset();
result.reset(); // TODO -- why is this here? It makes it hard to track the n evaluation
combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, genotypeLikelihoods.get(p), combinedPloidy, ploidyPerPool,
numAlleles, log10AlleleFrequencyPriors, result);
combinedPloidy = ploidyPerPool + combinedPloidy; // total number of chromosomes in combinedLikelihoods
@ -230,6 +230,7 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
// keep processing while we have AC conformations that need to be calculated
MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
while ( !ACqueue.isEmpty() ) {
result.incNEvaluations();
// compute log10Likelihoods
final ExactACset ACset = ACqueue.remove();
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLikelihoodSeen, ACqueue, indexesToACset);

View File

@ -56,6 +56,8 @@ public class AlleleFrequencyCalculationResult {
private double log10LikelihoodOfAFzero;
private double log10PosteriorOfAFzero;
int nEvaluations = 0;
/**
* The list of alleles actually used in computing the AF
*/
@ -122,6 +124,15 @@ public class AlleleFrequencyCalculationResult {
return alleleCountsOfMAP;
}
/**
* Returns the number of cycles used to evaluate the pNonRef for this AF calculation
*
* @return the number of evaluations required to produce the answer for this AF calculation
*/
public int getnEvaluations() {
return nEvaluations;
}
/**
* TODO -- eric what is this supposed to return? my unit tests don't do what I think they should
*
@ -191,6 +202,13 @@ public class AlleleFrequencyCalculationResult {
allelesUsedInGenotyping = null;
}
/**
* Tell this result we used one more evaluation cycle
*/
protected void incNEvaluations() {
nEvaluations++;
}
protected void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) {
if ( log10LofK > log10MLE ) {
log10MLE = log10LofK;

View File

@ -147,6 +147,8 @@ public class DiploidExactAFCalculation extends ExactAFCalculation {
// keep processing while we have AC conformations that need to be calculated
MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
while ( !ACqueue.isEmpty() ) {
result.incNEvaluations(); // keep track of the number of evaluations
// compute log10Likelihoods
final ExactACset set = ACqueue.remove();
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLikelihoodSeen, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.testng.Assert;
@ -21,6 +22,9 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
static Genotype AA1, AB1, BB1, NON_INFORMATIVE1;
static Genotype AA2, AB2, AC2, BB2, BC2, CC2, NON_INFORMATIVE2;
final double[] FLAT_3SAMPLE_PRIORS = new double[2*3+1]; // flat priors
final private static boolean INCLUDE_BIALLELIC = true;
final private static boolean INCLUDE_TRIALLELIC = true;
final private static boolean Guillermo_FIXME = false; // TODO -- can only be enabled when GdA fixes bug
@BeforeSuite
public void before() {
@ -51,13 +55,15 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
final ExactAFCalculation calc;
final int[] expectedACs;
final double[] priors;
final String priorName;
private GetGLsTest(final ExactAFCalculation calculation, int numAltAlleles, List<Genotype> arg, final double[] priors) {
private GetGLsTest(final ExactAFCalculation calculation, int numAltAlleles, List<Genotype> arg, final double[] priors, final String priorName) {
super(GetGLsTest.class);
GLs = GenotypesContext.create(new ArrayList<Genotype>(arg));
this.numAltAlleles = numAltAlleles;
this.calc = calculation;
this.priors = priors;
this.priorName = priorName;
expectedACs = new int[numAltAlleles+1];
for ( int alleleI = 0; alleleI < expectedACs.length; alleleI++ ) {
@ -103,8 +109,8 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
}
public String toString() {
return String.format("%s model=%s input=%s", super.toString(), calc.getClass().getSimpleName(),
GLs.size() > 5 ? String.format("%d samples", GLs.size()) : GLs);
return String.format("%s model=%s prior=%s input=%s", super.toString(), calc.getClass().getSimpleName(),
priorName, GLs.size() > 5 ? String.format("%d samples", GLs.size()) : GLs);
}
}
@ -116,17 +122,26 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
for ( final int nSamples : Arrays.asList(1, 2, 3, 4) ) {
final DiploidExactAFCalculation diploidCalc = new DiploidExactAFCalculation(nSamples, 4);
final GeneralPloidyExactAFCalculation generalCalc = new GeneralPloidyExactAFCalculation(nSamples, 4, 2);
final double[] priors = new double[2*nSamples+1]; // flat priors
for ( ExactAFCalculation model : Arrays.asList(diploidCalc, generalCalc) ) {
// bi-allelic
if ( nSamples <= biAllelicSamples.size() )
for ( List<Genotype> genotypes : Utils.makePermutations(biAllelicSamples, nSamples, true) )
new GetGLsTest(model, 1, genotypes, priors);
final int nPriorValues = 2*nSamples+1;
final double[] flatPriors = MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
final double[] humanPriors = new double[nPriorValues];
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues-1, humanPriors, 0.001);
// tri-allelic
for ( List<Genotype> genotypes : Utils.makePermutations(triAllelicSamples, nSamples, true) )
new GetGLsTest(model, 2, genotypes, priors);
for ( final double[] priors : Arrays.asList(flatPriors, humanPriors) ) { // , humanPriors) ) {
for ( ExactAFCalculation model : Arrays.asList(diploidCalc, generalCalc) ) {
final String priorName = priors == humanPriors ? "human" : "flat";
// bi-allelic
if ( INCLUDE_BIALLELIC && nSamples <= biAllelicSamples.size() )
for ( List<Genotype> genotypes : Utils.makePermutations(biAllelicSamples, nSamples, true) )
new GetGLsTest(model, 1, genotypes, priors, priorName);
// tri-allelic
if ( INCLUDE_TRIALLELIC && ( ! priorName.equals("human") || model != generalCalc || Guillermo_FIXME ) )
for ( List<Genotype> genotypes : Utils.makePermutations(triAllelicSamples, nSamples, true) )
new GetGLsTest(model, 2, genotypes, priors, priorName);
}
}
}
@ -166,11 +181,11 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
final double[] priors = new double[2*nSamples+1]; // flat priors
for ( ExactAFCalculation model : Arrays.asList(diploidCalc, generalCalc) ) {
final GetGLsTest onlyInformative = new GetGLsTest(model, testData.nAltAlleles, testData.called, priors);
final GetGLsTest onlyInformative = new GetGLsTest(model, testData.nAltAlleles, testData.called, priors, "flat");
for ( int rotation = 0; rotation < nSamples; rotation++ ) {
Collections.rotate(samples, 1);
final GetGLsTest withNonInformative = new GetGLsTest(model, testData.nAltAlleles, samples, priors);
final GetGLsTest withNonInformative = new GetGLsTest(model, testData.nAltAlleles, samples, priors, "flat");
tests.add(new Object[]{onlyInformative, withNonInformative});
}
}
@ -202,36 +217,46 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
Assert.assertEquals(actual.getAllelesUsedInGenotyping(), expected.getAllelesUsedInGenotyping());
}
private void testResultSimple(final GetGLsTest cfg) {
final AlleleFrequencyCalculationResult result = cfg.execute();
if ( cfg.isNonRef() ) {
//logger.warn("pNonRef = " + result.getLog10PosteriorOfAFzero());
Assert.assertTrue(result.getLog10PosteriorOfAFzero() < -1, "Genotypes imply pNonRef > 0 but we had posterior AF = 0 of " + result.getLog10PosteriorOfAFzero());
} else {
// TODO -- I don't know why these two don't work
//Assert.assertTrue(result.getLog10PosteriorOfAFzero() > -1, "Genotypes imply pNonRef is low but we had posterior AF = 0 of " + result.getLog10PosteriorOfAFzero());
// TODO -- why does this fail?
//Assert.assertTrue(result.getLog10PosteriorsMatrixSumWithoutAFzero() > -1, "Genotypes imply pNonRef > 0 but posterior sum over all non-AF0 fields was only " + result.getLog10PosteriorsMatrixSumWithoutAFzero());
// todo -- I'm not sure this is supposed to be true
//Assert.assertEquals(Math.pow(10, result.getLog10PosteriorOfAFzero()) + Math.pow(10, result.getLog10PosteriorsMatrixSumWithoutAFzero()), 1.0, 1e-3, "Total posterior prob didn't sum to 1");
// TODO -- I don't know why these two don't work
//Assert.assertTrue(result.getLog10PosteriorsMatrixSumWithoutAFzero() > -1,
// "Genotypes imply pNonRef is low but posterior sum over all non-AF0 fields was " + result.getLog10PosteriorsMatrixSumWithoutAFzero()
// + " pNonRef = " + result.getLog10PosteriorOfAFzero());
}
final int minNumberOfEvaluations = cfg.getVC().getCalledChrCount();
Assert.assertTrue(result.getnEvaluations() >= minNumberOfEvaluations,
"Number of evaluations " + result.getnEvaluations() + " must be at least " + minNumberOfEvaluations);
Assert.assertNotNull(result.getAllelesUsedInGenotyping());
Assert.assertTrue(cfg.getAlleles().containsAll(result.getAllelesUsedInGenotyping()), "Result object has alleles not in our initial allele list");
for ( int altAlleleI = 0; altAlleleI < cfg.numAltAlleles; altAlleleI++ ) {
int expectedAlleleCount = cfg.getExpectedAltAC(altAlleleI);
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[altAlleleI];
int calcAC_MLE = result.getAlleleCountsOfMLE()[altAlleleI];
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
final Allele allele = cfg.getAlleles().get(altAlleleI+1);
Assert.assertEquals(calcAC_MLE, expectedAlleleCount, "MLE AC not equal to expected AC for allele " + allele);
}
// not true in general
// final int AC_MLE = (int)MathUtils.sum(result.getAlleleCountsOfMLE());
// final int AC_MAP = (int)MathUtils.sum(result.getAlleleCountsOfMAP());
// Assert.assertTrue(AC_MAP <= AC_MLE, "Requires sum MAP AC <= sum MLE AC for but saw " + AC_MAP + " vs " + AC_MLE);
}
@Test
public void testLargeGLs() {
final Genotype BB = makePL(Arrays.asList(C, C), 20000000, 20000000, 0);
GetGLsTest cfg = new GetGLsTest(new DiploidExactAFCalculation(1, 1), 1, Arrays.asList(BB, BB, BB), FLAT_3SAMPLE_PRIORS);
GetGLsTest cfg = new GetGLsTest(new DiploidExactAFCalculation(1, 1), 1, Arrays.asList(BB, BB, BB), FLAT_3SAMPLE_PRIORS, "flat");
final AlleleFrequencyCalculationResult result = cfg.execute();
@ -243,7 +268,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
public void testMismatchedGLs() {
final Genotype AB = makePL(Arrays.asList(A,C), 2000, 0, 2000, 2000, 2000, 2000);
final Genotype AC = makePL(Arrays.asList(A,G), 100, 100, 100, 0, 100, 100);
GetGLsTest cfg = new GetGLsTest(new DiploidExactAFCalculation(2, 2), 2, Arrays.asList(AB, AC), FLAT_3SAMPLE_PRIORS);
GetGLsTest cfg = new GetGLsTest(new DiploidExactAFCalculation(2, 2), 2, Arrays.asList(AB, AC), FLAT_3SAMPLE_PRIORS, "flat");
final AlleleFrequencyCalculationResult result = cfg.execute();