More ExactModel cleanup
-- UnifiedGenotyperEngine no longer keeps a thread local double[2] array for the normalized posteriors array. This is way heavy-weight compared to just making the array each time. -- Added getNormalizedPosteriorOfAFGTZero and getNormalizedPosteriorOfAFzero to AFResult object. That's the place it should really live -- Add tests for priors, uncovering bugs in the contracts of the tri-allelic priors w.r.t. the AC of the MAP. Added TODOs
This commit is contained in:
parent
f8ef4332de
commit
17ca543937
|
|
@ -38,6 +38,8 @@ import java.util.List;
|
|||
* Date: Dec 14, 2011
|
||||
*
|
||||
* Useful helper class to communicate the results of the allele frequency calculation
|
||||
*
|
||||
* TODO -- WHAT IS THE CONTRACT ON MAP AC AND P NON REF?
|
||||
*/
|
||||
public class AlleleFrequencyCalculationResult {
|
||||
// These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles
|
||||
|
|
@ -179,6 +181,28 @@ public class AlleleFrequencyCalculationResult {
|
|||
return allelesUsedInGenotyping;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the normalized -- across all AFs -- of AC == 0, NOT LOG10
|
||||
* @return
|
||||
*/
|
||||
@Ensures({"result >= 0.0", "result <= 1.0"})
|
||||
public double getNormalizedPosteriorOfAFzero() {
|
||||
return getNormalizedPosteriors()[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the normalized -- across all AFs -- of AC > 0, NOT LOG10
|
||||
* @return
|
||||
*/
|
||||
@Ensures({"result >= 0.0", "result <= 1.0"})
|
||||
public double getNormalizedPosteriorOfAFGTZero() {
|
||||
return getNormalizedPosteriors()[1];
|
||||
}
|
||||
|
||||
private double[] getNormalizedPosteriors() {
|
||||
final double[] posteriors = new double[]{ getLog10PosteriorOfAFzero(), getLog10PosteriorsMatrixSumWithoutAFzero() };
|
||||
return MathUtils.normalizeFromLog10(posteriors);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
|
|||
|
|
@ -82,7 +82,6 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// the allele frequency likelihoods and posteriors (allocated once as an optimization)
|
||||
private ThreadLocal<AlleleFrequencyCalculationResult> alleleFrequencyCalculationResult = new ThreadLocal<AlleleFrequencyCalculationResult>();
|
||||
private ThreadLocal<double[]> posteriorsArray = new ThreadLocal<double[]>();
|
||||
|
||||
// because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
|
||||
private final double[] log10AlleleFrequencyPriorsSNPs;
|
||||
|
|
@ -357,7 +356,6 @@ public class UnifiedGenotyperEngine {
|
|||
if ( afcm.get() == null ) {
|
||||
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
|
||||
alleleFrequencyCalculationResult.set(new AlleleFrequencyCalculationResult(UAC.MAX_ALTERNATE_ALLELES));
|
||||
posteriorsArray.set(new double[2]);
|
||||
}
|
||||
AlleleFrequencyCalculationResult AFresult = alleleFrequencyCalculationResult.get();
|
||||
|
||||
|
|
@ -402,16 +400,16 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
// calculate p(f>0):
|
||||
final double[] normalizedPosteriors = generateNormalizedPosteriors(AFresult, posteriorsArray.get());
|
||||
final double PofF = 1.0 - normalizedPosteriors[0];
|
||||
final double PoFEq0 = AFresult.getNormalizedPosteriorOfAFzero();
|
||||
final double PoFGT0 = AFresult.getNormalizedPosteriorOfAFGTZero();
|
||||
|
||||
double phredScaledConfidence;
|
||||
if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PoFEq0);
|
||||
if ( Double.isInfinite(phredScaledConfidence) )
|
||||
phredScaledConfidence = -10.0 * AFresult.getLog10PosteriorOfAFzero();
|
||||
} else {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PoFGT0);
|
||||
if ( Double.isInfinite(phredScaledConfidence) ) {
|
||||
final double sum = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
|
||||
|
|
@ -422,7 +420,7 @@ public class UnifiedGenotyperEngine {
|
|||
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestGuessIsRef) ) {
|
||||
// technically, at this point our confidence in a reference call isn't accurately estimated
|
||||
// because it didn't take into account samples with no data, so let's get a better estimate
|
||||
return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getTheta(model), true, 1.0 - PofF);
|
||||
return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getTheta(model), true, PoFGT0);
|
||||
}
|
||||
|
||||
// start constructing the resulting VC
|
||||
|
|
@ -438,7 +436,7 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// print out stats if we have a writer
|
||||
if ( verboseWriter != null && !limitedContext )
|
||||
printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, model);
|
||||
printVerboseData(refContext.getLocus().toString(), vc, PoFGT0, phredScaledConfidence, model);
|
||||
|
||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||
final HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
|
@ -521,13 +519,7 @@ public class UnifiedGenotyperEngine {
|
|||
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall, perReadAlleleLikelihoodMap);
|
||||
}
|
||||
|
||||
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
|
||||
}
|
||||
|
||||
public static double[] generateNormalizedPosteriors(final AlleleFrequencyCalculationResult AFresult, final double[] normalizedPosteriors) {
|
||||
normalizedPosteriors[0] = AFresult.getLog10PosteriorOfAFzero();
|
||||
normalizedPosteriors[1] = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
return MathUtils.normalizeFromLog10(normalizedPosteriors);
|
||||
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PoFGT0));
|
||||
}
|
||||
|
||||
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
|
|
|||
|
|
@ -236,6 +236,27 @@ public class Utils {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string of the values in joined by separator, such as A,B,C
|
||||
*
|
||||
* @param separator
|
||||
* @param doubles
|
||||
* @return
|
||||
*/
|
||||
public static String join(String separator, double[] doubles) {
|
||||
if ( doubles == null || doubles.length == 0)
|
||||
return "";
|
||||
else {
|
||||
StringBuilder ret = new StringBuilder();
|
||||
ret.append(doubles[0]);
|
||||
for (int i = 1; i < doubles.length; ++i) {
|
||||
ret.append(separator);
|
||||
ret.append(doubles[i]);
|
||||
}
|
||||
return ret.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string of the form elt1.toString() [sep elt2.toString() ... sep elt.toString()] for a collection of
|
||||
* elti objects (note there's no actual space between sep and the elti elements). Returns
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -195,12 +196,12 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "wellFormedGLs")
|
||||
@Test(enabled = true, dataProvider = "wellFormedGLs")
|
||||
public void testGLs(GetGLsTest cfg) {
|
||||
testResultSimple(cfg);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "GLsWithNonInformative", dependsOnMethods = "testGLs")
|
||||
@Test(enabled = true, dataProvider = "GLsWithNonInformative", dependsOnMethods = "testGLs")
|
||||
public void testGLsWithNonInformative(GetGLsTest onlyInformative, GetGLsTest withNonInformative) {
|
||||
final AlleleFrequencyCalculationResult expected = onlyInformative.execute();
|
||||
final AlleleFrequencyCalculationResult actual = withNonInformative.execute();
|
||||
|
|
@ -220,18 +221,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
private void testResultSimple(final GetGLsTest cfg) {
|
||||
final AlleleFrequencyCalculationResult result = cfg.execute();
|
||||
|
||||
if ( cfg.isNonRef() ) {
|
||||
//logger.warn("pNonRef = " + result.getLog10PosteriorOfAFzero());
|
||||
Assert.assertTrue(result.getLog10PosteriorOfAFzero() < -1, "Genotypes imply pNonRef > 0 but we had posterior AF = 0 of " + result.getLog10PosteriorOfAFzero());
|
||||
} else {
|
||||
// TODO -- I don't know why these two don't work
|
||||
//Assert.assertTrue(result.getLog10PosteriorOfAFzero() > -1, "Genotypes imply pNonRef is low but we had posterior AF = 0 of " + result.getLog10PosteriorOfAFzero());
|
||||
|
||||
// TODO -- I don't know why these two don't work
|
||||
//Assert.assertTrue(result.getLog10PosteriorsMatrixSumWithoutAFzero() > -1,
|
||||
// "Genotypes imply pNonRef is low but posterior sum over all non-AF0 fields was " + result.getLog10PosteriorsMatrixSumWithoutAFzero()
|
||||
// + " pNonRef = " + result.getLog10PosteriorOfAFzero());
|
||||
}
|
||||
Assert.assertEquals(result.getNormalizedPosteriorOfAFzero() + result.getNormalizedPosteriorOfAFGTZero(), 1.0, 1e-4);
|
||||
|
||||
final int minNumberOfEvaluations = cfg.getVC().getCalledChrCount();
|
||||
Assert.assertTrue(result.getnEvaluations() >= minNumberOfEvaluations,
|
||||
|
|
@ -247,13 +237,18 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
Assert.assertEquals(calcAC_MLE, expectedAlleleCount, "MLE AC not equal to expected AC for allele " + allele);
|
||||
}
|
||||
|
||||
// not true in general
|
||||
// final int AC_MLE = (int)MathUtils.sum(result.getAlleleCountsOfMLE());
|
||||
// TODO
|
||||
// TODO -- enable when we understand the contract between AC_MAP and pNonRef
|
||||
// TODO
|
||||
// final int AC_MAP = (int)MathUtils.sum(result.getAlleleCountsOfMAP());
|
||||
// Assert.assertTrue(AC_MAP <= AC_MLE, "Requires sum MAP AC <= sum MLE AC for but saw " + AC_MAP + " vs " + AC_MLE);
|
||||
// if ( AC_MAP > 0 ) {
|
||||
// Assert.assertTrue(result.getNormalizedPosteriorOfAFzero() < 0.50, "MAP AC " + AC_MAP + " > 0 but we had posterior AF = 0 > 0.5 of " + result.getNormalizedPosteriorOfAFzero());
|
||||
// } else {
|
||||
// Assert.assertTrue(result.getNormalizedPosteriorOfAFzero() > 0.50, "MAP AC " + AC_MAP + " == 0 but we had posterior AF = 0 < 0.5 of " + result.getNormalizedPosteriorOfAFzero());
|
||||
// }
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = true)
|
||||
public void testLargeGLs() {
|
||||
final Genotype BB = makePL(Arrays.asList(C, C), 20000000, 20000000, 0);
|
||||
GetGLsTest cfg = new GetGLsTest(new DiploidExactAFCalculation(1, 1), 1, Arrays.asList(BB, BB, BB), FLAT_3SAMPLE_PRIORS, "flat");
|
||||
|
|
@ -264,7 +259,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
Assert.assertEquals(calculatedAlleleCount, 6);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = true)
|
||||
public void testMismatchedGLs() {
|
||||
final Genotype AB = makePL(Arrays.asList(A,C), 2000, 0, 2000, 2000, 2000, 2000);
|
||||
final Genotype AC = makePL(Arrays.asList(A,G), 100, 100, 100, 0, 100, 100);
|
||||
|
|
@ -275,4 +270,81 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
Assert.assertEquals(result.getAlleleCountsOfMAP()[0], 1);
|
||||
Assert.assertEquals(result.getAlleleCountsOfMAP()[1], 1);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "Models")
|
||||
public Object[][] makeModels() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
tests.add(new Object[]{new DiploidExactAFCalculation(1, 4)});
|
||||
tests.add(new Object[]{new GeneralPloidyExactAFCalculation(1, 4, 2)});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "Models")
|
||||
public void testBiallelicPriors(final ExactAFCalculation model) {
|
||||
final int REF_PL = 10;
|
||||
final Genotype AB = makePL(Arrays.asList(A,C), REF_PL, 0, 10000);
|
||||
|
||||
for ( int log10NonRefPrior = 1; log10NonRefPrior < 100*REF_PL; log10NonRefPrior += 1 ) {
|
||||
final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior);
|
||||
final double[] priors = MathUtils.toLog10(new double[]{refPrior, (1-refPrior) / 2, (1-refPrior) / 2});
|
||||
GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior);
|
||||
final AlleleFrequencyCalculationResult result = cfg.execute();
|
||||
final int actualAC = result.getAlleleCountsOfMAP()[0];
|
||||
|
||||
final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0];
|
||||
final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1];
|
||||
final boolean expectNonRef = pRefWithPrior <= pHetWithPrior;
|
||||
|
||||
if ( expectNonRef )
|
||||
Assert.assertTrue(result.getNormalizedPosteriorOfAFGTZero() > 0.5);
|
||||
else
|
||||
Assert.assertTrue(result.getNormalizedPosteriorOfAFGTZero() < 0.5);
|
||||
|
||||
final int expectedAC = expectNonRef ? 1 : 0;
|
||||
Assert.assertEquals(actualAC, expectedAC,
|
||||
"actual AC with priors " + log10NonRefPrior + " not expected "
|
||||
+ expectedAC + " priors " + Utils.join(",", priors));
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = false, dataProvider = "Models")
|
||||
public void testTriallelicPriors(final ExactAFCalculation model) {
|
||||
// TODO
|
||||
// TODO
|
||||
// TODO THIS SEEMS TO ID A BUG IN THE EXACT MODEL FOR MULTI-ALLELICS, AS THE
|
||||
// TODO SECOND ALLELE ISN'T HAVING A SQUARED PRIOR. TALK TO ERIC AND CONFIRM
|
||||
// TODO
|
||||
// TODO
|
||||
final int REF_PL_AB = 10, REF_PL_AC = 20; // first AC goes, then AB
|
||||
final Genotype AB = makePL(Arrays.asList(A,C), REF_PL_AB, 0, 10000, 10000, 10000);
|
||||
final Genotype AC = makePL(Arrays.asList(A, G), REF_PL_AC, 10000, 10000, 0, 10000, 10000);
|
||||
|
||||
for ( int log10NonRefPrior = 1; log10NonRefPrior < 100*REF_PL_AC; log10NonRefPrior += 1 ) {
|
||||
final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior);
|
||||
final double nonRefPrior = (1-refPrior) / 2;
|
||||
final double[] priors = MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior, nonRefPrior, nonRefPrior, nonRefPrior});
|
||||
GetGLsTest cfg = new GetGLsTest(model, 2, Arrays.asList(AB, AC), priors, "pNonRef" + log10NonRefPrior);
|
||||
final AlleleFrequencyCalculationResult result = cfg.execute();
|
||||
final int actualAC_AB = result.getAlleleCountsOfMAP()[0];
|
||||
|
||||
final double pRefABWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0];
|
||||
final double pHetABWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1];
|
||||
final int expectedAC_AB = pRefABWithPrior <= pHetABWithPrior ? 1 : 0;
|
||||
Assert.assertEquals(actualAC_AB, expectedAC_AB,
|
||||
"actual AC with priors " + log10NonRefPrior + " not expected "
|
||||
+ expectedAC_AB + " priors " + Utils.join(",", priors));
|
||||
|
||||
final double nonRefPriorSecondAllele = Math.pow(nonRefPrior, 2);
|
||||
final double refPriorSecondAllele = 1 - nonRefPriorSecondAllele;
|
||||
final int actualAC_AC = result.getAlleleCountsOfMAP()[1];
|
||||
final double pRefACWithPrior = AB.getLikelihoods().getAsVector()[0] + Math.log10(refPriorSecondAllele);
|
||||
final double pHetACWithPrior = AC.getLikelihoods().getAsVector()[3] + Math.log10(nonRefPriorSecondAllele);
|
||||
final int expectedAC_AC = pRefACWithPrior <= pHetACWithPrior ? 1 : 0;
|
||||
Assert.assertEquals(actualAC_AC, expectedAC_AC,
|
||||
"actual AC with priors " + log10NonRefPrior + " not expected "
|
||||
+ expectedAC_AC + " priors " + Utils.join(",", priors));
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue