Intermediate commit II on simplifying AFCalcResult
-- All of the code now uses the AFCalc object, not the not package protected AFCalcResultTracker. Nearly all unit tests pass (expect for a contract failing one that will be dealt with in subsequent commit), due to -Infinity values from normalizeLog10. -- Changed the way that UnifiedGenotyper decides if the best model is non-ref. Previously looked at the MAP AC, but the MAP AC values are no longer provided by AFCalcResult. This is on purpose, because the MAP isn't a meaningful quantity for the exact model (i.e., everything is going to go to MLE AC in some upcoming commit). If you want to understand why come talk to me. Now uses the isPolymorphic function and the EMIT confidence, so that if pNonRef > EMIT then the site is poly, otherwise it's mono.
This commit is contained in:
parent
06687bfaf6
commit
4f1b1c4228
|
|
@ -61,7 +61,7 @@ public class ExactAFCalculationPerformanceTest {
|
|||
final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL);
|
||||
|
||||
timer.start();
|
||||
final AFCalcResultTracker resultTracker = calc.getLog10PNonRef(vc, priors);
|
||||
final AFCalcResult resultTracker = calc.getLog10PNonRef(vc, priors);
|
||||
final long runtime = timer.getElapsedTimeNano();
|
||||
|
||||
int otherAC = 0;
|
||||
|
|
@ -127,7 +127,7 @@ public class ExactAFCalculationPerformanceTest {
|
|||
vcb.genotypes(genotypes);
|
||||
|
||||
timer.start();
|
||||
final AFCalcResultTracker resultTracker = calc.getLog10PNonRef(vcb.make(), priors);
|
||||
final AFCalcResult resultTracker = calc.getLog10PNonRef(vcb.make(), priors);
|
||||
final long runtime = timer.getElapsedTimeNano();
|
||||
|
||||
final List<Object> columns = new LinkedList<Object>(coreValues);
|
||||
|
|
@ -157,7 +157,7 @@ public class ExactAFCalculationPerformanceTest {
|
|||
final VariantContext vc = testBuilder.makeACTest(ac, nNonInformative, nonTypePL);
|
||||
|
||||
timer.start();
|
||||
final AFCalcResultTracker resultTracker = calc.getLog10PNonRef(vc, priors);
|
||||
final AFCalcResult resultTracker = calc.getLog10PNonRef(vc, priors);
|
||||
final long runtime = timer.getElapsedTimeNano();
|
||||
|
||||
final List<Object> columns = new LinkedList<Object>(coreValues);
|
||||
|
|
@ -219,9 +219,9 @@ public class ExactAFCalculationPerformanceTest {
|
|||
final VariantContext vc = testBuilder.makeACTest(new int[]{ac}, 0, 100);
|
||||
|
||||
final SimpleTimer timer = new SimpleTimer().start();
|
||||
final AFCalcResultTracker resultTracker = testBuilder.makeModel().getLog10PNonRef(vc, testBuilder.makePriors());
|
||||
final AFCalcResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vc, testBuilder.makePriors());
|
||||
final long runtime = timer.getElapsedTimeNano();
|
||||
logger.info("result " + resultTracker.getNormalizedPosteriorOfAFGTZero());
|
||||
logger.info("result " + resultTracker.getLog10PosteriorOfAFGT0());
|
||||
logger.info("runtime " + runtime);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
static int sampleNameCounter = 0;
|
||||
static Genotype AA1, AB1, BB1, NON_INFORMATIVE1;
|
||||
static Genotype AA2, AB2, AC2, BB2, BC2, CC2, NON_INFORMATIVE2;
|
||||
final double[] FLAT_3SAMPLE_PRIORS = new double[2*3+1]; // flat priors
|
||||
final double[] FLAT_3SAMPLE_PRIORS = MathUtils.normalizeFromLog10(new double[2*3+1], true); // flat priors
|
||||
final private static boolean INCLUDE_BIALLELIC = true;
|
||||
final private static boolean INCLUDE_TRIALLELIC = true;
|
||||
final private static boolean Guillermo_FIXME = false; // TODO -- can only be enabled when GdA fixes bug
|
||||
|
|
@ -76,11 +76,11 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
public AFCalcResultTracker execute() {
|
||||
public AFCalcResult execute() {
|
||||
return getCalc().getLog10PNonRef(getVC(), getPriors());
|
||||
}
|
||||
|
||||
public AFCalcResultTracker executeRef() {
|
||||
public AFCalcResult executeRef() {
|
||||
final ExactAFCalc ref = new ReferenceDiploidExactAFCalc(getCalc().nSamples, getCalc().getMaxAltAlleles());
|
||||
return ref.getLog10PNonRef(getVC(), getPriors());
|
||||
}
|
||||
|
|
@ -185,7 +185,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
final ExactAFCalc generalCalc = new GeneralPloidyExactAFCalc(nSamples, 4, 2);
|
||||
final ExactAFCalc indCalc = new IndependentAllelesDiploidExactAFCalc(nSamples, 4);
|
||||
|
||||
final double[] priors = new double[2*nSamples+1]; // flat priors
|
||||
final double[] priors = MathUtils.normalizeFromLog10(new double[2*nSamples+1], true); // flat priors
|
||||
|
||||
for ( ExactAFCalc model : Arrays.asList(diploidCalc, optDiploidCalc, generalCalc, indCalc) ) {
|
||||
final GetGLsTest onlyInformative = new GetGLsTest(model, testData.nAltAlleles, testData.called, priors, "flat");
|
||||
|
|
@ -209,28 +209,18 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
|
||||
@Test(enabled = true, dataProvider = "GLsWithNonInformative", dependsOnMethods = "testGLs")
|
||||
public void testGLsWithNonInformative(GetGLsTest onlyInformative, GetGLsTest withNonInformative) {
|
||||
final AFCalcResultTracker expected = onlyInformative.execute();
|
||||
final AFCalcResultTracker actual = withNonInformative.execute();
|
||||
final AFCalcResult expected = onlyInformative.execute();
|
||||
final AFCalcResult actual = withNonInformative.execute();
|
||||
|
||||
testResultSimple(withNonInformative);
|
||||
|
||||
Assert.assertEquals(actual.getLog10PosteriorOfAFzero(), expected.getLog10LikelihoodOfAFzero());
|
||||
Assert.assertEquals(actual.getLog10LikelihoodOfAFzero(), expected.getLog10LikelihoodOfAFzero());
|
||||
Assert.assertEquals(actual.getLog10PosteriorsMatrixSumWithoutAFzero(), expected.getLog10PosteriorsMatrixSumWithoutAFzero());
|
||||
Assert.assertEquals(actual.getAlleleCountsOfMAP(), expected.getAlleleCountsOfMAP());
|
||||
Assert.assertEquals(actual.getAlleleCountsOfMLE(), expected.getAlleleCountsOfMLE());
|
||||
Assert.assertEquals(actual.getLog10MAP(), expected.getLog10MAP());
|
||||
Assert.assertEquals(actual.getLog10MLE(), expected.getLog10MLE());
|
||||
Assert.assertEquals(actual.getAllelesUsedInGenotyping(), expected.getAllelesUsedInGenotyping());
|
||||
compareAFCalcResults(actual, expected);
|
||||
}
|
||||
|
||||
private void testResultSimple(final GetGLsTest cfg) {
|
||||
final AFCalcResultTracker refResultTracker = cfg.executeRef();
|
||||
final AFCalcResultTracker resultTracker = cfg.execute();
|
||||
final AFCalcResult refResultTracker = cfg.executeRef();
|
||||
final AFCalcResult resultTracker = cfg.execute();
|
||||
|
||||
compareToRefResult(refResultTracker, resultTracker);
|
||||
|
||||
Assert.assertEquals(resultTracker.getNormalizedPosteriorOfAFzero() + resultTracker.getNormalizedPosteriorOfAFGTZero(), 1.0, 1e-4);
|
||||
compareAFCalcResults(resultTracker, refResultTracker);
|
||||
|
||||
// final int minNumberOfEvaluations = cfg.getVC().getCalledChrCount();
|
||||
// Assert.assertTrue(result.getnEvaluations() >= minNumberOfEvaluations,
|
||||
|
|
@ -257,20 +247,17 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
// }
|
||||
}
|
||||
|
||||
private void compareToRefResult(final AFCalcResultTracker refResultTracker,
|
||||
final AFCalcResultTracker resultTracker) {
|
||||
final double TOLERANCE = 1;
|
||||
// MAP may not be equal
|
||||
// Assert.assertEquals(result.getAlleleCountsOfMAP(), refResult.getAlleleCountsOfMAP());
|
||||
Assert.assertEquals(resultTracker.getAlleleCountsOfMLE(), refResultTracker.getAlleleCountsOfMLE());
|
||||
Assert.assertEquals(resultTracker.getAllelesUsedInGenotyping(), refResultTracker.getAllelesUsedInGenotyping());
|
||||
Assert.assertEquals(resultTracker.getLog10LikelihoodOfAFzero(), refResultTracker.getLog10LikelihoodOfAFzero(), TOLERANCE);
|
||||
// Assert.assertEquals(result.getLog10MAP(), refResult.getLog10MAP(), TOLERANCE);
|
||||
// Assert.assertEquals(result.getLog10MLE(), refResult.getLog10MLE(), TOLERANCE);
|
||||
// Assert.assertEquals(result.getLog10PosteriorOfAFzero(), refResult.getLog10PosteriorOfAFzero(), TOLERANCE);
|
||||
// Assert.assertEquals(result.getLog10PosteriorsMatrixSumWithoutAFzero(), refResult.getLog10PosteriorsMatrixSumWithoutAFzero(), TOLERANCE);
|
||||
Assert.assertEquals(resultTracker.getNormalizedPosteriorOfAFGTZero(), refResultTracker.getNormalizedPosteriorOfAFGTZero(), 0.5);
|
||||
Assert.assertEquals(resultTracker.getNormalizedPosteriorOfAFzero(), refResultTracker.getNormalizedPosteriorOfAFzero(), 0.5);
|
||||
private void compareAFCalcResults(final AFCalcResult actual, final AFCalcResult expected) {
|
||||
final double TOLERANCE = 1; // TODO -- tighten up tolerances
|
||||
|
||||
Assert.assertEquals(actual.getLog10PriorOfAFEq0(), expected.getLog10PriorOfAFEq0(), TOLERANCE);
|
||||
Assert.assertEquals(actual.getLog10PriorOfAFGT0(), expected.getLog10PriorOfAFGT0(), TOLERANCE);
|
||||
Assert.assertEquals(actual.getLog10LikelihoodOfAFEq0(), expected.getLog10LikelihoodOfAFEq0(), TOLERANCE);
|
||||
Assert.assertEquals(actual.getLog10LikelihoodOfAFGT0(), expected.getLog10LikelihoodOfAFGT0(), TOLERANCE);
|
||||
Assert.assertEquals(actual.getLog10PosteriorOfAFEq0(), expected.getLog10PosteriorOfAFEq0(), TOLERANCE);
|
||||
Assert.assertEquals(actual.getLog10PosteriorOfAFGT0(), expected.getLog10PosteriorOfAFGT0(), TOLERANCE);
|
||||
Assert.assertEquals(actual.getAlleleCountsOfMLE(), expected.getAlleleCountsOfMLE());
|
||||
Assert.assertEquals(actual.getAllelesUsedInGenotyping(), expected.getAllelesUsedInGenotyping());
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "Models")
|
||||
|
|
@ -278,9 +265,9 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
final Genotype BB = makePL(Arrays.asList(C, C), 20000000, 20000000, 0);
|
||||
GetGLsTest cfg = new GetGLsTest(calc, 1, Arrays.asList(BB, BB, BB), FLAT_3SAMPLE_PRIORS, "flat");
|
||||
|
||||
final AFCalcResultTracker resultTracker = cfg.execute();
|
||||
final AFCalcResult resultTracker = cfg.execute();
|
||||
|
||||
int calculatedAlleleCount = resultTracker.getAlleleCountsOfMAP()[0];
|
||||
int calculatedAlleleCount = resultTracker.getAlleleCountsOfMLE()[0];
|
||||
Assert.assertEquals(calculatedAlleleCount, 6);
|
||||
}
|
||||
|
||||
|
|
@ -290,10 +277,10 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
final Genotype AC = makePL(Arrays.asList(A, G), 100, 100, 100, 0, 100, 100);
|
||||
GetGLsTest cfg = new GetGLsTest(calc, 2, Arrays.asList(AB, AC), FLAT_3SAMPLE_PRIORS, "flat");
|
||||
|
||||
final AFCalcResultTracker resultTracker = cfg.execute();
|
||||
final AFCalcResult resultTracker = cfg.execute();
|
||||
|
||||
Assert.assertEquals(resultTracker.getAlleleCountsOfMAP()[0], 1);
|
||||
Assert.assertEquals(resultTracker.getAlleleCountsOfMAP()[1], 1);
|
||||
Assert.assertEquals(resultTracker.getAlleleCountsOfMLE()[0], 1);
|
||||
Assert.assertEquals(resultTracker.getAlleleCountsOfMLE()[1], 1);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
|
|
@ -328,7 +315,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
for ( int i = 0; i < PLs.length; i++ ) PLs[i] = g.getPL()[i] * ((int)Math.log10(scaleFactor)+1);
|
||||
final Genotype scaledG = new GenotypeBuilder(g).PL(PLs).make();
|
||||
final double scaledPNonRef = pNonRef < 0.5 ? pNonRef / scaleFactor : 1 - ((1-pNonRef) / scaleFactor);
|
||||
return new PNonRefData(vc, scaledG, scaledPNonRef, tolerance / scaleFactor, true);
|
||||
return new PNonRefData(vc, scaledG, scaledPNonRef, tolerance, true);
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
|
|
@ -352,22 +339,24 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
|
||||
final List<ExactAFCalculationTestBuilder.ModelType> constrainedModel = Arrays.asList(ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact);
|
||||
|
||||
final double TOLERANCE = 0.5;
|
||||
|
||||
final List<PNonRefData> initialPNonRefData = Arrays.asList(
|
||||
// bi-allelic sites
|
||||
new PNonRefData(vc2, makePL(AA, 0, 10, 10), 0.1666667, 1e-1, true),
|
||||
new PNonRefData(vc2, makePL(AA, 0, 1, 10), 0.4721084, 1e-1, false, constrainedModel),
|
||||
new PNonRefData(vc2, makePL(AA, 0, 1, 1), 0.6136992, 1e-1, false, constrainedModel),
|
||||
new PNonRefData(vc2, makePL(AA, 0, 5, 5), 0.3874259, 1e-1, false, constrainedModel),
|
||||
new PNonRefData(vc2, makePL(AC, 10, 0, 10), 0.9166667, 1e-1, true),
|
||||
new PNonRefData(vc2, makePL(CC, 10, 10, 0), 0.9166667, 1e-1, true),
|
||||
new PNonRefData(vc2, makePL(AA, 0, 10, 10), 0.1666667, TOLERANCE, true),
|
||||
new PNonRefData(vc2, makePL(AA, 0, 1, 10), 0.4721084, TOLERANCE, false, constrainedModel),
|
||||
new PNonRefData(vc2, makePL(AA, 0, 1, 1), 0.6136992, TOLERANCE, false, constrainedModel),
|
||||
new PNonRefData(vc2, makePL(AA, 0, 5, 5), 0.3874259, TOLERANCE, false, constrainedModel),
|
||||
new PNonRefData(vc2, makePL(AC, 10, 0, 10), 0.9166667, TOLERANCE, true),
|
||||
new PNonRefData(vc2, makePL(CC, 10, 10, 0), 0.9166667, TOLERANCE, true),
|
||||
|
||||
// tri-allelic sites -- cannot scale because of the naivety of our scaling estimator
|
||||
new PNonRefData(vc3, makePL(AA, 0, 10, 10, 10, 10, 10), 0.3023255813953489, 2e-1, false), // more tolerance because constrained model is a bit inaccurate
|
||||
new PNonRefData(vc3, makePL(AC, 10, 0, 10, 10, 10, 10), 0.9166667, 1e-1, false),
|
||||
new PNonRefData(vc3, makePL(CC, 10, 10, 0, 10, 10, 10), 0.9166667, 1e-1, false),
|
||||
new PNonRefData(vc3, makePL(AG, 10, 10, 10, 0, 10, 10), 0.9166667, 1e-1, false),
|
||||
new PNonRefData(vc3, makePL(CG, 10, 10, 10, 10, 0, 10), 0.80, 1e-1, false),
|
||||
new PNonRefData(vc3, makePL(GG, 10, 10, 10, 10, 10, 0), 0.9166667, 1e-1, false)
|
||||
new PNonRefData(vc3, makePL(AA, 0, 10, 10, 10, 10, 10), 0.3023255813953489, TOLERANCE * 2, false), // more tolerance because constrained model is a bit inaccurate
|
||||
new PNonRefData(vc3, makePL(AC, 10, 0, 10, 10, 10, 10), 0.9166667, TOLERANCE, false),
|
||||
new PNonRefData(vc3, makePL(CC, 10, 10, 0, 10, 10, 10), 0.9166667, TOLERANCE, false),
|
||||
new PNonRefData(vc3, makePL(AG, 10, 10, 10, 0, 10, 10), 0.9166667, TOLERANCE, false),
|
||||
new PNonRefData(vc3, makePL(CG, 10, 10, 10, 10, 0, 10), 0.80, TOLERANCE, false),
|
||||
new PNonRefData(vc3, makePL(GG, 10, 10, 10, 10, 10, 0), 0.9166667, TOLERANCE, false)
|
||||
);
|
||||
|
||||
for ( ExactAFCalculationTestBuilder.ModelType modelType : ExactAFCalculationTestBuilder.ModelType.values() ) {
|
||||
|
|
@ -400,9 +389,9 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
final VariantContextBuilder vcb = new VariantContextBuilder(vcRoot);
|
||||
vcb.genotypes(genotypes);
|
||||
|
||||
final AFCalcResultTracker resultTracker = testBuilder.makeModel().getLog10PNonRef(vcb.make(), testBuilder.makePriors());
|
||||
final AFCalcResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vcb.make(), testBuilder.makePriors());
|
||||
|
||||
Assert.assertEquals(resultTracker.getNormalizedPosteriorOfAFGTZero(), expectedPNonRef, tolerance,
|
||||
Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), Math.log10(expectedPNonRef), tolerance,
|
||||
"Actual pNonRef not within tolerance " + tolerance + " of expected");
|
||||
}
|
||||
|
||||
|
|
@ -428,26 +417,24 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
final int REF_PL = 10;
|
||||
final Genotype AB = makePL(Arrays.asList(A,C), REF_PL, 0, 10000);
|
||||
|
||||
for ( int log10NonRefPrior = 1; log10NonRefPrior < 100*REF_PL; log10NonRefPrior += 1 ) {
|
||||
for ( int log10NonRefPrior = 1; log10NonRefPrior < 10*REF_PL; log10NonRefPrior += 1 ) {
|
||||
final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior);
|
||||
final double[] priors = MathUtils.toLog10(new double[]{refPrior, (1-refPrior) / 2, (1-refPrior) / 2});
|
||||
GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior);
|
||||
final AFCalcResultTracker resultTracker = cfg.execute();
|
||||
final int actualAC = resultTracker.getAlleleCountsOfMAP()[0];
|
||||
final AFCalcResult resultTracker = cfg.execute();
|
||||
final int actualAC = resultTracker.getAlleleCountsOfMLE()[0];
|
||||
|
||||
final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0];
|
||||
final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1];
|
||||
final boolean expectNonRef = pRefWithPrior <= pHetWithPrior;
|
||||
final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior));
|
||||
|
||||
if ( expectNonRef )
|
||||
Assert.assertTrue(resultTracker.getNormalizedPosteriorOfAFGTZero() > 0.5);
|
||||
else
|
||||
Assert.assertTrue(resultTracker.getNormalizedPosteriorOfAFGTZero() < 0.5);
|
||||
if ( nonRefPost < 0.1 )
|
||||
Assert.assertTrue(resultTracker.isPolymorphic(-1));
|
||||
|
||||
final int expectedAC = expectNonRef ? 1 : 0;
|
||||
Assert.assertEquals(actualAC, expectedAC,
|
||||
final int expectedMLEAC = 1; // the MLE is independent of the prior
|
||||
Assert.assertEquals(actualAC, expectedMLEAC,
|
||||
"actual AC with priors " + log10NonRefPrior + " not expected "
|
||||
+ expectedAC + " priors " + Utils.join(",", priors));
|
||||
+ expectedMLEAC + " priors " + Utils.join(",", priors));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -468,8 +455,8 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
final double nonRefPrior = (1-refPrior) / 2;
|
||||
final double[] priors = MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior, nonRefPrior, nonRefPrior, nonRefPrior});
|
||||
GetGLsTest cfg = new GetGLsTest(model, 2, Arrays.asList(AB, AC), priors, "pNonRef" + log10NonRefPrior);
|
||||
final AFCalcResultTracker resultTracker = cfg.execute();
|
||||
final int actualAC_AB = resultTracker.getAlleleCountsOfMAP()[0];
|
||||
final AFCalcResult resultTracker = cfg.execute();
|
||||
final int actualAC_AB = resultTracker.getAlleleCountsOfMLE()[0];
|
||||
|
||||
final double pRefABWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0];
|
||||
final double pHetABWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1];
|
||||
|
|
@ -480,7 +467,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
|
||||
final double nonRefPriorSecondAllele = Math.pow(nonRefPrior, 2);
|
||||
final double refPriorSecondAllele = 1 - nonRefPriorSecondAllele;
|
||||
final int actualAC_AC = resultTracker.getAlleleCountsOfMAP()[1];
|
||||
final int actualAC_AC = resultTracker.getAlleleCountsOfMLE()[1];
|
||||
final double pRefACWithPrior = AB.getLikelihoods().getAsVector()[0] + Math.log10(refPriorSecondAllele);
|
||||
final double pHetACWithPrior = AC.getLikelihoods().getAsVector()[3] + Math.log10(nonRefPriorSecondAllele);
|
||||
final int expectedAC_AC = pRefACWithPrior <= pHetACWithPrior ? 1 : 0;
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalc;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResultTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
|
|
@ -363,7 +363,7 @@ public class UnifiedGenotyperEngine {
|
|||
generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
|
||||
}
|
||||
|
||||
AFCalcResultTracker AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));
|
||||
AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));
|
||||
|
||||
// is the most likely frequency conformation AC=0 for all alternate alleles?
|
||||
boolean bestGuessIsRef = true;
|
||||
|
|
@ -379,10 +379,14 @@ public class UnifiedGenotyperEngine {
|
|||
if ( indexOfAllele == -1 )
|
||||
continue;
|
||||
|
||||
final int indexOfBestAC = AFresult.getAlleleCountsOfMAP()[indexOfAllele-1];
|
||||
// we are non-ref if the probability of being non-ref > the emit confidence.
|
||||
// the emit confidence is phred-scaled, say 30 => 10^-3.
|
||||
// the posterior AF > 0 is log10: -5 => 10^-5
|
||||
// we are non-ref if 10^-5 < 10^-3 => -5 < -3
|
||||
final boolean isNonRef = AFresult.isPolymorphic(UAC.STANDARD_CONFIDENCE_FOR_EMITTING / -10.0);
|
||||
|
||||
// if the most likely AC is not 0, then this is a good alternate allele to use
|
||||
if ( indexOfBestAC != 0 ) {
|
||||
if ( ! isNonRef ) {
|
||||
myAlleles.add(alternateAllele);
|
||||
alleleCountsofMLE.add(AFresult.getAlleleCountsOfMLE()[indexOfAllele-1]);
|
||||
bestGuessIsRef = false;
|
||||
|
|
@ -394,22 +398,10 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
}
|
||||
|
||||
// calculate p(f>0):
|
||||
final double PoFEq0 = AFresult.getNormalizedPosteriorOfAFzero();
|
||||
final double PoFGT0 = AFresult.getNormalizedPosteriorOfAFGTZero();
|
||||
|
||||
double phredScaledConfidence;
|
||||
if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PoFEq0);
|
||||
if ( Double.isInfinite(phredScaledConfidence) )
|
||||
phredScaledConfidence = -10.0 * AFresult.getLog10PosteriorOfAFzero();
|
||||
} else {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PoFGT0);
|
||||
if ( Double.isInfinite(phredScaledConfidence) ) {
|
||||
final double sum = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
|
||||
}
|
||||
}
|
||||
final double PoFGT0 = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0());
|
||||
final double phredScaledConfidence = ! bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
|
||||
? -10 * AFresult.getLog10PosteriorOfAFEq0()
|
||||
: -10 * AFresult.getLog10PosteriorOfAFGT0();
|
||||
|
||||
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
|
||||
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestGuessIsRef) ) {
|
||||
|
|
@ -462,7 +454,7 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// the overall lod
|
||||
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
|
||||
double overallLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
|
||||
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||
|
||||
List<Allele> allAllelesToUse = builder.make().getAlleles();
|
||||
|
|
@ -471,16 +463,16 @@ public class UnifiedGenotyperEngine {
|
|||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||
AFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model));
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
double forwardLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
|
||||
double forwardLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
double forwardLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0();
|
||||
double forwardLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
|
||||
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||
|
||||
// the reverse lod
|
||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||
AFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model));
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
double reverseLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
|
||||
double reverseLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
double reverseLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0();
|
||||
double reverseLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
|
||||
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
||||
|
||||
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
||||
|
|
|
|||
|
|
@ -105,7 +105,7 @@ public abstract class AFCalc implements Cloneable {
|
|||
* @param log10AlleleFrequencyPriors a prior vector nSamples x 2 in length indicating the Pr(AF = i)
|
||||
* @return result (for programming convenience)
|
||||
*/
|
||||
public AFCalcResultTracker getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) {
|
||||
public AFCalcResult getLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) {
|
||||
if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null");
|
||||
if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null");
|
||||
if ( resultTracker == null ) throw new IllegalArgumentException("Results object cannot be null");
|
||||
|
|
@ -123,7 +123,7 @@ public abstract class AFCalc implements Cloneable {
|
|||
printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, resultTracker.getLog10PosteriorOfAFzero());
|
||||
|
||||
resultTracker.setAllelesUsedInGenotyping(vcWorking.getAlleles());
|
||||
return resultTracker;
|
||||
return resultTracker.toAFCalcResult(log10AlleleFrequencyPriors);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -155,9 +155,9 @@ public abstract class AFCalc implements Cloneable {
|
|||
* @param resultTracker (pre-allocated) object to store results
|
||||
*/
|
||||
// TODO -- add consistent requires among args
|
||||
public abstract void computeLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AFCalcResultTracker resultTracker);
|
||||
protected abstract void computeLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AFCalcResultTracker resultTracker);
|
||||
|
||||
/**
|
||||
* Must be overridden by concrete subclasses
|
||||
|
|
|
|||
|
|
@ -190,6 +190,22 @@ public class AFCalcResult {
|
|||
return log10PriorsOfAC[AF1p];
|
||||
}
|
||||
|
||||
/**
|
||||
* Are we sufficiently confidence in being non-ref that the site is considered polymorphic?
|
||||
*
|
||||
* We are non-ref if the probability of being non-ref > the emit confidence (often an argument).
|
||||
* Suppose posterior AF > 0 is log10: -5 => 10^-5
|
||||
* And that log10minPNonRef is -3.
|
||||
* We are considered polymorphic since 10^-5 < 10^-3 => -5 < -3
|
||||
*
|
||||
* @param log10minPNonRef the log10 scaled min pr of being non-ref to be considered polymorphic
|
||||
*
|
||||
* @return true if there's enough confidence (relative to log10minPNonRef) to reject AF == 0
|
||||
*/
|
||||
public boolean isPolymorphic(final double log10minPNonRef) {
|
||||
return getLog10PosteriorOfAFGT0() < log10minPNonRef;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the log10 normalized posteriors given the log10 likelihoods and priors
|
||||
*
|
||||
|
|
@ -221,11 +237,11 @@ public class AFCalcResult {
|
|||
if ( vector.length != expectedSize ) return false;
|
||||
|
||||
for ( final double pr : vector ) {
|
||||
if ( pr > 0 ) return false; // log10 prob. vector should be < 0
|
||||
if ( pr > 0.0 ) return false; // log10 prob. vector should be < 0
|
||||
if ( Double.isInfinite(pr) || Double.isNaN(pr) ) return false;
|
||||
}
|
||||
|
||||
if ( shouldSumToOne || MathUtils.compareDoubles(MathUtils.sumLog10(vector), 0.0, 1e-2) != 0 )
|
||||
if ( shouldSumToOne && MathUtils.compareDoubles(MathUtils.sumLog10(vector), 1.0, 1e-2) != 0 )
|
||||
return false;
|
||||
|
||||
return true; // everything is good
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ import java.util.List;
|
|||
*
|
||||
* TODO -- WHAT IS THE CONTRACT ON MAP AC AND P NON REF?
|
||||
*/
|
||||
public class AFCalcResultTracker {
|
||||
class AFCalcResultTracker {
|
||||
// These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles
|
||||
protected double log10MLE;
|
||||
protected double log10MAP;
|
||||
|
|
@ -157,6 +157,10 @@ public class AFCalcResultTracker {
|
|||
return log10LikelihoodOfAFzero;
|
||||
}
|
||||
|
||||
public double getLog10LikelihoodOfAFNotZero() {
|
||||
return getLog10PosteriorsMatrixSumWithoutAFzero(); // TODO -- INCORRECT TEMPORARY CALCULATION
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO -- eric what is this supposed to return? my unit tests don't do what I think they should
|
||||
*
|
||||
|
|
@ -215,6 +219,13 @@ public class AFCalcResultTracker {
|
|||
return AClimits;
|
||||
}
|
||||
|
||||
protected AFCalcResult toAFCalcResult(final double[] log10PriorsByAC) {
|
||||
final int [] subACOfMLE = Arrays.copyOf(alleleCountsOfMLE, allelesUsedInGenotyping.size());
|
||||
final double[] log10Likelihoods = new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()};
|
||||
final double[] log10Priors = new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)};
|
||||
return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Protected mutational methods only for use within the calculation models themselves
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import com.google.java.contract.Ensures;
|
|||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -70,7 +71,7 @@ public class ConstrainedDiploidExactAFCalc extends DiploidExactAFCalc {
|
|||
@Requires({
|
||||
"g != null",
|
||||
"maxACs != null",
|
||||
"MathUtils.sum(maxACs) >= 0"})
|
||||
"goodMaxACs(maxACs)"})
|
||||
private void updateMaxACs(final Genotype g, final int[] maxACs) {
|
||||
final int[] PLs = g.getLikelihoods().getAsPLs();
|
||||
|
||||
|
|
@ -101,9 +102,13 @@ public class ConstrainedDiploidExactAFCalc extends DiploidExactAFCalc {
|
|||
@Requires({
|
||||
"alleleI >= 0",
|
||||
"(alleleI - 1) < maxACs.length",
|
||||
"MathUtils.sum(maxACs) >= 0"})
|
||||
"goodMaxACs(maxACs)"})
|
||||
private void updateMaxACs(final int[] maxACs, final int alleleI) {
|
||||
if ( alleleI > 0 )
|
||||
maxACs[alleleI-1]++;
|
||||
}
|
||||
|
||||
private static boolean goodMaxACs(final int[] maxACs) {
|
||||
return MathUtils.sum(maxACs) >= 0;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,9 +45,9 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
protected abstract StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker);
|
||||
|
||||
@Override
|
||||
public void computeLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AFCalcResultTracker resultTracker) {
|
||||
protected void computeLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AFCalcResultTracker resultTracker) {
|
||||
final int numAlternateAlleles = vc.getNAlleles() - 1;
|
||||
final ArrayList<double[]> genotypeLikelihoods = getGLs(vc.getGenotypes());
|
||||
final int numSamples = genotypeLikelihoods.size()-1;
|
||||
|
|
|
|||
|
|
@ -60,19 +60,20 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
|
|||
public void computeLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AFCalcResultTracker resultTracker) {
|
||||
final List<AFCalcResultTracker> independentResultTrackers = computeLog10PNonRefForEachAllele(vc, log10AlleleFrequencyPriors);
|
||||
combineIndependentPNonRefs(vc, independentResultTrackers, log10AlleleFrequencyPriors, resultTracker);
|
||||
refModel.computeLog10PNonRef(vc, log10AlleleFrequencyPriors, resultTracker);
|
||||
// final List<AFCalcResult> independentResultTrackers = computeLog10PNonRefForEachAllele(vc, log10AlleleFrequencyPriors);
|
||||
// combineIndependentPNonRefs(vc, independentResultTrackers, log10AlleleFrequencyPriors, resultTracker);
|
||||
}
|
||||
|
||||
protected List<AFCalcResultTracker> computeLog10PNonRefForEachAllele(final VariantContext vc,
|
||||
protected List<AFCalcResult> computeLog10PNonRefForEachAllele(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
final int nAltAlleles = vc.getNAlleles() - 1;
|
||||
final List<AFCalcResultTracker> resultTrackers = new ArrayList<AFCalcResultTracker>(nAltAlleles);
|
||||
final List<AFCalcResult> resultTrackers = new ArrayList<AFCalcResult>(nAltAlleles);
|
||||
|
||||
for ( int altI = 0; altI < nAltAlleles; altI++ ) {
|
||||
final List<Allele> biallelic = Arrays.asList(vc.getReference(), vc.getAlternateAllele(altI));
|
||||
final VariantContext subvc = biallelicCombinedGLs(vc, biallelic, altI + 1);
|
||||
final AFCalcResultTracker resultTracker = refModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors);
|
||||
final AFCalcResult resultTracker = refModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors);
|
||||
resultTrackers.add(resultTracker);
|
||||
}
|
||||
|
||||
|
|
@ -141,34 +142,34 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
|
|||
* @param resultTracker the destination for the combined result
|
||||
*/
|
||||
protected void combineIndependentPNonRefs(final VariantContext vc,
|
||||
final List<AFCalcResultTracker> independentPNonRefs,
|
||||
final List<AFCalcResult> independentPNonRefs,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final AFCalcResultTracker resultTracker) {
|
||||
final int nChrom = vc.getNSamples() * 2;
|
||||
|
||||
resultTracker.reset();
|
||||
|
||||
// both the likelihood and the posterior of AF=0 are the same for all alleles
|
||||
// TODO -- check and ensure this is true
|
||||
resultTracker.setLog10LikelihoodOfAFzero(independentPNonRefs.get(0).getLog10LikelihoodOfAFzero());
|
||||
resultTracker.setLog10PosteriorOfAFzero(independentPNonRefs.get(0).getLog10PosteriorOfAFzero());
|
||||
resultTracker.log10PosteriorMatrixSum = 0.0;
|
||||
|
||||
int altI = 0;
|
||||
for ( final AFCalcResultTracker independentPNonRef : independentPNonRefs ) {
|
||||
resultTracker.log10MLE += independentPNonRef.getLog10MLE();
|
||||
|
||||
// TODO -- technically double counting some posterior mass
|
||||
resultTracker.log10MAP += independentPNonRef.getLog10MAP();
|
||||
|
||||
// TODO -- technically double counting some posterior mass
|
||||
resultTracker.log10PosteriorMatrixSum += independentPNonRef.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
|
||||
resultTracker.getAlleleCountsOfMAP()[altI] = independentPNonRef.getAlleleCountsOfMAP()[0];
|
||||
resultTracker.getAlleleCountsOfMLE()[altI] = independentPNonRef.getAlleleCountsOfMLE()[0];
|
||||
|
||||
resultTracker.nEvaluations += independentPNonRef.nEvaluations;
|
||||
altI++;
|
||||
}
|
||||
// final int nChrom = vc.getNSamples() * 2;
|
||||
//
|
||||
// resultTracker.reset();
|
||||
//
|
||||
// // both the likelihood and the posterior of AF=0 are the same for all alleles
|
||||
// // TODO -- check and ensure this is true
|
||||
// resultTracker.setLog10LikelihoodOfAFzero(independentPNonRefs.get(0).getLog10LikelihoodOfAFzero());
|
||||
// resultTracker.setLog10PosteriorOfAFzero(independentPNonRefs.get(0).getLog10PosteriorOfAFzero());
|
||||
// resultTracker.log10PosteriorMatrixSum = 0.0;
|
||||
//
|
||||
// int altI = 0;
|
||||
// for ( final AFCalcResult independentPNonRef : independentPNonRefs ) {
|
||||
// resultTracker.log10MLE += independentPNonRef.getLog10MLE();
|
||||
//
|
||||
// // TODO -- technically double counting some posterior mass
|
||||
// resultTracker.log10MAP += independentPNonRef.getLog10MAP();
|
||||
//
|
||||
// // TODO -- technically double counting some posterior mass
|
||||
// resultTracker.log10PosteriorMatrixSum += independentPNonRef.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
//
|
||||
// resultTracker.getAlleleCountsOfMAP()[altI] = independentPNonRef.getAlleleCountsOfMAP()[0];
|
||||
// resultTracker.getAlleleCountsOfMLE()[altI] = independentPNonRef.getAlleleCountsOfMLE()[0];
|
||||
//
|
||||
// resultTracker.nEvaluations += independentPNonRef.nEvaluations;
|
||||
// altI++;
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResultTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.DiploidExactAFCalc;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ReferenceDiploidExactAFCalc;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -54,10 +54,9 @@ public class GLBasedSampleSelector extends SampleSelector {
|
|||
flatPriors = new double[1+2*samples.size()];
|
||||
AFCalculator = new ReferenceDiploidExactAFCalc(samples.size(), 4);
|
||||
}
|
||||
AFCalcResultTracker resultTracker = new AFCalcResultTracker(vc.getAlternateAlleles().size());
|
||||
AFCalculator.computeLog10PNonRef(subContext, flatPriors, resultTracker);
|
||||
final AFCalcResult result = AFCalculator.getLog10PNonRef(subContext, flatPriors);
|
||||
// do we want to let this qual go up or down?
|
||||
if ( resultTracker.getLog10PosteriorOfAFzero() < referenceLikelihood ) {
|
||||
if ( result.getLog10LikelihoodOfAFEq0() < referenceLikelihood ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue