From cb857d1640e232c0bf558cc2d686e50c8f452417 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 11 Oct 2012 11:05:01 -0400 Subject: [PATCH] AFCalcs must be made by factory method now -- AFCalcFactory is the only way to make AFCalcs now. There's a nice ordered enum there describing the models and their ploidy and max alt allele restrictions. The factory makes it easy to create them, and to find models that work for you given your ploidy and max alt alleles. -- AFCalc no longer has UAC constructor -- only AFCalcFactory does. Code cleanup throughout -- Enabling more unit tests, all of which almost pass now (except for IndependentAllelesDiploidExactAFCalc which will be fixed next) -- It's now possible to run the UG / HC with any of the exact models currently in the system. -- Code cleanup throughout the system, reorganizing the unit tests in particular --- .../ExactAFCalculationPerformanceTest.java | 18 +- .../afcalc/ExactAFCalculationTestBuilder.java | 21 +- .../afcalc/GeneralPloidyExactAFCalc.java | 14 +- ...ConstrainedAFCalculationModelUnitTest.java | 124 ++++++++++ .../ExactAFCalculationModelUnitTest.java | 200 +++------------- ...dentAllelesDiploidExactAFCalcUnitTest.java | 4 +- .../genotyper/UnifiedArgumentCollection.java | 4 +- .../genotyper/UnifiedGenotyperEngine.java | 33 +-- .../gatk/walkers/genotyper/afcalc/AFCalc.java | 54 ++--- .../genotyper/afcalc/AFCalcFactory.java | 225 ++++++++++++++++++ .../genotyper/afcalc/AFCalcResultTracker.java | 9 +- .../afcalc/ConstrainedDiploidExactAFCalc.java | 13 +- .../genotyper/afcalc/DiploidExactAFCalc.java | 14 +- .../walkers/genotyper/afcalc/ExactAFCalc.java | 12 +- .../IndependentAllelesDiploidExactAFCalc.java | 18 +- .../afcalc/OriginalDiploidExactAFCalc.java | 11 +- .../afcalc/ReferenceDiploidExactAFCalc.java | 12 +- .../GLBasedSampleSelector.java | 8 +- .../broadinstitute/sting/utils/MathUtils.java | 12 +- 19 files changed, 457 insertions(+), 349 deletions(-) create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedAFCalculationModelUnitTest.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcFactory.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationPerformanceTest.java index 5f563d489..16aa77284 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationPerformanceTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationPerformanceTest.java @@ -54,7 +54,7 @@ public class ExactAFCalculationPerformanceTest { final SimpleTimer timer = new SimpleTimer(); for ( final int nonTypePL : Arrays.asList(100) ) { - final ExactAFCalc calc = testBuilder.makeModel(); + final AFCalc calc = testBuilder.makeModel(); final double[] priors = testBuilder.makePriors(); for ( int[] ACs : makeACs(testBuilder.numAltAlleles, testBuilder.nSamples*2) ) { @@ -113,7 +113,7 @@ public class ExactAFCalculationPerformanceTest { final SimpleTimer timer = new SimpleTimer(); for ( final int nonTypePL : Arrays.asList(100) ) { - final ExactAFCalc calc = testBuilder.makeModel(); + final AFCalc calc = testBuilder.makeModel(); final double[] priors = testBuilder.makePriors(); final int[] ac = new int[testBuilder.numAltAlleles]; @@ -147,7 +147,7 @@ public class ExactAFCalculationPerformanceTest { final SimpleTimer timer = new SimpleTimer(); for ( final int nonTypePL : Arrays.asList(100) ) { - final ExactAFCalc calc = testBuilder.makeModel(); + final AFCalc calc = testBuilder.makeModel(); final double[] priors = testBuilder.makePriors(); final int[] ac = new int[testBuilder.numAltAlleles]; @@ -169,10 +169,10 @@ public class ExactAFCalculationPerformanceTest { } private static class ModelParams { - final ExactAFCalculationTestBuilder.ModelType modelType; + final AFCalcFactory.Calculation modelType; final int maxBiNSamples, maxTriNSamples; - private ModelParams(ExactAFCalculationTestBuilder.ModelType modelType, int maxBiNSamples, int maxTriNSamples) { + private ModelParams(AFCalcFactory.Calculation modelType, int maxBiNSamples, int maxTriNSamples) { this.modelType = modelType; this.maxBiNSamples = maxBiNSamples; this.maxTriNSamples = maxTriNSamples; @@ -213,7 +213,7 @@ public class ExactAFCalculationPerformanceTest { final int ac = Integer.valueOf(args[2]); final ExactAFCalculationTestBuilder testBuilder = new ExactAFCalculationTestBuilder(nSamples, 1, - ExactAFCalculationTestBuilder.ModelType.IndependentDiploidExact, + AFCalcFactory.Calculation.EXACT_INDEPENDENT, ExactAFCalculationTestBuilder.PriorType.human); final VariantContext vc = testBuilder.makeACTest(new int[]{ac}, 0, 100); @@ -232,10 +232,10 @@ public class ExactAFCalculationPerformanceTest { final PrintStream out = new PrintStream(new FileOutputStream(args[1])); final List modelParams = Arrays.asList( - new ModelParams(ExactAFCalculationTestBuilder.ModelType.ReferenceDiploidExact, 10000, 10), + new ModelParams(AFCalcFactory.Calculation.EXACT_REFERENCE, 10000, 10), // new ModelParams(ExactAFCalculationTestBuilder.ModelType.GeneralExact, 100, 10), - new ModelParams(ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact, 10000, 100), - new ModelParams(ExactAFCalculationTestBuilder.ModelType.IndependentDiploidExact, 10000, 1000)); + new ModelParams(AFCalcFactory.Calculation.EXACT_CONSTRAINED, 10000, 100), + new ModelParams(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 10000, 1000)); final boolean ONLY_HUMAN_PRIORS = false; final List priorTypes = ONLY_HUMAN_PRIORS diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationTestBuilder.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationTestBuilder.java index ca39f8bf8..951f8d3ed 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationTestBuilder.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationTestBuilder.java @@ -21,24 +21,17 @@ public class ExactAFCalculationTestBuilder { final int nSamples; final int numAltAlleles; - final ModelType modelType; + final AFCalcFactory.Calculation modelType; final PriorType priorType; public ExactAFCalculationTestBuilder(final int nSamples, final int numAltAlleles, - final ModelType modelType, final PriorType priorType) { + final AFCalcFactory.Calculation modelType, final PriorType priorType) { this.nSamples = nSamples; this.numAltAlleles = numAltAlleles; this.modelType = modelType; this.priorType = priorType; } - public enum ModelType { - ReferenceDiploidExact, - ConstrainedDiploidExact, - IndependentDiploidExact, - GeneralExact - } - public enum PriorType { flat, human @@ -48,14 +41,8 @@ public class ExactAFCalculationTestBuilder { return nSamples; } - public ExactAFCalc makeModel() { - switch (modelType) { - case ReferenceDiploidExact: return new ReferenceDiploidExactAFCalc(nSamples, 4); - case ConstrainedDiploidExact: return new ConstrainedDiploidExactAFCalc(nSamples, 4); - case GeneralExact: return new GeneralPloidyExactAFCalc(nSamples, 4, 2); - case IndependentDiploidExact: return new IndependentAllelesDiploidExactAFCalc(nSamples, 4); - default: throw new RuntimeException("Unexpected type " + modelType); - } + public AFCalc makeModel() { + return AFCalcFactory.createAFCalc(modelType, nSamples, 4, 4, 2); } public double[] makePriors() { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java index f64fab33b..bb2eacc82 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java @@ -25,16 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; -import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods; import org.broadinstitute.sting.gatk.walkers.genotyper.ProbabilityVector; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.*; -import java.io.PrintStream; import java.util.*; public class GeneralPloidyExactAFCalc extends ExactAFCalc { @@ -44,19 +41,14 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 private final static boolean VERBOSE = false; - protected GeneralPloidyExactAFCalc(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { - super(UAC, N, logger, verboseWriter); - ploidy = UAC.samplePloidy; - } - - public GeneralPloidyExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) { - super(nSamples, maxAltAlleles, maxAltAlleles, null, null, null); + protected GeneralPloidyExactAFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) { + super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); this.ploidy = ploidy; } @Override protected VariantContext reduceScope(VariantContext vc) { - final int maxAltAlleles = vc.getType().equals(VariantContext.Type.INDEL) ? MAX_ALTERNATE_ALLELES_FOR_INDELS : MAX_ALTERNATE_ALLELES_TO_GENOTYPE; + final int maxAltAlleles = vc.getType().equals(VariantContext.Type.INDEL) ? maxAlternateAllelesForIndels : maxAlternateAllelesToGenotype; // don't try to genotype too many alternate alleles if ( vc.getAlternateAlleles().size() > maxAltAlleles) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedAFCalculationModelUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedAFCalculationModelUnitTest.java new file mode 100644 index 000000000..674f6f642 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedAFCalculationModelUnitTest.java @@ -0,0 +1,124 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + + +public class ConstrainedAFCalculationModelUnitTest extends BaseTest { + static Allele A = Allele.create("A", true); + static Allele C = Allele.create("C"); + static Allele G = Allele.create("G"); + + protected static Genotype makePL(final List expectedGT, int ... pls) { + return ExactAFCalculationModelUnitTest.makePL(expectedGT, pls); + } + + @DataProvider(name = "MaxACsToVisit") + public Object[][] makeMaxACsToVisit() { + List tests = new ArrayList(); + + final int nSamples = 10; + + for (int nNonInformative = 0; nNonInformative < nSamples - 1; nNonInformative++ ) { + final int nChrom = (nSamples - nNonInformative) * 2; + for ( int i = 0; i < nChrom; i++ ) { + // bi-allelic + tests.add(new Object[]{nSamples, Arrays.asList(i), nNonInformative, AFCalcFactory.Calculation.EXACT_CONSTRAINED}); + + // tri-allelic + for ( int j = 0; j < (nChrom - i); j++) + tests.add(new Object[]{nSamples, Arrays.asList(i, j), nNonInformative, AFCalcFactory.Calculation.EXACT_CONSTRAINED}); + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(enabled = true, dataProvider = "MaxACsToVisit") + public void testMaxACsToVisit(final int nSamples, final List requestedACs, final int nNonInformative, final AFCalcFactory.Calculation modelType) { + final int nAlts = requestedACs.size(); + final ExactAFCalculationTestBuilder testBuilder + = new ExactAFCalculationTestBuilder(nSamples, nAlts, modelType, + ExactAFCalculationTestBuilder.PriorType.human); + + final VariantContext vc = testBuilder.makeACTest(requestedACs, nNonInformative, 100); + final int[] maxACsToVisit = ((ConstrainedDiploidExactAFCalc)testBuilder.makeModel()).computeMaxACs(vc); + + testExpectedACs(vc, maxACsToVisit); + } + + private void testExpectedACs(final VariantContext vc, final int[] maxACsToVisit) { + // this is necessary because cannot ensure that the tester gives us back the + // requested ACs due to rounding errors + final List ACs = new ArrayList(); + for ( final Allele a : vc.getAlternateAlleles() ) + ACs.add(vc.getCalledChrCount(a)); + + for ( int i = 0; i < maxACsToVisit.length; i++ ) { + Assert.assertEquals(maxACsToVisit[i], (int)ACs.get(i), "Maximum AC computed wasn't equal to the max possible in the construction for alt allele " + i); + } + } + + @DataProvider(name = "MaxACsGenotypes") + public Object[][] makeMaxACsForGenotype() { + List tests = new ArrayList(); + + final List AA = Arrays.asList(A, A); + final List AC = Arrays.asList(A, C); + final List CC = Arrays.asList(C, C); + final List AG = Arrays.asList(A, G); + final List GG = Arrays.asList(G, G); + final List CG = Arrays.asList(C, G); + + final VariantContext vc2 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C)).make(); + final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make(); + + tests.add(new Object[]{vc2, makePL(AA, 0, 10, 10)}); + tests.add(new Object[]{vc2, makePL(AC, 10, 0, 10)}); + tests.add(new Object[]{vc2, makePL(CC, 10, 10, 0)}); + + // make sure non-informative => 0 + tests.add(new Object[]{vc2, makePL(AA, 0, 0, 0)}); + tests.add(new Object[]{vc3, makePL(AA, 0, 0, 0, 0, 0, 0)}); + + // multi-allelics + tests.add(new Object[]{vc3, makePL(AG, 10, 10, 10, 0, 10, 10)}); + tests.add(new Object[]{vc3, makePL(CG, 10, 10, 10, 10, 0, 10)}); + tests.add(new Object[]{vc3, makePL(GG, 10, 10, 10, 10, 10, 0)}); + + // deal with non-informatives third alleles + tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 0, 0, 10)}); + tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 10, 0, 10)}); + tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 10, 0, 0)}); + tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 0, 0, 0)}); + tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 0, 0, 10)}); + tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 10, 0, 10)}); + tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 10, 0, 0)}); + tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 0, 0, 0)}); + + return tests.toArray(new Object[][]{}); + } + + @Test(enabled = true, dataProvider = "MaxACsGenotypes") + private void testMakeACByGenotype(final VariantContext vcRoot, final Genotype g) { + final VariantContext vc = new VariantContextBuilder(vcRoot).genotypes(g).make(); + + final ExactAFCalculationTestBuilder testBuilder + = new ExactAFCalculationTestBuilder(1, vc.getNAlleles()-1, AFCalcFactory.Calculation.EXACT_CONSTRAINED, + ExactAFCalculationTestBuilder.PriorType.human); + + final int[] maxACsToVisit = ((ConstrainedDiploidExactAFCalc)testBuilder.makeModel()).computeMaxACs(vc); + + testExpectedACs(vc, maxACsToVisit); + } +} \ No newline at end of file diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java index 34d7793d8..b1dc423a2 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalculationModelUnitTest.java @@ -23,6 +23,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { static Genotype AA1, AB1, BB1, NON_INFORMATIVE1; static Genotype AA2, AB2, AC2, BB2, BC2, CC2, NON_INFORMATIVE2; final double[] FLAT_3SAMPLE_PRIORS = MathUtils.normalizeFromLog10(new double[2*3+1], true); // flat priors + final private static boolean INCLUDE_BIALLELIC = true; final private static boolean INCLUDE_TRIALLELIC = true; final private static boolean Guillermo_FIXME = false; // TODO -- can only be enabled when GdA fixes bug @@ -53,12 +54,12 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { private class GetGLsTest extends TestDataProvider { GenotypesContext GLs; int numAltAlleles; - final ExactAFCalc calc; + final AFCalc calc; final int[] expectedACs; final double[] priors; final String priorName; - private GetGLsTest(final ExactAFCalc calc, int numAltAlleles, List arg, final double[] priors, final String priorName) { + private GetGLsTest(final AFCalc calc, int numAltAlleles, List arg, final double[] priors, final String priorName) { super(GetGLsTest.class); GLs = GenotypesContext.create(new ArrayList(arg)); this.numAltAlleles = numAltAlleles; @@ -81,7 +82,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { } public AFCalcResult executeRef() { - final ExactAFCalc ref = new ReferenceDiploidExactAFCalc(getCalc().nSamples, getCalc().getMaxAltAlleles()); + final AFCalc ref = AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_REFERENCE, getCalc().nSamples, getCalc().getMaxAltAlleles()); return ref.getLog10PNonRef(getVC(), getPriors()); } @@ -89,7 +90,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { return priors; } - public ExactAFCalc getCalc() { + public AFCalc getCalc() { return calc; } @@ -122,10 +123,12 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final List triAllelicSamples = Arrays.asList(AA2, AB2, BB2, AC2, BC2, CC2); for ( final int nSamples : Arrays.asList(1, 2, 3, 4) ) { -// final ExactAFCalc diploidCalc = new ReferenceDiploidExactAFCalc(nSamples, 4); -// final ExactAFCalc optDiploidCalc = new ConstrainedDiploidExactAFCalc(nSamples, 4); - //final ExactAFCalc generalCalc = new GeneralPloidyExactAFCalc(nSamples, 4, 2); - final ExactAFCalc indCalc = new IndependentAllelesDiploidExactAFCalc(nSamples, 4); + List calcs = AFCalcFactory.createAFCalcs( + Arrays.asList( + AFCalcFactory.Calculation.EXACT_REFERENCE, + AFCalcFactory.Calculation.EXACT_INDEPENDENT, + AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY + ), 4, 2, 2, 2); final int nPriorValues = 2*nSamples+1; final double[] flatPriors = MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors @@ -133,7 +136,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001); for ( final double[] priors : Arrays.asList(flatPriors, humanPriors) ) { // , humanPriors) ) { - for ( ExactAFCalc model : Arrays.asList(indCalc) ) { + for ( AFCalc model : calcs ) { final String priorName = priors == humanPriors ? "human" : "flat"; // bi-allelic @@ -157,11 +160,11 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final List genotypes = Arrays.asList(AB2, CC2, CC2, CC2); final int nSamples = genotypes.size(); - final ExactAFCalc indCalc = new IndependentAllelesDiploidExactAFCalc(nSamples, 4); + final AFCalc indCalc = AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, nSamples, 4); final int nPriorValues = 2*nSamples+1; final double[] priors = MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors - for ( ExactAFCalc model : Arrays.asList(indCalc) ) { + for ( AFCalc model : Arrays.asList(indCalc) ) { final String priorName = "flat"; new GetGLsTest(model, 2, genotypes, priors, priorName); } @@ -214,14 +217,16 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { samples.addAll(Collections.nCopies(nNonInformative, testData.nonInformative)); final int nSamples = samples.size(); - final ExactAFCalc diploidCalc = new ReferenceDiploidExactAFCalc(nSamples, 4); -// final ExactAFCalc optDiploidCalc = new ConstrainedDiploidExactAFCalc(nSamples, 4); - //final ExactAFCalc generalCalc = new GeneralPloidyExactAFCalc(nSamples, 4, 2); - final ExactAFCalc indCalc = new IndependentAllelesDiploidExactAFCalc(nSamples, 4); + List calcs = AFCalcFactory.createAFCalcs( + Arrays.asList( + AFCalcFactory.Calculation.EXACT_REFERENCE, + AFCalcFactory.Calculation.EXACT_INDEPENDENT, + AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY + ), 4, 2, 2, 2); final double[] priors = MathUtils.normalizeFromLog10(new double[2*nSamples+1], true); // flat priors - for ( ExactAFCalc model : Arrays.asList(diploidCalc, indCalc) ) { + for ( AFCalc model : calcs ) { final GetGLsTest onlyInformative = new GetGLsTest(model, testData.nAltAlleles, testData.called, priors, "flat"); for ( int rotation = 0; rotation < nSamples; rotation++ ) { @@ -263,8 +268,8 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { } } - private void compareAFCalcResults(final AFCalcResult actual, final AFCalcResult expected, final ExactAFCalc calc, final boolean onlyPosteriorsShouldBeEqual) { - final double TOLERANCE = 2; // TODO -- tighten up tolerances -- cannot be tightened up until we finalize the independent alleles model + private void compareAFCalcResults(final AFCalcResult actual, final AFCalcResult expected, final AFCalc calc, final boolean onlyPosteriorsShouldBeEqual) { + final double TOLERANCE = calc.getMaxAltAlleles() > 1 ? 2 : 0.1; // much tighter constraints on bi-allelic results if ( ! onlyPosteriorsShouldBeEqual ) { Assert.assertEquals(actual.getLog10PriorOfAFEq0(), expected.getLog10PriorOfAFEq0(), TOLERANCE, "Priors AF == 0"); @@ -321,14 +326,14 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final Genotype g; final double pNonRef, tolerance; final boolean canScale; - final List badModels; + final List badModels; final VariantContext vc; private PNonRefData(final VariantContext vc, Genotype g, double pNonRef, double tolerance, final boolean canScale) { - this(vc, g, pNonRef, tolerance, canScale, Collections.emptyList()); + this(vc, g, pNonRef, tolerance, canScale, Collections.emptyList()); } - private PNonRefData(final VariantContext vc, Genotype g, double pNonRef, double tolerance, final boolean canScale, final List badModels) { + private PNonRefData(final VariantContext vc, Genotype g, double pNonRef, double tolerance, final boolean canScale, final List badModels) { this.g = g; this.pNonRef = pNonRef; this.tolerance = tolerance; @@ -365,7 +370,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make(); final ExactAFCalculationTestBuilder.PriorType priorType = ExactAFCalculationTestBuilder.PriorType.flat; - final List constrainedModel = Arrays.asList(ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact); + final List constrainedModel = Arrays.asList(AFCalcFactory.Calculation.EXACT_CONSTRAINED); final double TOLERANCE = 0.5; @@ -387,7 +392,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { new PNonRefData(vc3, makePL(GG, 10, 10, 10, 10, 10, 0), 0.9166667, TOLERANCE, false) ); - for ( ExactAFCalculationTestBuilder.ModelType modelType : Arrays.asList(ExactAFCalculationTestBuilder.ModelType.ReferenceDiploidExact, ExactAFCalculationTestBuilder.ModelType.IndependentDiploidExact) ) { + for ( AFCalcFactory.Calculation modelType : Arrays.asList(AFCalcFactory.Calculation.EXACT_REFERENCE, AFCalcFactory.Calculation.EXACT_INDEPENDENT) ) { for ( int nNonInformative = 0; nNonInformative < 3; nNonInformative++ ) { for ( final PNonRefData rootData : initialPNonRefData ) { for ( int plScale = 1; plScale <= 100000; plScale *= 10 ) { @@ -405,7 +410,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "PNonRef") private void testPNonRef(final VariantContext vcRoot, - ExactAFCalculationTestBuilder.ModelType modelType, + AFCalcFactory.Calculation modelType, ExactAFCalculationTestBuilder.PriorType priorType, final List genotypes, final double expectedPNonRef, @@ -433,15 +438,16 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { public Object[][] makeModels() { List tests = new ArrayList(); - tests.add(new Object[]{new ReferenceDiploidExactAFCalc(2, 4)}); -// tests.add(new Object[]{new ConstrainedDiploidExactAFCalc(2, 4)}); -// tests.add(new Object[]{new GeneralPloidyExactAFCalc(2, 4, 2)}); + for ( final AFCalcFactory.Calculation calc : AFCalcFactory.Calculation.values() ) { + if ( calc.usableForParams(2, 4) ) + tests.add(new Object[]{AFCalcFactory.createAFCalc(calc, 2, 4)}); + } return tests.toArray(new Object[][]{}); } @Test(enabled = true, dataProvider = "Models") - public void testBiallelicPriors(final ExactAFCalc model) { + public void testBiallelicPriors(final AFCalc model) { final int REF_PL = 10; final Genotype AB = makePL(Arrays.asList(A,C), REF_PL, 0, 10000); @@ -465,142 +471,4 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { + expectedMLEAC + " priors " + Utils.join(",", priors)); } } - - @Test(enabled = false, dataProvider = "Models") - public void testTriallelicPriors(final ExactAFCalc model) { - // TODO - // TODO - // TODO THIS SEEMS TO ID A BUG IN THE EXACT MODEL FOR MULTI-ALLELICS, AS THE - // TODO SECOND ALLELE ISN'T HAVING A SQUARED PRIOR. TALK TO ERIC AND CONFIRM - // TODO - // TODO - final int REF_PL_AB = 10, REF_PL_AC = 20; // first AC goes, then AB - final Genotype AB = makePL(Arrays.asList(A,C), REF_PL_AB, 0, 10000, 10000, 10000); - final Genotype AC = makePL(Arrays.asList(A, G), REF_PL_AC, 10000, 10000, 0, 10000, 10000); - - for ( int log10NonRefPrior = 1; log10NonRefPrior < 100*REF_PL_AC; log10NonRefPrior += 1 ) { - final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior); - final double nonRefPrior = (1-refPrior) / 2; - final double[] priors = MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior, nonRefPrior, nonRefPrior, nonRefPrior}); - GetGLsTest cfg = new GetGLsTest(model, 2, Arrays.asList(AB, AC), priors, "pNonRef" + log10NonRefPrior); - final AFCalcResult resultTracker = cfg.execute(); - final int actualAC_AB = resultTracker.getAlleleCountsOfMLE()[0]; - - final double pRefABWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0]; - final double pHetABWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1]; - final int expectedAC_AB = pRefABWithPrior <= pHetABWithPrior ? 1 : 0; - Assert.assertEquals(actualAC_AB, expectedAC_AB, - "actual AC with priors " + log10NonRefPrior + " not expected " - + expectedAC_AB + " priors " + Utils.join(",", priors)); - - final double nonRefPriorSecondAllele = Math.pow(nonRefPrior, 2); - final double refPriorSecondAllele = 1 - nonRefPriorSecondAllele; - final int actualAC_AC = resultTracker.getAlleleCountsOfMLE()[1]; - final double pRefACWithPrior = AB.getLikelihoods().getAsVector()[0] + Math.log10(refPriorSecondAllele); - final double pHetACWithPrior = AC.getLikelihoods().getAsVector()[3] + Math.log10(nonRefPriorSecondAllele); - final int expectedAC_AC = pRefACWithPrior <= pHetACWithPrior ? 1 : 0; - Assert.assertEquals(actualAC_AC, expectedAC_AC, - "actual AC with priors " + log10NonRefPrior + " not expected " - + expectedAC_AC + " priors " + Utils.join(",", priors)); - } - } - - @DataProvider(name = "MaxACsToVisit") - public Object[][] makeMaxACsToVisit() { - List tests = new ArrayList(); - - final int nSamples = 10; - final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact; - - for (int nNonInformative = 0; nNonInformative < nSamples - 1; nNonInformative++ ) { - final int nChrom = (nSamples - nNonInformative) * 2; - for ( int i = 0; i < nChrom; i++ ) { - // bi-allelic - tests.add(new Object[]{nSamples, Arrays.asList(i), nNonInformative, modelType}); - - // tri-allelic - for ( int j = 0; j < (nChrom - i); j++) - tests.add(new Object[]{nSamples, Arrays.asList(i, j), nNonInformative, modelType}); - } - } - - return tests.toArray(new Object[][]{}); - } - - @Test(enabled = true, dataProvider = "MaxACsToVisit") - public void testMaxACsToVisit(final int nSamples, final List requestedACs, final int nNonInformative, final ExactAFCalculationTestBuilder.ModelType modelType) { - final int nAlts = requestedACs.size(); - final ExactAFCalculationTestBuilder testBuilder - = new ExactAFCalculationTestBuilder(nSamples, nAlts, modelType, - ExactAFCalculationTestBuilder.PriorType.human); - - final VariantContext vc = testBuilder.makeACTest(requestedACs, nNonInformative, 100); - final int[] maxACsToVisit = ((ConstrainedDiploidExactAFCalc)testBuilder.makeModel()).computeMaxACs(vc); - - testExpectedACs(vc, maxACsToVisit); - } - - private void testExpectedACs(final VariantContext vc, final int[] maxACsToVisit) { - // this is necessary because cannot ensure that the tester gives us back the - // requested ACs due to rounding errors - final List ACs = new ArrayList(); - for ( final Allele a : vc.getAlternateAlleles() ) - ACs.add(vc.getCalledChrCount(a)); - - for ( int i = 0; i < maxACsToVisit.length; i++ ) { - Assert.assertEquals(maxACsToVisit[i], (int)ACs.get(i), "Maximum AC computed wasn't equal to the max possible in the construction for alt allele " + i); - } - } - - @DataProvider(name = "MaxACsGenotypes") - public Object[][] makeMaxACsForGenotype() { - List tests = new ArrayList(); - - final List AA = Arrays.asList(A, A); - final List AC = Arrays.asList(A, C); - final List CC = Arrays.asList(C, C); - final List AG = Arrays.asList(A, G); - final List GG = Arrays.asList(G, G); - final List CG = Arrays.asList(C, G); - - final VariantContext vc2 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C)).make(); - final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make(); - - tests.add(new Object[]{vc2, makePL(AA, 0, 10, 10)}); - tests.add(new Object[]{vc2, makePL(AC, 10, 0, 10)}); - tests.add(new Object[]{vc2, makePL(CC, 10, 10, 0)}); - - // make sure non-informative => 0 - tests.add(new Object[]{vc2, makePL(AA, 0, 0, 0)}); - tests.add(new Object[]{vc3, makePL(AA, 0, 0, 0, 0, 0, 0)}); - - // multi-allelics - tests.add(new Object[]{vc3, makePL(AG, 10, 10, 10, 0, 10, 10)}); - tests.add(new Object[]{vc3, makePL(CG, 10, 10, 10, 10, 0, 10)}); - tests.add(new Object[]{vc3, makePL(GG, 10, 10, 10, 10, 10, 0)}); - - // deal with non-informatives third alleles - tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 0, 0, 10)}); - tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 10, 0, 10)}); - tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 10, 0, 0)}); - tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 0, 0, 0)}); - tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 0, 0, 10)}); - tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 10, 0, 10)}); - tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 10, 0, 0)}); - tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 0, 0, 0)}); - - return tests.toArray(new Object[][]{}); - } - - @Test(enabled = true, dataProvider = "MaxACsGenotypes") - private void testMakeACByGenotype(final VariantContext vcRoot, final Genotype g) { - final VariantContext vc = new VariantContextBuilder(vcRoot).genotypes(g).make(); - - final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact; - final ExactAFCalculationTestBuilder testBuilder - = new ExactAFCalculationTestBuilder(1, vc.getNAlleles()-1, modelType, - ExactAFCalculationTestBuilder.PriorType.human); - final int[] maxACsToVisit = ((ConstrainedDiploidExactAFCalc)testBuilder.makeModel()).computeMaxACs(vc); - testExpectedACs(vc, maxACsToVisit); - } } \ No newline at end of file diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java index 3fbbb603b..22c429e0b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java @@ -94,7 +94,7 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "TestCombineGLsWithDrops") private void testCombineGLsWithDrops(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected, Set allelesToDrop) { - final IndependentAllelesDiploidExactAFCalc calc = new IndependentAllelesDiploidExactAFCalc(1, 4); + final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4); final Genotype combined = calc.combineGLs(testg, altIndex, allelesToDrop, nAlts); Assert.assertEquals(combined.getPL(), expected.getPL(), @@ -136,7 +136,7 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "TestMakeAlleleConditionalContexts") private void testMakeAlleleConditionalContexts(final VariantContext vc, final List expectedVCs) { - final IndependentAllelesDiploidExactAFCalc calc = new IndependentAllelesDiploidExactAFCalc(1, 4); + final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4); final List biAllelicVCs = calc.makeAlleleConditionalContexts(vc); Assert.assertEquals(biAllelicVCs.size(), expectedVCs.size()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index d3dd46a0a..885463fcb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; -import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalc; +import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @@ -42,7 +42,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection */ @Advanced @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false) - protected AFCalc.Model AFmodel = AFCalc.Model.EXACT; + public AFCalcFactory.Calculation AFmodel = AFCalcFactory.Calculation.EXACT; /** * The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index bfdecfa68..3c3bb4305 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalc; +import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; @@ -351,7 +352,7 @@ public class UnifiedGenotyperEngine { // initialize the data for this thread if that hasn't been done yet if ( afcm.get() == null ) { - afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); + afcm.set(AFCalcFactory.createAFCalc(UAC, N, logger)); } // estimate our confidence in a reference call and return @@ -724,36 +725,6 @@ public class UnifiedGenotyperEngine { return glcm; } - private static AFCalc getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) { - - List> afClasses = new PluginManager(AFCalc.class).getPlugins(); - - // user-specified name - String afModelName = UAC.AFmodel.implementationName; - - if (!afModelName.contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) - afModelName = GPSTRING + afModelName; - else - afModelName = "Diploid" + afModelName; - - for (int i = 0; i < afClasses.size(); i++) { - Class afClass = afClasses.get(i); - String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase(); - if (afModelName.equalsIgnoreCase(key)) { - try { - Object args[] = new Object[]{UAC,N,logger,verboseWriter}; - Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class); - - return (AFCalc)c.newInstance(args); - } - catch (Exception e) { - throw new IllegalArgumentException("Unexpected AFCalc " + UAC.AFmodel); - } - } - } - throw new IllegalArgumentException("Unexpected AFCalc " + UAC.AFmodel); - } - public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding allelesBinding) { if ( tracker == null || ref == null || logger == null ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java index 370ffb68d..75a5bfe7b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java @@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -51,53 +50,36 @@ import java.util.List; public abstract class AFCalc implements Cloneable { private final static Logger defaultLogger = Logger.getLogger(AFCalc.class); - public enum Model { - /** The default model with the best performance in all cases */ - EXACT("ExactAFCalc"); + protected final int nSamples; + protected final int maxAlternateAllelesToGenotype; + protected final int maxAlternateAllelesForIndels; - public final String implementationName; - - private Model(String implementationName) { - this.implementationName = implementationName; - } - } - - protected int nSamples; - protected int MAX_ALTERNATE_ALLELES_TO_GENOTYPE; - protected int MAX_ALTERNATE_ALLELES_FOR_INDELS; - - protected Logger logger; - protected PrintStream verboseWriter; - - protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY; + protected Logger logger = defaultLogger; private SimpleTimer callTimer = new SimpleTimer(); private PrintStream callReport = null; private final AFCalcResultTracker resultTracker; - protected AFCalc(final UnifiedArgumentCollection UAC, final int nSamples, final Logger logger, final PrintStream verboseWriter) { - this(nSamples, UAC.MAX_ALTERNATE_ALLELES, UAC.MAX_ALTERNATE_ALLELES_FOR_INDELS, UAC.exactCallsLog, logger, verboseWriter); - } - - protected AFCalc(final int nSamples, - final int maxAltAlleles, - final int maxAltAllelesForIndels, - final File exactCallsLog, - final Logger logger, - final PrintStream verboseWriter) { + protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) { if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples); if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles); + if ( maxAltAllelesForIndels < 1 ) throw new IllegalArgumentException("maxAltAllelesForIndels must be greater than zero " + maxAltAllelesForIndels); + if ( ploidy < 1 ) throw new IllegalArgumentException("ploidy must be > 0 but got " + ploidy); this.nSamples = nSamples; - this.MAX_ALTERNATE_ALLELES_TO_GENOTYPE = maxAltAlleles; - this.MAX_ALTERNATE_ALLELES_FOR_INDELS = maxAltAllelesForIndels; - this.logger = logger == null ? defaultLogger : logger; - this.verboseWriter = verboseWriter; - if ( exactCallsLog != null ) - initializeOutputFile(exactCallsLog); + this.maxAlternateAllelesToGenotype = maxAltAlleles; + this.maxAlternateAllelesForIndels = maxAltAllelesForIndels; this.resultTracker = new AFCalcResultTracker(Math.max(maxAltAlleles, maxAltAllelesForIndels)); } + public void enableProcessLog(final File exactCallsLog) { + initializeOutputFile(exactCallsLog); + } + + public void setLogger(Logger logger) { + this.logger = logger; + } + /** * Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc * @@ -184,7 +166,7 @@ public abstract class AFCalc implements Cloneable { // --------------------------------------------------------------------------- public int getMaxAltAlleles() { - return Math.max(MAX_ALTERNATE_ALLELES_TO_GENOTYPE, MAX_ALTERNATE_ALLELES_FOR_INDELS); + return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcFactory.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcFactory.java new file mode 100644 index 000000000..046593c4a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcFactory.java @@ -0,0 +1,225 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; + +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.lang.reflect.Constructor; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +/** + * Factory to make AFCalculations + */ +public class AFCalcFactory { + /** + * Enumeration of usable AF calculation, their constraints (i.e. ploidy). + * + * Note that the order these occur in the enum is the order of preference, so + * the first value is taken over the second when multiple calculations satisfy + * the needs of the request (i.e., considering ploidy). + */ + public enum Calculation { + /** The default implementation */ + EXACT(ReferenceDiploidExactAFCalc.class, 2, -1), + + /** reference implementation of multi-allelic EXACT model */ + EXACT_REFERENCE(ReferenceDiploidExactAFCalc.class, 2, -1), + + /** expt. implementation */ + @Deprecated + EXACT_CONSTRAINED(ConstrainedDiploidExactAFCalc.class, 2, -1), + + /** expt. implementation -- for testing only */ + EXACT_INDEPENDENT(IndependentAllelesDiploidExactAFCalc.class, 2, -1), + + /** original biallelic exact model, for testing only */ + EXACT_ORIGINAL(OriginalDiploidExactAFCalc.class, 2, 2), + + /** implementation that supports any sample ploidy */ + EXACT_GENERAL_PLOIDY("GeneralPloidyExactAFCalc", -1, -1); + + /** + * Must be a name because we look this up dynamically + */ + public final String className; + public final int maxAltAlleles; + public final int requiredPloidy; + + private Calculation(final String className, final int requiredPloidy, final int maxAltAlleles) { + this.className = className; + this.requiredPloidy = requiredPloidy; + this.maxAltAlleles = maxAltAlleles; + } + + private Calculation(final Class clazz, final int requiredPloidy, final int maxAltAlleles) { + this(clazz.getSimpleName(), requiredPloidy, maxAltAlleles); + } + + public boolean usableForParams(final int requestedPloidy, final int requestedMaxAltAlleles) { + return (requiredPloidy == -1 || requiredPloidy == requestedPloidy) + && (maxAltAlleles == -1 || maxAltAlleles >= requestedMaxAltAlleles); + } + } + + private static final Map> afClasses; + static { + afClasses = new PluginManager(AFCalc.class).getPluginsByName(); + } + + private AFCalcFactory() { + + } + + private static Class getClassByName(final String name) { + for ( final Class clazz : afClasses.values() ) { + if ( clazz.getSimpleName().contains(name) ) { + return clazz; + } + } + + return null; + } + + /** + * Create a new AFCalc based on the parameters in the UAC + * + * @param UAC the UnifiedArgumentCollection containing the command-line parameters for the caller + * @param nSamples the number of samples we will be using + * @param logger an optional (can be null) logger to override the default in the model + * @return an initialized AFCalc + */ + public static AFCalc createAFCalc(final UnifiedArgumentCollection UAC, + final int nSamples, + final Logger logger) { + final int maxAltAlleles = Math.max(UAC.MAX_ALTERNATE_ALLELES, UAC.MAX_ALTERNATE_ALLELES_FOR_INDELS); + if ( ! UAC.AFmodel.usableForParams(UAC.samplePloidy, maxAltAlleles) ) { + logger.warn("Requested ploidy / maxAltAlleles " + UAC.samplePloidy + " not supported by requested model " + UAC.AFmodel + " looking for an option"); + final List supportingCalculations = new LinkedList(); + for ( final Calculation calc : Calculation.values() ) { + if ( calc.usableForParams(UAC.samplePloidy, maxAltAlleles) ) + supportingCalculations.add(calc); + } + + if ( supportingCalculations.isEmpty() ) + throw new UserException("no AFCalculation model found that supports ploidy of " + UAC.samplePloidy + " and max alt alleles " + maxAltAlleles); + else if ( supportingCalculations.size() > 1 ) + logger.warn("Warning, multiple supporting AFCalcs found " + Utils.join(",", supportingCalculations) + " choosing first arbitrarily"); + else + UAC.AFmodel = supportingCalculations.get(0); + } + + final AFCalc calc = createAFCalc(UAC.AFmodel, nSamples, UAC.MAX_ALTERNATE_ALLELES, UAC.MAX_ALTERNATE_ALLELES_FOR_INDELS, UAC.samplePloidy); + + if ( logger != null ) calc.setLogger(logger); + if ( UAC.exactCallsLog != null ) calc.enableProcessLog(UAC.exactCallsLog); + + return calc; + } + + /** + * Create a new AFCalc, choosing the best implementation based on the given parameters, assuming + * that we will only be requesting bi-allelic variants to diploid genotypes + * + * @param nSamples the number of samples we'll be using + * + * @return an initialized AFCalc + */ + public static AFCalc createAFCalc(final int nSamples) { + return createAFCalc(chooseBestCalculation(nSamples, 2, 1), nSamples, 2, 2, 2); + } + + /** + * Create a new AFCalc that supports maxAltAlleles for all variants and diploid genotypes + * + * @param calc the calculation we'd like to use + * @param nSamples the number of samples we'll be using + * @param maxAltAlleles the max. alt alleles for both SNPs and indels + * + * @return an initialized AFCalc + */ + public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles) { + return createAFCalc(calc, nSamples, maxAltAlleles, maxAltAlleles, 2); + } + + /** + * Create a new AFCalc, choosing the best implementation based on the given parameters + * + * @param nSamples the number of samples we'll be using + * @param maxAltAlleles the max. alt alleles to consider for SNPs + * @param maxAltAllelesForIndels the max. alt alleles to consider for non-SNPs + * @param ploidy the sample ploidy. Must be consistent with the calc + * + * @return an initialized AFCalc + */ + public static AFCalc createAFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) { + final int maxAlt = Math.max(maxAltAlleles, maxAltAllelesForIndels); + return createAFCalc(chooseBestCalculation(nSamples, ploidy, maxAlt), nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); + } + + /** + * Choose the best calculation for nSamples and ploidy + * + * @param nSamples + * @param ploidy + * @param maxAltAlleles + * @return + */ + private static Calculation chooseBestCalculation(final int nSamples, final int ploidy, final int maxAltAlleles) { + for ( final Calculation calc : Calculation.values() ) { + if ( calc.usableForParams(ploidy, maxAltAlleles) ) { + return calc; + } + } + + throw new IllegalStateException("no calculation found that supports nSamples " + nSamples + " ploidy " + ploidy + " and maxAltAlleles " + maxAltAlleles); + } + + /** + * Create a new AFCalc + * + * @param calc the calculation to use + * @param nSamples the number of samples we'll be using + * @param maxAltAlleles the max. alt alleles to consider for SNPs + * @param maxAltAllelesForIndels the max. alt alleles to consider for non-SNPs + * @param ploidy the sample ploidy. Must be consistent with the calc + * + * @return an initialized AFCalc + */ + public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) { + if ( calc == null ) throw new IllegalArgumentException("Calculation cannot be null"); + if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples); + if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles); + if ( maxAltAllelesForIndels < 1 ) throw new IllegalArgumentException("maxAltAllelesForIndels must be greater than zero " + maxAltAllelesForIndels); + if ( ploidy < 1 ) throw new IllegalArgumentException("sample ploidy must be greater than zero " + ploidy); + + final int maxAlt = Math.max(maxAltAlleles, maxAltAllelesForIndels); + if ( ! calc.usableForParams(ploidy, maxAlt) ) + throw new IllegalArgumentException("AFCalc " + calc + " does not support requested ploidy " + ploidy); + + final Class afClass = getClassByName(calc.className); + if ( afClass == null ) + throw new IllegalArgumentException("Unexpected AFCalc " + calc); + + try { + Object args[] = new Object[]{nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy}; + Constructor c = afClass.getDeclaredConstructor(int.class, int.class, int.class, int.class); + return (AFCalc)c.newInstance(args); + } catch (Exception e) { + throw new ReviewedStingException("Could not instantiate AFCalc " + calc, e); + } + } + + protected static List createAFCalcs(final List calcs, final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) { + final List AFCalcs = new LinkedList(); + + for ( final Calculation calc : calcs ) + AFCalcs.add(createAFCalc(calc, nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy)); + + return AFCalcs; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java index 57ff4ec36..879edfea7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultTracker.java @@ -44,6 +44,8 @@ import java.util.Map; * TODO -- WHAT IS THE CONTRACT ON MAP AC AND P NON REF? */ class AFCalcResultTracker { + protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY; + // These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles protected double log10MLE; protected double log10MAP; @@ -116,7 +118,10 @@ class AFCalcResultTracker { */ public double getLog10LikelihoodOfAFNotZero() { if ( log10LikelihoodsMatrixSum == null ) { - log10LikelihoodsMatrixSum = MathUtils.log10sumLog10(log10LikelihoodsMatrixValues, 0, currentLikelihoodsCacheIndex); + if ( currentLikelihoodsCacheIndex == 0 ) // there's nothing to sum up, so make the sum equal to the smallest thing we have + log10LikelihoodsMatrixSum = MathUtils.LOG10_P_OF_ZERO; + else + log10LikelihoodsMatrixSum = MathUtils.log10sumLog10(log10LikelihoodsMatrixValues, 0, currentLikelihoodsCacheIndex); } return log10LikelihoodsMatrixSum; } @@ -172,7 +177,7 @@ class AFCalcResultTracker { * Resetting of the data is done by the calculation model itself, so shouldn't be done by callers any longer */ protected void reset() { - log10MLE = log10MAP = log10LikelihoodOfAFzero = log10PosteriorOfAFzero = AFCalc.VALUE_NOT_CALCULATED; + log10MLE = log10MAP = log10LikelihoodOfAFzero = log10PosteriorOfAFzero = VALUE_NOT_CALCULATED; for ( int i = 0; i < alleleCountsOfMLE.length; i++ ) { alleleCountsOfMLE[i] = 0; alleleCountsOfMAP[i] = 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java index 81bfb6cf8..36d53ceaa 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java @@ -2,22 +2,15 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.PrintStream; - +@Deprecated public class ConstrainedDiploidExactAFCalc extends DiploidExactAFCalc { - public ConstrainedDiploidExactAFCalc(final int nSamples, final int maxAltAlleles) { - super(nSamples, maxAltAlleles); - } - - public ConstrainedDiploidExactAFCalc(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { - super(UAC, N, logger, verboseWriter); + protected ConstrainedDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) { + super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); } protected StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java index 00fdd83c9..8b12dff61 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java @@ -25,21 +25,15 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.*; -import java.io.PrintStream; import java.util.*; public abstract class DiploidExactAFCalc extends ExactAFCalc { - public DiploidExactAFCalc(final int nSamples, final int maxAltAlleles) { - super(nSamples, maxAltAlleles, maxAltAlleles, null, null, null); - } - - public DiploidExactAFCalc(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { - super(UAC, N, logger, verboseWriter); + public DiploidExactAFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) { + super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); + if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy); } protected abstract StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker); @@ -91,7 +85,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { @Override protected VariantContext reduceScope(final VariantContext vc) { - final int myMaxAltAllelesToGenotype = vc.getType().equals(VariantContext.Type.INDEL) ? MAX_ALTERNATE_ALLELES_FOR_INDELS : MAX_ALTERNATE_ALLELES_TO_GENOTYPE; + final int myMaxAltAllelesToGenotype = vc.getType().equals(VariantContext.Type.INDEL) ? maxAlternateAllelesForIndels : maxAlternateAllelesToGenotype; // don't try to genotype too many alternate alleles if ( vc.getAlternateAlleles().size() > myMaxAltAllelesToGenotype ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalc.java index 98ecc2029..df0793352 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalc.java @@ -25,16 +25,12 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import java.io.File; -import java.io.PrintStream; import java.util.ArrayList; /** @@ -43,12 +39,8 @@ import java.util.ArrayList; abstract class ExactAFCalc extends AFCalc { protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first - protected ExactAFCalc(final UnifiedArgumentCollection UAC, final int nSamples, final Logger logger, final PrintStream verboseWriter) { - super(UAC, nSamples, logger, verboseWriter); - } - - protected ExactAFCalc(final int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, File exactCallsLog, Logger logger, PrintStream verboseWriter) { - super(nSamples, maxAltAlleles, maxAltAllelesForIndels, exactCallsLog, logger, verboseWriter); + protected ExactAFCalc(final int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) { + super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java index 2b1394236..b135b1688 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java @@ -27,26 +27,18 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.*; -import java.io.PrintStream; import java.util.*; public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { private final static List BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); final ReferenceDiploidExactAFCalc refModel; - public IndependentAllelesDiploidExactAFCalc(final int nSamples, final int maxAltAlleles) { - super(nSamples, maxAltAlleles); - refModel = new ReferenceDiploidExactAFCalc(nSamples, 1); - } - - public IndependentAllelesDiploidExactAFCalc(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { - super(UAC, N, logger, verboseWriter); - refModel = new ReferenceDiploidExactAFCalc(nSamples, 1); + protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) { + super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); + refModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy); } @Override @@ -160,9 +152,7 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { final Allele altAllele = vc.getAlternateAllele(altI); final List biallelic = Arrays.asList(vc.getReference(), altAllele); vcs.add(biallelicCombinedGLs(vc, biallelic, afZeroAlleles, altI + 1)); - - // TODO -- WE NEED TO TRUNCATE THE ALLELES TO COMPUTE THE TRUE POSTERIOR BUT MUST INCLUDE IT TO GET THE TRUE MLE -// afZeroAlleles.add(altAllele); + afZeroAlleles.add(altAllele); } return vcs; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java index fb652a8fb..093bf47d5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java @@ -1,12 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.PrintStream; import java.util.ArrayList; import java.util.Collections; import java.util.Map; @@ -15,12 +12,8 @@ import java.util.Map; * Original bi-allelic ~O(N) implementation. Kept here for posterity and reference */ public class OriginalDiploidExactAFCalc extends DiploidExactAFCalc { - public OriginalDiploidExactAFCalc(final int nSamples, final int maxAltAlleles) { - super(nSamples, maxAltAlleles); - } - - public OriginalDiploidExactAFCalc(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { - super(UAC, N, logger, verboseWriter); + protected OriginalDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) { + super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); } protected StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java index 9aa93061f..4de983508 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java @@ -1,18 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.PrintStream; - public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc { - public ReferenceDiploidExactAFCalc(final int nSamples, final int maxAltAlleles) { - super(nSamples, maxAltAlleles); - } - - public ReferenceDiploidExactAFCalc(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { - super(UAC, N, logger, verboseWriter); + protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) { + super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); } protected StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java index f7f3e2a7a..f8c871e7d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java @@ -23,9 +23,9 @@ */ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; +import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalc; +import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult; -import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.DiploidExactAFCalc; -import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ReferenceDiploidExactAFCalc; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.TreeSet; @@ -34,7 +34,7 @@ import java.util.TreeSet; public class GLBasedSampleSelector extends SampleSelector { double[] flatPriors = null; final double referenceLikelihood; - DiploidExactAFCalc AFCalculator; + AFCalc AFCalculator; public GLBasedSampleSelector(TreeSet sm, double refLik) { super(sm); @@ -52,7 +52,7 @@ public class GLBasedSampleSelector extends SampleSelector { // do we want to apply a prior? maybe user-spec? if ( flatPriors == null ) { flatPriors = new double[1+2*samples.size()]; - AFCalculator = new ReferenceDiploidExactAFCalc(samples.size(), 4); + AFCalculator = AFCalcFactory.createAFCalc(samples.size(), 4, 4, 2); } final AFCalcResult result = AFCalculator.getLog10PNonRef(subContext, flatPriors); // do we want to let this qual go up or down? diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index f20265255..2f97d6e40 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -58,6 +58,12 @@ public class MathUtils { private static final int MAXN = 50000; private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients + /** + * The smallest log10 value we'll emit from normalizeFromLog10 and other functions + * where the real-space value is 0.0. + */ + public final static double LOG10_P_OF_ZERO = -10000; + static { log10Cache = new double[LOG10_CACHE_SIZE]; log10FactorialCache = new double[LOG10_CACHE_SIZE]; @@ -572,12 +578,6 @@ public class MathUtils { return normalizeFromLog10(array, takeLog10OfOutput, false); } - /** - * The smallest log10 value we'll emit from normalizeFromLog10 and other functions - * where the real-space value is 0.0. - */ - final static double LOG10_P_OF_ZERO = -10000; - /** * See #normalizeFromLog10 but with the additional option to use an approximation that keeps the calculation always in log-space *