diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationPerformanceTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationPerformanceTest.java new file mode 100644 index 000000000..a325513b0 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationPerformanceTest.java @@ -0,0 +1,192 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.Logger; +import org.apache.log4j.SimpleLayout; +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.utils.SimpleTimer; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; + +import java.io.FileOutputStream; +import java.io.PrintStream; +import java.util.*; + +/** + * Created with IntelliJ IDEA. + * User: depristo + * Date: 10/2/12 + * Time: 10:25 AM + * To change this template use File | Settings | File Templates. + */ +public class ExactAFCalculationPerformanceTest { + final static Logger logger = Logger.getLogger(ExactAFCalculationPerformanceTest.class); + + private static abstract class Analysis { + final GATKReport report; + + public Analysis(final String name, final List columns) { + report = GATKReport.newSimpleReport(name, columns); + } + + public abstract void run(final ExactAFCalculationTestBuilder testBuilder, + final List coreColumns); + + public String getName() { + return getTable().getTableName(); + } + + public GATKReportTable getTable() { + return report.getTables().iterator().next(); + } + } + + private static class AnalyzeByACAndPL extends Analysis { + public AnalyzeByACAndPL(final List columns) { + super("AnalyzeByACAndPL", Utils.append(columns, "non.type.pls", "ac")); + } + + public void run(final ExactAFCalculationTestBuilder testBuilder, final List coreValues) { + final SimpleTimer timer = new SimpleTimer(); + + for ( final int nonTypePL : Arrays.asList(10, 100, 1000) ) { + final ExactAFCalculation calc = testBuilder.makeModel(); + final double[] priors = testBuilder.makePriors(); + + for ( int ac = 0; ac < testBuilder.getnSamples(); ac++ ) { + final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL); + + timer.start(); + final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vc, priors); + final long runtime = timer.getElapsedTimeNano(); + + final List columns = new LinkedList(coreValues); + columns.addAll(Arrays.asList(runtime, result.getnEvaluations(), nonTypePL, ac)); + report.addRowList(columns); + } + } + } + } + + private static class AnalyzeBySingletonPosition extends Analysis { + public AnalyzeBySingletonPosition(final List columns) { + super("AnalyzeBySingletonPosition", Utils.append(columns, "non.type.pls", "position.of.singleton")); + } + + public void run(final ExactAFCalculationTestBuilder testBuilder, final List coreValues) { + final SimpleTimer timer = new SimpleTimer(); + + for ( final int nonTypePL : Arrays.asList(10, 100, 1000) ) { + final ExactAFCalculation calc = testBuilder.makeModel(); + final double[] priors = testBuilder.makePriors(); + + int ac = 1; + final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL); + + for ( int position = 0; position < vc.getNSamples(); position++ ) { + final VariantContextBuilder vcb = new VariantContextBuilder(vc); + final List genotypes = new ArrayList(vc.getGenotypes()); + Collections.rotate(genotypes, position); + vcb.genotypes(genotypes); + + timer.start(); + final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vcb.make(), priors); + final long runtime = timer.getElapsedTimeNano(); + + final List columns = new LinkedList(coreValues); + columns.addAll(Arrays.asList(runtime, result.getnEvaluations(), nonTypePL, position)); + report.addRowList(columns); + } + } + } + } + + private static class AnalyzeByNonInformative extends Analysis { + public AnalyzeByNonInformative(final List columns) { + super("AnalyzeByNonInformative", Utils.append(columns, "non.type.pls", "n.non.informative")); + } + + public void run(final ExactAFCalculationTestBuilder testBuilder, final List coreValues) { + final SimpleTimer timer = new SimpleTimer(); + + for ( final int nonTypePL : Arrays.asList(10, 100, 1000) ) { + final ExactAFCalculation calc = testBuilder.makeModel(); + final double[] priors = testBuilder.makePriors(); + + int ac = 1; + final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL); + final Genotype nonInformative = testBuilder.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 0, 0, 0); + + for ( int nNonInformative = 0; nNonInformative < vc.getNSamples(); nNonInformative++ ) { + final VariantContextBuilder vcb = new VariantContextBuilder(vc); + + final List genotypes = new ArrayList(); + genotypes.addAll(vc.getGenotypes().subList(0, nNonInformative + 1)); + genotypes.addAll(Collections.nCopies(vc.getNSamples() - nNonInformative, nonInformative)); + vcb.genotypes(genotypes); + + timer.start(); + final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vcb.make(), priors); + final long runtime = timer.getElapsedTimeNano(); + + final List columns = new LinkedList(coreValues); + columns.addAll(Arrays.asList(runtime, result.getnEvaluations(), nonTypePL, nNonInformative)); + report.addRowList(columns); + } + } + } + } + + public static void main(final String[] args) throws Exception { + logger.addAppender(new ConsoleAppender(new SimpleLayout())); + + final List coreColumns = Arrays.asList("iteration", "n.alt.alleles", "n.samples", + "exact.model", "prior.type", "runtime", "n.evaluations"); + + final PrintStream out = new PrintStream(new FileOutputStream(args[0])); + + final boolean USE_GENERAL = false; + final List modelTypes = USE_GENERAL + ? Arrays.asList(ExactAFCalculationTestBuilder.ModelType.values()) + : Arrays.asList(ExactAFCalculationTestBuilder.ModelType.DiploidExact); + + final boolean ONLY_HUMAN_PRIORS = false; + final List priorTypes = ONLY_HUMAN_PRIORS + ? Arrays.asList(ExactAFCalculationTestBuilder.PriorType.values()) + : Arrays.asList(ExactAFCalculationTestBuilder.PriorType.human); + + final List analyzes = new ArrayList(); + analyzes.add(new AnalyzeByACAndPL(coreColumns)); + analyzes.add(new AnalyzeBySingletonPosition(coreColumns)); + analyzes.add(new AnalyzeByNonInformative(coreColumns)); + + for ( int iteration = 0; iteration < 1; iteration++ ) { + for ( final int nAltAlleles : Arrays.asList(1) ) { + for ( final int nSamples : Arrays.asList(1, 10, 100) ) { + for ( final ExactAFCalculationTestBuilder.ModelType modelType : modelTypes ) { + for ( final ExactAFCalculationTestBuilder.PriorType priorType : priorTypes ) { + final ExactAFCalculationTestBuilder testBuilder + = new ExactAFCalculationTestBuilder(nSamples, 1, modelType, priorType); + + for ( final Analysis analysis : analyzes ) { + logger.info(Utils.join("\t", Arrays.asList(iteration, nSamples, modelType, priorType, analysis.getName()))); + final List values = Arrays.asList(iteration, nAltAlleles, nSamples, modelType, priorType); + analysis.run(testBuilder, (List)values); + } + } + } + } + } + } + + final GATKReport report = new GATKReport(); + for ( final Analysis analysis : analyzes ) + report.addTable(analysis.getTable()); + report.print(out); + out.close(); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationTestBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationTestBuilder.java new file mode 100644 index 000000000..acc2a45ca --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationTestBuilder.java @@ -0,0 +1,124 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variantcontext.*; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class ExactAFCalculationTestBuilder { + final static Allele A = Allele.create("A", true); + final static Allele C = Allele.create("C"); + final static Allele G = Allele.create("G"); + final static Allele T = Allele.create("T"); + + static int sampleNameCounter = 0; + + final int nSamples; + final int numAltAlleles; + final ModelType modelType; + final PriorType priorType; + + public ExactAFCalculationTestBuilder(final int nSamples, final int numAltAlleles, + final ModelType modelType, final PriorType priorType) { + this.nSamples = nSamples; + this.numAltAlleles = numAltAlleles; + this.modelType = modelType; + this.priorType = priorType; + } + + public enum ModelType { + DiploidExact, + GeneralExact + } + + public enum PriorType { + flat, + human + } + + public int getnSamples() { + return nSamples; + } + + public ExactAFCalculation makeModel() { + switch (modelType) { + case DiploidExact: return new DiploidExactAFCalculation(nSamples, 4); + case GeneralExact: return new GeneralPloidyExactAFCalculation(nSamples, 4, 2); + default: throw new RuntimeException("Unexpected type " + modelType); + } + } + + public double[] makePriors() { + final int nPriorValues = 2*nSamples+1; + + switch ( priorType ) { + case flat: + return MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors + case human: + final double[] humanPriors = new double[nPriorValues]; + UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues-1, humanPriors, 0.001); + return humanPriors; + default: + throw new RuntimeException("Unexpected type " + priorType); + } + } + + public VariantContext makeACTest(final int ac, final int nonTypePL) { + final int nChrom = nSamples * 2; + final double p = ac / (1.0 * nChrom); + final int nhomvar = (int)Math.floor(nChrom * p * p); + final int nhet = ac - 2 * nhomvar; + + final int calcAC = nhet + 2 * nhomvar; + if ( calcAC != ac ) + throw new IllegalStateException("calculated AC " + calcAC + " not equal to desired AC " + ac); + + return makeACTest(nhet, nhomvar, nonTypePL); + } + + public VariantContext makeACTest(final int nhet, final int nhomvar, final int nonTypePL) { + final List samples = new ArrayList(nSamples); + for ( int i = 0; i < nhet; i++ ) samples.add(makePL(GenotypeType.HET, nonTypePL)); + for ( int i = 0; i < nhomvar; i++ ) samples.add(makePL(GenotypeType.HOM_VAR, nonTypePL)); + for ( int i = 0; i < (nSamples-nhet-nhomvar); i++ ) samples.add(makePL(GenotypeType.HOM_REF, nonTypePL)); + + VariantContextBuilder vcb = new VariantContextBuilder("x", "1", 1, 1, getAlleles()); + vcb.genotypes(samples); + return vcb.make(); + } + + public List getAlleles() { + return Arrays.asList(A, C, G, T).subList(0, numAltAlleles+1); + } + + public List getAlleles(final GenotypeType type) { + switch (type) { + case HOM_REF: return Arrays.asList(getAlleles().get(0), getAlleles().get(0)); + case HET: return Arrays.asList(getAlleles().get(0), getAlleles().get(1)); + case HOM_VAR: return Arrays.asList(getAlleles().get(1), getAlleles().get(1)); + default: throw new IllegalArgumentException("Unexpected type " + type); + } + } + + public Genotype makePL(final List expectedGT, int ... pls) { + GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++); + gb.alleles(expectedGT); + gb.PL(pls); + return gb.make(); + } + + public Genotype makePL(final GenotypeType type, final int nonTypePL) { + GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++); + gb.alleles(getAlleles(type)); + + switch (type) { + case HOM_REF: gb.PL(new double[]{0, nonTypePL, nonTypePL}); break; + case HET: gb.PL(new double[]{nonTypePL, 0, nonTypePL}); break; + case HOM_VAR: gb.PL(new double[]{nonTypePL, nonTypePL, 0}); break; + } + + return gb.make(); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 81f8fab7d..f4a200af0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -236,6 +236,12 @@ public class Utils { } } + public static List append(final List left, T ... elts) { + final List l = new LinkedList(left); + l.addAll(Arrays.asList(elts)); + return l; + } + /** * Returns a string of the values in joined by separator, such as A,B,C *