Framework for evaluating the performance and scaling of the ExactAF models
This commit is contained in:
parent
17ca543937
commit
3663fe1555
|
|
@ -0,0 +1,192 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
import org.apache.log4j.ConsoleAppender;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.apache.log4j.SimpleLayout;
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
|
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||||
|
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created with IntelliJ IDEA.
|
||||||
|
* User: depristo
|
||||||
|
* Date: 10/2/12
|
||||||
|
* Time: 10:25 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class ExactAFCalculationPerformanceTest {
|
||||||
|
final static Logger logger = Logger.getLogger(ExactAFCalculationPerformanceTest.class);
|
||||||
|
|
||||||
|
private static abstract class Analysis {
|
||||||
|
final GATKReport report;
|
||||||
|
|
||||||
|
public Analysis(final String name, final List<String> columns) {
|
||||||
|
report = GATKReport.newSimpleReport(name, columns);
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract void run(final ExactAFCalculationTestBuilder testBuilder,
|
||||||
|
final List<Object> coreColumns);
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return getTable().getTableName();
|
||||||
|
}
|
||||||
|
|
||||||
|
public GATKReportTable getTable() {
|
||||||
|
return report.getTables().iterator().next();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class AnalyzeByACAndPL extends Analysis {
|
||||||
|
public AnalyzeByACAndPL(final List<String> columns) {
|
||||||
|
super("AnalyzeByACAndPL", Utils.append(columns, "non.type.pls", "ac"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run(final ExactAFCalculationTestBuilder testBuilder, final List<Object> coreValues) {
|
||||||
|
final SimpleTimer timer = new SimpleTimer();
|
||||||
|
|
||||||
|
for ( final int nonTypePL : Arrays.asList(10, 100, 1000) ) {
|
||||||
|
final ExactAFCalculation calc = testBuilder.makeModel();
|
||||||
|
final double[] priors = testBuilder.makePriors();
|
||||||
|
|
||||||
|
for ( int ac = 0; ac < testBuilder.getnSamples(); ac++ ) {
|
||||||
|
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
|
||||||
|
|
||||||
|
timer.start();
|
||||||
|
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vc, priors);
|
||||||
|
final long runtime = timer.getElapsedTimeNano();
|
||||||
|
|
||||||
|
final List<Object> columns = new LinkedList<Object>(coreValues);
|
||||||
|
columns.addAll(Arrays.asList(runtime, result.getnEvaluations(), nonTypePL, ac));
|
||||||
|
report.addRowList(columns);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class AnalyzeBySingletonPosition extends Analysis {
|
||||||
|
public AnalyzeBySingletonPosition(final List<String> columns) {
|
||||||
|
super("AnalyzeBySingletonPosition", Utils.append(columns, "non.type.pls", "position.of.singleton"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run(final ExactAFCalculationTestBuilder testBuilder, final List<Object> coreValues) {
|
||||||
|
final SimpleTimer timer = new SimpleTimer();
|
||||||
|
|
||||||
|
for ( final int nonTypePL : Arrays.asList(10, 100, 1000) ) {
|
||||||
|
final ExactAFCalculation calc = testBuilder.makeModel();
|
||||||
|
final double[] priors = testBuilder.makePriors();
|
||||||
|
|
||||||
|
int ac = 1;
|
||||||
|
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
|
||||||
|
|
||||||
|
for ( int position = 0; position < vc.getNSamples(); position++ ) {
|
||||||
|
final VariantContextBuilder vcb = new VariantContextBuilder(vc);
|
||||||
|
final List<Genotype> genotypes = new ArrayList<Genotype>(vc.getGenotypes());
|
||||||
|
Collections.rotate(genotypes, position);
|
||||||
|
vcb.genotypes(genotypes);
|
||||||
|
|
||||||
|
timer.start();
|
||||||
|
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vcb.make(), priors);
|
||||||
|
final long runtime = timer.getElapsedTimeNano();
|
||||||
|
|
||||||
|
final List<Object> columns = new LinkedList<Object>(coreValues);
|
||||||
|
columns.addAll(Arrays.asList(runtime, result.getnEvaluations(), nonTypePL, position));
|
||||||
|
report.addRowList(columns);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class AnalyzeByNonInformative extends Analysis {
|
||||||
|
public AnalyzeByNonInformative(final List<String> columns) {
|
||||||
|
super("AnalyzeByNonInformative", Utils.append(columns, "non.type.pls", "n.non.informative"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run(final ExactAFCalculationTestBuilder testBuilder, final List<Object> coreValues) {
|
||||||
|
final SimpleTimer timer = new SimpleTimer();
|
||||||
|
|
||||||
|
for ( final int nonTypePL : Arrays.asList(10, 100, 1000) ) {
|
||||||
|
final ExactAFCalculation calc = testBuilder.makeModel();
|
||||||
|
final double[] priors = testBuilder.makePriors();
|
||||||
|
|
||||||
|
int ac = 1;
|
||||||
|
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
|
||||||
|
final Genotype nonInformative = testBuilder.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 0, 0, 0);
|
||||||
|
|
||||||
|
for ( int nNonInformative = 0; nNonInformative < vc.getNSamples(); nNonInformative++ ) {
|
||||||
|
final VariantContextBuilder vcb = new VariantContextBuilder(vc);
|
||||||
|
|
||||||
|
final List<Genotype> genotypes = new ArrayList<Genotype>();
|
||||||
|
genotypes.addAll(vc.getGenotypes().subList(0, nNonInformative + 1));
|
||||||
|
genotypes.addAll(Collections.nCopies(vc.getNSamples() - nNonInformative, nonInformative));
|
||||||
|
vcb.genotypes(genotypes);
|
||||||
|
|
||||||
|
timer.start();
|
||||||
|
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vcb.make(), priors);
|
||||||
|
final long runtime = timer.getElapsedTimeNano();
|
||||||
|
|
||||||
|
final List<Object> columns = new LinkedList<Object>(coreValues);
|
||||||
|
columns.addAll(Arrays.asList(runtime, result.getnEvaluations(), nonTypePL, nNonInformative));
|
||||||
|
report.addRowList(columns);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(final String[] args) throws Exception {
|
||||||
|
logger.addAppender(new ConsoleAppender(new SimpleLayout()));
|
||||||
|
|
||||||
|
final List<String> coreColumns = Arrays.asList("iteration", "n.alt.alleles", "n.samples",
|
||||||
|
"exact.model", "prior.type", "runtime", "n.evaluations");
|
||||||
|
|
||||||
|
final PrintStream out = new PrintStream(new FileOutputStream(args[0]));
|
||||||
|
|
||||||
|
final boolean USE_GENERAL = false;
|
||||||
|
final List<ExactAFCalculationTestBuilder.ModelType> modelTypes = USE_GENERAL
|
||||||
|
? Arrays.asList(ExactAFCalculationTestBuilder.ModelType.values())
|
||||||
|
: Arrays.asList(ExactAFCalculationTestBuilder.ModelType.DiploidExact);
|
||||||
|
|
||||||
|
final boolean ONLY_HUMAN_PRIORS = false;
|
||||||
|
final List<ExactAFCalculationTestBuilder.PriorType> priorTypes = ONLY_HUMAN_PRIORS
|
||||||
|
? Arrays.asList(ExactAFCalculationTestBuilder.PriorType.values())
|
||||||
|
: Arrays.asList(ExactAFCalculationTestBuilder.PriorType.human);
|
||||||
|
|
||||||
|
final List<Analysis> analyzes = new ArrayList<Analysis>();
|
||||||
|
analyzes.add(new AnalyzeByACAndPL(coreColumns));
|
||||||
|
analyzes.add(new AnalyzeBySingletonPosition(coreColumns));
|
||||||
|
analyzes.add(new AnalyzeByNonInformative(coreColumns));
|
||||||
|
|
||||||
|
for ( int iteration = 0; iteration < 1; iteration++ ) {
|
||||||
|
for ( final int nAltAlleles : Arrays.asList(1) ) {
|
||||||
|
for ( final int nSamples : Arrays.asList(1, 10, 100) ) {
|
||||||
|
for ( final ExactAFCalculationTestBuilder.ModelType modelType : modelTypes ) {
|
||||||
|
for ( final ExactAFCalculationTestBuilder.PriorType priorType : priorTypes ) {
|
||||||
|
final ExactAFCalculationTestBuilder testBuilder
|
||||||
|
= new ExactAFCalculationTestBuilder(nSamples, 1, modelType, priorType);
|
||||||
|
|
||||||
|
for ( final Analysis analysis : analyzes ) {
|
||||||
|
logger.info(Utils.join("\t", Arrays.asList(iteration, nSamples, modelType, priorType, analysis.getName())));
|
||||||
|
final List<?> values = Arrays.asList(iteration, nAltAlleles, nSamples, modelType, priorType);
|
||||||
|
analysis.run(testBuilder, (List<Object>)values);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final GATKReport report = new GATKReport();
|
||||||
|
for ( final Analysis analysis : analyzes )
|
||||||
|
report.addTable(analysis.getTable());
|
||||||
|
report.print(out);
|
||||||
|
out.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,124 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class ExactAFCalculationTestBuilder {
|
||||||
|
final static Allele A = Allele.create("A", true);
|
||||||
|
final static Allele C = Allele.create("C");
|
||||||
|
final static Allele G = Allele.create("G");
|
||||||
|
final static Allele T = Allele.create("T");
|
||||||
|
|
||||||
|
static int sampleNameCounter = 0;
|
||||||
|
|
||||||
|
final int nSamples;
|
||||||
|
final int numAltAlleles;
|
||||||
|
final ModelType modelType;
|
||||||
|
final PriorType priorType;
|
||||||
|
|
||||||
|
public ExactAFCalculationTestBuilder(final int nSamples, final int numAltAlleles,
|
||||||
|
final ModelType modelType, final PriorType priorType) {
|
||||||
|
this.nSamples = nSamples;
|
||||||
|
this.numAltAlleles = numAltAlleles;
|
||||||
|
this.modelType = modelType;
|
||||||
|
this.priorType = priorType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public enum ModelType {
|
||||||
|
DiploidExact,
|
||||||
|
GeneralExact
|
||||||
|
}
|
||||||
|
|
||||||
|
public enum PriorType {
|
||||||
|
flat,
|
||||||
|
human
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getnSamples() {
|
||||||
|
return nSamples;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ExactAFCalculation makeModel() {
|
||||||
|
switch (modelType) {
|
||||||
|
case DiploidExact: return new DiploidExactAFCalculation(nSamples, 4);
|
||||||
|
case GeneralExact: return new GeneralPloidyExactAFCalculation(nSamples, 4, 2);
|
||||||
|
default: throw new RuntimeException("Unexpected type " + modelType);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public double[] makePriors() {
|
||||||
|
final int nPriorValues = 2*nSamples+1;
|
||||||
|
|
||||||
|
switch ( priorType ) {
|
||||||
|
case flat:
|
||||||
|
return MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
|
||||||
|
case human:
|
||||||
|
final double[] humanPriors = new double[nPriorValues];
|
||||||
|
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues-1, humanPriors, 0.001);
|
||||||
|
return humanPriors;
|
||||||
|
default:
|
||||||
|
throw new RuntimeException("Unexpected type " + priorType);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public VariantContext makeACTest(final int ac, final int nonTypePL) {
|
||||||
|
final int nChrom = nSamples * 2;
|
||||||
|
final double p = ac / (1.0 * nChrom);
|
||||||
|
final int nhomvar = (int)Math.floor(nChrom * p * p);
|
||||||
|
final int nhet = ac - 2 * nhomvar;
|
||||||
|
|
||||||
|
final int calcAC = nhet + 2 * nhomvar;
|
||||||
|
if ( calcAC != ac )
|
||||||
|
throw new IllegalStateException("calculated AC " + calcAC + " not equal to desired AC " + ac);
|
||||||
|
|
||||||
|
return makeACTest(nhet, nhomvar, nonTypePL);
|
||||||
|
}
|
||||||
|
|
||||||
|
public VariantContext makeACTest(final int nhet, final int nhomvar, final int nonTypePL) {
|
||||||
|
final List<Genotype> samples = new ArrayList<Genotype>(nSamples);
|
||||||
|
for ( int i = 0; i < nhet; i++ ) samples.add(makePL(GenotypeType.HET, nonTypePL));
|
||||||
|
for ( int i = 0; i < nhomvar; i++ ) samples.add(makePL(GenotypeType.HOM_VAR, nonTypePL));
|
||||||
|
for ( int i = 0; i < (nSamples-nhet-nhomvar); i++ ) samples.add(makePL(GenotypeType.HOM_REF, nonTypePL));
|
||||||
|
|
||||||
|
VariantContextBuilder vcb = new VariantContextBuilder("x", "1", 1, 1, getAlleles());
|
||||||
|
vcb.genotypes(samples);
|
||||||
|
return vcb.make();
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Allele> getAlleles() {
|
||||||
|
return Arrays.asList(A, C, G, T).subList(0, numAltAlleles+1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Allele> getAlleles(final GenotypeType type) {
|
||||||
|
switch (type) {
|
||||||
|
case HOM_REF: return Arrays.asList(getAlleles().get(0), getAlleles().get(0));
|
||||||
|
case HET: return Arrays.asList(getAlleles().get(0), getAlleles().get(1));
|
||||||
|
case HOM_VAR: return Arrays.asList(getAlleles().get(1), getAlleles().get(1));
|
||||||
|
default: throw new IllegalArgumentException("Unexpected type " + type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Genotype makePL(final List<Allele> expectedGT, int ... pls) {
|
||||||
|
GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++);
|
||||||
|
gb.alleles(expectedGT);
|
||||||
|
gb.PL(pls);
|
||||||
|
return gb.make();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Genotype makePL(final GenotypeType type, final int nonTypePL) {
|
||||||
|
GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++);
|
||||||
|
gb.alleles(getAlleles(type));
|
||||||
|
|
||||||
|
switch (type) {
|
||||||
|
case HOM_REF: gb.PL(new double[]{0, nonTypePL, nonTypePL}); break;
|
||||||
|
case HET: gb.PL(new double[]{nonTypePL, 0, nonTypePL}); break;
|
||||||
|
case HOM_VAR: gb.PL(new double[]{nonTypePL, nonTypePL, 0}); break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return gb.make();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -236,6 +236,12 @@ public class Utils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static <T> List<T> append(final List<T> left, T ... elts) {
|
||||||
|
final List<T> l = new LinkedList<T>(left);
|
||||||
|
l.addAll(Arrays.asList(elts));
|
||||||
|
return l;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a string of the values in joined by separator, such as A,B,C
|
* Returns a string of the values in joined by separator, such as A,B,C
|
||||||
*
|
*
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue