Framework for evaluating the performance and scaling of the ExactAF models
This commit is contained in:
parent
17ca543937
commit
3663fe1555
|
|
@ -0,0 +1,192 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.ConsoleAppender;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.log4j.SimpleLayout;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 10/2/12
|
||||
* Time: 10:25 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ExactAFCalculationPerformanceTest {
|
||||
final static Logger logger = Logger.getLogger(ExactAFCalculationPerformanceTest.class);
|
||||
|
||||
private static abstract class Analysis {
|
||||
final GATKReport report;
|
||||
|
||||
public Analysis(final String name, final List<String> columns) {
|
||||
report = GATKReport.newSimpleReport(name, columns);
|
||||
}
|
||||
|
||||
public abstract void run(final ExactAFCalculationTestBuilder testBuilder,
|
||||
final List<Object> coreColumns);
|
||||
|
||||
public String getName() {
|
||||
return getTable().getTableName();
|
||||
}
|
||||
|
||||
public GATKReportTable getTable() {
|
||||
return report.getTables().iterator().next();
|
||||
}
|
||||
}
|
||||
|
||||
private static class AnalyzeByACAndPL extends Analysis {
|
||||
public AnalyzeByACAndPL(final List<String> columns) {
|
||||
super("AnalyzeByACAndPL", Utils.append(columns, "non.type.pls", "ac"));
|
||||
}
|
||||
|
||||
public void run(final ExactAFCalculationTestBuilder testBuilder, final List<Object> coreValues) {
|
||||
final SimpleTimer timer = new SimpleTimer();
|
||||
|
||||
for ( final int nonTypePL : Arrays.asList(10, 100, 1000) ) {
|
||||
final ExactAFCalculation calc = testBuilder.makeModel();
|
||||
final double[] priors = testBuilder.makePriors();
|
||||
|
||||
for ( int ac = 0; ac < testBuilder.getnSamples(); ac++ ) {
|
||||
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
|
||||
|
||||
timer.start();
|
||||
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vc, priors);
|
||||
final long runtime = timer.getElapsedTimeNano();
|
||||
|
||||
final List<Object> columns = new LinkedList<Object>(coreValues);
|
||||
columns.addAll(Arrays.asList(runtime, result.getnEvaluations(), nonTypePL, ac));
|
||||
report.addRowList(columns);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static class AnalyzeBySingletonPosition extends Analysis {
|
||||
public AnalyzeBySingletonPosition(final List<String> columns) {
|
||||
super("AnalyzeBySingletonPosition", Utils.append(columns, "non.type.pls", "position.of.singleton"));
|
||||
}
|
||||
|
||||
public void run(final ExactAFCalculationTestBuilder testBuilder, final List<Object> coreValues) {
|
||||
final SimpleTimer timer = new SimpleTimer();
|
||||
|
||||
for ( final int nonTypePL : Arrays.asList(10, 100, 1000) ) {
|
||||
final ExactAFCalculation calc = testBuilder.makeModel();
|
||||
final double[] priors = testBuilder.makePriors();
|
||||
|
||||
int ac = 1;
|
||||
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
|
||||
|
||||
for ( int position = 0; position < vc.getNSamples(); position++ ) {
|
||||
final VariantContextBuilder vcb = new VariantContextBuilder(vc);
|
||||
final List<Genotype> genotypes = new ArrayList<Genotype>(vc.getGenotypes());
|
||||
Collections.rotate(genotypes, position);
|
||||
vcb.genotypes(genotypes);
|
||||
|
||||
timer.start();
|
||||
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vcb.make(), priors);
|
||||
final long runtime = timer.getElapsedTimeNano();
|
||||
|
||||
final List<Object> columns = new LinkedList<Object>(coreValues);
|
||||
columns.addAll(Arrays.asList(runtime, result.getnEvaluations(), nonTypePL, position));
|
||||
report.addRowList(columns);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static class AnalyzeByNonInformative extends Analysis {
|
||||
public AnalyzeByNonInformative(final List<String> columns) {
|
||||
super("AnalyzeByNonInformative", Utils.append(columns, "non.type.pls", "n.non.informative"));
|
||||
}
|
||||
|
||||
public void run(final ExactAFCalculationTestBuilder testBuilder, final List<Object> coreValues) {
|
||||
final SimpleTimer timer = new SimpleTimer();
|
||||
|
||||
for ( final int nonTypePL : Arrays.asList(10, 100, 1000) ) {
|
||||
final ExactAFCalculation calc = testBuilder.makeModel();
|
||||
final double[] priors = testBuilder.makePriors();
|
||||
|
||||
int ac = 1;
|
||||
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
|
||||
final Genotype nonInformative = testBuilder.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 0, 0, 0);
|
||||
|
||||
for ( int nNonInformative = 0; nNonInformative < vc.getNSamples(); nNonInformative++ ) {
|
||||
final VariantContextBuilder vcb = new VariantContextBuilder(vc);
|
||||
|
||||
final List<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
genotypes.addAll(vc.getGenotypes().subList(0, nNonInformative + 1));
|
||||
genotypes.addAll(Collections.nCopies(vc.getNSamples() - nNonInformative, nonInformative));
|
||||
vcb.genotypes(genotypes);
|
||||
|
||||
timer.start();
|
||||
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vcb.make(), priors);
|
||||
final long runtime = timer.getElapsedTimeNano();
|
||||
|
||||
final List<Object> columns = new LinkedList<Object>(coreValues);
|
||||
columns.addAll(Arrays.asList(runtime, result.getnEvaluations(), nonTypePL, nNonInformative));
|
||||
report.addRowList(columns);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
logger.addAppender(new ConsoleAppender(new SimpleLayout()));
|
||||
|
||||
final List<String> coreColumns = Arrays.asList("iteration", "n.alt.alleles", "n.samples",
|
||||
"exact.model", "prior.type", "runtime", "n.evaluations");
|
||||
|
||||
final PrintStream out = new PrintStream(new FileOutputStream(args[0]));
|
||||
|
||||
final boolean USE_GENERAL = false;
|
||||
final List<ExactAFCalculationTestBuilder.ModelType> modelTypes = USE_GENERAL
|
||||
? Arrays.asList(ExactAFCalculationTestBuilder.ModelType.values())
|
||||
: Arrays.asList(ExactAFCalculationTestBuilder.ModelType.DiploidExact);
|
||||
|
||||
final boolean ONLY_HUMAN_PRIORS = false;
|
||||
final List<ExactAFCalculationTestBuilder.PriorType> priorTypes = ONLY_HUMAN_PRIORS
|
||||
? Arrays.asList(ExactAFCalculationTestBuilder.PriorType.values())
|
||||
: Arrays.asList(ExactAFCalculationTestBuilder.PriorType.human);
|
||||
|
||||
final List<Analysis> analyzes = new ArrayList<Analysis>();
|
||||
analyzes.add(new AnalyzeByACAndPL(coreColumns));
|
||||
analyzes.add(new AnalyzeBySingletonPosition(coreColumns));
|
||||
analyzes.add(new AnalyzeByNonInformative(coreColumns));
|
||||
|
||||
for ( int iteration = 0; iteration < 1; iteration++ ) {
|
||||
for ( final int nAltAlleles : Arrays.asList(1) ) {
|
||||
for ( final int nSamples : Arrays.asList(1, 10, 100) ) {
|
||||
for ( final ExactAFCalculationTestBuilder.ModelType modelType : modelTypes ) {
|
||||
for ( final ExactAFCalculationTestBuilder.PriorType priorType : priorTypes ) {
|
||||
final ExactAFCalculationTestBuilder testBuilder
|
||||
= new ExactAFCalculationTestBuilder(nSamples, 1, modelType, priorType);
|
||||
|
||||
for ( final Analysis analysis : analyzes ) {
|
||||
logger.info(Utils.join("\t", Arrays.asList(iteration, nSamples, modelType, priorType, analysis.getName())));
|
||||
final List<?> values = Arrays.asList(iteration, nAltAlleles, nSamples, modelType, priorType);
|
||||
analysis.run(testBuilder, (List<Object>)values);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final GATKReport report = new GATKReport();
|
||||
for ( final Analysis analysis : analyzes )
|
||||
report.addTable(analysis.getTable());
|
||||
report.print(out);
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,124 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class ExactAFCalculationTestBuilder {
|
||||
final static Allele A = Allele.create("A", true);
|
||||
final static Allele C = Allele.create("C");
|
||||
final static Allele G = Allele.create("G");
|
||||
final static Allele T = Allele.create("T");
|
||||
|
||||
static int sampleNameCounter = 0;
|
||||
|
||||
final int nSamples;
|
||||
final int numAltAlleles;
|
||||
final ModelType modelType;
|
||||
final PriorType priorType;
|
||||
|
||||
public ExactAFCalculationTestBuilder(final int nSamples, final int numAltAlleles,
|
||||
final ModelType modelType, final PriorType priorType) {
|
||||
this.nSamples = nSamples;
|
||||
this.numAltAlleles = numAltAlleles;
|
||||
this.modelType = modelType;
|
||||
this.priorType = priorType;
|
||||
}
|
||||
|
||||
public enum ModelType {
|
||||
DiploidExact,
|
||||
GeneralExact
|
||||
}
|
||||
|
||||
public enum PriorType {
|
||||
flat,
|
||||
human
|
||||
}
|
||||
|
||||
public int getnSamples() {
|
||||
return nSamples;
|
||||
}
|
||||
|
||||
public ExactAFCalculation makeModel() {
|
||||
switch (modelType) {
|
||||
case DiploidExact: return new DiploidExactAFCalculation(nSamples, 4);
|
||||
case GeneralExact: return new GeneralPloidyExactAFCalculation(nSamples, 4, 2);
|
||||
default: throw new RuntimeException("Unexpected type " + modelType);
|
||||
}
|
||||
}
|
||||
|
||||
public double[] makePriors() {
|
||||
final int nPriorValues = 2*nSamples+1;
|
||||
|
||||
switch ( priorType ) {
|
||||
case flat:
|
||||
return MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
|
||||
case human:
|
||||
final double[] humanPriors = new double[nPriorValues];
|
||||
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues-1, humanPriors, 0.001);
|
||||
return humanPriors;
|
||||
default:
|
||||
throw new RuntimeException("Unexpected type " + priorType);
|
||||
}
|
||||
}
|
||||
|
||||
public VariantContext makeACTest(final int ac, final int nonTypePL) {
|
||||
final int nChrom = nSamples * 2;
|
||||
final double p = ac / (1.0 * nChrom);
|
||||
final int nhomvar = (int)Math.floor(nChrom * p * p);
|
||||
final int nhet = ac - 2 * nhomvar;
|
||||
|
||||
final int calcAC = nhet + 2 * nhomvar;
|
||||
if ( calcAC != ac )
|
||||
throw new IllegalStateException("calculated AC " + calcAC + " not equal to desired AC " + ac);
|
||||
|
||||
return makeACTest(nhet, nhomvar, nonTypePL);
|
||||
}
|
||||
|
||||
public VariantContext makeACTest(final int nhet, final int nhomvar, final int nonTypePL) {
|
||||
final List<Genotype> samples = new ArrayList<Genotype>(nSamples);
|
||||
for ( int i = 0; i < nhet; i++ ) samples.add(makePL(GenotypeType.HET, nonTypePL));
|
||||
for ( int i = 0; i < nhomvar; i++ ) samples.add(makePL(GenotypeType.HOM_VAR, nonTypePL));
|
||||
for ( int i = 0; i < (nSamples-nhet-nhomvar); i++ ) samples.add(makePL(GenotypeType.HOM_REF, nonTypePL));
|
||||
|
||||
VariantContextBuilder vcb = new VariantContextBuilder("x", "1", 1, 1, getAlleles());
|
||||
vcb.genotypes(samples);
|
||||
return vcb.make();
|
||||
}
|
||||
|
||||
public List<Allele> getAlleles() {
|
||||
return Arrays.asList(A, C, G, T).subList(0, numAltAlleles+1);
|
||||
}
|
||||
|
||||
public List<Allele> getAlleles(final GenotypeType type) {
|
||||
switch (type) {
|
||||
case HOM_REF: return Arrays.asList(getAlleles().get(0), getAlleles().get(0));
|
||||
case HET: return Arrays.asList(getAlleles().get(0), getAlleles().get(1));
|
||||
case HOM_VAR: return Arrays.asList(getAlleles().get(1), getAlleles().get(1));
|
||||
default: throw new IllegalArgumentException("Unexpected type " + type);
|
||||
}
|
||||
}
|
||||
|
||||
public Genotype makePL(final List<Allele> expectedGT, int ... pls) {
|
||||
GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++);
|
||||
gb.alleles(expectedGT);
|
||||
gb.PL(pls);
|
||||
return gb.make();
|
||||
}
|
||||
|
||||
public Genotype makePL(final GenotypeType type, final int nonTypePL) {
|
||||
GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++);
|
||||
gb.alleles(getAlleles(type));
|
||||
|
||||
switch (type) {
|
||||
case HOM_REF: gb.PL(new double[]{0, nonTypePL, nonTypePL}); break;
|
||||
case HET: gb.PL(new double[]{nonTypePL, 0, nonTypePL}); break;
|
||||
case HOM_VAR: gb.PL(new double[]{nonTypePL, nonTypePL, 0}); break;
|
||||
}
|
||||
|
||||
return gb.make();
|
||||
}
|
||||
}
|
||||
|
|
@ -236,6 +236,12 @@ public class Utils {
|
|||
}
|
||||
}
|
||||
|
||||
public static <T> List<T> append(final List<T> left, T ... elts) {
|
||||
final List<T> l = new LinkedList<T>(left);
|
||||
l.addAll(Arrays.asList(elts));
|
||||
return l;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string of the values in joined by separator, such as A,B,C
|
||||
*
|
||||
|
|
|
|||
Loading…
Reference in New Issue