Generalize framework for evaluating the performance and scaling of the ExactAF models to tri-allelic variants

-- Wow, big performance problems with multi-allelic exact model!
This commit is contained in:
Mark DePristo 2012-10-02 19:17:37 -05:00
parent 3663fe1555
commit 50e4a832ea
3 changed files with 102 additions and 39 deletions

View File

@ -47,7 +47,7 @@ public class ExactAFCalculationPerformanceTest {
private static class AnalyzeByACAndPL extends Analysis {
public AnalyzeByACAndPL(final List<String> columns) {
super("AnalyzeByACAndPL", Utils.append(columns, "non.type.pls", "ac"));
super("AnalyzeByACAndPL", Utils.append(columns, "non.type.pls", "ac", "n.alt.seg", "other.ac"));
}
public void run(final ExactAFCalculationTestBuilder testBuilder, final List<Object> coreValues) {
@ -57,19 +57,48 @@ public class ExactAFCalculationPerformanceTest {
final ExactAFCalculation calc = testBuilder.makeModel();
final double[] priors = testBuilder.makePriors();
for ( int ac = 0; ac < testBuilder.getnSamples(); ac++ ) {
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
for ( int[] ACs : makeACs(testBuilder.numAltAlleles, testBuilder.nSamples*2) ) {
final VariantContext vc = testBuilder.makeACTest(ACs, nonTypePL);
timer.start();
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vc, priors);
final long runtime = timer.getElapsedTimeNano();
int otherAC = 0;
int nAltSeg = 0;
for ( int i = 0; i < ACs.length; i++ ) {
nAltSeg += ACs[i] > 0 ? 1 : 0;
if ( i > 0 ) otherAC += ACs[i];
}
final List<Object> columns = new LinkedList<Object>(coreValues);
columns.addAll(Arrays.asList(runtime, result.getnEvaluations(), nonTypePL, ac));
columns.addAll(Arrays.asList(runtime, result.getnEvaluations(), nonTypePL, ACs[0], nAltSeg, otherAC));
report.addRowList(columns);
}
}
}
private List<int[]> makeACs(final int nAltAlleles, final int nChrom) {
if ( nAltAlleles > 2 ) throw new IllegalArgumentException("nAltAlleles must be < 3");
final List<int[]> ACs = new LinkedList<int[]>();
if ( nAltAlleles == 1 )
for ( int i = 0; i < nChrom; i++ ) {
ACs.add(new int[]{i});
} else if ( nAltAlleles == 2 ) {
for ( int i = 0; i < nChrom; i++ ) {
for ( int j : Arrays.asList(0, 1, 5, 10, 50, 100, 1000, 10000, 100000) ) {
if ( j < nChrom - i )
ACs.add(new int[]{i, j});
}
}
} else {
throw new IllegalStateException("cannot get here");
}
return ACs;
}
}
private static class AnalyzeBySingletonPosition extends Analysis {
@ -80,11 +109,12 @@ public class ExactAFCalculationPerformanceTest {
public void run(final ExactAFCalculationTestBuilder testBuilder, final List<Object> coreValues) {
final SimpleTimer timer = new SimpleTimer();
for ( final int nonTypePL : Arrays.asList(10, 100, 1000) ) {
for ( final int nonTypePL : Arrays.asList(100) ) {
final ExactAFCalculation calc = testBuilder.makeModel();
final double[] priors = testBuilder.makePriors();
int ac = 1;
final int[] ac = new int[testBuilder.numAltAlleles];
ac[0] = 1;
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
for ( int position = 0; position < vc.getNSamples(); position++ ) {
@ -113,11 +143,12 @@ public class ExactAFCalculationPerformanceTest {
public void run(final ExactAFCalculationTestBuilder testBuilder, final List<Object> coreValues) {
final SimpleTimer timer = new SimpleTimer();
for ( final int nonTypePL : Arrays.asList(10, 100, 1000) ) {
for ( final int nonTypePL : Arrays.asList(100) ) {
final ExactAFCalculation calc = testBuilder.makeModel();
final double[] priors = testBuilder.makePriors();
int ac = 1;
final int[] ac = new int[testBuilder.numAltAlleles];
ac[0] = 1;
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
final Genotype nonInformative = testBuilder.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 0, 0, 0);
@ -159,21 +190,26 @@ public class ExactAFCalculationPerformanceTest {
? Arrays.asList(ExactAFCalculationTestBuilder.PriorType.values())
: Arrays.asList(ExactAFCalculationTestBuilder.PriorType.human);
final int MAX_N_SAMPLES_FOR_MULTI_ALLELIC = 100;
final List<Analysis> analyzes = new ArrayList<Analysis>();
analyzes.add(new AnalyzeByACAndPL(coreColumns));
analyzes.add(new AnalyzeBySingletonPosition(coreColumns));
analyzes.add(new AnalyzeByNonInformative(coreColumns));
for ( int iteration = 0; iteration < 1; iteration++ ) {
for ( final int nAltAlleles : Arrays.asList(1) ) {
for ( final int nSamples : Arrays.asList(1, 10, 100) ) {
for ( final int nAltAlleles : Arrays.asList(1, 2) ) {
for ( final int nSamples : Arrays.asList(1, 10, 100, 1000, 10000) ) {
if ( nSamples > MAX_N_SAMPLES_FOR_MULTI_ALLELIC && nAltAlleles > 1 )
continue; // skip things that will take forever!
for ( final ExactAFCalculationTestBuilder.ModelType modelType : modelTypes ) {
for ( final ExactAFCalculationTestBuilder.PriorType priorType : priorTypes ) {
final ExactAFCalculationTestBuilder testBuilder
= new ExactAFCalculationTestBuilder(nSamples, 1, modelType, priorType);
= new ExactAFCalculationTestBuilder(nSamples, nAltAlleles, modelType, priorType);
for ( final Analysis analysis : analyzes ) {
logger.info(Utils.join("\t", Arrays.asList(iteration, nSamples, modelType, priorType, analysis.getName())));
logger.info(Utils.join("\t", Arrays.asList(iteration, nAltAlleles, nSamples, modelType, priorType, analysis.getName())));
final List<?> values = Arrays.asList(iteration, nAltAlleles, nSamples, modelType, priorType);
analysis.run(testBuilder, (List<Object>)values);
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.ArrayList;
@ -65,24 +66,45 @@ public class ExactAFCalculationTestBuilder {
}
}
public VariantContext makeACTest(final int ac, final int nonTypePL) {
public VariantContext makeACTest(final int[] ACs, final int nonTypePL) {
final int nChrom = nSamples * 2;
final double p = ac / (1.0 * nChrom);
final int nhomvar = (int)Math.floor(nChrom * p * p);
final int nhet = ac - 2 * nhomvar;
final int calcAC = nhet + 2 * nhomvar;
if ( calcAC != ac )
throw new IllegalStateException("calculated AC " + calcAC + " not equal to desired AC " + ac);
final int[] nhet = new int[numAltAlleles];
final int[] nhomvar = new int[numAltAlleles];
for ( int i = 0; i < ACs.length; i++ ) {
final double p = ACs[i] / (1.0 * nChrom);
nhomvar[i] = (int)Math.floor(nSamples * p * p);
nhet[i] = ACs[i] - 2 * nhomvar[i];
if ( nhet[i] < 0 )
throw new IllegalStateException("Bug!");
}
final long calcAC = MathUtils.sum(nhet) + 2 * MathUtils.sum(nhomvar);
if ( calcAC != MathUtils.sum(ACs) )
throw new IllegalStateException("calculated AC " + calcAC + " not equal to desired AC " + Utils.join(",", ACs));
return makeACTest(nhet, nhomvar, nonTypePL);
}
public VariantContext makeACTest(final int nhet, final int nhomvar, final int nonTypePL) {
final List<Genotype> samples = new ArrayList<Genotype>(nSamples);
for ( int i = 0; i < nhet; i++ ) samples.add(makePL(GenotypeType.HET, nonTypePL));
for ( int i = 0; i < nhomvar; i++ ) samples.add(makePL(GenotypeType.HOM_VAR, nonTypePL));
for ( int i = 0; i < (nSamples-nhet-nhomvar); i++ ) samples.add(makePL(GenotypeType.HOM_REF, nonTypePL));
public VariantContext makeACTest(final int[] nhet, final int[] nhomvar, final int nonTypePL) {
List<Genotype> samples = new ArrayList<Genotype>(nSamples);
for ( int altI = 0; altI < nhet.length; altI++ ) {
for ( int i = 0; i < nhet[altI]; i++ )
samples.add(makePL(GenotypeType.HET, nonTypePL, altI+1));
for ( int i = 0; i < nhomvar[altI]; i++ )
samples.add(makePL(GenotypeType.HOM_VAR, nonTypePL, altI+1));
}
final int nRef = (int)(nSamples - MathUtils.sum(nhet) - MathUtils.sum(nhomvar));
for ( int i = 0; i < nRef; i++ ) samples.add(makePL(GenotypeType.HOM_REF, nonTypePL, 0));
samples = samples.subList(0, nSamples);
if ( samples.size() > nSamples )
throw new IllegalStateException("too many samples");
VariantContextBuilder vcb = new VariantContextBuilder("x", "1", 1, 1, getAlleles());
vcb.genotypes(samples);
@ -93,11 +115,11 @@ public class ExactAFCalculationTestBuilder {
return Arrays.asList(A, C, G, T).subList(0, numAltAlleles+1);
}
public List<Allele> getAlleles(final GenotypeType type) {
public List<Allele> getAlleles(final GenotypeType type, final int altI) {
switch (type) {
case HOM_REF: return Arrays.asList(getAlleles().get(0), getAlleles().get(0));
case HET: return Arrays.asList(getAlleles().get(0), getAlleles().get(1));
case HOM_VAR: return Arrays.asList(getAlleles().get(1), getAlleles().get(1));
case HET: return Arrays.asList(getAlleles().get(0), getAlleles().get(altI));
case HOM_VAR: return Arrays.asList(getAlleles().get(altI), getAlleles().get(altI));
default: throw new IllegalArgumentException("Unexpected type " + type);
}
}
@ -109,15 +131,25 @@ public class ExactAFCalculationTestBuilder {
return gb.make();
}
public Genotype makePL(final GenotypeType type, final int nonTypePL) {
GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++);
gb.alleles(getAlleles(type));
private int numPLs() {
return GenotypeLikelihoods.numLikelihoods(numAltAlleles+1, 2);
}
switch (type) {
case HOM_REF: gb.PL(new double[]{0, nonTypePL, nonTypePL}); break;
case HET: gb.PL(new double[]{nonTypePL, 0, nonTypePL}); break;
case HOM_VAR: gb.PL(new double[]{nonTypePL, nonTypePL, 0}); break;
public Genotype makePL(final GenotypeType type, final int nonTypePL, final int altI) {
GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++);
gb.alleles(getAlleles(type, altI));
final int[] pls = new int[numPLs()];
Arrays.fill(pls, nonTypePL);
int index = 0;
switch ( type ) {
case HOM_REF: index = GenotypeLikelihoods.calculatePLindex(0, 0); break;
case HET: index = GenotypeLikelihoods.calculatePLindex(0, altI); break;
case HOM_VAR: index = GenotypeLikelihoods.calculatePLindex(altI, altI); break;
}
pls[index] = 0;
gb.PL(pls);
return gb.make();
}

View File

@ -17,7 +17,6 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
static Allele A = Allele.create("A", true);
static Allele C = Allele.create("C");
static Allele G = Allele.create("G");
static Allele T = Allele.create("T");
static int sampleNameCounter = 0;
static Genotype AA1, AB1, BB1, NON_INFORMATIVE1;
@ -101,10 +100,6 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
Allele.create("T")).subList(0, numAltAlleles+1);
}
public boolean isNonRef() {
return expectedACs[0] < getVC().getNSamples() * 2;
}
public int getExpectedAltAC(final int alleleI) {
return expectedACs[alleleI+1];
}