Generalize framework for evaluating the performance and scaling of the ExactAF models to tri-allelic variants
-- Wow, big performance problems with multi-allelic exact model!
This commit is contained in:
parent
3663fe1555
commit
50e4a832ea
|
|
@ -47,7 +47,7 @@ public class ExactAFCalculationPerformanceTest {
|
|||
|
||||
private static class AnalyzeByACAndPL extends Analysis {
|
||||
public AnalyzeByACAndPL(final List<String> columns) {
|
||||
super("AnalyzeByACAndPL", Utils.append(columns, "non.type.pls", "ac"));
|
||||
super("AnalyzeByACAndPL", Utils.append(columns, "non.type.pls", "ac", "n.alt.seg", "other.ac"));
|
||||
}
|
||||
|
||||
public void run(final ExactAFCalculationTestBuilder testBuilder, final List<Object> coreValues) {
|
||||
|
|
@ -57,19 +57,48 @@ public class ExactAFCalculationPerformanceTest {
|
|||
final ExactAFCalculation calc = testBuilder.makeModel();
|
||||
final double[] priors = testBuilder.makePriors();
|
||||
|
||||
for ( int ac = 0; ac < testBuilder.getnSamples(); ac++ ) {
|
||||
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
|
||||
for ( int[] ACs : makeACs(testBuilder.numAltAlleles, testBuilder.nSamples*2) ) {
|
||||
final VariantContext vc = testBuilder.makeACTest(ACs, nonTypePL);
|
||||
|
||||
timer.start();
|
||||
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vc, priors);
|
||||
final long runtime = timer.getElapsedTimeNano();
|
||||
|
||||
int otherAC = 0;
|
||||
int nAltSeg = 0;
|
||||
for ( int i = 0; i < ACs.length; i++ ) {
|
||||
nAltSeg += ACs[i] > 0 ? 1 : 0;
|
||||
if ( i > 0 ) otherAC += ACs[i];
|
||||
}
|
||||
|
||||
final List<Object> columns = new LinkedList<Object>(coreValues);
|
||||
columns.addAll(Arrays.asList(runtime, result.getnEvaluations(), nonTypePL, ac));
|
||||
columns.addAll(Arrays.asList(runtime, result.getnEvaluations(), nonTypePL, ACs[0], nAltSeg, otherAC));
|
||||
report.addRowList(columns);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private List<int[]> makeACs(final int nAltAlleles, final int nChrom) {
|
||||
if ( nAltAlleles > 2 ) throw new IllegalArgumentException("nAltAlleles must be < 3");
|
||||
|
||||
final List<int[]> ACs = new LinkedList<int[]>();
|
||||
|
||||
if ( nAltAlleles == 1 )
|
||||
for ( int i = 0; i < nChrom; i++ ) {
|
||||
ACs.add(new int[]{i});
|
||||
} else if ( nAltAlleles == 2 ) {
|
||||
for ( int i = 0; i < nChrom; i++ ) {
|
||||
for ( int j : Arrays.asList(0, 1, 5, 10, 50, 100, 1000, 10000, 100000) ) {
|
||||
if ( j < nChrom - i )
|
||||
ACs.add(new int[]{i, j});
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new IllegalStateException("cannot get here");
|
||||
}
|
||||
|
||||
return ACs;
|
||||
}
|
||||
}
|
||||
|
||||
private static class AnalyzeBySingletonPosition extends Analysis {
|
||||
|
|
@ -80,11 +109,12 @@ public class ExactAFCalculationPerformanceTest {
|
|||
public void run(final ExactAFCalculationTestBuilder testBuilder, final List<Object> coreValues) {
|
||||
final SimpleTimer timer = new SimpleTimer();
|
||||
|
||||
for ( final int nonTypePL : Arrays.asList(10, 100, 1000) ) {
|
||||
for ( final int nonTypePL : Arrays.asList(100) ) {
|
||||
final ExactAFCalculation calc = testBuilder.makeModel();
|
||||
final double[] priors = testBuilder.makePriors();
|
||||
|
||||
int ac = 1;
|
||||
final int[] ac = new int[testBuilder.numAltAlleles];
|
||||
ac[0] = 1;
|
||||
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
|
||||
|
||||
for ( int position = 0; position < vc.getNSamples(); position++ ) {
|
||||
|
|
@ -113,11 +143,12 @@ public class ExactAFCalculationPerformanceTest {
|
|||
public void run(final ExactAFCalculationTestBuilder testBuilder, final List<Object> coreValues) {
|
||||
final SimpleTimer timer = new SimpleTimer();
|
||||
|
||||
for ( final int nonTypePL : Arrays.asList(10, 100, 1000) ) {
|
||||
for ( final int nonTypePL : Arrays.asList(100) ) {
|
||||
final ExactAFCalculation calc = testBuilder.makeModel();
|
||||
final double[] priors = testBuilder.makePriors();
|
||||
|
||||
int ac = 1;
|
||||
final int[] ac = new int[testBuilder.numAltAlleles];
|
||||
ac[0] = 1;
|
||||
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
|
||||
final Genotype nonInformative = testBuilder.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 0, 0, 0);
|
||||
|
||||
|
|
@ -159,21 +190,26 @@ public class ExactAFCalculationPerformanceTest {
|
|||
? Arrays.asList(ExactAFCalculationTestBuilder.PriorType.values())
|
||||
: Arrays.asList(ExactAFCalculationTestBuilder.PriorType.human);
|
||||
|
||||
final int MAX_N_SAMPLES_FOR_MULTI_ALLELIC = 100;
|
||||
|
||||
final List<Analysis> analyzes = new ArrayList<Analysis>();
|
||||
analyzes.add(new AnalyzeByACAndPL(coreColumns));
|
||||
analyzes.add(new AnalyzeBySingletonPosition(coreColumns));
|
||||
analyzes.add(new AnalyzeByNonInformative(coreColumns));
|
||||
|
||||
for ( int iteration = 0; iteration < 1; iteration++ ) {
|
||||
for ( final int nAltAlleles : Arrays.asList(1) ) {
|
||||
for ( final int nSamples : Arrays.asList(1, 10, 100) ) {
|
||||
for ( final int nAltAlleles : Arrays.asList(1, 2) ) {
|
||||
for ( final int nSamples : Arrays.asList(1, 10, 100, 1000, 10000) ) {
|
||||
if ( nSamples > MAX_N_SAMPLES_FOR_MULTI_ALLELIC && nAltAlleles > 1 )
|
||||
continue; // skip things that will take forever!
|
||||
|
||||
for ( final ExactAFCalculationTestBuilder.ModelType modelType : modelTypes ) {
|
||||
for ( final ExactAFCalculationTestBuilder.PriorType priorType : priorTypes ) {
|
||||
final ExactAFCalculationTestBuilder testBuilder
|
||||
= new ExactAFCalculationTestBuilder(nSamples, 1, modelType, priorType);
|
||||
= new ExactAFCalculationTestBuilder(nSamples, nAltAlleles, modelType, priorType);
|
||||
|
||||
for ( final Analysis analysis : analyzes ) {
|
||||
logger.info(Utils.join("\t", Arrays.asList(iteration, nSamples, modelType, priorType, analysis.getName())));
|
||||
logger.info(Utils.join("\t", Arrays.asList(iteration, nAltAlleles, nSamples, modelType, priorType, analysis.getName())));
|
||||
final List<?> values = Arrays.asList(iteration, nAltAlleles, nSamples, modelType, priorType);
|
||||
analysis.run(testBuilder, (List<Object>)values);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -65,24 +66,45 @@ public class ExactAFCalculationTestBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
public VariantContext makeACTest(final int ac, final int nonTypePL) {
|
||||
public VariantContext makeACTest(final int[] ACs, final int nonTypePL) {
|
||||
final int nChrom = nSamples * 2;
|
||||
final double p = ac / (1.0 * nChrom);
|
||||
final int nhomvar = (int)Math.floor(nChrom * p * p);
|
||||
final int nhet = ac - 2 * nhomvar;
|
||||
|
||||
final int calcAC = nhet + 2 * nhomvar;
|
||||
if ( calcAC != ac )
|
||||
throw new IllegalStateException("calculated AC " + calcAC + " not equal to desired AC " + ac);
|
||||
final int[] nhet = new int[numAltAlleles];
|
||||
final int[] nhomvar = new int[numAltAlleles];
|
||||
|
||||
for ( int i = 0; i < ACs.length; i++ ) {
|
||||
final double p = ACs[i] / (1.0 * nChrom);
|
||||
nhomvar[i] = (int)Math.floor(nSamples * p * p);
|
||||
nhet[i] = ACs[i] - 2 * nhomvar[i];
|
||||
|
||||
if ( nhet[i] < 0 )
|
||||
throw new IllegalStateException("Bug!");
|
||||
}
|
||||
|
||||
final long calcAC = MathUtils.sum(nhet) + 2 * MathUtils.sum(nhomvar);
|
||||
if ( calcAC != MathUtils.sum(ACs) )
|
||||
throw new IllegalStateException("calculated AC " + calcAC + " not equal to desired AC " + Utils.join(",", ACs));
|
||||
|
||||
return makeACTest(nhet, nhomvar, nonTypePL);
|
||||
}
|
||||
|
||||
public VariantContext makeACTest(final int nhet, final int nhomvar, final int nonTypePL) {
|
||||
final List<Genotype> samples = new ArrayList<Genotype>(nSamples);
|
||||
for ( int i = 0; i < nhet; i++ ) samples.add(makePL(GenotypeType.HET, nonTypePL));
|
||||
for ( int i = 0; i < nhomvar; i++ ) samples.add(makePL(GenotypeType.HOM_VAR, nonTypePL));
|
||||
for ( int i = 0; i < (nSamples-nhet-nhomvar); i++ ) samples.add(makePL(GenotypeType.HOM_REF, nonTypePL));
|
||||
public VariantContext makeACTest(final int[] nhet, final int[] nhomvar, final int nonTypePL) {
|
||||
List<Genotype> samples = new ArrayList<Genotype>(nSamples);
|
||||
|
||||
for ( int altI = 0; altI < nhet.length; altI++ ) {
|
||||
for ( int i = 0; i < nhet[altI]; i++ )
|
||||
samples.add(makePL(GenotypeType.HET, nonTypePL, altI+1));
|
||||
for ( int i = 0; i < nhomvar[altI]; i++ )
|
||||
samples.add(makePL(GenotypeType.HOM_VAR, nonTypePL, altI+1));
|
||||
}
|
||||
|
||||
final int nRef = (int)(nSamples - MathUtils.sum(nhet) - MathUtils.sum(nhomvar));
|
||||
for ( int i = 0; i < nRef; i++ ) samples.add(makePL(GenotypeType.HOM_REF, nonTypePL, 0));
|
||||
|
||||
samples = samples.subList(0, nSamples);
|
||||
|
||||
if ( samples.size() > nSamples )
|
||||
throw new IllegalStateException("too many samples");
|
||||
|
||||
VariantContextBuilder vcb = new VariantContextBuilder("x", "1", 1, 1, getAlleles());
|
||||
vcb.genotypes(samples);
|
||||
|
|
@ -93,11 +115,11 @@ public class ExactAFCalculationTestBuilder {
|
|||
return Arrays.asList(A, C, G, T).subList(0, numAltAlleles+1);
|
||||
}
|
||||
|
||||
public List<Allele> getAlleles(final GenotypeType type) {
|
||||
public List<Allele> getAlleles(final GenotypeType type, final int altI) {
|
||||
switch (type) {
|
||||
case HOM_REF: return Arrays.asList(getAlleles().get(0), getAlleles().get(0));
|
||||
case HET: return Arrays.asList(getAlleles().get(0), getAlleles().get(1));
|
||||
case HOM_VAR: return Arrays.asList(getAlleles().get(1), getAlleles().get(1));
|
||||
case HET: return Arrays.asList(getAlleles().get(0), getAlleles().get(altI));
|
||||
case HOM_VAR: return Arrays.asList(getAlleles().get(altI), getAlleles().get(altI));
|
||||
default: throw new IllegalArgumentException("Unexpected type " + type);
|
||||
}
|
||||
}
|
||||
|
|
@ -109,15 +131,25 @@ public class ExactAFCalculationTestBuilder {
|
|||
return gb.make();
|
||||
}
|
||||
|
||||
public Genotype makePL(final GenotypeType type, final int nonTypePL) {
|
||||
GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++);
|
||||
gb.alleles(getAlleles(type));
|
||||
private int numPLs() {
|
||||
return GenotypeLikelihoods.numLikelihoods(numAltAlleles+1, 2);
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case HOM_REF: gb.PL(new double[]{0, nonTypePL, nonTypePL}); break;
|
||||
case HET: gb.PL(new double[]{nonTypePL, 0, nonTypePL}); break;
|
||||
case HOM_VAR: gb.PL(new double[]{nonTypePL, nonTypePL, 0}); break;
|
||||
public Genotype makePL(final GenotypeType type, final int nonTypePL, final int altI) {
|
||||
GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++);
|
||||
gb.alleles(getAlleles(type, altI));
|
||||
|
||||
final int[] pls = new int[numPLs()];
|
||||
Arrays.fill(pls, nonTypePL);
|
||||
|
||||
int index = 0;
|
||||
switch ( type ) {
|
||||
case HOM_REF: index = GenotypeLikelihoods.calculatePLindex(0, 0); break;
|
||||
case HET: index = GenotypeLikelihoods.calculatePLindex(0, altI); break;
|
||||
case HOM_VAR: index = GenotypeLikelihoods.calculatePLindex(altI, altI); break;
|
||||
}
|
||||
pls[index] = 0;
|
||||
gb.PL(pls);
|
||||
|
||||
return gb.make();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
static Allele A = Allele.create("A", true);
|
||||
static Allele C = Allele.create("C");
|
||||
static Allele G = Allele.create("G");
|
||||
static Allele T = Allele.create("T");
|
||||
|
||||
static int sampleNameCounter = 0;
|
||||
static Genotype AA1, AB1, BB1, NON_INFORMATIVE1;
|
||||
|
|
@ -101,10 +100,6 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
Allele.create("T")).subList(0, numAltAlleles+1);
|
||||
}
|
||||
|
||||
public boolean isNonRef() {
|
||||
return expectedACs[0] < getVC().getNSamples() * 2;
|
||||
}
|
||||
|
||||
public int getExpectedAltAC(final int alleleI) {
|
||||
return expectedACs[alleleI+1];
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue