Generalize testing infrastructure to allow us to run specific n.samples calculation

This commit is contained in:
Mark DePristo 2012-10-07 19:43:55 -04:00
parent ec935f76f6
commit c82aa01e0e
3 changed files with 92 additions and 21 deletions

View File

@ -2,9 +2,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.apache.log4j.ConsoleAppender;
import org.apache.log4j.Logger;
import org.apache.log4j.SimpleLayout;
import org.apache.log4j.TTCCLayout;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
@ -82,18 +83,21 @@ public class ExactAFCalculationPerformanceTest {
final List<int[]> ACs = new LinkedList<int[]>();
if ( nAltAlleles == 1 )
for ( int i = 0; i < nChrom; i++ ) {
ACs.add(new int[]{i});
} else if ( nAltAlleles == 2 ) {
for ( int i = 0; i < nChrom; i++ ) {
for ( int j : Arrays.asList(0, 1, 5, 10, 50, 100, 1000, 10000, 100000) ) {
if ( j < nChrom - i )
ACs.add(new int[]{i, j});
final List<Integer> ACsToTry = MathUtils.log10LinearRange(0, nChrom, 0.1); //Arrays.asList(0, 1, 2, 3, 6, 10, 20, 40, 60, 100, 200, 400, 600, 1000, 2000, 4000, 6000, 10000, 100000);
for ( int i : ACsToTry ) {
if ( i < nChrom ) {
if ( nAltAlleles == 1 ) {
ACs.add(new int[]{i});
} else if ( nAltAlleles == 2 ) {
for ( int j : ACsToTry ) {
if ( j < nChrom - i )
ACs.add(new int[]{i, j});
}
} else {
throw new IllegalStateException("cannot get here");
}
}
} else {
throw new IllegalStateException("cannot get here");
}
return ACs;
@ -116,7 +120,7 @@ public class ExactAFCalculationPerformanceTest {
ac[0] = 1;
final VariantContext vc = testBuilder.makeACTest(ac, 0, nonTypePL);
for ( int position = 0; position < vc.getNSamples(); position++ ) {
for ( final int position : MathUtils.log10LinearRange(0, vc.getNSamples(), 0.1) ) {
final VariantContextBuilder vcb = new VariantContextBuilder(vc);
final List<Genotype> genotypes = new ArrayList<Genotype>(vc.getGenotypes());
Collections.rotate(genotypes, position);
@ -184,19 +188,54 @@ public class ExactAFCalculationPerformanceTest {
}
}
public enum Operation {
ANALYZE,
SINGLE
}
public static void main(final String[] args) throws Exception {
logger.addAppender(new ConsoleAppender(new SimpleLayout()));
final TTCCLayout layout = new TTCCLayout();
layout.setThreadPrinting(false);
layout.setCategoryPrefixing(false);
layout.setContextPrinting(false);
logger.addAppender(new ConsoleAppender(layout));
final Operation op = Operation.valueOf(args[0]);
switch ( op ) {
case ANALYZE: analyze(args); break;
case SINGLE: profileBig(args); break;
default: throw new IllegalAccessException("unknown operation " + op);
}
}
private static void profileBig(final String[] args) throws Exception {
final int nSamples = Integer.valueOf(args[1]);
final int ac = Integer.valueOf(args[2]);
final ExactAFCalculationTestBuilder testBuilder = new ExactAFCalculationTestBuilder(nSamples, 1,
ExactAFCalculationTestBuilder.ModelType.IndependentDiploidExact,
ExactAFCalculationTestBuilder.PriorType.human);
final VariantContext vc = testBuilder.makeACTest(new int[]{ac}, 0, 100);
final SimpleTimer timer = new SimpleTimer().start();
final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(vc, testBuilder.makePriors());
final long runtime = timer.getElapsedTimeNano();
logger.info("result " + result.getNormalizedPosteriorOfAFGTZero());
logger.info("runtime " + runtime);
}
private static void analyze(final String[] args) throws Exception {
final List<String> coreColumns = Arrays.asList("iteration", "n.alt.alleles", "n.samples",
"exact.model", "prior.type", "runtime", "n.evaluations");
final PrintStream out = new PrintStream(new FileOutputStream(args[0]));
final PrintStream out = new PrintStream(new FileOutputStream(args[1]));
final List<ModelParams> modelParams = Arrays.asList(
new ModelParams(ExactAFCalculationTestBuilder.ModelType.ReferenceDiploidExact, 1000, 10),
new ModelParams(ExactAFCalculationTestBuilder.ModelType.ReferenceDiploidExact, 10000, 10),
// new ModelParams(ExactAFCalculationTestBuilder.ModelType.GeneralExact, 100, 10),
new ModelParams(ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact, 1000, 100),
new ModelParams(ExactAFCalculationTestBuilder.ModelType.IndependentDiploidExact, 1000, 10000));
new ModelParams(ExactAFCalculationTestBuilder.ModelType.ConstrainedDiploidExact, 10000, 100),
new ModelParams(ExactAFCalculationTestBuilder.ModelType.IndependentDiploidExact, 10000, 1000));
final boolean ONLY_HUMAN_PRIORS = false;
final List<ExactAFCalculationTestBuilder.PriorType> priorTypes = ONLY_HUMAN_PRIORS
@ -211,9 +250,9 @@ public class ExactAFCalculationPerformanceTest {
for ( int iteration = 0; iteration < 1; iteration++ ) {
for ( final int nAltAlleles : Arrays.asList(1, 2) ) {
for ( final int nSamples : Arrays.asList(1, 10, 100, 1000, 10000) ) {
for ( final ModelParams modelToRun : modelParams) {
if ( modelToRun.meetsConstraints(nAltAlleles, nSamples) ) {
for ( final ExactAFCalculationTestBuilder.PriorType priorType : priorTypes ) {
for ( final ModelParams modelToRun : modelParams) {
if ( modelToRun.meetsConstraints(nAltAlleles, nSamples) ) {
for ( final ExactAFCalculationTestBuilder.PriorType priorType : priorTypes ) {
final ExactAFCalculationTestBuilder testBuilder
= new ExactAFCalculationTestBuilder(nSamples, nAltAlleles, modelToRun.modelType, priorType);

View File

@ -89,7 +89,7 @@ public class ExactAFCalculationTestBuilder {
nhet[i] = ACs[i] - 2 * nhomvar[i];
if ( nhet[i] < 0 )
throw new IllegalStateException("Bug!");
throw new IllegalStateException("Bug! nhet[i] < 0");
}
final long calcAC = MathUtils.sum(nhet) + 2 * MathUtils.sum(nhomvar);

View File

@ -1666,4 +1666,36 @@ public class MathUtils {
return result;
}
/**
* Returns a series of integer values between start and stop, inclusive,
* expontentially distributed between the two. That is, if there are
* ten values between 0-10 there will be 10 between 10-100.
*
* WARNING -- BADLY TESTED
* @param start
* @param stop
* @param eps
* @return
*/
public static List<Integer> log10LinearRange(final int start, final int stop, final double eps) {
final LinkedList<Integer> values = new LinkedList<Integer>();
final double log10range = Math.log10(stop - start);
if ( start == 0 )
values.add(0);
double i = 0.0;
while ( i <= log10range ) {
final int index = (int)Math.round(Math.pow(10, i)) + start;
if ( index < stop && (values.peekLast() == null || values.peekLast() != index ) )
values.add(index);
i += eps;
}
if ( values.peekLast() == null || values.peekLast() != stop )
values.add(stop);
return values;
}
}