Improve interface of ExactCallLogger, use it to have a more informative AFCalcPerformanceTest

This commit is contained in:
Mark DePristo 2012-10-17 08:34:47 -04:00
parent d6be657966
commit c9e7a947c2
3 changed files with 73 additions and 34 deletions

View File

@ -10,6 +10,7 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
@ -225,12 +226,24 @@ public class AFCalcPerformanceTest {
final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1,
AFCalcFactory.Calculation.EXACT_INDEPENDENT,
AFCalcTestBuilder.PriorType.human);
logger.info(call);
final SimpleTimer timer = new SimpleTimer().start();
final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, testBuilder.makePriors());
call.newNanoTime = timer.getElapsedTimeNano();
call.newPNonRef = result.getLog10PosteriorOfAFGT0();
logger.info(call);
logger.info("\t\t" + result);
final long newNanoTime = timer.getElapsedTimeNano();
if ( call.originalCall.anyPolymorphic(-1) || result.anyPolymorphic(-1) ) {
logger.info("**** ONE IS POLY");
}
logger.info("\t\t getLog10PosteriorOfAFGT0: " + call.originalCall.getLog10PosteriorOfAFGT0() + " vs " + result.getLog10PosteriorOfAFGT0());
final double speedup = call.runtime / (1.0 * newNanoTime);
logger.info("\t\t runtime: " + call.runtime + " vs " + newNanoTime + " speedup " + String.format("%.2f", speedup) + "x");
for ( final Allele a : call.originalCall.getAllelesUsedInGenotyping() ) {
if ( a.isNonReference() ) {
final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : "";
logger.info("\t\t MLE " + a + ": " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE);
final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost);
}
}
}
}

View File

@ -238,6 +238,19 @@ public class AFCalcResult {
return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
}
/**
* Are any of the alleles polymorphic w.r.t. #isPolymorphic?
*
* @param log10minPNonRef the confidence threshold, in log10 space
* @return true if any are poly, false otherwise
*/
public boolean anyPolymorphic(final double log10minPNonRef) {
for ( final Allele a : getAllelesUsedInGenotyping() )
if ( a.isNonReference() && isPolymorphic(a, log10minPNonRef) )
return true;
return false;
}
/**
* Returns the log10 probability that allele is segregating
*

View File

@ -1,20 +1,18 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import com.google.java.contract.Requires;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.*;
/**
* Allows us to write out and read in information about exact calls (site, alleles, PLs, etc) in tabular format
*
* Once opened, calls can be writen to disk with printCallInfo
*/
public class ExactCallLogger implements Cloneable {
private PrintStream callReport = null;
@ -38,32 +36,28 @@ public class ExactCallLogger implements Cloneable {
*/
public static class ExactCall {
final VariantContext vc;
final long origNanoTime;
long newNanoTime = -1;
final double origPNonRef;
double newPNonRef = -1;
final long runtime;
final AFCalcResult originalCall;
public ExactCall(VariantContext vc, long origNanoTime, double origPNonRef) {
public ExactCall(VariantContext vc, final long runtime, final AFCalcResult originalCall) {
this.vc = vc;
this.origNanoTime = origNanoTime;
this.origPNonRef = origPNonRef;
this.runtime = runtime;
this.originalCall = originalCall;
}
@Override
public String toString() {
return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s new.pNonRef=%.2f new.runtime=%s",
return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s",
vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(),
origPNonRef,
new AutoFormattingTime(origNanoTime / 1e9).toString(),
newPNonRef,
newNanoTime == -1 ? "not.run" : new AutoFormattingTime(newNanoTime / 1e9).toString());
originalCall.getLog10PosteriorOfAFGT0(),
new AutoFormattingTime(runtime / 1e9).toString());
}
}
protected void printCallInfo(final VariantContext vc,
final double[] log10AlleleFrequencyPriors,
final long runtimeNano,
final AFCalcResult result) {
protected final void printCallInfo(final VariantContext vc,
final double[] log10AlleleFrequencyPriors,
final long runtimeNano,
final AFCalcResult result) {
printCallElement(vc, "type", "ignore", vc.getType());
int allelei = 0;
@ -90,6 +84,7 @@ public class ExactCallLogger implements Cloneable {
callReport.flush();
}
@Requires({"vc != null", "variable != null", "key != null", "value != null", "callReport != null"})
private void printCallElement(final VariantContext vc,
final Object variable,
final Object key,
@ -102,10 +97,10 @@ public class ExactCallLogger implements Cloneable {
* Read in a list of ExactCall objects from reader, keeping only those
* with starts in startsToKeep or all sites (if this is empty)
*
* @param reader
* @param startsToKeep
* @param parser
* @return
* @param reader a just-opened reader sitting at the start of the file
* @param startsToKeep a list of start position of the calls to keep, or empty if all calls should be kept
* @param parser a genome loc parser to create genome locs
* @return a list of ExactCall objects in reader
* @throws IOException
*/
public static List<ExactCall> readExactLog(final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser) throws IOException {
@ -118,10 +113,17 @@ public class ExactCallLogger implements Cloneable {
// skip the header line
reader.readLine();
// skip the first "type" line
reader.readLine();
while (true) {
final VariantContextBuilder builder = new VariantContextBuilder();
final List<Allele> alleles = new ArrayList<Allele>();
final List<Genotype> genotypes = new ArrayList<Genotype>();
final double[] posteriors = new double[2];
final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
final List<Integer> mle = new ArrayList<Integer>();
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
long runtimeNano = -1;
GenomeLoc currentLoc = null;
@ -139,13 +141,15 @@ public class ExactCallLogger implements Cloneable {
if (currentLoc == null)
currentLoc = lineLoc;
if (variable.equals("log10PosteriorOfAFzero")) {
if (variable.equals("type")) {
if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) {
builder.alleles(alleles);
final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
builder.genotypes(genotypes);
calls.add(new ExactCall(builder.make(), runtimeNano, Double.valueOf(value)));
final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{}));
final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
calls.add(new ExactCall(builder.make(), runtimeNano, result));
}
break;
} else if (variable.equals("allele")) {
@ -155,6 +159,15 @@ public class ExactCallLogger implements Cloneable {
final GenotypeBuilder gb = new GenotypeBuilder(key);
gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
genotypes.add(gb.make());
} else if (variable.equals("log10PosteriorOfAFEq0")) {
posteriors[0] = Double.valueOf(value);
} else if (variable.equals("log10PosteriorOfAFGt0")) {
posteriors[1] = Double.valueOf(value);
} else if (variable.equals("MLE")) {
mle.add(Integer.valueOf(value));
} else if (variable.equals("pNonRefByAllele")) {
final Allele a = Allele.create(key);
log10pNonRefByAllele.put(a, Double.valueOf(value));
} else if (variable.equals("runtime.nano")) {
runtimeNano = Long.valueOf(value);
} else {