Improve interface of ExactCallLogger, use it to have a more informative AFCalcPerformanceTest

This commit is contained in:
Mark DePristo 2012-10-17 08:34:47 -04:00
parent d6be657966
commit c9e7a947c2
3 changed files with 73 additions and 34 deletions

View File

@ -10,6 +10,7 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
@ -225,12 +226,24 @@ public class AFCalcPerformanceTest {
final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1, final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1,
AFCalcFactory.Calculation.EXACT_INDEPENDENT, AFCalcFactory.Calculation.EXACT_INDEPENDENT,
AFCalcTestBuilder.PriorType.human); AFCalcTestBuilder.PriorType.human);
logger.info(call);
final SimpleTimer timer = new SimpleTimer().start(); final SimpleTimer timer = new SimpleTimer().start();
final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, testBuilder.makePriors()); final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, testBuilder.makePriors());
call.newNanoTime = timer.getElapsedTimeNano(); final long newNanoTime = timer.getElapsedTimeNano();
call.newPNonRef = result.getLog10PosteriorOfAFGT0(); if ( call.originalCall.anyPolymorphic(-1) || result.anyPolymorphic(-1) ) {
logger.info(call); logger.info("**** ONE IS POLY");
logger.info("\t\t" + result); }
logger.info("\t\t getLog10PosteriorOfAFGT0: " + call.originalCall.getLog10PosteriorOfAFGT0() + " vs " + result.getLog10PosteriorOfAFGT0());
final double speedup = call.runtime / (1.0 * newNanoTime);
logger.info("\t\t runtime: " + call.runtime + " vs " + newNanoTime + " speedup " + String.format("%.2f", speedup) + "x");
for ( final Allele a : call.originalCall.getAllelesUsedInGenotyping() ) {
if ( a.isNonReference() ) {
final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : "";
logger.info("\t\t MLE " + a + ": " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE);
final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost);
}
}
} }
} }

View File

@ -238,6 +238,19 @@ public class AFCalcResult {
return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef; return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
} }
/**
* Are any of the alleles polymorphic w.r.t. #isPolymorphic?
*
* @param log10minPNonRef the confidence threshold, in log10 space
* @return true if any are poly, false otherwise
*/
public boolean anyPolymorphic(final double log10minPNonRef) {
for ( final Allele a : getAllelesUsedInGenotyping() )
if ( a.isNonReference() && isPolymorphic(a, log10minPNonRef) )
return true;
return false;
}
/** /**
* Returns the log10 probability that allele is segregating * Returns the log10 probability that allele is segregating
* *

View File

@ -1,20 +1,18 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.utils.AutoFormattingTime; import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.GenomeLoc; import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.*; import java.io.*;
import java.util.ArrayList; import java.util.*;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
/** /**
* Allows us to write out and read in information about exact calls (site, alleles, PLs, etc) in tabular format * Allows us to write out and read in information about exact calls (site, alleles, PLs, etc) in tabular format
*
* Once opened, calls can be writen to disk with printCallInfo
*/ */
public class ExactCallLogger implements Cloneable { public class ExactCallLogger implements Cloneable {
private PrintStream callReport = null; private PrintStream callReport = null;
@ -38,29 +36,25 @@ public class ExactCallLogger implements Cloneable {
*/ */
public static class ExactCall { public static class ExactCall {
final VariantContext vc; final VariantContext vc;
final long origNanoTime; final long runtime;
long newNanoTime = -1; final AFCalcResult originalCall;
final double origPNonRef;
double newPNonRef = -1;
public ExactCall(VariantContext vc, long origNanoTime, double origPNonRef) { public ExactCall(VariantContext vc, final long runtime, final AFCalcResult originalCall) {
this.vc = vc; this.vc = vc;
this.origNanoTime = origNanoTime; this.runtime = runtime;
this.origPNonRef = origPNonRef; this.originalCall = originalCall;
} }
@Override @Override
public String toString() { public String toString() {
return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s new.pNonRef=%.2f new.runtime=%s", return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s",
vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(), vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(),
origPNonRef, originalCall.getLog10PosteriorOfAFGT0(),
new AutoFormattingTime(origNanoTime / 1e9).toString(), new AutoFormattingTime(runtime / 1e9).toString());
newPNonRef,
newNanoTime == -1 ? "not.run" : new AutoFormattingTime(newNanoTime / 1e9).toString());
} }
} }
protected void printCallInfo(final VariantContext vc, protected final void printCallInfo(final VariantContext vc,
final double[] log10AlleleFrequencyPriors, final double[] log10AlleleFrequencyPriors,
final long runtimeNano, final long runtimeNano,
final AFCalcResult result) { final AFCalcResult result) {
@ -90,6 +84,7 @@ public class ExactCallLogger implements Cloneable {
callReport.flush(); callReport.flush();
} }
@Requires({"vc != null", "variable != null", "key != null", "value != null", "callReport != null"})
private void printCallElement(final VariantContext vc, private void printCallElement(final VariantContext vc,
final Object variable, final Object variable,
final Object key, final Object key,
@ -102,10 +97,10 @@ public class ExactCallLogger implements Cloneable {
* Read in a list of ExactCall objects from reader, keeping only those * Read in a list of ExactCall objects from reader, keeping only those
* with starts in startsToKeep or all sites (if this is empty) * with starts in startsToKeep or all sites (if this is empty)
* *
* @param reader * @param reader a just-opened reader sitting at the start of the file
* @param startsToKeep * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should be kept
* @param parser * @param parser a genome loc parser to create genome locs
* @return * @return a list of ExactCall objects in reader
* @throws IOException * @throws IOException
*/ */
public static List<ExactCall> readExactLog(final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser) throws IOException { public static List<ExactCall> readExactLog(final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser) throws IOException {
@ -118,10 +113,17 @@ public class ExactCallLogger implements Cloneable {
// skip the header line // skip the header line
reader.readLine(); reader.readLine();
// skip the first "type" line
reader.readLine();
while (true) { while (true) {
final VariantContextBuilder builder = new VariantContextBuilder(); final VariantContextBuilder builder = new VariantContextBuilder();
final List<Allele> alleles = new ArrayList<Allele>(); final List<Allele> alleles = new ArrayList<Allele>();
final List<Genotype> genotypes = new ArrayList<Genotype>(); final List<Genotype> genotypes = new ArrayList<Genotype>();
final double[] posteriors = new double[2];
final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
final List<Integer> mle = new ArrayList<Integer>();
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
long runtimeNano = -1; long runtimeNano = -1;
GenomeLoc currentLoc = null; GenomeLoc currentLoc = null;
@ -139,13 +141,15 @@ public class ExactCallLogger implements Cloneable {
if (currentLoc == null) if (currentLoc == null)
currentLoc = lineLoc; currentLoc = lineLoc;
if (variable.equals("log10PosteriorOfAFzero")) { if (variable.equals("type")) {
if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) { if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) {
builder.alleles(alleles); builder.alleles(alleles);
final int stop = currentLoc.getStart() + alleles.get(0).length() - 1; final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
builder.genotypes(genotypes); builder.genotypes(genotypes);
calls.add(new ExactCall(builder.make(), runtimeNano, Double.valueOf(value))); final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{}));
final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
calls.add(new ExactCall(builder.make(), runtimeNano, result));
} }
break; break;
} else if (variable.equals("allele")) { } else if (variable.equals("allele")) {
@ -155,6 +159,15 @@ public class ExactCallLogger implements Cloneable {
final GenotypeBuilder gb = new GenotypeBuilder(key); final GenotypeBuilder gb = new GenotypeBuilder(key);
gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs()); gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
genotypes.add(gb.make()); genotypes.add(gb.make());
} else if (variable.equals("log10PosteriorOfAFEq0")) {
posteriors[0] = Double.valueOf(value);
} else if (variable.equals("log10PosteriorOfAFGt0")) {
posteriors[1] = Double.valueOf(value);
} else if (variable.equals("MLE")) {
mle.add(Integer.valueOf(value));
} else if (variable.equals("pNonRefByAllele")) {
final Allele a = Allele.create(key);
log10pNonRefByAllele.put(a, Double.valueOf(value));
} else if (variable.equals("runtime.nano")) { } else if (variable.equals("runtime.nano")) {
runtimeNano = Long.valueOf(value); runtimeNano = Long.valueOf(value);
} else { } else {