Improve interface of ExactCallLogger, use it to have a more informative AFCalcPerformanceTest
This commit is contained in:
parent
d6be657966
commit
c9e7a947c2
|
|
@ -10,6 +10,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||||
|
|
@ -225,12 +226,24 @@ public class AFCalcPerformanceTest {
|
||||||
final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1,
|
final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1,
|
||||||
AFCalcFactory.Calculation.EXACT_INDEPENDENT,
|
AFCalcFactory.Calculation.EXACT_INDEPENDENT,
|
||||||
AFCalcTestBuilder.PriorType.human);
|
AFCalcTestBuilder.PriorType.human);
|
||||||
|
logger.info(call);
|
||||||
final SimpleTimer timer = new SimpleTimer().start();
|
final SimpleTimer timer = new SimpleTimer().start();
|
||||||
final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, testBuilder.makePriors());
|
final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, testBuilder.makePriors());
|
||||||
call.newNanoTime = timer.getElapsedTimeNano();
|
final long newNanoTime = timer.getElapsedTimeNano();
|
||||||
call.newPNonRef = result.getLog10PosteriorOfAFGT0();
|
if ( call.originalCall.anyPolymorphic(-1) || result.anyPolymorphic(-1) ) {
|
||||||
logger.info(call);
|
logger.info("**** ONE IS POLY");
|
||||||
logger.info("\t\t" + result);
|
}
|
||||||
|
logger.info("\t\t getLog10PosteriorOfAFGT0: " + call.originalCall.getLog10PosteriorOfAFGT0() + " vs " + result.getLog10PosteriorOfAFGT0());
|
||||||
|
final double speedup = call.runtime / (1.0 * newNanoTime);
|
||||||
|
logger.info("\t\t runtime: " + call.runtime + " vs " + newNanoTime + " speedup " + String.format("%.2f", speedup) + "x");
|
||||||
|
for ( final Allele a : call.originalCall.getAllelesUsedInGenotyping() ) {
|
||||||
|
if ( a.isNonReference() ) {
|
||||||
|
final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : "";
|
||||||
|
logger.info("\t\t MLE " + a + ": " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE);
|
||||||
|
final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
|
||||||
|
logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -238,6 +238,19 @@ public class AFCalcResult {
|
||||||
return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
|
return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Are any of the alleles polymorphic w.r.t. #isPolymorphic?
|
||||||
|
*
|
||||||
|
* @param log10minPNonRef the confidence threshold, in log10 space
|
||||||
|
* @return true if any are poly, false otherwise
|
||||||
|
*/
|
||||||
|
public boolean anyPolymorphic(final double log10minPNonRef) {
|
||||||
|
for ( final Allele a : getAllelesUsedInGenotyping() )
|
||||||
|
if ( a.isNonReference() && isPolymorphic(a, log10minPNonRef) )
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the log10 probability that allele is segregating
|
* Returns the log10 probability that allele is segregating
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -1,20 +1,18 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
import com.google.java.contract.Requires;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allows us to write out and read in information about exact calls (site, alleles, PLs, etc) in tabular format
|
* Allows us to write out and read in information about exact calls (site, alleles, PLs, etc) in tabular format
|
||||||
|
*
|
||||||
|
* Once opened, calls can be writen to disk with printCallInfo
|
||||||
*/
|
*/
|
||||||
public class ExactCallLogger implements Cloneable {
|
public class ExactCallLogger implements Cloneable {
|
||||||
private PrintStream callReport = null;
|
private PrintStream callReport = null;
|
||||||
|
|
@ -38,29 +36,25 @@ public class ExactCallLogger implements Cloneable {
|
||||||
*/
|
*/
|
||||||
public static class ExactCall {
|
public static class ExactCall {
|
||||||
final VariantContext vc;
|
final VariantContext vc;
|
||||||
final long origNanoTime;
|
final long runtime;
|
||||||
long newNanoTime = -1;
|
final AFCalcResult originalCall;
|
||||||
final double origPNonRef;
|
|
||||||
double newPNonRef = -1;
|
|
||||||
|
|
||||||
public ExactCall(VariantContext vc, long origNanoTime, double origPNonRef) {
|
public ExactCall(VariantContext vc, final long runtime, final AFCalcResult originalCall) {
|
||||||
this.vc = vc;
|
this.vc = vc;
|
||||||
this.origNanoTime = origNanoTime;
|
this.runtime = runtime;
|
||||||
this.origPNonRef = origPNonRef;
|
this.originalCall = originalCall;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s new.pNonRef=%.2f new.runtime=%s",
|
return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s",
|
||||||
vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(),
|
vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(),
|
||||||
origPNonRef,
|
originalCall.getLog10PosteriorOfAFGT0(),
|
||||||
new AutoFormattingTime(origNanoTime / 1e9).toString(),
|
new AutoFormattingTime(runtime / 1e9).toString());
|
||||||
newPNonRef,
|
|
||||||
newNanoTime == -1 ? "not.run" : new AutoFormattingTime(newNanoTime / 1e9).toString());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void printCallInfo(final VariantContext vc,
|
protected final void printCallInfo(final VariantContext vc,
|
||||||
final double[] log10AlleleFrequencyPriors,
|
final double[] log10AlleleFrequencyPriors,
|
||||||
final long runtimeNano,
|
final long runtimeNano,
|
||||||
final AFCalcResult result) {
|
final AFCalcResult result) {
|
||||||
|
|
@ -90,6 +84,7 @@ public class ExactCallLogger implements Cloneable {
|
||||||
callReport.flush();
|
callReport.flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Requires({"vc != null", "variable != null", "key != null", "value != null", "callReport != null"})
|
||||||
private void printCallElement(final VariantContext vc,
|
private void printCallElement(final VariantContext vc,
|
||||||
final Object variable,
|
final Object variable,
|
||||||
final Object key,
|
final Object key,
|
||||||
|
|
@ -102,10 +97,10 @@ public class ExactCallLogger implements Cloneable {
|
||||||
* Read in a list of ExactCall objects from reader, keeping only those
|
* Read in a list of ExactCall objects from reader, keeping only those
|
||||||
* with starts in startsToKeep or all sites (if this is empty)
|
* with starts in startsToKeep or all sites (if this is empty)
|
||||||
*
|
*
|
||||||
* @param reader
|
* @param reader a just-opened reader sitting at the start of the file
|
||||||
* @param startsToKeep
|
* @param startsToKeep a list of start position of the calls to keep, or empty if all calls should be kept
|
||||||
* @param parser
|
* @param parser a genome loc parser to create genome locs
|
||||||
* @return
|
* @return a list of ExactCall objects in reader
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public static List<ExactCall> readExactLog(final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser) throws IOException {
|
public static List<ExactCall> readExactLog(final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser) throws IOException {
|
||||||
|
|
@ -118,10 +113,17 @@ public class ExactCallLogger implements Cloneable {
|
||||||
// skip the header line
|
// skip the header line
|
||||||
reader.readLine();
|
reader.readLine();
|
||||||
|
|
||||||
|
// skip the first "type" line
|
||||||
|
reader.readLine();
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||||
final List<Allele> alleles = new ArrayList<Allele>();
|
final List<Allele> alleles = new ArrayList<Allele>();
|
||||||
final List<Genotype> genotypes = new ArrayList<Genotype>();
|
final List<Genotype> genotypes = new ArrayList<Genotype>();
|
||||||
|
final double[] posteriors = new double[2];
|
||||||
|
final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
|
||||||
|
final List<Integer> mle = new ArrayList<Integer>();
|
||||||
|
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
|
||||||
long runtimeNano = -1;
|
long runtimeNano = -1;
|
||||||
|
|
||||||
GenomeLoc currentLoc = null;
|
GenomeLoc currentLoc = null;
|
||||||
|
|
@ -139,13 +141,15 @@ public class ExactCallLogger implements Cloneable {
|
||||||
if (currentLoc == null)
|
if (currentLoc == null)
|
||||||
currentLoc = lineLoc;
|
currentLoc = lineLoc;
|
||||||
|
|
||||||
if (variable.equals("log10PosteriorOfAFzero")) {
|
if (variable.equals("type")) {
|
||||||
if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) {
|
if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) {
|
||||||
builder.alleles(alleles);
|
builder.alleles(alleles);
|
||||||
final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
|
final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
|
||||||
builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
|
builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
|
||||||
builder.genotypes(genotypes);
|
builder.genotypes(genotypes);
|
||||||
calls.add(new ExactCall(builder.make(), runtimeNano, Double.valueOf(value)));
|
final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{}));
|
||||||
|
final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
|
||||||
|
calls.add(new ExactCall(builder.make(), runtimeNano, result));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
} else if (variable.equals("allele")) {
|
} else if (variable.equals("allele")) {
|
||||||
|
|
@ -155,6 +159,15 @@ public class ExactCallLogger implements Cloneable {
|
||||||
final GenotypeBuilder gb = new GenotypeBuilder(key);
|
final GenotypeBuilder gb = new GenotypeBuilder(key);
|
||||||
gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
|
gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
|
||||||
genotypes.add(gb.make());
|
genotypes.add(gb.make());
|
||||||
|
} else if (variable.equals("log10PosteriorOfAFEq0")) {
|
||||||
|
posteriors[0] = Double.valueOf(value);
|
||||||
|
} else if (variable.equals("log10PosteriorOfAFGt0")) {
|
||||||
|
posteriors[1] = Double.valueOf(value);
|
||||||
|
} else if (variable.equals("MLE")) {
|
||||||
|
mle.add(Integer.valueOf(value));
|
||||||
|
} else if (variable.equals("pNonRefByAllele")) {
|
||||||
|
final Allele a = Allele.create(key);
|
||||||
|
log10pNonRefByAllele.put(a, Double.valueOf(value));
|
||||||
} else if (variable.equals("runtime.nano")) {
|
} else if (variable.equals("runtime.nano")) {
|
||||||
runtimeNano = Long.valueOf(value);
|
runtimeNano = Long.valueOf(value);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue