Refactor ExactCallLogger into a separate class
-- Update minor integration tests with NanoSchedule due to qual accuracy update
This commit is contained in:
parent
c74d7061fe
commit
9bcefadd4e
|
|
@ -18,11 +18,8 @@ import java.io.*;
|
|||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 10/2/12
|
||||
* Time: 10:25 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
* A simple GATK utility (i.e, runs from command-line) for assessing the performance of
|
||||
* the exact model
|
||||
*/
|
||||
public class AFCalcPerformanceTest {
|
||||
final static Logger logger = Logger.getLogger(AFCalcPerformanceTest.class);
|
||||
|
|
@ -222,9 +219,9 @@ public class AFCalcPerformanceTest {
|
|||
final CachingIndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(ref);
|
||||
final GenomeLocParser parser = new GenomeLocParser(seq);
|
||||
final BufferedReader reader = new BufferedReader(new FileReader(exactLogFile));
|
||||
final List<AFCalc.ExactCall> loggedCalls = AFCalc.readExactLog(reader, startsToUse, parser);
|
||||
final List<ExactCallLogger.ExactCall> loggedCalls = ExactCallLogger.readExactLog(reader, startsToUse, parser);
|
||||
|
||||
for ( final AFCalc.ExactCall call : loggedCalls ) {
|
||||
for ( final ExactCallLogger.ExactCall call : loggedCalls ) {
|
||||
final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1,
|
||||
AFCalcFactory.Calculation.EXACT_INDEPENDENT,
|
||||
AFCalcTestBuilder.PriorType.human);
|
||||
|
|
|
|||
|
|
@ -28,20 +28,17 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* Generic interface for calculating the probability of alleles segregating given priors and genotype likelihoods
|
||||
*
|
||||
*/
|
||||
public abstract class AFCalc implements Cloneable {
|
||||
private final static Logger defaultLogger = Logger.getLogger(AFCalc.class);
|
||||
|
|
@ -53,8 +50,8 @@ public abstract class AFCalc implements Cloneable {
|
|||
protected Logger logger = defaultLogger;
|
||||
|
||||
private SimpleTimer callTimer = new SimpleTimer();
|
||||
private PrintStream callReport = null;
|
||||
private final AFCalcResultTracker resultTracker;
|
||||
private ExactCallLogger exactCallLogger = null;
|
||||
|
||||
protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
||||
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
|
||||
|
|
@ -69,7 +66,7 @@ public abstract class AFCalc implements Cloneable {
|
|||
}
|
||||
|
||||
public void enableProcessLog(final File exactCallsLog) {
|
||||
initializeOutputFile(exactCallsLog);
|
||||
exactCallLogger = new ExactCallLogger(exactCallsLog);
|
||||
}
|
||||
|
||||
public void setLogger(Logger logger) {
|
||||
|
|
@ -97,8 +94,8 @@ public abstract class AFCalc implements Cloneable {
|
|||
final AFCalcResult result = computeLog10PNonRef(vcWorking, log10AlleleFrequencyPriors);
|
||||
final long nanoTime = callTimer.getElapsedTimeNano();
|
||||
|
||||
if ( callReport != null )
|
||||
printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, resultTracker.getLog10PosteriorOfAFzero());
|
||||
if ( exactCallLogger != null )
|
||||
exactCallLogger.printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
@ -165,130 +162,6 @@ public abstract class AFCalc implements Cloneable {
|
|||
return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels);
|
||||
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// Print information about the call to the calls log
|
||||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
private void initializeOutputFile(final File outputFile) {
|
||||
try {
|
||||
if (outputFile != null) {
|
||||
callReport = new PrintStream( new FileOutputStream(outputFile) );
|
||||
callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value")));
|
||||
}
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(outputFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
private void printCallInfo(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final long runtimeNano,
|
||||
final double log10PosteriorOfAFzero) {
|
||||
printCallElement(vc, "type", "ignore", vc.getType());
|
||||
|
||||
int allelei = 0;
|
||||
for ( final Allele a : vc.getAlleles() )
|
||||
printCallElement(vc, "allele", allelei++, a.getDisplayString());
|
||||
|
||||
for ( final Genotype g : vc.getGenotypes() )
|
||||
printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString());
|
||||
|
||||
for ( int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++ )
|
||||
printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]);
|
||||
|
||||
printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
|
||||
printCallElement(vc, "log10PosteriorOfAFzero", "ignore", log10PosteriorOfAFzero);
|
||||
|
||||
callReport.flush();
|
||||
}
|
||||
|
||||
private void printCallElement(final VariantContext vc,
|
||||
final Object variable,
|
||||
final Object key,
|
||||
final Object value) {
|
||||
final String loc = String.format("%s:%d", vc.getChr(), vc.getStart());
|
||||
callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value)));
|
||||
}
|
||||
|
||||
public static class ExactCall {
|
||||
final VariantContext vc;
|
||||
final long origNanoTime;
|
||||
long newNanoTime = -1;
|
||||
final double origPNonRef;
|
||||
double newPNonRef = -1;
|
||||
|
||||
public ExactCall(VariantContext vc, long origNanoTime, double origPNonRef) {
|
||||
this.vc = vc;
|
||||
this.origNanoTime = origNanoTime;
|
||||
this.origPNonRef = origPNonRef;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s new.pNonRef=%.2f new.runtime=%s",
|
||||
vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(),
|
||||
origPNonRef,
|
||||
new AutoFormattingTime(origNanoTime / 1e9).toString(),
|
||||
newPNonRef,
|
||||
newNanoTime == -1 ? "not.run" : new AutoFormattingTime(newNanoTime / 1e9).toString());
|
||||
}
|
||||
}
|
||||
|
||||
public static List<ExactCall> readExactLog(final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser) throws IOException {
|
||||
List<ExactCall> calls = new LinkedList<ExactCall>();
|
||||
|
||||
// skip the header line
|
||||
reader.readLine();
|
||||
|
||||
while ( true ) {
|
||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
final List<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
long runtimeNano = -1;
|
||||
|
||||
GenomeLoc currentLoc = null;
|
||||
while ( true ) {
|
||||
final String line = reader.readLine();
|
||||
if ( line == null )
|
||||
return calls;
|
||||
|
||||
final String[] parts = line.split("\t");
|
||||
final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]);
|
||||
final String variable = parts[1];
|
||||
final String key = parts[2];
|
||||
final String value = parts[3];
|
||||
|
||||
if ( currentLoc == null )
|
||||
currentLoc = lineLoc;
|
||||
|
||||
if ( variable.equals("log10PosteriorOfAFzero") ) {
|
||||
if ( startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart()) ) {
|
||||
builder.alleles(alleles);
|
||||
final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
|
||||
builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
|
||||
builder.genotypes(genotypes);
|
||||
calls.add(new ExactCall(builder.make(), runtimeNano, Double.valueOf(value)));
|
||||
}
|
||||
break;
|
||||
} else if ( variable.equals("allele") ) {
|
||||
final boolean isRef = key.equals("0");
|
||||
alleles.add(Allele.create(value, isRef));
|
||||
} else if ( variable.equals("PL") ) {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(key);
|
||||
gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
|
||||
genotypes.add(gb.make());
|
||||
} else if ( variable.equals("runtime.nano") ) {
|
||||
runtimeNano = Long.valueOf(value);
|
||||
} else {
|
||||
// nothing to do
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public AFCalcResultTracker getResultTracker() {
|
||||
return resultTracker;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,166 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Allows us to write out and read in information about exact calls (site, alleles, PLs, etc) in tabular format
|
||||
*/
|
||||
public class ExactCallLogger implements Cloneable {
|
||||
private PrintStream callReport = null;
|
||||
|
||||
/**
|
||||
* Create a new ExactCallLogger writing it's output to outputFile
|
||||
*
|
||||
* @param outputFile
|
||||
*/
|
||||
public ExactCallLogger(final File outputFile) {
|
||||
try {
|
||||
callReport = new PrintStream(new FileOutputStream(outputFile));
|
||||
callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value")));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(outputFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Summarizes information about an exact call that happened
|
||||
*/
|
||||
public static class ExactCall {
|
||||
final VariantContext vc;
|
||||
final long origNanoTime;
|
||||
long newNanoTime = -1;
|
||||
final double origPNonRef;
|
||||
double newPNonRef = -1;
|
||||
|
||||
public ExactCall(VariantContext vc, long origNanoTime, double origPNonRef) {
|
||||
this.vc = vc;
|
||||
this.origNanoTime = origNanoTime;
|
||||
this.origPNonRef = origPNonRef;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s new.pNonRef=%.2f new.runtime=%s",
|
||||
vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(),
|
||||
origPNonRef,
|
||||
new AutoFormattingTime(origNanoTime / 1e9).toString(),
|
||||
newPNonRef,
|
||||
newNanoTime == -1 ? "not.run" : new AutoFormattingTime(newNanoTime / 1e9).toString());
|
||||
}
|
||||
}
|
||||
|
||||
protected void printCallInfo(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors,
|
||||
final long runtimeNano,
|
||||
final AFCalcResult result) {
|
||||
printCallElement(vc, "type", "ignore", vc.getType());
|
||||
|
||||
int allelei = 0;
|
||||
for (final Allele a : vc.getAlleles())
|
||||
printCallElement(vc, "allele", allelei++, a.getDisplayString());
|
||||
|
||||
for (final Genotype g : vc.getGenotypes())
|
||||
printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString());
|
||||
|
||||
for (int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++)
|
||||
printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]);
|
||||
|
||||
printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
|
||||
printCallElement(vc, "log10PosteriorOfAFEq0", "ignore", result.getLog10PosteriorOfAFEq0());
|
||||
printCallElement(vc, "log10PosteriorOfAFGt0", "ignore", result.getLog10PosteriorOfAFGT0());
|
||||
|
||||
for ( final Allele allele : result.getAllelesUsedInGenotyping() ) {
|
||||
if ( allele.isNonReference() ) {
|
||||
printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele));
|
||||
printCallElement(vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele));
|
||||
}
|
||||
}
|
||||
|
||||
callReport.flush();
|
||||
}
|
||||
|
||||
private void printCallElement(final VariantContext vc,
|
||||
final Object variable,
|
||||
final Object key,
|
||||
final Object value) {
|
||||
final String loc = String.format("%s:%d", vc.getChr(), vc.getStart());
|
||||
callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Read in a list of ExactCall objects from reader, keeping only those
|
||||
* with starts in startsToKeep or all sites (if this is empty)
|
||||
*
|
||||
* @param reader
|
||||
* @param startsToKeep
|
||||
* @param parser
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public static List<ExactCall> readExactLog(final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser) throws IOException {
|
||||
if ( reader == null ) throw new IllegalArgumentException("reader cannot be null");
|
||||
if ( startsToKeep == null ) throw new IllegalArgumentException("startsToKeep cannot be null");
|
||||
if ( parser == null ) throw new IllegalArgumentException("GenomeLocParser cannot be null");
|
||||
|
||||
List<ExactCall> calls = new LinkedList<ExactCall>();
|
||||
|
||||
// skip the header line
|
||||
reader.readLine();
|
||||
|
||||
while (true) {
|
||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
final List<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
long runtimeNano = -1;
|
||||
|
||||
GenomeLoc currentLoc = null;
|
||||
while (true) {
|
||||
final String line = reader.readLine();
|
||||
if (line == null)
|
||||
return calls;
|
||||
|
||||
final String[] parts = line.split("\t");
|
||||
final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]);
|
||||
final String variable = parts[1];
|
||||
final String key = parts[2];
|
||||
final String value = parts[3];
|
||||
|
||||
if (currentLoc == null)
|
||||
currentLoc = lineLoc;
|
||||
|
||||
if (variable.equals("log10PosteriorOfAFzero")) {
|
||||
if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) {
|
||||
builder.alleles(alleles);
|
||||
final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
|
||||
builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
|
||||
builder.genotypes(genotypes);
|
||||
calls.add(new ExactCall(builder.make(), runtimeNano, Double.valueOf(value)));
|
||||
}
|
||||
break;
|
||||
} else if (variable.equals("allele")) {
|
||||
final boolean isRef = key.equals("0");
|
||||
alleles.add(Allele.create(value, isRef));
|
||||
} else if (variable.equals("PL")) {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(key);
|
||||
gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
|
||||
genotypes.add(gb.make());
|
||||
} else if (variable.equals("runtime.nano")) {
|
||||
runtimeNano = Long.valueOf(value);
|
||||
} else {
|
||||
// nothing to do
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -21,7 +21,7 @@ public class NanoSchedulerIntegrationTest extends WalkerTest {
|
|||
for ( final int nct : Arrays.asList(1, 2) ) {
|
||||
// tests.add(new Object[]{ "SNP", "a1c7546f32a8919a3f3a70a04b2e8322", nt, nct });
|
||||
//// tests.add(new Object[]{ "INDEL", "0a6d2be79f4f8a4b0eb788cc4751b31b", nt, nct });
|
||||
tests.add(new Object[]{ "BOTH", "f8184e336dec7632408aa9afa98e6914", nt, nct });
|
||||
tests.add(new Object[]{ "BOTH", "8cad82c3a5f5b932042933f136663c8a", nt, nct });
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
|
|
|
|||
Loading…
Reference in New Issue