From 9bcefadd4efaf6094f51268d2113c3328d27a585 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 16 Oct 2012 13:30:09 -0400 Subject: [PATCH] Refactor ExactCallLogger into a separate class -- Update minor integration tests with NanoSchedule due to qual accuracy update --- .../afcalc/AFCalcPerformanceTest.java | 11 +- .../gatk/walkers/genotyper/afcalc/AFCalc.java | 145 +-------------- .../genotyper/afcalc/ExactCallLogger.java | 166 ++++++++++++++++++ .../NanoSchedulerIntegrationTest.java | 2 +- 4 files changed, 180 insertions(+), 144 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java index f019d8f8e..fab26c9d2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java @@ -18,11 +18,8 @@ import java.io.*; import java.util.*; /** - * Created with IntelliJ IDEA. - * User: depristo - * Date: 10/2/12 - * Time: 10:25 AM - * To change this template use File | Settings | File Templates. + * A simple GATK utility (i.e, runs from command-line) for assessing the performance of + * the exact model */ public class AFCalcPerformanceTest { final static Logger logger = Logger.getLogger(AFCalcPerformanceTest.class); @@ -222,9 +219,9 @@ public class AFCalcPerformanceTest { final CachingIndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(ref); final GenomeLocParser parser = new GenomeLocParser(seq); final BufferedReader reader = new BufferedReader(new FileReader(exactLogFile)); - final List loggedCalls = AFCalc.readExactLog(reader, startsToUse, parser); + final List loggedCalls = ExactCallLogger.readExactLog(reader, startsToUse, parser); - for ( final AFCalc.ExactCall call : loggedCalls ) { + for ( final ExactCallLogger.ExactCall call : loggedCalls ) { final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1, AFCalcFactory.Calculation.EXACT_INDEPENDENT, AFCalcTestBuilder.PriorType.human); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java index 8cb6bcabc..07f88c9e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java @@ -28,20 +28,17 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.SimpleTimer; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.*; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedList; +import java.io.File; import java.util.List; /** * Generic interface for calculating the probability of alleles segregating given priors and genotype likelihoods - * */ public abstract class AFCalc implements Cloneable { private final static Logger defaultLogger = Logger.getLogger(AFCalc.class); @@ -53,8 +50,8 @@ public abstract class AFCalc implements Cloneable { protected Logger logger = defaultLogger; private SimpleTimer callTimer = new SimpleTimer(); - private PrintStream callReport = null; private final AFCalcResultTracker resultTracker; + private ExactCallLogger exactCallLogger = null; protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) { if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples); @@ -69,7 +66,7 @@ public abstract class AFCalc implements Cloneable { } public void enableProcessLog(final File exactCallsLog) { - initializeOutputFile(exactCallsLog); + exactCallLogger = new ExactCallLogger(exactCallsLog); } public void setLogger(Logger logger) { @@ -97,8 +94,8 @@ public abstract class AFCalc implements Cloneable { final AFCalcResult result = computeLog10PNonRef(vcWorking, log10AlleleFrequencyPriors); final long nanoTime = callTimer.getElapsedTimeNano(); - if ( callReport != null ) - printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, resultTracker.getLog10PosteriorOfAFzero()); + if ( exactCallLogger != null ) + exactCallLogger.printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, result); return result; } @@ -165,130 +162,6 @@ public abstract class AFCalc implements Cloneable { return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels); } - - // --------------------------------------------------------------------------- - // - // Print information about the call to the calls log - // - // --------------------------------------------------------------------------- - - private void initializeOutputFile(final File outputFile) { - try { - if (outputFile != null) { - callReport = new PrintStream( new FileOutputStream(outputFile) ); - callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value"))); - } - } catch ( FileNotFoundException e ) { - throw new UserException.CouldNotCreateOutputFile(outputFile, e); - } - } - - private void printCallInfo(final VariantContext vc, - final double[] log10AlleleFrequencyPriors, - final long runtimeNano, - final double log10PosteriorOfAFzero) { - printCallElement(vc, "type", "ignore", vc.getType()); - - int allelei = 0; - for ( final Allele a : vc.getAlleles() ) - printCallElement(vc, "allele", allelei++, a.getDisplayString()); - - for ( final Genotype g : vc.getGenotypes() ) - printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString()); - - for ( int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++ ) - printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]); - - printCallElement(vc, "runtime.nano", "ignore", runtimeNano); - printCallElement(vc, "log10PosteriorOfAFzero", "ignore", log10PosteriorOfAFzero); - - callReport.flush(); - } - - private void printCallElement(final VariantContext vc, - final Object variable, - final Object key, - final Object value) { - final String loc = String.format("%s:%d", vc.getChr(), vc.getStart()); - callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value))); - } - - public static class ExactCall { - final VariantContext vc; - final long origNanoTime; - long newNanoTime = -1; - final double origPNonRef; - double newPNonRef = -1; - - public ExactCall(VariantContext vc, long origNanoTime, double origPNonRef) { - this.vc = vc; - this.origNanoTime = origNanoTime; - this.origPNonRef = origPNonRef; - } - - @Override - public String toString() { - return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s new.pNonRef=%.2f new.runtime=%s", - vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(), - origPNonRef, - new AutoFormattingTime(origNanoTime / 1e9).toString(), - newPNonRef, - newNanoTime == -1 ? "not.run" : new AutoFormattingTime(newNanoTime / 1e9).toString()); - } - } - - public static List readExactLog(final BufferedReader reader, final List startsToKeep, GenomeLocParser parser) throws IOException { - List calls = new LinkedList(); - - // skip the header line - reader.readLine(); - - while ( true ) { - final VariantContextBuilder builder = new VariantContextBuilder(); - final List alleles = new ArrayList(); - final List genotypes = new ArrayList(); - long runtimeNano = -1; - - GenomeLoc currentLoc = null; - while ( true ) { - final String line = reader.readLine(); - if ( line == null ) - return calls; - - final String[] parts = line.split("\t"); - final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]); - final String variable = parts[1]; - final String key = parts[2]; - final String value = parts[3]; - - if ( currentLoc == null ) - currentLoc = lineLoc; - - if ( variable.equals("log10PosteriorOfAFzero") ) { - if ( startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart()) ) { - builder.alleles(alleles); - final int stop = currentLoc.getStart() + alleles.get(0).length() - 1; - builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); - builder.genotypes(genotypes); - calls.add(new ExactCall(builder.make(), runtimeNano, Double.valueOf(value))); - } - break; - } else if ( variable.equals("allele") ) { - final boolean isRef = key.equals("0"); - alleles.add(Allele.create(value, isRef)); - } else if ( variable.equals("PL") ) { - final GenotypeBuilder gb = new GenotypeBuilder(key); - gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs()); - genotypes.add(gb.make()); - } else if ( variable.equals("runtime.nano") ) { - runtimeNano = Long.valueOf(value); - } else { - // nothing to do - } - } - } - } - public AFCalcResultTracker getResultTracker() { return resultTracker; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java new file mode 100644 index 000000000..3794ba240 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java @@ -0,0 +1,166 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; + +import org.broadinstitute.sting.utils.AutoFormattingTime; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.*; + +import java.io.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +/** + * Allows us to write out and read in information about exact calls (site, alleles, PLs, etc) in tabular format + */ +public class ExactCallLogger implements Cloneable { + private PrintStream callReport = null; + + /** + * Create a new ExactCallLogger writing it's output to outputFile + * + * @param outputFile + */ + public ExactCallLogger(final File outputFile) { + try { + callReport = new PrintStream(new FileOutputStream(outputFile)); + callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value"))); + } catch (FileNotFoundException e) { + throw new UserException.CouldNotCreateOutputFile(outputFile, e); + } + } + + /** + * Summarizes information about an exact call that happened + */ + public static class ExactCall { + final VariantContext vc; + final long origNanoTime; + long newNanoTime = -1; + final double origPNonRef; + double newPNonRef = -1; + + public ExactCall(VariantContext vc, long origNanoTime, double origPNonRef) { + this.vc = vc; + this.origNanoTime = origNanoTime; + this.origPNonRef = origPNonRef; + } + + @Override + public String toString() { + return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s new.pNonRef=%.2f new.runtime=%s", + vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(), + origPNonRef, + new AutoFormattingTime(origNanoTime / 1e9).toString(), + newPNonRef, + newNanoTime == -1 ? "not.run" : new AutoFormattingTime(newNanoTime / 1e9).toString()); + } + } + + protected void printCallInfo(final VariantContext vc, + final double[] log10AlleleFrequencyPriors, + final long runtimeNano, + final AFCalcResult result) { + printCallElement(vc, "type", "ignore", vc.getType()); + + int allelei = 0; + for (final Allele a : vc.getAlleles()) + printCallElement(vc, "allele", allelei++, a.getDisplayString()); + + for (final Genotype g : vc.getGenotypes()) + printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString()); + + for (int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++) + printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]); + + printCallElement(vc, "runtime.nano", "ignore", runtimeNano); + printCallElement(vc, "log10PosteriorOfAFEq0", "ignore", result.getLog10PosteriorOfAFEq0()); + printCallElement(vc, "log10PosteriorOfAFGt0", "ignore", result.getLog10PosteriorOfAFGT0()); + + for ( final Allele allele : result.getAllelesUsedInGenotyping() ) { + if ( allele.isNonReference() ) { + printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele)); + printCallElement(vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele)); + } + } + + callReport.flush(); + } + + private void printCallElement(final VariantContext vc, + final Object variable, + final Object key, + final Object value) { + final String loc = String.format("%s:%d", vc.getChr(), vc.getStart()); + callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value))); + } + + /** + * Read in a list of ExactCall objects from reader, keeping only those + * with starts in startsToKeep or all sites (if this is empty) + * + * @param reader + * @param startsToKeep + * @param parser + * @return + * @throws IOException + */ + public static List readExactLog(final BufferedReader reader, final List startsToKeep, GenomeLocParser parser) throws IOException { + if ( reader == null ) throw new IllegalArgumentException("reader cannot be null"); + if ( startsToKeep == null ) throw new IllegalArgumentException("startsToKeep cannot be null"); + if ( parser == null ) throw new IllegalArgumentException("GenomeLocParser cannot be null"); + + List calls = new LinkedList(); + + // skip the header line + reader.readLine(); + + while (true) { + final VariantContextBuilder builder = new VariantContextBuilder(); + final List alleles = new ArrayList(); + final List genotypes = new ArrayList(); + long runtimeNano = -1; + + GenomeLoc currentLoc = null; + while (true) { + final String line = reader.readLine(); + if (line == null) + return calls; + + final String[] parts = line.split("\t"); + final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]); + final String variable = parts[1]; + final String key = parts[2]; + final String value = parts[3]; + + if (currentLoc == null) + currentLoc = lineLoc; + + if (variable.equals("log10PosteriorOfAFzero")) { + if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) { + builder.alleles(alleles); + final int stop = currentLoc.getStart() + alleles.get(0).length() - 1; + builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); + builder.genotypes(genotypes); + calls.add(new ExactCall(builder.make(), runtimeNano, Double.valueOf(value))); + } + break; + } else if (variable.equals("allele")) { + final boolean isRef = key.equals("0"); + alleles.add(Allele.create(value, isRef)); + } else if (variable.equals("PL")) { + final GenotypeBuilder gb = new GenotypeBuilder(key); + gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs()); + genotypes.add(gb.make()); + } else if (variable.equals("runtime.nano")) { + runtimeNano = Long.valueOf(value); + } else { + // nothing to do + } + } + } + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java index 93099f82a..c1b28314c 100755 --- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java @@ -21,7 +21,7 @@ public class NanoSchedulerIntegrationTest extends WalkerTest { for ( final int nct : Arrays.asList(1, 2) ) { // tests.add(new Object[]{ "SNP", "a1c7546f32a8919a3f3a70a04b2e8322", nt, nct }); //// tests.add(new Object[]{ "INDEL", "0a6d2be79f4f8a4b0eb788cc4751b31b", nt, nct }); - tests.add(new Object[]{ "BOTH", "f8184e336dec7632408aa9afa98e6914", nt, nct }); + tests.add(new Object[]{ "BOTH", "8cad82c3a5f5b932042933f136663c8a", nt, nct }); } return tests.toArray(new Object[][]{});