diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index c1d4d34fb..f95e5647c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -157,11 +157,11 @@ public class ApplyRecalibration extends RodWalker implements T if( tranches.size() >= 2 ) { for( int iii = 0; iii < tranches.size() - 1; iii++ ) { final Tranche t = tranches.get(iii); - hInfo.add(new VCFFilterHeaderLine(t.name, String.format("Truth sensitivity tranche level at VSQ Lod: " + t.minVQSLod + " <= x < " + tranches.get(iii+1).minVQSLod))); + hInfo.add(new VCFFilterHeaderLine(t.name, String.format("Truth sensitivity tranche level for " + t.model.toString() + " model at VQS Lod: " + t.minVQSLod + " <= x < " + tranches.get(iii+1).minVQSLod))); } } if( tranches.size() >= 1 ) { - hInfo.add(new VCFFilterHeaderLine(tranches.get(0).name + "+", String.format("Truth sensitivity tranche level at VQS Lod < " + tranches.get(0).minVQSLod))); + hInfo.add(new VCFFilterHeaderLine(tranches.get(0).name + "+", String.format("Truth sensitivity tranche level for " + tranches.get(0).model.toString() + " model at VQS Lod < " + tranches.get(0).minVQSLod))); } else { throw new UserException("No tranches were found in the file or were above the truth sensitivity filter level " + TS_FILTER_LEVEL); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java index 15424f0f7..9228dc375 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java @@ -42,26 +42,28 @@ import java.util.*; */ public class Tranche implements Comparable { - private static final int CURRENT_VERSION = 4; + private static final int CURRENT_VERSION = 5; public double ts, minVQSLod, knownTiTv, novelTiTv; public int numKnown,numNovel; public String name; + public VariantRecalibratorArgumentCollection.Mode model; int accessibleTruthSites = 0; int callsAtTruthSites = 0; - public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites) { - this(ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, "anonymous"); + public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, VariantRecalibratorArgumentCollection.Mode model) { + this(ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, model, "anonymous"); } - public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, String name ) { + public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, VariantRecalibratorArgumentCollection.Mode model, String name ) { this.ts = ts; this.minVQSLod = minVQSLod; this.novelTiTv = novelTiTv; this.numNovel = numNovel; this.knownTiTv = knownTiTv; this.numKnown = numKnown; + this.model = model; this.name = name; this.accessibleTruthSites = accessibleTruthSites; @@ -104,13 +106,13 @@ public class Tranche implements Comparable { stream.println("# Variant quality score tranches file"); stream.println("# Version number " + CURRENT_VERSION); - stream.println("targetTruthSensitivity,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName,accessibleTruthSites,callsAtTruthSites,truthSensitivity"); + stream.println("targetTruthSensitivity,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName,model,accessibleTruthSites,callsAtTruthSites,truthSensitivity"); Tranche prev = null; for ( Tranche t : tranches ) { - stream.printf("%.2f,%d,%d,%.4f,%.4f,%.4f,TruthSensitivityTranche%.2fto%.2f,%d,%d,%.4f%n", - t.ts, t.numKnown, t.numNovel, t.knownTiTv, t.novelTiTv, t.minVQSLod, - (prev == null ? 0.0 : prev.ts), t.ts, t.accessibleTruthSites, t.callsAtTruthSites, t.getTruthSensitivity()); + stream.printf("%.2f,%d,%d,%.4f,%.4f,%.4f,VQSRTranche%s%.2fto%.2f,%s,%d,%d,%.4f%n", + t.ts, t.numKnown, t.numNovel, t.knownTiTv, t.novelTiTv, t.minVQSLod, t.model.toString(), + (prev == null ? 0.0 : prev.ts), t.ts, t.model.toString(), t.accessibleTruthSites, t.callsAtTruthSites, t.getTruthSensitivity()); prev = t; } @@ -157,11 +159,11 @@ public class Tranche implements Comparable { final String[] vals = line.split(","); if( header == null ) { header = vals; - if ( header.length == 5 || header.length == 8 || header.length == 11 ) + if ( header.length == 5 || header.length == 8 || header.length == 10 ) // old style tranches file, throw an error - throw new UserException.MalformedFile(f, "Unfortunately, your tranches file is from a previous version of this tool and cannot be used with the latest code. Please rerun VariantRecalibrator"); - if ( header.length != 10 ) - throw new UserException.MalformedFile(f, "Expected 10 elements in header line " + line); + throw new UserException.MalformedFile(f, "Unfortunately your tranches file is from a previous version of this tool and cannot be used with the latest code. Please rerun VariantRecalibrator"); + if ( header.length != 11 ) + throw new UserException.MalformedFile(f, "Expected 11 elements in header line " + line); } else { if ( header.length != vals.length ) throw new UserException.MalformedFile(f, "Line had too few/many fields. Header = " + header.length + " vals " + vals.length + ". The line was: " + line); @@ -176,6 +178,7 @@ public class Tranche implements Comparable { getDouble(bindings,"novelTiTv", true), getInteger(bindings,"accessibleTruthSites", false), getInteger(bindings,"callsAtTruthSites", false), + VariantRecalibratorArgumentCollection.parseString(bindings.get("model")), bindings.get("filterName"))); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java index 19c6d501b..d45739528 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java @@ -139,11 +139,11 @@ public class TrancheManager { } } - public static List findTranches( ArrayList data, final double[] tranches, SelectionMetric metric ) { - return findTranches( data, tranches, metric, null ); + public static List findTranches( final ArrayList data, final double[] tranches, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model ) { + return findTranches( data, tranches, metric, model, null ); } - public static List findTranches( ArrayList data, final double[] trancheThresholds, SelectionMetric metric, File debugFile ) { + public static List findTranches( final ArrayList data, final double[] trancheThresholds, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model, final File debugFile ) { logger.info(String.format("Finding %d tranches for %d variants", trancheThresholds.length, data.size())); Collections.sort(data); @@ -153,7 +153,7 @@ public class TrancheManager { List tranches = new ArrayList(); for ( double trancheThreshold : trancheThresholds ) { - Tranche t = findTranche(data, metric, trancheThreshold); + Tranche t = findTranche(data, metric, trancheThreshold, model); if ( t == null ) { if ( tranches.size() == 0 ) @@ -182,7 +182,7 @@ public class TrancheManager { } } - public static Tranche findTranche( final List data, final SelectionMetric metric, final double trancheThreshold ) { + public static Tranche findTranche( final List data, final SelectionMetric metric, final double trancheThreshold, final VariantRecalibratorArgumentCollection.Mode model ) { logger.info(String.format(" Tranche threshold %.2f => selection metric threshold %.3f", trancheThreshold, metric.getThreshold(trancheThreshold))); double metricThreshold = metric.getThreshold(trancheThreshold); @@ -190,7 +190,7 @@ public class TrancheManager { for ( int i = 0; i < n; i++ ) { if ( metric.getRunningMetric(i) >= metricThreshold ) { // we've found the largest group of variants with sensitivity >= our target truth sensitivity - Tranche t = trancheOfVariants(data, i, trancheThreshold); + Tranche t = trancheOfVariants(data, i, trancheThreshold, model); logger.info(String.format(" Found tranche for %.3f: %.3f threshold starting with variant %d; running score is %.3f ", trancheThreshold, metricThreshold, i, metric.getRunningMetric(i))); logger.info(String.format(" Tranche is %s", t)); @@ -201,7 +201,7 @@ public class TrancheManager { return null; } - public static Tranche trancheOfVariants( final List data, int minI, double ts ) { + public static Tranche trancheOfVariants( final List data, int minI, double ts, final VariantRecalibratorArgumentCollection.Mode model ) { int numKnown = 0, numNovel = 0, knownTi = 0, knownTv = 0, novelTi = 0, novelTv = 0; double minLod = data.get(minI).lod; @@ -228,7 +228,7 @@ public class TrancheManager { int accessibleTruthSites = countCallsAtTruth(data, Double.NEGATIVE_INFINITY); int nCallsAtTruth = countCallsAtTruth(data, minLod); - return new Tranche(ts, minLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, nCallsAtTruth); + return new Tranche(ts, minLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, nCallsAtTruth, model); } public static double fdrToTiTv(double desiredFDR, double targetTiTv) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index ff4125d6c..5bd7cccd2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -342,7 +342,7 @@ public class VariantRecalibrator extends RodWalker tranches = TrancheManager.findTranches( dataManager.getData(), TS_TRANCHES, metric ); + final List tranches = TrancheManager.findTranches( dataManager.getData(), TS_TRANCHES, metric, VRAC.MODE ); tranchesStream.print(Tranche.tranchesString( tranches )); // Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java index df396b714..691fab885 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; /** * Created by IntelliJ IDEA. @@ -41,6 +42,13 @@ public class VariantRecalibratorArgumentCollection { BOTH } + static Mode parseString(final String input) { + if( input.equals("SNP") ) { return Mode.SNP; } + if( input.equals("INDEL") ) { return Mode.INDEL; } + if( input.equals("BOTH") ) { return Mode.BOTH; } + throw new ReviewedStingException("VariantRecalibrator mode string is unrecognized, input = " + input); + } + @Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only snps (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both snps and indels simultaneously.", required = false) public VariantRecalibratorArgumentCollection.Mode MODE = VariantRecalibratorArgumentCollection.Mode.SNP; @Argument(fullName="maxGaussians", shortName="mG", doc="The maximum number of Gaussians to try during variational Bayes algorithm", required=false) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModelUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModelUnitTest.java index 42181aae6..93b2fe0c9 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModelUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModelUnitTest.java @@ -112,7 +112,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest { private static List findMyTranches(ArrayList vd, double[] tranches) { final int nCallsAtTruth = TrancheManager.countCallsAtTruth( vd, Double.NEGATIVE_INFINITY ); final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth ); - return TrancheManager.findTranches(vd, tranches, metric); + return TrancheManager.findTranches(vd, tranches, metric, VariantRecalibratorArgumentCollection.Mode.SNP); } @Test diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index c075bb390..393905961 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -21,9 +21,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { } VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf", - "0ddd1e0e483d2eaf56004615cea23ec7", // tranches + "62f81e7d2082fbc71cae0101c27fefad", // tranches "b9709e4180e56abc691b208bd3e8626c", // recal file - "4c73ff0c8c5ae0055bfacf33329a2406"); // cut VCF + "75c178345f70ca2eb90205662fbdf968"); // cut VCF @DataProvider(name = "VRTest") public Object[][] createData1() { @@ -70,9 +70,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { } VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf", - "da4458d05f6396f5c4ab96f274e5ccdc", // tranches + "b7589cd098dc153ec64c02dcff2838e4", // tranches "a04a9001f62eff43d363f4d63769f3ee", // recal file - "b9936d2432d3c85b2d8b5b7aa17d0950"); // cut VCF + "888eb042dd33b807bcbb8630896fda94"); // cut VCF @DataProvider(name = "VRIndelTest") public Object[][] createData2() { @@ -130,7 +130,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { " -o %s" + " -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" + " -recalFile " + privateTestDir + "VQSR.mixedTest.recal", - Arrays.asList("d670c684f73e2744b6c01738a01d5ec4")); + Arrays.asList("ec519e1f01459813dab57aefffc019e2")); executeTest("testApplyRecalibrationSnpAndIndelTogether", spec); } }