Adding the model name to the VQSR filter lines so that they don't get clobbered with consecutive VQSR runs for SNPs and then indels.

2012-07-03 14:30:37 -04:00 · 2012-07-03 14:30:37 -04:00 · 9e8e78de15
parent 031322ff00
commit 9e8e78de15
7 changed files with 40 additions and 29 deletions
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
@ -157,11 +157,11 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
        if( tranches.size() >= 2 ) {
            for( int iii = 0; iii < tranches.size() - 1; iii++ ) {
                final Tranche t = tranches.get(iii);
-                hInfo.add(new VCFFilterHeaderLine(t.name, String.format("Truth sensitivity tranche level at VSQ Lod: " + t.minVQSLod + " <= x < " + tranches.get(iii+1).minVQSLod)));
+                hInfo.add(new VCFFilterHeaderLine(t.name, String.format("Truth sensitivity tranche level for " + t.model.toString() + " model at VQS Lod: " + t.minVQSLod + " <= x < " + tranches.get(iii+1).minVQSLod)));
            }
        }
        if( tranches.size() >= 1 ) {
-            hInfo.add(new VCFFilterHeaderLine(tranches.get(0).name + "+", String.format("Truth sensitivity tranche level at VQS Lod < " + tranches.get(0).minVQSLod)));
+            hInfo.add(new VCFFilterHeaderLine(tranches.get(0).name + "+", String.format("Truth sensitivity tranche level for " + tranches.get(0).model.toString() + " model at VQS Lod < " + tranches.get(0).minVQSLod)));
        } else {
            throw new UserException("No tranches were found in the file or were above the truth sensitivity filter level " + TS_FILTER_LEVEL);
        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java
@ -42,26 +42,28 @@ import java.util.*;
 */

 public class Tranche implements Comparable<Tranche> {
-    private static final int CURRENT_VERSION = 4;
+    private static final int CURRENT_VERSION = 5;

    public double ts, minVQSLod, knownTiTv, novelTiTv;
    public int numKnown,numNovel;
    public String name;
+    public VariantRecalibratorArgumentCollection.Mode model;

    int accessibleTruthSites = 0;
    int callsAtTruthSites = 0;

-    public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites) {
-        this(ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, "anonymous");
+    public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, VariantRecalibratorArgumentCollection.Mode model) {
+        this(ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, model, "anonymous");
    }

-    public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, String name ) {
+    public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, VariantRecalibratorArgumentCollection.Mode model, String name ) {
        this.ts = ts;
        this.minVQSLod = minVQSLod;
        this.novelTiTv = novelTiTv;
        this.numNovel = numNovel;
        this.knownTiTv = knownTiTv;
        this.numKnown = numKnown;
+        this.model = model;
        this.name = name;

        this.accessibleTruthSites = accessibleTruthSites;
@ -104,13 +106,13 @@ public class Tranche implements Comparable<Tranche> {

        stream.println("# Variant quality score tranches file");
        stream.println("# Version number " + CURRENT_VERSION);
-        stream.println("targetTruthSensitivity,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName,accessibleTruthSites,callsAtTruthSites,truthSensitivity");
+        stream.println("targetTruthSensitivity,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName,model,accessibleTruthSites,callsAtTruthSites,truthSensitivity");

        Tranche prev = null;
        for ( Tranche t : tranches ) {
-            stream.printf("%.2f,%d,%d,%.4f,%.4f,%.4f,TruthSensitivityTranche%.2fto%.2f,%d,%d,%.4f%n",
-                    t.ts, t.numKnown, t.numNovel, t.knownTiTv, t.novelTiTv, t.minVQSLod,
-                    (prev == null ? 0.0 : prev.ts), t.ts, t.accessibleTruthSites, t.callsAtTruthSites, t.getTruthSensitivity());
+            stream.printf("%.2f,%d,%d,%.4f,%.4f,%.4f,VQSRTranche%s%.2fto%.2f,%s,%d,%d,%.4f%n",
+                    t.ts, t.numKnown, t.numNovel, t.knownTiTv, t.novelTiTv, t.minVQSLod, t.model.toString(),
+                    (prev == null ? 0.0 : prev.ts), t.ts, t.model.toString(), t.accessibleTruthSites, t.callsAtTruthSites, t.getTruthSensitivity());
            prev = t;
        }

@ -157,11 +159,11 @@ public class Tranche implements Comparable<Tranche> {
                final String[] vals = line.split(",");
                if( header == null ) {
                    header = vals;
-                    if ( header.length == 5 || header.length == 8 || header.length == 11 )
+                    if ( header.length == 5 || header.length == 8 || header.length == 10 )
                        // old style tranches file, throw an error
-                        throw new UserException.MalformedFile(f, "Unfortunately, your tranches file is from a previous version of this tool and cannot be used with the latest code.  Please rerun VariantRecalibrator");
-                    if ( header.length != 10 )
-                        throw new UserException.MalformedFile(f, "Expected 10 elements in header line " + line);
+                        throw new UserException.MalformedFile(f, "Unfortunately your tranches file is from a previous version of this tool and cannot be used with the latest code.  Please rerun VariantRecalibrator");
+                    if ( header.length != 11 )
+                        throw new UserException.MalformedFile(f, "Expected 11 elements in header line " + line);
                } else {
                    if ( header.length != vals.length )
                        throw new UserException.MalformedFile(f, "Line had too few/many fields.  Header = " + header.length + " vals " + vals.length + ". The line was: " + line);
@ -176,6 +178,7 @@ public class Tranche implements Comparable<Tranche> {
                            getDouble(bindings,"novelTiTv", true),
                            getInteger(bindings,"accessibleTruthSites", false),
                            getInteger(bindings,"callsAtTruthSites", false),
+                            VariantRecalibratorArgumentCollection.parseString(bindings.get("model")),
                            bindings.get("filterName")));
                }
            }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java
@ -139,11 +139,11 @@ public class TrancheManager {
        }
    }

-    public static List<Tranche> findTranches( ArrayList<VariantDatum> data, final double[] tranches, SelectionMetric metric ) {
-        return findTranches( data, tranches, metric, null );
+    public static List<Tranche> findTranches( final ArrayList<VariantDatum> data, final double[] tranches, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model ) {
+        return findTranches( data, tranches, metric, model, null );
    }

-    public static List<Tranche> findTranches( ArrayList<VariantDatum> data, final double[] trancheThresholds, SelectionMetric metric, File debugFile ) {
+    public static List<Tranche> findTranches( final ArrayList<VariantDatum> data, final double[] trancheThresholds, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model, final File debugFile ) {
        logger.info(String.format("Finding %d tranches for %d variants", trancheThresholds.length, data.size()));

        Collections.sort(data);
@ -153,7 +153,7 @@ public class TrancheManager {

        List<Tranche> tranches = new ArrayList<Tranche>();
        for ( double trancheThreshold : trancheThresholds ) {
-            Tranche t = findTranche(data, metric, trancheThreshold);
+            Tranche t = findTranche(data, metric, trancheThreshold, model);

            if ( t == null ) {
                if ( tranches.size() == 0 )
@ -182,7 +182,7 @@ public class TrancheManager {
        }
    }

-    public static Tranche findTranche( final List<VariantDatum> data, final SelectionMetric metric, final double trancheThreshold ) {
+    public static Tranche findTranche( final List<VariantDatum> data, final SelectionMetric metric, final double trancheThreshold, final VariantRecalibratorArgumentCollection.Mode model ) {
        logger.info(String.format("  Tranche threshold %.2f => selection metric threshold %.3f", trancheThreshold, metric.getThreshold(trancheThreshold)));

        double metricThreshold = metric.getThreshold(trancheThreshold);
@ -190,7 +190,7 @@ public class TrancheManager {
        for ( int i = 0; i < n; i++ ) {
            if ( metric.getRunningMetric(i) >= metricThreshold ) {
                // we've found the largest group of variants with sensitivity >= our target truth sensitivity
-                Tranche t = trancheOfVariants(data, i, trancheThreshold);
+                Tranche t = trancheOfVariants(data, i, trancheThreshold, model);
                logger.info(String.format("  Found tranche for %.3f: %.3f threshold starting with variant %d; running score is %.3f ",
                        trancheThreshold, metricThreshold, i, metric.getRunningMetric(i)));
                logger.info(String.format("  Tranche is %s", t));
@ -201,7 +201,7 @@ public class TrancheManager {
        return null;
    }

-    public static Tranche trancheOfVariants( final List<VariantDatum> data, int minI, double ts ) {
+    public static Tranche trancheOfVariants( final List<VariantDatum> data, int minI, double ts, final VariantRecalibratorArgumentCollection.Mode model ) {
        int numKnown = 0, numNovel = 0, knownTi = 0, knownTv = 0, novelTi = 0, novelTv = 0;

        double minLod = data.get(minI).lod;
@ -228,7 +228,7 @@ public class TrancheManager {
        int accessibleTruthSites = countCallsAtTruth(data, Double.NEGATIVE_INFINITY);
        int nCallsAtTruth = countCallsAtTruth(data, minLod);

-        return new Tranche(ts, minLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, nCallsAtTruth);
+        return new Tranche(ts, minLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, nCallsAtTruth, model);
    }

    public static double fdrToTiTv(double desiredFDR, double targetTiTv) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@ -342,7 +342,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
        // Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
        final int nCallsAtTruth = TrancheManager.countCallsAtTruth( dataManager.getData(), Double.NEGATIVE_INFINITY );
        final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
-        final List<Tranche> tranches = TrancheManager.findTranches( dataManager.getData(), TS_TRANCHES, metric );
+        final List<Tranche> tranches = TrancheManager.findTranches( dataManager.getData(), TS_TRANCHES, metric, VRAC.MODE );
        tranchesStream.print(Tranche.tranchesString( tranches ));

        // Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset.
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java
@ -26,6 +26,7 @@
 package org.broadinstitute.sting.gatk.walkers.variantrecalibration;

 import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

 /**
 * Created by IntelliJ IDEA.
@ -41,6 +42,13 @@ public class VariantRecalibratorArgumentCollection {
        BOTH
    }

+    static Mode parseString(final String input) {
+        if( input.equals("SNP") ) { return Mode.SNP; }
+        if( input.equals("INDEL") ) { return Mode.INDEL; }
+        if( input.equals("BOTH") ) { return Mode.BOTH; }
+        throw new ReviewedStingException("VariantRecalibrator mode string is unrecognized, input = " + input);
+    }
+
    @Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only snps (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both snps and indels simultaneously.", required = false)
    public VariantRecalibratorArgumentCollection.Mode MODE = VariantRecalibratorArgumentCollection.Mode.SNP;
    @Argument(fullName="maxGaussians", shortName="mG", doc="The maximum number of Gaussians to try during variational Bayes algorithm", required=false)
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModelUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModelUnitTest.java
@ -112,7 +112,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
    private static List<Tranche> findMyTranches(ArrayList<VariantDatum> vd, double[] tranches) {
        final int nCallsAtTruth = TrancheManager.countCallsAtTruth( vd, Double.NEGATIVE_INFINITY );
        final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
-        return TrancheManager.findTranches(vd, tranches, metric);
+        return TrancheManager.findTranches(vd, tranches, metric, VariantRecalibratorArgumentCollection.Mode.SNP);
    }

    @Test
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
@ -21,9 +21,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
    }

    VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
-            "0ddd1e0e483d2eaf56004615cea23ec7",  // tranches
+            "62f81e7d2082fbc71cae0101c27fefad",  // tranches
            "b9709e4180e56abc691b208bd3e8626c",  // recal file
-            "4c73ff0c8c5ae0055bfacf33329a2406"); // cut VCF
+            "75c178345f70ca2eb90205662fbdf968"); // cut VCF

    @DataProvider(name = "VRTest")
    public Object[][] createData1() {
@ -70,9 +70,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
    }

    VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
-            "da4458d05f6396f5c4ab96f274e5ccdc",  // tranches
+            "b7589cd098dc153ec64c02dcff2838e4",  // tranches
            "a04a9001f62eff43d363f4d63769f3ee",  // recal file
-            "b9936d2432d3c85b2d8b5b7aa17d0950"); // cut VCF
+            "888eb042dd33b807bcbb8630896fda94"); // cut VCF

    @DataProvider(name = "VRIndelTest")
    public Object[][] createData2() {
@ -130,7 +130,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
                        " -o %s" +
                        " -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
                        " -recalFile " + privateTestDir + "VQSR.mixedTest.recal",
-                Arrays.asList("d670c684f73e2744b6c01738a01d5ec4"));
+                Arrays.asList("ec519e1f01459813dab57aefffc019e2"));
        executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
    }
 }