Adding the model name to the VQSR filter lines so that they don't get clobbered with consecutive VQSR runs for SNPs and then indels.

This commit is contained in:
Ryan Poplin 2012-07-03 14:30:37 -04:00
parent 031322ff00
commit 9e8e78de15
7 changed files with 40 additions and 29 deletions

View File

@ -157,11 +157,11 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
if( tranches.size() >= 2 ) {
for( int iii = 0; iii < tranches.size() - 1; iii++ ) {
final Tranche t = tranches.get(iii);
hInfo.add(new VCFFilterHeaderLine(t.name, String.format("Truth sensitivity tranche level at VSQ Lod: " + t.minVQSLod + " <= x < " + tranches.get(iii+1).minVQSLod)));
hInfo.add(new VCFFilterHeaderLine(t.name, String.format("Truth sensitivity tranche level for " + t.model.toString() + " model at VQS Lod: " + t.minVQSLod + " <= x < " + tranches.get(iii+1).minVQSLod)));
}
}
if( tranches.size() >= 1 ) {
hInfo.add(new VCFFilterHeaderLine(tranches.get(0).name + "+", String.format("Truth sensitivity tranche level at VQS Lod < " + tranches.get(0).minVQSLod)));
hInfo.add(new VCFFilterHeaderLine(tranches.get(0).name + "+", String.format("Truth sensitivity tranche level for " + tranches.get(0).model.toString() + " model at VQS Lod < " + tranches.get(0).minVQSLod)));
} else {
throw new UserException("No tranches were found in the file or were above the truth sensitivity filter level " + TS_FILTER_LEVEL);
}

View File

@ -42,26 +42,28 @@ import java.util.*;
*/
public class Tranche implements Comparable<Tranche> {
private static final int CURRENT_VERSION = 4;
private static final int CURRENT_VERSION = 5;
public double ts, minVQSLod, knownTiTv, novelTiTv;
public int numKnown,numNovel;
public String name;
public VariantRecalibratorArgumentCollection.Mode model;
int accessibleTruthSites = 0;
int callsAtTruthSites = 0;
public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites) {
this(ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, "anonymous");
public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, VariantRecalibratorArgumentCollection.Mode model) {
this(ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, model, "anonymous");
}
public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, String name ) {
public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, VariantRecalibratorArgumentCollection.Mode model, String name ) {
this.ts = ts;
this.minVQSLod = minVQSLod;
this.novelTiTv = novelTiTv;
this.numNovel = numNovel;
this.knownTiTv = knownTiTv;
this.numKnown = numKnown;
this.model = model;
this.name = name;
this.accessibleTruthSites = accessibleTruthSites;
@ -104,13 +106,13 @@ public class Tranche implements Comparable<Tranche> {
stream.println("# Variant quality score tranches file");
stream.println("# Version number " + CURRENT_VERSION);
stream.println("targetTruthSensitivity,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName,accessibleTruthSites,callsAtTruthSites,truthSensitivity");
stream.println("targetTruthSensitivity,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName,model,accessibleTruthSites,callsAtTruthSites,truthSensitivity");
Tranche prev = null;
for ( Tranche t : tranches ) {
stream.printf("%.2f,%d,%d,%.4f,%.4f,%.4f,TruthSensitivityTranche%.2fto%.2f,%d,%d,%.4f%n",
t.ts, t.numKnown, t.numNovel, t.knownTiTv, t.novelTiTv, t.minVQSLod,
(prev == null ? 0.0 : prev.ts), t.ts, t.accessibleTruthSites, t.callsAtTruthSites, t.getTruthSensitivity());
stream.printf("%.2f,%d,%d,%.4f,%.4f,%.4f,VQSRTranche%s%.2fto%.2f,%s,%d,%d,%.4f%n",
t.ts, t.numKnown, t.numNovel, t.knownTiTv, t.novelTiTv, t.minVQSLod, t.model.toString(),
(prev == null ? 0.0 : prev.ts), t.ts, t.model.toString(), t.accessibleTruthSites, t.callsAtTruthSites, t.getTruthSensitivity());
prev = t;
}
@ -157,11 +159,11 @@ public class Tranche implements Comparable<Tranche> {
final String[] vals = line.split(",");
if( header == null ) {
header = vals;
if ( header.length == 5 || header.length == 8 || header.length == 11 )
if ( header.length == 5 || header.length == 8 || header.length == 10 )
// old style tranches file, throw an error
throw new UserException.MalformedFile(f, "Unfortunately, your tranches file is from a previous version of this tool and cannot be used with the latest code. Please rerun VariantRecalibrator");
if ( header.length != 10 )
throw new UserException.MalformedFile(f, "Expected 10 elements in header line " + line);
throw new UserException.MalformedFile(f, "Unfortunately your tranches file is from a previous version of this tool and cannot be used with the latest code. Please rerun VariantRecalibrator");
if ( header.length != 11 )
throw new UserException.MalformedFile(f, "Expected 11 elements in header line " + line);
} else {
if ( header.length != vals.length )
throw new UserException.MalformedFile(f, "Line had too few/many fields. Header = " + header.length + " vals " + vals.length + ". The line was: " + line);
@ -176,6 +178,7 @@ public class Tranche implements Comparable<Tranche> {
getDouble(bindings,"novelTiTv", true),
getInteger(bindings,"accessibleTruthSites", false),
getInteger(bindings,"callsAtTruthSites", false),
VariantRecalibratorArgumentCollection.parseString(bindings.get("model")),
bindings.get("filterName")));
}
}

View File

@ -139,11 +139,11 @@ public class TrancheManager {
}
}
public static List<Tranche> findTranches( ArrayList<VariantDatum> data, final double[] tranches, SelectionMetric metric ) {
return findTranches( data, tranches, metric, null );
public static List<Tranche> findTranches( final ArrayList<VariantDatum> data, final double[] tranches, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model ) {
return findTranches( data, tranches, metric, model, null );
}
public static List<Tranche> findTranches( ArrayList<VariantDatum> data, final double[] trancheThresholds, SelectionMetric metric, File debugFile ) {
public static List<Tranche> findTranches( final ArrayList<VariantDatum> data, final double[] trancheThresholds, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model, final File debugFile ) {
logger.info(String.format("Finding %d tranches for %d variants", trancheThresholds.length, data.size()));
Collections.sort(data);
@ -153,7 +153,7 @@ public class TrancheManager {
List<Tranche> tranches = new ArrayList<Tranche>();
for ( double trancheThreshold : trancheThresholds ) {
Tranche t = findTranche(data, metric, trancheThreshold);
Tranche t = findTranche(data, metric, trancheThreshold, model);
if ( t == null ) {
if ( tranches.size() == 0 )
@ -182,7 +182,7 @@ public class TrancheManager {
}
}
public static Tranche findTranche( final List<VariantDatum> data, final SelectionMetric metric, final double trancheThreshold ) {
public static Tranche findTranche( final List<VariantDatum> data, final SelectionMetric metric, final double trancheThreshold, final VariantRecalibratorArgumentCollection.Mode model ) {
logger.info(String.format(" Tranche threshold %.2f => selection metric threshold %.3f", trancheThreshold, metric.getThreshold(trancheThreshold)));
double metricThreshold = metric.getThreshold(trancheThreshold);
@ -190,7 +190,7 @@ public class TrancheManager {
for ( int i = 0; i < n; i++ ) {
if ( metric.getRunningMetric(i) >= metricThreshold ) {
// we've found the largest group of variants with sensitivity >= our target truth sensitivity
Tranche t = trancheOfVariants(data, i, trancheThreshold);
Tranche t = trancheOfVariants(data, i, trancheThreshold, model);
logger.info(String.format(" Found tranche for %.3f: %.3f threshold starting with variant %d; running score is %.3f ",
trancheThreshold, metricThreshold, i, metric.getRunningMetric(i)));
logger.info(String.format(" Tranche is %s", t));
@ -201,7 +201,7 @@ public class TrancheManager {
return null;
}
public static Tranche trancheOfVariants( final List<VariantDatum> data, int minI, double ts ) {
public static Tranche trancheOfVariants( final List<VariantDatum> data, int minI, double ts, final VariantRecalibratorArgumentCollection.Mode model ) {
int numKnown = 0, numNovel = 0, knownTi = 0, knownTv = 0, novelTi = 0, novelTv = 0;
double minLod = data.get(minI).lod;
@ -228,7 +228,7 @@ public class TrancheManager {
int accessibleTruthSites = countCallsAtTruth(data, Double.NEGATIVE_INFINITY);
int nCallsAtTruth = countCallsAtTruth(data, minLod);
return new Tranche(ts, minLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, nCallsAtTruth);
return new Tranche(ts, minLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, nCallsAtTruth, model);
}
public static double fdrToTiTv(double desiredFDR, double targetTiTv) {

View File

@ -342,7 +342,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
// Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( dataManager.getData(), Double.NEGATIVE_INFINITY );
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
final List<Tranche> tranches = TrancheManager.findTranches( dataManager.getData(), TS_TRANCHES, metric );
final List<Tranche> tranches = TrancheManager.findTranches( dataManager.getData(), TS_TRANCHES, metric, VRAC.MODE );
tranchesStream.print(Tranche.tranchesString( tranches ));
// Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset.

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
/**
* Created by IntelliJ IDEA.
@ -41,6 +42,13 @@ public class VariantRecalibratorArgumentCollection {
BOTH
}
static Mode parseString(final String input) {
if( input.equals("SNP") ) { return Mode.SNP; }
if( input.equals("INDEL") ) { return Mode.INDEL; }
if( input.equals("BOTH") ) { return Mode.BOTH; }
throw new ReviewedStingException("VariantRecalibrator mode string is unrecognized, input = " + input);
}
@Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only snps (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both snps and indels simultaneously.", required = false)
public VariantRecalibratorArgumentCollection.Mode MODE = VariantRecalibratorArgumentCollection.Mode.SNP;
@Argument(fullName="maxGaussians", shortName="mG", doc="The maximum number of Gaussians to try during variational Bayes algorithm", required=false)

View File

@ -112,7 +112,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
private static List<Tranche> findMyTranches(ArrayList<VariantDatum> vd, double[] tranches) {
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( vd, Double.NEGATIVE_INFINITY );
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
return TrancheManager.findTranches(vd, tranches, metric);
return TrancheManager.findTranches(vd, tranches, metric, VariantRecalibratorArgumentCollection.Mode.SNP);
}
@Test

View File

@ -21,9 +21,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
}
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
"0ddd1e0e483d2eaf56004615cea23ec7", // tranches
"62f81e7d2082fbc71cae0101c27fefad", // tranches
"b9709e4180e56abc691b208bd3e8626c", // recal file
"4c73ff0c8c5ae0055bfacf33329a2406"); // cut VCF
"75c178345f70ca2eb90205662fbdf968"); // cut VCF
@DataProvider(name = "VRTest")
public Object[][] createData1() {
@ -70,9 +70,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
}
VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
"da4458d05f6396f5c4ab96f274e5ccdc", // tranches
"b7589cd098dc153ec64c02dcff2838e4", // tranches
"a04a9001f62eff43d363f4d63769f3ee", // recal file
"b9936d2432d3c85b2d8b5b7aa17d0950"); // cut VCF
"888eb042dd33b807bcbb8630896fda94"); // cut VCF
@DataProvider(name = "VRIndelTest")
public Object[][] createData2() {
@ -130,7 +130,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -o %s" +
" -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
" -recalFile " + privateTestDir + "VQSR.mixedTest.recal",
Arrays.asList("d670c684f73e2744b6c01738a01d5ec4"));
Arrays.asList("ec519e1f01459813dab57aefffc019e2"));
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
}
}