Adding the model name to the VQSR filter header lines so that they don't get clobbered by consecutive VQSR runs, first for SNPs and then for indels.
This commit is contained in:
parent
031322ff00
commit
9e8e78de15
|
|
@ -157,11 +157,11 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
||||||
if( tranches.size() >= 2 ) {
|
if( tranches.size() >= 2 ) {
|
||||||
for( int iii = 0; iii < tranches.size() - 1; iii++ ) {
|
for( int iii = 0; iii < tranches.size() - 1; iii++ ) {
|
||||||
final Tranche t = tranches.get(iii);
|
final Tranche t = tranches.get(iii);
|
||||||
hInfo.add(new VCFFilterHeaderLine(t.name, String.format("Truth sensitivity tranche level at VSQ Lod: " + t.minVQSLod + " <= x < " + tranches.get(iii+1).minVQSLod)));
|
hInfo.add(new VCFFilterHeaderLine(t.name, String.format("Truth sensitivity tranche level for " + t.model.toString() + " model at VQS Lod: " + t.minVQSLod + " <= x < " + tranches.get(iii+1).minVQSLod)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if( tranches.size() >= 1 ) {
|
if( tranches.size() >= 1 ) {
|
||||||
hInfo.add(new VCFFilterHeaderLine(tranches.get(0).name + "+", String.format("Truth sensitivity tranche level at VQS Lod < " + tranches.get(0).minVQSLod)));
|
hInfo.add(new VCFFilterHeaderLine(tranches.get(0).name + "+", String.format("Truth sensitivity tranche level for " + tranches.get(0).model.toString() + " model at VQS Lod < " + tranches.get(0).minVQSLod)));
|
||||||
} else {
|
} else {
|
||||||
throw new UserException("No tranches were found in the file or were above the truth sensitivity filter level " + TS_FILTER_LEVEL);
|
throw new UserException("No tranches were found in the file or were above the truth sensitivity filter level " + TS_FILTER_LEVEL);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -42,26 +42,28 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class Tranche implements Comparable<Tranche> {
|
public class Tranche implements Comparable<Tranche> {
|
||||||
private static final int CURRENT_VERSION = 4;
|
private static final int CURRENT_VERSION = 5;
|
||||||
|
|
||||||
public double ts, minVQSLod, knownTiTv, novelTiTv;
|
public double ts, minVQSLod, knownTiTv, novelTiTv;
|
||||||
public int numKnown,numNovel;
|
public int numKnown,numNovel;
|
||||||
public String name;
|
public String name;
|
||||||
|
public VariantRecalibratorArgumentCollection.Mode model;
|
||||||
|
|
||||||
int accessibleTruthSites = 0;
|
int accessibleTruthSites = 0;
|
||||||
int callsAtTruthSites = 0;
|
int callsAtTruthSites = 0;
|
||||||
|
|
||||||
public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites) {
|
public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, VariantRecalibratorArgumentCollection.Mode model) {
|
||||||
this(ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, "anonymous");
|
this(ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, model, "anonymous");
|
||||||
}
|
}
|
||||||
|
|
||||||
public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, String name ) {
|
public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, VariantRecalibratorArgumentCollection.Mode model, String name ) {
|
||||||
this.ts = ts;
|
this.ts = ts;
|
||||||
this.minVQSLod = minVQSLod;
|
this.minVQSLod = minVQSLod;
|
||||||
this.novelTiTv = novelTiTv;
|
this.novelTiTv = novelTiTv;
|
||||||
this.numNovel = numNovel;
|
this.numNovel = numNovel;
|
||||||
this.knownTiTv = knownTiTv;
|
this.knownTiTv = knownTiTv;
|
||||||
this.numKnown = numKnown;
|
this.numKnown = numKnown;
|
||||||
|
this.model = model;
|
||||||
this.name = name;
|
this.name = name;
|
||||||
|
|
||||||
this.accessibleTruthSites = accessibleTruthSites;
|
this.accessibleTruthSites = accessibleTruthSites;
|
||||||
|
|
@ -104,13 +106,13 @@ public class Tranche implements Comparable<Tranche> {
|
||||||
|
|
||||||
stream.println("# Variant quality score tranches file");
|
stream.println("# Variant quality score tranches file");
|
||||||
stream.println("# Version number " + CURRENT_VERSION);
|
stream.println("# Version number " + CURRENT_VERSION);
|
||||||
stream.println("targetTruthSensitivity,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName,accessibleTruthSites,callsAtTruthSites,truthSensitivity");
|
stream.println("targetTruthSensitivity,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName,model,accessibleTruthSites,callsAtTruthSites,truthSensitivity");
|
||||||
|
|
||||||
Tranche prev = null;
|
Tranche prev = null;
|
||||||
for ( Tranche t : tranches ) {
|
for ( Tranche t : tranches ) {
|
||||||
stream.printf("%.2f,%d,%d,%.4f,%.4f,%.4f,TruthSensitivityTranche%.2fto%.2f,%d,%d,%.4f%n",
|
stream.printf("%.2f,%d,%d,%.4f,%.4f,%.4f,VQSRTranche%s%.2fto%.2f,%s,%d,%d,%.4f%n",
|
||||||
t.ts, t.numKnown, t.numNovel, t.knownTiTv, t.novelTiTv, t.minVQSLod,
|
t.ts, t.numKnown, t.numNovel, t.knownTiTv, t.novelTiTv, t.minVQSLod, t.model.toString(),
|
||||||
(prev == null ? 0.0 : prev.ts), t.ts, t.accessibleTruthSites, t.callsAtTruthSites, t.getTruthSensitivity());
|
(prev == null ? 0.0 : prev.ts), t.ts, t.model.toString(), t.accessibleTruthSites, t.callsAtTruthSites, t.getTruthSensitivity());
|
||||||
prev = t;
|
prev = t;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -157,11 +159,11 @@ public class Tranche implements Comparable<Tranche> {
|
||||||
final String[] vals = line.split(",");
|
final String[] vals = line.split(",");
|
||||||
if( header == null ) {
|
if( header == null ) {
|
||||||
header = vals;
|
header = vals;
|
||||||
if ( header.length == 5 || header.length == 8 || header.length == 11 )
|
if ( header.length == 5 || header.length == 8 || header.length == 10 )
|
||||||
// old style tranches file, throw an error
|
// old style tranches file, throw an error
|
||||||
throw new UserException.MalformedFile(f, "Unfortunately, your tranches file is from a previous version of this tool and cannot be used with the latest code. Please rerun VariantRecalibrator");
|
throw new UserException.MalformedFile(f, "Unfortunately your tranches file is from a previous version of this tool and cannot be used with the latest code. Please rerun VariantRecalibrator");
|
||||||
if ( header.length != 10 )
|
if ( header.length != 11 )
|
||||||
throw new UserException.MalformedFile(f, "Expected 10 elements in header line " + line);
|
throw new UserException.MalformedFile(f, "Expected 11 elements in header line " + line);
|
||||||
} else {
|
} else {
|
||||||
if ( header.length != vals.length )
|
if ( header.length != vals.length )
|
||||||
throw new UserException.MalformedFile(f, "Line had too few/many fields. Header = " + header.length + " vals " + vals.length + ". The line was: " + line);
|
throw new UserException.MalformedFile(f, "Line had too few/many fields. Header = " + header.length + " vals " + vals.length + ". The line was: " + line);
|
||||||
|
|
@ -176,6 +178,7 @@ public class Tranche implements Comparable<Tranche> {
|
||||||
getDouble(bindings,"novelTiTv", true),
|
getDouble(bindings,"novelTiTv", true),
|
||||||
getInteger(bindings,"accessibleTruthSites", false),
|
getInteger(bindings,"accessibleTruthSites", false),
|
||||||
getInteger(bindings,"callsAtTruthSites", false),
|
getInteger(bindings,"callsAtTruthSites", false),
|
||||||
|
VariantRecalibratorArgumentCollection.parseString(bindings.get("model")),
|
||||||
bindings.get("filterName")));
|
bindings.get("filterName")));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -139,11 +139,11 @@ public class TrancheManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<Tranche> findTranches( ArrayList<VariantDatum> data, final double[] tranches, SelectionMetric metric ) {
|
public static List<Tranche> findTranches( final ArrayList<VariantDatum> data, final double[] tranches, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model ) {
|
||||||
return findTranches( data, tranches, metric, null );
|
return findTranches( data, tranches, metric, model, null );
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<Tranche> findTranches( ArrayList<VariantDatum> data, final double[] trancheThresholds, SelectionMetric metric, File debugFile ) {
|
public static List<Tranche> findTranches( final ArrayList<VariantDatum> data, final double[] trancheThresholds, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model, final File debugFile ) {
|
||||||
logger.info(String.format("Finding %d tranches for %d variants", trancheThresholds.length, data.size()));
|
logger.info(String.format("Finding %d tranches for %d variants", trancheThresholds.length, data.size()));
|
||||||
|
|
||||||
Collections.sort(data);
|
Collections.sort(data);
|
||||||
|
|
@ -153,7 +153,7 @@ public class TrancheManager {
|
||||||
|
|
||||||
List<Tranche> tranches = new ArrayList<Tranche>();
|
List<Tranche> tranches = new ArrayList<Tranche>();
|
||||||
for ( double trancheThreshold : trancheThresholds ) {
|
for ( double trancheThreshold : trancheThresholds ) {
|
||||||
Tranche t = findTranche(data, metric, trancheThreshold);
|
Tranche t = findTranche(data, metric, trancheThreshold, model);
|
||||||
|
|
||||||
if ( t == null ) {
|
if ( t == null ) {
|
||||||
if ( tranches.size() == 0 )
|
if ( tranches.size() == 0 )
|
||||||
|
|
@ -182,7 +182,7 @@ public class TrancheManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Tranche findTranche( final List<VariantDatum> data, final SelectionMetric metric, final double trancheThreshold ) {
|
public static Tranche findTranche( final List<VariantDatum> data, final SelectionMetric metric, final double trancheThreshold, final VariantRecalibratorArgumentCollection.Mode model ) {
|
||||||
logger.info(String.format(" Tranche threshold %.2f => selection metric threshold %.3f", trancheThreshold, metric.getThreshold(trancheThreshold)));
|
logger.info(String.format(" Tranche threshold %.2f => selection metric threshold %.3f", trancheThreshold, metric.getThreshold(trancheThreshold)));
|
||||||
|
|
||||||
double metricThreshold = metric.getThreshold(trancheThreshold);
|
double metricThreshold = metric.getThreshold(trancheThreshold);
|
||||||
|
|
@ -190,7 +190,7 @@ public class TrancheManager {
|
||||||
for ( int i = 0; i < n; i++ ) {
|
for ( int i = 0; i < n; i++ ) {
|
||||||
if ( metric.getRunningMetric(i) >= metricThreshold ) {
|
if ( metric.getRunningMetric(i) >= metricThreshold ) {
|
||||||
// we've found the largest group of variants with sensitivity >= our target truth sensitivity
|
// we've found the largest group of variants with sensitivity >= our target truth sensitivity
|
||||||
Tranche t = trancheOfVariants(data, i, trancheThreshold);
|
Tranche t = trancheOfVariants(data, i, trancheThreshold, model);
|
||||||
logger.info(String.format(" Found tranche for %.3f: %.3f threshold starting with variant %d; running score is %.3f ",
|
logger.info(String.format(" Found tranche for %.3f: %.3f threshold starting with variant %d; running score is %.3f ",
|
||||||
trancheThreshold, metricThreshold, i, metric.getRunningMetric(i)));
|
trancheThreshold, metricThreshold, i, metric.getRunningMetric(i)));
|
||||||
logger.info(String.format(" Tranche is %s", t));
|
logger.info(String.format(" Tranche is %s", t));
|
||||||
|
|
@ -201,7 +201,7 @@ public class TrancheManager {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Tranche trancheOfVariants( final List<VariantDatum> data, int minI, double ts ) {
|
public static Tranche trancheOfVariants( final List<VariantDatum> data, int minI, double ts, final VariantRecalibratorArgumentCollection.Mode model ) {
|
||||||
int numKnown = 0, numNovel = 0, knownTi = 0, knownTv = 0, novelTi = 0, novelTv = 0;
|
int numKnown = 0, numNovel = 0, knownTi = 0, knownTv = 0, novelTi = 0, novelTv = 0;
|
||||||
|
|
||||||
double minLod = data.get(minI).lod;
|
double minLod = data.get(minI).lod;
|
||||||
|
|
@ -228,7 +228,7 @@ public class TrancheManager {
|
||||||
int accessibleTruthSites = countCallsAtTruth(data, Double.NEGATIVE_INFINITY);
|
int accessibleTruthSites = countCallsAtTruth(data, Double.NEGATIVE_INFINITY);
|
||||||
int nCallsAtTruth = countCallsAtTruth(data, minLod);
|
int nCallsAtTruth = countCallsAtTruth(data, minLod);
|
||||||
|
|
||||||
return new Tranche(ts, minLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, nCallsAtTruth);
|
return new Tranche(ts, minLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, nCallsAtTruth, model);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static double fdrToTiTv(double desiredFDR, double targetTiTv) {
|
public static double fdrToTiTv(double desiredFDR, double targetTiTv) {
|
||||||
|
|
|
||||||
|
|
@ -342,7 +342,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
// Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
|
// Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
|
||||||
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( dataManager.getData(), Double.NEGATIVE_INFINITY );
|
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( dataManager.getData(), Double.NEGATIVE_INFINITY );
|
||||||
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
|
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
|
||||||
final List<Tranche> tranches = TrancheManager.findTranches( dataManager.getData(), TS_TRANCHES, metric );
|
final List<Tranche> tranches = TrancheManager.findTranches( dataManager.getData(), TS_TRANCHES, metric, VRAC.MODE );
|
||||||
tranchesStream.print(Tranche.tranchesString( tranches ));
|
tranchesStream.print(Tranche.tranchesString( tranches ));
|
||||||
|
|
||||||
// Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset.
|
// Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset.
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -41,6 +42,13 @@ public class VariantRecalibratorArgumentCollection {
|
||||||
BOTH
|
BOTH
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static Mode parseString(final String input) {
|
||||||
|
if( input.equals("SNP") ) { return Mode.SNP; }
|
||||||
|
if( input.equals("INDEL") ) { return Mode.INDEL; }
|
||||||
|
if( input.equals("BOTH") ) { return Mode.BOTH; }
|
||||||
|
throw new ReviewedStingException("VariantRecalibrator mode string is unrecognized, input = " + input);
|
||||||
|
}
|
||||||
|
|
||||||
@Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only snps (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both snps and indels simultaneously.", required = false)
|
@Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only snps (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both snps and indels simultaneously.", required = false)
|
||||||
public VariantRecalibratorArgumentCollection.Mode MODE = VariantRecalibratorArgumentCollection.Mode.SNP;
|
public VariantRecalibratorArgumentCollection.Mode MODE = VariantRecalibratorArgumentCollection.Mode.SNP;
|
||||||
@Argument(fullName="maxGaussians", shortName="mG", doc="The maximum number of Gaussians to try during variational Bayes algorithm", required=false)
|
@Argument(fullName="maxGaussians", shortName="mG", doc="The maximum number of Gaussians to try during variational Bayes algorithm", required=false)
|
||||||
|
|
|
||||||
|
|
@ -112,7 +112,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
||||||
private static List<Tranche> findMyTranches(ArrayList<VariantDatum> vd, double[] tranches) {
|
private static List<Tranche> findMyTranches(ArrayList<VariantDatum> vd, double[] tranches) {
|
||||||
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( vd, Double.NEGATIVE_INFINITY );
|
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( vd, Double.NEGATIVE_INFINITY );
|
||||||
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
|
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
|
||||||
return TrancheManager.findTranches(vd, tranches, metric);
|
return TrancheManager.findTranches(vd, tranches, metric, VariantRecalibratorArgumentCollection.Mode.SNP);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
||||||
|
|
@ -21,9 +21,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
|
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
|
||||||
"0ddd1e0e483d2eaf56004615cea23ec7", // tranches
|
"62f81e7d2082fbc71cae0101c27fefad", // tranches
|
||||||
"b9709e4180e56abc691b208bd3e8626c", // recal file
|
"b9709e4180e56abc691b208bd3e8626c", // recal file
|
||||||
"4c73ff0c8c5ae0055bfacf33329a2406"); // cut VCF
|
"75c178345f70ca2eb90205662fbdf968"); // cut VCF
|
||||||
|
|
||||||
@DataProvider(name = "VRTest")
|
@DataProvider(name = "VRTest")
|
||||||
public Object[][] createData1() {
|
public Object[][] createData1() {
|
||||||
|
|
@ -70,9 +70,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
|
VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
|
||||||
"da4458d05f6396f5c4ab96f274e5ccdc", // tranches
|
"b7589cd098dc153ec64c02dcff2838e4", // tranches
|
||||||
"a04a9001f62eff43d363f4d63769f3ee", // recal file
|
"a04a9001f62eff43d363f4d63769f3ee", // recal file
|
||||||
"b9936d2432d3c85b2d8b5b7aa17d0950"); // cut VCF
|
"888eb042dd33b807bcbb8630896fda94"); // cut VCF
|
||||||
|
|
||||||
@DataProvider(name = "VRIndelTest")
|
@DataProvider(name = "VRIndelTest")
|
||||||
public Object[][] createData2() {
|
public Object[][] createData2() {
|
||||||
|
|
@ -130,7 +130,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
|
" -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
|
||||||
" -recalFile " + privateTestDir + "VQSR.mixedTest.recal",
|
" -recalFile " + privateTestDir + "VQSR.mixedTest.recal",
|
||||||
Arrays.asList("d670c684f73e2744b6c01738a01d5ec4"));
|
Arrays.asList("ec519e1f01459813dab57aefffc019e2"));
|
||||||
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
|
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue