Adding the model name to the VQSR filter lines so that they don't get clobbered with consecutive VQSR runs for SNPs and then indels.
This commit is contained in:
parent
031322ff00
commit
9e8e78de15
|
|
@ -157,11 +157,11 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
|||
if( tranches.size() >= 2 ) {
|
||||
for( int iii = 0; iii < tranches.size() - 1; iii++ ) {
|
||||
final Tranche t = tranches.get(iii);
|
||||
hInfo.add(new VCFFilterHeaderLine(t.name, String.format("Truth sensitivity tranche level at VSQ Lod: " + t.minVQSLod + " <= x < " + tranches.get(iii+1).minVQSLod)));
|
||||
hInfo.add(new VCFFilterHeaderLine(t.name, String.format("Truth sensitivity tranche level for " + t.model.toString() + " model at VQS Lod: " + t.minVQSLod + " <= x < " + tranches.get(iii+1).minVQSLod)));
|
||||
}
|
||||
}
|
||||
if( tranches.size() >= 1 ) {
|
||||
hInfo.add(new VCFFilterHeaderLine(tranches.get(0).name + "+", String.format("Truth sensitivity tranche level at VQS Lod < " + tranches.get(0).minVQSLod)));
|
||||
hInfo.add(new VCFFilterHeaderLine(tranches.get(0).name + "+", String.format("Truth sensitivity tranche level for " + tranches.get(0).model.toString() + " model at VQS Lod < " + tranches.get(0).minVQSLod)));
|
||||
} else {
|
||||
throw new UserException("No tranches were found in the file or were above the truth sensitivity filter level " + TS_FILTER_LEVEL);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -42,26 +42,28 @@ import java.util.*;
|
|||
*/
|
||||
|
||||
public class Tranche implements Comparable<Tranche> {
|
||||
private static final int CURRENT_VERSION = 4;
|
||||
private static final int CURRENT_VERSION = 5;
|
||||
|
||||
public double ts, minVQSLod, knownTiTv, novelTiTv;
|
||||
public int numKnown,numNovel;
|
||||
public String name;
|
||||
public VariantRecalibratorArgumentCollection.Mode model;
|
||||
|
||||
int accessibleTruthSites = 0;
|
||||
int callsAtTruthSites = 0;
|
||||
|
||||
public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites) {
|
||||
this(ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, "anonymous");
|
||||
public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, VariantRecalibratorArgumentCollection.Mode model) {
|
||||
this(ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, model, "anonymous");
|
||||
}
|
||||
|
||||
public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, String name ) {
|
||||
public Tranche(double ts, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, int accessibleTruthSites, int callsAtTruthSites, VariantRecalibratorArgumentCollection.Mode model, String name ) {
|
||||
this.ts = ts;
|
||||
this.minVQSLod = minVQSLod;
|
||||
this.novelTiTv = novelTiTv;
|
||||
this.numNovel = numNovel;
|
||||
this.knownTiTv = knownTiTv;
|
||||
this.numKnown = numKnown;
|
||||
this.model = model;
|
||||
this.name = name;
|
||||
|
||||
this.accessibleTruthSites = accessibleTruthSites;
|
||||
|
|
@ -104,13 +106,13 @@ public class Tranche implements Comparable<Tranche> {
|
|||
|
||||
stream.println("# Variant quality score tranches file");
|
||||
stream.println("# Version number " + CURRENT_VERSION);
|
||||
stream.println("targetTruthSensitivity,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName,accessibleTruthSites,callsAtTruthSites,truthSensitivity");
|
||||
stream.println("targetTruthSensitivity,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName,model,accessibleTruthSites,callsAtTruthSites,truthSensitivity");
|
||||
|
||||
Tranche prev = null;
|
||||
for ( Tranche t : tranches ) {
|
||||
stream.printf("%.2f,%d,%d,%.4f,%.4f,%.4f,TruthSensitivityTranche%.2fto%.2f,%d,%d,%.4f%n",
|
||||
t.ts, t.numKnown, t.numNovel, t.knownTiTv, t.novelTiTv, t.minVQSLod,
|
||||
(prev == null ? 0.0 : prev.ts), t.ts, t.accessibleTruthSites, t.callsAtTruthSites, t.getTruthSensitivity());
|
||||
stream.printf("%.2f,%d,%d,%.4f,%.4f,%.4f,VQSRTranche%s%.2fto%.2f,%s,%d,%d,%.4f%n",
|
||||
t.ts, t.numKnown, t.numNovel, t.knownTiTv, t.novelTiTv, t.minVQSLod, t.model.toString(),
|
||||
(prev == null ? 0.0 : prev.ts), t.ts, t.model.toString(), t.accessibleTruthSites, t.callsAtTruthSites, t.getTruthSensitivity());
|
||||
prev = t;
|
||||
}
|
||||
|
||||
|
|
@ -157,11 +159,11 @@ public class Tranche implements Comparable<Tranche> {
|
|||
final String[] vals = line.split(",");
|
||||
if( header == null ) {
|
||||
header = vals;
|
||||
if ( header.length == 5 || header.length == 8 || header.length == 11 )
|
||||
if ( header.length == 5 || header.length == 8 || header.length == 10 )
|
||||
// old style tranches file, throw an error
|
||||
throw new UserException.MalformedFile(f, "Unfortunately, your tranches file is from a previous version of this tool and cannot be used with the latest code. Please rerun VariantRecalibrator");
|
||||
if ( header.length != 10 )
|
||||
throw new UserException.MalformedFile(f, "Expected 10 elements in header line " + line);
|
||||
throw new UserException.MalformedFile(f, "Unfortunately your tranches file is from a previous version of this tool and cannot be used with the latest code. Please rerun VariantRecalibrator");
|
||||
if ( header.length != 11 )
|
||||
throw new UserException.MalformedFile(f, "Expected 11 elements in header line " + line);
|
||||
} else {
|
||||
if ( header.length != vals.length )
|
||||
throw new UserException.MalformedFile(f, "Line had too few/many fields. Header = " + header.length + " vals " + vals.length + ". The line was: " + line);
|
||||
|
|
@ -176,6 +178,7 @@ public class Tranche implements Comparable<Tranche> {
|
|||
getDouble(bindings,"novelTiTv", true),
|
||||
getInteger(bindings,"accessibleTruthSites", false),
|
||||
getInteger(bindings,"callsAtTruthSites", false),
|
||||
VariantRecalibratorArgumentCollection.parseString(bindings.get("model")),
|
||||
bindings.get("filterName")));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -139,11 +139,11 @@ public class TrancheManager {
|
|||
}
|
||||
}
|
||||
|
||||
public static List<Tranche> findTranches( ArrayList<VariantDatum> data, final double[] tranches, SelectionMetric metric ) {
|
||||
return findTranches( data, tranches, metric, null );
|
||||
public static List<Tranche> findTranches( final ArrayList<VariantDatum> data, final double[] tranches, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model ) {
|
||||
return findTranches( data, tranches, metric, model, null );
|
||||
}
|
||||
|
||||
public static List<Tranche> findTranches( ArrayList<VariantDatum> data, final double[] trancheThresholds, SelectionMetric metric, File debugFile ) {
|
||||
public static List<Tranche> findTranches( final ArrayList<VariantDatum> data, final double[] trancheThresholds, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model, final File debugFile ) {
|
||||
logger.info(String.format("Finding %d tranches for %d variants", trancheThresholds.length, data.size()));
|
||||
|
||||
Collections.sort(data);
|
||||
|
|
@ -153,7 +153,7 @@ public class TrancheManager {
|
|||
|
||||
List<Tranche> tranches = new ArrayList<Tranche>();
|
||||
for ( double trancheThreshold : trancheThresholds ) {
|
||||
Tranche t = findTranche(data, metric, trancheThreshold);
|
||||
Tranche t = findTranche(data, metric, trancheThreshold, model);
|
||||
|
||||
if ( t == null ) {
|
||||
if ( tranches.size() == 0 )
|
||||
|
|
@ -182,7 +182,7 @@ public class TrancheManager {
|
|||
}
|
||||
}
|
||||
|
||||
public static Tranche findTranche( final List<VariantDatum> data, final SelectionMetric metric, final double trancheThreshold ) {
|
||||
public static Tranche findTranche( final List<VariantDatum> data, final SelectionMetric metric, final double trancheThreshold, final VariantRecalibratorArgumentCollection.Mode model ) {
|
||||
logger.info(String.format(" Tranche threshold %.2f => selection metric threshold %.3f", trancheThreshold, metric.getThreshold(trancheThreshold)));
|
||||
|
||||
double metricThreshold = metric.getThreshold(trancheThreshold);
|
||||
|
|
@ -190,7 +190,7 @@ public class TrancheManager {
|
|||
for ( int i = 0; i < n; i++ ) {
|
||||
if ( metric.getRunningMetric(i) >= metricThreshold ) {
|
||||
// we've found the largest group of variants with sensitivity >= our target truth sensitivity
|
||||
Tranche t = trancheOfVariants(data, i, trancheThreshold);
|
||||
Tranche t = trancheOfVariants(data, i, trancheThreshold, model);
|
||||
logger.info(String.format(" Found tranche for %.3f: %.3f threshold starting with variant %d; running score is %.3f ",
|
||||
trancheThreshold, metricThreshold, i, metric.getRunningMetric(i)));
|
||||
logger.info(String.format(" Tranche is %s", t));
|
||||
|
|
@ -201,7 +201,7 @@ public class TrancheManager {
|
|||
return null;
|
||||
}
|
||||
|
||||
public static Tranche trancheOfVariants( final List<VariantDatum> data, int minI, double ts ) {
|
||||
public static Tranche trancheOfVariants( final List<VariantDatum> data, int minI, double ts, final VariantRecalibratorArgumentCollection.Mode model ) {
|
||||
int numKnown = 0, numNovel = 0, knownTi = 0, knownTv = 0, novelTi = 0, novelTv = 0;
|
||||
|
||||
double minLod = data.get(minI).lod;
|
||||
|
|
@ -228,7 +228,7 @@ public class TrancheManager {
|
|||
int accessibleTruthSites = countCallsAtTruth(data, Double.NEGATIVE_INFINITY);
|
||||
int nCallsAtTruth = countCallsAtTruth(data, minLod);
|
||||
|
||||
return new Tranche(ts, minLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, nCallsAtTruth);
|
||||
return new Tranche(ts, minLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, nCallsAtTruth, model);
|
||||
}
|
||||
|
||||
public static double fdrToTiTv(double desiredFDR, double targetTiTv) {
|
||||
|
|
|
|||
|
|
@ -342,7 +342,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
// Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
|
||||
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( dataManager.getData(), Double.NEGATIVE_INFINITY );
|
||||
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
|
||||
final List<Tranche> tranches = TrancheManager.findTranches( dataManager.getData(), TS_TRANCHES, metric );
|
||||
final List<Tranche> tranches = TrancheManager.findTranches( dataManager.getData(), TS_TRANCHES, metric, VRAC.MODE );
|
||||
tranchesStream.print(Tranche.tranchesString( tranches ));
|
||||
|
||||
// Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset.
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -41,6 +42,13 @@ public class VariantRecalibratorArgumentCollection {
|
|||
BOTH
|
||||
}
|
||||
|
||||
static Mode parseString(final String input) {
|
||||
if( input.equals("SNP") ) { return Mode.SNP; }
|
||||
if( input.equals("INDEL") ) { return Mode.INDEL; }
|
||||
if( input.equals("BOTH") ) { return Mode.BOTH; }
|
||||
throw new ReviewedStingException("VariantRecalibrator mode string is unrecognized, input = " + input);
|
||||
}
|
||||
|
||||
@Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only snps (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both snps and indels simultaneously.", required = false)
|
||||
public VariantRecalibratorArgumentCollection.Mode MODE = VariantRecalibratorArgumentCollection.Mode.SNP;
|
||||
@Argument(fullName="maxGaussians", shortName="mG", doc="The maximum number of Gaussians to try during variational Bayes algorithm", required=false)
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
|||
private static List<Tranche> findMyTranches(ArrayList<VariantDatum> vd, double[] tranches) {
|
||||
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( vd, Double.NEGATIVE_INFINITY );
|
||||
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
|
||||
return TrancheManager.findTranches(vd, tranches, metric);
|
||||
return TrancheManager.findTranches(vd, tranches, metric, VariantRecalibratorArgumentCollection.Mode.SNP);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -21,9 +21,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
|
||||
"0ddd1e0e483d2eaf56004615cea23ec7", // tranches
|
||||
"62f81e7d2082fbc71cae0101c27fefad", // tranches
|
||||
"b9709e4180e56abc691b208bd3e8626c", // recal file
|
||||
"4c73ff0c8c5ae0055bfacf33329a2406"); // cut VCF
|
||||
"75c178345f70ca2eb90205662fbdf968"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRTest")
|
||||
public Object[][] createData1() {
|
||||
|
|
@ -70,9 +70,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"da4458d05f6396f5c4ab96f274e5ccdc", // tranches
|
||||
"b7589cd098dc153ec64c02dcff2838e4", // tranches
|
||||
"a04a9001f62eff43d363f4d63769f3ee", // recal file
|
||||
"b9936d2432d3c85b2d8b5b7aa17d0950"); // cut VCF
|
||||
"888eb042dd33b807bcbb8630896fda94"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRIndelTest")
|
||||
public Object[][] createData2() {
|
||||
|
|
@ -130,7 +130,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
|
||||
" -recalFile " + privateTestDir + "VQSR.mixedTest.recal",
|
||||
Arrays.asList("d670c684f73e2744b6c01738a01d5ec4"));
|
||||
Arrays.asList("ec519e1f01459813dab57aefffc019e2"));
|
||||
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue