Adding tranche plot generation back to VQSR
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5736 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e73720c2db
commit
5bade81c6d
|
|
@ -52,7 +52,7 @@ alpha = 1 - titvFPEstV(targetTITV, novelTiTv)
|
|||
|
||||
numGood = round(alpha * data2$numNovel);
|
||||
|
||||
#numGood = round(data2$numNovel * (1-data2$FDRtranche/100))
|
||||
#numGood = round(data2$numNovel * (1-data2$targetTruthSensitivity/100))
|
||||
numBad = data2$numNovel - numGood;
|
||||
|
||||
numPrevGood = leftShift(numGood, 0)
|
||||
|
|
@ -66,12 +66,12 @@ barplot(d/1000,horiz=TRUE,col=cols,space=0.2,xlab="Number of Novel Variants (100
|
|||
#abline(v= d[2,dim(d)[2]], lty=2)
|
||||
#abline(v= d[1,3], lty=2)
|
||||
if ( ! suppressLegend )
|
||||
legend(5, 1.75, c('Cumulative TPs','Tranch-specific TPs', 'Tranch-specific FPs', 'Cumulative FPs' ), fill=cols, density=density, bg='white', cex=1.25)
|
||||
legend(5, length(data2$targetTruthSensitivity)-1.25, c('Cumulative TPs','Tranch-specific TPs', 'Tranch-specific FPs', 'Cumulative FPs' ), fill=cols, density=density, bg='white', cex=1.25)
|
||||
|
||||
mtext("Ti/Tv",2,line=2.25,at=length(data2$FDRtranche)*1.2,las=1, cex=1)
|
||||
mtext("FNR",2,line=0,at=length(data2$FDRtranche)*1.2,las=1, cex=1)
|
||||
axis(2,line=-1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$FDRtranche, las=1, cex.axis=1.0)
|
||||
axis(2,line=1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=round(novelTiTv,3), las=1, cex.axis=1.0)
|
||||
mtext("Ti/Tv",2,line=2.25,at=length(data2$targetTruthSensitivity)*1.2,las=1, cex=1)
|
||||
mtext("truth",2,line=0,at=length(data2$targetTruthSensitivity)*1.2,las=1, cex=1)
|
||||
axis(2,line=-1,at=0.7+(0:(length(data2$targetTruthSensitivity)-1))*1.2,tick=FALSE,labels=data2$targetTruthSensitivity, las=1, cex.axis=1.0)
|
||||
axis(2,line=1,at=0.7+(0:(length(data2$targetTruthSensitivity)-1))*1.2,tick=FALSE,labels=round(novelTiTv,3), las=1, cex.axis=1.0)
|
||||
|
||||
# plot sensitivity vs. specificity
|
||||
sensitivity = data2$truthSensitivity
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ import org.broadinstitute.sting.utils.Utils;
|
|||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
|
@ -68,11 +69,13 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
@Output(fullName="recal_file", shortName="recalFile", doc="The output recal file used by ApplyRecalibration", required=true)
|
||||
private PrintStream RECAL_FILE;
|
||||
@Output(fullName="tranches_file", shortName="tranchesFile", doc="The output tranches file used by ApplyRecalibration", required=true)
|
||||
private PrintStream TRANCHES_FILE;
|
||||
private File TRANCHES_FILE;
|
||||
|
||||
/////////////////////////////
|
||||
// Additional Command Line Arguments
|
||||
/////////////////////////////
|
||||
@Argument(fullName="target_titv", shortName="titv", doc="The expected novel Ti/Tv ratio to use when calculating FDR tranches and for display on optimization curve output figures. (approx 2.15 for whole genome experiments). ONLY USED FOR PLOTTING PURPOSES!", required=false)
|
||||
private double TARGET_TITV = 2.15;
|
||||
@Argument(fullName="use_annotation", shortName="an", doc="The names of the annotations which should used for calculations", required=true)
|
||||
private String[] USE_ANNOTATIONS = null;
|
||||
@Argument(fullName="TStranche", shortName="tranche", doc="The levels of novel false discovery rate (FDR, implied by ti/tv) at which to slice the data. (in percent, that is 1.0 for 1 percent)", required=false)
|
||||
|
|
@ -83,6 +86,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
private String PATH_TO_RSCRIPT = "Rscript";
|
||||
@Argument(fullName="rscript_file", shortName="rscriptFile", doc="The output rscript file generated by the VQSR to aid in visualization of the input data and learned model", required=false)
|
||||
private String RSCRIPT_FILE = null;
|
||||
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required=false)
|
||||
private String PATH_TO_RESOURCES = "R/";
|
||||
@Argument(fullName="ts_filter_level", shortName="ts_filter_level", doc="The truth sensitivity level at which to start filtering, used here to indicate filtered variants in plots", required=false)
|
||||
private double TS_FILTER_LEVEL = 99.0;
|
||||
|
||||
|
|
@ -97,6 +102,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
// Private Member Variables
|
||||
/////////////////////////////
|
||||
private VariantDataManager dataManager;
|
||||
private PrintStream tranchesStream;
|
||||
private final Set<String> ignoreInputFilterSet = new TreeSet<String>();
|
||||
private final Set<String> inputNames = new HashSet<String>();
|
||||
private final VariantRecalibratorEngine engine = new VariantRecalibratorEngine( VRAC );
|
||||
|
|
@ -136,6 +142,12 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
if( inputNames.size() == 0 ) {
|
||||
throw new UserException.BadInput( "No input variant tracks found. Input variant binding names must begin with 'input'." );
|
||||
}
|
||||
|
||||
try {
|
||||
tranchesStream = new PrintStream(TRANCHES_FILE);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(TRANCHES_FILE, e);
|
||||
}
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -224,7 +236,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( dataManager.getData(), Double.NEGATIVE_INFINITY );
|
||||
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
|
||||
final List<Tranche> tranches = TrancheManager.findTranches( dataManager.getData(), TS_TRANCHES, metric );
|
||||
TRANCHES_FILE.print(Tranche.tranchesString( tranches ));
|
||||
tranchesStream.print(Tranche.tranchesString( tranches ));
|
||||
|
||||
double lodCutoff = 0.0;
|
||||
for( final Tranche tranche : tranches ) {
|
||||
|
|
@ -240,6 +252,20 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
createVisualizationScript( randomData, goodModel, badModel, lodCutoff );
|
||||
}
|
||||
|
||||
// Execute Rscript command to create the tranche plot
|
||||
// Print out the command line to make it clear to the user what is being executed and how one might modify it
|
||||
final String rScriptTranchesCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Tranches.R" + " " + TRANCHES_FILE.getAbsolutePath() + " " + TARGET_TITV;
|
||||
logger.info( rScriptTranchesCommandLine );
|
||||
|
||||
// Execute the RScript command to plot the table of truth values
|
||||
try {
|
||||
Process p;
|
||||
p = Runtime.getRuntime().exec( rScriptTranchesCommandLine );
|
||||
p.waitFor();
|
||||
} catch ( Exception e ) {
|
||||
Utils.warnUser("Unable to execute the RScript command. While not critical to the calculations themselves, the script outputs a report that is extremely useful for confirming that the recalibration proceded as expected. We highly recommend trying to rerun the script manually if possible.");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void createVisualizationScript( final ExpandingArrayList<VariantDatum> randomData, final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, final double lodCutoff ) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue