Moving AnalyzeAnnotations into the archive because it has outlived its usefulness.
This commit is contained in:
parent
43959e6780
commit
5faf40b79d
|
|
@ -1,190 +0,0 @@
|
|||
#!/bin/env Rscript
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
verbose = TRUE
|
||||
|
||||
input = args[1]
|
||||
annotationName = args[2]
|
||||
minBinCutoff = as.numeric(args[3])
|
||||
medianNumVariants = args[4]
|
||||
|
||||
c <- read.table(input, header=T)
|
||||
|
||||
all = c[c$numVariants>minBinCutoff & c$category=="all",]
|
||||
novel = c[c$numVariants>minBinCutoff & c$category=="novel",]
|
||||
dbsnp = c[c$numVariants>minBinCutoff & c$category=="dbsnp",]
|
||||
truth = c[c$numVariants>minBinCutoff & c$category=="truth",]
|
||||
|
||||
#
|
||||
# Calculate min, max, medians
|
||||
#
|
||||
|
||||
d = c[c$numVariants>minBinCutoff,]
|
||||
ymin = min(d$titv)
|
||||
ymax = max(d$titv)
|
||||
xmin = min(d$value)
|
||||
xmax = max(d$value)
|
||||
m = weighted.mean(all$value,all$numVariants/sum(all$numVariants))
|
||||
ma = all[all$value > m,]
|
||||
mb = all[all$value < m,]
|
||||
m75 = weighted.mean(ma$value,ma$numVariants/sum(ma$numVariants))
|
||||
m25 = weighted.mean(mb$value,mb$numVariants/sum(mb$numVariants))
|
||||
if(medianNumVariants == "true") {
|
||||
vc = cumsum( all$numVariants/sum(all$numVariants) )
|
||||
m10 = all$value[ max(which(vc<=0.10)) ]
|
||||
m25 = all$value[ max(which(vc<=0.25)) ]
|
||||
m = all$value[ max(which(vc<=0.5)) ]
|
||||
m75 = all$value[ min(which(vc>=0.75)) ]
|
||||
m90 = all$value[ min(which(vc>=0.90)) ]
|
||||
}
|
||||
|
||||
#
|
||||
# Plot TiTv ratio as a function of the annotation
|
||||
#
|
||||
|
||||
outfile = paste(input, ".TiTv.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
abline(v=m,lty=2,col="red")
|
||||
abline(v=m75,lty=3)
|
||||
abline(v=m25,lty=3)
|
||||
text(m, ymin, "50", col="red", cex=0.6);
|
||||
text(m75, ymin, "75", col="black", cex=0.6);
|
||||
text(m25, ymin, "25", col="black", cex=0.6);
|
||||
if(medianNumVariants == "true") {
|
||||
abline(v=m90,lty=3)
|
||||
abline(v=m10,lty=3)
|
||||
text(m10, ymin, "10", col="black", cex=0.6);
|
||||
text(m90, ymin, "90", col="black", cex=0.6);
|
||||
}
|
||||
points(novel$value,novel$titv,col="green",pch=20)
|
||||
points(dbsnp$value,dbsnp$titv,col="blue",pch=20)
|
||||
if( sum(all$truePositive==0) != length(all$truePositive) ) {
|
||||
points(truth$value,truth$titv,col="magenta",pch=20)
|
||||
legend("topleft", c("all","novel","dbsnp","truth"),col=c("black","green","blue","magenta"),pch=c(20,20,20,20))
|
||||
} else {
|
||||
legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20))
|
||||
}
|
||||
dev.off()
|
||||
|
||||
#
|
||||
# Plot TiTv ratio as a function of the annotation, log scale on the x-axis
|
||||
#
|
||||
|
||||
outfile = paste(input, ".TiTv_log.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
abline(v=m,lty=2,col="red")
|
||||
abline(v=m75,lty=3)
|
||||
abline(v=m25,lty=3)
|
||||
text(m, ymin, "50", col="red", cex=0.6);
|
||||
text(m75, ymin, "75", col="black", cex=0.6);
|
||||
text(m25, ymin, "25", col="black", cex=0.6);
|
||||
if(medianNumVariants == "true") {
|
||||
abline(v=m90,lty=3)
|
||||
abline(v=m10,lty=3)
|
||||
text(m10, ymin, "10", col="black", cex=0.6);
|
||||
text(m90, ymin, "90", col="black", cex=0.6);
|
||||
}
|
||||
points(novel$value,novel$titv,col="green",pch=20)
|
||||
points(dbsnp$value,dbsnp$titv,col="blue",pch=20)
|
||||
if( sum(all$truePositive==0) != length(all$truePositive) ) {
|
||||
points(truth$value,truth$titv,col="magenta",pch=20)
|
||||
legend("topleft", c("all","novel","dbsnp","truth"),col=c("black","green","blue","magenta"),pch=c(20,20,20,20))
|
||||
} else {
|
||||
legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20))
|
||||
}
|
||||
dev.off()
|
||||
|
||||
#
|
||||
# Plot dbsnp and true positive rate as a function of the annotation
|
||||
#
|
||||
|
||||
ymin = min(all$dbsnp)
|
||||
ymax = max(all$dbsnp)
|
||||
outfile = paste(input, ".truthRate.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
yLabel = "DBsnp Rate"
|
||||
if( sum(all$truePositive==0) != length(all$truePositive) ) {
|
||||
t = all[all$truePositive>0,]
|
||||
yLabel = "DBsnp/True Positive Rate"
|
||||
ymin = min(min(all$dbsnp),min(t$truePositive))
|
||||
ymax = max(max(all$dbsnp),max(t$truePositive))
|
||||
}
|
||||
plot(all$value,all$dbsnp,xlab=annotationName,ylab=yLabel,pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
abline(v=m,lty=2,col="red")
|
||||
abline(v=m75,lty=3)
|
||||
abline(v=m25,lty=3)
|
||||
text(m, ymin, "50", col="red", cex=0.6);
|
||||
text(m75, ymin, "75", col="black", cex=0.6);
|
||||
text(m25, ymin, "25", col="black", cex=0.6);
|
||||
if(medianNumVariants == "true") {
|
||||
abline(v=m90,lty=3)
|
||||
abline(v=m10,lty=3)
|
||||
text(m10, ymin, "10", col="black", cex=0.6);
|
||||
text(m90, ymin, "90", col="black", cex=0.6);
|
||||
}
|
||||
if( sum(all$truePositive==0) != length(all$truePositive) ) {
|
||||
points(t$value,t$truePositive,col="magenta",pch=20);
|
||||
legend("topleft", c("dbsnp","truth"),col=c("black","magenta"),pch=c(20,20))
|
||||
}
|
||||
dev.off()
|
||||
|
||||
#
|
||||
# Plot dbsnp and true positive rate as a function of the annotation, log scale on the x-axis
|
||||
#
|
||||
|
||||
outfile = paste(input, ".truthRate_log.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
yLabel = "DBsnp Rate"
|
||||
if( sum(all$truePositive==0) != length(all$truePositive) ) {
|
||||
yLabel = "DBsnp/Truth Rate"
|
||||
}
|
||||
plot(all$value,all$dbsnp,xlab=annotationName,log="x",ylab=yLabel,ylim=c(ymin,ymax),pch=20,xaxt="n",ps=14);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
abline(v=m,lty=2,col="red")
|
||||
abline(v=m75,lty=3)
|
||||
abline(v=m25,lty=3)
|
||||
text(m, ymin, "50", col="red", cex=0.6);
|
||||
text(m75, ymin, "75", col="black", cex=0.6);
|
||||
text(m25, ymin, "25", col="black", cex=0.6);
|
||||
if(medianNumVariants == "true") {
|
||||
abline(v=m90,lty=3)
|
||||
abline(v=m10,lty=3)
|
||||
text(m10, ymin, "10", col="black", cex=0.6);
|
||||
text(m90, ymin, "90", col="black", cex=0.6);
|
||||
}
|
||||
if( sum(all$truePositive==0) != length(all$truePositive) ) {
|
||||
points(t$value,t$truePositive,col="magenta",pch=20);
|
||||
legend("topleft", c("dbsnp","truth"),col=c("black","magenta"),pch=c(20,20))
|
||||
}
|
||||
dev.off()
|
||||
|
||||
#
|
||||
# Plot histogram of the annotation's value
|
||||
#
|
||||
|
||||
outfile = paste(input, ".Histogram.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(all$value,all$numVariants,xlab=annotationName,ylab="Num variants in bin",type="h",xaxt="n",ps=14,lwd=4);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
dev.off()
|
||||
|
||||
#
|
||||
# Plot histogram of the annotation's value, log scale on x-axis
|
||||
#
|
||||
|
||||
outfile = paste(input, ".Histogram_log.pdf", sep="")
|
||||
pdf(outfile, height=7, width=7)
|
||||
par(cex=1.1)
|
||||
plot(all$value,all$numVariants,xlab=annotationName,log="x",ylab="Num variants in bin",type="h",xaxt="n",ps=14,lwd=4);
|
||||
axis(1,axTicks(1), format(axTicks(1), scientific=F))
|
||||
dev.off()
|
||||
|
|
@ -1,154 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.analyzeannotations;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Takes variant calls as .vcf files and creates plots of truth metrics as a function of the various annotations found in the INFO field.
|
||||
*
|
||||
* @author rpoplin
|
||||
* @since Jan 15, 2010
|
||||
* @help.summary Takes variant calls as .vcf files and creates plots of truth metrics as a function of the various annotations found in the INFO field.
|
||||
*/
|
||||
|
||||
public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
|
||||
|
||||
/////////////////////////////
|
||||
// Command Line Arguments
|
||||
/////////////////////////////
|
||||
@Argument(fullName = "output_prefix", shortName = "output", doc = "The output path and name to prepend to all plots and intermediate data files", required = false)
|
||||
private String OUTPUT_PREFIX = "analyzeAnnotations/";
|
||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
|
||||
private String PATH_TO_RSCRIPT = "Rscript";
|
||||
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
|
||||
private String PATH_TO_RESOURCES = "R/";
|
||||
@Argument(fullName = "min_variants_per_bin", shortName = "minBinSize", doc = "The minimum number of variants in a bin in order to calculate truth metrics.", required = false)
|
||||
private int MIN_VARIANTS_PER_BIN = 1000;
|
||||
@Argument(fullName = "max_variants_per_bin", shortName = "maxBinSize", doc = "The maximum number of variants in a bin.", required = false)
|
||||
private int MAX_VARIANTS_PER_BIN = 20000;
|
||||
@Argument(fullName = "sampleName", shortName = "sampleName", doc = "If supplied, only process variants found in this sample.", required = false)
|
||||
private String SAMPLE_NAME = null;
|
||||
@Argument(fullName = "name", shortName = "name", doc = "Labels for the annotations to make plots look nicer. Each name is a separate -name argument. For example, -name DP,Depth -name AB,AlleleBalance", required = false)
|
||||
private String[] ANNOTATION_NAMES = null;
|
||||
@Argument(fullName = "indicate_mean_num_vars", shortName = "meanNumVars", doc = "If supplied, plots will indicate the distribution of number of variants instead of distribution of value of annotation", required = false)
|
||||
private boolean INDICATE_MEAN_NUM_VARS = false;
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
/////////////////////////////
|
||||
private AnnotationDataManager dataManager;
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// initialize
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public void initialize() {
|
||||
|
||||
// Create a HashMap associating the names of the annotations to full Strings that can be used as labels on plots
|
||||
HashMap<String,String> nameMap = null;
|
||||
if( ANNOTATION_NAMES != null ) {
|
||||
nameMap = new HashMap<String,String>();
|
||||
for( String nameLine : ANNOTATION_NAMES ) {
|
||||
String[] vals = nameLine.split(",");
|
||||
nameMap.put(vals[0],vals[1]);
|
||||
}
|
||||
}
|
||||
dataManager = new AnnotationDataManager( nameMap, INDICATE_MEAN_NUM_VARS );
|
||||
|
||||
if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// map
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
|
||||
if( tracker != null ) {
|
||||
Collection<VariantContext> VCs = tracker.getAllVariantContexts(ref);
|
||||
|
||||
// First find out if this variant is in the truth sets
|
||||
boolean isInTruthSet = false;
|
||||
boolean isTrueVariant = false;
|
||||
for ( VariantContext vc : VCs ) {
|
||||
if( vc != null && vc.isSNP() && !vc.isFiltered() ) {
|
||||
if( vc.getSource().toUpperCase().startsWith("TRUTH") ) {
|
||||
isInTruthSet = true;
|
||||
if( vc.isBiallelic() && vc.isVariant() ) {
|
||||
isTrueVariant = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add each annotation in this VCF Record to the dataManager
|
||||
for ( VariantContext vc : VCs ) {
|
||||
if( vc != null && vc.isSNP() && vc.isBiallelic() && !vc.isFiltered() ) {
|
||||
if( !vc.getSource().toUpperCase().startsWith("TRUTH") ) {
|
||||
if( vc.isVariant() ) {
|
||||
dataManager.addAnnotations( vc, SAMPLE_NAME, isInTruthSet, isTrueVariant );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 1; // This value isn't actually used for anything
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// reduce
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0; // Nothing to do here
|
||||
}
|
||||
|
||||
public Integer reduce( Integer value, Integer sum ) {
|
||||
return 0; // Nothing to do here
|
||||
}
|
||||
|
||||
public void onTraversalDone( Integer sum ) {
|
||||
|
||||
// For each annotation, decide how to cut up the data, output intermediate cumulative p(true) tables, and call RScript to plot the tables
|
||||
dataManager.plotCumulativeTables(PATH_TO_RSCRIPT, PATH_TO_RESOURCES, OUTPUT_PREFIX, MIN_VARIANTS_PER_BIN, MAX_VARIANTS_PER_BIN);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,169 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.analyzeannotations;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Jan 18, 2010
|
||||
*/
|
||||
|
||||
public class AnnotationDataManager {
|
||||
|
||||
private final HashMap<String, TreeSet<AnnotationDatum>> data;
|
||||
private final HashMap<String,String> nameMap;
|
||||
private final boolean INDICATE_MEAN_NUM_VARS;
|
||||
|
||||
public AnnotationDataManager( HashMap<String,String> _nameMap, boolean _INDICATE_MEAN_NUM_VARS ) {
|
||||
data = new HashMap<String, TreeSet<AnnotationDatum>>();
|
||||
nameMap = _nameMap;
|
||||
INDICATE_MEAN_NUM_VARS = _INDICATE_MEAN_NUM_VARS;
|
||||
}
|
||||
|
||||
public void addAnnotations( final VariantContext vc, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) {
|
||||
|
||||
if( sampleName != null ) { // Only process variants that are found in the sample with this sampleName
|
||||
if( vc.getGenotype(sampleName).isNoCall() ) { // This variant isn't found in this sample so break out
|
||||
return;
|
||||
}
|
||||
} // else, process all samples
|
||||
|
||||
// Loop over each annotation in the vcf record
|
||||
final Map<String,Object> infoField = new HashMap<String, Object>(vc.getAttributes());
|
||||
infoField.put("QUAL", ((Double)vc.getPhredScaledQual()).toString() ); // add QUAL field to annotations
|
||||
for( Map.Entry<String, Object> annotation : infoField.entrySet() ) {
|
||||
|
||||
float value;
|
||||
try {
|
||||
value = Float.parseFloat( annotation.getValue().toString() );
|
||||
} catch( NumberFormatException e ) {
|
||||
continue; // Skip over annotations that aren't floats, like "DB"
|
||||
}
|
||||
|
||||
TreeSet<AnnotationDatum> treeSet = data.get( annotation.getKey() );
|
||||
if( treeSet == null ) { // This annotation hasn't been seen before
|
||||
treeSet = new TreeSet<AnnotationDatum>( new AnnotationDatum() ); // AnnotationDatum is a Comparator that orders variants by the value of the Annotation
|
||||
data.put( annotation.getKey(), treeSet );
|
||||
}
|
||||
AnnotationDatum datum = new AnnotationDatum( value );
|
||||
if( treeSet.contains(datum) ) { // contains() uses AnnotationDatum's equals function, so it only checks if the value field is already present
|
||||
datum = treeSet.tailSet(datum).first();
|
||||
} else {
|
||||
treeSet.add(datum);
|
||||
}
|
||||
|
||||
final boolean isNovelVariant = !vc.hasID() || !vc.getID().contains("rs");
|
||||
|
||||
// Decide if the variant is a transition or transversion
|
||||
if( VariantContextUtils.getSNPSubstitutionType(vc).compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0 ) {
|
||||
datum.incrementTi( isNovelVariant, isInTruthSet, isTrueVariant );
|
||||
} else {
|
||||
datum.incrementTv( isNovelVariant, isInTruthSet, isTrueVariant );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void plotCumulativeTables( final String PATH_TO_RSCRIPT, final String PATH_TO_RESOURCES, final String OUTPUT_PREFIX,
|
||||
final int MIN_VARIANTS_PER_BIN, final int MAX_VARIANTS_PER_BIN ) {
|
||||
|
||||
final AnnotationDatum thisAnnotationBin = new AnnotationDatum();
|
||||
System.out.println( "\nFinished reading variants into memory. Executing RScript commands:" );
|
||||
|
||||
// For each annotation we've seen
|
||||
for( final String annotationKey : data.keySet() ) {
|
||||
|
||||
String filename = OUTPUT_PREFIX + annotationKey + ".dat";
|
||||
PrintStream output;
|
||||
try {
|
||||
output = new PrintStream(filename); // Create the intermediate data file for this annotation
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(new File(filename), "Can't create intermediate output annotation data file. Does the output directory exist?", e);
|
||||
}
|
||||
|
||||
// Output a header line
|
||||
output.println("value\ttitv\tdbsnp\ttruePositive\tnumVariants\tcategory");
|
||||
|
||||
// Bin SNPs and calculate truth metrics for each bin
|
||||
thisAnnotationBin.clearBin();
|
||||
for( final AnnotationDatum datum : data.get( annotationKey ) ) {
|
||||
thisAnnotationBin.combine( datum );
|
||||
if( thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) >= MAX_VARIANTS_PER_BIN ) { // This annotation bin is full
|
||||
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.FULL_SET ) + "\t" + thisAnnotationBin.calcDBsnpRate() + "\t" + thisAnnotationBin.calcTPrate() +
|
||||
"\t" + thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) + "\tall");
|
||||
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.NOVEL_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.NOVEL_SET ) + "\tnovel");
|
||||
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.DBSNP_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.DBSNP_SET ) + "\tdbsnp");
|
||||
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.TRUTH_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.TRUTH_SET ) + "\ttruth");
|
||||
thisAnnotationBin.clearBin();
|
||||
}
|
||||
// else, continue accumulating variants because this bin isn't full yet
|
||||
}
|
||||
|
||||
// One final bin that may not have been dumped out
|
||||
if( thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) != 0 ) {
|
||||
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.FULL_SET ) + "\t" + thisAnnotationBin.calcDBsnpRate() + "\t" + thisAnnotationBin.calcTPrate() +
|
||||
"\t" + thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) + "\tall");
|
||||
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.NOVEL_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.NOVEL_SET ) + "\tnovel");
|
||||
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.DBSNP_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.DBSNP_SET ) + "\tdbsnp");
|
||||
output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.TRUTH_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.TRUTH_SET ) + "\ttruth");
|
||||
thisAnnotationBin.clearBin();
|
||||
}
|
||||
|
||||
// Close the PrintStream
|
||||
output.close();
|
||||
|
||||
String annotationName = null;
|
||||
if( nameMap != null ) {
|
||||
annotationName = nameMap.get(annotationKey);
|
||||
}
|
||||
if( annotationName == null ) { // This annotation is not in the name map so use the key instead
|
||||
annotationName = annotationKey;
|
||||
}
|
||||
|
||||
// Print out the command line to make it clear to the user what is being executed and how one might modify it
|
||||
final String rScriptCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTruthMetrics.R" + " " +
|
||||
OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationName + " " + MIN_VARIANTS_PER_BIN + " " + INDICATE_MEAN_NUM_VARS;
|
||||
System.out.println( rScriptCommandLine );
|
||||
|
||||
// Execute the RScript command to plot the table of truth values
|
||||
try {
|
||||
Runtime.getRuntime().exec( rScriptCommandLine );
|
||||
} catch ( IOException e ) {
|
||||
throw new UserException.CannotExecuteRScript( rScriptCommandLine, e );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,170 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.analyzeannotations;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Jan 18, 2010
|
||||
*/
|
||||
|
||||
public class AnnotationDatum implements Comparator<AnnotationDatum> {
|
||||
|
||||
public float value;
|
||||
private final int[] ti;
|
||||
private final int[] tv;
|
||||
|
||||
public static final int FULL_SET = 0;
|
||||
public static final int NOVEL_SET = 1;
|
||||
public static final int DBSNP_SET = 2;
|
||||
public static final int TRUTH_SET = 3;
|
||||
public static final int TRUE_POSITIVE = 4;
|
||||
private static final int NUM_SETS = 5;
|
||||
|
||||
public AnnotationDatum() {
|
||||
|
||||
value = 0.0f;
|
||||
ti = new int[NUM_SETS];
|
||||
tv = new int[NUM_SETS];
|
||||
for( int iii = 0; iii < NUM_SETS; iii++ ) {
|
||||
ti[iii] = 0;
|
||||
tv[iii] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
public AnnotationDatum( final float _value ) {
|
||||
|
||||
value = _value;
|
||||
ti = new int[NUM_SETS];
|
||||
tv = new int[NUM_SETS];
|
||||
for( int iii = 0; iii < NUM_SETS; iii++ ) {
|
||||
ti[iii] = 0;
|
||||
tv[iii] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
final public void incrementTi( final boolean isNovelVariant, final boolean isInTruthSet, final boolean isTrueVariant ) {
|
||||
|
||||
ti[FULL_SET]++;
|
||||
if( isNovelVariant ) {
|
||||
ti[NOVEL_SET]++;
|
||||
} else { // Is known, in DBsnp
|
||||
ti[DBSNP_SET]++;
|
||||
}
|
||||
if( isInTruthSet ) {
|
||||
ti[TRUTH_SET]++;
|
||||
if( isTrueVariant ) {
|
||||
ti[TRUE_POSITIVE]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final public void incrementTv( final boolean isNovelVariant, final boolean isInTruthSet, final boolean isTrueVariant ) {
|
||||
|
||||
tv[FULL_SET]++;
|
||||
if( isNovelVariant ) {
|
||||
tv[NOVEL_SET]++;
|
||||
} else { // Is known, in DBsnp
|
||||
tv[DBSNP_SET]++;
|
||||
}
|
||||
if( isInTruthSet ) {
|
||||
tv[TRUTH_SET]++;
|
||||
if( isTrueVariant ) {
|
||||
tv[TRUE_POSITIVE]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final public void combine( final AnnotationDatum that ) {
|
||||
|
||||
for( int iii = 0; iii < NUM_SETS; iii++ ) {
|
||||
this.ti[iii] += that.ti[iii];
|
||||
this.tv[iii] += that.tv[iii];
|
||||
}
|
||||
this.value = that.value; // Overwrite this bin's value
|
||||
}
|
||||
|
||||
final public float calcTiTv( final int INDEX ) {
|
||||
|
||||
if( ti[INDEX] < 0 || tv[INDEX] < 0 ) {
|
||||
throw new ReviewedStingException( "Integer overflow detected! There are too many variants piled up in one annotation bin." );
|
||||
}
|
||||
|
||||
if( tv[INDEX] == 0 ) { // Don't divide by zero
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
return ((float) ti[INDEX]) / ((float) tv[INDEX]);
|
||||
}
|
||||
|
||||
final public float calcDBsnpRate() {
|
||||
|
||||
if( ti[FULL_SET] + tv[FULL_SET] == 0 ) { // Don't divide by zero
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
return 100.0f * ((float) ti[DBSNP_SET] + tv[DBSNP_SET]) /
|
||||
((float) ti[FULL_SET] + tv[FULL_SET]);
|
||||
}
|
||||
|
||||
final public float calcTPrate() {
|
||||
|
||||
if( ti[TRUTH_SET] + tv[TRUTH_SET] == 0 ) { // Don't divide by zero
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
return 100.0f * ((float) ti[TRUE_POSITIVE] + tv[TRUE_POSITIVE]) /
|
||||
((float) ti[TRUTH_SET] + tv[TRUTH_SET]);
|
||||
}
|
||||
|
||||
final public int numVariants( final int INDEX ) {
|
||||
return ti[INDEX] + tv[INDEX];
|
||||
}
|
||||
|
||||
final public void clearBin() {
|
||||
value = 0.0f;
|
||||
for( int iii = 0; iii < NUM_SETS; iii++ ) {
|
||||
ti[iii] = 0;
|
||||
tv[iii] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
public int compare( AnnotationDatum a1, AnnotationDatum a2 ) { // Function needed for this to be a Comparator
|
||||
if( a1.value < a2.value ) { return -1; }
|
||||
if( a1.value > a2.value ) { return 1; }
|
||||
return 0;
|
||||
}
|
||||
|
||||
public int equals( AnnotationDatum that ) { // Function needed for this to be sorted correctly in a TreeSet
|
||||
if( this.value < that.value ) { return -1; }
|
||||
if( this.value > that.value ) { return 1; }
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
@ -19,7 +19,6 @@
|
|||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java" />
|
||||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java" />
|
||||
<file name="public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java" />
|
||||
<file name="public/R/plot_Annotations_BinnedTruthMetrics.R" />
|
||||
<file name="public/R/plot_Tranches.R" />
|
||||
<file name="public/R/titvFPEst.R" />
|
||||
</resources>
|
||||
|
|
|
|||
Loading…
Reference in New Issue