From 16da5011c0e56c4aa0cd173a6e1f9ecaa02c5810 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Fri, 29 Jan 2010 21:58:31 +0000 Subject: [PATCH] Added a new option for indicating the mean number of variants on the AnalyzeAnnotations plots. This way one can say, for example, filtering at this point will keep 75 percent of all the variants. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2744 348d0f76-0448-11de-a6fe-93d51630548a --- R/plot_Annotations_BinnedTruthMetrics.R | 35 +++++++++++++++---- .../AnalyzeAnnotationsWalker.java | 5 ++- .../AnnotationDataManager.java | 14 ++++---- 3 files changed, 40 insertions(+), 14 deletions(-) diff --git a/R/plot_Annotations_BinnedTruthMetrics.R b/R/plot_Annotations_BinnedTruthMetrics.R index e20191516..9c0eee3e9 100644 --- a/R/plot_Annotations_BinnedTruthMetrics.R +++ b/R/plot_Annotations_BinnedTruthMetrics.R @@ -6,6 +6,7 @@ verbose = TRUE input = args[1] annotationName = args[2] minBinCutoff = as.numeric(args[3]) +medianNumVariants = args[4] c <- read.table(input, header=T) @@ -14,11 +15,26 @@ novel = c[c$numVariants>minBinCutoff & c$category=="novel",] dbsnp = c[c$numVariants>minBinCutoff & c$category=="dbsnp",] truth = c[c$numVariants>minBinCutoff & c$category=="truth",] +# +# Calculate min, max, medians +# + d = c[c$numVariants>minBinCutoff,] ymin = min(d$titv) ymax = max(d$titv) xmin = min(d$value) xmax = max(d$value) +m = weighted.mean(all$value,all$numVariants/sum(all$numVariants)) +ma = all[all$value > m,] +mb = all[all$value < m,] +m75 = weighted.mean(ma$value,ma$numVariants/sum(ma$numVariants)) +m25 = weighted.mean(mb$value,mb$numVariants/sum(mb$numVariants)) +if(medianNumVariants == "true") { +vc = cumsum( all$numVariants/sum(all$numVariants) ) +m25 = all$value[ max(which(vc<=0.25)) ] +m = all$value[ max(which(vc<=0.5)) ] +m75 = all$value[ min(which(vc>=0.75)) ] +} # # Plot TiTv ratio as a function of the annotation @@ -29,11 +45,6 @@ pdf(outfile, height=7, width=7) par(cex=1.1) plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14); axis(1,axTicks(1), format(axTicks(1), scientific=F)) -m = weighted.mean(all$value,all$numVariants/sum(all$numVariants)) -ma = all[all$value > m,] -mb = all[all$value < m,] -m75 = weighted.mean(ma$value,ma$numVariants/sum(ma$numVariants)) -m25 = weighted.mean(mb$value,mb$numVariants/sum(mb$numVariants)) abline(v=m,lty=2) abline(v=m75,lty=2) abline(v=m25,lty=2) @@ -84,7 +95,6 @@ t = all[all$truePositive>0,] yLabel = "DBsnp/True Positive Rate" ymin = min(min(all$dbsnp),min(t$truePositive)) ymax = max(max(all$dbsnp),max(t$truePositive)) - } plot(all$value,all$dbsnp,xlab=annotationName,ylab=yLabel,pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14); axis(1,axTicks(1), format(axTicks(1), scientific=F)) @@ -126,6 +136,17 @@ dev.off() outfile = paste(input, ".Histogram.pdf", sep="") pdf(outfile, height=7, width=7) par(cex=1.1) -plot(all$value,all$numVariants,xlab=annotationName,ylab="Num variants in bin",type="h",xaxt="n",ps=14); +plot(all$value,all$numVariants,xlab=annotationName,ylab="Num variants in bin",type="h",xaxt="n",ps=14,lwd=4); +axis(1,axTicks(1), format(axTicks(1), scientific=F)) +dev.off() + +# +# Plot histogram of the annotation's value, log scale on x-axis +# + +outfile = paste(input, ".Histogram_log.pdf", sep="") +pdf(outfile, height=7, width=7) +par(cex=1.1) +plot(all$value,all$numVariants,xlab=annotationName,log="x",ylab="Num variants in bin",type="h",xaxt="n",ps=14,lwd=4); axis(1,axTicks(1), format(axTicks(1), scientific=F)) dev.off() \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java index eccea7e6b..acbb2b10f 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java @@ -65,6 +65,9 @@ public class AnalyzeAnnotationsWalker extends RodWalker { private String SAMPLE_NAME = null; @Argument(fullName = "name", shortName = "name", doc = "Labels for the annotations to make plots look nicer. Each name is a separate -name argument. For example, -name DP,Depth -name AB,AlleleBalance", required = false) private String[] ANNOTATION_NAMES = null; + @Argument(fullName = "indicate_mean_num_vars", shortName = "meanNumVars", doc = "If supplied, plots will indicate the distribution of number of variants instead of distribution of value of annotation", required = false) + private boolean INDICATE_MEAN_NUM_VARS = false; + ///////////////////////////// @@ -89,7 +92,7 @@ public class AnalyzeAnnotationsWalker extends RodWalker { nameMap.put(vals[0],vals[1]); } } - dataManager = new AnnotationDataManager( nameMap ); + dataManager = new AnnotationDataManager( nameMap, INDICATE_MEAN_NUM_VARS ); if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java index 55e456c18..d61d6a65d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java @@ -42,12 +42,14 @@ import java.io.FileNotFoundException; public class AnnotationDataManager { - public final HashMap> data; - public final HashMap nameMap; + private final HashMap> data; + private final HashMap nameMap; + private final boolean INDICATE_MEAN_NUM_VARS; - public AnnotationDataManager( HashMap _nameMap ) { + public AnnotationDataManager( HashMap _nameMap, boolean _INDICATE_MEAN_NUM_VARS ) { data = new HashMap>(); nameMap = _nameMap; + INDICATE_MEAN_NUM_VARS = _INDICATE_MEAN_NUM_VARS; } public void addAnnotations( final RodVCF variant, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) { @@ -62,7 +64,7 @@ public class AnnotationDataManager { final Map infoField = variant.getInfoValues(); for( String annotationKey : infoField.keySet() ) { - float value = 0.0f; + float value; try { value = Float.parseFloat( infoField.get( annotationKey ) ); } catch( NumberFormatException e ) { @@ -140,13 +142,13 @@ public class AnnotationDataManager { output.close(); String annotationName = nameMap.get(annotationKey); - if( annotationName == null ) { // name is not in the map so use the key + if( annotationName == null ) { // This annotation is not in the name map so use the key instead annotationName = annotationKey; } // Print out the command line to make it clear to the user what is being executed and how one might modify it final String rScriptCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTruthMetrics.R" + " " + - OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationName + " " + MIN_VARIANTS_PER_BIN; + OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationName + " " + MIN_VARIANTS_PER_BIN + " " + INDICATE_MEAN_NUM_VARS; System.out.println( rScriptCommandLine ); // Execute the RScript command to plot the table of TiTv values