diff --git a/R/plot_Annotations_BinnedTruthMetrics.R b/R/plot_Annotations_BinnedTruthMetrics.R index e20191516..9c0eee3e9 100644 --- a/R/plot_Annotations_BinnedTruthMetrics.R +++ b/R/plot_Annotations_BinnedTruthMetrics.R @@ -6,6 +6,7 @@ verbose = TRUE input = args[1] annotationName = args[2] minBinCutoff = as.numeric(args[3]) +medianNumVariants = args[4] c <- read.table(input, header=T) @@ -14,11 +15,26 @@ novel = c[c$numVariants>minBinCutoff & c$category=="novel",] dbsnp = c[c$numVariants>minBinCutoff & c$category=="dbsnp",] truth = c[c$numVariants>minBinCutoff & c$category=="truth",] +# +# Calculate min, max, medians +# + d = c[c$numVariants>minBinCutoff,] ymin = min(d$titv) ymax = max(d$titv) xmin = min(d$value) xmax = max(d$value) +m = weighted.mean(all$value,all$numVariants/sum(all$numVariants)) +ma = all[all$value > m,] +mb = all[all$value < m,] +m75 = weighted.mean(ma$value,ma$numVariants/sum(ma$numVariants)) +m25 = weighted.mean(mb$value,mb$numVariants/sum(mb$numVariants)) +if(medianNumVariants == "true") { +vc = cumsum( all$numVariants/sum(all$numVariants) ) +m25 = all$value[ max(which(vc<=0.25)) ] +m = all$value[ max(which(vc<=0.5)) ] +m75 = all$value[ min(which(vc>=0.75)) ] +} # # Plot TiTv ratio as a function of the annotation @@ -29,11 +45,6 @@ pdf(outfile, height=7, width=7) par(cex=1.1) plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14); axis(1,axTicks(1), format(axTicks(1), scientific=F)) -m = weighted.mean(all$value,all$numVariants/sum(all$numVariants)) -ma = all[all$value > m,] -mb = all[all$value < m,] -m75 = weighted.mean(ma$value,ma$numVariants/sum(ma$numVariants)) -m25 = weighted.mean(mb$value,mb$numVariants/sum(mb$numVariants)) abline(v=m,lty=2) abline(v=m75,lty=2) abline(v=m25,lty=2) @@ -84,7 +95,6 @@ t = all[all$truePositive>0,] yLabel = "DBsnp/True Positive Rate" ymin = min(min(all$dbsnp),min(t$truePositive)) ymax = max(max(all$dbsnp),max(t$truePositive)) - } plot(all$value,all$dbsnp,xlab=annotationName,ylab=yLabel,pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14); axis(1,axTicks(1), format(axTicks(1), scientific=F)) @@ -126,6 +136,17 @@ dev.off() outfile = paste(input, ".Histogram.pdf", sep="") pdf(outfile, height=7, width=7) par(cex=1.1) -plot(all$value,all$numVariants,xlab=annotationName,ylab="Num variants in bin",type="h",xaxt="n",ps=14); +plot(all$value,all$numVariants,xlab=annotationName,ylab="Num variants in bin",type="h",xaxt="n",ps=14,lwd=4); +axis(1,axTicks(1), format(axTicks(1), scientific=F)) +dev.off() + +# +# Plot histogram of the annotation's value, log scale on x-axis +# + +outfile = paste(input, ".Histogram_log.pdf", sep="") +pdf(outfile, height=7, width=7) +par(cex=1.1) +plot(all$value,all$numVariants,xlab=annotationName,log="x",ylab="Num variants in bin",type="h",xaxt="n",ps=14,lwd=4); axis(1,axTicks(1), format(axTicks(1), scientific=F)) dev.off() \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java index eccea7e6b..acbb2b10f 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java @@ -65,6 +65,9 @@ public class AnalyzeAnnotationsWalker extends RodWalker { private String SAMPLE_NAME = null; @Argument(fullName = "name", shortName = "name", doc = "Labels for the annotations to make plots look nicer. Each name is a separate -name argument. For example, -name DP,Depth -name AB,AlleleBalance", required = false) private String[] ANNOTATION_NAMES = null; + @Argument(fullName = "indicate_mean_num_vars", shortName = "meanNumVars", doc = "If supplied, plots will indicate the distribution of number of variants instead of distribution of value of annotation", required = false) + private boolean INDICATE_MEAN_NUM_VARS = false; + ///////////////////////////// @@ -89,7 +92,7 @@ public class AnalyzeAnnotationsWalker extends RodWalker { nameMap.put(vals[0],vals[1]); } } - dataManager = new AnnotationDataManager( nameMap ); + dataManager = new AnnotationDataManager( nameMap, INDICATE_MEAN_NUM_VARS ); if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java index 55e456c18..d61d6a65d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java @@ -42,12 +42,14 @@ import java.io.FileNotFoundException; public class AnnotationDataManager { - public final HashMap> data; - public final HashMap nameMap; + private final HashMap> data; + private final HashMap nameMap; + private final boolean INDICATE_MEAN_NUM_VARS; - public AnnotationDataManager( HashMap _nameMap ) { + public AnnotationDataManager( HashMap _nameMap, boolean _INDICATE_MEAN_NUM_VARS ) { data = new HashMap>(); nameMap = _nameMap; + INDICATE_MEAN_NUM_VARS = _INDICATE_MEAN_NUM_VARS; } public void addAnnotations( final RodVCF variant, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) { @@ -62,7 +64,7 @@ public class AnnotationDataManager { final Map infoField = variant.getInfoValues(); for( String annotationKey : infoField.keySet() ) { - float value = 0.0f; + float value; try { value = Float.parseFloat( infoField.get( annotationKey ) ); } catch( NumberFormatException e ) { @@ -140,13 +142,13 @@ public class AnnotationDataManager { output.close(); String annotationName = nameMap.get(annotationKey); - if( annotationName == null ) { // name is not in the map so use the key + if( annotationName == null ) { // This annotation is not in the name map so use the key instead annotationName = annotationKey; } // Print out the command line to make it clear to the user what is being executed and how one might modify it final String rScriptCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTruthMetrics.R" + " " + - OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationName + " " + MIN_VARIANTS_PER_BIN; + OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationName + " " + MIN_VARIANTS_PER_BIN + " " + INDICATE_MEAN_NUM_VARS; System.out.println( rScriptCommandLine ); // Execute the RScript command to plot the table of TiTv values