Review notes (free text placed ahead of the first "diff --git" header):
  * AnalyzeAnnotationsWalker.initialize() always builds the label map (possibly empty) and rejects a
    -name value that does not contain a "key,label" pair.
  * The AnnotationDataManager constructor replaces a null label map with an empty one, so the
    nameMap.get(annotationKey) lookup added in the last hunk cannot dereference null.
  * Added nameMap declarations carry explicit <String, String> type arguments. Context lines that
    read "HashMap> data" / "new HashMap>()" are reproduced as found; their type arguments are missing.
  * Blank context lines and indentation were re-inferred from the hunk line counts; verify them
    against the target files before applying. The post-image blob ids on the "index" lines of the
    two edited Java files are carried over from the original patch and are therefore stale.

diff --git a/R/plot_Annotations_BinnedTruthMetrics.R b/R/plot_Annotations_BinnedTruthMetrics.R
index 6e68ac88d..e20191516 100644
--- a/R/plot_Annotations_BinnedTruthMetrics.R
+++ b/R/plot_Annotations_BinnedTruthMetrics.R
@@ -24,7 +24,7 @@ xmax = max(d$value)
 # Plot TiTv ratio as a function of the annotation
 #
 
-outfile = paste(input, ".TiTv.", annotationName, ".pdf", sep="")
+outfile = paste(input, ".TiTv.pdf", sep="")
 pdf(outfile, height=7, width=7)
 par(cex=1.1)
 plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
@@ -51,7 +51,7 @@ dev.off()
 # Plot TiTv ratio as a function of the annotation, log scale on the x-axis
 #
 
-outfile = paste(input, ".TiTv_log.", annotationName, ".pdf", sep="")
+outfile = paste(input, ".TiTv_log.pdf", sep="")
 pdf(outfile, height=7, width=7)
 par(cex=1.1)
 plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14);
@@ -75,7 +75,7 @@ dev.off()
 ymin = min(all$dbsnp)
 ymax = max(all$dbsnp)
 
-outfile = paste(input, ".truthRate.", annotationName, ".pdf", sep="")
+outfile = paste(input, ".truthRate.pdf", sep="")
 pdf(outfile, height=7, width=7)
 par(cex=1.1)
 yLabel = "DBsnp Rate"
@@ -101,7 +101,7 @@ dev.off()
 # Plot dbsnp and true positive rate as a function of the annotation, log scale on the x-axis
 #
 
-outfile = paste(input, ".truthRate_log.", annotationName, ".pdf", sep="")
+outfile = paste(input, ".truthRate_log.pdf", sep="")
 pdf(outfile, height=7, width=7)
 par(cex=1.1)
 yLabel = "DBsnp Rate"
@@ -123,7 +123,7 @@ dev.off()
 # Plot histogram of the annotation's value
 #
 
-outfile = paste(input, "annotationHistogram.", annotationName, ".pdf", sep="")
+outfile = paste(input, ".Histogram.pdf", sep="")
 pdf(outfile, height=7, width=7)
 par(cex=1.1)
 plot(all$value,all$numVariants,xlab=annotationName,ylab="Num variants in bin",type="h",xaxt="n",ps=14);
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
index 2d21dd3ea..86b5e9e7d 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
@@ -32,7 +32,7 @@ import net.sf.samtools.SAMRecord;
  * User: rpoplin
  * Date: Jan 29, 2010
  *
- * The number of previous N bases (along the direction of the read) that contain G's and C's.
+ * The number of previous N bases (along the direction of the read) that contain G's and C's. The goal is to correct for dye slippage.
  * Only valid for Illumina reads. Otherwise return -1.
  */
 
diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java
index afb7feb92..eccea7e6b 100755
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java
@@ -11,6 +11,8 @@ import org.broadinstitute.sting.utils.StingException;
 import org.broadinstitute.sting.utils.cmdLine.Argument;
 import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
 
+import java.util.HashMap;
+
 /*
  * Copyright (c) 2010 The Broad Institute
  *
@@ -61,12 +63,14 @@ public class AnalyzeAnnotationsWalker extends RodWalker {
     private int MAX_VARIANTS_PER_BIN = 20000;
     @Argument(fullName = "sampleName", shortName = "sampleName", doc = "If supplied, only process variants found in this sample.", required = false)
     private String SAMPLE_NAME = null;
+    @Argument(fullName = "name", shortName = "name", doc = "Labels for the annotations to make plots look nicer. Each name is a separate -name argument. For example, -name DP,Depth -name AB,AlleleBalance", required = false)
+    private String[] ANNOTATION_NAMES = null;
 
     /////////////////////////////
     // Private Member Variables
     /////////////////////////////
 
-    private final AnnotationDataManager dataManager = new AnnotationDataManager();
+    private AnnotationDataManager dataManager;
 
     //---------------------------------------------------------------------------------------------------------------
     //
@@ -76,6 +80,20 @@ public class AnalyzeAnnotationsWalker extends RodWalker {
 
     public void initialize() {
 
+        // Build the map from annotation key to the full label that is used on the plots.
+        // The map is always created (possibly empty) so the data manager never receives null.
+        final HashMap<String, String> nameMap = new HashMap<String, String>();
+        if( ANNOTATION_NAMES != null ) {
+            for( final String nameLine : ANNOTATION_NAMES ) {
+                final String[] vals = nameLine.split(",");
+                if( vals.length < 2 ) { // without this check vals[1] throws ArrayIndexOutOfBoundsException for values such as "DP" or "DP,"
+                    throw new IllegalArgumentException( "Malformed -name argument '" + nameLine + "': expected KEY,Label (for example DP,Depth)" );
+                }
+                nameMap.put(vals[0],vals[1]);
+            }
+        }
+        dataManager = new AnnotationDataManager( nameMap );
+
         if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
     }
 
@@ -89,13 +107,14 @@ public class AnalyzeAnnotationsWalker extends RodWalker {
 
         if( tracker != null ) {
 
-            // First find out if this variant is in the truth set
+            // First find out if this variant is in the truth sets
             boolean isInTruthSet = false;
             boolean isTrueVariant = false;
 
             for( ReferenceOrderedDatum rod : tracker.getAllRods() ) {
                 if( rod != null && rod.getName().toUpperCase().startsWith("TRUTH") ) {
                     isInTruthSet = true;
+                    // Next see if the truth sets say this site is variant or reference
                     if( rod instanceof RodVCF ) {
                         if( ((RodVCF) rod).isSNP() ) {
                             isTrueVariant = true;
diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java
index 53e9bbc90..55e456c18 100755
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java
@@ -43,9 +43,11 @@ import java.io.FileNotFoundException;
 public class AnnotationDataManager {
 
     public final HashMap> data;
+    public final HashMap<String, String> nameMap;
 
-    public AnnotationDataManager() {
+    public AnnotationDataManager( HashMap<String, String> _nameMap ) {
         data = new HashMap>();
+        nameMap = ( _nameMap != null ? _nameMap : new HashMap<String, String>() ); // null means no labels were supplied; keep an empty map so nameMap.get() is always safe
     }
 
     public void addAnnotations( final RodVCF variant, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) {
@@ -136,10 +138,15 @@ public class AnnotationDataManager {
 
             // Close the PrintStream
             output.close();
+
+            String annotationName = nameMap.get(annotationKey);
+            if( annotationName == null ) { // name is not in the map so use the key
+                annotationName = annotationKey;
+            }
 
             // Print out the command line to make it clear to the user what is being executed and how one might modify it
             final String rScriptCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTruthMetrics.R" + " " +
-                    OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationKey + " " + MIN_VARIANTS_PER_BIN;
+                    OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationName + " " + MIN_VARIANTS_PER_BIN;
             System.out.println( rScriptCommandLine );
 
             // Execute the RScript command to plot the table of TiTv values