From 4bcdab580cabaefde3e36e9a3ab87f93fc6f81ed Mon Sep 17 00:00:00 2001 From: rpoplin Date: Wed, 27 Jan 2010 04:50:54 +0000 Subject: [PATCH] --output_dir has been changed to --output_prefix to give the user more control over the names of the resulting mass of files in AnalyzeAnnotations. The fontsize of the axes is increased. Cumulative filtering plots are removed since the binned filtering plots are much more useful. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2700 348d0f76-0448-11de-a6fe-93d51630548a --- R/plot_Annotations_BinnedTiTv.R | 17 +++-- R/plot_Annotations_CumulativeTiTv.R | 30 -------- .../AnalyzeAnnotationsWalker.java | 16 +---- .../AnnotationDataManager.java | 69 +++---------------- 4 files changed, 21 insertions(+), 111 deletions(-) delete mode 100644 R/plot_Annotations_CumulativeTiTv.R diff --git a/R/plot_Annotations_BinnedTiTv.R b/R/plot_Annotations_BinnedTiTv.R index 88e9597a3..99596aa5e 100644 --- a/R/plot_Annotations_BinnedTiTv.R +++ b/R/plot_Annotations_BinnedTiTv.R @@ -4,9 +4,8 @@ args <- commandArgs(TRUE) verbose = TRUE input = args[1] -outputDir = args[2] -annotationName = args[3] -minBinCutoff = as.numeric(args[4]) +annotationName = args[2] +minBinCutoff = as.numeric(args[3]) c <- read.table(input, header=T) @@ -25,10 +24,10 @@ ymax = max(d$titv) xmin = min(d$value) xmax = max(d$value) -outfile = paste(outputDir, "binnedTiTv.", annotationName, ".pdf", sep="") +outfile = paste(input, ".TiTv.", annotationName, ".pdf", sep="") pdf(outfile, height=7, width=7) par(cex=1.1) -plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n"); +plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14); axis(1,axTicks(1), format(axTicks(1), scientific=F)) m = weighted.mean(all$value,all$numVariants/sum(all$numVariants)) ma = all[all$value > m,] @@ -43,10 +42,10 @@ points(dbsnp$value,dbsnp$titv,col="blue",pch=20) legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20)) dev.off() -outfile = paste(outputDir, "binnedTiTv_log.", annotationName, ".pdf", sep="") +outfile = paste(input, ".TiTv_log.", annotationName, ".pdf", sep="") pdf(outfile, height=7, width=7) par(cex=1.1) -plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n"); +plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14); axis(1,axTicks(1), format(axTicks(1), scientific=F)) abline(v=m,lty=2) abline(v=m75,lty=2) @@ -57,9 +56,9 @@ legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(2 dev.off() -outfile = paste(outputDir, "binnedTiTv_hist.", annotationName, ".pdf", sep="") +outfile = paste(input, "TiTv_hist.", annotationName, ".pdf", sep="") pdf(outfile, height=7, width=7) par(cex=1.1) -plot(all$value,all$numVariants,xlab=annotationName,ylab="num Variants in bin",type="h",xaxt="n"); +plot(all$value,all$numVariants,xlab=annotationName,ylab="num variants in bin",type="h",xaxt="n",ps=14); axis(1,axTicks(1), format(axTicks(1), scientific=F)) dev.off() \ No newline at end of file diff --git a/R/plot_Annotations_CumulativeTiTv.R b/R/plot_Annotations_CumulativeTiTv.R deleted file mode 100644 index 39ad292e5..000000000 --- a/R/plot_Annotations_CumulativeTiTv.R +++ /dev/null @@ -1,30 +0,0 @@ -#!/broad/tools/apps/R-2.6.0/bin/Rscript - -args <- commandArgs(TRUE) -verbose = TRUE - -input = args[1] -outputDir = args[2] -annotationName = args[3] - - -c <- read.table(input, header=T) - -# -# Plot cumulative Ti/Tv ratio as a function of the annotation -# - -gt = c[c$GT==1 & c$numVariants>1000,] -lt = c[c$GT==0 & c$numVariants>1000,] - -outfile = paste(outputDir, "cumulativeTiTv.", annotationName, ".GTfilter.pdf", sep="") -pdf(outfile, height=7, width=7) -par(cex=1.1) -plot(gt$value,gt$cumulativeTiTv,xlab=annotationName,ylab="Ti/Tv ratio",main=paste("Filter out SNPs with",annotationName,"> x",sep=" "),pch=20); -dev.off() - -outfile = paste(outputDir, "cumulativeTiTv.", annotationName, ".GTfilter.pdf", sep="") -pdf(outfile, height=7, width=7) -par(cex=1.1) -plot(lt$value,lt$cumulativeTiTv,xlab=annotationName,ylab="Ti/Tv ratio",main=paste("Filter out SNPs with",annotationName,"< x",sep=" "),pch=20); -dev.off() \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java index 58bcf5d38..7a27ef31d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java @@ -48,8 +48,8 @@ public class AnalyzeAnnotationsWalker extends RodWalker { ///////////////////////////// // Command Line Arguments ///////////////////////////// - @Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false) - private String OUTPUT_DIR = "analyzeAnnotations/"; + @Argument(fullName = "output_prefix", shortName = "output", doc = "The output path and name to prepend to all plots and intermediate data files", required = false) + private String OUTPUT_PREFIX = "analyzeAnnotations/"; @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is probably /broad/tools/apps/R-2.6.0/bin/Rscript", required = false) private String PATH_TO_RSCRIPT = "/broad/tools/apps/R-2.6.0/bin/Rscript"; @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false) @@ -73,18 +73,8 @@ public class AnalyzeAnnotationsWalker extends RodWalker { // //--------------------------------------------------------------------------------------------------------------- - /** - * Create the output directory and setup the path variables - */ public void initialize() { - // create the output directory where all the data tables and plots will go - try { - Process p = Runtime.getRuntime().exec("mkdir " + OUTPUT_DIR); - } catch (IOException e) { - throw new RuntimeException("Couldn't create directory: " + OUTPUT_DIR); - } - if( !OUTPUT_DIR.endsWith("/") ) { OUTPUT_DIR = OUTPUT_DIR + "/"; } if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; } } @@ -128,6 +118,6 @@ public class AnalyzeAnnotationsWalker extends RodWalker { public void onTraversalDone( Integer sum ) { // For each annotation, decide how to cut up the data, output intermediate cumulative p(true) tables, and call RScript to plot the tables - dataManager.plotCumulativeTables(PATH_TO_RSCRIPT, PATH_TO_RESOURCES, OUTPUT_DIR, MIN_VARIANTS_PER_BIN, MAX_VARIANTS_PER_BIN); + dataManager.plotCumulativeTables(PATH_TO_RSCRIPT, PATH_TO_RESOURCES, OUTPUT_PREFIX, MIN_VARIANTS_PER_BIN, MAX_VARIANTS_PER_BIN); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java index cd3be3640..d022b66e8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java @@ -138,55 +138,19 @@ public class AnnotationDataManager { } } - public void plotCumulativeTables( final String PATH_TO_RSCRIPT, final String PATH_TO_RESOURCES, final String OUTPUT_DIR, + public void plotCumulativeTables( final String PATH_TO_RSCRIPT, final String PATH_TO_RESOURCES, final String OUTPUT_PREFIX, final int MIN_VARIANTS_PER_BIN, final int MAX_VARIANTS_PER_BIN ) { + System.out.println( "\nExecuting RScript commands:" ); + for( String annotationKey: dataFull.keySet() ) { - /* PrintStream output; try { - output = new PrintStream(OUTPUT_DIR + annotationKey + ".cumulative.dat"); + output = new PrintStream(OUTPUT_PREFIX + annotationKey + ".dat"); } catch (FileNotFoundException e) { - throw new StingException("Can't create intermediate output annotation data file."); - } - // Output a header line - output.println("value\tcumulativeTiTv\tnumVariants\tGT"); - - // Filter SNPs greater than this annotation value - int numTi = 0; - int numTv = 0; - for( AnnotationDatum datum : data.get( annotationKey ) ) { - numTi += datum.numTransitions; - numTv += datum.numTransversions; - float titv; - if( numTv == 0) { titv = 0.0f; } - else { titv = ((float) numTi) / ((float) numTv); } - output.println(datum.value + "\t" + titv + "\t" + (numTi+numTv) +"\t1"); - - } - - // Filter SNPs less than this annotation value - numTi = 0; - numTv = 0; - Iterator iter = data.get( annotationKey ).descendingIterator(); - while( iter.hasNext() ) { - final AnnotationDatum datum = iter.next(); - numTi += datum.numTransitions; - numTv += datum.numTransversions; - float titv; - if( numTv == 0) { titv = 0.0f; } - else { titv = ((float) numTi) / ((float) numTv); } - output.println(datum.value + "\t" + titv + "\t" + (numTi+numTv) +"\t0"); - - } - */ - - PrintStream output; - try { - output = new PrintStream(OUTPUT_DIR + annotationKey + ".binned.dat"); - } catch (FileNotFoundException e) { - throw new StingException("Can't create intermediate output annotation data file."); + throw new StingException("Can't create intermediate output annotation data file. Does the output directory exist? " + + OUTPUT_PREFIX + annotationKey + ".dat"); } // Output a header line @@ -266,30 +230,17 @@ public class AnnotationDataManager { output.println(lastDatum.value + "\t" + titv + "\t" + (numTi+numTv)+ "\t2"); } - - /* - System.out.println(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_CumulativeTiTv.R" + " " + - OUTPUT_DIR + annotationKey + ".cumulative.dat" + " " + OUTPUT_DIR + " " + annotationKey); - - try { - Process p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_CumulativeTiTv.R" + " " + - OUTPUT_DIR + annotationKey + ".cumulative.dat"+ " " + OUTPUT_DIR + " " + annotationKey); - } catch (Exception e) { - throw new StingException("Unable to execute RScript command"); - } - */ + output.close(); System.out.println(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTiTv.R" + " " + - OUTPUT_DIR + annotationKey + ".binned.dat" + " " + OUTPUT_DIR + " " + annotationKey + - " " + MIN_VARIANTS_PER_BIN); + OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationKey + " " + MIN_VARIANTS_PER_BIN); // Execute the RScript command to plot the table of TiTv values try { final Process p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTiTv.R" + " " + - OUTPUT_DIR + annotationKey + ".binned.dat" + " " + OUTPUT_DIR + " " + annotationKey + - " " + MIN_VARIANTS_PER_BIN); + OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationKey + " " + MIN_VARIANTS_PER_BIN); } catch (Exception e) { - throw new StingException("Unable to execute RScript command"); + throw new StingException( "Unable to execute RScript command" ); } } }