diff --git a/R/plot_Annotations_BinnedTiTv.R b/R/plot_Annotations_BinnedTiTv.R new file mode 100644 index 000000000..01490b4bd --- /dev/null +++ b/R/plot_Annotations_BinnedTiTv.R @@ -0,0 +1,47 @@ +#!/broad/tools/apps/R-2.6.0/bin/Rscript + +args <- commandArgs(TRUE) +verbose = TRUE + +input = args[1] +outputDir = args[2] +annotationName = args[3] +minBinCutoff = as.numeric(args[4]) + + +c <- read.table(input, header=T) + +# +# Plot TiTv ratio as a function of the annotation +# + +gt = c[c$numVariants>minBinCutoff,] + +outfile = paste(outputDir, "binnedTiTv.", annotationName, ".pdf", sep="") +pdf(outfile, height=7, width=7) +par(cex=1.1) +plot(gt$value,gt$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20); +m = weighted.mean(gt$value,gt$numVariants/sum(gt$numVariants)) +ma = gt[gt$value > m,] +mb = gt[gt$value < m,] +m75 = weighted.mean(ma$value,ma$numVariants/sum(ma$numVariants)) +m25 = weighted.mean(mb$value,mb$numVariants/sum(mb$numVariants)) +abline(v=m,lty=2) +abline(v=m75,lty=2) +abline(v=m25,lty=2) +dev.off() + +outfile = paste(outputDir, "binnedTiTv_quartiles.", annotationName, ".pdf", sep="") +pdf(outfile, height=7, width=7) +par(cex=1.1) +plot(gt$value,gt$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,xlim=c(0,80)) +abline(v=m,lty=2) +abline(v=m75,lty=2) +abline(v=m25,lty=2) +dev.off() + +outfile = paste(outputDir, "binnedTiTv_hist.", annotationName, ".pdf", sep="") +pdf(outfile, height=7, width=7) +par(cex=1.1) +plot(gt$value,gt$numVariants,xlab=annotationName,ylab="num Variants in bin",type="h"); +dev.off() \ No newline at end of file diff --git a/R/plot_Annotations_CumulativeTiTv.R b/R/plot_Annotations_CumulativeTiTv.R index 6b981b2e1..39ad292e5 100644 --- a/R/plot_Annotations_CumulativeTiTv.R +++ b/R/plot_Annotations_CumulativeTiTv.R @@ -4,25 +4,26 @@ args <- commandArgs(TRUE) verbose = TRUE input = args[1] -annotationName = args[2] +outputDir = args[2] +annotationName = args[3] c <- read.table(input, header=T) # -# Plot residual error as a function of the covariate +# Plot cumulative Ti/Tv ratio as a function of the annotation # gt = c[c$GT==1 & c$numVariants>1000,] lt = c[c$GT==0 & c$numVariants>1000,] -outfile = paste(input, ".cumulativeTiTv_", annotationName, "_GTfilter.pdf", sep="") +outfile = paste(outputDir, "cumulativeTiTv.", annotationName, ".GTfilter.pdf", sep="") pdf(outfile, height=7, width=7) par(cex=1.1) plot(gt$value,gt$cumulativeTiTv,xlab=annotationName,ylab="Ti/Tv ratio",main=paste("Filter out SNPs with",annotationName,"> x",sep=" "),pch=20); dev.off() -outfile = paste(input, ".cumulativeTiTv_", annotationName, "_LTfilter.pdf", sep="") +outfile = paste(outputDir, "cumulativeTiTv.", annotationName, ".GTfilter.pdf", sep="") pdf(outfile, height=7, width=7) par(cex=1.1) plot(lt$value,lt$cumulativeTiTv,xlab=annotationName,ylab="Ti/Tv ratio",main=paste("Filter out SNPs with",annotationName,"< x",sep=" "),pch=20); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java index 353830a70..7b647385d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java @@ -52,6 +52,11 @@ public class AnalyzeAnnotationsWalker extends RodWalker { private String PATH_TO_RSCRIPT = "/broad/tools/apps/R-2.6.0/bin/Rscript"; @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false) private String PATH_TO_RESOURCES = "R/"; + @Argument(fullName = "min_variants_per_bin", shortName = "minBinSize", doc = "The minimum number of variants in a bin in order to calculate truth metrics.", required = false) + private int MIN_VARIANTS_PER_BIN = 1000; + @Argument(fullName = "max_variants_per_bin", shortName = "maxBinSize", doc = "The maximum number of variants in a bin.", required = false) + private int MAX_VARIANTS_PER_BIN = 20000; + ///////////////////////////// // Private Member Variables @@ -116,6 +121,6 @@ public class AnalyzeAnnotationsWalker extends RodWalker { public void onTraversalDone( Integer sum ) { // For each annotation, decide how to cut up the data, output intermediate cumulative p(true) tables, and call RScript to plot the tables - dataManager.plotCumulativeTables(PATH_TO_RSCRIPT, PATH_TO_RESOURCES, OUTPUT_DIR); + dataManager.plotCumulativeTables(PATH_TO_RSCRIPT, PATH_TO_RESOURCES, OUTPUT_DIR, MIN_VARIANTS_PER_BIN, MAX_VARIANTS_PER_BIN); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java index ce903a8ee..2638adfb8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java @@ -52,11 +52,14 @@ public class AnnotationDataManager { // Loop over each annotation in the vcf record final Map infoField = variant.getInfoValues(); for( String annotationKey : infoField.keySet() ) { + if( annotationKey.equalsIgnoreCase("AC") ) { + continue; // This annotation isn't a float value + } final float value = Float.parseFloat( infoField.get( annotationKey ) ); TreeSet treeSet = data.get( annotationKey ); if( treeSet == null ) { // This annotation hasn't been seen before - treeSet = new TreeSet( new AnnotationDatum() ); + treeSet = new TreeSet( new AnnotationDatum() ); // AnnotationDatum is a Comparator that orders variants by the value of the Annotation data.put( annotationKey, treeSet ); } AnnotationDatum datum = new AnnotationDatum( value ); @@ -66,6 +69,7 @@ public class AnnotationDataManager { treeSet.add(datum); } + // Decide if it was a Ti or a Tv if( BaseUtils.isTransition(variant.getReferenceForSNP(), variant.getAlternativeBaseForSNP()) ) { datum.incrementTi(); } else { @@ -74,21 +78,25 @@ public class AnnotationDataManager { } } - public void plotCumulativeTables(final String PATH_TO_RSCRIPT, final String PATH_TO_RESOURCES, final String OUTPUT_DIR) { + public void plotCumulativeTables( final String PATH_TO_RSCRIPT, final String PATH_TO_RESOURCES, final String OUTPUT_DIR, + final int MIN_VARIANTS_PER_BIN, final int MAX_VARIANTS_PER_BIN ) { for( String annotationKey: data.keySet() ) { + + /* PrintStream output; try { - output = new PrintStream(OUTPUT_DIR + annotationKey + ".dat"); + output = new PrintStream(OUTPUT_DIR + annotationKey + ".cumulative.dat"); } catch (FileNotFoundException e) { throw new StingException("Can't create intermediate output annotation data file."); } // Output a header line output.println("value\tcumulativeTiTv\tnumVariants\tGT"); + // Filter SNPs greater than this annotation value int numTi = 0; int numTv = 0; - for( AnnotationDatum datum : data.get( annotationKey )) { + for( AnnotationDatum datum : data.get( annotationKey ) ) { numTi += datum.numTransitions; numTv += datum.numTransversions; float titv; @@ -98,6 +106,7 @@ public class AnnotationDataManager { } + // Filter SNPs less than this annotation value numTi = 0; numTv = 0; Iterator iter = data.get( annotationKey ).descendingIterator(); @@ -111,15 +120,66 @@ public class AnnotationDataManager { output.println(datum.value + "\t" + titv + "\t" + (numTi+numTv) +"\t0"); } + */ + PrintStream output; + try { + output = new PrintStream(OUTPUT_DIR + annotationKey + ".binned.dat"); + } catch (FileNotFoundException e) { + throw new StingException("Can't create intermediate output annotation data file."); + } + + // Output a header line + output.println("value\ttitv\tnumVariants\t"); + + // Bin SNPs and calculate truth metrics for each bin, right now this is just TiTv + int numTi = 0; + int numTv = 0; + AnnotationDatum lastDatum = null; + for( AnnotationDatum datum : data.get( annotationKey ) ) { + numTi += datum.numTransitions; + numTv += datum.numTransversions; + lastDatum = datum; + if( numTi + numTv >= MAX_VARIANTS_PER_BIN ) { // This annotation bin is full + float titv; + if( numTv == 0) { titv = 0.0f; } + else { titv = ((float) numTi) / ((float) numTv); } + output.println(datum.value + "\t" + titv + "\t" + (numTi+numTv)); + numTi = 0; + numTv = 0; + } + // else, continue accumulating variants because this bin isn't full yet + } + if( numTi != 0 || numTv != 0 ) { // one final bin that may not have been dumped out + float titv; + if( numTv == 0) { titv = 0.0f; } + else { titv = ((float) numTi) / ((float) numTv); } + output.println(lastDatum.value + "\t" + titv + "\t" + (numTi+numTv)); + } + + /* System.out.println(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_CumulativeTiTv.R" + " " + - OUTPUT_DIR + annotationKey + ".dat" + " " + annotationKey); + OUTPUT_DIR + annotationKey + ".cumulative.dat" + " " + OUTPUT_DIR + " " + annotationKey); try { Process p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_CumulativeTiTv.R" + " " + - OUTPUT_DIR + annotationKey + ".dat" + " " + annotationKey); + OUTPUT_DIR + annotationKey + ".cumulative.dat"+ " " + OUTPUT_DIR + " " + annotationKey); } catch (Exception e) { - throw new StingException("Unable to run RScript command"); + throw new StingException("Unable to execute RScript command"); + } + */ + + //System.out.println(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTiTv.R" + " " + + // OUTPUT_DIR + annotationKey + ".binned.dat" + " " + OUTPUT_DIR + " " + annotationKey + + // " " + MIN_VARIANTS_PER_BIN); + + // Execute the RScript command to plot the table of TiTv values + try { + final Process p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTiTv.R" + " " + + OUTPUT_DIR + annotationKey + ".binned.dat" + " " + OUTPUT_DIR + " " + annotationKey + + " " + MIN_VARIANTS_PER_BIN); + } catch (Exception e) { + throw new StingException("Unable to execute RScript command"); } } }