From 5faf40b79deea87ceb0e682bf6e27721cc51f621 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Sat, 2 Jul 2011 10:39:53 -0400 Subject: [PATCH] Moving AnalyzeAnnotations into the archive because it has outlived its usefulness. --- .../R/plot_Annotations_BinnedTruthMetrics.R | 190 ------------------ .../AnalyzeAnnotationsWalker.java | 154 -------------- .../AnnotationDataManager.java | 169 ---------------- .../analyzeannotations/AnnotationDatum.java | 170 ---------------- public/packages/GenomeAnalysisTK.xml | 1 - 5 files changed, 684 deletions(-) delete mode 100644 public/R/plot_Annotations_BinnedTruthMetrics.R delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/analyzeannotations/AnalyzeAnnotationsWalker.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/analyzeannotations/AnnotationDataManager.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/analyzeannotations/AnnotationDatum.java diff --git a/public/R/plot_Annotations_BinnedTruthMetrics.R b/public/R/plot_Annotations_BinnedTruthMetrics.R deleted file mode 100644 index 9f9ee290c..000000000 --- a/public/R/plot_Annotations_BinnedTruthMetrics.R +++ /dev/null @@ -1,190 +0,0 @@ -#!/bin/env Rscript - -args <- commandArgs(TRUE) -verbose = TRUE - -input = args[1] -annotationName = args[2] -minBinCutoff = as.numeric(args[3]) -medianNumVariants = args[4] - -c <- read.table(input, header=T) - -all = c[c$numVariants>minBinCutoff & c$category=="all",] -novel = c[c$numVariants>minBinCutoff & c$category=="novel",] -dbsnp = c[c$numVariants>minBinCutoff & c$category=="dbsnp",] -truth = c[c$numVariants>minBinCutoff & c$category=="truth",] - -# -# Calculate min, max, medians -# - -d = c[c$numVariants>minBinCutoff,] -ymin = min(d$titv) -ymax = max(d$titv) -xmin = min(d$value) -xmax = max(d$value) -m = weighted.mean(all$value,all$numVariants/sum(all$numVariants)) -ma = all[all$value > m,] -mb = all[all$value < m,] -m75 = weighted.mean(ma$value,ma$numVariants/sum(ma$numVariants)) -m25 = weighted.mean(mb$value,mb$numVariants/sum(mb$numVariants)) -if(medianNumVariants == "true") { -vc = cumsum( all$numVariants/sum(all$numVariants) ) -m10 = all$value[ max(which(vc<=0.10)) ] -m25 = all$value[ max(which(vc<=0.25)) ] -m = all$value[ max(which(vc<=0.5)) ] -m75 = all$value[ min(which(vc>=0.75)) ] -m90 = all$value[ min(which(vc>=0.90)) ] -} - -# -# Plot TiTv ratio as a function of the annotation -# - -outfile = paste(input, ".TiTv.pdf", sep="") -pdf(outfile, height=7, width=7) -par(cex=1.1) -plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14); -axis(1,axTicks(1), format(axTicks(1), scientific=F)) -abline(v=m,lty=2,col="red") -abline(v=m75,lty=3) -abline(v=m25,lty=3) -text(m, ymin, "50", col="red", cex=0.6); -text(m75, ymin, "75", col="black", cex=0.6); -text(m25, ymin, "25", col="black", cex=0.6); -if(medianNumVariants == "true") { -abline(v=m90,lty=3) -abline(v=m10,lty=3) -text(m10, ymin, "10", col="black", cex=0.6); -text(m90, ymin, "90", col="black", cex=0.6); -} -points(novel$value,novel$titv,col="green",pch=20) -points(dbsnp$value,dbsnp$titv,col="blue",pch=20) -if( sum(all$truePositive==0) != length(all$truePositive) ) { -points(truth$value,truth$titv,col="magenta",pch=20) -legend("topleft", c("all","novel","dbsnp","truth"),col=c("black","green","blue","magenta"),pch=c(20,20,20,20)) -} else { -legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20)) -} -dev.off() - -# -# Plot TiTv ratio as a function of the annotation, log scale on the x-axis -# - -outfile = paste(input, ".TiTv_log.pdf", sep="") -pdf(outfile, height=7, width=7) -par(cex=1.1) -plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv Ratio",pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14); -axis(1,axTicks(1), format(axTicks(1), scientific=F)) -abline(v=m,lty=2,col="red") -abline(v=m75,lty=3) -abline(v=m25,lty=3) -text(m, ymin, "50", col="red", cex=0.6); -text(m75, ymin, "75", col="black", cex=0.6); -text(m25, ymin, "25", col="black", cex=0.6); -if(medianNumVariants == "true") { -abline(v=m90,lty=3) -abline(v=m10,lty=3) -text(m10, ymin, "10", col="black", cex=0.6); -text(m90, ymin, "90", col="black", cex=0.6); -} -points(novel$value,novel$titv,col="green",pch=20) -points(dbsnp$value,dbsnp$titv,col="blue",pch=20) -if( sum(all$truePositive==0) != length(all$truePositive) ) { -points(truth$value,truth$titv,col="magenta",pch=20) -legend("topleft", c("all","novel","dbsnp","truth"),col=c("black","green","blue","magenta"),pch=c(20,20,20,20)) -} else { -legend("topleft", c("all","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20)) -} -dev.off() - -# -# Plot dbsnp and true positive rate as a function of the annotation -# - -ymin = min(all$dbsnp) -ymax = max(all$dbsnp) -outfile = paste(input, ".truthRate.pdf", sep="") -pdf(outfile, height=7, width=7) -par(cex=1.1) -yLabel = "DBsnp Rate" -if( sum(all$truePositive==0) != length(all$truePositive) ) { -t = all[all$truePositive>0,] -yLabel = "DBsnp/True Positive Rate" -ymin = min(min(all$dbsnp),min(t$truePositive)) -ymax = max(max(all$dbsnp),max(t$truePositive)) -} -plot(all$value,all$dbsnp,xlab=annotationName,ylab=yLabel,pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14); -axis(1,axTicks(1), format(axTicks(1), scientific=F)) -abline(v=m,lty=2,col="red") -abline(v=m75,lty=3) -abline(v=m25,lty=3) -text(m, ymin, "50", col="red", cex=0.6); -text(m75, ymin, "75", col="black", cex=0.6); -text(m25, ymin, "25", col="black", cex=0.6); -if(medianNumVariants == "true") { -abline(v=m90,lty=3) -abline(v=m10,lty=3) -text(m10, ymin, "10", col="black", cex=0.6); -text(m90, ymin, "90", col="black", cex=0.6); -} -if( sum(all$truePositive==0) != length(all$truePositive) ) { -points(t$value,t$truePositive,col="magenta",pch=20); -legend("topleft", c("dbsnp","truth"),col=c("black","magenta"),pch=c(20,20)) -} -dev.off() - -# -# Plot dbsnp and true positive rate as a function of the annotation, log scale on the x-axis -# - -outfile = paste(input, ".truthRate_log.pdf", sep="") -pdf(outfile, height=7, width=7) -par(cex=1.1) -yLabel = "DBsnp Rate" -if( sum(all$truePositive==0) != length(all$truePositive) ) { -yLabel = "DBsnp/Truth Rate" -} -plot(all$value,all$dbsnp,xlab=annotationName,log="x",ylab=yLabel,ylim=c(ymin,ymax),pch=20,xaxt="n",ps=14); -axis(1,axTicks(1), format(axTicks(1), scientific=F)) -abline(v=m,lty=2,col="red") -abline(v=m75,lty=3) -abline(v=m25,lty=3) -text(m, ymin, "50", col="red", cex=0.6); -text(m75, ymin, "75", col="black", cex=0.6); -text(m25, ymin, "25", col="black", cex=0.6); -if(medianNumVariants == "true") { -abline(v=m90,lty=3) -abline(v=m10,lty=3) -text(m10, ymin, "10", col="black", cex=0.6); -text(m90, ymin, "90", col="black", cex=0.6); -} -if( sum(all$truePositive==0) != length(all$truePositive) ) { -points(t$value,t$truePositive,col="magenta",pch=20); -legend("topleft", c("dbsnp","truth"),col=c("black","magenta"),pch=c(20,20)) -} -dev.off() - -# -# Plot histogram of the annotation's value -# - -outfile = paste(input, ".Histogram.pdf", sep="") -pdf(outfile, height=7, width=7) -par(cex=1.1) -plot(all$value,all$numVariants,xlab=annotationName,ylab="Num variants in bin",type="h",xaxt="n",ps=14,lwd=4); -axis(1,axTicks(1), format(axTicks(1), scientific=F)) -dev.off() - -# -# Plot histogram of the annotation's value, log scale on x-axis -# - -outfile = paste(input, ".Histogram_log.pdf", sep="") -pdf(outfile, height=7, width=7) -par(cex=1.1) -plot(all$value,all$numVariants,xlab=annotationName,log="x",ylab="Num variants in bin",type="h",xaxt="n",ps=14,lwd=4); -axis(1,axTicks(1), format(axTicks(1), scientific=F)) -dev.off() diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/analyzeannotations/AnalyzeAnnotationsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/analyzeannotations/AnalyzeAnnotationsWalker.java deleted file mode 100755 index 54fd0a20c..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/analyzeannotations/AnalyzeAnnotationsWalker.java +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.analyzeannotations; - -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.commandline.Argument; - -import java.util.HashMap; -import java.util.Collection; - -/** - * Takes variant calls as .vcf files and creates plots of truth metrics as a function of the various annotations found in the INFO field. - * - * @author rpoplin - * @since Jan 15, 2010 - * @help.summary Takes variant calls as .vcf files and creates plots of truth metrics as a function of the various annotations found in the INFO field. - */ - -public class AnalyzeAnnotationsWalker extends RodWalker { - - ///////////////////////////// - // Command Line Arguments - ///////////////////////////// - @Argument(fullName = "output_prefix", shortName = "output", doc = "The output path and name to prepend to all plots and intermediate data files", required = false) - private String OUTPUT_PREFIX = "analyzeAnnotations/"; - @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false) - private String PATH_TO_RSCRIPT = "Rscript"; - @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false) - private String PATH_TO_RESOURCES = "R/"; - @Argument(fullName = "min_variants_per_bin", shortName = "minBinSize", doc = "The minimum number of variants in a bin in order to calculate truth metrics.", required = false) - private int MIN_VARIANTS_PER_BIN = 1000; - @Argument(fullName = "max_variants_per_bin", shortName = "maxBinSize", doc = "The maximum number of variants in a bin.", required = false) - private int MAX_VARIANTS_PER_BIN = 20000; - @Argument(fullName = "sampleName", shortName = "sampleName", doc = "If supplied, only process variants found in this sample.", required = false) - private String SAMPLE_NAME = null; - @Argument(fullName = "name", shortName = "name", doc = "Labels for the annotations to make plots look nicer. Each name is a separate -name argument. For example, -name DP,Depth -name AB,AlleleBalance", required = false) - private String[] ANNOTATION_NAMES = null; - @Argument(fullName = "indicate_mean_num_vars", shortName = "meanNumVars", doc = "If supplied, plots will indicate the distribution of number of variants instead of distribution of value of annotation", required = false) - private boolean INDICATE_MEAN_NUM_VARS = false; - - ///////////////////////////// - // Private Member Variables - ///////////////////////////// - private AnnotationDataManager dataManager; - - //--------------------------------------------------------------------------------------------------------------- - // - // initialize - // - //--------------------------------------------------------------------------------------------------------------- - - public void initialize() { - - // Create a HashMap associating the names of the annotations to full Strings that can be used as labels on plots - HashMap nameMap = null; - if( ANNOTATION_NAMES != null ) { - nameMap = new HashMap(); - for( String nameLine : ANNOTATION_NAMES ) { - String[] vals = nameLine.split(","); - nameMap.put(vals[0],vals[1]); - } - } - dataManager = new AnnotationDataManager( nameMap, INDICATE_MEAN_NUM_VARS ); - - if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; } - } - - //--------------------------------------------------------------------------------------------------------------- - // - // map - // - //--------------------------------------------------------------------------------------------------------------- - - public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { - - if( tracker != null ) { - Collection VCs = tracker.getAllVariantContexts(ref); - - // First find out if this variant is in the truth sets - boolean isInTruthSet = false; - boolean isTrueVariant = false; - for ( VariantContext vc : VCs ) { - if( vc != null && vc.isSNP() && !vc.isFiltered() ) { - if( vc.getSource().toUpperCase().startsWith("TRUTH") ) { - isInTruthSet = true; - if( vc.isBiallelic() && vc.isVariant() ) { - isTrueVariant = true; - } - } - } - } - - // Add each annotation in this VCF Record to the dataManager - for ( VariantContext vc : VCs ) { - if( vc != null && vc.isSNP() && vc.isBiallelic() && !vc.isFiltered() ) { - if( !vc.getSource().toUpperCase().startsWith("TRUTH") ) { - if( vc.isVariant() ) { - dataManager.addAnnotations( vc, SAMPLE_NAME, isInTruthSet, isTrueVariant ); - } - } - } - } - } - - return 1; // This value isn't actually used for anything - } - - //--------------------------------------------------------------------------------------------------------------- - // - // reduce - // - //--------------------------------------------------------------------------------------------------------------- - - public Integer reduceInit() { - return 0; // Nothing to do here - } - - public Integer reduce( Integer value, Integer sum ) { - return 0; // Nothing to do here - } - - public void onTraversalDone( Integer sum ) { - - // For each annotation, decide how to cut up the data, output intermediate cumulative p(true) tables, and call RScript to plot the tables - dataManager.plotCumulativeTables(PATH_TO_RSCRIPT, PATH_TO_RESOURCES, OUTPUT_PREFIX, MIN_VARIANTS_PER_BIN, MAX_VARIANTS_PER_BIN); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/analyzeannotations/AnnotationDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/analyzeannotations/AnnotationDataManager.java deleted file mode 100755 index 2c06b30d6..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/analyzeannotations/AnnotationDataManager.java +++ /dev/null @@ -1,169 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.analyzeannotations; - -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.File; -import java.util.*; -import java.io.IOException; -import java.io.PrintStream; -import java.io.FileNotFoundException; - -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Jan 18, 2010 - */ - -public class AnnotationDataManager { - - private final HashMap> data; - private final HashMap nameMap; - private final boolean INDICATE_MEAN_NUM_VARS; - - public AnnotationDataManager( HashMap _nameMap, boolean _INDICATE_MEAN_NUM_VARS ) { - data = new HashMap>(); - nameMap = _nameMap; - INDICATE_MEAN_NUM_VARS = _INDICATE_MEAN_NUM_VARS; - } - - public void addAnnotations( final VariantContext vc, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) { - - if( sampleName != null ) { // Only process variants that are found in the sample with this sampleName - if( vc.getGenotype(sampleName).isNoCall() ) { // This variant isn't found in this sample so break out - return; - } - } // else, process all samples - - // Loop over each annotation in the vcf record - final Map infoField = new HashMap(vc.getAttributes()); - infoField.put("QUAL", ((Double)vc.getPhredScaledQual()).toString() ); // add QUAL field to annotations - for( Map.Entry annotation : infoField.entrySet() ) { - - float value; - try { - value = Float.parseFloat( annotation.getValue().toString() ); - } catch( NumberFormatException e ) { - continue; // Skip over annotations that aren't floats, like "DB" - } - - TreeSet treeSet = data.get( annotation.getKey() ); - if( treeSet == null ) { // This annotation hasn't been seen before - treeSet = new TreeSet( new AnnotationDatum() ); // AnnotationDatum is a Comparator that orders variants by the value of the Annotation - data.put( annotation.getKey(), treeSet ); - } - AnnotationDatum datum = new AnnotationDatum( value ); - if( treeSet.contains(datum) ) { // contains() uses AnnotationDatum's equals function, so it only checks if the value field is already present - datum = treeSet.tailSet(datum).first(); - } else { - treeSet.add(datum); - } - - final boolean isNovelVariant = !vc.hasID() || !vc.getID().contains("rs"); - - // Decide if the variant is a transition or transversion - if( VariantContextUtils.getSNPSubstitutionType(vc).compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0 ) { - datum.incrementTi( isNovelVariant, isInTruthSet, isTrueVariant ); - } else { - datum.incrementTv( isNovelVariant, isInTruthSet, isTrueVariant ); - } - } - } - - public void plotCumulativeTables( final String PATH_TO_RSCRIPT, final String PATH_TO_RESOURCES, final String OUTPUT_PREFIX, - final int MIN_VARIANTS_PER_BIN, final int MAX_VARIANTS_PER_BIN ) { - - final AnnotationDatum thisAnnotationBin = new AnnotationDatum(); - System.out.println( "\nFinished reading variants into memory. Executing RScript commands:" ); - - // For each annotation we've seen - for( final String annotationKey : data.keySet() ) { - - String filename = OUTPUT_PREFIX + annotationKey + ".dat"; - PrintStream output; - try { - output = new PrintStream(filename); // Create the intermediate data file for this annotation - } catch ( FileNotFoundException e ) { - throw new UserException.CouldNotCreateOutputFile(new File(filename), "Can't create intermediate output annotation data file. Does the output directory exist?", e); - } - - // Output a header line - output.println("value\ttitv\tdbsnp\ttruePositive\tnumVariants\tcategory"); - - // Bin SNPs and calculate truth metrics for each bin - thisAnnotationBin.clearBin(); - for( final AnnotationDatum datum : data.get( annotationKey ) ) { - thisAnnotationBin.combine( datum ); - if( thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) >= MAX_VARIANTS_PER_BIN ) { // This annotation bin is full - output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.FULL_SET ) + "\t" + thisAnnotationBin.calcDBsnpRate() + "\t" + thisAnnotationBin.calcTPrate() + - "\t" + thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) + "\tall"); - output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.NOVEL_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.NOVEL_SET ) + "\tnovel"); - output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.DBSNP_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.DBSNP_SET ) + "\tdbsnp"); - output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.TRUTH_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.TRUTH_SET ) + "\ttruth"); - thisAnnotationBin.clearBin(); - } - // else, continue accumulating variants because this bin isn't full yet - } - - // One final bin that may not have been dumped out - if( thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) != 0 ) { - output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.FULL_SET ) + "\t" + thisAnnotationBin.calcDBsnpRate() + "\t" + thisAnnotationBin.calcTPrate() + - "\t" + thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) + "\tall"); - output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.NOVEL_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.NOVEL_SET ) + "\tnovel"); - output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.DBSNP_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.DBSNP_SET ) + "\tdbsnp"); - output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.TRUTH_SET ) + "\t-1\t-1\t" + thisAnnotationBin.numVariants( AnnotationDatum.TRUTH_SET ) + "\ttruth"); - thisAnnotationBin.clearBin(); - } - - // Close the PrintStream - output.close(); - - String annotationName = null; - if( nameMap != null ) { - annotationName = nameMap.get(annotationKey); - } - if( annotationName == null ) { // This annotation is not in the name map so use the key instead - annotationName = annotationKey; - } - - // Print out the command line to make it clear to the user what is being executed and how one might modify it - final String rScriptCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTruthMetrics.R" + " " + - OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationName + " " + MIN_VARIANTS_PER_BIN + " " + INDICATE_MEAN_NUM_VARS; - System.out.println( rScriptCommandLine ); - - // Execute the RScript command to plot the table of truth values - try { - Runtime.getRuntime().exec( rScriptCommandLine ); - } catch ( IOException e ) { - throw new UserException.CannotExecuteRScript( rScriptCommandLine, e ); - } - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/analyzeannotations/AnnotationDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/analyzeannotations/AnnotationDatum.java deleted file mode 100755 index 888eb7e6e..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/analyzeannotations/AnnotationDatum.java +++ /dev/null @@ -1,170 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.analyzeannotations; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.Comparator; - -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Jan 18, 2010 - */ - -public class AnnotationDatum implements Comparator { - - public float value; - private final int[] ti; - private final int[] tv; - - public static final int FULL_SET = 0; - public static final int NOVEL_SET = 1; - public static final int DBSNP_SET = 2; - public static final int TRUTH_SET = 3; - public static final int TRUE_POSITIVE = 4; - private static final int NUM_SETS = 5; - - public AnnotationDatum() { - - value = 0.0f; - ti = new int[NUM_SETS]; - tv = new int[NUM_SETS]; - for( int iii = 0; iii < NUM_SETS; iii++ ) { - ti[iii] = 0; - tv[iii] = 0; - } - } - - public AnnotationDatum( final float _value ) { - - value = _value; - ti = new int[NUM_SETS]; - tv = new int[NUM_SETS]; - for( int iii = 0; iii < NUM_SETS; iii++ ) { - ti[iii] = 0; - tv[iii] = 0; - } - } - - final public void incrementTi( final boolean isNovelVariant, final boolean isInTruthSet, final boolean isTrueVariant ) { - - ti[FULL_SET]++; - if( isNovelVariant ) { - ti[NOVEL_SET]++; - } else { // Is known, in DBsnp - ti[DBSNP_SET]++; - } - if( isInTruthSet ) { - ti[TRUTH_SET]++; - if( isTrueVariant ) { - ti[TRUE_POSITIVE]++; - } - } - } - - final public void incrementTv( final boolean isNovelVariant, final boolean isInTruthSet, final boolean isTrueVariant ) { - - tv[FULL_SET]++; - if( isNovelVariant ) { - tv[NOVEL_SET]++; - } else { // Is known, in DBsnp - tv[DBSNP_SET]++; - } - if( isInTruthSet ) { - tv[TRUTH_SET]++; - if( isTrueVariant ) { - tv[TRUE_POSITIVE]++; - } - } - } - - final public void combine( final AnnotationDatum that ) { - - for( int iii = 0; iii < NUM_SETS; iii++ ) { - this.ti[iii] += that.ti[iii]; - this.tv[iii] += that.tv[iii]; - } - this.value = that.value; // Overwrite this bin's value - } - - final public float calcTiTv( final int INDEX ) { - - if( ti[INDEX] < 0 || tv[INDEX] < 0 ) { - throw new ReviewedStingException( "Integer overflow detected! There are too many variants piled up in one annotation bin." ); - } - - if( tv[INDEX] == 0 ) { // Don't divide by zero - return 0.0f; - } - - return ((float) ti[INDEX]) / ((float) tv[INDEX]); - } - - final public float calcDBsnpRate() { - - if( ti[FULL_SET] + tv[FULL_SET] == 0 ) { // Don't divide by zero - return 0.0f; - } - - return 100.0f * ((float) ti[DBSNP_SET] + tv[DBSNP_SET]) / - ((float) ti[FULL_SET] + tv[FULL_SET]); - } - - final public float calcTPrate() { - - if( ti[TRUTH_SET] + tv[TRUTH_SET] == 0 ) { // Don't divide by zero - return 0.0f; - } - - return 100.0f * ((float) ti[TRUE_POSITIVE] + tv[TRUE_POSITIVE]) / - ((float) ti[TRUTH_SET] + tv[TRUTH_SET]); - } - - final public int numVariants( final int INDEX ) { - return ti[INDEX] + tv[INDEX]; - } - - final public void clearBin() { - value = 0.0f; - for( int iii = 0; iii < NUM_SETS; iii++ ) { - ti[iii] = 0; - tv[iii] = 0; - } - } - - public int compare( AnnotationDatum a1, AnnotationDatum a2 ) { // Function needed for this to be a Comparator - if( a1.value < a2.value ) { return -1; } - if( a1.value > a2.value ) { return 1; } - return 0; - } - - public int equals( AnnotationDatum that ) { // Function needed for this to be sorted correctly in a TreeSet - if( this.value < that.value ) { return -1; } - if( this.value > that.value ) { return 1; } - return 0; - } -} diff --git a/public/packages/GenomeAnalysisTK.xml b/public/packages/GenomeAnalysisTK.xml index f923ea698..14b837211 100644 --- a/public/packages/GenomeAnalysisTK.xml +++ b/public/packages/GenomeAnalysisTK.xml @@ -19,7 +19,6 @@ -