diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java index 7a27ef31d..4863fe5a8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java @@ -8,8 +8,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.cmdLine.Argument; -import java.io.IOException; - /* * Copyright (c) 2010 The Broad Institute * diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java index 5d467669e..74a88d3c2 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.StingException; import java.util.*; +import java.io.IOException; import java.io.PrintStream; import java.io.FileNotFoundException; @@ -47,7 +48,7 @@ public class AnnotationDataManager { data = new HashMap>(); } - public void addAnnotations( RodVCF variant, String sampleName ) { + public void addAnnotations( final RodVCF variant, final String sampleName ) { if( sampleName != null ) { // Only process variants that are found in the sample with this sampleName if( variant.getGenotype(sampleName).isNoCall() ) { // This variant isn't found in this sample so break out @@ -60,14 +61,10 @@ public class AnnotationDataManager { for( String annotationKey : infoField.keySet() ) { float value = 0.0f; - boolean skipThisAnnotation = false; try { value = Float.parseFloat( infoField.get( annotationKey ) ); } catch( NumberFormatException e ) { - skipThisAnnotation = true; // Skip over annotations that aren't floats, like older versions of "AC" and "DB" - } - if( skipThisAnnotation ) { - continue; // Skip over annotations that aren't floats, like older versions of "AC" and "DB" + continue; // Skip over annotations that aren't floats, like "DB" } TreeSet treeSet = data.get( annotationKey ); @@ -76,17 +73,17 @@ public class AnnotationDataManager { data.put( annotationKey, treeSet ); } AnnotationDatum datum = new AnnotationDatum( value ); - if( treeSet.contains(datum) ) { + if( treeSet.contains(datum) ) { // contains uses AnnotationDatum's equals function, so it only checks if the value field is already present datum = treeSet.tailSet(datum).first(); } else { treeSet.add(datum); } final boolean isNovelVariant = variant.getID().equals("."); - final boolean isTrueVariant = false; + final boolean isTrueVariant = false; //BUGBUG: Check truth input file to see if this variant is in the truth set - // Decide if it was a Ti or a Tv - if( BaseUtils.isTransition(variant.getReferenceForSNP(), variant.getAlternativeBaseForSNP()) ) { + // Decide if the variant is a transition or transversion + if( BaseUtils.isTransition( variant.getReferenceForSNP(), variant.getAlternativeBaseForSNP()) ) { datum.incrementTi( isNovelVariant, isTrueVariant ); } else { datum.incrementTv( isNovelVariant, isTrueVariant ); @@ -97,6 +94,7 @@ public class AnnotationDataManager { public void plotCumulativeTables( final String PATH_TO_RSCRIPT, final String PATH_TO_RESOURCES, final String OUTPUT_PREFIX, final int MIN_VARIANTS_PER_BIN, final int MAX_VARIANTS_PER_BIN ) { + final AnnotationDatum thisAnnotationBin = new AnnotationDatum(); System.out.println( "\nExecuting RScript commands:" ); // For each annotation we've seen @@ -113,8 +111,8 @@ public class AnnotationDataManager { // Output a header line output.println("value\ttitv\tnumVariants\tcategory"); - // Bin SNPs and calculate truth metrics for each bin, right now this is just TiTv - AnnotationDatum thisAnnotationBin = new AnnotationDatum(); + // Bin SNPs and calculate truth metrics for each bin + thisAnnotationBin.clearBin(); for( AnnotationDatum datum : data.get( annotationKey ) ) { thisAnnotationBin.combine( datum ); if( thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) >= MAX_VARIANTS_PER_BIN ) { // This annotation bin is full @@ -126,24 +124,26 @@ public class AnnotationDataManager { // else, continue accumulating variants because this bin isn't full yet } - if( thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) != 0 ) { // one final bin that may not have been dumped out + if( thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) != 0 ) { // One final bin that may not have been dumped out output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.FULL_SET ) + "\t" + thisAnnotationBin.numVariants( AnnotationDatum.FULL_SET ) + "\tall"); output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.NOVEL_SET ) + "\t" + thisAnnotationBin.numVariants( AnnotationDatum.NOVEL_SET ) + "\tnovel"); output.println( thisAnnotationBin.value + "\t" + thisAnnotationBin.calcTiTv( AnnotationDatum.DBSNP_SET ) + "\t" + thisAnnotationBin.numVariants( AnnotationDatum.DBSNP_SET ) + "\tdbsnp"); } + // Close the PrintStream output.close(); - System.out.println(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTiTv.R" + " " + - OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationKey + " " + MIN_VARIANTS_PER_BIN); + // Print out the command line to make it clear to the user what is being executed and how one might modify it + final String rScriptCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTiTv.R" + " " + + OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationKey + " " + MIN_VARIANTS_PER_BIN; + System.out.println( rScriptCommandLine ); // Execute the RScript command to plot the table of TiTv values try { - final Process p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTiTv.R" + " " + - OUTPUT_PREFIX + annotationKey + ".dat" + " " + annotationKey + " " + MIN_VARIANTS_PER_BIN); - } catch ( Exception e ) { - throw new StingException( "Unable to execute RScript command" ); + Runtime.getRuntime().exec( rScriptCommandLine ); + } catch ( IOException e ) { + throw new StingException( "Unable to execute RScript command: " + rScriptCommandLine ); } } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDatum.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDatum.java index 8dfdde197..33b0dd096 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDatum.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDatum.java @@ -98,7 +98,7 @@ public class AnnotationDatum implements Comparator { this.ti[iii] += that.ti[iii]; this.tv[iii] += that.tv[iii]; } - this.value = that.value; // overwrite this bin's value + this.value = that.value; // Overwrite this bin's value } final public float calcTiTv( final int INDEX ) {