From ccec0b4d734d5bf2da3b449aa1cbd55e77944d10 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sat, 27 Aug 2011 12:54:13 -0400 Subject: [PATCH] AnalyzeCovariates uses the general RScript system now -- Convenience constructor for collection for testing -- callRScript() now accepts Objects not Strings, for convenience --- .../analyzecovariates/AnalyzeCovariates.java | 55 +++++++------------ .../sting/utils/R/RScriptExecutor.java | 14 ++++- 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index 2ff8aa979..7ea515591 100755 --- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate; import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum; import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; @@ -38,6 +39,7 @@ import org.broadinstitute.sting.utils.text.XReadLines; import java.io.*; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Map; import java.util.regex.Pattern; @@ -91,12 +93,12 @@ import java.util.regex.Pattern; * -resources resources/ \ * -ignoreQ 5 * - * + * */ @DocumentedGATKFeature( - groupName = "AnalyzeCovariates", - summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator") + groupName = "AnalyzeCovariates", + summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator") public class AnalyzeCovariates extends CommandLineProgram { ///////////////////////////// @@ -118,7 +120,7 @@ public class AnalyzeCovariates extends CommandLineProgram { private String PATH_TO_RESOURCES = "public/R/"; @Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false) private int IGNORE_QSCORES_LESS_THAN = 5; - @Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false) + @Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false) private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups /** @@ -323,13 +325,14 @@ public class AnalyzeCovariates extends CommandLineProgram { } private void callRScripts() { + RScriptExecutor.RScriptArgumentCollection argumentCollection = + new RScriptExecutor.RScriptArgumentCollection(PATH_TO_RSCRIPT, Arrays.asList(PATH_TO_RESOURCES)); + RScriptExecutor executor = new RScriptExecutor(argumentCollection, true); int numReadGroups = 0; - + // for each read group for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) { - - Process p; if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) { String readGroup = readGroupKey.toString(); @@ -338,35 +341,19 @@ public class AnalyzeCovariates extends CommandLineProgram { // for each covariate for( int iii = 1; iii < requestedCovariates.size(); iii++ ) { Covariate cov = requestedCovariates.get(iii); - try { - - if (DO_INDEL_QUALITY) { - p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_indelQuality.R" + " " + - OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " + - cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice - p.waitFor(); - - } else { + final String outputFilename = OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat"; + if (DO_INDEL_QUALITY) { + executor.callRScripts("plot_indelQuality.R", outputFilename, + cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice + } else { if( iii == 1 ) { - // Analyze reported quality - p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_QualityScoreCovariate.R" + " " + - OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " + - IGNORE_QSCORES_LESS_THAN + " " + MAX_QUALITY_SCORE + " " + MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored - p.waitFor(); - } else { // Analyze all other covariates - p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_OtherCovariate.R" + " " + - OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " + - cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice - p.waitFor(); - } + // Analyze reported quality + executor.callRScripts("plot_residualError_QualityScoreCovariate.R", outputFilename, + IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored + } else { // Analyze all other covariates + executor.callRScripts("plot_residualError_OtherCovariate.R", outputFilename, + cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice } - } catch (InterruptedException e) { - e.printStackTrace(); - System.exit(-1); - } catch (IOException e) { - System.out.println("Fatal Exception: Perhaps RScript jobs are being spawned too quickly? One work around is to process fewer read groups using the -numRG option."); - e.printStackTrace(); - System.exit(-1); } } } else { // at the maximum number of read groups so break out diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java index d5e9dd9b3..c0493fe22 100644 --- a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java +++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java @@ -57,8 +57,16 @@ public class RScriptExecutor { public String PATH_TO_RSCRIPT = "Rscript"; @Advanced - @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false) + @Argument(fullName = "path_to_Rresources", shortName = "Rresources", doc = "Path to resources folder holding the Sting R scripts.", required = false) public List PATH_TO_RESOURCES = Arrays.asList("public/R/", "private/R/"); + + public RScriptArgumentCollection() {} + + /** For testing and convenience */ + public RScriptArgumentCollection(final String PATH_TO_RSCRIPT, final List PATH_TO_RESOURCES) { + this.PATH_TO_RSCRIPT = PATH_TO_RSCRIPT; + this.PATH_TO_RESOURCES = PATH_TO_RESOURCES; + } } final RScriptArgumentCollection myArgs; @@ -69,11 +77,11 @@ public class RScriptExecutor { this.exceptOnError = exceptOnError; } - public void callRScripts(String scriptName, String... scriptArgs) { + public void callRScripts(String scriptName, Object... scriptArgs) { callRScripts(scriptName, Arrays.asList(scriptArgs)); } - public void callRScripts(String scriptName, List scriptArgs) { + public void callRScripts(String scriptName, List scriptArgs) { try { final File pathToScript = findScript(scriptName); if ( pathToScript == null ) return; // we failed but shouldn't exception out