From e37a638e0903c3644be8b6923b7be6dbb6afdd1c Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 26 Aug 2011 13:24:06 -0400 Subject: [PATCH 1/8] Fix for disallowed characters in GATKReportTable -- Illegal characters are automatically replaced with _ --- .../sting/gatk/report/GATKReportTable.java | 4 +++- .../sting/queue/util/QJobReport.scala | 13 +++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 152e1a57b..3e3aa29a7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -92,6 +92,8 @@ import java.util.regex.Pattern; * @author Khalid Shakir */ public class GATKReportTable { + /** REGEX that matches any table with an invalid name */ + public final static String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]"; private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2; private String tableName; private String tableDescription; @@ -111,7 +113,7 @@ public class GATKReportTable { * @return true if the name is valid, false if otherwise */ private boolean isValidName(String name) { - Pattern p = Pattern.compile("[^a-zA-Z0-9_\\-\\.]"); + Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX); Matcher m = p.matcher(name); return !m.find(); diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala index a1fa10cd8..04ae54c21 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala @@ -51,7 +51,7 @@ trait QJobReport extends Logging { logger.info("info " + info) reportFeatures = Map( "iteration" -> 1, - "analysisName" -> self.analysisName, + "analysisName" -> getReportGroup, "jobName" -> QJobReport.workAroundSameJobNames(this), "intermediate" -> self.isIntermediate, "startTime" -> info.getStartTime.getTime, @@ -59,15 +59,12 @@ trait QJobReport extends Logging { "formattedStartTime" -> info.getFormattedStartTime, "formattedDoneTime" -> info.getFormattedDoneTime, "runtime" -> info.getRuntimeInMs).mapValues((x:Any) => if (x != null) x.toString else "null") ++ reportFeatures - -// // handle the special case of iterations -// reportFeatures.get("iteration") match { -// case None => reportFeatures("iteration") = 1 -// case _ => ; -// } + // note -- by adding reportFeatures second we override iteration + // (or any other binding) with the user provided value } - def getReportGroup = analysisName + /** The report Group is the analysis name transform to only contain valid GATKReportTable characters */ + def getReportGroup = self.analysisName.replaceAll(GATKReportTable.INVALID_TABLE_NAME_REGEX, "_") def getReportFeatures = reportFeatures def getReportFeatureNames: List[String] = getReportFeatures.keys.toList From ccfed5d64d49d0c8591e243c6c488cd1dc54c4c0 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Sat, 27 Aug 2011 02:32:47 -0400 Subject: [PATCH 4/8] Enable Contracts for Java by default for test targets. Contracts remain disabled for non-test build targets. To enable for non-test targets, run with -Duse.contracts=true. To disable for test targets, run with -Duse.contracts=false. --- build.xml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/build.xml b/build.xml index ef53f6aa4..8fcc78e6a 100644 --- a/build.xml +++ b/build.xml @@ -48,8 +48,9 @@ - - + + + @@ -176,6 +177,10 @@ + + + + @@ -744,8 +749,7 @@ - - + From 50908fe285580430420d8375ecf1418977728ac6 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Sat, 27 Aug 2011 03:17:46 -0400 Subject: [PATCH 5/8] Enable conditional inclusion of documentation for hidden features in GATKDocs. To include documentation for hidden features in the generated GATKDocs, run with -Dgatkdocs.include.hidden=true I will enable this flag when bamboo generates GATKDocs for unstable. --- build.xml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/build.xml b/build.xml index 8fcc78e6a..275cb5555 100644 --- a/build.xml +++ b/build.xml @@ -490,11 +490,16 @@ + + + + + + additionalparam="${gatkdocs.include.hidden.arg} -private -build-timestamp "${build.timestamp}" -absolute-version ${build.version} -quiet -J-Xdebug -J-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005"> From 1ceb020fae2b23e279f423c7eb9e6505a47c6780 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sat, 27 Aug 2011 10:50:05 -0400 Subject: [PATCH 7/8] UnitTests for RScript --- .../sting/utils/R/RScriptExecutor.java | 9 +- .../utils/R/RScriptExecutorUnitTest.java | 106 ++++++++++++++++++ 2 files changed, 111 insertions(+), 4 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/utils/R/RScriptExecutorUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java index 868ea89b5..d5e9dd9b3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java +++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate; import org.broadinstitute.sting.utils.PathUtils; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; import java.io.IOException; @@ -53,11 +54,11 @@ public class RScriptExecutor { public static class RScriptArgumentCollection { @Advanced @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false) - private String PATH_TO_RSCRIPT = "Rscript"; + public String PATH_TO_RSCRIPT = "Rscript"; @Advanced @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false) - private List PATH_TO_RESOURCES = Arrays.asList("public/R/", "private/R/"); + public List PATH_TO_RESOURCES = Arrays.asList("public/R/", "private/R/"); } final RScriptArgumentCollection myArgs; @@ -98,7 +99,7 @@ public class RScriptExecutor { } } - generateException("Couldn't find script: " + scriptName + " in " + myArgs.PATH_TO_RSCRIPT); + generateException("Couldn't find script: " + scriptName + " in " + myArgs.PATH_TO_RESOURCES); return null; } @@ -112,7 +113,7 @@ public class RScriptExecutor { private void generateException(String msg, Throwable e) { if ( exceptOnError ) - throw new RuntimeException(msg, e); + throw new UserException(msg, e); else logger.warn(msg + (e == null ? "" : ":" + e.getMessage())); } diff --git a/public/java/test/org/broadinstitute/sting/utils/R/RScriptExecutorUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/R/RScriptExecutorUnitTest.java new file mode 100644 index 000000000..1bbf74db9 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/R/RScriptExecutorUnitTest.java @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.R; + +import org.apache.commons.io.FileUtils; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.walkers.diffengine.DiffElement; +import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine; +import org.broadinstitute.sting.gatk.walkers.diffengine.DiffNode; +import org.broadinstitute.sting.gatk.walkers.diffengine.Difference; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Basic unit test for RScriptExecutor in reduced reads + */ +public class RScriptExecutorUnitTest extends BaseTest { + final static String testrscript = "print(\"hello, world\")\n"; + final static String publicRScript = "plot_Tranches.R"; + final static String privateRScript = "variantCallQC.R"; + + // -------------------------------------------------------------------------------- + // + // Difference testing routines + // + // -------------------------------------------------------------------------------- + + private void testOne(String script, String pathToRscript, String anotherSearchPath, boolean exceptOnError) { + RScriptExecutor.RScriptArgumentCollection collection = + new RScriptExecutor.RScriptArgumentCollection(); + if ( pathToRscript != null ) + collection.PATH_TO_RSCRIPT = pathToRscript; + if ( anotherSearchPath != null ) { + List x = new ArrayList(collection.PATH_TO_RESOURCES); + x.add(anotherSearchPath); + collection.PATH_TO_RESOURCES = x; + } + RScriptExecutor executor = new RScriptExecutor(collection, exceptOnError); + executor.callRScripts(script); + } + + @Test + public void testPublic() { testOne(publicRScript, null, null, true); } + + @Test + public void testPrivate() { testOne(privateRScript, null, null, true); } + + // make sure we don't break finding something in private by adding another directory + @Test + public void testPrivateWithAdditionalPath1() { testOne(privateRScript, null, "dist", true); } + + // make sure we don't break finding something in private by adding another directory + @Test + public void testPrivateWithAdditionalPath2() { testOne(privateRScript, null, "doesNotExist", true); } + + @Test(expectedExceptions = UserException.class) + public void testNonExistantScriptException() { testOne("does_not_exist.R", null, null, true); } + + @Test() + public void testNonExistantScriptNoException() { testOne("does_not_exist.R", null, null, false); } + + @Test(expectedExceptions = UserException.class) + public void testNonExistantRScriptException() { testOne(publicRScript, "badRScriptValue", null, true); } + + @Test() + public void testNonExistantRScriptNoException() { testOne(publicRScript, "badRScriptValue", null, false); } + + @Test() + public void testScriptInNewPath() throws IOException { + File t = createTempFile("myTestScript", ".R"); + FileUtils.writeStringToFile(t, testrscript); + testOne(t.getName(), null, t.getParent(), true); + } +} \ No newline at end of file From ccec0b4d734d5bf2da3b449aa1cbd55e77944d10 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sat, 27 Aug 2011 12:54:13 -0400 Subject: [PATCH 8/8] AnalyzeCovariates uses the general RScript system now -- Convenience constructor for collection for testing -- callRScript() now accepts Objects not Strings, for convenience --- .../analyzecovariates/AnalyzeCovariates.java | 55 +++++++------------ .../sting/utils/R/RScriptExecutor.java | 14 ++++- 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index 2ff8aa979..7ea515591 100755 --- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate; import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum; import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; @@ -38,6 +39,7 @@ import org.broadinstitute.sting.utils.text.XReadLines; import java.io.*; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Map; import java.util.regex.Pattern; @@ -91,12 +93,12 @@ import java.util.regex.Pattern; * -resources resources/ \ * -ignoreQ 5 * - * + * */ @DocumentedGATKFeature( - groupName = "AnalyzeCovariates", - summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator") + groupName = "AnalyzeCovariates", + summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator") public class AnalyzeCovariates extends CommandLineProgram { ///////////////////////////// @@ -118,7 +120,7 @@ public class AnalyzeCovariates extends CommandLineProgram { private String PATH_TO_RESOURCES = "public/R/"; @Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false) private int IGNORE_QSCORES_LESS_THAN = 5; - @Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false) + @Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false) private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups /** @@ -323,13 +325,14 @@ public class AnalyzeCovariates extends CommandLineProgram { } private void callRScripts() { + RScriptExecutor.RScriptArgumentCollection argumentCollection = + new RScriptExecutor.RScriptArgumentCollection(PATH_TO_RSCRIPT, Arrays.asList(PATH_TO_RESOURCES)); + RScriptExecutor executor = new RScriptExecutor(argumentCollection, true); int numReadGroups = 0; - + // for each read group for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) { - - Process p; if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) { String readGroup = readGroupKey.toString(); @@ -338,35 +341,19 @@ public class AnalyzeCovariates extends CommandLineProgram { // for each covariate for( int iii = 1; iii < requestedCovariates.size(); iii++ ) { Covariate cov = requestedCovariates.get(iii); - try { - - if (DO_INDEL_QUALITY) { - p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_indelQuality.R" + " " + - OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " + - cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice - p.waitFor(); - - } else { + final String outputFilename = OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat"; + if (DO_INDEL_QUALITY) { + executor.callRScripts("plot_indelQuality.R", outputFilename, + cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice + } else { if( iii == 1 ) { - // Analyze reported quality - p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_QualityScoreCovariate.R" + " " + - OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " + - IGNORE_QSCORES_LESS_THAN + " " + MAX_QUALITY_SCORE + " " + MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored - p.waitFor(); - } else { // Analyze all other covariates - p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_OtherCovariate.R" + " " + - OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " + - cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice - p.waitFor(); - } + // Analyze reported quality + executor.callRScripts("plot_residualError_QualityScoreCovariate.R", outputFilename, + IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored + } else { // Analyze all other covariates + executor.callRScripts("plot_residualError_OtherCovariate.R", outputFilename, + cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice } - } catch (InterruptedException e) { - e.printStackTrace(); - System.exit(-1); - } catch (IOException e) { - System.out.println("Fatal Exception: Perhaps RScript jobs are being spawned too quickly? One work around is to process fewer read groups using the -numRG option."); - e.printStackTrace(); - System.exit(-1); } } } else { // at the maximum number of read groups so break out diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java index d5e9dd9b3..c0493fe22 100644 --- a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java +++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java @@ -57,8 +57,16 @@ public class RScriptExecutor { public String PATH_TO_RSCRIPT = "Rscript"; @Advanced - @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false) + @Argument(fullName = "path_to_Rresources", shortName = "Rresources", doc = "Path to resources folder holding the Sting R scripts.", required = false) public List PATH_TO_RESOURCES = Arrays.asList("public/R/", "private/R/"); + + public RScriptArgumentCollection() {} + + /** For testing and convenience */ + public RScriptArgumentCollection(final String PATH_TO_RSCRIPT, final List PATH_TO_RESOURCES) { + this.PATH_TO_RSCRIPT = PATH_TO_RSCRIPT; + this.PATH_TO_RESOURCES = PATH_TO_RESOURCES; + } } final RScriptArgumentCollection myArgs; @@ -69,11 +77,11 @@ public class RScriptExecutor { this.exceptOnError = exceptOnError; } - public void callRScripts(String scriptName, String... scriptArgs) { + public void callRScripts(String scriptName, Object... scriptArgs) { callRScripts(scriptName, Arrays.asList(scriptArgs)); } - public void callRScripts(String scriptName, List scriptArgs) { + public void callRScripts(String scriptName, List scriptArgs) { try { final File pathToScript = findScript(scriptName); if ( pathToScript == null ) return; // we failed but shouldn't exception out