diff --git a/build.xml b/build.xml index fe4c7a3f4..fc495f7cc 100644 --- a/build.xml +++ b/build.xml @@ -79,6 +79,17 @@ + + + + + + + + + + + @@ -457,6 +468,26 @@ + + + + + + + + + + + + + + + @@ -957,6 +988,12 @@ + + diff --git a/ivy.xml b/ivy.xml index 3f3d1c97f..a394aa6d7 100644 --- a/ivy.xml +++ b/ivy.xml @@ -30,6 +30,9 @@ + + + diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index aba4fc109..d88e7030e 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -43,7 +43,7 @@ import java.util.Locale; public abstract class CommandLineProgram { /** The command-line program and the arguments it returned. */ - protected ParsingEngine parser = null; + public ParsingEngine parser = null; /** the default log level */ @Argument(fullName = "logging_level", @@ -144,6 +144,11 @@ public abstract class CommandLineProgram { public static int result = -1; + @SuppressWarnings("unchecked") + public static void start(CommandLineProgram clp, String[] args) throws Exception { + start(clp, args, false); + } + /** * This function is called to start processing the command line, and kick * off the execute message of the program. @@ -153,7 +158,7 @@ public abstract class CommandLineProgram { * @throws Exception when an exception occurs */ @SuppressWarnings("unchecked") - public static void start(CommandLineProgram clp, String[] args) throws Exception { + public static void start(CommandLineProgram clp, String[] args, boolean dryRun) throws Exception { try { // setup our log layout @@ -180,8 +185,9 @@ public abstract class CommandLineProgram { // - InvalidArgument in case these arguments are specified by plugins. // - MissingRequiredArgument in case the user requested help. Handle that later, once we've // determined the full complement of arguments. - parser.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument, - ParsingEngine.ValidationType.InvalidArgument)); + if ( ! dryRun ) + parser.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument, + ParsingEngine.ValidationType.InvalidArgument)); parser.loadArgumentsIntoObject(clp); // Initialize the logger using the loaded command line. @@ -195,36 +201,40 @@ public abstract class CommandLineProgram { if (isHelpPresent(parser)) printHelpAndExit(clp, parser); - parser.validate(); + if ( ! dryRun ) parser.validate(); } else { parser.parse(args); - if (isHelpPresent(parser)) - printHelpAndExit(clp, parser); + if ( ! dryRun ) { + if (isHelpPresent(parser)) + printHelpAndExit(clp, parser); - parser.validate(); + parser.validate(); + } parser.loadArgumentsIntoObject(clp); // Initialize the logger using the loaded command line. clp.setupLoggerLevel(layout); } - // if they specify a log location, output our data there - if (clp.toFile != null) { - FileAppender appender; - try { - appender = new FileAppender(layout, clp.toFile, false); - logger.addAppender(appender); - } catch (IOException e) { - throw new RuntimeException("Unable to re-route log output to " + clp.toFile + " make sure the destination exists"); + if ( ! dryRun ) { + // if they specify a log location, output our data there + if (clp.toFile != null) { + FileAppender appender; + try { + appender = new FileAppender(layout, clp.toFile, false); + logger.addAppender(appender); + } catch (IOException e) { + throw new RuntimeException("Unable to re-route log output to " + clp.toFile + " make sure the destination exists"); + } } + + // regardless of what happens next, generate the header information + HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args); + + // call the execute + CommandLineProgram.result = clp.execute(); } - - // regardless of what happens next, generate the header information - HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args); - - // call the execute - CommandLineProgram.result = clp.execute(); } catch (ArgumentException e) { clp.parser.printHelp(clp.getApplicationDetails()); diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index 8423bb2f2..0dc18e6f9 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -45,7 +45,7 @@ public class ParsingEngine { * A list of defined arguments against which command lines are matched. * Package protected for testing access. */ - ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions(); + public ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions(); /** * A list of matches from defined arguments to command-line text. diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index da2be74bf..2af29ea70 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -30,25 +30,27 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.walkers.Attribution; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.ApplicationDetails; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.util.*; /** - * @author aaron - * @version 1.0 - * @date May 8, 2009 - *

- * Class CommandLineGATK - *

+ * The GATK engine itself. Manages map/reduce data access and runs walkers. + * * We run command line GATK programs using this class. It gets the command line args, parses them, and hands the * gatk all the parsed out information. Pretty much anything dealing with the underlying system should go here, * the gatk engine should deal with any data related information. */ +@DocumentedGATKFeature( + groupName = "GATK Engine", + summary = "Features and arguments for the GATK engine itself, available to all walkers.", + extraDocs = { ReadFilter.class, UserException.class }) public class CommandLineGATK extends CommandLineExecutable { @Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run") private String analysisName = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index cf190835e..6aeb42faa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -33,9 +33,7 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.help.DescriptionTaglet; -import org.broadinstitute.sting.utils.help.DisplayNameTaglet; -import org.broadinstitute.sting.utils.help.SummaryTaglet; +import org.broadinstitute.sting.utils.help.ResourceBundleExtractorDoclet; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.util.*; @@ -82,19 +80,10 @@ public class WalkerManager extends PluginManager { * @return A suitable display name for the package. */ public String getPackageDisplayName(String packageName) { - // Try to find an override for the display name of this package. - String displayNameKey = String.format("%s.%s",packageName,DisplayNameTaglet.NAME); - String displayName; - if(helpText.containsKey(displayNameKey)) { - displayName = helpText.getString(displayNameKey); - } - else { - // If no override exists... - // ...try to compute the override from the text of the package name, while accounting for - // unpackaged walkers. - displayName = packageName.substring(packageName.lastIndexOf('.')+1); - if(displayName.trim().equals("")) displayName = ""; - } + // ...try to compute the override from the text of the package name, while accounting for + // unpackaged walkers. + String displayName = packageName.substring(packageName.lastIndexOf('.')+1); + if (displayName.trim().equals("")) displayName = ""; return displayName; } @@ -104,7 +93,7 @@ public class WalkerManager extends PluginManager { * @return Package help text, or "" if none exists. */ public String getPackageSummaryText(String packageName) { - String key = String.format("%s.%s",packageName,SummaryTaglet.NAME); + String key = String.format("%s.%s",packageName, ResourceBundleExtractorDoclet.SUMMARY_TAGLET_NAME); if(!helpText.containsKey(key)) return ""; return helpText.getString(key); @@ -116,7 +105,7 @@ public class WalkerManager extends PluginManager { * @return Walker summary description, or "" if none exists. */ public String getWalkerSummaryText(Class walkerType) { - String walkerSummary = String.format("%s.%s",walkerType.getName(), SummaryTaglet.NAME); + String walkerSummary = String.format("%s.%s",walkerType.getName(), ResourceBundleExtractorDoclet.SUMMARY_TAGLET_NAME); if(!helpText.containsKey(walkerSummary)) return ""; return helpText.getString(walkerSummary); @@ -137,7 +126,7 @@ public class WalkerManager extends PluginManager { * @return Walker full description, or "" if none exists. */ public String getWalkerDescriptionText(Class walkerType) { - String walkerDescription = String.format("%s.%s",walkerType.getName(), DescriptionTaglet.NAME); + String walkerDescription = String.format("%s.%s",walkerType.getName(), ResourceBundleExtractorDoclet.DESCRIPTION_TAGLET_NAME); if(!helpText.containsKey(walkerDescription)) return ""; return helpText.getString(walkerDescription); diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 9466fdf75..48fd73e0b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -49,7 +49,7 @@ public class LinearMicroScheduler extends MicroScheduler { Accumulator accumulator = Accumulator.create(engine,walker); int counter = 0; - for (Shard shard : processingTracker.onlyOwned(shardStrategy, engine.getName())) { + for (Shard shard : shardStrategy ) { if ( shard == null ) // we ran out of shards that aren't owned break; diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 23e5769f1..e731b9864 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -39,14 +39,10 @@ import org.broadinstitute.sting.gatk.traversals.*; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.threading.*; import javax.management.JMException; import javax.management.MBeanServer; import javax.management.ObjectName; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.PrintStream; import java.lang.management.ManagementFactory; import java.util.Collection; @@ -83,8 +79,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { private final MBeanServer mBeanServer; private final ObjectName mBeanName; - protected GenomeLocProcessingTracker processingTracker; - /** * MicroScheduler factory function. Create a microscheduler appropriate for reducing the * selected walker. @@ -98,11 +92,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { * @return The best-fit microscheduler. */ public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods, int nThreadsToUse) { - if (engine.getArguments().processingTrackerFile != null) { - if ( walker instanceof ReadWalker ) - throw new UserException.BadArgumentValue("C", String.format("Distributed GATK processing not enabled for read walkers")); - } - if (walker instanceof TreeReducible && nThreadsToUse > 1) { if(walker.isReduceByInterval()) throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass()))); @@ -157,33 +146,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { catch (JMException ex) { throw new ReviewedStingException("Unable to register microscheduler with JMX", ex); } - - // - // create the processing tracker - // - if ( engine.getArguments().processingTrackerFile != null ) { - logger.warn("Distributed GATK is an experimental engine feature, and is likely to not work correctly or reliably."); - if ( engine.getArguments().restartProcessingTracker && engine.getArguments().processingTrackerFile.exists() ) { - engine.getArguments().processingTrackerFile.delete(); - logger.info("Deleting ProcessingTracker file " + engine.getArguments().processingTrackerFile); - } - - PrintStream statusStream = null; - if ( engine.getArguments().processingTrackerStatusFile != null ) { - try { - statusStream = new PrintStream(new FileOutputStream(engine.getArguments().processingTrackerStatusFile)); - } catch ( FileNotFoundException e) { - throw new UserException.CouldNotCreateOutputFile(engine.getArguments().processingTrackerStatusFile, e); - } - } - - ClosableReentrantLock lock = new SharedFileThreadSafeLock(engine.getArguments().processingTrackerFile, engine.getArguments().processTrackerID); - processingTracker = new FileBackedGenomeLocProcessingTracker(engine.getArguments().processingTrackerFile, engine.getGenomeLocParser(), lock, statusStream) ; - logger.info("Creating ProcessingTracker using shared file " + engine.getArguments().processingTrackerFile + " process.id = " + engine.getName() + " CID = " + engine.getArguments().processTrackerID); - } else { - // create a NoOp version that doesn't do anything but say "yes" - processingTracker = new NoOpGenomeLocProcessingTracker(); - } } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java index 227637761..bf3ce352a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java @@ -2,10 +2,14 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.picard.filter.SamRecordFilter; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; /** * A SamRecordFilter that also depends on the header. */ +@DocumentedGATKFeature( + groupName = "Read filters", + summary = "GATK Engine arguments that filter or transfer incoming SAM/BAM data files" ) public abstract class ReadFilter implements SamRecordFilter { /** * Sets the header for use by this filter. diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index 69c0b3e0a..acee1a6a3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -154,9 +154,13 @@ public class GATKRunReport { private long nReads; public enum PhoneHomeOption { + /** Disable phone home */ NO_ET, + /** Standard option. Writes to local repository if it can be found, or S3 otherwise */ STANDARD, + /** Force output to STDOUT. For debugging only */ STDOUT, + /** Force output to S3. For debugging only */ AWS_S3 // todo -- remove me -- really just for testing purposes } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadFilters.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadFilters.java index ff3b6d82f..5f11686a1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadFilters.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadFilters.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.picard.filter.SamRecordFilter; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.lang.annotation.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index 384742302..9e261a0b1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -26,11 +26,14 @@ package org.broadinstitute.sting.gatk.walkers; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.help.GenericDocumentationHandler; import java.util.List; @@ -44,6 +47,10 @@ import java.util.List; @ReadFilters(MalformedReadFilter.class) @PartitionBy(PartitionType.NONE) @BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) +@DocumentedGATKFeature( + groupName = "GATK walkers", + summary = "General tools available for running on the command line as part of the GATK package", + extraDocs = {CommandLineGATK.class}) public abstract class Walker { final protected static Logger logger = Logger.getLogger(Walker.class); private GenomeAnalysisEngine toolkit; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 89e20dad1..5f8f19892 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -42,6 +42,7 @@ import java.util.*; * Date: 7/4/11 * Time: 12:51 PM * A generic engine for comparing tree-structured objects + * */ public class DiffEngine { final protected static Logger logger = Logger.getLogger(DiffEngine.class); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java index fba6549fb..b679f967a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -34,45 +35,159 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import java.io.File; import java.io.PrintStream; +import java.util.Arrays; import java.util.List; /** + * A generic engine for comparing tree-structured objects + *

* Compares two record-oriented files, itemizing specific difference between equivalent * records in the two files. Reports both itemized and summarized differences. + *

+ * What are the summarized differences and the DiffObjectsWalker + *

+ * The GATK contains a summarizing difference engine that compares hierarchical data structures to emit: + *

    + *
  • A list of specific differences between the two data structures. This is similar to saying the value in field A in record 1 in file F differences from the value in field A in record 1 in file G. + *
  • A summarized list of differences ordered by frequency of the difference. This output is similar to saying field A in 50 records in files F and G differed. + *
+ * + *

+ * The GATK contains a private walker DiffObjects that allows you access to the DiffEngine capabilities on the command line. Simply provide the walker with the master and test files and it will emit summarized differences for you. + * + *

+ * Why? + *

+ * The reason for this system is that it allows you to compare two structured files -- such as BAMs and VCFs -- for common differences among them. This is primarily useful in regression testing or optimization, where you want to ensure that the differences are those that you expect and not any others. + * + *

Understanding the output + *

The DiffEngine system compares to two hierarchical data structures for specific differences in the values of named + * nodes. Suppose I have two trees: + *

+ *     Tree1=(A=1 B=(C=2 D=3))
+ *     Tree2=(A=1 B=(C=3 D=3 E=4))
+ *     Tree3=(A=1 B=(C=4 D=3 E=4))
+ * 
+ *

+ * where every node in the tree is named, or is a raw value (here all leaf values are integers). The DiffEngine + * traverses these data structures by name, identifies equivalent nodes by fully qualified names + * (Tree1.A is distinct from Tree2.A, and determines where their values are equal (Tree1.A=1, Tree2.A=1, so they are). + * These itemized differences are listed as: + *

+ *     Tree1.B.C=2 != Tree2.B.C=3
+ *     Tree1.B.C=2 != Tree3.B.C=4
+ *     Tree2.B.C=3 != Tree3.B.C=4
+ *     Tree1.B.E=MISSING != Tree2.B.E=4
+ * 
+ *

+ * This conceptually very similar to the output of the unix command line tool diff. What's nice about DiffEngine though + * is that it computes similarity among the itemized differences and displays the count of differences names + * in the system. In the above example, the field C is not equal three times, while the missing E in Tree1 occurs + * only once. So the summary is: + * + *

+ *     *.B.C : 3
+ *     *.B.E : 1
+ * 
+ *

where the * operator indicates that any named field matches. This output is sorted by counts, and provides an + * immediate picture of the commonly occurring differences among the files. + *

+ * Below is a detailed example of two VCF fields that differ because of a bug in the AC, AF, and AN counting routines, + * detected by the integrationtest integration (more below). You can see that in the although there are many specific + * instances of these differences between the two files, the summarized differences provide an immediate picture that + * the AC, AF, and AN fields are the major causes of the differences. + *

+ *

+   [testng] path                                                             count
+   [testng] *.*.*.AC                                                         6
+   [testng] *.*.*.AF                                                         6
+   [testng] *.*.*.AN                                                         6
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AC  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AF  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AN  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AC  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AF  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AN  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AC  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AF  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AN  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000598.AC  1
+
+ * * @author Mark DePristo - * @version 0.1 + * @since 7/4/11 */ @Requires(value={}) public class DiffObjectsWalker extends RodWalker { + /** + * Writes out a file of the DiffEngine format: + * + * http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine + */ @Output(doc="File to which results should be written",required=true) protected PrintStream out; - @Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false) - int MAX_OBJECTS_TO_READ = -1; - - @Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false) - int MAX_DIFFS = 0; - - @Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. number of diffs occuring exactly once in the file to process", required=false) - int MAX_COUNT1_DIFFS = 0; - - @Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false) - int minCountForDiff = 1; - - @Argument(fullName="showItemizedDifferences", shortName="SID", doc="Should we enumerate all differences between the files?", required=false) - boolean showItemizedDifferences = false; - + /** + * The master file against which we will compare test. This is one of the two required + * files to do the comparison. Conceptually master is the original file contained the expected + * results, but this doesn't currently have an impact on the calculations, but might in the future. + */ @Argument(fullName="master", shortName="m", doc="Master file: expected results", required=true) File masterFile; + /** + * The test file against which we will compare to the master. This is one of the two required + * files to do the comparison. Conceptually test is the derived file from master, but this + * doesn't currently have an impact on the calculations, but might in the future. + */ @Argument(fullName="test", shortName="t", doc="Test file: new results to compare to the master file", required=true) File testFile; - final DiffEngine diffEngine = new DiffEngine(); + /** + * The engine will read at most this number of objects from each of master and test files. This reduces + * the memory requirements for DiffObjects but does limit you to comparing at most this number of objects + */ + @Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false) + int MAX_OBJECTS_TO_READ = -1; + + /** + * The max number of differences to display when summarizing. For example, if there are 10M differences, but + * maxDiffs is 10, then the comparison aborts after first ten summarized differences are shown. Note that + * the system shows differences sorted by frequency, so these 10 would be the most common between the two files. + * A value of 0 means show all possible differences. + */ + @Argument(fullName="maxDiffs", shortName="M", doc="Max. number of diffs to process", required=false) + int MAX_DIFFS = 0; + + /** + * The maximum number of singleton (occurs exactly once between the two files) to display when writing out + * the summary. Only applies if maxDiffs hasn't been exceeded. For example, if maxDiffs is 10 and maxCount1Diffs + * is 2 and there are 20 diffs with count > 1, then only 10 are shown, all of which have count above 1. + */ + @Argument(fullName="maxCount1Diffs", shortName="M1", doc="Max. number of diffs occuring exactly once in the file to process", required=false) + int MAX_COUNT1_DIFFS = 0; + + /** + * Only differences that occur more than minCountForDiff are displayed. For example, if minCountForDiff is 10, then + * a difference must occur at least 10 times between the two files to be shown. + */ + @Argument(fullName="minCountForDiff", shortName="MCFD", doc="Min number of observations for a records to display", required=false) + int minCountForDiff = 1; + + /** + * If provided, the system will write out the summarized, individual differences. May lead to enormous outputs, + * depending on how many differences are found. Note these are not sorted in any way, so if you have 10M + * common differences in the files, you will see 10M records, whereas the final summarize will just list the + * difference and its count of 10M. + */ + @Argument(fullName="showItemizedDifferences", shortName="SID", doc="Should we enumerate all differences between the files?", required=false) + boolean showItemizedDifferences = false; + + DiffEngine diffEngine; @Override public void initialize() { - + this.diffEngine = new DiffEngine(); } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java index df89efe6d..26fa9a258 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java @@ -40,7 +40,6 @@ import java.util.List; * of paired reads. * * @author mhanna - * @version 0.1 */ public class CountPairsWalker extends ReadPairWalker { @Output diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index 8c6539f8d..775cde1f4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -58,6 +58,8 @@ import java.util.List; import java.util.Map; /** + * First pass of the recalibration. Generates recalibration table based on various user-specified covariates (such as reported quality score, cycle, and dinucleotide). + * * This walker is designed to work as the first pass in a two-pass processing step. * It does a by-locus traversal operating only at sites that are not in dbSNP. * We assume that all reference mismatches we see are therefore errors and indicative of poor base quality. @@ -72,7 +74,6 @@ import java.util.Map; * * @author rpoplin * @since Nov 3, 2009 - * @help.summary First pass of the recalibration. Generates recalibration table based on various user-specified covariates (such as reported quality score, cycle, and dinucleotide). */ @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 0277fda0d..fec7ee4e6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -54,8 +54,10 @@ import java.util.ResourceBundle; import java.util.regex.Pattern; /** + * Second pass of the recalibration. Uses the table generated by CountCovariates to update the base quality scores of the input bam file using a sequential table calculation making the base quality scores more accurately reflect the actual quality of the bases as measured by reference mismatch rate. + * * This walker is designed to work as the second pass in a two-pass processing step, doing a by-read traversal. - + * * For each base in each read this walker calculates various user-specified covariates (such as read group, reported quality score, cycle, and dinuc) * Using these values as a key in a large hashmap the walker calculates an empirical base quality score and overwrites the quality score currently in the read. * This walker then outputs a new bam file with these updated (recalibrated) reads. @@ -65,7 +67,6 @@ import java.util.regex.Pattern; * * @author rpoplin * @since Nov 3, 2009 - * @help.summary Second pass of the recalibration. Uses the table generated by CountCovariates to update the base quality scores of the input bam file using a sequential table calculation making the base quality scores more accurately reflect the actual quality of the bases as measured by reference mismatch rate. */ @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java index 3e8a6ed17..e1f2ae983 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java @@ -8,6 +8,18 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; +/** + * CpG is a stratification module for VariantEval that divides the input data by within/not within a CpG site + * + *

+ * It is a three-state stratification: + *

    + *
  • The locus is a CpG site ("CpG") + *
  • The locus is not a CpG site ("non_CpG") + *
  • The locus is either a CpG or not a CpG site ("all") + *
+ * A CpG site is defined as a site where the reference base at a locus is a C and the adjacent reference base in the 3' direction is a G. + */ public class CpG extends VariantStratifier { private ArrayList states; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 403c67d3e..b195fd35f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -49,8 +49,6 @@ import java.util.*; * * @author rpoplin * @since Mar 14, 2011 - * - * @help.summary Applies cuts to the input vcf file (by adding filter lines) to achieve the desired novel FDR levels which were specified during VariantRecalibration */ public class ApplyRecalibration extends RodWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 8179463eb..76c888640 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -53,8 +53,6 @@ import java.util.*; * * User: rpoplin * Date: 3/12/11 - * - * @help.summary Takes variant calls as .vcf files, learns a Gaussian mixture model over the variant annotations and evaluates the variant -- assigning an informative lod score */ public class VariantRecalibrator extends RodWalker, ExpandingArrayList> implements TreeReducible> { diff --git a/public/java/src/org/broadinstitute/sting/utils/ContigComparator.java b/public/java/src/org/broadinstitute/sting/utils/ContigComparator.java new file mode 100644 index 000000000..619beddb8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/ContigComparator.java @@ -0,0 +1,71 @@ +package org.broadinstitute.sting.utils; + +import java.util.Comparator; +import java.util.Set; +import java.util.TreeSet; + +/** + * Created by IntelliJ IDEA. + * User: carneiro + * Date: 7/23/11 + * Time: 6:07 PM + * + * Contig comparator -- sorting contigs like Picard + * + * This is very useful if you want to output your text files or manipulate data in the usual chromosome ordering : + * 1 + * 2 + * 3 + * ... + * 21 + * 22 + * X + * Y + * GL*** + * ... + * Just use this comparator in any SortedSet class constructor and your data will be sorted like in the BAM file. + */ +public class ContigComparator implements Comparator { + private Set specialChrs; + + public ContigComparator() { + specialChrs = new TreeSet(); + specialChrs.add("X"); + specialChrs.add("Y"); + } + + public int compare(String chr1, String chr2) { + if (chr1.equals(chr2)) + return 0; + + Integer x = convertStringWithoutException(chr1); + Integer y = convertStringWithoutException(chr2); + // both contigs are numbered + if (x != null && y != null) + return (x < y) ? -1:1; + + // both contigs are named + if (x == null && y == null) { + // both contigs are special contigs or neither contig is a special contigs + if (specialChrs.contains(chr1) && specialChrs.contains(chr2) || (!specialChrs.contains(chr1) && !specialChrs.contains(chr2))) + return chr1.compareTo(chr2); + // one contig is a special and the other is not special + if (specialChrs.contains(chr1)) + return -1; + return 1; + } + + // one contig is named the other is numbered + if (x != null) + return -1; + return 1; + } + + private Integer convertStringWithoutException(String contig) { + Integer x = null; + try { + x = Integer.decode(contig); + } catch (NumberFormatException n){} + return x; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 7eab6f6c9..3c3299ff5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -29,6 +29,7 @@ import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; @@ -43,6 +44,9 @@ import java.util.Arrays; * Date: Sep 3, 2010 * Time: 2:24:09 PM */ +@DocumentedGATKFeature( + groupName = "User exceptions", + summary = "Exceptions caused by incorrect user behavior, such as bad files, bad arguments, etc." ) public class UserException extends ReviewedStingException { public UserException(String msg) { super(msg); } public UserException(String msg, Throwable e) { super(msg, e); } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DescriptionTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/DescriptionTaglet.java deleted file mode 100644 index 65c332048..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/help/DescriptionTaglet.java +++ /dev/null @@ -1,59 +0,0 @@ -package org.broadinstitute.sting.utils.help; - -import com.sun.tools.doclets.Taglet; - -import java.util.Map; - -/** - * Provide an alternate description for the given help system. - * - * @author mhanna - * @version 0.1 - */ -public class DescriptionTaglet extends HelpTaglet { - /** - * The key tag for this taglet. - */ - public static final String NAME = "help.description"; - - /** - * Return the name of this custom tag. - */ - @Override - public String getName() { - return NAME; - } - - /** - * Will return false since overviews are always named - * by the @WalkerName tag. - * @return false always - */ - @Override - public boolean inOverview() { - return true; - } - - /** - * Will return true to indicate that packages can be given useful - * description. - * @return true always - */ - @Override - public boolean inPackage() { - return true; - } - - /** - * Register this Taglet. - * @param tagletMap the map to register this tag to. - */ - public static void register(Map tagletMap) { - DescriptionTaglet tag = new DescriptionTaglet(); - Taglet t = (Taglet)tagletMap.get(tag.getName()); - if (t != null) { - tagletMap.remove(tag.getName()); - } - tagletMap.put(tag.getName(), tag); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DisplayNameTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/DisplayNameTaglet.java deleted file mode 100644 index 6c6dad736..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/help/DisplayNameTaglet.java +++ /dev/null @@ -1,49 +0,0 @@ -package org.broadinstitute.sting.utils.help; - -import com.sun.tools.doclets.Taglet; - -import java.util.Map; - -/** - * Provide a display name in the help for packages - * - * @author mhanna - * @version 0.1 - */ -public class DisplayNameTaglet extends HelpTaglet { - /** - * The display name for this taglet. - */ - public static final String NAME = "help.display.name"; - - /** - * Return the name of this custom tag. - */ - @Override - public String getName() { - return NAME; - } - - /** - * Will return true to indicate that packages can be given useful - * display text. - * @return true always - */ - @Override - public boolean inPackage() { - return true; - } - - /** - * Register this Taglet. - * @param tagletMap the map to register this tag to. - */ - public static void register(Map tagletMap) { - DisplayNameTaglet tag = new DisplayNameTaglet(); - Taglet t = (Taglet)tagletMap.get(tag.getName()); - if (t != null) { - tagletMap.remove(tag.getName()); - } - tagletMap.put(tag.getName(), tag); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java new file mode 100644 index 000000000..710503ca8 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +import java.lang.annotation.*; + +/** + * An annotation to identify a class as a GATK capability for documentation + * + * @author depristo + */ +@Documented +@Inherited +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface DocumentedGATKFeature { + public boolean enable() default true; + public String groupName(); + public String summary() default ""; + public Class handler() default GenericDocumentationHandler.class; + public Class[] extraDocs() default {}; +} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java new file mode 100644 index 000000000..366df0c3a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +import com.sun.javadoc.ClassDoc; +import com.sun.javadoc.RootDoc; + +import java.io.*; +import java.util.Set; + +/** + * + */ +public abstract class DocumentedGATKFeatureHandler { + private GATKDoclet doclet; + + protected RootDoc getRootDoc() { + return this.doclet.rootDoc; + } + + public void setDoclet(GATKDoclet doclet) { + this.doclet = doclet; + } + + public GATKDoclet getDoclet() { + return doclet; + } + + public boolean shouldBeProcessed(ClassDoc doc) { return true; } + + public String getDestinationFilename(ClassDoc doc) { + return HelpUtils.getClassName(doc).replace(".", "_") + ".html"; + } + + public abstract String getTemplateName(ClassDoc doc) throws IOException; + public abstract void processOne(RootDoc rootDoc, GATKDocWorkUnit toProcess, Set all); +} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java new file mode 100644 index 000000000..65c6624d5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +import com.sun.javadoc.ClassDoc; + +import java.util.HashMap; +import java.util.Map; + +/** +* Created by IntelliJ IDEA. +* User: depristo +* Date: 7/24/11 +* Time: 7:59 PM +* To change this template use File | Settings | File Templates. +*/ +public class GATKDocWorkUnit implements Comparable { + // known at the start + final String name, filename, group; + final DocumentedGATKFeatureHandler handler; + final ClassDoc classDoc; + final Class clazz; + final DocumentedGATKFeature annotation; + final String buildTimestamp, absoluteVersion; + + // set by the handler + String summary; + Map forTemplate; + + public GATKDocWorkUnit(String name, String filename, String group, + DocumentedGATKFeature annotation, DocumentedGATKFeatureHandler handler, + ClassDoc classDoc, Class clazz, + String buildTimestamp, String absoluteVersion) { + this.annotation = annotation; + this.name = name; + this.filename = filename; + this.group = group; + this.handler = handler; + this.classDoc = classDoc; + this.clazz = clazz; + this.buildTimestamp = buildTimestamp; + this.absoluteVersion = absoluteVersion; + } + + public void setHandlerContent(String summary, Map forTemplate) { + this.summary = summary; + this.forTemplate = forTemplate; + } + + public Map toMap() { + Map data = new HashMap(); + data.put("name", name); + data.put("summary", summary); + data.put("filename", filename); + data.put("group", group); + return data; + } + + public int compareTo(GATKDocWorkUnit other) { + return this.name.compareTo(other.name); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java new file mode 100644 index 000000000..0b4c69e3c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +import com.sun.javadoc.ClassDoc; +import com.sun.javadoc.RootDoc; +import freemarker.template.Configuration; +import freemarker.template.DefaultObjectWrapper; +import freemarker.template.Template; +import freemarker.template.TemplateException; +import org.apache.commons.io.FileUtils; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.*; +import java.util.*; + +/** + * + */ +public class GATKDoclet { + final protected static File SETTINGS_DIR = new File("settings/helpTemplates"); + final protected static File DESTINATION_DIR = new File("gatkdocs"); + final protected static Logger logger = Logger.getLogger(GATKDoclet.class); + protected static String buildTimestamp = null, absoluteVersion = null; + protected static boolean showHiddenFeatures = false; + + RootDoc rootDoc; + + /** + * Extracts the contents of certain types of javadoc and adds them to an XML file. + * @param rootDoc The documentation root. + * @return Whether the JavaDoc run succeeded. + * @throws java.io.IOException if output can't be written. + */ + public static boolean start(RootDoc rootDoc) throws IOException { + logger.setLevel(Level.INFO); + // load arguments + for(String[] options: rootDoc.options()) { + if(options[0].equals("-build-timestamp")) + buildTimestamp = options[1]; + if (options[0].equals("-absolute-version")) + absoluteVersion = options[1]; + if (options[0].equals("-include-hidden")) + showHiddenFeatures = true; + } + + GATKDoclet doclet = new GATKDoclet(); + doclet.processDocs(rootDoc); + return true; + } + + /** + * Validate the given options against options supported by this doclet. + * @param option Option to validate. + * @return Number of potential parameters; 0 if not supported. + */ + public static int optionLength(String option) { + if(option.equals("-build-timestamp") || option.equals("-absolute-version") || option.equals("-include-hidden")) { + return 2; + } + return 0; + } + + public boolean showHiddenFeatures() { + return showHiddenFeatures; + } + + public Set workUnits() { + TreeSet m = new TreeSet(); + + for ( ClassDoc doc : rootDoc.classes() ) { + logger.debug("Considering " + doc); + Class clazz = getClassForClassDoc(doc); + DocumentedGATKFeature feature = getFeatureForClassDoc(doc); + DocumentedGATKFeatureHandler handler = createHandler(doc, feature); + if ( handler != null && handler.shouldBeProcessed(doc) ) { + logger.info("Going to generate documentation for class " + doc); + String filename = handler.getDestinationFilename(doc); + GATKDocWorkUnit unit = new GATKDocWorkUnit(doc.name(), + filename, feature.groupName(), + feature, handler, doc, clazz, + buildTimestamp, absoluteVersion); + m.add(unit); + } + } + + return m; + } + + protected void processDocs(RootDoc rootDoc) { + // setup the global access to the root + this.rootDoc = rootDoc; + + try { + // basic setup + DESTINATION_DIR.mkdirs(); + FileUtils.copyFile(new File(SETTINGS_DIR + "/style.css"), new File(DESTINATION_DIR + "/style.css")); + + /* ------------------------------------------------------------------- */ + /* You should do this ONLY ONCE in the whole application life-cycle: */ + + Configuration cfg = new Configuration(); + // Specify the data source where the template files come from. + cfg.setDirectoryForTemplateLoading(SETTINGS_DIR); + // Specify how templates will see the data-model. This is an advanced topic... + cfg.setObjectWrapper(new DefaultObjectWrapper()); + + Set myWorkUnits = workUnits(); + for ( GATKDocWorkUnit workUnit : myWorkUnits ) { + processDocWorkUnit(cfg, workUnit, myWorkUnits); + } + + processIndex(cfg, new ArrayList(myWorkUnits)); + } catch ( FileNotFoundException e ) { + throw new RuntimeException(e); + } catch ( IOException e ) { + throw new RuntimeException(e); + } + } + + private DocumentedGATKFeatureHandler createHandler(ClassDoc doc, DocumentedGATKFeature feature) { + try { + if ( feature != null ) { + if ( feature.enable() ) { + DocumentedGATKFeatureHandler handler = feature.handler().newInstance(); + handler.setDoclet(this); + return handler; + } else { + logger.info("Skipping disabled Documentation for " + doc); + } + } + } catch ( IllegalAccessException e) { + throw new RuntimeException(e); // the constructor is now private -- this is an error + } catch ( InstantiationException e) { + throw new RuntimeException(e); // the constructor is now private -- this is an error + } + + return null; + } + + private DocumentedGATKFeature getFeatureForClassDoc(ClassDoc doc) { + // todo -- what do I need the ? extends Object to pass the compiler? + Class docClass = getClassForClassDoc(doc); + if ( docClass != null && docClass.isAnnotationPresent(DocumentedGATKFeature.class) ) { + return docClass.getAnnotation(DocumentedGATKFeature.class); + } else { + return null; // not annotated so it shouldn't be documented + } + } + + private Class getClassForClassDoc(ClassDoc doc) { + try { + // todo -- what do I need the ? extends Object to pass the compiler? + return (Class)HelpUtils.getClassForDoc(doc); + } catch ( ClassNotFoundException e) { + //logger.warn("Couldn't find class for ClassDoc " + doc); + // we got a classdoc for a class we can't find. Maybe in a library or something + return null; + } catch ( NoClassDefFoundError e ) { + return null; + } catch ( UnsatisfiedLinkError e) { + return null; // naughty BWA bindings + } + } + + public static ClassDoc getClassDocForClass(RootDoc rootDoc, Class clazz) { + return rootDoc.classNamed(clazz.getName()); + } + + private void processIndex(Configuration cfg, List indexData) throws IOException { + /* Get or create a template */ + Template temp = cfg.getTemplate("generic.index.template.html"); + + /* Merge data-model with template */ + Writer out = new OutputStreamWriter(new FileOutputStream(new File(DESTINATION_DIR + "/index.html"))); + try { + temp.process(groupIndexData(indexData), out); + out.flush(); + } catch ( TemplateException e ) { + throw new ReviewedStingException("Failed to create GATK documentation", e); + } + } + + private Map groupIndexData(List indexData) { + // + // root -> data -> { summary -> y, filename -> z }, etc + // -> groups -> group1, group2, etc. + Map root = new HashMap(); + + Collections.sort(indexData); + + Set docFeatures = new HashSet(); + List> data = new ArrayList>(); + for ( GATKDocWorkUnit workUnit : indexData ) { + data.add(workUnit.toMap()); + docFeatures.add(workUnit.annotation); + } + + List> groups = new ArrayList>(); + for ( DocumentedGATKFeature feature : docFeatures ) { + groups.add(toMap(feature)); + } + + root.put("data", data); + root.put("groups", groups); + root.put("timestamp", buildTimestamp); + root.put("version", absoluteVersion); + + return root; + } + + private static final Map toMap(DocumentedGATKFeature annotation) { + Map root = new HashMap(); + root.put("name", annotation.groupName()); + root.put("summary", annotation.summary()); + return root; + } + + public final static GATKDocWorkUnit findWorkUnitForClass(Class c, Set all) { + for ( final GATKDocWorkUnit unit : all ) + if ( unit.clazz.equals(c) ) + return unit; + return null; + } + + private void processDocWorkUnit(Configuration cfg, GATKDocWorkUnit unit, Set all) + throws IOException { + //System.out.printf("Processing documentation for class %s%n", unit.classDoc); + + unit.handler.processOne(rootDoc, unit, all); + + // Get or create a template + Template temp = cfg.getTemplate(unit.handler.getTemplateName(unit.classDoc)); + + // Merge data-model with template + File outputPath = new File(DESTINATION_DIR + "/" + unit.filename); + try { + Writer out = new OutputStreamWriter(new FileOutputStream(outputPath)); + temp.process(unit.forTemplate, out); + out.flush(); + } catch ( TemplateException e ) { + throw new ReviewedStingException("Failed to create GATK documentation", e); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java new file mode 100644 index 000000000..fd1048844 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -0,0 +1,372 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +import com.google.java.contract.Requires; +import com.sun.javadoc.ClassDoc; +import com.sun.javadoc.FieldDoc; +import com.sun.javadoc.RootDoc; +import com.sun.javadoc.Tag; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.*; +import java.lang.reflect.Field; +import java.util.*; + +/** + * + */ +public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { + private static Logger logger = Logger.getLogger(GenericDocumentationHandler.class); + GATKDocWorkUnit toProcess; + ClassDoc classdoc; + Set all; + RootDoc rootDoc; + + @Override + public boolean shouldBeProcessed(ClassDoc doc) { + return true; +// try { +// Class type = HelpUtils.getClassForDoc(doc); +// return JVMUtils.isConcrete(type); +// } catch ( ClassNotFoundException e ) { +// return false; +// } + } + + + @Override + public String getTemplateName(ClassDoc doc) throws IOException { + return "generic.template.html"; + } + + @Override + public void processOne(RootDoc rootDoc, GATKDocWorkUnit toProcessArg, Set allArg) { + this.rootDoc = rootDoc; + this.toProcess = toProcessArg; + this.all = allArg; + this.classdoc = toProcess.classDoc; + + //System.out.printf("%s class %s%n", toProcess.group, toProcess.classDoc); + Map root = new HashMap(); + + addHighLevelBindings(root); + addArgumentBindings(root); + addRelatedBindings(root); + + toProcess.setHandlerContent((String)root.get("summary"), root); + } + + protected void addHighLevelBindings(Map root) { + root.put("name", classdoc.name()); + + // Extract overrides from the doc tags. + StringBuilder summaryBuilder = new StringBuilder(); + for(Tag tag: classdoc.firstSentenceTags()) + summaryBuilder.append(tag.text()); + root.put("summary", summaryBuilder.toString()); + root.put("description", classdoc.commentText()); + root.put("timestamp", toProcess.buildTimestamp); + root.put("version", toProcess.absoluteVersion); + + for(Tag tag: classdoc.tags()) { + root.put(tag.name(), tag.text()); + } + } + + protected void addArgumentBindings(Map root) { + ParsingEngine parsingEngine = createStandardGATKParsingEngine(); + + // attempt to instantiate the class + Object instance = makeInstanceIfPossible(toProcess.clazz); + + Map> args = new HashMap>(); + root.put("arguments", args); + args.put("all", new ArrayList()); + args.put("required", new ArrayList()); + args.put("optional", new ArrayList()); + args.put("hidden", new ArrayList()); + args.put("depreciated", new ArrayList()); + try { + for ( ArgumentSource argumentSource : parsingEngine.extractArgumentSources(HelpUtils.getClassForDoc(classdoc)) ) { + ArgumentDefinition argDef = argumentSource.createArgumentDefinitions().get(0); + FieldDoc fieldDoc = getFieldDoc(classdoc, argumentSource.field.getName()); + Map argBindings = docForArgument(fieldDoc, argumentSource, argDef); // todo -- why can you have multiple ones? + if ( ! argumentSource.isHidden() || getDoclet().showHiddenFeatures() ) { + logger.debug(String.format("Processing %s", argumentSource)); + String kind = "optional"; + if ( argumentSource.isRequired() ) kind = "required"; + else if ( argumentSource.isHidden() ) kind = "hidden"; + else if ( argumentSource.isDeprecated() ) kind = "depreciated"; + + // get the value of the field + if ( instance != null ) { + Object value = getFieldValue(toProcess.clazz, instance, fieldDoc.name()); + if ( value != null ) + argBindings.put("defaultValue", prettyPrintValueString(value)); + } + + args.get(kind).add(argBindings); + args.get("all").add(argBindings); + } else { + logger.debug(String.format("Skipping hidden feature %s", argumentSource)); + } + } + } catch ( ClassNotFoundException e ) { + throw new RuntimeException(e); + } + } + + private Object getFieldValue(Class c, Object instance, String fieldName) { + Field field = JVMUtils.findField(c, fieldName); + if ( field != null ) { + Object value = JVMUtils.getFieldValue(field, instance); + //System.out.printf("Fetched value of field %s in class %s: %s%n", fieldName, c, value); + return value; + } else { + return findFieldValueInArgumentCollections(c, instance, fieldName); + } + } + + private Object findFieldValueInArgumentCollections(Class c, Object instance, String fieldName) { + for ( Field field : JVMUtils.getAllFields(c) ) { + if ( field.isAnnotationPresent(ArgumentCollection.class) ) { + //System.out.printf("Searching for %s in argument collection field %s%n", fieldName, field); + Object fieldValue = JVMUtils.getFieldValue(field, instance); + Object value = getFieldValue(fieldValue.getClass(), fieldValue, fieldName); + if ( value != null ) + return value; + } + } + + return null; + } + + /** + * Assumes value != null + * @param value + * @return + */ + private Object prettyPrintValueString(Object value) { + if ( value.getClass().isArray() ) { + Class type = value.getClass().getComponentType(); + if ( boolean.class.isAssignableFrom(type) ) + return Arrays.toString((boolean[])value); + if ( byte.class.isAssignableFrom(type) ) + return Arrays.toString((byte[])value); + if ( char.class.isAssignableFrom(type) ) + return Arrays.toString((char[])value); + if ( double.class.isAssignableFrom(type) ) + return Arrays.toString((double[])value); + if ( float.class.isAssignableFrom(type) ) + return Arrays.toString((float[])value); + if ( int.class.isAssignableFrom(type) ) + return Arrays.toString((int[])value); + if ( long.class.isAssignableFrom(type) ) + return Arrays.toString((long[])value); + if ( short.class.isAssignableFrom(type) ) + return Arrays.toString((short[])value); + if ( Object.class.isAssignableFrom(type) ) + return Arrays.toString((Object[])value); + else + throw new RuntimeException("Unexpected array type in prettyPrintValue. Value was " + value + " type is " + type); + } else + return value.toString(); + } + + private Object makeInstanceIfPossible(Class c) { + Object instance = null; + try { + // don't try to make something where we will obviously fail + if (! c.isEnum() && ! c.isAnnotation() && ! c.isAnonymousClass() && + ! c.isArray() && ! c.isPrimitive() & JVMUtils.isConcrete(c) ) { + instance = c.newInstance(); + //System.out.printf("Created object of class %s => %s%n", c, instance); + return instance; + } else + return null; + } + catch (IllegalAccessException e ) { } + catch (InstantiationException e ) { } + catch (ExceptionInInitializerError e ) { } + catch (SecurityException e ) { } + // this last one is super dangerous, but some of these methods catch ClassNotFoundExceptions + // and rethrow then as RuntimeExceptions + catch (RuntimeException e) {} + finally { + if ( instance == null ) + logger.warn(String.format("Unable to create instance of class %s => %s", c, instance)); + } + + return instance; + } + + protected void addRelatedBindings(Map root) { + List> extraDocsData = new ArrayList>(); + + // add in all of the explicitly related items + for ( final Class extraDocClass : toProcess.annotation.extraDocs() ) { + final GATKDocWorkUnit otherUnit = GATKDoclet.findWorkUnitForClass(extraDocClass, all); + if ( otherUnit == null ) + throw new ReviewedStingException("Requested extraDocs for class without any documentation: " + extraDocClass); + extraDocsData.add( + new HashMap(){{ + put("filename", otherUnit.filename); + put("name", otherUnit.name);}}); + + } + + List> hierarchyDocs = new ArrayList>(); + for (final GATKDocWorkUnit other : all ) { + final String relation = classRelationship(toProcess.clazz, other.clazz); + if ( relation != null ) + hierarchyDocs.add( + new HashMap(){{ + put("filename", other.filename); + put("relation", relation); + put("name", other.name);}}); + + } + + root.put("relatedDocs", hierarchyDocs); + root.put("extradocs", extraDocsData); + } + + private static final String classRelationship(Class me, Class other) { + if ( other.equals(me) ) + // no circular references + return null; + else if ( other.isAssignableFrom(me) ) + // toProcess is a superclass of other.clazz + return "superclass"; + else if ( me.isAssignableFrom(other) ) + // toProcess inherits from other.clazz + return "subclass"; + else + return null; + + } + + protected ParsingEngine createStandardGATKParsingEngine() { + CommandLineProgram clp = new CommandLineGATK(); + try { + CommandLineProgram.start(clp, new String[]{}, true); + return clp.parser; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private FieldDoc getFieldDoc(ClassDoc classDoc, String name) { + return getFieldDoc(classDoc, name, true); + } + + private FieldDoc getFieldDoc(ClassDoc classDoc, String name, boolean primary) { + //System.out.printf("Looking for %s in %s%n", name, classDoc.name()); + for ( FieldDoc fieldDoc : classDoc.fields(false) ) { + //System.out.printf("fieldDoc " + fieldDoc + " name " + fieldDoc.name()); + if ( fieldDoc.name().equals(name) ) + return fieldDoc; + + Field field = HelpUtils.getFieldForFieldDoc(fieldDoc); + if ( field.isAnnotationPresent(ArgumentCollection.class) ) { + ClassDoc typeDoc = getRootDoc().classNamed(fieldDoc.type().qualifiedTypeName()); + if ( typeDoc == null ) + throw new ReviewedStingException("Tried to get javadocs for ArgumentCollection field " + fieldDoc + " but could't find the class in the RootDoc"); + else { + FieldDoc result = getFieldDoc(typeDoc, name, false); + if ( result != null ) + return result; + // else keep searching + } + } + } + + // if we didn't find it here, wander up to the superclass to find the field + if ( classDoc.superclass() != null ) { + return getFieldDoc(classDoc.superclass(), name, false); + } + + if ( primary ) + throw new RuntimeException("No field found for expected field " + name); + else + return null; + } + + protected Map docForArgument(FieldDoc fieldDoc, ArgumentSource source, ArgumentDefinition def) { + Map root = new HashMap(); + root.put("name", def.shortName != null ? "-" + def.shortName : "--" + def.fullName ); + + if ( def.shortName != null && def.fullName != null ) + root.put("synonyms", "--" + def.fullName); + + root.put("required", def.required ? "yes" : "no"); + root.put("type", def.argumentType.getSimpleName()); + + // summary and fulltext + root.put("summary", def.doc != null ? def.doc : ""); + root.put("fulltext", fieldDoc.commentText()); + + List attributes = new ArrayList(); + // this one below is just too much. + //attributes.add(def.ioType.annotationClass.getSimpleName()); + if ( def.required ) attributes.add("required"); + // flag is just boolean, not interesting + //if ( def.isFlag ) attributes.add("flag"); + if ( def.isHidden ) attributes.add("hidden"); + if ( source.isDeprecated() ) attributes.add("depreciated"); + if ( attributes.size() > 0 ) + root.put("attributes", Utils.join(", ", attributes)); + + if ( def.validOptions != null ) { + root.put("options", docForEnumArgument(source.field.getType())); + } + + return root; + } + + @Requires("enumClass.isEnum()") + private List> docForEnumArgument(Class enumClass) { + ClassDoc doc = GATKDoclet.getClassDocForClass(rootDoc, enumClass); + if ( doc == null ) // || ! doc.isEnum() ) + throw new RuntimeException("Tried to get docs for enum " + enumClass + " but got instead: " + doc); + + List> bindings = new ArrayList>(); + for (final FieldDoc field : doc.fields(false) ) { + bindings.add( + new HashMap(){{ + put("name", field.name()); + put("summary", field.commentText());}}); + } + + return bindings; + } + +} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpTaglet.java deleted file mode 100644 index b350b1a29..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpTaglet.java +++ /dev/null @@ -1,91 +0,0 @@ -package org.broadinstitute.sting.utils.help; - -import com.sun.javadoc.Tag; -import com.sun.tools.doclets.Taglet; - -/** - * Basic functionality for the help taglet. - * - * @author mhanna - * @version 0.1 - */ -public abstract class HelpTaglet implements Taglet { - /** - * Return the name of this custom tag. - */ - public abstract String getName(); - - /** - * Will return false since this tag cannot be applied - * to a field. - * @return false since this tag cannot be applied to a field. - */ - public boolean inField() { - return false; - } - - /** - * Will return false since by default, help tags cannot be applied to a constructor. - * @return false since by default, help tags cannot be applied to a constructor. - */ - public boolean inConstructor() { - return false; - } - - /** - * Will return false since by default, help tags cannot be applied to a method. - * @return false since by default, this tag cannot be applied to a method. - */ - public boolean inMethod() { - return false; - } - - /** - * Will return false since by default, help tags cannot be applied to an overview. - * @return false since by default, help tags cannot be applied to an overview. - */ - public boolean inOverview() { - return false; - } - - /** - * Will return false since by default, help tags cannot be applied to a package. - * description. - * @return false since by default, help tags cannot be applied to a package. - */ - public boolean inPackage() { - return false; - } - - /** - * Will return false since help tags are by default not inline. - * @return false since help tags are by default not inline. - */ - public boolean inType() { - return false; - } - - /** - * Will return false since help tags are by default not inline. - * @return false since help tags are by default not inline. - */ - public boolean isInlineTag() { - return false; - } - - /** - * Create a string representation of this tag. Since this tag is only - * used by the help system, don't output any HTML. - */ - public String toString(Tag tag) { - return null; - } - - /** - * Create a string representation of this tag. Since this tag is only - * used by the help system, don't output any HTML. - */ - public String toString(Tag[] tags) { - return null; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java new file mode 100644 index 000000000..4527c6afe --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +import com.sun.javadoc.FieldDoc; +import com.sun.javadoc.PackageDoc; +import com.sun.javadoc.ProgramElementDoc; +import org.broadinstitute.sting.utils.classloader.JVMUtils; + +import java.lang.reflect.Field; + +public class HelpUtils { + protected static boolean implementsInterface(ProgramElementDoc classDoc, Class... interfaceClasses) { + for (Class interfaceClass : interfaceClasses) + if (assignableToClass(classDoc, interfaceClass, false)) + return true; + return false; + } + + protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) { + try { + Class type = getClassForDoc(classDoc); + return lhsClass.isAssignableFrom(type) && (!requireConcrete || JVMUtils.isConcrete(type)); + } catch (Throwable t) { + // Ignore errors. + return false; + } + } + + protected static Class getClassForDoc(ProgramElementDoc doc) throws ClassNotFoundException { + return Class.forName(getClassName(doc)); + } + + protected static Field getFieldForFieldDoc(FieldDoc fieldDoc) { + try { + Class clazz = getClassForDoc(fieldDoc.containingClass()); + return JVMUtils.findField(clazz, fieldDoc.name()); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + /** + * Reconstitute the class name from the given class JavaDoc object. + * + * @param doc the Javadoc model for the given class. + * @return The (string) class name of the given class. + */ + protected static String getClassName(ProgramElementDoc doc) { + PackageDoc containingPackage = doc.containingPackage(); + return containingPackage.name().length() > 0 ? + String.format("%s.%s", containingPackage.name(), doc.name()) : + String.format("%s", doc.name()); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java index 6ee12d42e..a28a7bcee 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/ResourceBundleExtractorDoclet.java @@ -28,14 +28,9 @@ package org.broadinstitute.sting.utils.help; import com.sun.javadoc.*; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.classloader.JVMUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.*; -import java.util.HashSet; -import java.util.Properties; -import java.util.Scanner; -import java.util.Set; +import java.util.*; /** * Extracts certain types of javadoc (specifically package and class descriptions) and makes them available @@ -48,17 +43,21 @@ public class ResourceBundleExtractorDoclet { /** * Taglet for the particular version number. */ - private static final String VERSION_TAGLET_NAME = "version"; + public static final String VERSION_TAGLET_NAME = "version"; + public static final String SUMMARY_TAGLET_NAME = "help.summary"; + public static final String DESCRIPTION_TAGLET_NAME = "help.description"; /** * Maintains a collection of resources in memory as they're accumulated. */ - private static final Properties resourceText = new Properties(); + protected final Properties resourceText = new Properties(); /** * Maintains a collection of classes that should really be documented. */ - private static final Set undocumentedWalkers = new HashSet(); + protected final Set undocumentedWalkers = new HashSet(); + + protected String buildTimestamp = null, absoluteVersion = null; /** * Extracts the contents of certain types of javadoc and adds them to an XML file. @@ -67,26 +66,38 @@ public class ResourceBundleExtractorDoclet { * @throws IOException if output can't be written. */ public static boolean start(RootDoc rootDoc) throws IOException { + ResourceBundleExtractorDoclet doclet = new ResourceBundleExtractorDoclet(); + PrintStream out = doclet.loadData(rootDoc, true); + doclet.processDocs(rootDoc, out); + return true; + } + + protected PrintStream loadData(RootDoc rootDoc, boolean overwriteResourcesFile) { PrintStream out = System.out; - String buildTimestamp = null, versionPrefix = null, versionSuffix = null, absoluteVersion = null; for(String[] options: rootDoc.options()) { if(options[0].equals("-out")) { - loadExistingResourceFile(options[1], rootDoc); - out = new PrintStream(options[1]); + try { + loadExistingResourceFile(options[1], rootDoc); + if ( overwriteResourcesFile ) + out = new PrintStream(options[1]); + } catch ( FileNotFoundException e ) { + throw new RuntimeException(e); + } catch ( IOException e ) { + throw new RuntimeException(e); + } } if(options[0].equals("-build-timestamp")) buildTimestamp = options[1]; - if(options[0].equals("-version-prefix")) - versionPrefix = options[1]; - if(options[0].equals("-version-suffix")) - versionSuffix = options[1]; if (options[0].equals("-absolute-version")) absoluteVersion = options[1]; } resourceText.setProperty("build.timestamp",buildTimestamp); + return out; + } + protected void processDocs(RootDoc rootDoc, PrintStream out) { // Cache packages as we see them, since there's no direct way to iterate over packages. Set packages = new HashSet(); @@ -97,13 +108,19 @@ public class ResourceBundleExtractorDoclet { if(isRequiredJavadocMissing(currentClass) && isWalker(currentClass)) undocumentedWalkers.add(currentClass.name()); - renderHelpText(getClassName(currentClass),currentClass,versionPrefix,versionSuffix,absoluteVersion); + renderHelpText(HelpUtils.getClassName(currentClass),currentClass); } for(PackageDoc currentPackage: packages) - renderHelpText(currentPackage.name(),currentPackage,versionPrefix,versionSuffix,absoluteVersion); + renderHelpText(currentPackage.name(),currentPackage); - resourceText.store(out,"Strings displayed by the Sting help system"); + try { + resourceText.store(out,"Strings displayed by the Sting help system"); + } catch ( FileNotFoundException e ) { + throw new RuntimeException(e); + } catch ( IOException e ) { + throw new RuntimeException(e); + } // ASCII codes for making text blink final String blink = "\u001B\u005B\u0035\u006D"; @@ -111,8 +128,6 @@ public class ResourceBundleExtractorDoclet { if(undocumentedWalkers.size() > 0) Utils.warnUser(String.format("The following walkers are currently undocumented: %s%s%s", blink, Utils.join(" ",undocumentedWalkers), reset)); - - return true; } /** @@ -121,7 +136,7 @@ public class ResourceBundleExtractorDoclet { * @return Number of potential parameters; 0 if not supported. */ public static int optionLength(String option) { - if(option.equals("-build-timestamp") || option.equals("-version-prefix") || option.equals("-version-suffix") || option.equals("-out") || option.equals("-absolute-version") ) { + if(option.equals("-build-timestamp") || option.equals("-out") || option.equals("-absolute-version") ) { return 2; } return 0; @@ -137,7 +152,7 @@ public class ResourceBundleExtractorDoclet { * @throws IOException if there is an I/O-related error other than FileNotFoundException * while attempting to read the resource file. */ - private static void loadExistingResourceFile( String resourceFileName, RootDoc rootDoc ) throws IOException { + private void loadExistingResourceFile( String resourceFileName, RootDoc rootDoc ) throws IOException { try { BufferedReader resourceFile = new BufferedReader(new FileReader(resourceFileName)); try { @@ -157,27 +172,8 @@ public class ResourceBundleExtractorDoclet { * @param classDoc the type of the given class. * @return True if the class of the given name is a walker. False otherwise. */ - private static boolean isWalker(ClassDoc classDoc) { - try { - Class type = Class.forName(getClassName(classDoc)); - return Walker.class.isAssignableFrom(type) && JVMUtils.isConcrete(type); - } - catch(Throwable t) { - // Ignore errors. - return false; - } - } - - /** - * Reconstitute the class name from the given class JavaDoc object. - * @param classDoc the Javadoc model for the given class. - * @return The (string) class name of the given class. - */ - private static String getClassName(ClassDoc classDoc) { - PackageDoc containingPackage = classDoc.containingPackage(); - return containingPackage.name().length() > 0 ? - String.format("%s.%s",containingPackage.name(),classDoc.name()) : - String.format("%s",classDoc.name()); + protected static boolean isWalker(ClassDoc classDoc) { + return HelpUtils.assignableToClass(classDoc, Walker.class, true); } /** @@ -186,8 +182,6 @@ public class ResourceBundleExtractorDoclet { * @return True if the JavaDoc is missing. False otherwise. */ private static boolean isRequiredJavadocMissing(ClassDoc classDoc) { - if(classDoc.containingPackage().name().contains("oneoffprojects")) - return false; return classDoc.commentText().length() == 0 || classDoc.commentText().contains("Created by IntelliJ"); } @@ -195,53 +189,23 @@ public class ResourceBundleExtractorDoclet { * Renders all the help text required for a given name. * @param elementName element name to use as the key * @param element Doc element to process. - * @param versionPrefix Text to add to the start of the version string. - * @param versionSuffix Text to add to the end of the version string. */ - private static void renderHelpText(String elementName, Doc element, String versionPrefix, String versionSuffix, String absoluteVersion) { - // Extract overrides from the doc tags. - String name = null; - String version = null; + private void renderHelpText(String elementName, Doc element) { StringBuilder summaryBuilder = new StringBuilder(); for(Tag tag: element.firstSentenceTags()) summaryBuilder.append(tag.text()); String summary = summaryBuilder.toString(); String description = element.commentText(); - for(Tag tag: element.tags()) { - if(tag.name().equals("@"+DisplayNameTaglet.NAME)) { - if(name != null) - throw new ReviewedStingException("Only one display name tag can be used per package / walker."); - name = tag.text(); - } - else if(tag.name().equals("@"+VERSION_TAGLET_NAME)) { - if ( absoluteVersion != null ) { - version = absoluteVersion; - } - else { - version = String.format("%s%s%s", (versionPrefix != null) ? versionPrefix : "", - tag.text(), - (versionSuffix != null) ? versionSuffix : ""); - } - } - else if(tag.name().equals("@"+SummaryTaglet.NAME)) - summary = tag.text(); - else if(tag.name().equals("@"+DescriptionTaglet.NAME)) - description = tag.text(); - } - - // Write out an alternate element name, if exists. - if(name != null) - resourceText.setProperty(String.format("%s.%s",elementName,DisplayNameTaglet.NAME),name); - - if(version != null) - resourceText.setProperty(String.format("%s.%s",elementName,VERSION_TAGLET_NAME),version); + // this might seem unnecessary, but the GATK command line program uses this tag to determine the version when running + if(absoluteVersion != null) + resourceText.setProperty(String.format("%s.%s",elementName,VERSION_TAGLET_NAME),absoluteVersion); // Write out an alternate element summary, if exists. - resourceText.setProperty(String.format("%s.%s",elementName,SummaryTaglet.NAME),formatText(summary)); + resourceText.setProperty(String.format("%s.%s",elementName,SUMMARY_TAGLET_NAME),formatText(summary)); // Write out an alternate description, if present. - resourceText.setProperty(String.format("%s.%s",elementName,DescriptionTaglet.NAME),formatText(description)); + resourceText.setProperty(String.format("%s.%s",elementName,DESCRIPTION_TAGLET_NAME),formatText(description)); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/help/SummaryTaglet.java b/public/java/src/org/broadinstitute/sting/utils/help/SummaryTaglet.java deleted file mode 100644 index db8b55940..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/help/SummaryTaglet.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.broadinstitute.sting.utils.help; - -import com.sun.tools.doclets.Taglet; - -import java.util.Map; - -/** - * Provide an alternate brief summary for this walker / package. - * Acts as an alternative to the first sentence employed by default. - * @author mhanna - * @version 0.1 - */ -public class SummaryTaglet extends HelpTaglet { - /** - * The key tag for this taglet. - */ - public static final String NAME = "help.summary"; - - /** - * Return the name of this custom tag. - */ - @Override - public String getName() { - return NAME; - } - - /** - * Will return false since overviews are always named - * by the @WalkerName tag. - * @return false always - */ - @Override - public boolean inOverview() { - return true; - } - - /** - * Will return true to indicate that packages can be given useful summary. - * @return true always - */ - @Override - public boolean inPackage() { - return true; - } - - /** - * Register this Taglet. - * @param tagletMap the map to register this tag to. - */ - public static void register(Map tagletMap) { - SummaryTaglet tag = new SummaryTaglet(); - Taglet t = (Taglet)tagletMap.get(tag.getName()); - if (t != null) { - tagletMap.remove(tag.getName()); - } - tagletMap.put(tag.getName(), tag); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/ClosableReentrantLock.java b/public/java/src/org/broadinstitute/sting/utils/threading/ClosableReentrantLock.java deleted file mode 100644 index d16c19130..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/ClosableReentrantLock.java +++ /dev/null @@ -1,16 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import java.util.concurrent.locks.ReentrantLock; - -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: 1/19/11 - * Time: 9:50 AM - * - * Simple extension of a ReentrantLock that supports a close method. - */ -public class ClosableReentrantLock extends ReentrantLock { - public boolean ownsLock() { return super.isHeldByCurrentThread(); } - public void close() {} -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/FileBackedGenomeLocProcessingTracker.java b/public/java/src/org/broadinstitute/sting/utils/threading/FileBackedGenomeLocProcessingTracker.java deleted file mode 100644 index 3763ec67d..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/FileBackedGenomeLocProcessingTracker.java +++ /dev/null @@ -1,114 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.*; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - -/** - * Keeps a copy of the processing locks in a file - */ -public class FileBackedGenomeLocProcessingTracker extends GenomeLocProcessingTracker { - private static final Logger logger = Logger.getLogger(FileBackedGenomeLocProcessingTracker.class); - private static final boolean DEBUG = false; - private static final String READ_MODE = "r"; - private static final String WRITE_MODE = "rws"; - - private final File sharedFile; - private final GenomeLocParser parser; - private long lastReadPosition = 0; - - public FileBackedGenomeLocProcessingTracker(File sharedFile, GenomeLocParser parser, ClosableReentrantLock lock, PrintStream status) { - super(lock, status); - - this.sharedFile = sharedFile; - this.parser = parser; - } - - private RandomAccessFile openFile(String mode) { - try { - return new RandomAccessFile(sharedFile, mode); - } catch (FileNotFoundException e) { - throw new UserException.CouldNotCreateOutputFile(sharedFile, e); - } - } - - private void closeFile(RandomAccessFile raFile) { - try { - if ( raFile != null ) raFile.close(); - } catch (IOException e) { - throw new UserException.CouldNotCreateOutputFile(sharedFile, e); - } - } - - @Override - protected List readNewLocs() { - List newPLocs = new ArrayList(); // todo -- gratitous object creation - - if ( sharedFile.exists() ) { - RandomAccessFile raFile = null; - try { - raFile = openFile(READ_MODE); - //logger.warn(String.format("Reading new locs at: file.length=%d last=%d", raFile.length(), lastReadPosition)); - if ( raFile.length() > lastReadPosition ) { - raFile.seek(lastReadPosition); - - int counter = 0; - String line = raFile.readLine(); // Read another line - while ( line != null ) { - String[] parts = line.split(" "); - if ( parts.length != 2 ) throw new ReviewedStingException("BUG: bad sharedFile line '" + line + "' at " + raFile.getFilePointer()); - ProcessingLoc ploc = new ProcessingLoc(parser.parseGenomeLoc(parts[0]), parts[1]); - //logger.warn(" Read " + ploc); - newPLocs.add(ploc); - line = raFile.readLine(); - counter++; - } - lastReadPosition = raFile.getFilePointer(); - if ( DEBUG ) logger.warn(String.format("Read %s locs from file, current pos is %d, # read new locs is %d", - counter, lastReadPosition, newPLocs.size())); - } - } catch (FileNotFoundException e) { - throw new UserException.CouldNotReadInputFile(sharedFile, e); - } catch (IOException e) { - throw new ReviewedStingException("Couldn't read sharedFile " + sharedFile, e); - } finally { - closeFile(raFile); - } - } - - return newPLocs; - } - - @Override - protected void registerNewLocs(Collection plocs) { - RandomAccessFile raFile = null; - - try { - raFile = openFile(WRITE_MODE); - long startPos = raFile.getFilePointer(); - raFile.seek(raFile.length()); - StringBuffer bytes = new StringBuffer(); - for ( ProcessingLoc ploc : plocs ) { - String packet = String.format("%s %s%n", ploc.getLocation(), ploc.getOwner()); - bytes.append(packet); - if ( DEBUG ) logger.warn(String.format("Wrote loc %s to file: %d + %d bytes ending at %d", ploc, startPos, packet.length(), raFile.getFilePointer())); - } - raFile.write(bytes.toString().getBytes()); - //raFile.getChannel().force(true); - } catch (FileNotFoundException e) { - throw new UserException.CouldNotCreateOutputFile(sharedFile, e); - } catch (IOException e) { - throw new UserException.CouldNotCreateOutputFile(sharedFile, e); - } finally { - closeFile(raFile); - } - } -} - - diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTracker.java b/public/java/src/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTracker.java deleted file mode 100644 index e97a73fb8..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTracker.java +++ /dev/null @@ -1,486 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.HasGenomeLocation; -import org.broadinstitute.sting.utils.SimpleTimer; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.PrintStream; -import java.text.SimpleDateFormat; -import java.util.*; - -/** - * Abstract base class to coordinating data processing by a collecting for processes / threads. - * - * Conceptually, the genome is viewed as a collection of non-overlapping genome location: - * - * chr1:1-10 - * chr1:11-20 - * chr1:21-30 - * etc. - * - * This class, and it's concrete derived classes, provide the ability to claim individual locations - * as "mine", and exclude other processes / threads from processing them. At the lowest-level this - * is implemented by the claimOwnership(loc, name) function, that returns true if loc free (unclaimed) - * and makes name the owner of loc. High-level, and more efficient operations provide claiming - * iterators over streams of objects implementing the HasGenomeLocation interface, so that you can - * write code that looks like: - * - * for ( GenomeLoc ownedLoc : onlyOwned(allLocsToProcess.iterator) ) { - * doSomeWork(ownedLoc) - * - * Much of the code in this class is actually surrounding debugging and performance metrics code. - * The actual synchronization code is separated out into the ClosableReentrantLock() system - * and the two abstract functions: - * - * protected abstract void registerNewLocs(Collection plocs); - * protected abstract Collection readNewLocs(); - * - * That maintain the state of the tracker. - * - * That is, the ProcessingTracker is made of two components: a thread / process locking system and - * a subclass that implements the methods to record new claimed state changes and to read out updates - * that may have occurred by another thread or process. - * - * NOTE: this class assumes that all threads / processes are working with the same set of potential - * GenomeLocs to own. Claiming chr1:1-10 and then chr1:5-6 is allowed by the system. Basically, - * you only can stake claim to GenomeLocs that are .equal(). - */ -public abstract class GenomeLocProcessingTracker { - private final static Logger logger = Logger.getLogger(FileBackedGenomeLocProcessingTracker.class); - private final static SimpleDateFormat STATUS_FORMAT = new SimpleDateFormat("HH:mm:ss,SSS"); - private final static int DEFAULT_OWNERSHIP_ITERATOR_SIZE = 1; - - /** - * Useful state strings for printing status - */ - private final static String GOING_FOR_LOCK = "going_for_lock"; - private final static String RELEASING_LOCK = "releasing_lock"; - private final static String HAVE_LOCK = "have_lock"; - private final static String RUNNING = "running"; - - /** - * A map, for efficiency, that allows quick lookup of the processing loc for a - * given GenomeLoc. The map points from loc -> loc / owner as a ProcessingLoc - */ - private final Map processingLocs; - - /** - * The locking object used to protect data from simulatanous access by multiple - * threads or processes. - */ - private final ClosableReentrantLock lock; - - /** A stream for writing status messages. Can be null if we aren't writing status */ - private final PrintStream status; - - // - // Timers for recording performance information - // Note -- these cannot be used because this class isn't thread safe, and neither are the - // timers, so they result in invalid operations w.r.t. the SimpleTimer contract - // -// protected final SimpleTimer writeTimer = new SimpleTimer("writeTimer"); -// protected final SimpleTimer readTimer = new SimpleTimer("readTimer"); -// protected final SimpleTimer lockWaitTimer = new SimpleTimer("lockWaitTimer"); - protected final SimpleTimer timer = new SimpleTimer(); - protected long nLocks = 0, nWrites = 0, nReads = 0; - - // -------------------------------------------------------------------------------- - // - // Creating ProcessingTrackers - // - // -------------------------------------------------------------------------------- - public GenomeLocProcessingTracker(ClosableReentrantLock lock, PrintStream status) { - this.processingLocs = new HashMap(); - this.status = status; - this.lock = lock; - printStatusHeader(); - } - - // -------------------------------------------------------------------------------- - // - // Code to override to change the dynamics of the the GenomeLocProcessingTracker - // - // -------------------------------------------------------------------------------- - - protected void close() { - lock.close(); - if ( status != null ) status.close(); - } - - /** - * Takes a collection of newly claimed (i.e., previous unclaimed) genome locs - * and the name of their owner and "registers" this data in some persistent way that's - * visible to all threads / processes communicating via this GenomeLocProcessingTracker. - * - * Could be a in-memory data structure (a list) if we are restricting ourselves to intra-memory - * parallelism, a locked file on a shared file system, or a server we communicate with. - * - * @param plocs - */ - protected abstract void registerNewLocs(Collection plocs); - - /** - * The inverse of the registerNewLocs() function. Looks at the persistent data store - * shared by all threads / processes and returns the ones that have appeared since the last - * call to readNewLocs(). Note that we expect the pair of registerNewLocs and readNewLocs to - * include everything, even locs registered by this thread / process. For example: - * - * readNewLocs() => List() - * registerNewLocs(List(x, y,)) => void - * readNewLocs() => List(x,y)) - * - * even for this thread or process. - * @return - */ - protected abstract Collection readNewLocs(); - - - // -------------------------------------------------------------------------------- - // - // Code to claim intervals for processing and query for their ownership - // - // -------------------------------------------------------------------------------- - - /** - * Queries the current database if a location is owned. Does not guarantee that the - * loc can be owned in a future call, though. - * - * @param loc - * @return - */ - public final boolean locIsOwned(GenomeLoc loc, String id) { - return findOwner(loc, id) != null; - } - - /** - * The workhorse routine. Attempt to claim processing ownership of loc, with my name. - * This is an atomic operation -- other threads / processes will wait until this function - * returns. The return result is the ProcessingLoc object describing who owns this - * location. If the location isn't already claimed and we now own the location, the pl owner - * will be myName. Otherwise, the name of the owner can found in the pl. - * - * @param loc - * @param myName - * @return - */ - public final ProcessingLoc claimOwnership(final GenomeLoc loc, final String myName) { - // processingLocs is a shared memory synchronized object, and this - // method is synchronized, so we can just do our processing - return new WithLock(myName) { - public ProcessingLoc doBody() { - ProcessingLoc owner = findOwner(loc, myName); - if ( owner == null ) { // we are unowned - owner = new ProcessingLoc(loc, myName); - registerNewLocsWithTimers(Arrays.asList(owner), myName); - } - return owner; - } - }.run(); - } - - - // -------------------------------------------------------------------------------- - // - // High-level iterator-style interface to claiming ownership - // - // -------------------------------------------------------------------------------- - - /** - * A higher-level, and more efficient, interface to obtain the next location we own. Takes an - * iterator producing objects that support the getLocation() interface, and returns the next - * object in that stream that we can claim ownership of. Returns null if we run out of elements - * during the iteration. - * - * Can be more efficiently implemented in subclasses to avoid multiple unlocking - * - * @param iterator - * @param myName - * @return - */ - public final T claimOwnershipOfNextAvailable(Iterator iterator, String myName) { - OwnershipIterator myIt = new OwnershipIterator(iterator, myName, 1); - return myIt.next(); - } - - public final Iterable onlyOwned(Iterator iterator, String myName) { - return new OwnershipIterator(iterator, myName); - } - - private final class OwnershipIterator implements Iterator, Iterable { - private final Iterator subit; - private final String myName; - private final Queue cache; - private final int cacheSize; - - public OwnershipIterator(Iterator subit, String myName) { - this(subit, myName, DEFAULT_OWNERSHIP_ITERATOR_SIZE); - } - - public OwnershipIterator(Iterator subit, String myName, int cacheSize) { - this.subit = subit; - this.myName = myName; - cache = new LinkedList(); - this.cacheSize = cacheSize; - } - - /** - * Will return true for all elements of subit, even if we can't get ownership of some of the future - * elements and so will return null there - * @return - */ - public final boolean hasNext() { - return cache.peek() != null || subit.hasNext(); - } - - /** - * High performance iterator that only locks and unlocks once per claimed object found. Avoids - * locking / unlocking for each query - * - * @return an object of type T owned by this thread, or null if none of the remaining object could be claimed - */ - public final T next() { - if ( cache.peek() != null) - return cache.poll(); - else { - // cache is empty, we need to fill up the cache and return the first element of the queue - return new WithLock(myName) { - public T doBody() { - // read once the database of owners at the start - updateAndGetProcessingLocs(myName); - - boolean done = false; - Queue pwns = new LinkedList(); // ;-) - while ( !done && cache.size() < cacheSize && subit.hasNext() ) { - final T elt = subit.next(); - GenomeLoc loc = elt.getLocation(); - - ProcessingLoc owner = processingLocs.get(loc); - - if ( owner == null ) { // we are unowned - owner = new ProcessingLoc(loc, myName); - pwns.offer(owner); - if ( ! cache.offer(elt) ) throw new ReviewedStingException("Cache offer unexpectedly failed"); - if ( GenomeLoc.isUnmapped(loc) ) done = true; - } - // if not, we continue our search - } - - registerNewLocsWithTimers(pwns, myName); - - // we've either filled up the cache or run out of elements. Either way we return - // the first element of the cache. If the cache is empty, we return null here. - return cache.poll(); - } - }.run(); - } - } - - public final void remove() { - throw new UnsupportedOperationException(); - } - - public final Iterator iterator() { - return this; - } - } - - // -------------------------------------------------------------------------------- - // - // private / protected low-level accessors / manipulators and utility functions - // - // -------------------------------------------------------------------------------- - - /** - * Useful debugging function that returns the ProcessingLoc who owns loc. ID - * is provided for debugging purposes - * @param loc - * @param id - * @return - */ - protected final ProcessingLoc findOwner(GenomeLoc loc, String id) { - // fast path to check if we already have the existing genome loc in memory for ownership claims - // getProcessingLocs() may be expensive [reading from disk, for example] so we shouldn't call it - // unless necessary - ProcessingLoc x = processingLocs.get(loc); - return x == null ? updateAndGetProcessingLocs(id).get(loc) : x; - } - - /** - * Returns the list of currently owned locations, updating the database as necessary. - * DO NOT MODIFY THIS MAP! As with all parallelizing data structures, the list may be - * out of date immediately after the call returns, or may be updating on the fly. - * @return - */ - protected final Map updateAndGetProcessingLocs(String myName) { - return new WithLock>(myName) { - public Map doBody() { -// readTimer.restart(); - for ( ProcessingLoc p : readNewLocs() ) - processingLocs.put(p.getLocation(), p); -// readTimer.stop(); - nReads++; - return processingLocs; - } - }.run(); - } - - /** - * Wrapper around registerNewLocs that also times the operation - * - * @param plocs - * @param myName - */ - protected final void registerNewLocsWithTimers(Collection plocs, String myName) { -// writeTimer.restart(); - registerNewLocs(plocs); - nWrites++; -// writeTimer.stop(); - } - - private final void printStatusHeader() { - if ( status != null ) status.printf("process.id\thr.time\ttime\tstate%n"); - } - - private final void printStatus(String id, long machineTime, String state) { - // prints a line like processID human-readable-time machine-time state - if ( status != null ) { - status.printf("%s\t%s\t%d\t%s%n", id, STATUS_FORMAT.format(machineTime), machineTime, state); - status.flush(); - } - } - - - /** - * Lock the data structure, preventing other threads / processes from reading and writing to the - * common store - * @param id the name of the process doing the locking - */ - private final void lock(String id) { - //lockWaitTimer.restart(); - boolean hadLock = lock.ownsLock(); - if ( ! hadLock ) { - nLocks++; - //printStatus(id, lockWaitTimer.currentTime(), GOING_FOR_LOCK); - } - lock.lock(); - //lockWaitTimer.stop(); - //if ( ! hadLock ) printStatus(id, lockWaitTimer.currentTime(), HAVE_LOCK); - } - - /** - * Unlock the data structure, allowing other threads / processes to read and write to the common store - * @param id the name of the process doing the unlocking - */ - private final void unlock(String id) { - if ( lock.getHoldCount() == 1 ) printStatus(id, timer.currentTime(), RELEASING_LOCK); - lock.unlock(); - if ( ! lock.ownsLock() ) printStatus(id, timer.currentTime(), RUNNING); - } - - // useful code for getting - public final long getNLocks() { return nLocks; } - public final long getNReads() { return nReads; } - public final long getNWrites() { return nWrites; } -// public final double getTimePerLock() { return lockWaitTimer.getElapsedTime() / Math.max(nLocks, 1); } -// public final double getTimePerRead() { return readTimer.getElapsedTime() / Math.max(nReads,1); } -// public final double getTimePerWrite() { return writeTimer.getElapsedTime() / Math.max(nWrites,1); } - - // -------------------------------------------------------------------------------- - // - // Java-style functional form for with lock do { x }; - // - // -------------------------------------------------------------------------------- - - /** - * Private utility class that executes doBody() method with the lock() acquired and - * handles property unlock()ing the system, even if an error occurs. Allows one to write - * clean code like: - * - * new WithLock(name) { - * public Integer doBody() { doSomething(); return 1; } - * }.run() - * - * @param the return type of the doBody() method - */ - private abstract class WithLock { - private final String myName; - - public WithLock(String myName) { - this.myName = myName; - } - - protected abstract T doBody(); - - public T run() { - boolean locked = false; - try { - lock(myName); - locked = true; - return doBody(); - } finally { - if (locked) unlock(myName); - } - } - } - - // -------------------------------------------------------------------------------- - // - // main function for testing performance - // - // -------------------------------------------------------------------------------- - public static void main(String[] args) { - //BasicConfigurator.configure(); - - final String ref = args[0]; - final File file = new File(args[1]); - final int cycles = Integer.valueOf(args[2]); - - File referenceFile = new File(ref); - try { - final IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(referenceFile); - final String chr1 = fasta.getSequenceDictionary().getSequence(1).getSequenceName(); - final GenomeLocParser genomeLocParser = new GenomeLocParser(fasta); - - final class MyTest { - String name; - GenomeLocProcessingTracker tracker; - - MyTest(String name, GenomeLocProcessingTracker tracker) { - this.name = name; - this.tracker = tracker; - } - - public void execute(int cycles) { - SimpleTimer delta = new SimpleTimer("delta"); - SimpleTimer timer = new SimpleTimer("none"); - - if ( file.exists() ) file.delete(); - timer.start(); - delta.start(); - for ( int i = 1; i < cycles; i++ ) { - tracker.claimOwnership(genomeLocParser.createGenomeLoc(chr1, i, i+1), "ABCDEFGHIJKL"); - if ( i % 1000 == 0 ) { - System.out.printf("%s\t%d\t%d\t%.4f\t%.4f%n", name, i, timer.currentTime(), timer.getElapsedTime(), delta.getElapsedTime() ); - delta.restart(); - } - } - } - } - - System.out.printf("name\tcycle\tcurrent.time\telapsed.time\tdelta%n"); - new MyTest("in-memory", new SharedMemoryGenomeLocProcessingTracker(new ClosableReentrantLock())).execute(cycles); - new MyTest("nio", new FileBackedGenomeLocProcessingTracker(file, genomeLocParser, new ClosableReentrantLock(), null)).execute(cycles); - new MyTest("nio-file-lock", new FileBackedGenomeLocProcessingTracker(file, genomeLocParser, new SharedFileThreadSafeLock(file,1), null)).execute(cycles); - } - catch(FileNotFoundException ex) { - throw new UserException.CouldNotReadInputFile(referenceFile,ex); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/NoOpGenomeLocProcessingTracker.java b/public/java/src/org/broadinstitute/sting/utils/threading/NoOpGenomeLocProcessingTracker.java deleted file mode 100644 index ad2a6d31b..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/NoOpGenomeLocProcessingTracker.java +++ /dev/null @@ -1,26 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import java.util.Collection; -import java.util.Collections; -import java.util.List; - -/** - * Base class, and null tracker. Always says that a GenomeLoc is ready for processing. It is - * critical that this class already return that a loc is owned, no matter if it's been seen before, - * etc. ReadShards can differ in their contents but have the same "unmapped" genome loc - */ -public class NoOpGenomeLocProcessingTracker extends GenomeLocProcessingTracker { - public NoOpGenomeLocProcessingTracker() { - super(new ClosableReentrantLock(), null); - } - - @Override - protected void registerNewLocs(Collection loc) { - ; - } - - @Override - protected List readNewLocs() { - return Collections.emptyList(); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/ProcessingLoc.java b/public/java/src/org/broadinstitute/sting/utils/threading/ProcessingLoc.java deleted file mode 100644 index ee2283dcf..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/ProcessingLoc.java +++ /dev/null @@ -1,71 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.HasGenomeLocation; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: 1/19/11 - * Time: 8:06 AM - * - * Information about processing locations and their owners. Contains two basic data, associated - * together. The first is a genome loc, and the second is the name of the owner, as a string. - * - * chr1:1-10 Mark - * chr2:11-20 DePristo - * - * would be two ProcessingLocs that first indicate that the first 10 bp of chr1 are owned by Mark, - * and the second is owned by DePristo. - */ -public class ProcessingLoc implements HasGenomeLocation { - private final GenomeLoc loc; - private final String owner; - - /** - * Create a loc that's already owned - * @param loc - * @param owner - */ - public ProcessingLoc(GenomeLoc loc, String owner) { - if ( loc == null || owner == null ) { - throw new ReviewedStingException("BUG: invalid ProcessingLoc detected: " + loc + " owner " + owner); - } - - this.loc = loc; - this.owner = owner.intern(); // reduce memory consumption by interning the string - } - - public GenomeLoc getLocation() { - return loc; - } - - public String getOwner() { - return owner; - } - - /** - * Returns true iff the owner of this processing loc is name. Can be used to determine - * the owner of this processing location. - * - * @param name - * @return - */ - public boolean isOwnedBy(String name) { - return getOwner().equals(name); - } - - public String toString() { return String.format("ProcessingLoc(%s,%s)", loc, owner); } - - public boolean equals(Object other) { - if (other instanceof ProcessingLoc ) - return this.loc.equals(((ProcessingLoc)other).loc) && this.owner.equals(((ProcessingLoc)other).owner); - else - return false; - } - - public int compareTo(ProcessingLoc other) { - return this.getLocation().compareTo(other.getLocation()); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileLock.java b/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileLock.java deleted file mode 100644 index 0f47da413..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileLock.java +++ /dev/null @@ -1,171 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import org.apache.log4j.Logger; -import org.apache.lucene.store.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.File; -import java.io.IOException; - -/** - * User: depristo - * Date: 1/19/11 - * Time: 8:24 AM - * - * A reentrant lock for a shared file common file in the file system. Relies on a a Lucene SimpleFSLock - * to manage on disk file locking. - */ -public class SharedFileLock extends ClosableReentrantLock { // todo -- kinda gross inheritance. The super lock is never used - private static Logger logger = Logger.getLogger(SharedFileLock.class); - - private static final String VERIFY_HOST = System.getProperty("verify.host", "gsa1"); - private static final boolean VERIFY = false; - private static final int VERIFY_PORT = 5050; - - // 5 minutes => 360 seconds of trying -> failure - protected static final int DEFAULT_N_TRIES = 1000; - protected static final long DEFAULT_MILLISECONDS_PER_TRY = 360; - - /** The file we are locking */ - private final File file; - - private final LockFactory lockFactory; - private Lock fileLock = null; - - /** - * A counter that indicates the number of 'locks' on this file. - * If locks == 2, then two unlocks are required - * before any resources are freed. - */ - int fileLockReentrantCounter = 0; - - // type of locking - private final int nRetries; - private final long milliSecPerTry; - - /** - * Create a SharedFileThreadSafeLock object locking the file - * @param file - */ - public SharedFileLock(File file, int nRetries, long milliSecPerTry, int ID) { - super(); - this.file = file; - this.nRetries = nRetries; - this.milliSecPerTry = milliSecPerTry; - - File lockDir = new File(file.getParent() == null ? "./" : file.getParent()); - try { - LockFactory factory = new SimpleFSLockFactory(lockDir); - if ( VERIFY ) { // don't forget to start up the VerifyLockServer - this.lockFactory = new VerifyingLockFactory((byte)ID, factory, VERIFY_HOST, VERIFY_PORT); - } else { - this.lockFactory = factory; - } - } catch (IOException e) { - throw new UserException.CouldNotCreateOutputFile(lockDir, "Could not create coordination file locking directory " + lockDir, e); - } - } - - public SharedFileLock(File file, int ID) { - this(file, DEFAULT_N_TRIES, DEFAULT_MILLISECONDS_PER_TRY, ID); - } - - @Override - public void close() { - if ( ownsLock() ) throw new ReviewedStingException("closing SharedFileLock while still owned: ownership count " + fileLockReentrantCounter); - } - - @Override - public int getHoldCount() { - return fileLockReentrantCounter; - } - - @Override - public boolean ownsLock() { - return fileLockReentrantCounter > 0; - } - - // ------------------------------------------------------------------------------------------ - // - // workhorse routines -- acquiring file locks - // - // ------------------------------------------------------------------------------------------ - - private boolean obtainFileLock() throws IOException { - // annoying bug work around for verifylockserver - if ( VERIFY ) - try { - return fileLock.obtain(1); - } catch ( LockObtainFailedException e ) { - return false; - } - else - return fileLock.obtain(); - } - - /** - * Two stage [threading then file] locking mechanism. Reenterant in that multiple lock calls will be - * unwound appropriately. Uses file channel lock *after* thread locking. - */ - @Override - public void lock() { - if ( SharedFileThreadSafeLock.DEBUG ) logger.warn(" lock() " + Thread.currentThread().getName() + ", fileLockReentrantCounter = " + fileLockReentrantCounter); - if ( fileLockReentrantCounter++ == 0 ) { - // Precondition -- lock is always null while we don't have a lock - if ( fileLock != null ) - throw new ReviewedStingException("BUG: lock() function called when a lock already is owned!"); - - int i = 1; - fileLock = lockFactory.makeLock(file.getName() + ".lock"); - try { - boolean obtained = obtainFileLock(); // todo -- maybe use intrinsic lock features - for ( ; ! obtained && i < nRetries; i++ ) { - try { - //logger.warn("tryLock failed on try " + i + ", waiting " + milliSecPerTry + " millseconds for retry"); - Thread.sleep(milliSecPerTry); - } catch ( InterruptedException e ) { - throw new UserException("SharedFileThreadSafeLock interrupted during wait for file lock", e); - } - obtained = obtainFileLock(); // gross workaround for error in verify server - } - - if ( i > 1 ) logger.warn("tryLock required " + i + " tries before completing, waited " + i * milliSecPerTry + " millseconds"); - - if ( ! obtained ) { - fileLock = null; - // filelock == null -> we never managed to acquire the lock! - throw new UserException("SharedFileThreadSafeLock failed to obtain the lock after " + nRetries + " attempts"); - } - - if ( SharedFileThreadSafeLock.DEBUG ) logger.warn(" lock() " + Thread.currentThread().getName() + ", obtained = " + obtained + ", tries = " + i); - } catch (IOException e) { - fileLock = null; - throw new ReviewedStingException("Coordination file could not be created because a lock could not be obtained.", e); - } - } - } - - @Override - public void unlock() { - // update for reentrant unlocking - if ( fileLock == null ) throw new ReviewedStingException("BUG: file lock is null -- file lock was not obtained"); - if ( fileLockReentrantCounter <= 0 ) throw new ReviewedStingException("BUG: file lock counter < 0"); - - // this unlock counts as 1 unlock. If this is our last unlock, actually do something - if ( SharedFileThreadSafeLock.DEBUG ) logger.warn(" unlock() " + Thread.currentThread().getName() + ", count = " + fileLockReentrantCounter); - if ( --fileLockReentrantCounter == 0 ) { - try { - if ( ! fileLock.isLocked() ) throw new ReviewedStingException("BUG: call to unlock() when we don't have a valid lock!"); - fileLock.release(); - if ( SharedFileThreadSafeLock.DEBUG ) logger.warn(" unlock() " + Thread.currentThread().getName() + ", actually releasing"); - } catch ( IOException e ) { - throw new ReviewedStingException("Could not free file lock on file " + file, e); - } finally { // make sure we null out the filelock, regardless of our state - fileLock = null; - } - } else { - if ( SharedFileThreadSafeLock.DEBUG ) logger.warn(" unlock() " + Thread.currentThread().getName() + ", skipping, count = " + fileLockReentrantCounter); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileThreadSafeLock.java b/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileThreadSafeLock.java deleted file mode 100644 index d70879a0a..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/SharedFileThreadSafeLock.java +++ /dev/null @@ -1,75 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.io.File; - -/** - * User: depristo - * Date: 1/19/11 - * Time: 8:24 AM - * - * A reentrant lock that supports multi-threaded locking as well as a shared file lock on a common - * file in the file system. It itself a shared memory reenterant lock to managed thread safety and - * contains a SharedFileLock to handle the file integrity. - */ -public class SharedFileThreadSafeLock extends ClosableReentrantLock { - private static Logger logger = Logger.getLogger(SharedFileThreadSafeLock.class); - protected static final boolean DEBUG = false; - - private final SharedFileLock fileLock; - - /** - * Create a SharedFileThreadSafeLock object locking the file - * @param file - */ - public SharedFileThreadSafeLock(File file, int nRetries, long milliSecPerTry, int ID) { - super(); - this.fileLock = new SharedFileLock(file, nRetries, milliSecPerTry, ID); - } - - public SharedFileThreadSafeLock(File file, int ID) { - this(file, SharedFileLock.DEFAULT_N_TRIES, SharedFileLock.DEFAULT_MILLISECONDS_PER_TRY, ID); - } - - @Override - public void close() { - super.close(); - fileLock.close(); - } - - @Override - public int getHoldCount() { - if ( super.getHoldCount() != fileLock.getHoldCount() ) - throw new ReviewedStingException("BUG: unequal hold counts. threadlock = " + super.getHoldCount() + ", filelock = " + fileLock.getHoldCount()); - return super.getHoldCount(); - } - - @Override - public boolean ownsLock() { - return super.isHeldByCurrentThread() && fileLock.ownsLock(); - } - - /** - * Two stage [threading then file] locking mechanism. Reenterant in that multiple lock calls will be - * unwound appropriately. Uses file channel lock *after* thread locking. - */ - @Override - public void lock() { - if ( DEBUG ) logger.warn("Attempting SharedFileThreadSafe lock: " + Thread.currentThread().getName()); - if ( DEBUG ) logger.warn(" going for thread lock: " + Thread.currentThread().getName()); - super.lock(); - if ( DEBUG ) logger.warn(" going for file lock: " + Thread.currentThread().getName()); - fileLock.lock(); // todo -- should this be in a try? - } - - @Override - public void unlock() { - if ( DEBUG ) logger.warn(" releasing filelock: " + Thread.currentThread().getName()); - fileLock.unlock(); - if ( DEBUG ) logger.warn(" releasing threadlock: " + Thread.currentThread().getName()); - super.unlock(); - if ( DEBUG ) logger.warn(" unlock() complete: " + Thread.currentThread().getName()); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/SharedMemoryGenomeLocProcessingTracker.java b/public/java/src/org/broadinstitute/sting/utils/threading/SharedMemoryGenomeLocProcessingTracker.java deleted file mode 100644 index 9bf8b58b1..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/threading/SharedMemoryGenomeLocProcessingTracker.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.broadinstitute.sting.utils.threading; - -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - -/** - * Thread-safe shared memory only implementation. Uses a simple list to manage the newly - * added processing locations. - */ -public class SharedMemoryGenomeLocProcessingTracker extends GenomeLocProcessingTracker { - private List newPLocs = new ArrayList(); - - protected SharedMemoryGenomeLocProcessingTracker(ClosableReentrantLock lock) { - super(lock, null); - } - - protected SharedMemoryGenomeLocProcessingTracker(ClosableReentrantLock lock, PrintStream status) { - super(lock, status); - } - - @Override - protected void registerNewLocs(Collection plocs) { - newPLocs.addAll(plocs); - } - - @Override - protected List readNewLocs() { - List r = newPLocs; - newPLocs = new ArrayList(); - return r; - } -} diff --git a/public/java/test/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTrackerUnitTest.java deleted file mode 100644 index 78ab916db..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTrackerUnitTest.java +++ /dev/null @@ -1,402 +0,0 @@ -// our package -package org.broadinstitute.sting.utils.threading; - - -// the imports for unit testing. - - -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.testng.Assert; -import org.testng.annotations.*; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.concurrent.*; - -/** - * Basic unit test for GenomeLoc - */ -public class GenomeLocProcessingTrackerUnitTest extends BaseTest { - IndexedFastaSequenceFile fasta = null; - GenomeLocParser genomeLocParser = null; - String chr1 = null; - private final static String FILE_ROOT = "public/testdata/GLPTFile"; - - @BeforeTest - public void before() { - File referenceFile = new File(hg18Reference); - try { - fasta = new IndexedFastaSequenceFile(referenceFile); - chr1 = fasta.getSequenceDictionary().getSequence(1).getSequenceName(); - genomeLocParser = new GenomeLocParser(fasta); - - } - catch(FileNotFoundException ex) { - throw new UserException.CouldNotReadInputFile(referenceFile,ex); - } - } - - @BeforeMethod - public void beforeMethod(Object[] data) { - if ( data.length > 0 ) - ((TestTarget)data[0]).init(); - } - - @AfterMethod - public void afterMethod(Object[] data) { - if ( data.length > 0 ) { - ((TestTarget)data[0]).getTracker().close(); - ((TestTarget)data[0]).cleanup(); - } - } - - abstract private class TestTarget { - String name; - int nShards; - int shardSize; - File file; - - public void init() { cleanup(); } - - public void cleanup() { - if ( file != null && file.exists() ) - file.delete(); - } - - public boolean isThreadSafe() { return true; } - - protected TestTarget(String name, int nShards, int shardSize, File file) { - this.name = name; - this.nShards = nShards; - this.shardSize = shardSize; - this.file = file; - } - - public abstract GenomeLocProcessingTracker getTracker(); - - public List getShards() { - List shards = new ArrayList(); - for ( int i = 0; i < nShards; i++ ) { - int start = shardSize * i; - int stop = start + shardSize; - shards.add(genomeLocParser.createGenomeLoc(chr1, start, stop)); - } - return shards; - } - - public String toString() { - return String.format("TestTarget %s: nShards=%d shardSize=%d", name, nShards, shardSize); - } - } - - @DataProvider(name = "threadData") - public Object[][] createThreadData() { - // gotta keep the tests small... - return createData(Arrays.asList(10, 100), Arrays.asList(10)); - //return createData(Arrays.asList(10, 100, 1000, 10000), Arrays.asList(10)); - } - - public Object[][] createData(List nShards, List shardSizes) { - List params = new ArrayList(); - - int counter = 0; - String name = null; - for ( int nShard : nShards ) { - for ( int shardSize : shardSizes ) { - // shared mem -- canonical implementation - params.add(new TestTarget("ThreadSafeSharedMemory", nShard, shardSize, null) { - GenomeLocProcessingTracker tracker = new SharedMemoryGenomeLocProcessingTracker(new ClosableReentrantLock()); - public GenomeLocProcessingTracker getTracker() { return tracker; } - }); - - final File file1 = new File(String.format("%s_ThreadSafeFileBacked_%d_%d", FILE_ROOT, counter++, nShard, shardSize)); - params.add(new TestTarget("ThreadSafeFileBacked", nShard, shardSize, file1) { - GenomeLocProcessingTracker tracker = new FileBackedGenomeLocProcessingTracker(file1, genomeLocParser, new ClosableReentrantLock(), null); - public GenomeLocProcessingTracker getTracker() { return tracker; } - }); - - name = "FileBackedSharedFileThreadSafe"; - final File file2 = new File(String.format("%s_%s_%d_%d", FILE_ROOT, name, counter++, nShard, shardSize)); - params.add(new TestTarget(name, nShard, shardSize, file2) { - GenomeLocProcessingTracker tracker = new FileBackedGenomeLocProcessingTracker(file2, genomeLocParser, new SharedFileThreadSafeLock(file2, -1), null); - public GenomeLocProcessingTracker getTracker() { return tracker; } - }); - - name = "FileBackedSharedFile"; - final File file3 = new File(String.format("%s_%s_%d_%d", FILE_ROOT, name, counter++, nShard, shardSize)); - params.add(new TestTarget(name, nShard, shardSize, file3) { - GenomeLocProcessingTracker tracker = new FileBackedGenomeLocProcessingTracker(file3, genomeLocParser, new SharedFileLock(file3, -1), null); - public GenomeLocProcessingTracker getTracker() { return tracker; } - public boolean isThreadSafe() { return false; } - }); - } - } - - List params2 = new ArrayList(); - for ( TestTarget x : params ) params2.add(new Object[]{x}); - return params2.toArray(new Object[][]{}); - } - - @DataProvider(name = "simpleData") - public Object[][] createSimpleData() { - return createData(Arrays.asList(1000), Arrays.asList(100)); - } - - private static final String NAME_ONE = "name1"; - private static final String NAME_TWO = "name2"; - - @Test(enabled = true) - public void testNoop() { - GenomeLocProcessingTracker tracker = new NoOpGenomeLocProcessingTracker(); - for ( int start = 1; start < 100; start++ ) { - for ( int n = 0; n < 2; n++ ) { - GenomeLoc loc = genomeLocParser.createGenomeLoc(chr1, start, start +1); - ProcessingLoc ploc = tracker.claimOwnership(loc, NAME_ONE); - Assert.assertTrue(ploc.isOwnedBy(NAME_ONE)); - Assert.assertEquals(tracker.updateAndGetProcessingLocs(NAME_ONE).size(), 0); - } - } - } - - @Test(dataProvider = "simpleData", enabled = true) - public void testSingleProcessTracker(TestTarget test) { - GenomeLocProcessingTracker tracker = test.getTracker(); - List shards = test.getShards(); - logger.warn("testSingleProcessTracker " + test); - - int counter = 0; - for ( GenomeLoc shard : shards ) { - counter++; - - Assert.assertNull(tracker.findOwner(shard, NAME_ONE)); - Assert.assertFalse(tracker.locIsOwned(shard, NAME_ONE)); - - ProcessingLoc proc = tracker.claimOwnership(shard,NAME_ONE); - Assert.assertNotNull(proc); - Assert.assertNotNull(proc.getLocation()); - Assert.assertNotNull(proc.getOwner()); - Assert.assertEquals(proc.getLocation(), shard); - Assert.assertEquals(proc.getOwner(), NAME_ONE); - Assert.assertEquals(tracker.findOwner(shard, NAME_ONE), proc); - Assert.assertTrue(tracker.locIsOwned(shard, NAME_ONE)); - Assert.assertNotNull(tracker.updateAndGetProcessingLocs(NAME_ONE)); - Assert.assertEquals(tracker.updateAndGetProcessingLocs(NAME_ONE).size(), counter); - - ProcessingLoc badClaimAttempt = tracker.claimOwnership(shard,NAME_TWO); - Assert.assertFalse(badClaimAttempt.getOwner().equals(NAME_TWO)); - Assert.assertEquals(badClaimAttempt.getOwner(), NAME_ONE); - } - } - - @Test(dataProvider = "simpleData", enabled = true) - public void testIterator(TestTarget test) { - GenomeLocProcessingTracker tracker = test.getTracker(); - List shards = test.getShards(); - logger.warn("testIterator " + test); - - List markedShards = new ArrayList(); - List toFind = new ArrayList(); - - for ( int i = 0; i < shards.size(); i++ ) { - if ( ! (i % 10 == 0) ) { - markedShards.add(shards.get(i)); - tracker.claimOwnership(shards.get(i), NAME_TWO); - } else { - toFind.add(shards.get(i)); - } - } - - int nFound = 0; - Iterator it = shards.iterator(); - while ( it.hasNext() ) { - GenomeLoc shard = tracker.claimOwnershipOfNextAvailable(it, NAME_ONE); - - if ( shard == null ) { // everything to get is done - Assert.assertEquals(nFound, toFind.size(), "Didn't find all of the available shards"); - } else { - nFound++; - ProcessingLoc proc = tracker.findOwner(shard, NAME_ONE); - - Assert.assertTrue(proc.isOwnedBy(NAME_ONE)); - Assert.assertTrue(! markedShards.contains(shard), "Ran process was already marked!"); - Assert.assertTrue(toFind.contains(shard), "Claimed shard wasn't one of the unmarked!"); - } - } - } - - @Test(dataProvider = "simpleData", enabled = true) - public void testMarkedProcesses(TestTarget test) { - GenomeLocProcessingTracker tracker = test.getTracker(); - List shards = test.getShards(); - logger.warn("testMarkedProcesses " + test); - - List markedShards = new ArrayList(); - - for ( int i = 0; i < shards.size(); i++ ) { - if ( i % 2 == 0 ) { - markedShards.add(shards.get(i)); - tracker.claimOwnership(shards.get(i), NAME_TWO); - } - } - - for ( GenomeLoc shard : shards ) { - ProcessingLoc proc = tracker.claimOwnership(shard,NAME_ONE); - - Assert.assertTrue(proc.isOwnedBy(NAME_ONE) || proc.isOwnedBy(NAME_TWO)); - - if ( proc.isOwnedBy(NAME_ONE) ) - Assert.assertTrue(! markedShards.contains(shard), "Ran process was already marked!"); - else - Assert.assertTrue(markedShards.contains(shard), "Unran process wasn't marked"); - - if ( ! markedShards.contains(shard) ) { - Assert.assertEquals(tracker.findOwner(shard, NAME_ONE), proc); - } - } - } - - public class TestThread implements Callable { - public TestTarget test; - public String name; - public List ran, toRun; - boolean useIterator; - - public TestThread(TestTarget test, int count, List toRun, boolean useIterator) { - this.test = test; - this.toRun = toRun; - this.name = "thread" + count; - this.ran = new ArrayList(); - this.useIterator = useIterator; - } - - public Integer call() { - //logger.warn(String.format("Call() Thread %s", name)); - if ( useIterator ) { - for ( GenomeLoc shard : test.getTracker().onlyOwned(toRun.iterator(), name) ) { - if ( shard != null ) { // ignore the unclaimable end of the stream - ran.add(shard); - // do some work here - for ( int sum =0, i = 0; i < 100000; i++) sum += i; - } - } - - } else { - for ( GenomeLoc shard : toRun ) { - //System.out.printf("Claiming ownership in %s on %s%n", name, shard); - ProcessingLoc proc = test.getTracker().claimOwnership(shard,name); - //System.out.printf(" => ownership of %s is %s (I own? %b)%n", shard, proc.getOwner(), proc.isOwnedBy(name)); - if ( proc.isOwnedBy(name) ) { - ran.add(proc.getLocation()); - // do some work here - for ( int sum =0, i = 0; i < 100000; i++) sum += i; - } - //logger.warn(String.format("Thread %s on %s -> owned by %s", name, shard, proc.getOwner())); - } - } - - return 1; - } - } - - private static TestThread findOwner(String name, List threads) { - for ( TestThread thread : threads ) { - if ( thread.name.equals(name) ) - return thread; - } - return null; - } - - private static final void assertAllThreadsFinished(List> futures) { - try { - for ( Future f : futures ) { - Assert.assertTrue(f.isDone(), "Thread never finished running"); - Assert.assertTrue(f.get() != null, "Finished successfully"); - } - } catch (InterruptedException e) { - Assert.fail("Thread failed to run to completion", e); - } catch (ExecutionException e) { - Assert.fail("Thread generated an exception", e); - } - } - - private static final List subList(List l, int i) { - List r = new ArrayList(); - for ( int j = 0; j < l.size(); j++ ) { - if ( j % i == 0 ) - r.add(l.get(j)); - } - - return r; - } - - @Test(dataProvider = "threadData", enabled = true) - public void testThreadedProcessesLowLevelFunctions(TestTarget test) { - testThreading(test, false); - } - - @Test(dataProvider = "threadData", enabled = true) - public void testThreadedProcessesIterator(TestTarget test) { - testThreading(test, true); - } - - private void testThreading(TestTarget test, boolean useIterator) { - if ( ! test.isThreadSafe() ) - // skip tests that aren't thread safe - return; - - // start up 3 threads - logger.warn("ThreadedTesting " + test + " using iterator " + useIterator); - List threads = new ArrayList(); - for ( int i = 0; i < 4; i++) { - List toRun = subList(test.getShards(), i+1); - TestThread thread = new TestThread(test, i, toRun, useIterator); - threads.add(thread); - } - ExecutorService exec = java.util.concurrent.Executors.newFixedThreadPool(threads.size()); - - try { - List> results = exec.invokeAll(threads, 300, TimeUnit.SECONDS); - GenomeLocProcessingTracker tracker = test.getTracker(); - List shards = test.getShards(); - - for ( TestThread thread : threads ) - logger.warn(String.format("TestThread %s ran %d jobs of %d to run", thread.name, thread.ran.size(), thread.toRun.size())); - - assertAllThreadsFinished(results); - - // we ran everything - Assert.assertEquals(tracker.updateAndGetProcessingLocs(NAME_ONE).size(), shards.size(), "Not all shards were run"); - - for ( GenomeLoc shard : shards ) { - Assert.assertTrue(tracker.locIsOwned(shard, NAME_ONE), "Unowned shard"); - - ProcessingLoc proc = tracker.findOwner(shard, NAME_ONE); - Assert.assertNotNull(proc, "Proc was null"); - - Assert.assertNotNull(proc.getOwner(), "Owner was null"); - Assert.assertEquals(proc.getLocation(), shard, "Shard loc doesn't make ProcessingLoc"); - - TestThread owner = findOwner(proc.getOwner(), threads); - Assert.assertNotNull(owner, "Couldn't find owner"); - - Assert.assertTrue(owner.ran.contains(shard), "Owner doesn't contain ran shard"); - - for ( TestThread thread : threads ) - if ( ! proc.isOwnedBy(thread.name) && thread.ran.contains(shard) ) - Assert.fail("Shard appears in another run list: proc=" + proc + " shard=" + shard + " also in jobs of " + thread.name + " obj=" + thread.ran.get(thread.ran.indexOf(shard))); - - } - } catch (InterruptedException e) { - Assert.fail("Thread failure", e); - } - } -} \ No newline at end of file diff --git a/public/packages/Queue.xml b/public/packages/Queue.xml index 58da4398e..589cb45f5 100644 --- a/public/packages/Queue.xml +++ b/public/packages/Queue.xml @@ -41,6 +41,7 @@ + diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala index f8218148e..cbe53db8d 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala @@ -42,8 +42,8 @@ class RecalibrateBaseQualities extends QScript { val recalFile1: File = swapExt(bam, ".bam", ".recal1.csv") val recalFile2: File = swapExt(bam, ".bam", ".recal2.csv") val recalBam: File = swapExt(bam, ".bam", ".recal.bam") - val path1: String = bam + ".before" - val path2: String = bam + ".after" + val path1: String = recalBam + ".before" + val path2: String = recalBam + ".after" add(cov(bam, recalFile1), recal(bam, recalFile1, recalBam), diff --git a/settings/helpTemplates/common.html b/settings/helpTemplates/common.html new file mode 100644 index 000000000..ebc060d0a --- /dev/null +++ b/settings/helpTemplates/common.html @@ -0,0 +1,15 @@ +<#macro makeHeader title> + + ${title} + + + + +<#macro headerInfo> +

See also Main index | GATK wiki | GATK support forum

+

GATK version ${version} built at ${timestamp}.

+ + +<#macro footerInfo> + + diff --git a/settings/helpTemplates/generic.index.template.html b/settings/helpTemplates/generic.index.template.html new file mode 100644 index 000000000..6c9e9f4e8 --- /dev/null +++ b/settings/helpTemplates/generic.index.template.html @@ -0,0 +1,36 @@ +<#include "common.html"/> + +<#macro emitGroup group> + +

${group.name}

+

+ ${group.summary} +

+

+ + + + <#list data as datum> + <#if datum.group == group.name> + + + + + + +
NameSummary
${datum.name}${datum.summary}
+ + + +<@makeHeader title="GATK documentation index"/> + +

GATK documentation index

+ <@headerInfo /> + <#list groups?sort_by("name") as group> + <@emitGroup group=group/> + + + <@footerInfo /> + + + diff --git a/settings/helpTemplates/generic.template.html b/settings/helpTemplates/generic.template.html new file mode 100644 index 000000000..ca0d1e76f --- /dev/null +++ b/settings/helpTemplates/generic.template.html @@ -0,0 +1,121 @@ +<#include "common.html"/> + +<#macro argumentlist name myargs> + <#if myargs?size != 0> + ${name} + <#list myargs as arg> + + ${arg.name} + ${arg.type} + ${arg.defaultValue!"No default"} + ${arg.summary} + + <#-- + ${arg.required} + --> + + + + +<#macro argumentDetails arg> +

${arg.name}<#if arg.synonyms??> / ${arg.synonyms} + (<#if arg.attributes??>${arg.attributes} ${arg.type}<#if arg.defaultValue??> with default value ${arg.defaultValue})

+ ${arg.summary}. ${arg.fulltext}
+ <#if arg.options??> +

The ${arg.name} argument is an enumerated type (${arg.type}), which can have one of the following values:

+
+ <#list arg.options as option> +
${option.name} +
${option.summary} + +
+ + + +<#macro relatedByType name type> + <#list relatedDocs as relatedDoc> + <#if relatedDoc.relation == type> +

${name}

+
    + <#list relatedDocs as relatedDoc> + <#if relatedDoc.relation == type> +
  • ${relatedDoc.name} is a ${relatedDoc.relation}
  • + + +
+ <#break> + + + + + +<@makeHeader title="${name} documentation"/> + +

${name}

+ <@headerInfo /> +

Brief summary

+ ${summary} + <#if author??> +

Author

+ ${author} + +

Detailed description

+ ${description} + + <#-- Create the argument summary --> + <#if arguments.all?size != 0> +
+

Feature specific arguments

+ + + + + + + + + + + <@argumentlist name="Required" myargs=arguments.required/> + <@argumentlist name="Optional" myargs=arguments.optional/> + <@argumentlist name="Hidden" myargs=arguments.hidden/> + <@argumentlist name="Depreciated" myargs=arguments.depreciated/> + +
NameTypeDefault valueSummary
+ + + <#-- Create references to additional capabilities if appropriate --> + <#if extradocs?size != 0> +
+

Additional capabilities

+ The arguments described in the entries below can be supplied to this tool to modify + its behavior. For example, the -L argument directs the GATK engine restricts processing + to specific genomic intervals. This capability is available to all GATK walkers. + + + + <#-- This class is related to other documented classes via sub/super relationships --> + <#if relatedDocs?size != 0> +
+

Related capabilities

+ <@relatedByType name="Superclasses" type="superclass"/> + <@relatedByType name="Subclasses" type="subclass"/> + + + <#-- List all of the --> + <#if arguments.all?size != 0> +
+ <#-- Create the argument details --> +

Argument details

+ <#list arguments.all as arg> + <@argumentDetails arg=arg/> + + + + <@footerInfo /> + + diff --git a/settings/helpTemplates/style.css b/settings/helpTemplates/style.css new file mode 100644 index 000000000..79f409f55 --- /dev/null +++ b/settings/helpTemplates/style.css @@ -0,0 +1,96 @@ +body +{ + background-color: #ffffff; + color: #202020; +} + +body, p, ul, ol, dl +{ + font-family: Corbel, Verdana, "Lucida Grande", "Lucida Sans Unicode", Sans-Serif; +} + +p, ul, ol, dl, dt, dd, td +{ + font-size: 12pt; +} + +p.version, p.see-also +{ + font-size: 8pt; +} + +h1, h2, h3 +{ + font-family: Corbel, Arial, Helvetica, Sans-Serif; + font-weight: bold; + text-align: left; + color: #669; +} + +h1 +{ + font-size: 32pt; + letter-spacing: -2px; +} + +h3 +{ + font-size: 16pt; + font-weight: normal; +} + +/* + * enum DT layout +*/ + +dl { + border: 1px solid #ccc; +} + +dt { + font-weight: bold; + text-decoration: underline; +} + +dd { + margin: 0; + padding: 0 0 0.5em 0; +} + +/* + * clean table layouts +*/ +#hor-minimalist-b +{ + font-family: "Lucida Sans Unicode", "Lucida Grande", Sans-Serif; + font-size: 12px; + background: #fff; + margin: 5px; + width: 100%; + border-collapse: collapse; + text-align: left; +} +#hor-minimalist-b th +{ + font-size: 14px; + font-weight: normal; + color: #039; + padding: 10px 8px; + border-bottom: 2px solid #6678b1; +} +#hor-minimalist-b td +{ + border-bottom: 1px solid #ccc; + color: #669; + padding: 6px 8px; +} +#hor-minimalist-b tbody tr:hover td +{ + color: #009; +} + +th#row-divider +{ + font-weight: bolder; + font-size: larger; +} \ No newline at end of file