From 81d0cab27eff703b473a994b7703630e04f6da87 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 21 Jul 2011 16:01:54 -0400 Subject: [PATCH] Walker index html now emited. --- .../walkers/diffengine/DiffObjectsWalker.java | 74 ++++++++++++++++++- .../sting/utils/help/GATKDoclet.java | 53 +++++++++---- .../{test.html => walker.template.html} | 28 ++++--- 3 files changed, 123 insertions(+), 32 deletions(-) rename settings/helpTemplates/{test.html => walker.template.html} (69%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java index 5cd99697c..899c3671c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -38,9 +39,79 @@ import java.util.List; /** * A generic engine for comparing tree-structured objects - * + *

* Compares two record-oriented files, itemizing specific difference between equivalent * records in the two files. Reports both itemized and summarized differences. + *

+ * What are the summarized differences and the DiffObjectsWalker + *

+ * The GATK contains a summarizing difference engine that compares hierarchical data structures to emit: + *

+ * + *

+ * The GATK contains a private walker DiffObjects that allows you access to the DiffEngine capabilities on the command line. Simply provide the walker with the master and test files and it will emit summarized differences for you. + * + *

+ * Why? + *

+ * The reason for this system is that it allows you to compare two structured files -- such as BAMs and VCFs -- for common differences among them. This is primarily useful in regression testing or optimization, where you want to ensure that the differences are those that you expect and not any others. + * + *

Understanding the output + *

The DiffEngine system compares two hierarchical data structures for specific differences in the values of named + * nodes. Suppose I have two trees: + *

+ *     Tree1=(A=1 B=(C=2 D=3))
+ *     Tree2=(A=1 B=(C=3 D=3 E=4))
+ *     Tree3=(A=1 B=(C=4 D=3 E=4))
+ * 
+ *

+ * where every node in the tree is named, or is a raw value (here all leaf values are integers). The DiffEngine + * traverses these data structures by name, identifies equivalent nodes by fully qualified names + * (Tree1.A is distinct from Tree2.A), and determines whether their values are equal (Tree1.A=1, Tree2.A=1, so they are). + * These itemized differences are listed as: + *

+ *     Tree1.B.C=2 != Tree2.B.C=3
+ *     Tree1.B.C=2 != Tree3.B.C=4
+ *     Tree2.B.C=3 != Tree3.B.C=4
+ *     Tree1.B.E=MISSING != Tree2.B.E=4
+ * 
+ *

+ * This is conceptually very similar to the output of the Unix command line tool diff. What's nice about DiffEngine though + * is that it computes similarity among the itemized differences and displays the count of difference names + * in the system. In the above example, the field C is not equal three times, while the missing E in Tree1 occurs + * only once. So the summary is: + * + *

+ *     *.B.C : 3
+ *     *.B.E : 1
+ * 
+ *

where the * operator indicates that any named field matches. This output is sorted by counts, and provides an + * immediate picture of the commonly occurring differences among the files. + *

+ * Below is a detailed example of two VCF files that differ because of a bug in the AC, AF, and AN counting routines, + * detected by the integrationtest integration (more below). You can see that although there are many specific + * instances of these differences between the two files, the summarized differences provide an immediate picture that + * the AC, AF, and AN fields are the major causes of the differences. + *

+ *

+   [testng] path                                                             count
+   [testng] *.*.*.AC                                                         6
+   [testng] *.*.*.AF                                                         6
+   [testng] *.*.*.AN                                                         6
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AC  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AF  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AN  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AC  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AF  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AN  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AC  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AF  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AN  1
+   [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000598.AC  1
+
* * @author Mark DePristo * @since 7/4/11 @@ -112,6 +183,7 @@ public class DiffObjectsWalker extends RodWalker { @Argument(fullName="showItemizedDifferences", shortName="SID", doc="Should we enumerate all differences between the files?", required=false) boolean showItemizedDifferences = false; + @Hidden @Argument(fullName="testEnum", doc="X", required=false) TestEnum testEnum = TestEnum.ONE; diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java index 18b4266be..b6488cff9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java @@ -47,7 +47,7 @@ import java.util.*; * */ public class GATKDoclet extends ResourceBundleExtractorDoclet { - RootDoc root; + RootDoc rootDoc; /** * Extracts the contents of certain types of javadoc and adds them to an XML file. @@ -69,7 +69,7 @@ public class GATKDoclet extends ResourceBundleExtractorDoclet { @Override protected void processDocs(RootDoc rootDoc, PrintStream ignore) { // setup the global access to the root - root = rootDoc; + this.rootDoc = rootDoc; try { /* ------------------------------------------------------------------- */ @@ -83,15 +83,14 @@ public class GATKDoclet extends ResourceBundleExtractorDoclet { // but just use this: cfg.setObjectWrapper(new DefaultObjectWrapper()); + List> indexData = new ArrayList>(); for ( ClassDoc doc : rootDoc.classes() ) { if ( ResourceBundleExtractorDoclet.isWalker(doc) ) { // && getClassName(doc).contains("UGCalcLikelihoods")) { System.out.printf("Walker class %s%n", doc); - processWalkerDocs(cfg, doc); - //return; + indexData.add(processWalkerDocs(cfg, doc)); } -// else -// System.out.printf("Excluding non-walker class %s%n", doc); } + processWalkerIndex(indexData,cfg); } catch ( FileNotFoundException e ) { throw new RuntimeException(e); } catch ( IOException e ) { @@ -99,19 +98,15 @@ public 
class GATKDoclet extends ResourceBundleExtractorDoclet { } } - private void processWalkerDocs(Configuration cfg, ClassDoc doc) throws IOException { - /* ------------------------------------------------------------------- */ - /* You usually do these for many times in the application life-cycle: */ - - // Create the root hash - Map root = buildWalkerDataModel(doc); - + private void processWalkerIndex(List> indexData, Configuration cfg) throws IOException { /* Get or create a template */ - Template temp = cfg.getTemplate("test.html"); + Template temp = cfg.getTemplate("walker.index.template.html"); /* Merge data-model with template */ - Writer out = new OutputStreamWriter(new FileOutputStream(new File("testdoc/" + getClassName(doc).replace(".", "_") + ".html"))); + Writer out = new OutputStreamWriter(new FileOutputStream(new File("testdoc/index.html"))); try { + Map root = new HashMap(); + root.put("walkers", indexData); temp.process(root, out); out.flush(); } catch ( TemplateException e ) { @@ -119,6 +114,32 @@ public class GATKDoclet extends ResourceBundleExtractorDoclet { } } + private Map processWalkerDocs(Configuration cfg, ClassDoc doc) throws IOException { + // Create the root hash + Map root = buildWalkerDataModel(doc); + + /* Get or create a template */ + Template temp = cfg.getTemplate("walker.template.html"); + + /* Merge data-model with template */ + File outputFile = new File(getClassName(doc).replace(".", "_") + ".html"); + File outputPath = new File("testdoc/" + outputFile); + try { + Writer out = new OutputStreamWriter(new FileOutputStream(outputPath)); + temp.process(root, out); + out.flush(); + } catch ( TemplateException e ) { + throw new ReviewedStingException("Failed to create GATK documentation", e); + } + + // add index data + Map indexData = new HashMap(); + indexData.put("filename", outputFile.toString()); + indexData.put("name", doc.name()); + indexData.put("summary", root.get("summary")); + return indexData; + } + private Map 
buildWalkerDataModel(ClassDoc classdoc) { Map root = new HashMap(); @@ -195,7 +216,7 @@ public class GATKDoclet extends ResourceBundleExtractorDoclet { Field field = getFieldForFieldDoc(fieldDoc); if ( field.isAnnotationPresent(ArgumentCollection.class) ) { - ClassDoc typeDoc = root.classNamed(fieldDoc.type().qualifiedTypeName()); + ClassDoc typeDoc = this.rootDoc.classNamed(fieldDoc.type().qualifiedTypeName()); if ( typeDoc == null ) throw new ReviewedStingException("Tried to get javadocs for ArgumentCollection field " + fieldDoc + " but could't find the class in the RootDoc"); else { diff --git a/settings/helpTemplates/test.html b/settings/helpTemplates/walker.template.html similarity index 69% rename from settings/helpTemplates/test.html rename to settings/helpTemplates/walker.template.html index f9bfc2b92..ecf24a56e 100644 --- a/settings/helpTemplates/test.html +++ b/settings/helpTemplates/walker.template.html @@ -1,13 +1,6 @@ <#macro argumentlist name myargs> <#if myargs?size != 0> -

${name}

- - - - - - - + <#list myargs as arg> @@ -19,7 +12,6 @@ --> -
NameSynonymsTypeSummary
${name}
${arg.name}${arg.required}
@@ -40,8 +32,6 @@

${name}

Summary

${summary} -

Version

- ${version!"unknown version"} <#if author??>

Author

${author} @@ -50,10 +40,18 @@ ${description} <#-- Create the argument summary -->

Arguments

- <@argumentlist name="Required" myargs=arguments.required/> - <@argumentlist name="Optional" myargs=arguments.optional/> - <@argumentlist name="Hidden" myargs=arguments.hidden/> - <@argumentlist name="Depreciated" myargs=arguments.depreciated/> + + + + + + + + <@argumentlist name="Required" myargs=arguments.required/> + <@argumentlist name="Optional" myargs=arguments.optional/> + <@argumentlist name="Hidden" myargs=arguments.hidden/> + <@argumentlist name="Depreciated" myargs=arguments.depreciated/> +
NameSynonymsTypeSummary
<#-- Create the argument details -->

Argument details