Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

2011-08-20 08:50:43 -04:00 · 2011-08-20 08:50:43 -04:00 · 4939648fd4
parent 01142433d3 ff018c7964
commit 4939648fd4
28 changed files with 998 additions and 246 deletions
--- a/build.xml
+++ b/build.xml
@ -49,7 +49,7 @@
    
    <!-- Contracts for Java -->
    <!-- To disable, run with -Duse.contracts=false -->
-    <property name="use.contracts" value="true" />
+    <property name="use.contracts" value="false" />
    <property name="java.contracts" value="${build.dir}/java/contracts" />
    <property name="contracts.version" value="1.0-20110609" />
    <property name="cofoja.jar" value="${lib.dir}/cofoja-${contracts.version}.jar"/>
@ -489,7 +489,7 @@
                 docletpathref="doclet.classpath"
                 classpathref="external.dependencies"
                 classpath="${java.classes}"
-                 additionalparam="-private -build-timestamp &quot;${build.timestamp}&quot; -absolute-version ${build.version} -quiet -J-Xdebug -J-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005">
+                 additionalparam="-private -build-timestamp &quot;${build.timestamp}&quot; -absolute-version ${build.version} -quiet -J-Xdebug -J-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005"> <!-- -test to only do DocumentationTest walker -->
            <sourcefiles>
                <union>
                    <fileset refid="all.java.source.files"/>
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java
@ -30,7 +30,9 @@ import net.sf.picard.reference.ReferenceSequenceFile;
 import net.sf.picard.reference.ReferenceSequenceFileFactory;
 import net.sf.samtools.SAMRecord;
 import net.sf.samtools.util.StringUtil;
+import org.broadinstitute.sting.commandline.Advanced;
 import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Hidden;
 import org.broadinstitute.sting.commandline.Output;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
@ -42,7 +44,6 @@ import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation;
 import org.broadinstitute.sting.utils.clipreads.ReadClipper;
 import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
-import org.yaml.snakeyaml.events.SequenceStartEvent;

 import java.io.File;
 import java.io.PrintStream;
@ -51,44 +52,158 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 /**
- * This ReadWalker provides simple, yet powerful read clipping capabilities.  It allows the user to clip bases in reads
- * with poor quality scores, that match particular sequences, or that were generated by particular machine cycles.
+ * This tool provides simple, powerful read clipping capabilities to remove low quality strings of bases, sections of reads, and reads containing user-provided sequences.
+ *
+ *
+ * <p>
+ * It allows the user to clip bases in reads with poor quality scores, that match particular
+ * sequences, or that were generated by particular machine cycles.
+ *
+ * <dl>
+ *     <dt>Quality score based clipping</dt>
+ *     <dd>
+ *         Clip bases from the read in clipper from
+ *         <br>argmax_x{ \sum_{i = x + 1}^{l} (qTrimmingThreshold - qual_i) }</br>
+ *         to the end of the read.  This is blatantly stolen from BWA.
+ *
+ *         Walk through the read from the end (in machine cycle order) to the beginning, calculating the
+ *         running sum of qTrimmingThreshold - qual.  While we do this, we track the maximum value of this
+ *         sum where the delta > 0.  After the loop, clipPoint is either -1 (don't do anything) or the
+ *         clipping index in the read (from the end).
+ *     </dd>
+ *     <dt>Cycle based clipping</dt>
+ *     <dd>Clips machine cycles from the read. Accepts a string of ranges of the form start1-end1,start2-end2, etc.
+ *     For each start/end pair, removes bases in machine cycles from start to end, inclusive. These are 1-based values (positions).
+ *     For example, 1-5,10-12 clips the first 5 bases, and then three bases at cycles 10, 11, and 12.
+ *     </dd>
+ *     <dt>Sequence matching</dt>
+ *     <dd>Clips bases from the read that exactly match one of a number of base sequences. This employs an exact match algorithm,
+ *     filtering only bases whose sequence exactly matches SEQ.</dd>
+ * </dl>
+ *
+ * </p>
+ *
+ * <h2>Input</h2>
+ * <p>
+ *     Any number of BAM files.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ *     A new BAM file containing all of the reads from the input BAMs with the user-specified clipping
+ *     operation applied to each read.
+ * </p>
+ * <p>
+ *     <h3>Summary output</h3>
+ *     <pre>
+ *     Number of examined reads              13
+ *     Number of clipped reads               13
+ *     Percent of clipped reads              100.00
+ *     Number of examined bases              988
+ *     Number of clipped bases               126
+ *     Percent of clipped bases              12.75
+ *     Number of quality-score clipped bases 126
+ *     Number of range clipped bases         0
+ *     Number of sequence clipped bases      0
+ *     </pre>
+ * </p>
+ *
+ * <p>
+ *     <h3>Example clipping</h3>
+ *     Suppose we are given this read:
+ *     <pre>
+ *     314KGAAXX090507:1:19:1420:1123#0        16      chrM    3116    29      76M     *       *       *
+ *          TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
+ *          #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
+ *     </pre>
+ *
+ *     If we are clipping reads with -QT 10 and -CR WRITE_NS, we get:
+ *
+ *     <pre>
+ *     314KGAAXX090507:1:19:1420:1123#0        16      chrM    3116    29      76M     *       *       *
+ *          NNNNNNNNNNNNNNNNNTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
+ *          #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
+ *     </pre>
+ *
+ *     Whereas with -CR WRITE_Q0S:
+ *     <pre>
+ *     314KGAAXX090507:1:19:1420:1123#0        16      chrM    3116    29      76M     *       *       *
+ *          TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
+ *          !!!!!!!!!!!!!!!!!4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
+ *     </pre>
+ *
+ *     Or -CR SOFTCLIP_BASES:
+ *     <pre>
+ *     314KGAAXX090507:1:19:1420:1123#0        16      chrM    3133    29      17S59M  *       *       *
+ *          TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
+ *          #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
+ *     </pre>
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ *     -T ClipReads -I my.bam -I your.bam -o my_and_your.clipped.bam -R Homo_sapiens_assembly18.fasta \
+ *     -XF seqsToClip.fasta -X CCCCC -CT "1-5,11-15" -QT 10
+ * </pre>
+
+ * @author Mark DePristo
+ * @since 2010
 */
@Requires({DataSource.READS})
 public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithData, ClipReadsWalker.ClippingData> {
-    @Output
-    PrintStream out;
+    /**
+     * If provided, ClipReads will write summary statistics about the clipping operations applied
+     * to the reads to this file.
+     */
+    @Output(fullName = "outputStatistics", shortName = "os", doc = "Write output statistics to this file", required = false)
+    PrintStream out = null;

    /**
-     * an optional argument to dump the reads out to a BAM file
+     * The output SAM/BAM file will be written here
     */
-    @Argument(fullName = "outputBam", shortName = "ob", doc = "Write output to this BAM filename instead of STDOUT", required = false)
-    StingSAMFileWriter outputBam = null;
+    @Output(doc = "Write BAM output here", required = true)
+    StingSAMFileWriter outputBam;

-    @Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc = "", required = false)
+    /**
+     * If a value > 0 is provided, then the quality score based read clipper will be applied to the reads using this
+     * quality score threshold.
+     */
+    @Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc = "If provided, the Q-score clipper will be applied", required = false)
    int qTrimmingThreshold = -1;

-    @Argument(fullName = "cyclesToTrim", shortName = "CT", doc = "String of the form 1-10,20-30 indicating machine cycles to clip from the reads", required = false)
+    /**
+     * Clips machine cycles from the read. Accepts a string of ranges of the form start1-end1,start2-end2, etc.
+     * For each start/end pair, removes bases in machine cycles from start to end, inclusive. These are 1-based
+     * values (positions). For example, 1-5,10-12 clips the first 5 bases, and then three bases at cycles 10, 11,
+     * and 12.
+     */
+    @Argument(fullName = "cyclesToTrim", shortName = "CT", doc = "String indicating machine cycles to clip from the reads", required = false)
    String cyclesToClipArg = null;

-    @Argument(fullName = "clipSequencesFile", shortName = "XF", doc = "Remove sequences within reads matching these sequences", required = false)
+    /**
+     * Reads the sequences in the provided FASTA file, and clip any bases that exactly match any of the
+     * sequences in the file.
+     */
+    @Argument(fullName = "clipSequencesFile", shortName = "XF", doc = "Remove sequences within reads matching the sequences in this FASTA file", required = false)
    String clipSequenceFile = null;

+    /**
+     * Clips bases from the reads matching the provided SEQ.  Can be provided any number of times on the command line
+     */
    @Argument(fullName = "clipSequence", shortName = "X", doc = "Remove sequences within reads matching this sequence", required = false)
    String[] clipSequencesArgs = null;

-    @Argument(fullName="read", doc="", required=false)
-    String onlyDoRead = null;
-
-    //@Argument(fullName = "keepCompletelyClipped", shortName = "KCC", doc = "Unfortunately, sometimes a read is completely clipped away but with SOFTCLIP_BASES this results in an invalid CIGAR string.  ", required = false)
-    //boolean keepCompletelyClippedReads = false;
-
-//    @Argument(fullName = "onlyClipFirstSeqMatch", shortName = "ESC", doc="Only clip the first occurrence of a clipping sequence, rather than all subsequences within a read that match", required = false)
-//    boolean onlyClipFirstSeqMatch = false;
-
+    /**
+     * The different values for this argument determine how ClipReads applies clips to the reads.  This can range
+     * from writing Ns over the clipped bases to hard clipping away the bases from the BAM.
+     */
    @Argument(fullName = "clipRepresentation", shortName = "CR", doc = "How should we actually clip the bases?", required = false)
    ClippingRepresentation clippingRepresentation = ClippingRepresentation.WRITE_NS;

+    @Hidden
+    @Advanced
+    @Argument(fullName="read", doc="", required=false)
+    String onlyDoRead = null;

    /**
     * List of sequence that should be clipped from the reads
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java
@ -22,6 +22,7 @@

 package org.broadinstitute.sting.gatk.walkers.coverage;

+import org.broadinstitute.sting.commandline.Advanced;
 import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.commandline.Output;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -154,6 +155,7 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
     * If the number of QC+ bases (on reads with MAPQ > minMappingQuality and with base quality > minBaseQuality) exceeds this
     * value and is less than maxDepth the site is considered CALLABLE.
     */
+    @Advanced
    @Argument(fullName = "minDepth", shortName = "minDepth", doc = "Minimum QC+ read depth before a locus is considered callable", required = false)
    int minDepth = 4;

@ -168,6 +170,7 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
     * won't assign a site to the POOR_MAPPING_QUALITY state unless there are at least minDepthForLowMAPQ reads
     * covering the site.
     */
+    @Advanced
    @Argument(fullName = "minDepthForLowMAPQ", shortName = "mdflmq", doc = "Minimum read depth before a locus is considered a potential candidate for poorly mapped", required = false)
    int minDepthLowMAPQ = 10;

@ -181,6 +184,7 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
    /**
     * The output of this walker will be written in this format.  The recommended option is BED.
     */
+    @Advanced
    @Argument(fullName = "format", shortName = "format", doc = "Output format", required = false)
    OutputFormat outputFormat;

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java
@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.qc;
+
+import org.broad.tribble.Feature;
+import org.broadinstitute.sting.commandline.*;
+import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
+import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.RodWalker;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+
+import java.util.*;
+
+/**
+ * Summary test
+ *
+ * <p>Body test</p>
+ */
+public class DocumentationTest extends RodWalker<Integer, Integer> {
+    // the docs for the arguments are in the collection
+    @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
+
+    /**
+     * dbSNP comparison VCF.  By default, the dbSNP file is used to specify the set of "known" variants.
+     * Other sets can be specified with the -knownName (--known_names) argument.
+     */
+    @ArgumentCollection
+    protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
+
+    /**
+     * detailed documentation about the argument goes here.
+     */
+    @Input(fullName="listofRodBinding", shortName = "disc", doc="Output variants that were not called in this Feature comparison track", required=false)
+    private List<RodBinding<VariantContext>> listOfRodBinding = Collections.emptyList();
+
+    @Input(fullName="optionalRodBinding", shortName = "conc", doc="Output variants that were also called in this Feature comparison track", required=false)
+    private RodBinding<VariantContext> concordanceTrack;
+
+    @Input(fullName="optionalRodBindingWithoutDefault", shortName = "optionalRodBindingWithoutDefault", doc="Output variants that were also called in this Feature comparison track", required=false)
+    private RodBinding<VariantContext> noDefaultOptionalRodBinding;
+
+    @Input(fullName="optionalRodBindingWithoutDefaultNull", shortName = "shortTest", doc="Output variants that were also called in this Feature comparison track", required=false)
+    private RodBinding<VariantContext> noDefaultOptionalRodBindingNull = null;
+
+    @Input(fullName="featureArg", shortName = "featureArg", doc="A RodBinding of feature", required=false)
+    private RodBinding<Feature> featureArg = null;
+
+    @Output(doc="VCFWriter",required=true)
+    protected VCFWriter vcfWriter = null;
+
+    @Advanced
+    @Argument(fullName="setString", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
+    public Set<String> sampleNames;
+
+    @Argument(fullName="setStringInitialized", shortName="setStringInitialized", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
+    public Set<String> setStringInitialized = new HashSet<String>();
+
+    @Argument(shortName="optionalArgWithMissinglessDefault", doc="One or more criteria to use when selecting the data.  Evaluated *after* the specified samples are extracted and the INFO-field annotations are updated.", required=false)
+    public ArrayList<String> SELECT_EXPRESSIONS = new ArrayList<String>();
+
+    @Argument(shortName="AAAAA", fullName = "AAAAA", doc="Should be the first argument", required=false)
+    public boolean FIRST_ARG = false;
+
+    @Advanced
+    @Argument(fullName="booleanArg", shortName="env", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false)
+    private boolean EXCLUDE_NON_VARIANTS = false;
+
+    @Advanced
+    @Argument(fullName="booleanArray", shortName="booleanArray", doc="x", required=false)
+    private boolean[] boolArray = null;
+
+    @Argument(fullName="enumTest", shortName="enumTest", doc="Test enum", required=false)
+    private TestEnum TestEnumArg = TestEnum.ENUM2;
+    public enum TestEnum {
+        /** Docs for enum1 */
+        ENUM1,
+        /** Docs for enum2 */
+        ENUM2
+    }
+
+    @Hidden
+    @Argument(fullName="hiddenArg", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false)
+    private boolean KEEP_AF_SPECTRUM = false;
+
+    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return 0; }
+    public Integer reduceInit() { return 0; }
+    public Integer reduce(Integer value, Integer sum) { return value + sum; }
+    public void onTraversalDone(Integer result) { }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java
@ -25,10 +25,7 @@

 package org.broadinstitute.sting.gatk.walkers.validation;

-import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.Input;
-import org.broadinstitute.sting.commandline.Output;
-import org.broadinstitute.sting.commandline.RodBinding;
+import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -201,30 +198,58 @@ import static org.broadinstitute.sting.utils.IndelUtils.isInsideExtendedIndel;

 public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalker.CountedData, GenotypeAndValidateWalker.CountedData> implements TreeReducible<GenotypeAndValidateWalker.CountedData> {

+    /**
+     * The optional output file that will have all the variants used in the Genotype and Validation assay.
+     */
    @Output(doc="Generate a VCF file with the variants considered by the walker, with a new annotation \"callStatus\" which will carry the value called in the validation VCF or BAM file", required=false)
    protected VCFWriter vcfWriter = null;

+    /**
+     * The callset to be used as truth (default) or validated (if BAM file is set to truth).
+     */
    @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype", required=true)
    public RodBinding<VariantContext> alleles;

+    /**
+     * Makes the Unified Genotyper calls to the BAM file the truth dataset and validates the alleles ROD binding callset.
+     */
    @Argument(fullName ="set_bam_truth", shortName ="bt", doc="Use the calls on the reads (bam file) as the truth dataset and validate the calls on the VCF", required=false)
    private boolean bamIsTruth = false;

+    /**
+     * The minimum base quality score necessary for a base to be considered when calling a genotype. This argument is passed to the Unified Genotyper.
+     */
    @Argument(fullName="minimum_base_quality_score", shortName="mbq", doc="Minimum base quality score for calling a genotype", required=false)
    private int mbq = -1;

+    /**
+     * The maximum deletion fraction allowed in a site for calling a genotype. This argument is passed to the Unified Genotyper.
+     */
    @Argument(fullName="maximum_deletion_fraction", shortName="deletions", doc="Maximum deletion fraction for calling a genotype", required=false)
    private double deletions = -1;

+    /**
+     * the minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. This argument is passed to the Unified Genotyper.
+     */
    @Argument(fullName="standard_min_confidence_threshold_for_calling", shortName="stand_call_conf", doc="the minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls", required=false)
    private double callConf = -1;

+    /**
+     * the minimum phred-scaled Qscore threshold to emit low confidence calls. This argument is passed to the Unified Genotyper.
+     */
    @Argument(fullName="standard_min_confidence_threshold_for_emitting", shortName="stand_emit_conf", doc="the minimum phred-scaled Qscore threshold to emit low confidence calls", required=false)
    private double emitConf = -1;

+    /**
+     * Only validate sites that have at least a given depth
+     */
    @Argument(fullName="condition_on_depth", shortName="depth", doc="Condition validation on a minimum depth of coverage by the reads", required=false)
    private int minDepth = -1;

+    /**
+     * If your VCF or BAM file has more than one sample and you only want to validate one, use this parameter to choose it.
+     */
+    @Hidden
    @Argument(fullName ="sample", shortName ="sn", doc="Name of the sample to validate (in case your VCF/BAM has more than one sample)", required=false)
    private String sample = "";

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
@ -95,7 +95,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
    protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();

    // Help arguments
-    @Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit")
+    @Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit", required=false)
    protected Boolean LIST = false;

    // Partitioning the data arguments
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
@ -105,12 +105,14 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
     * By default this tool only emits values for fields where the FILTER field is either PASS or . (unfiltered).
     * Throwing this flag will cause $WalkerName to emit values regardless of the FILTER field value.
     */
+    @Advanced
    @Argument(fullName="showFiltered", shortName="raw", doc="If provided, field values from filtered records will be included in the output", required=false)
    public boolean showFiltered = false;

    /**
     * If provided, then this tool will exit with success after this number of records have been emitted to the file.
     */
+    @Advanced
    @Argument(fullName="maxRecords", shortName="M", doc="If provided, we will emit at most maxRecord records to the table", required=false)
    public int MAX_RECORDS = -1;
    int nRecords = 0;
@ -121,6 +123,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
     * can make your resulting file unreadable and malformed according to tools like R, as the representation of
     * multi-allelic INFO field values can be lists of values.
     */
+    @Advanced
    @Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false)
    public boolean keepMultiAllelic = false;

@ -131,6 +134,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
     * fields (e.g., AC not being calculated for filtered records, if included).  When provided, this argument
     * will cause VariantsToTable to write out NA values for missing fields instead of throwing an error.
     */
+    @Advanced
    @Argument(fullName="allowMissingData", shortName="AMD", doc="If provided, we will not require every record to contain every field", required=false)
    public boolean ALLOW_MISSING_DATA = false;

--- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java
@ -4,9 +4,28 @@ package org.broadinstitute.sting.utils.clipreads;
 * How should we represent clipped bases in a read?
 */
 public enum ClippingRepresentation {
-    WRITE_NS,           // change the bases to Ns
-    WRITE_Q0S,          // change the quality scores to Q0
-    WRITE_NS_Q0S,       // change the quality scores to Q0 and write Ns
-    SOFTCLIP_BASES,     // change cigar string to S, but keep bases
-    HARDCLIP_BASES      // remove the bases from the read
+    /** Clipped bases are changed to Ns */
+    WRITE_NS,
+
+    /** Clipped bases are changed to have Q0 quality score */
+    WRITE_Q0S,
+
+    /** Clipped bases are changed to have both an N base and a Q0 quality score */
+    WRITE_NS_Q0S,
+
+    /**
+     * Change the read's cigar string to soft clip (S, see sam-spec) away the bases.
+     * Note that this can only be applied to cases where the clipped bases occur
+     * at the start or end of a read.
+     */
+    SOFTCLIP_BASES,
+
+    /**
+     * Change the read's cigar string to hard clip (H, see sam-spec) away the bases.
+     * Hard clipping, unlike soft clipping, actually removes bases from the read,
+     * reducing the resulting file's size but introducing an irreversible (i.e.,
+     * lossy) operation.  Note that this can only be applied to cases where the clipped
+     * bases occur at the start or end of a read.
+     */
+    HARDCLIP_BASES
 }
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java
@ -40,6 +40,29 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.regex.Pattern;

+/**
+ * TODO GUILLERMO DEL ANGEL
+ *
+ * <p>
+ * Codec Description
+ * </p>
+ *
+ * <p>
+ * See also: <a href="http://vcftools.sourceforge.net/specs.html">VCF specification</a><br>
+ * </p>
+
+ * </p>
+ *
+ * <h2>File format example</h2>
+ * <pre>
+ *     line 1
+ *     line 2
+ *     line 3
+ * </pre>
+ *
+ * @author Mark DePristo
+ * @since 2010
+ */
 public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature> {
    private String[] header;
    public enum BeagleReaderType {PROBLIKELIHOOD, GENOTYPES, R2};
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java
@ -33,12 +33,43 @@ import java.io.IOException;
 import java.util.Arrays;

 /**
- * a codec for the file types produced by the HapMap consortium, available on their website:
- * http://hapmap.ncbi.nlm.nih.gov/downloads/genotypes/
+ * A codec for the file types produced by the HapMap consortium
 *
- * The format includes eleven standard fields, plus genotypes for each of the samples included
- * in the file
- * 
+ * <p>
+ *     The format includes eleven standard fields, plus genotypes for each of the samples included
+ *     in the file:
+ *
+ * <pre>
+ *     Col1: refSNP rs# identifier at the time of release (NB might merge with another rs# in the future)
+ *     Col2: SNP alleles according to dbSNP
+ *     Col3: chromosome that SNP maps to
+ *     Col4: chromosome position of SNP, in basepairs on reference sequence
+ *     Col5: strand of reference sequence that SNP maps to
+ *     Col6: version of reference sequence assembly
+ *     Col7: HapMap genotype center that produced the genotypes
+ *     Col8: LSID for HapMap protocol used for genotyping
+ *     Col9: LSID for HapMap assay used for genotyping
+ *     Col10: LSID for panel of individuals genotyped
+ *     Col11: QC-code, currently 'QC+' for all entries (for future use)
+ *     Col12 and on: observed genotypes of samples, one per column, sample identifiers in column headers (Coriell catalog numbers, example: NA10847). Duplicate samples have .dup suffix.
+ * </pre>
+ * </p>
+ *
+ * <p>
+ *  See also: <a href="http://hapmap.ncbi.nlm.nih.gov/downloads/genotypes/">HapMap genotypes download</a>
+ * </p>
+ *
+ * <h2>File format example</h2>
+ * From <a href="http://hapmap.ncbi.nlm.nih.gov/downloads/genotypes/latest/forward/non-redundant/genotypes_chr1_ASW_r27_nr.b36_fwd.txt.gz">genotypes_chr1_ASW_r27_nr.b36_fwd.txt.gz</a>:
+ * <pre>
+ *     rs# alleles chrom pos strand assembly# center protLSID assayLSID panelLSID QCcode NA19625 NA19700 NA19701 NA19702 NA19703 NA19704 NA19705 NA19708 NA19712 NA19711 NA19818 NA19819 NA19828 NA19835 NA19834 NA19836 NA19902 NA19901 NA19900 NA19904 NA19919 NA19908 NA19909 NA19914 NA19915 NA19916 NA19917 NA19918 NA19921 NA20129 NA19713 NA19982 NA19983 NA19714 NA19985 NA20128 NA20126 NA20127 NA20277 NA20276 NA20279 NA20282 NA20281 NA20284 NA20287 NA20288 NA20290 NA20289 NA20291 NA20292 NA20295 NA20294 NA20297 NA20300 NA20301 NA20302 NA20317 NA20319 NA20322 NA20333 NA20332 NA20335 NA20334 NA20337 NA20336 NA20340 NA20341 NA20343 NA20342 NA20344 NA20345 NA20346 NA20347 NA20348 NA20349 NA20350 NA20357 NA20356 NA20358 NA20359 NA20360 NA20363 NA20364
+ *     rs9629043 C/T chr1 554636 + ncbi_b36 broad urn:LSID:affymetrix.hapmap.org:Protocol:GenomeWideSNP_6.0:3 urn:LSID:broad.hapmap.org:Assay:SNP_A-8575115:3 urn:lsid:dcc.hapmap.org:Panel:US_African-30-trios:3 QC+ CC CC CC CC CC CC CC CC CC CC CC CC NN CC CC CC CT CT CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CT CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC
+ *     rs28446478 G/T chr1 576058 + ncbi_b36 sanger urn:LSID:illumina.hapmap.org:Protocol:Human_1M_BeadChip:3 urn:LSID:sanger.hapmap.org:Assay:H1Mrs28446478:3 urn:lsid:dcc.hapmap.org:Panel:US_African-30-trios:3 QC+ GT TT GT TT TT TT TT GT GT TT TT TT TT GT GT GT GT TT GT TT GT GT TT GT GT TT TT TT GT GT TT TT TT GT TT GT TT GT GT GT GT GT TT GT TT TT GT GT TT TT TT TT TT TT GT GT GT GT TT TT TT TT GT TT GT TT TT GT TT TT TT GT TT TT TT GT GT TT GT TT GT TT TT
+ *     rs12565286 C/G chr1 711153 + ncbi_b36 broad urn:LSID:affymetrix.hapmap.org:Protocol:GenomeWideSNP_6.0:3 urn:LSID:broad.hapmap.org:Assay:SNP_A-8709646:3 urn:lsid:dcc.hapmap.org:Panel:US_African-30-trios:3 QC+ GG GG GG GG GG GG GG GG CG GG GG GG GG GG GG GG GG GG GG CG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG CG GG GG GG GG GG GG GG CG CG GG GG GG GG GG GG GG GG GG CG CG GG GG GG GG GG GG GG GG GG GG CG NN GG GG GG GG GG GG NN GG NN NN
+ * </pre>
+ *
+ * @author Mark DePristo
+ * @since 2010
 */
 public class RawHapMapCodec implements FeatureCodec {
    // the minimum number of features in the HapMap file line
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java
@ -12,7 +12,23 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
 import java.util.ArrayList;

 /**
- * the ref seq codec
+ * TODO FOR CHRIS HARTL
+ *
+ * <p>
+ * Codec Description
+ * </p>
+ *
+ * <p>
+ * See also: link to file specification
+ * </p>
+ *
+ * <h2>File format example</h2>
+ * <p>
+ *     A BAM file containing <b>exactly one sample</b>.
+ * </p>
+ *
+ * @author Mark DePristo
+ * @since 2010
 */
 public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature> {

--- a/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java
@ -38,10 +38,43 @@ import java.util.regex.Pattern;
 import static org.broadinstitute.sting.utils.codecs.sampileup.SAMPileupFeature.VariantType;

 /**
- * A Tribble encoder / decoder for SAM pileup data. 
+ * Decoder for SAM pileup data.  For GATK validation purposes only
 *
- * @author mhanna
- * @version 0.1
+ * <p>
+ *     Pileup format was first used by Tony Cox and Zemin Ning at the Sanger Institute.
+ *     It describes the base-pair information at each chromosomal position. This format
+ *     facilitates SNP/indel calling and brief alignment viewing by eyes.
+ * </p>
+ * <p>
+ *     Each line consists of chromosome, 1-based coordinate, reference base, the
+ *     number of reads covering the site, read bases and base qualities. At the
+ *     read base column, a dot stands for a match to the reference base on the
+ *     forward strand, a comma for a match on the reverse strand, `ACGTN' for a mismatch
+ *     on the forward strand and `acgtn' for a mismatch on the reverse strand.
+ *     A pattern `\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between
+ *     this reference position and the next reference position. The length of the
+ *     insertion is given by the integer in the pattern, followed by the inserted sequence.
+ * </p>
+ *
+ * <p>
+ *     See also: @see <a href="http://samtools.sourceforge.net/">SAMTools project</a><br>
+ *     See also: @see <a href="http://samtools.sourceforge.net/pileup.shtml">Pileup format</a>
+ * </p>
+ *
+ * <h2>File format example</h2>
+ * <pre>
+ *     seq1 272 T 24  ,.$.....,,.,.,...,,,.,..^+. <<<+;<<<<<<<<<<<=<;<;7<&
+ *     seq1 273 T 23  ,.....,,.,.,...,,,.,..A <<<;<<<<<<<<<3<=<<<;<<+
+ *     seq1 274 T 23  ,.$....,,.,.,...,,,.,...    7<7;<;<<<<<<<<<=<;<;<<6
+ *     seq1 275 A 23  ,$....,,.,.,...,,,.,...^l.  <+;9*<<<<<<<<<=<<:;<<<<
+ *     seq1 276 G 22  ...T,,.,.,...,,,.,....  33;+<<7=7<<7<&<<1;<<6<
+ *     seq1 277 T 22  ....,,.,.,.C.,,,.,..G.  +7<;<<<<<<<&<=<<:;<<&<
+ *     seq1 278 G 23  ....,,.,.,...,,,.,....^k.   %38*<<;<7<<7<=<<<;<<<<<
+ *     seq1 279 C 23  A..T,,.,.,...,,,.,..... ;75&<<<<<<<<<=<<<9<<:<<
+ * </pre>
+ *
+ * @author Matt Hanna
+ * @since 2009
 */
 public class SAMPileupCodec implements FeatureCodec<SAMPileupFeature> {
    // the number of tokens we expect to parse from a pileup line
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java
@ -36,8 +36,21 @@ import org.broad.tribble.util.ParsingUtils;
 /**
 * Decodes a simple SAM text string.
 *
- * @author mhanna
- * @version 0.1
+ * <p>
+ * Reads in the SAM text version of a BAM file as a ROD.  For testing only
+ * </p>
+ *
+ * <p>
+ * See also: @see <a href="http://samtools.sourceforge.net">SAMTools</a> for format specification
+ * </p>
+ *
+ * <h2>File format example</h2>
+ * <pre>
+ *     SL-XBC:1:10:628:923#0	16	Escherichia_coli_K12	1	37	76M	=	1	0	AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA	B@>87<;A@?@957:>>@AA@B>@A9AB@B>@A@@@@@A;=AAB@BBBBBCBBBB@>A>:ABB@BAABCB=CA@CB
+ * </pre>
+ *
+ * @author Matt Hanna
+ * @since 2009
 */
 public class SAMReadCodec implements FeatureCodec<SAMReadFeature> {
    /* SL-XBC:1:10:628:923#0	16	Escherichia_coli_K12	1	37	76M	=	1	0	AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA	B@>87<;A@?@957:>>@AA@B>@A9AB@B>@A@@@@@A;=AAB@BBBBBCBBBB@>A>:ABB@BAABCB=CA@CB */
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java
@ -41,10 +41,11 @@ import java.io.IOException;

 /**
 * Codec for decoding the output format of the SnpEff variant effect predictor tool
- * (http://snpeff.sourceforge.net/).
 *
+ * <p>
 * This format has 23 tab-delimited fields:
 *
+ * <pre>
 * Chromosome
 * Position
 * Reference
@ -68,10 +69,16 @@ import java.io.IOException;
 * Codons Around
 * Amino Acids Around
 * Custom Interval ID
+ * </pre>
+ * Note that we treat all except the Chromosome, Position, and Effect fields as optional.
+ * </p>
 *
- * We treat all except the Chromosome, Position, and Effect fields as optional.
+ * <p>
+ * See also: @see <a href="http://snpeff.sourceforge.net/">SNPEff project page</a>
+ * </p>
 *
 * @author David Roazen
+ * @since 2011
 */
 public class SnpEffCodec implements FeatureCodec, SelfScopingFeatureCodec {

--- a/public/java/src/org/broadinstitute/sting/utils/codecs/table/BedTableCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/table/BedTableCodec.java
@ -6,14 +6,19 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
 import java.util.Arrays;

 /**
- * Created by IntelliJ IDEA.
- * User: chartl
- * Date: 3/28/11
- * Time: 2:47 PM
- * To change this template use File | Settings | File Templates.
- */
-/**
- * The standard table codec with a slightly different parsing convention (expects loci as contig start stop, not contig:start-stop)
+ * The standard table codec that expects loci as contig start stop, not contig:start-stop
+ *
+ * <p>
+ * The standard table codec with a slightly different parsing convention
+ * (expects loci as contig start stop, not contig:start-stop)
+ * </p>
+ *
+ * <p>
+ * See also: TableCodec
+ * </p>
+ *
+ * @author Chris Hartl
+ * @since 2010
 */
 public class BedTableCodec extends TableCodec implements ReferenceDependentFeatureCodec {

--- a/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java
@ -11,13 +11,40 @@ import java.util.ArrayList;
 import java.util.Arrays;

 /**
- * implementation of a simple table (tab or comma delimited format) input files
+ * Reads tab-delimited tabular text files
+ *
+ * <p>
+ *     <ul>
+ *     <li>Header: must begin with line HEADER or track (for IGV), followed by any number of column names,
+ *     separated by whitespace.</li>
+ *     <li>Comment lines starting with # are ignored</li>
+ *     <li>Each non-header and non-comment line is split into parts by whitespace,
+ *     and these parts are assigned as a map to their corresponding column name in the header.
+ *     Note that the first element (corresponding to the HEADER column) must be a valid genome loc
+ *     such as 1, 1:1 or 1:1-10, which is the position of the Table element on the genome.  TableCodec
+ *     requires that there be one value for each column in the header, and no more, on all lines.</li>
+ *     </ul>
+ * </p>
+ *
+ *
+ *
+ * <h2>File format example</h2>
+ * <pre>
+ *     HEADER a b c
+ *     1:1  1   2   3
+ *     1:2  4   5   6
+ *     1:3  7   8   9
+ * </pre>
+ *
+ * @author Mark DePristo
+ * @since 2009
 */
 public class TableCodec implements ReferenceDependentFeatureCodec {
-    protected String delimiterRegex = "\\s+";
-    protected String headerDelimiter = "HEADER";
-    protected String igvHeaderDelimiter = "track";
-    protected String commentDelimiter = "#";
+    final static protected String delimiterRegex = "\\s+";
+    final static protected String headerDelimiter = "HEADER";
+    final static protected String igvHeaderDelimiter = "track";
+    final static protected String commentDelimiter = "#";
+
    protected ArrayList<String> header = new ArrayList<String>();

    /**
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
@ -14,10 +14,9 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
 import org.broadinstitute.sting.utils.variantcontext.Genotype;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;

-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
+import java.io.*;
 import java.util.*;
+import java.util.zip.GZIPInputStream;


 public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VCFParser, SelfScopingFeatureCodec {
@ -623,9 +622,21 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,

    public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) {
        try {
-            char[] buff = new char[MAGIC_HEADER_LINE.length()];
-            new FileReader(potentialInput).read(buff, 0, MAGIC_HEADER_LINE.length());
+            return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) ||
+                    isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE);
+        } catch ( FileNotFoundException e ) {
+            return false;
+        } catch ( IOException e ) {
+            return false;
+        }
+    }
+
+    private final static boolean isVCFStream(final InputStream stream, final String MAGIC_HEADER_LINE) {
+        try {
+            byte[] buff = new byte[MAGIC_HEADER_LINE.length()];
+            stream.read(buff, 0, MAGIC_HEADER_LINE.length());
            String firstLine = new String(buff);
+            stream.close();
            return firstLine.startsWith(MAGIC_HEADER_LINE);
        } catch ( IOException e ) {
            return false;
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java
@ -14,8 +14,20 @@ import java.util.*;


 /**
- * a feature codec for the VCF 3 specification.  Our aim is to read in the records and convert to VariantContext as
- * quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
+ * A feature codec for the VCF3 specification, to read older VCF files.  VCF3 has been
+ * deprecated in favor of VCF4 (See VCF codec for the latest information)
+ *
+ * <p>
+ * Reads historical VCF3 encoded files (1000 Genomes Pilot results, for example)
+ * </p>
+ *
+ * <p>
+ * See also: @see <a href="http://vcftools.sourceforge.net/specs.html">VCF specification</a><br>
+ * See also: @see <a href="http://www.ncbi.nlm.nih.gov/pubmed/21653522">VCF spec. publication</a>
+ * </p>
+ *
+ * @author Mark DePristo
+ * @since 2010
 */
 public class VCF3Codec extends AbstractVCFCodec {
    public final static String VCF3_MAGIC_HEADER = "##fileformat=VCFv3";
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java
@ -12,12 +12,46 @@ import java.io.FileReader;
 import java.io.IOException;
 import java.util.*;

-
 /**
- * a feature codec for the VCF 4 specification.  Our aim is to read in the records and convert to VariantContext as
- * quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
+ * A feature codec for the VCF 4 specification
+ *
+ * <p>
+ * VCF is a text file format (most likely stored in a compressed manner). It contains meta-information lines, a
+ * header line, and then data lines each containing information about a position in the genome.
+ * </p>
+ * <p>One of the main uses of next-generation sequencing is to discover variation amongst large populations
+ * of related samples. Recently the format for storing next-generation read alignments has been
+ * standardised by the SAM/BAM file format specification. This has significantly improved the
+ * interoperability of next-generation tools for alignment, visualisation, and variant calling.
+ * We propose the Variant Call Format (VCF) as a standardised format for storing the most prevalent
+ * types of sequence variation, including SNPs, indels and larger structural variants, together
+ * with rich annotations. VCF is usually stored in a compressed manner and can be indexed for
+ * fast data retrieval of variants from a range of positions on the reference genome.
+ * The format was developed for the 1000 Genomes Project, and has also been adopted by other projects
+ * such as UK10K, dbSNP, or the NHLBI Exome Project. VCFtools is a software suite that implements
+ * various utilities for processing VCF files, including validation, merging and comparing,
+ * and also provides a general Perl and Python API.
+ * The VCF specification and VCFtools are available from http://vcftools.sourceforge.net.</p>
+ *
+ * <p>
+ * See also: @see <a href="http://vcftools.sourceforge.net/specs.html">VCF specification</a><br>
+ * See also: @see <a href="http://www.ncbi.nlm.nih.gov/pubmed/21653522">VCF spec. publication</a>
+ * </p>
+ *
+ * <h2>File format example</h2>
+ * <pre>
+ *     ##fileformat=VCFv4.0
+ *     #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA12878
+ *     chr1    109     .       A       T       0       PASS  AC=1    GT:AD:DP:GL:GQ  0/1:610,327:308:-316.30,-95.47,-803.03:99
+ *     chr1    147     .       C       A       0       PASS  AC=1    GT:AD:DP:GL:GQ  0/1:294,49:118:-57.87,-34.96,-338.46:99
+ * </pre>
+ *
+ * @author Mark DePristo
+ * @since 2010
 */
 public class VCFCodec extends AbstractVCFCodec {
+    // Our aim is to read in the records and convert to VariantContext as quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
+
    public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4";

    /**
--- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java
@ -36,8 +36,12 @@ import java.lang.annotation.*;
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
 public @interface DocumentedGATKFeature {
+    /** Should we actually document this feature, even though it's annotated? */
    public boolean enable() default true;
+    /** The overall group name (walkers, readfilters) this feature is associated with */
    public String groupName();
+    /** A human readable summary of the purpose of this group of features */
    public String summary() default "";
+    /** Are there links to other docs that we should include?  CommandLineGATK.class for walkers, for example? */
    public Class[] extraDocs() default {};
 }
--- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java
@ -92,9 +92,7 @@ public abstract class DocumentedGATKFeatureHandler {
     *
     * toProcess.setHandlerContent(summary, rootMap);
     *
-     * @param rootDoc
     * @param toProcess
-     * @param all
     */
-    public abstract void processOne(RootDoc rootDoc, GATKDocWorkUnit toProcess, Set<GATKDocWorkUnit> all);
+    public abstract void processOne(GATKDocWorkUnit toProcess);
 }
--- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureObject.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureObject.java
@ -25,12 +25,15 @@
 package org.broadinstitute.sting.utils.help;

 /**
- * Documentation unit.  Effectively a class version of the DocumentedGATKFeature
+ * Documentation unit.  Effectively a class version of the DocumentedGATKFeature.
+ * Immutable data structure.
 *
 * @author depristo
 */
 class DocumentedGATKFeatureObject {
+    /** Which class are we documenting.  Specific to each class being documented */
    private final Class classToDoc;
+    /** Are we enabled? */
    private final boolean enable;
    private final String groupName, summary;
    private final Class[] extraDocs;
--- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java
@ -25,14 +25,33 @@
 package org.broadinstitute.sting.utils.help;

 public class GATKDocUtils {
+    /** The URL root for RELEASED GATKDOC units */
    public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gsa/gatkdocs/release/";
+    /** The URL root for STABLE GATKDOC units */
    public final static String URL_ROOT_FOR_STABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/stable/";
+    /** The URL root for UNSTABLE GATKDOC units */
    public final static String URL_ROOT_FOR_UNSTABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/unstable/";

+    /**
+     * Return the filename of the GATKDoc HTML that would be generated for Class.  This
+     * does not guarantee that the docs exist, or that docs would actually be generated
+     * for class (might not be annotated for documentation, for example).  But if
+     * this class is documented, GATKDocs will write the docs to a file named as returned
+     * by this function.
+     *
+     * @param c
+     * @return
+     */
    public static String htmlFilenameForClass(Class c) {
        return c.getName().replace(".", "_") + ".html";
    }

+    /**
+     * Returns a full URL http://etc/ linking to the documentation for class (assuming it
+     * exists).  Currently points to the RELEASE doc path only.
+     * @param c
+     * @return
+     */
    public static String helpLinksToGATKDocs(Class c) {
        String classPath = htmlFilenameForClass(c);
        StringBuilder b = new StringBuilder();
--- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java
@ -34,23 +34,74 @@ import org.apache.commons.io.FileUtils;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.broad.tribble.FeatureCodec;
+import org.broadinstitute.sting.gatk.CommandLineGATK;
+import org.broadinstitute.sting.gatk.walkers.qc.DocumentationTest;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.exceptions.UserException;

 import java.io.*;
 import java.util.*;

 /**
+ * Javadoc Doclet that combines javadoc, GATK ParsingEngine annotations, and FreeMarker
+ * templates to produce html formatted GATKDocs for walkers
+ * and other classes.
 *
+ * This doclet has the following workflow:
+ *
+ * 1 -- walk the javadoc hierarchy, looking for classes that have the
+ *   DocumentedGATKFeature annotation or are in the type hierarchy in the
+ *   static list of things to document, and are to be documented
+ * 2 -- construct for each a GATKDocWorkUnit, resulting in the complete
+ *   set of things to document
+ * 3 -- for each unit, actually generate an html page documenting it
+ *   as well as links to related features via their units.  Writing
+ *   of a specific class HTML is accomplished by a generate DocumentationHandler
+ * 4 -- write out an index of all units, organized by group
+ *
+ * The documented classes are restricted to only those with @DocumentedGATKFeature
+ * annotation or are in the STATIC_DOCS class.
 */
 public class GATKDoclet {
-    final protected static File SETTINGS_DIR = new File("settings/helpTemplates");
-    final protected static File DESTINATION_DIR = new File("gatkdocs");
    final protected static Logger logger = Logger.getLogger(GATKDoclet.class);
+
+    /** Where we find the help FreeMarker templates */
+    final protected static File SETTINGS_DIR = new File("settings/helpTemplates");
+
+    /** Where we write the GATKDoc html directory */
+    final protected static File DESTINATION_DIR = new File("gatkdocs");
+
+    // ----------------------------------------------------------------------
+    //
+    // Global variables that are set on the command line by javadoc
+    //
+    // ----------------------------------------------------------------------
    protected static String buildTimestamp = null, absoluteVersion = null;
    protected static boolean showHiddenFeatures = false;

+    protected static boolean testOnly = false;
+
+    /**
+     * Any class that's in this list will be included in the documentation
+     * when the -test argument is provided.  Useful for debugging.
+     */
+    private static final List<Class<?>> testOnlyKeepers = Arrays.asList(
+            DocumentationTest.class, CommandLineGATK.class, UserException.class);
+
+    /** The javadoc root doc */
    RootDoc rootDoc;

+    /** The set of all things we are going to document */
+    Set<GATKDocWorkUnit> myWorkUnits;
+
+    /**
+     * A static list of DocumentedGATKFeatureObjects.  Any class that is or extends
+     * one of the DocumentedGATKFeatureObjects.clazz of this collection will also
+     * be documented, even if it doesn't have the @DocumentedGATKFeature annotation.  Useful
+     * when you want to document things that implement an interface (annotations on java
+     * interfaces aren't inherited) or whose base class isn't under your control (tribble
+     * codecs).
+     */
    final static Collection<DocumentedGATKFeatureObject> STATIC_DOCS = new ArrayList<DocumentedGATKFeatureObject>();
    static {
        STATIC_DOCS.add(new DocumentedGATKFeatureObject(FeatureCodec.class,
@ -66,7 +117,8 @@ public class GATKDoclet {
     * @throws java.io.IOException if output can't be written.
     */
    public static boolean start(RootDoc rootDoc) throws IOException {
-        logger.setLevel(Level.DEBUG);
+        logger.setLevel(Level.INFO);
+
        // load arguments
        for(String[] options: rootDoc.options()) {
            if(options[0].equals("-build-timestamp"))
@ -75,10 +127,13 @@ public class GATKDoclet {
                absoluteVersion = options[1];
            if (options[0].equals("-include-hidden"))
                showHiddenFeatures = true;
+            if (options[0].equals("-test"))
+                testOnly = true;
        }

-        GATKDoclet doclet = new GATKDoclet();
-        doclet.processDocs(rootDoc);
+        // process the docs
+        new GATKDoclet().processDocs(rootDoc);
+
        return true;
    }

@ -88,23 +143,74 @@ public class GATKDoclet {
     * @return Number of potential parameters; 0 if not supported.
     */
    public static int optionLength(String option) {
-        if(option.equals("-build-timestamp") || option.equals("-absolute-version") || option.equals("-include-hidden")) {
+        if(option.equals("-build-timestamp") ||
+                option.equals("-absolute-version") ||
+                option.equals("-include-hidden")) {
            return 2;
-        }
-        return 0;
+        } else if ( option.equals("-test") )
+            return 1;
+        else
+            return 0;
    }

+    /**
+     * Are we supposed to include @Hidden annotations in our documented output?
+     * @return
+     */
    public boolean showHiddenFeatures() {
        return showHiddenFeatures;
    }

-    public Set<GATKDocWorkUnit> workUnits() {
+    /**
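+     * Generate the GATKDocs: set up the output directory and templates, document each work unit, then write the index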
+     * @param rootDoc
+     */
+    private void processDocs(RootDoc rootDoc) {
+        // setup the global access to the root
+        this.rootDoc = rootDoc;
+
+        try {
+            // basic setup
+            DESTINATION_DIR.mkdirs();
+            FileUtils.copyFile(new File(SETTINGS_DIR + "/style.css"), new File(DESTINATION_DIR + "/style.css"));
+
+            /* ------------------------------------------------------------------- */
+            /* You should do this ONLY ONCE in the whole application life-cycle:   */
+
+            Configuration cfg = new Configuration();
+            // Specify the data source where the template files come from.
+            cfg.setDirectoryForTemplateLoading(SETTINGS_DIR);
+            // Specify how templates will see the data-model. This is an advanced topic...
+            cfg.setObjectWrapper(new DefaultObjectWrapper());
+
+            myWorkUnits = computeWorkUnits();
+            for ( GATKDocWorkUnit workUnit : myWorkUnits ) {
+                processDocWorkUnit(cfg, workUnit);
+            }
+
+            processIndex(cfg, new ArrayList<GATKDocWorkUnit>(myWorkUnits));
+        } catch ( FileNotFoundException e ) {
+            throw new RuntimeException(e);
+        } catch ( IOException e ) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Returns the set of all GATKDocWorkUnits that we are going to generate docs for.
+     * @return
+     */
+    private Set<GATKDocWorkUnit> computeWorkUnits() {
        TreeSet<GATKDocWorkUnit> m = new TreeSet<GATKDocWorkUnit>();

        for ( ClassDoc doc : rootDoc.classes() ) {
            //logger.debug("Considering " + doc);
            Class clazz = getClassForClassDoc(doc);

+            // don't add anything that's not in testOnlyKeepers if we are in test mode
+            if ( clazz != null && testOnly && ! testOnlyKeepers.contains(clazz) )
+                continue;
+
            //if ( clazz != null && clazz.getName().equals("org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance"))
            //    logger.debug("foo");

@ -124,37 +230,13 @@ public class GATKDoclet {
        return m;
    }

-    protected void processDocs(RootDoc rootDoc) {
-        // setup the global access to the root
-        this.rootDoc = rootDoc;
-
-        try {
-            // basic setup
-            DESTINATION_DIR.mkdirs();
-            FileUtils.copyFile(new File(SETTINGS_DIR + "/style.css"), new File(DESTINATION_DIR + "/style.css"));
-
-            /* ------------------------------------------------------------------- */
-            /* You should do this ONLY ONCE in the whole application life-cycle:   */
-
-            Configuration cfg = new Configuration();
-            // Specify the data source where the template files come from.
-            cfg.setDirectoryForTemplateLoading(SETTINGS_DIR);
-            // Specify how templates will see the data-model. This is an advanced topic...
-            cfg.setObjectWrapper(new DefaultObjectWrapper());
-
-            Set<GATKDocWorkUnit> myWorkUnits = workUnits();
-            for ( GATKDocWorkUnit workUnit : myWorkUnits ) {
-                processDocWorkUnit(cfg, workUnit, myWorkUnits);
-            }
-
-            processIndex(cfg, new ArrayList<GATKDocWorkUnit>(myWorkUnits));
-        } catch ( FileNotFoundException e ) {
-            throw new RuntimeException(e);
-        } catch ( IOException e ) {
-            throw new RuntimeException(e);
-        }
-    }
-
+    /**
+     * Create a handler capable of documenting the class doc according to feature.  Returns
+     * null if no appropriate handler is found or doc shouldn't be documented at all.
+     * @param doc
+     * @param feature
+     * @return
+     */
    private DocumentedGATKFeatureHandler createHandler(ClassDoc doc, DocumentedGATKFeatureObject feature) {
        if ( feature != null ) {
            if ( feature.enable() ) {
@ -169,6 +251,13 @@ public class GATKDoclet {
        return null;
    }

+    /**
+     * Returns the instantiated DocumentedGATKFeatureObject that describes the GATKDoc
+     * structure we will apply to Doc.
+     *
+     * @param doc
+     * @return null if this proves inappropriate or doc shouldn't be documented
+     */
    private DocumentedGATKFeatureObject getFeatureForClassDoc(ClassDoc doc) {
        Class<? extends Object> docClass = getClassForClassDoc(doc);

@ -188,6 +277,11 @@ public class GATKDoclet {
        }
    }

+    /**
+     * Return the Java class described by the ClassDoc doc
+     * @param doc
+     * @return
+     */
    private Class<? extends Object> getClassForClassDoc(ClassDoc doc) {
        try {
            // todo -- what do I need the ? extends Object to pass the compiler?
@ -203,10 +297,12 @@ public class GATKDoclet {
        }
    }

-    public static ClassDoc getClassDocForClass(RootDoc rootDoc, Class clazz) {
-        return rootDoc.classNamed(clazz.getName());
-    }
-
+    /**
+     * Create the html index listing all of the GATKDocs features
+     * @param cfg
+     * @param indexData
+     * @throws IOException
+     */
    private void processIndex(Configuration cfg, List<GATKDocWorkUnit> indexData) throws IOException {
        /* Get or create a template */
        Template temp = cfg.getTemplate("generic.index.template.html");
@ -221,6 +317,12 @@ public class GATKDoclet {
        }
    }

+    /**
+     * Helpful function to create the html index.  Given all of the already run GATKDocWorkUnits,
+     * create the high-level grouping data listing individual features by group.
+     * @param indexData
+     * @return
+     */
    private Map<String, Object> groupIndexData(List<GATKDocWorkUnit> indexData) {
        //
        // root -> data -> { summary -> y, filename -> z }, etc
@ -248,6 +350,11 @@ public class GATKDoclet {
        return root;
    }

+    /**
+     * Trivial helper routine that returns the map of name and summary given the annotation
+     * @param annotation
+     * @return
+     */
    private static final Map<String, String> toMap(DocumentedGATKFeatureObject annotation) {
        Map<String, String> root = new HashMap<String, String>();
        root.put("name", annotation.groupName());
@ -255,18 +362,39 @@ public class GATKDoclet {
        return root;
    }

-    public final static GATKDocWorkUnit findWorkUnitForClass(Class c, Set<GATKDocWorkUnit> all) {
-        for ( final GATKDocWorkUnit unit : all )
+    /**
+     * Helper function that finds the GATKDocWorkUnit associated with class from among all of the work units
+     * @param c the class we are looking for
+     * @return the GATKDocWorkUnit whose .clazz.equals(c), or null if none could be found
+     */
+    public final GATKDocWorkUnit findWorkUnitForClass(Class c) {
+        for ( final GATKDocWorkUnit unit : this.myWorkUnits )
            if ( unit.clazz.equals(c) )
                return unit;
        return null;
    }

-    private void processDocWorkUnit(Configuration cfg, GATKDocWorkUnit unit, Set<GATKDocWorkUnit> all)
+    /**
+     * Return the ClassDoc associated with clazz
+     * @param clazz
+     * @return
+     */
+    public ClassDoc getClassDocForClass(Class clazz) {
+        return rootDoc.classNamed(clazz.getName());
+    }
+
+    /**
+     * High-level function that processes a single DocWorkUnit unit using its handler
+     *
+     * @param cfg
+     * @param unit
+     * @throws IOException
+     */
+    private void processDocWorkUnit(Configuration cfg, GATKDocWorkUnit unit)
            throws IOException {
        //System.out.printf("Processing documentation for class %s%n", unit.classDoc);

-        unit.handler.processOne(rootDoc, unit, all);
+        unit.handler.processOne(unit);

        // Get or create a template
        Template temp = cfg.getTemplate(unit.handler.getTemplateName(unit.classDoc));
--- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java
@ -24,6 +24,7 @@

 package org.broadinstitute.sting.utils.help;

+import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
 import com.sun.javadoc.ClassDoc;
 import com.sun.javadoc.FieldDoc;
@ -31,8 +32,10 @@ import com.sun.javadoc.RootDoc;
 import com.sun.javadoc.Tag;
 import org.apache.log4j.Logger;
 import org.broad.tribble.Feature;
+import org.broad.tribble.bed.FullBEDFeature;
 import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.CommandLineGATK;
+import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
 import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
 import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.classloader.JVMUtils;
@ -49,14 +52,18 @@ import java.util.*;
 */
 public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
    private static Logger logger = Logger.getLogger(GenericDocumentationHandler.class);
-    GATKDocWorkUnit toProcess;
-    ClassDoc classdoc;
-    Set<GATKDocWorkUnit> all;
-    RootDoc rootDoc;
+
+    /**
+     * The max. length of the longest of --fullName -shortName argument name
+     * before we prefer the shorter option.
+     */
+    private static final int MAX_DISPLAY_NAME = 30;
+
+    /** The Class we are documenting */
+    private GATKDocWorkUnit toProcess;

    @Override
    public boolean includeInDocs(ClassDoc doc) {
-//        return true;
        try {
            Class type = HelpUtils.getClassForDoc(doc);
            return JVMUtils.isConcrete(type);
@ -72,11 +79,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
    }

    @Override
-    public void processOne(RootDoc rootDoc, GATKDocWorkUnit toProcessArg, Set<GATKDocWorkUnit> allArg) {
-        this.rootDoc = rootDoc;
+    public void processOne(GATKDocWorkUnit toProcessArg) {
        this.toProcess = toProcessArg;
-        this.all = allArg;
-        this.classdoc = toProcess.classDoc;

        //System.out.printf("%s class %s%n", toProcess.group, toProcess.classDoc);
        Map<String, Object> root = new HashMap<String, Object>();
@ -88,71 +92,76 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
        toProcess.setHandlerContent((String)root.get("summary"), root);
    }

+    /**
+     * Add high-level summary information about toProcess to root: its name,
+     * summary (the concatenated text of the first-sentence tags), description
+     * (the class comment text with a summary-length prefix stripped off),
+     * build timestamp and version.  Every tag returned by classDoc.tags() is
+     * additionally bound under tag.name() with tag.text() as its value.
+     *
+     * @param root the binding map to populate
+     */
    protected void addHighLevelBindings(Map<String, Object> root) {
-        root.put("name", classdoc.name());
+        root.put("name", toProcess.classDoc.name());

        // Extract overrides from the doc tags.
        StringBuilder summaryBuilder = new StringBuilder();
-        for(Tag tag: classdoc.firstSentenceTags())
+        for(Tag tag: toProcess.classDoc.firstSentenceTags())
            summaryBuilder.append(tag.text());
        root.put("summary", summaryBuilder.toString());
-        root.put("description", classdoc.commentText().substring(summaryBuilder.toString().length()));
+        root.put("description", toProcess.classDoc.commentText().substring(summaryBuilder.toString().length()));
        root.put("timestamp", toProcess.buildTimestamp);
        root.put("version", toProcess.absoluteVersion);

-        for(Tag tag: classdoc.tags()) {
+        for(Tag tag: toProcess.classDoc.tags()) {
            root.put(tag.name(), tag.text());
        }
    }

+    /**
+     * Add bindings describing related GATK capabilities to toProcess.
+     *
+     * Binds "extradocs" in root to a list holding one map per class named in
+     * the extraDocs() of toProcess's annotation; each map carries the
+     * "filename" and "name" of that class's documentation work unit.
+     *
+     * @param root the binding map to populate
+     * @throws ReviewedStingException if an extraDocs class has no work unit
+     */
+    protected void addRelatedBindings(Map<String, Object> root) {
+        List<Map<String, Object>> extraDocsData = new ArrayList<Map<String, Object>>();
+
+        // add in all of the explicitly related items
+        for ( final Class extraDocClass : toProcess.annotation.extraDocs() ) {
+            final GATKDocWorkUnit otherUnit = getDoclet().findWorkUnitForClass(extraDocClass);
+            if ( otherUnit == null )
+                throw new ReviewedStingException("Requested extraDocs for class without any documentation: " + extraDocClass);
+
+            // a plain HashMap rather than a double-brace initializer, which would
+            // create an anonymous subclass holding a reference to this handler
+            final Map<String, Object> extraDoc = new HashMap<String, Object>();
+            extraDoc.put("filename", otherUnit.filename);
+            extraDoc.put("name", otherUnit.name);
+            extraDocsData.add(extraDoc);
+        }
+        root.put("extradocs", extraDocsData);
+    }
+
+    /**
+     * Add information about all of the arguments available to toProcess to root
+     *
+     * @param root
+     */
    protected void addArgumentBindings(Map<String, Object> root) {
        ParsingEngine parsingEngine = createStandardGATKParsingEngine();

-        // attempt to instantiate the class
-        Object instance = makeInstanceIfPossible(toProcess.clazz);
-
-        Map<String, List<Map<String, Object>>> args = new HashMap<String, List<Map<String, Object>>>();
+        Map<String, List<Map<String, Object>>> args = createArgumentMap();
        root.put("arguments", args);
-        args.put("all", new ArrayList<Map<String, Object>>());
-        args.put("required", new ArrayList<Map<String, Object>>());
-        args.put("optional", new ArrayList<Map<String, Object>>());
-        args.put("advanced", new ArrayList<Map<String, Object>>());
-        args.put("hidden", new ArrayList<Map<String, Object>>());
-        args.put("depreciated", new ArrayList<Map<String, Object>>());
        try {
-            for ( ArgumentSource argumentSource : parsingEngine.extractArgumentSources(HelpUtils.getClassForDoc(classdoc)) ) {
+            // loop over all of the arguments according to the parsing engine
+            for ( final ArgumentSource argumentSource : parsingEngine.extractArgumentSources(HelpUtils.getClassForDoc(toProcess.classDoc)) ) {
+                // todo -- why can you have multiple ones?
                ArgumentDefinition argDef = argumentSource.createArgumentDefinitions().get(0);
-                FieldDoc fieldDoc = getFieldDoc(classdoc, argumentSource.field.getName());
-                Map<String, Object> argBindings = docForArgument(fieldDoc, argumentSource, argDef); // todo -- why can you have multiple ones?
+                FieldDoc fieldDoc = getFieldDoc(toProcess.classDoc, argumentSource.field.getName());
+                Map<String, Object> argBindings = docForArgument(fieldDoc, argumentSource, argDef);
                if ( ! argumentSource.isHidden() || getDoclet().showHiddenFeatures() ) {
-                    logger.debug(String.format("Processing %s", argumentSource));
-                    String kind = "optional";
-                    if ( argumentSource.isRequired() ) kind = "required";
-                    else if ( argumentSource.isAdvanced() ) kind = "advanced";
-                    else if ( argumentSource.isHidden() ) kind = "hidden";
-                    else if ( argumentSource.isDeprecated() ) kind = "depreciated";
+                    final String kind = docKindOfArg(argumentSource);

-                    // get the value of the field
-                    if ( instance != null ) {
-                        Object value = getFieldValue(toProcess.clazz, instance, fieldDoc.name());
-
-                        if ( value == null && argumentSource.createsTypeDefault() ) {
-                            // handle the case where there's an implicit default
-                            try {
-                                value = argumentSource.typeDefaultDocString();
-                            } catch (ReviewedStingException e) {
-                                ; // failed to create type default, don't worry about it
-                            }
-                        }
-
-                        if ( value != null )
-                            argBindings.put("defaultValue", prettyPrintValueString(value));
-                    }
+                    final Object value = argumentValue(toProcess.clazz, argumentSource);
+                    if ( value != null )
+                        argBindings.put("defaultValue", prettyPrintValueString(value));

                    args.get(kind).add(argBindings);
                    args.get("all").add(argBindings);
-                } else {
-                    logger.debug(String.format("Skipping hidden feature %s", argumentSource));
                }
            }

@ -165,11 +174,78 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
        }
    }

+    /**
+     * Classify argumentSource into the documentation category it is listed under.
+     *
+     * The checks run in a fixed order -- required, advanced, hidden,
+     * deprecated -- and the first one that holds wins; an argument matching
+     * none of them is "optional".
+     *
+     * @param argumentSource the argument to classify
+     * @return one of "required", "advanced", "hidden", "depreciated" or "optional"
+     */
+    @Requires("argumentSource != null")
+    @Ensures("result != null")
+    private String docKindOfArg(ArgumentSource argumentSource) {
+        if ( argumentSource.isRequired() )
+            return "required";
+        if ( argumentSource.isAdvanced() )
+            return "advanced";
+        if ( argumentSource.isHidden() )
+            return "hidden";
+        if ( argumentSource.isDeprecated() )
+            return "depreciated";
+        return "optional";
+    }
+
+    /**
+     * Attempts to determine the value of argumentSource in an instantiated version of c.
+     *
+     * The field's own value is preferred; when that is null and the argument
+     * source creates a type default, the type default's doc string is used.
+     *
+     * @param c the class to instantiate
+     * @param argumentSource the argument whose value is wanted
+     * @return value of argumentSource, or null if c cannot be instantiated or
+     *         neither a field value nor a type default can be determined
+     */
+    @Requires({"c != null", "argumentSource != null"})
+    private Object argumentValue(Class c, ArgumentSource argumentSource) {
+        // attempt to instantiate the class we were asked about, rather than
+        // silently ignoring the parameter in favor of toProcess.clazz
+        final Object instance = makeInstanceIfPossible(c);
+        if ( instance == null )
+            return null;
+
+        // get the value of the field
+        final Object value = getFieldValue(instance, argumentSource.field.getName());
+        if ( value != null )
+            return value;
+
+        if ( argumentSource.createsTypeDefault() ) {
+            try { // handle the case where there's an implicit default
+                return argumentSource.typeDefaultDocString();
+            } catch (ReviewedStingException e) {
+                // failed to create type default, don't worry about it
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Build the empty argument map: one empty list for each argument
+     * category ("all", "required", "optional", "advanced", "hidden" and
+     * "depreciated").
+     *
+     * @return a new map from category name to an empty, mutable list
+     */
+    private Map<String, List<Map<String, Object>>> createArgumentMap() {
+        final String[] categories = { "all", "required", "optional", "advanced", "hidden", "depreciated" };
+        final Map<String, List<Map<String, Object>>> byCategory = new HashMap<String, List<Map<String, Object>>>();
+        for ( final String category : categories )
+            byCategory.put(category, new ArrayList<Map<String, Object>>());
+        return byCategory;
+    }
+
+
+    /**
+     * Sorts the individual argument list in unsorted according to CompareArgumentsByName.
+     * The list is sorted in place and the very same list object is returned.
+     *
+     * @param unsorted the argument bindings to sort; reordered by this call
+     * @return unsorted itself, after sorting
+     */
    private List<Map<String, Object>> sortArguments(List<Map<String, Object>> unsorted) {
        Collections.sort(unsorted, new CompareArgumentsByName());
        return unsorted;
    }

+    /**
+     * Sort arguments by case-insensitive comparison ignoring the -- and - prefixes
+     */
    private class CompareArgumentsByName implements Comparator<Map<String, Object>> {
        public int compare(Map<String, Object> x, Map<String, Object> y) {
            return elt(x).compareTo(elt(y));
@ -186,25 +262,32 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
        }
    }

-    private Object getFieldValue(Class c, Object instance, String fieldName) {
-        Field field = JVMUtils.findField(c, fieldName);
-        if ( field != null ) {
-            Object value = JVMUtils.getFieldValue(field, instance);
-            //System.out.printf("Fetched value of field %s in class %s: %s%n", fieldName, c, value);
-            return value;
-        } else {
-            return findFieldValueInArgumentCollections(c, instance, fieldName);
-        }
-    }
-
-    private Object findFieldValueInArgumentCollections(Class c, Object instance, String fieldName) {
-        for ( Field field : JVMUtils.getAllFields(c) ) {
+    /**
+     * Utility function that finds the value of fieldName in instance, searching the fields of
+     * any ArgumentCollection fields of instance as well as its own fields.
+     *
+     * @param instance the object to query for the field value
+     * @param fieldName the name of the field we are looking for in instance
+     * @return The value assigned to field in the ArgumentCollection, otherwise null
+     */
+    private Object getFieldValue(Object instance, String fieldName) {
+        //
+        // subtle note.  If you have a field named X that is an ArgumentCollection that
+        // contains a field X as well, you need only consider fields in the argumentCollection, not
+        // matching the argument itself.
+        //
+        // @ArgumentCollection
+        // protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
+        //
+        for ( Field field : JVMUtils.getAllFields(instance.getClass()) ) {
            if ( field.isAnnotationPresent(ArgumentCollection.class) ) {
                //System.out.printf("Searching for %s in argument collection field %s%n", fieldName, field);
                Object fieldValue = JVMUtils.getFieldValue(field, instance);
-                Object value = getFieldValue(fieldValue.getClass(), fieldValue, fieldName);
+                Object value = getFieldValue(fieldValue, fieldName);
                if ( value != null )
                    return value;
+            } else if ( field.getName().equals(fieldName) ) {
+                return JVMUtils.getFieldValue(field, instance);
            }
        }

@ -212,6 +295,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
    }

    /**
+     * Pretty prints value
+     *
     * Assumes value != null
     * @param value
     * @return
@ -246,6 +331,11 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
        return value.toString();
    }

+    /**
+     * Attempt to instantiate class c, if possible.  Returns null if this proves impossible.
+     * @param c
+     * @return
+     */
    private Object makeInstanceIfPossible(Class c) {
        Object instance = null;
        try {
@ -265,47 +355,16 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
        // this last one is super dangerous, but some of these methods catch ClassNotFoundExceptions
        // and rethrow then as RuntimeExceptions
        catch (RuntimeException e) {}
-//        finally {
-//            if ( instance == null )
-//                logger.warn(String.format("Unable to create instance of class %s => %s", c, instance));
-//        }

        return instance;
    }

-    protected void addRelatedBindings(Map<String, Object> root) {
-        List<Map<String, Object>> extraDocsData = new ArrayList<Map<String, Object>>();

-        // add in all of the explicitly related items
-        for ( final Class extraDocClass : toProcess.annotation.extraDocs() ) {
-            final GATKDocWorkUnit otherUnit = GATKDoclet.findWorkUnitForClass(extraDocClass, all);
-            if ( otherUnit == null )
-                throw new ReviewedStingException("Requested extraDocs for class without any documentation: " + extraDocClass);
-            extraDocsData.add(
-                    new HashMap<String, Object>(){{
-                        put("filename", otherUnit.filename);
-                        put("name", otherUnit.name);}});
-
-        }
-        root.put("extradocs", extraDocsData);
-    }
-
-    private static final String classRelationship(Class me, Class other) {
-        if ( other.equals(me) )
-            // no circular references
-            return null;
-        else if ( other.isAssignableFrom(me) )
-            // toProcess is a superclass of other.clazz
-            return "superclass";
-        else if ( me.isAssignableFrom(other) )
-            // toProcess inherits from other.clazz
-            return "subclass";
-        else
-            return null;
-
-    }
-
-    protected ParsingEngine createStandardGATKParsingEngine() {
+    /**
+     * Create an instance of the GATK parsing engine, for argument processing with GATKDoclet
+     * @return
+     */
+    private ParsingEngine createStandardGATKParsingEngine() {
        CommandLineProgram clp = new CommandLineGATK();
        try {
            CommandLineProgram.start(clp, new String[]{}, true);
@ -315,10 +374,25 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
        }
    }

+    /**
+     * Gets the javadocs associated with field name in classDoc.  Throws a
+     * runtime exception if this proves impossible.
+     *
+     * @param classDoc the class documentation to search
+     * @param name the name of the field to find
+     * @return the result of the recursive helper invoked with primary == true
+     */
    private FieldDoc getFieldDoc(ClassDoc classDoc, String name) {
        return getFieldDoc(classDoc, name, true);
    }

+    /**
+     * Recursive helper routine to getFieldDoc()
+     * @param classDoc
+     * @param name
+     * @param primary
+     * @return
+     */
    private FieldDoc getFieldDoc(ClassDoc classDoc, String name, boolean primary) {
        //System.out.printf("Looking for %s in %s%n", name, classDoc.name());
        for ( FieldDoc fieldDoc : classDoc.fields(false) ) {
@ -353,7 +427,14 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
            return null;
    }

-    private static final int MAX_DISPLAY_NAME = 30;
+    /**
+     * Returns a Pair of (main, synonym) names for argument with fullName s1 and
+     * shortName s2.  The main is selected to be the longest of the two, provided
+     * it doesn't exceed MAX_DISPLAY_NAME, in which case the shorter is taken.
+     * @param s1
+     * @param s2
+     * @return
+     */
    Pair<String, String> displayNames(String s1, String s2) {
        if ( s1 == null ) return new Pair<String, String>(s2, null);
        if ( s2 == null ) return new Pair<String, String>(s1, null);
@ -367,6 +448,15 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
            return new Pair<String, String>(l, s);
    }

+    /**
+     * Returns a human readable string that describes the Type type of a GATK argument.
+     *
+     * This will include parameterized types, so that Set{T} shows up as Set(T) and not
+     * just Set in the docs.
+     *
+     * @param type
+     * @return
+     */
    protected String argumentTypeString(Type type) {
        if (type instanceof ParameterizedType) {
            ParameterizedType parameterizedType = (ParameterizedType)type;
@ -385,6 +475,13 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
        }
    }

+    /**
+     * Helper routine that returns the Feature.class required by a RodBinding,
+     * either T for RodBinding{T} or List{RodBinding{T}}.  Returns null if
+     * the Type doesn't fit either model.
+     * @param type
+     * @return
+     */
    protected Class<? extends Feature> getFeatureTypeIfPossible(Type type) {
        if ( type instanceof ParameterizedType) {
            ParameterizedType paramType = (ParameterizedType)type;
@ -402,6 +499,14 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
        return null;
    }

+    /**
+     * High-level entry point for creating a FreeMarker map describing the GATK argument
+     * source with definition def, with associated javadoc fieldDoc.
+     * @param fieldDoc
+     * @param source
+     * @param def
+     * @return a non-null Map binding argument keys with their values
+     */
    protected Map<String, Object> docForArgument(FieldDoc fieldDoc, ArgumentSource source, ArgumentDefinition def) {
        Map<String, Object> root = new HashMap<String, Object>();
        Pair<String, String> names = displayNames("-" + def.shortName, "--" + def.fullName);
@ -434,27 +539,29 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
        root.put("summary", def.doc != null ? def.doc : "");
        root.put("fulltext", fieldDoc.commentText());

+        // What are our enum options?
+        if ( def.validOptions != null )
+            root.put("options", docForEnumArgument(source.field.getType()));
+
+        // general attributes
        List<String> attributes = new ArrayList<String>();
-        // this one below is just too much.
-        //attributes.add(def.ioType.annotationClass.getSimpleName());
        if ( def.required ) attributes.add("required");
-        // flag is just boolean, not interesting
-        //if ( def.isFlag ) attributes.add("flag");
-        if ( def.isHidden ) attributes.add("hidden");
        if ( source.isDeprecated() ) attributes.add("depreciated");
        if ( attributes.size() > 0 )
            root.put("attributes", Utils.join(", ", attributes));

-        if ( def.validOptions != null ) {
-            root.put("options", docForEnumArgument(source.field.getType()));
-        }
-
        return root;
    }

+    /**
+     * Helper routine that provides a FreeMarker map for an enumClass, grabbing the
+     * values of the enum and their associated javadoc documentation.
+     * @param enumClass
+     * @return
+     */
    @Requires("enumClass.isEnum()")
    private List<Map<String, Object>> docForEnumArgument(Class enumClass) {
-        ClassDoc doc = GATKDoclet.getClassDocForClass(rootDoc, enumClass);
+        ClassDoc doc = this.getDoclet().getClassDocForClass(enumClass);
        if ( doc == null ) //  || ! doc.isEnum() )
            throw new RuntimeException("Tried to get docs for enum " + enumClass + " but got instead: " + doc);

@ -468,5 +575,4 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {

        return bindings;
    }
-
 }
--- a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java
@ -32,14 +32,6 @@ import org.broadinstitute.sting.utils.classloader.JVMUtils;
 import java.lang.reflect.Field;

 public class HelpUtils {
-
-    protected static boolean implementsInterface(ProgramElementDoc classDoc, Class... interfaceClasses) {
-        for (Class interfaceClass : interfaceClasses)
-            if (assignableToClass(classDoc, interfaceClass, false))
-                return true;
-        return false;
-    }
-
    protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) {
        try {
            Class type = getClassForDoc(classDoc);
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/ClipReadsWalkersIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/ClipReadsWalkersIntegrationTest.java
@ -37,8 +37,8 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
                "-R " + hg18Reference +
                        " -T ClipReads " +
                        "-I " + validationDataLocation + "clippingReadsTest.bam " +
-                        "-o %s " +
-                        "-ob %s " + args,
+                        "-os %s " +
+                        "-o %s " + args,
                2, // just one output file
                Arrays.asList("tmp", "bam"),
                Arrays.asList(md51, md52));
@ -72,9 +72,9 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
                        " -I " + validationDataLocation + "originalQuals.chr1.1-1K.bam" +
                        " -L chr1:1-1,000" +
                        " -OQ -QT 4 -CR WRITE_Q0S" +
-                        " -o %s -ob %s",
+                        " -o %s -os %s",
                2,
-                Arrays.asList("55c01ccc2e84481b22d3632cdb06c8ba", "22db22749f811d30216215e047461621"));
+                Arrays.asList("22db22749f811d30216215e047461621", "55c01ccc2e84481b22d3632cdb06c8ba"));
        executeTest("clipOriginalQuals", spec);
    }
 }
--- a/settings/helpTemplates/style.css
+++ b/settings/helpTemplates/style.css
@ -85,25 +85,33 @@ hr
 * enum DT layout
 */

+dl {
+   margin-left: 3em;
+}
+
 dl.enum {
   margin-left: 3em;
   border: 1px dashed #ccc;
 }

-dt.enum {
+dt, dt.enum {
    font-weight: bold;
    text-decoration: underline;
 }

-dd.enum {
+/*
+dt, dd.enum {
    padding: 0 0 0.5em 0;
 }
+*/

 pre {
    border: thin solid lightgray;
    margin-left: 1em;
    margin-right: 4em;
+/*
    background-color: #e0fdff;
+*/
 }
 /*
 * clean table layouts