Merge branch 'master' of ssh://chartl@tin.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

2011-08-17 14:06:25 -04:00 · 2011-08-17 14:06:25 -04:00 · eea8087c73
parent 0c3466d2c3 c405a75f54
commit eea8087c73
24 changed files with 814 additions and 186 deletions
--- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java
@ -96,24 +96,23 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
            loadArgumentsIntoObject(walker);
            argumentSources.add(walker);

-            Collection<RMDTriplet> newStyle = ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser);
+            Collection<RMDTriplet> rodBindings = ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser);

            // todo: remove me when the old style system is removed
            if ( getArgumentCollection().RODBindings.size() > 0 ) {
                logger.warn("################################################################################");
                logger.warn("################################################################################");
-                logger.warn("Deprecated -B rod binding syntax detected.  This syntax will be retired in GATK 1.2.");
+                logger.warn("Deprecated -B rod binding syntax detected.  This syntax has been eliminated in GATK 1.2.");
                logger.warn("Please use arguments defined by each specific walker instead.");
                for ( String oldStyleRodBinding : getArgumentCollection().RODBindings ) {
                    logger.warn("  -B rod binding with value " + oldStyleRodBinding + " tags: " + parser.getTags(oldStyleRodBinding).getPositionalTags());
                }
                logger.warn("################################################################################");
                logger.warn("################################################################################");
+                System.exit(1);
            }

-            Collection<RMDTriplet> oldStyle = ListFileUtils.unpackRODBindingsOldStyle(getArgumentCollection().RODBindings, parser);
-            oldStyle.addAll(newStyle);
-            engine.setReferenceMetaDataFiles(oldStyle);
+            engine.setReferenceMetaDataFiles(rodBindings);

            for (ReadFilter filter: filters) {
                loadArgumentsIntoObject(filter);
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java
@ -39,8 +39,7 @@ import org.simpleframework.xml.*;
 public class DbsnpArgumentCollection {

    /**
-     * A dbSNP VCF file.  Variants in this track will be treated as "known" variants
-     * in tools using this track.
+     * A dbSNP VCF file.
     */
    @Input(fullName="dbsnp", shortName = "D", doc="dbSNP file", required=false)
    public RodBinding<VariantContext> dbsnp;
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@ -101,6 +101,8 @@ public class GATKArgumentCollection {
    @Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
    public File referenceFile = null;

+    @Deprecated
+    @Hidden
    @ElementList(required = false)
    @Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false)
    public ArrayList<String> RODBindings = new ArrayList<String>();
@ -340,14 +342,6 @@ public class GATKArgumentCollection {
                return false;
            }
        }
-        if (other.RODBindings.size() != RODBindings.size()) {
-            return false;
-        }
-        for (int x = 0; x < RODBindings.size(); x++) {
-            if (!RODBindings.get(x).equals(other.RODBindings.get(x))) {
-                return false;
-            }
-        }
        if (!other.samFiles.equals(this.samFiles)) {
            return false;
        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java
@ -40,26 +40,65 @@ import java.util.TreeSet;

 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
+
 /**
- * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear
- * in the input file.  It can dynamically merge the contents of multiple input BAM files, resulting
- * in merged output sorted in coordinate order.  Can also optionally filter reads based on the --read-filter
- * command line argument.
+ * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
+ *
+ * <p>
+ * PrintReads can dynamically merge the contents of multiple input BAM files, resulting
+ * in merged output sorted in coordinate order.  Can also optionally filter reads based on the
+ * --read_filter command line argument.
+ *
+ * <h2>Input</h2>
+ * <p>
+ * One or more bam files.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * A single processed bam file.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T PrintReads \
+ *   -o output.bam \
+ *   -I input1.bam \
+ *   -I input2.bam \
+ *   --read_filter MappingQualityZero
+ * </pre>
+ *
 */
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@Requires({DataSource.READS, DataSource.REFERENCE})
 public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
-    /** an optional argument to dump the reads out to a BAM file */
+
    @Output(doc="Write output to this BAM filename instead of STDOUT")
    SAMFileWriter out;
+
    @Argument(fullName = "readGroup", shortName = "readGroup", doc="Exclude all reads with this read group from the output", required = false)
    String readGroup = null;
+
+    /**
+     * For example, --platform ILLUMINA or --platform 454.
+     */
    @Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false)
-    String platform = null; // E.g. ILLUMINA, 454
+    String platform = null;
+
    @Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
    int nReadsToPrint = -1;
+
+    /**
+     * Only reads from samples listed in the provided file(s) will be included in the output.
+     */
    @Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false)
    public Set<File> sampleFile = new TreeSet<File>();
+
+    /**
+     * Only reads from the sample(s) will be included in the output.
+     */
    @Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
    public Set<String> sampleNames = new TreeSet<String>();

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
@ -49,7 +49,34 @@ import java.util.*;


 /**
- * Annotates variant calls with context information.  Users can specify which of the available annotations to use.
+ * Annotates variant calls with context information.
+ *
+ * <p>
+ * VariantAnnotator is a GATK tool for annotating variant calls based on their context.
+ * The tool is modular; new annotations can be written easily without modifying VariantAnnotator itself.
+ *
+ * <h2>Input</h2>
+ * <p>
+ * A variant set to annotate and optionally one or more BAM files.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * An annotated VCF.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T VariantAnnotator \
+ *   -I input.bam \
+ *   -o output.vcf \
+ *   -A DepthOfCoverage
+ *   --variant input.vcf \
+ *   --dbsnp dbsnp.vcf
+ * </pre>
+ *
 */
@Requires(value={})
@Allows(value={DataSource.READS, DataSource.REFERENCE})
@ -69,8 +96,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
    public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return snpEffFile; }

    /**
-      * A dbSNP VCF file from which to annotate.
-      *
      * rsIDs from this file are used to populate the ID column of the output.  Also, the DB INFO flag will be set when appropriate.
      */
    @ArgumentCollection
@ -101,15 +126,25 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
    @Output(doc="File to which variants should be written",required=true)
    protected VCFWriter vcfWriter = null;

-    @Argument(fullName="sampleName", shortName="sample", doc="The sample (NA-ID) corresponding to the variant input (for non-VCF input only)", required=false)
-    protected String sampleName = null;
-
+    /**
+     * See the -list argument to view available annotations.
+     */
    @Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
    protected List<String> annotationsToUse = new ArrayList<String>();

+    /**
+     * See the -list argument to view available groups.
+     */
    @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
    protected List<String> annotationGroupsToUse = new ArrayList<String>();

+    /**
+     * This option enables you to add annotations from one VCF to another.
+     *
+     * For example, if you want to annotate your 'variant' VCF with the AC field value from the rod bound to 'resource',
+     * you can specify '-E resource.AC' and records in the output VCF will be annotated with 'resource.AC=N' when a record exists in that rod at the given position.
+     * If multiple records in the rod overlap the given position, one is chosen arbitrarily.
+     */
    @Argument(fullName="expression", shortName="E", doc="One or more specific expressions to apply to variant calls; see documentation for more details", required=false)
    protected List<String> expressionsToUse = new ArrayList<String>();

@ -127,8 +162,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
    @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
    protected boolean indelsOnly = false;

-    private HashMap<String, String> nonVCFsampleName = new HashMap<String, String>();
-
    private VariantAnnotatorEngine engine;

    private Collection<VariantContext> indelBufferContext;
@ -164,12 +197,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
        List<String> rodName = Arrays.asList(variantCollection.variants.getName());
        Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);

-        // add the non-VCF sample from the command-line, if applicable
-        if ( sampleName != null  ) {
-            nonVCFsampleName.put(sampleName.toUpperCase(), "variant");
-            samples.add(sampleName.toUpperCase());
-        }
-
        // if there are no valid samples, warn the user
        if ( samples.size() == 0 ) {
            logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired.");
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java
@ -38,12 +38,32 @@ import java.util.List;

 /**
 * Walks along reference and calculates the GC content for each interval.
+ *
+ *
+ * <h2>Input</h2>
+ * <p>
+ * One or more BAM files.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * GC content calculations per interval.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T GCContentByInterval \
+ *   -o output.txt \
+ *   -I input.bam \
+ *   -L input.intervals
+ * </pre>
+ *
 */
@Allows(value = {DataSource.REFERENCE})
@Requires(value = {DataSource.REFERENCE})
-
@By(DataSource.REFERENCE)
-
 public class GCContentByIntervalWalker extends LocusWalker<Long, Long> {
    @Output
    protected PrintStream out;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java
@ -40,18 +40,48 @@ import java.util.List;


 /**
- * Generates an alternative reference sequence over the specified interval.  Given variant ROD tracks,
- * it replaces the reference bases at variation sites with the bases supplied by the ROD(s).  Additionally,
- * allows for a "snpmask" ROD to set overlapping bases to 'N'.
+ * Generates an alternative reference sequence over the specified interval.
+ *
+ * <p>
+ * Given variant ROD tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
+ * Additionally, allows for a "snpmask" ROD to set overlapping bases to 'N'.
+ *
+ * <h2>Input</h2>
+ * <p>
+ * The reference, requested intervals, and any number of variant rod files.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * A fasta file representing the requested intervals.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T FastaAlternateReferenceMaker \
+ *   -o output.fasta \
+ *   -L input.intervals \
+ *   --variant input.vcf \
+ *   [--snpmask mask.vcf]
+ * </pre>
+ *
 */
@WalkerName("FastaAlternateReferenceMaker")
@Reference(window=@Window(start=-1,stop=50))
@Requires(value={DataSource.REFERENCE})
 public class FastaAlternateReferenceWalker extends FastaReferenceWalker {

+    /**
+     * Variants from these input files are used by this tool to construct an alternate reference.
+     */
    @Input(fullName = "variant", shortName = "V", doc="variants to model", required=false)
    public List<RodBinding<VariantContext>> variants = Collections.emptyList();

+    /**
+     * Snps from this file are used as a mask when constructing the alternate reference.
+     */
    @Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false)
    public RodBinding<VariantContext> snpmask;

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java
@ -38,14 +38,44 @@ import org.broadinstitute.sting.utils.collections.Pair;
 import java.io.PrintStream;

 /**
- * Renders a new reference in FASTA format consisting of only those loci provided in the input data set.  Has optional
- * features to control the output format.
+ * Renders a new reference in FASTA format consisting of only those loci provided in the input data set.
+ *
+ * <p>
+ * The output format can be partially controlled using the provided command-line arguments.
+ *
+ * <h2>Input</h2>
+ * <p>
+ * The reference and requested intervals.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * A fasta file representing the requested intervals.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T FastaReference \
+ *   -o output.fasta \
+ *   -L input.intervals
+ * </pre>
+ *
 */
@WalkerName("FastaReferenceMaker")
 public class FastaReferenceWalker extends RefWalker<Pair<GenomeLoc, String>, GenomeLoc> {
+
    @Output PrintStream out;
-    @Argument(fullName="lineWidth", shortName="lw", doc="Maximum length of sequence to write per line", required=false) public int fastaLineWidth=60;
-    @Argument(fullName="rawOnelineSeq", shortName="raw", doc="Print sequences with no FASTA header lines, one line per interval (i.e. lineWidth = infinity) - CAUTION: adjacent intervals will automatically be merged", required=false) public boolean fastaRawSeqs=false;
+
+    @Argument(fullName="lineWidth", shortName="lw", doc="Maximum length of sequence to write per line", required=false)
+    public int fastaLineWidth=60;
+
+    /**
+     *  Please note that when using this argument adjacent intervals will automatically be merged.
+     */
+    @Argument(fullName="rawOnelineSeq", shortName="raw", doc="Print sequences with no FASTA header lines, one line per interval (i.e. lineWidth = infinity)", required=false)
+    public boolean fastaRawSeqs=false;

    protected FastaSequence fasta;

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java
@ -45,6 +45,34 @@ import java.util.*;

 /**
 * Filters variant calls using a number of user-selectable, parameterizable criteria.
+ *
+ * <p>
+ * VariantFiltration is a GATK tool for hard-filtering variant calls based on certain criteria.
+ * Records are hard-filtered by changing the value in the FILTER field to something other than PASS.
+ *
+ * <h2>Input</h2>
+ * <p>
+ * A variant set to filter.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * A filtered VCF.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T VariantFiltration \
+ *   -o output.vcf \
+ *   --variant input.vcf \
+ *   --filterExpression "AB < 0.2 || MQ0 > 50" \
+ *   --filterName "Nov09filters" \
+ *   --mask mask.vcf \
+ *   --maskName InDel
+ * </pre>
+ *
 */
@Reference(window=@Window(start=-50,stop=50))
 public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
@ -52,33 +80,65 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
    @ArgumentCollection
    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();

+    /**
+     * Any variant which overlaps entries from the provided mask rod will be filtered.
+     */
    @Input(fullName="mask", doc="Input ROD mask", required=false)
    public RodBinding<Feature> mask;

    @Output(doc="File to which variants should be written", required=true)
    protected VCFWriter writer = null;

-    @Argument(fullName="filterExpression", shortName="filter", doc="One or more expression used with INFO fields to filter (see wiki docs for more info)", required=false)
+    /**
+     * VariantFiltration accepts any number of JEXL expressions (so you can have two named filters by using
+     * --filterName One --filterExpression "X < 1" --filterName Two --filterExpression "X > 2").
+     */
+    @Argument(fullName="filterExpression", shortName="filter", doc="One or more expression used with INFO fields to filter", required=false)
    protected ArrayList<String> FILTER_EXPS = new ArrayList<String>();
-    @Argument(fullName="filterName", shortName="filterName", doc="Names to use for the list of filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false)
+
+    /**
+     * This name is put in the FILTER field for variants that get filtered.  Note that there must be a 1-to-1 mapping between filter expressions and filter names.
+     */
+    @Argument(fullName="filterName", shortName="filterName", doc="Names to use for the list of filters", required=false)
    protected ArrayList<String> FILTER_NAMES = new ArrayList<String>();

+    /**
+     * Similar to the INFO field based expressions, but used on the FORMAT (genotype) fields instead.
+     * VariantFiltration will add the sample-level FT tag to the FORMAT field of filtered samples (this does not affect the record's FILTER tag).
+     * One can filter normally based on most fields (e.g. "GQ < 5.0"), but the GT (genotype) field is an exception. We have put in convenience
+     * methods so that one can now filter out hets ("isHet == 1"), refs ("isHomRef == 1"), or homs ("isHomVar == 1").
+     */
    @Argument(fullName="genotypeFilterExpression", shortName="G_filter", doc="One or more expression used with FORMAT (sample/genotype-level) fields to filter (see wiki docs for more info)", required=false)
    protected ArrayList<String> GENOTYPE_FILTER_EXPS = new ArrayList<String>();
+
+    /**
+     * Similar to the INFO field based expressions, but used on the FORMAT (genotype) fields instead.
+     */
    @Argument(fullName="genotypeFilterName", shortName="G_filterName", doc="Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false)
    protected ArrayList<String> GENOTYPE_FILTER_NAMES = new ArrayList<String>();

-    @Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster (see also --clusterWindowSize); [default:3]", required=false)
+    /**
+     * Works together with the --clusterWindowSize argument.
+     */
+    @Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster", required=false)
    protected Integer clusterSize = 3;
-    @Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0]", required=false)
+
+    /**
+     * Works together with the --clusterSize argument.  To disable the clustered SNP filter, set this value to less than 1.
+     */
+    @Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs", required=false)
    protected Integer clusterWindow = 0;

-    @Argument(fullName="maskExtension", shortName="maskExtend", doc="How many bases beyond records from a provided 'mask' rod should variants be filtered; [default:0]", required=false)
+    @Argument(fullName="maskExtension", shortName="maskExtend", doc="How many bases beyond records from a provided 'mask' rod should variants be filtered", required=false)
    protected Integer MASK_EXTEND = 0;
-    @Argument(fullName="maskName", shortName="maskName", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false)
+    @Argument(fullName="maskName", shortName="maskName", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call", required=false)
    protected String MASK_NAME = "Mask";

-    @Argument(fullName="missingValuesInExpressionsShouldEvaluateAsFailing", doc="When evaluating the JEXL expressions, should missing values be considered failing the expression (by default they are considered passing)?", required=false)
+    /**
+     * By default, if JEXL cannot evaluate your expression for a particular record because one of the annotations is not present, the whole expression evaluates as PASSing.
+     * Use this argument to have it evaluate as failing filters instead for these cases.
+     */
+    @Argument(fullName="missingValuesInExpressionsShouldEvaluateAsFailing", doc="When evaluating the JEXL expressions, missing values should be considered failing the expression", required=false)
    protected Boolean FAIL_MISSING_VALUES = false;

    // JEXL expressions for the filters
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
@ -69,7 +69,7 @@ import java.util.*;
 * <p>
 * The local realignment tool is designed to consume one or more BAM files and to locally realign reads such that the number of mismatching bases
 * is minimized across all the reads. In general, a large percent of regions requiring local realignment are due to the presence of an insertion
- * or deletion (indels) in the individualÕs genome with respect to the reference genome.  Such alignment artifacts result in many bases mismatching
+ * or deletion (indels) in the individual's genome with respect to the reference genome.  Such alignment artifacts result in many bases mismatching
 * the reference near the misalignment, which are easily mistaken as SNPs.  Moreover, since read mapping algorithms operate on each read independently,
 * it is impossible to place reads on the reference genome such at mismatches are minimized across all reads.  Consequently, even when some reads are
 * correctly mapped with indels, reads covering the indel near just the start or end of the read are often incorrectly mapped with respect the true indel,
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java
@ -35,16 +35,46 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.ReadWalker;
 import org.broadinstitute.sting.utils.sam.AlignmentUtils;

+
 /**
- * Left aligns indels in reads.
+ * Left-aligns indels from reads in a bam file.
+ *
+ * <p>
+ * LeftAlignIndels is a tool that takes a bam file and left-aligns any indels inside it.  The same indel can often be
+ * placed at multiple positions and still represent the same haplotype.  While a standard convention is to place an
+ * indel at the left-most position this doesn't always happen, so this tool can be used to left-align them.
+ *
+ * <h2>Input</h2>
+ * <p>
+ * A bam file to left-align.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * A left-aligned bam.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx3g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T LeftAlignIndels \
+ *   -I input.bam \
+ *   -o output.vcf
+ * </pre>
+ *
 */
 public class LeftAlignIndels extends ReadWalker<Integer, Integer> {

    @Output(required=false, doc="Output bam")
    protected StingSAMFileWriter writer = null;

-    @Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the SAMFileWriter. "+
-                "If too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool may run out of memory.", required=false)
+    /**
+     * If set too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool
+     * may run out of memory.  We recommend that you additionally tell Java to use a temp directory with plenty of available
+     * space (by setting java.io.tempdir on the command-line).
+     */
+    @Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the output writer", required=false)
    protected int MAX_RECORDS_IN_RAM = 500000;

    public void initialize() {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
@ -23,7 +23,10 @@
 */
 package org.broadinstitute.sting.gatk.walkers.phasing;

-import org.broadinstitute.sting.commandline.*;
+import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.ArgumentCollection;
+import org.broadinstitute.sting.commandline.Hidden;
+import org.broadinstitute.sting.commandline.Output;
 import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -49,16 +52,46 @@ import java.util.*;

 import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFromRods;

-
 /**
 * Walks along all variant ROD loci, caching a user-defined window of VariantContext sites, and then finishes phasing them when they go out of range (using upstream and downstream reads).
+ *
+ * <p>
+ * Performs physical phasing of SNP calls, based on sequencing reads.
+ * </p>
+ *
+ * <h2>Input</h2>
+ * <p>
+ * VCF file of SNP calls, BAM file of sequence reads.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * Phased VCF file.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T ReadBackedPhasing
+ *      -R reference.fasta
+ *      -I reads.bam
+ *      --variant:vcf SNPs.vcf
+ *      -BTI variant
+ *      -BTIMR INTERSECTION
+ *      -o phased_SNPs.vcf
+ *      --phaseQualityThresh 20.0
+ * </pre>
+ *
+ * @author Menachem Fromer
+ * @since July 2010
 */
@Allows(value = {DataSource.READS, DataSource.REFERENCE})
@Requires(value = {DataSource.READS, DataSource.REFERENCE})
@By(DataSource.READS)

-@ReadFilters({MappingQualityZeroReadFilter.class})
 // Filter out all reads with zero mapping quality
+@ReadFilters({MappingQualityZeroReadFilter.class})

 public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, PhasingStats> {
    private static final boolean DEBUG = false;
@ -73,13 +106,13 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
    @Output(doc = "File to which variants should be written", required = true)
    protected VCFWriter writer = null;

-    @Argument(fullName = "cacheWindowSize", shortName = "cacheWindow", doc = "The window size (in bases) to cache variant sites and their reads; [default:20000]", required = false)
+    @Argument(fullName = "cacheWindowSize", shortName = "cacheWindow", doc = "The window size (in bases) to cache variant sites and their reads for the phasing procedure", required = false)
    protected Integer cacheWindow = 20000;

-    @Argument(fullName = "maxPhaseSites", shortName = "maxSites", doc = "The maximum number of successive heterozygous sites permitted to be used by the phasing algorithm; [default:10]", required = false)
+    @Argument(fullName = "maxPhaseSites", shortName = "maxSites", doc = "The maximum number of successive heterozygous sites permitted to be used by the phasing algorithm", required = false)
    protected Integer maxPhaseSites = 10; // 2^10 == 10^3 diploid haplotypes

-    @Argument(fullName = "phaseQualityThresh", shortName = "phaseThresh", doc = "The minimum phasing quality score required to output phasing; [default:10.0]", required = false)
+    @Argument(fullName = "phaseQualityThresh", shortName = "phaseThresh", doc = "The minimum phasing quality score required to output phasing", required = false)
    protected Double phaseQualityThresh = 10.0; // PQ = 10.0 <=> P(error) = 10^(-10/10) = 0.1, P(correct) = 0.9

    @Hidden
@ -87,10 +120,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
    protected String variantStatsFilePrefix = null;
    private PhasingQualityStatsWriter statsWriter = null;

-    @Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for phasing [default: 17]", required = false)
+    @Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for phasing", required = false)
    public int MIN_BASE_QUALITY_SCORE = 17;

-    @Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for phasing [default: 20]", required = false)
+    @Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for phasing", required = false)
    public int MIN_MAPPING_QUALITY_SCORE = 20;

    @Argument(fullName = "sampleToPhase", shortName = "sampleToPhase", doc = "Only include these samples when phasing", required = false)
@ -111,10 +144,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph

    public static final String PHASING_INCONSISTENT_KEY = "PhasingInconsistent";

-    @Argument(fullName = "enableMergePhasedSegregatingPolymorphismsToMNP", shortName = "enableMergeToMNP", doc = "Merge consecutive phased sites into MNP records [default:false]", required = false)
+    @Argument(fullName = "enableMergePhasedSegregatingPolymorphismsToMNP", shortName = "enableMergeToMNP", doc = "Merge consecutive phased sites into MNP records", required = false)
    protected boolean enableMergePhasedSegregatingPolymorphismsToMNP = false;

-    @Argument(fullName = "maxGenomicDistanceForMNP", shortName = "maxDistMNP", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records into a MNP record; [default:1]", required = false)
+    @Argument(fullName = "maxGenomicDistanceForMNP", shortName = "maxDistMNP", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records into a MNP record", required = false)
    protected int maxGenomicDistanceForMNP = 1;

    @Hidden
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java
@ -11,7 +11,31 @@ import java.io.PrintStream;

 /**
 * Walks over the input data set, calculating the total number of covered loci for diagnostic purposes.
+ *
+ * <p>
 * Simplest example of a locus walker.
+ *
+ *
+ * <h2>Input</h2>
+ * <p>
+ * One or more BAM files.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * Number of loci traversed.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CountLoci \
+ *   -o output.txt \
+ *   -I input.bam \
+ *   [-L input.intervals]
+ * </pre>
+ *
 */
 public class CountLociWalker extends LocusWalker<Integer, Long> implements TreeReducible<Long> {
    @Output(doc="Write count to this file instead of STDOUT")
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java
@ -39,6 +39,26 @@ import java.util.List;
 * query name order.  Breaks counts down by total pairs and number
 * of paired reads.
 *
+ *
+ * <h2>Input</h2>
+ * <p>
+ * One or more bam files.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * Number of pairs seen.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CountPairs \
+ *   -o output.txt \
+ *   -I input.bam
+ * </pre>
+ *
 * @author mhanna
 */
 public class CountPairsWalker extends ReadPairWalker<Integer,Long> {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRefWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRefWalker.java
@ -25,7 +25,10 @@

 package org.broadinstitute.sting.gatk.walkers.qc;

+import org.broad.tribble.Feature;
 import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Input;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -33,25 +36,55 @@ import org.broadinstitute.sting.gatk.walkers.RefWalker;
 import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
 import org.broadinstitute.sting.utils.collections.Pair;

+import java.util.Collections;
+import java.util.List;
+
 /**
- * Prints out counts of the number of reference ordered data objects are
- * each locus for debugging RefWalkers.
+ * Prints out counts of the number of reference ordered data objects encountered.
+ *
+ *
+ * <h2>Input</h2>
+ * <p>
+ * One or more rod files.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * Number of rods seen.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CountRODsByRef \
+ *   -o output.txt \
+ *   --rod input.vcf
+ * </pre>
+ *
 */
-public class CountRodByRefWalker extends RefWalker<CountRodWalker.Datum, Pair<ExpandingArrayList<Long>, Long>> {
-    @Argument(fullName = "verbose", shortName = "v", doc="If true, Countrod will print out detailed information about the rods it finds and locations", required = false)
+public class CountRODsByRefWalker extends RefWalker<CountRODsWalker.Datum, Pair<ExpandingArrayList<Long>, Long>> {
+
+    /**
+     * One or more input rod files
+     */
+    @Input(fullName="rod", shortName = "rod", doc="Input VCF file(s)", required=false)
+    public List<RodBinding<Feature>> rods = Collections.emptyList();
+
+    @Argument(fullName = "verbose", shortName = "v", doc="If true, this tool will print out detailed information about the rods it finds and locations", required = false)
    public boolean verbose = false;

-    @Argument(fullName = "showSkipped", shortName = "s", doc="If true, CountRod will print out the skippped locations", required = false)
+    @Argument(fullName = "showSkipped", shortName = "s", doc="If true, this tool will print out the skipped locations", required = false)
    public boolean showSkipped = false;

-    CountRodWalker crw = new CountRodWalker();
+    CountRODsWalker crw = new CountRODsWalker();

    public void initialize() {
        crw.verbose = verbose;
        crw.showSkipped = showSkipped;
    }

-    public CountRodWalker.Datum map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+    public CountRODsWalker.Datum map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        return crw.map(tracker, ref, context);
    }

@ -59,7 +92,7 @@ public class CountRodByRefWalker extends RefWalker<CountRodWalker.Datum, Pair<Ex
        return crw.reduceInit();
    }

-    public Pair<ExpandingArrayList<Long>, Long> reduce(CountRodWalker.Datum point, Pair<ExpandingArrayList<Long>, Long> sum) {
+    public Pair<ExpandingArrayList<Long>, Long> reduce(CountRODsWalker.Datum point, Pair<ExpandingArrayList<Long>, Long> sum) {
        return crw.reduce(point, sum);
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsWalker.java
@ -27,8 +27,11 @@ package org.broadinstitute.sting.gatk.walkers.qc;

 import net.sf.samtools.SAMSequenceDictionary;
 import net.sf.samtools.SAMSequenceRecord;
+import org.broad.tribble.Feature;
 import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -41,23 +44,46 @@ import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
 import org.broadinstitute.sting.utils.collections.Pair;

 import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.LinkedList;
-import java.util.List;
+import java.util.*;

 /**
- * Prints out counts of the number of reference ordered data objects are
- * each locus for debugging RodWalkers.
+ * Prints out counts of the number of reference ordered data objects encountered.
+ *
+ *
+ * <h2>Input</h2>
+ * <p>
+ * One or more rod files.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * Number of rods seen.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CountRODs \
+ *   -o output.txt \
+ *   --rod input.vcf
+ * </pre>
+ *
 */
-public class CountRodWalker extends RodWalker<CountRodWalker.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>> {
+public class CountRODsWalker extends RodWalker<CountRODsWalker.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>> {
    @Output
    public PrintStream out;

-    @Argument(fullName = "verbose", shortName = "v", doc="If true, Countrod will print out detailed information about the rods it finds and locations", required = false)
+    /**
+     * One or more input rod files
+     */
+    @Input(fullName="rod", shortName = "rod", doc="Input VCF file(s)", required=false)
+    public List<RodBinding<Feature>> rods = Collections.emptyList();
+
+    @Argument(fullName = "verbose", shortName = "v", doc="If true, this tool will print out detailed information about the rods it finds and locations", required = false)
    public boolean verbose = false;

-    @Argument(fullName = "showSkipped", shortName = "s", doc="If true, CountRod will print out the skippped locations", required = false)
+    @Argument(fullName = "showSkipped", shortName = "s", doc="If true, this tool will print out the skipped locations", required = false)
    public boolean showSkipped = false;

    @Override
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java
@ -9,8 +9,32 @@ import org.broadinstitute.sting.gatk.walkers.Requires;

 /**
 * Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
+ *
+ * <p>
 * Can also count the number of reads matching a given criterion using read filters (see the
 * --read-filter command line argument).  Simplest example of a read-backed analysis.
+ *
+ *
+ * <h2>Input</h2>
+ * <p>
+ * One or more BAM files.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * Number of reads seen.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CountReads \
+ *   -o output.txt \
+ *   -I input.bam \
+ *   [-L input.intervals]
+ * </pre>
+ *
 */
@Requires({DataSource.READS, DataSource.REFERENCE})
 public class CountReadsWalker extends ReadWalker<Integer, Integer> {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
@ -36,20 +36,61 @@ import java.util.*;

 /**
 * General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more)
+ *
+ * <p>
+ * Given a variant callset, it is common to calculate various quality control metrics. These metrics include the number of
+ * raw or filtered SNP counts; ratio of transition mutations to transversions; concordance of a particular sample's calls
+ * to a genotyping chip; number of singletons per sample; etc. Furthermore, it is often useful to stratify these metrics
+ * by various criteria like functional class (missense, nonsense, silent), whether the site is CpG site, the amino acid
+ * degeneracy of the site, etc. VariantEval facilitates these calculations in two ways: by providing several built-in
+ * evaluation and stratification modules, and by providing a framework that permits the easy development of new evaluation
+ * and stratification modules.
+ *
+ * <h2>Input</h2>
+ * <p>
+ * One or more variant sets to evaluate plus any number of comparison sets.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * Evaluation tables.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T VariantEval \
+ *   -o output.eval.gatkreport \
+ *   --eval:set1 set1.vcf \
+ *   --eval:set2 set2.vcf \
+ *   [--comp comp.vcf]
+ * </pre>
+ *
 */
@Reference(window=@Window(start=-50, stop=50))
 public class VariantEvalWalker extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
-    // Output arguments
+
    @Output
    protected PrintStream out;

+    /**
+     * The variant file(s) to evaluate.
+     */
    @Input(fullName="eval", shortName = "eval", doc="Input evaluation file(s)", required=true)
    public List<RodBinding<VariantContext>> evals;

+    /**
+     * The variant file(s) to compare against.
+     */
    @Input(fullName="comp", shortName = "comp", doc="Input comparison file(s)", required=false)
    public List<RodBinding<VariantContext>> compsProvided = Collections.emptyList();
    private List<RodBinding<VariantContext>> comps = new ArrayList<RodBinding<VariantContext>>();

+    /**
+     * dbSNP comparison VCF.  By default, the dbSNP file is used to specify the set of "known" variants.
+     * Other sets can be specified with the -knownName (--known_names) argument.
+     */
    @ArgumentCollection
    protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();

@ -67,6 +108,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
    @Argument(fullName="sample", shortName="sn", doc="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context", required=false)
    protected Set<String> SAMPLE_EXPRESSIONS;

+    /**
+     * List of rod tracks to be used for specifying "known" variants other than dbSNP.
+     */
    @Argument(shortName="knownName", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false)
    protected HashSet<String> KNOWN_NAMES = new HashSet<String>();
    List<RodBinding<VariantContext>> knowns = new ArrayList<RodBinding<VariantContext>>();
@ -81,7 +125,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
    @Argument(fullName="onlyVariantsOfType", shortName="VT", doc="If provided, only variants of these types will be considered during the evaluation, in ", required=false)
    protected Set<VariantContext.Type> typesToUse = null;

-    // Evaluator arguments
+    /**
+     * See the -list argument to view available modules.
+     */
    @Argument(fullName="evalModule", shortName="EV", doc="One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noE is specified)", required=false)
    protected String[] MODULES_TO_USE = {};

@ -95,7 +141,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
    @Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false)
    protected double MIN_PHASE_QUALITY = 10.0;

-    @Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
+    /**
+     * This argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined.
+     */
+    @Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations", required=false)
    protected String FAMILY_STRUCTURE;

    @Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java
@ -46,6 +46,31 @@ import java.util.*;

 /**
 * Left-aligns indels from a variants file.
+ *
+ * <p>
+ * LeftAlignVariants is a tool that takes a VCF file and left-aligns any indels inside it.  The same indel can often be
+ * placed at multiple positions and still represent the same haplotype.  While the standard convention with VCF is to
+ * place an indel at the left-most position this doesn't always happen, so this tool can be used to left-align them.
+ *
+ * <h2>Input</h2>
+ * <p>
+ * A variant set to left-align.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * A left-aligned VCF.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T LeftAlignVariants \
+ *   --variant input.vcf \
+ *   -o output.vcf
+ * </pre>
+ *
 */
@Reference(window=@Window(start=-200,stop=200))
 public class LeftAlignVariants extends RodWalker<Integer, Integer> {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
@ -25,7 +25,6 @@

 package org.broadinstitute.sting.gatk.walkers.variantutils;

-import org.broad.tribble.Feature;
 import org.broad.tribble.TribbleException;
 import org.broad.tribble.dbsnp.DbSNPFeature;
 import org.broadinstitute.sting.commandline.*;
@ -34,7 +33,6 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper;
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -48,7 +46,32 @@ import java.util.Set;


 /**
- * Validates a variants file.
+ * Strictly validates a variants file.
+ *
+ * <p>
+ * ValidateVariants is a GATK tool that takes a VCF file and validates much of the information inside it.
+ * Checks include the correctness of the reference base(s), accuracy of AC & AN values, tests against rsIDs
+ * when a dbSNP file is provided, and that all alternate alleles are present in at least one sample.
+ *
+ * <h2>Input</h2>
+ * <p>
+ * A variant set to filter.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * A filtered VCF.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T ValidateVariants \
+ *   --variant input.vcf \
+ *   --dbsnp dbsnp.vcf
+ * </pre>
+ *
 */
@Reference(window=@Window(start=0,stop=100))
 public class ValidateVariants extends RodWalker<Integer, Integer> {
@ -67,10 +90,13 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
    @Argument(fullName = "validationType", shortName = "type", doc = "which validation type to run", required = false)
    protected ValidationType type = ValidationType.ALL;

-    @Argument(fullName = "doNotValidateFilteredRecords", shortName = "doNotValidateFilteredRecords", doc = "should we skip validation on filtered records?", required = false)
+    /**
+     * By default, even filtered records are validated.
+     */
+    @Argument(fullName = "doNotValidateFilteredRecords", shortName = "doNotValidateFilteredRecords", doc = "skip validation on filtered records", required = false)
    protected Boolean DO_NOT_VALIDATE_FILTERED = false;

-    @Argument(fullName = "warnOnErrors", shortName = "warnOnErrors", doc = "should we just emit warnings on errors instead of terminating the run?", required = false)
+    @Argument(fullName = "warnOnErrors", shortName = "warnOnErrors", doc = "just emit warnings on errors instead of terminating the run at the first instance", required = false)
    protected Boolean WARN_ON_ERROR = false;

    private long numErrors = 0;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
@ -25,10 +25,8 @@

 package org.broadinstitute.sting.gatk.walkers.variantutils;

-import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.Input;
-import org.broadinstitute.sting.commandline.Output;
-import org.broadinstitute.sting.commandline.RodBinding;
+import org.broadinstitute.sting.commandline.*;
+import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -43,21 +41,57 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
 import java.util.*;

 /**
- * Converts Sequenom files to a VCF annotated with QC metrics (HW-equilibrium, % failed probes)
+ * Annotates a validation (from e.g. Sequenom) VCF with QC metrics (HW-equilibrium, % failed probes)
+ *
+ * <p>
+ * The Variant Validation Assessor is a tool for vetting/assessing validation data (containing genotypes).
+ * The tool produces a VCF that is annotated with information pertaining to plate quality control and by
+ * default is soft-filtered by high no-call rate or low Hardy-Weinberg probability.
+ * If you have .ped files, please first convert them to VCF format
+ * (see http://www.broadinstitute.org/gsa/wiki/index.php/Converting_ped_to_vcf).
+ *
+ * <h2>Input</h2>
+ * <p>
+ * A validation VCF to annotate.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * An annotated VCF.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T VariantValidationAssessor \
+ *   --variant input.vcf \
+ *   -o output.vcf
+ * </pre>
+ *
 */
@Reference(window=@Window(start=0,stop=40))
 public class VariantValidationAssessor extends RodWalker<VariantContext,Integer> {
-    @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true)
-    public RodBinding<VariantContext> variants;
+
+    @ArgumentCollection
+    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();

    @Output(doc="File to which variants should be written",required=true)
    protected VCFWriter vcfwriter = null;

-    @Argument(fullName="maxHardy", doc="Maximum phred-scaled Hardy-Weinberg violation pvalue to consider an assay valid [default:20]", required=false)
+    @Argument(fullName="maxHardy", doc="Maximum phred-scaled Hardy-Weinberg violation pvalue to consider an assay valid", required=false)
    protected double maxHardy = 20.0;
-    @Argument(fullName="maxNoCall", doc="Maximum no-call rate (as a fraction) to consider an assay valid [default:0.05]", required=false)
+
+    /**
+     * To disable, set to a value greater than 1.
+     */
+    @Argument(fullName="maxNoCall", doc="Maximum no-call rate (as a fraction) to consider an assay valid", required=false)
    protected double maxNoCall = 0.05;
-    @Argument(fullName="maxHomVar", doc="Maximum homozygous variant rate (as a fraction) to consider an assay valid [default:1.1, disabled]", required=false)
+
+    /**
+     * To disable, set to a value greater than 1.
+     */
+    @Argument(fullName="maxHomVar", doc="Maximum homozygous variant rate (as a fraction) to consider an assay valid", required=false)
    protected double maxHomNonref = 1.1;

    //@Argument(fullName="populationFile", shortName="populations", doc="A tab-delimited file relating individuals to populations,"+
@ -93,7 +127,7 @@ public class VariantValidationAssessor extends RodWalker<VariantContext,Integer>
        if ( tracker == null )
            return null;

-        VariantContext vc = tracker.getFirstValue(variants, ref.getLocus());
+        VariantContext vc = tracker.getFirstValue(variantCollection.variants, ref.getLocus());
        // ignore places where we don't have a variant
        if ( vc == null )
            return null;
@ -101,7 +135,7 @@ public class VariantValidationAssessor extends RodWalker<VariantContext,Integer>
        if ( sampleNames == null )
            sampleNames = new TreeSet<String>(vc.getSampleNames());        

-        return addVariantInformationToCall(ref, vc);
+        return addVariantInformationToCall(vc);
    }

    public Integer reduce(VariantContext call, Integer numVariants) {
@ -113,7 +147,7 @@ public class VariantValidationAssessor extends RodWalker<VariantContext,Integer>
    }

    public void onTraversalDone(Integer finalReduce) {
-        final List<String> inputNames = Arrays.asList(variants.getName());
+        final List<String> inputNames = Arrays.asList(variantCollection.variants.getName());

        // setup the header fields
        Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
@ -159,7 +193,7 @@ public class VariantValidationAssessor extends RodWalker<VariantContext,Integer>
    }


-    private VariantContext addVariantInformationToCall(ReferenceContext ref, VariantContext vContext) {
+    private VariantContext addVariantInformationToCall(VariantContext vContext) {

        // check possible filters
        double hwPvalue = hardyWeinbergCalculation(vContext);
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
@ -40,95 +40,105 @@ import java.io.PrintStream;
 import java.util.*;

 /**
- * Emits specific fields as dictated by the user from one or more VCF files.
+ * Emits specific fields from a VCF file to a tab-deliminated table
+ *
+ * <p>
+ * This walker accepts a single VCF file and writes out user-selected fields from the
+ * VCF as a header-containing, tab-deliminated file.  The user specifies one or more
+ * fields to print with the -F NAME, each of which appears as a single column in
+ * the output file, with a header named NAME, and the value of this field in the VCF
+ * one per line.  NAME can be any standard VCF column (CHROM, ID, QUAL) or any binding
+ * in the INFO field (AC=10).  Note that this tool does not support capturing any
+ * GENOTYPE field values.  If a VCF record is missing a value, then the tool by
+ * default throws an error, but the special value NA can be emitted instead with
+ * appropriate tool arguments.
+ *
+ * </p>
+ *
+ * <h2>Input</h2>
+ * <p>
+ * <ul>
+ *     <li>A VCF file</li>
+ *     <li>A list of -F fields to write</li>
+ * </ul>
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * A table deliminated file containing the values of the requested fields in the VCF file
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ *     -T $WalkerName \
+ *     -V file.vcf \
+ *     -F CHROM -F POS -F ID -F QUAL -F AC \
+ *     -o results.table
+ *
+ *     would produce a file that looks like:
+ *
+ *     CHROM    POS ID      QUAL    AC
+ *     1        10  .       50      1
+ *     1        20  rs10    99      10
+ *     et cetera...
+ * </pre>
+ *
+ * @author Mark DePristo
+ * @since 2010
 */
 public class VariantsToTable extends RodWalker<Integer, Integer> {
-
    @ArgumentCollection
    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();

    @Output(doc="File to which results should be written",required=true)
    protected PrintStream out;

-    @Argument(fullName="fields", shortName="F", doc="Fields to emit from the VCF, allows any VCF field, any info field, and some meta fields like nHets", required=true)
-    public ArrayList<String> fieldsToTake = new ArrayList<String>();
+    /**
+     * -F NAME can be any standard VCF column (CHROM, ID, QUAL) or any binding in the INFO field (e.g., AC=10).
+     * Note that this tool does not support capturing any GENOTYPE field values.  Note this argument
+     * accepts any number of inputs.  So -F CHROM -F POS is allowed.
+     */
+    @Argument(fullName="fields", shortName="F", doc="The name of each field to capture for output in the table", required=true)
+    public List<String> fieldsToTake = new ArrayList<String>();

-    @Argument(fullName="showFiltered", shortName="raw", doc="Include filtered records")
+    /**
+     * By default this tool only emits values for fields where the FILTER field is either PASS or . (unfiltered).
+     * Throwing this flag will cause $WalkerName to emit values regardless of the FILTER field value.
+     */
+    @Argument(fullName="showFiltered", shortName="raw", doc="If provided, field values from filtered records will be included in the output", required=false)
    public boolean showFiltered = false;

-    @Argument(fullName="maxRecords", shortName="M", doc="Maximum number of records to emit, if provided", required=false)
+    /**
+     * If provided, then this tool will exit with success after this number of records have been emitted to the file.
+     */
+    @Argument(fullName="maxRecords", shortName="M", doc="If provided, we will emit at most maxRecord records to the table", required=false)
    public int MAX_RECORDS = -1;
    int nRecords = 0;

+    /**
+     * By default, only biallelic (REF=A, ALT=B) sites are including in the output.  If this flag is provided, then
+     * VariantsToTable will emit field values for records with multiple ALT alleles.  Note that in general this
+     * can make your resulting file unreadable and malformated according to tools like R, as the representation of
+     * multi-allelic INFO field values can be lists of values.
+     */
    @Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false)
    public boolean keepMultiAllelic = false;

+    /**
+     * By default, this tool throws a UserException when it encounters a field without a value in some record.  This
+     * is generally useful when you mistype -F CHRMO, so that you get a friendly warning about CHRMO not being
+     * found before the tool runs through 40M 1000G records.  However, in some cases you genuinely want to allow such
+     * fields (e.g., AC not being calculated for filtered records, if included).  When provided, this argument
+     * will cause VariantsToTable to write out NA values for missing fields instead of throwing an error.
+     */
    @Argument(fullName="allowMissingData", shortName="AMD", doc="If provided, we will not require every record to contain every field", required=false)
    public boolean ALLOW_MISSING_DATA = false;

    public void initialize() {
+        // print out the header
        out.println(Utils.join("\t", fieldsToTake));
    }

-    public static abstract class Getter { public abstract String get(VariantContext vc); }
-    public static Map<String, Getter> getters = new HashMap<String, Getter>();
-
-    static {
-        // #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT
-        getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } });
-        getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } });
-        getters.put("REF", new Getter() {
-            public String get(VariantContext vc) {
-                String x = "";
-                if ( vc.hasReferenceBaseForIndel() ) {
-                    Byte refByte = vc.getReferenceBaseForIndel();
-                    x=x+new String(new byte[]{refByte});
-                }
-                return x+vc.getReference().getDisplayString();
-            }
-        });
-        getters.put("ALT", new Getter() {
-            public String get(VariantContext vc) {
-                StringBuilder x = new StringBuilder();
-                int n = vc.getAlternateAlleles().size();
-                if ( n == 0 ) return ".";
-                if ( vc.hasReferenceBaseForIndel() ) {
-                    Byte refByte = vc.getReferenceBaseForIndel();
-                    x.append(new String(new byte[]{refByte}));                    
-                }
-
-                for ( int i = 0; i < n; i++ ) {
-                    if ( i != 0 ) x.append(",");
-                    x.append(vc.getAlternateAllele(i).getDisplayString());
-                }
-                return x.toString();
-            }
-        });
-        getters.put("QUAL", new Getter() { public String get(VariantContext vc) { return Double.toString(vc.getPhredScaledQual()); } });
-        getters.put("TRANSITION", new Getter() { public String get(VariantContext vc) {
-            if ( vc.isSNP() && vc.isBiallelic() )
-                return VariantContextUtils.isTransition(vc) ? "1" : "0";
-            else
-                return "-1";
-        }});
-        getters.put("FILTER", new Getter() { public String get(VariantContext vc) {
-            return vc.isNotFiltered() ? "PASS" : Utils.join(",", vc.getFilters()); } 
-        });
-
-        getters.put("HET", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount()); } });
-        getters.put("HOM-REF", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomRefCount()); } });
-        getters.put("HOM-VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomVarCount()); } });
-        getters.put("NO-CALL", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNoCallCount()); } });
-        getters.put("VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount() + vc.getHomVarCount()); } });
-        getters.put("NSAMPLES", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples()); } });
-        getters.put("NCALLED", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples() - vc.getNoCallCount()); } });
-        getters.put("GQ", new Getter() { public String get(VariantContext vc) {
-            if ( vc.getNSamples() > 1 ) throw new UserException("Cannot get GQ values for multi-sample VCF");
-            return String.format("%.2f", 10 * vc.getGenotype(0).getNegLog10PError());
-        }});
-    }
-
-
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( tracker == null ) // RodWalkers can make funky map calls
            return 0;
@ -155,6 +165,15 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
        return s.endsWith("*");
    }

+    /**
+     * Utility function that returns the list of values for each field in fields from vc.
+     *
+     * @param vc the VariantContext whose field values we can to capture
+     * @param fields a non-null list of fields to capture from VC
+     * @param allowMissingData if false, then throws a UserException if any field isn't found in vc.  Otherwise
+     *   provides a value of NA
+     * @return
+     */
    public static List<String> extractFields(VariantContext vc, List<String> fields, boolean allowMissingData) {
        List<String> vals = new ArrayList<String>();

@ -213,13 +232,75 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
        return vals;
    }

-    public Integer reduceInit() {
-        return 0;
-    }
-
-    public Integer reduce(Integer counter, Integer sum) {
-        return counter + sum;
-    }
-
+    //
+    // default reduce -- doesn't do anything at all
+    //
+    public Integer reduceInit() { return 0; }
+    public Integer reduce(Integer counter, Integer sum) { return counter + sum; }
    public void onTraversalDone(Integer sum) {}
+
+    // ----------------------------------------------------------------------------------------------------
+    //
+    // static system for getting values from VC by name.
+    //
+    // ----------------------------------------------------------------------------------------------------
+
+    public static abstract class Getter { public abstract String get(VariantContext vc); }
+    public static Map<String, Getter> getters = new HashMap<String, Getter>();
+
+    static {
+        // #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT
+        getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } });
+        getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } });
+        getters.put("REF", new Getter() {
+            public String get(VariantContext vc) {
+                String x = "";
+                if ( vc.hasReferenceBaseForIndel() ) {
+                    Byte refByte = vc.getReferenceBaseForIndel();
+                    x=x+new String(new byte[]{refByte});
+                }
+                return x+vc.getReference().getDisplayString();
+            }
+        });
+        getters.put("ALT", new Getter() {
+            public String get(VariantContext vc) {
+                StringBuilder x = new StringBuilder();
+                int n = vc.getAlternateAlleles().size();
+                if ( n == 0 ) return ".";
+                if ( vc.hasReferenceBaseForIndel() ) {
+                    Byte refByte = vc.getReferenceBaseForIndel();
+                    x.append(new String(new byte[]{refByte}));
+                }
+
+                for ( int i = 0; i < n; i++ ) {
+                    if ( i != 0 ) x.append(",");
+                    x.append(vc.getAlternateAllele(i).getDisplayString());
+                }
+                return x.toString();
+            }
+        });
+        getters.put("QUAL", new Getter() { public String get(VariantContext vc) { return Double.toString(vc.getPhredScaledQual()); } });
+        getters.put("TRANSITION", new Getter() { public String get(VariantContext vc) {
+            if ( vc.isSNP() && vc.isBiallelic() )
+                return VariantContextUtils.isTransition(vc) ? "1" : "0";
+            else
+                return "-1";
+        }});
+        getters.put("FILTER", new Getter() { public String get(VariantContext vc) {
+            return vc.isNotFiltered() ? "PASS" : Utils.join(",", vc.getFilters()); }
+        });
+
+        getters.put("HET", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount()); } });
+        getters.put("HOM-REF", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomRefCount()); } });
+        getters.put("HOM-VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomVarCount()); } });
+        getters.put("NO-CALL", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNoCallCount()); } });
+        getters.put("VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount() + vc.getHomVarCount()); } });
+        getters.put("NSAMPLES", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples()); } });
+        getters.put("NCALLED", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples() - vc.getNoCallCount()); } });
+        getters.put("GQ", new Getter() { public String get(VariantContext vc) {
+            if ( vc.getNSamples() > 1 ) throw new UserException("Cannot get GQ values for multi-sample VCF");
+            return String.format("%.2f", 10 * vc.getGenotype(0).getNegLog10PError());
+        }});
+    }
+
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java
@ -53,6 +53,30 @@ import java.util.*;

 /**
 * Converts variants from other file formats to VCF format.
+ *
+ * <p>
+ * Note that there must be a Tribble feature/codec for the file format as well as an adaptor.
+ *
+ * <h2>Input</h2>
+ * <p>
+ * A variant file to filter.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * A VCF file.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T VariantsToVCF \
+ *   -o output.vcf \
+ *   --variant:RawHapMap input.hapmap \
+ *   --dbsnp dbsnp.vcf
+ * </pre>
+ *
 */
@Reference(window=@Window(start=-40,stop=40))
 public class VariantsToVCF extends RodWalker<Integer, Integer> {
@ -61,15 +85,24 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
    protected VCFWriter baseWriter = null;
    private SortingVCFWriter vcfwriter; // needed because hapmap/dbsnp indel records move

+    /**
+     * Variants from this input file are used by this tool as input.
+     */
    @Input(fullName="variant", shortName = "V", doc="Input variant file", required=true)
    public RodBinding<Feature> variants;

    @ArgumentCollection
    protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();

-    @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod (for data like GELI with genotypes)", required=false)
+    /**
+     * This argument is used for data (like GELI) with genotypes but no sample names encoded within.
+     */
+    @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod", required=false)
    protected String sampleName = null;

+    /**
+     * This argument is useful for fixing input VCFs with bad reference bases (the output will be a fixed version of the VCF).
+     */
    @Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false)
    protected boolean fixReferenceBase = false;

--- a/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java
@ -88,14 +88,6 @@ public class GATKArgumentCollectionUnitTest extends BaseTest {
        collect.intervals.add("intervals".toLowerCase());
        collect.excludeIntervals = new ArrayList<String>();
        collect.numberOfThreads = 1;
-
-        // make some rod bindings up
-        ArrayList<String> fakeBindings = new ArrayList<String>();
-        fakeBindings.add("Bind1");
-        fakeBindings.add("Bind2");
-        fakeBindings.add("Bind3");
-
-        collect.RODBindings = fakeBindings;
    }