From a8935c99fc0bedbb7851b1223511dd58a1ad048d Mon Sep 17 00:00:00 2001
From: Chris Hartl <chartl@broadinstitute.org>
Date: Thu, 18 Aug 2011 15:28:35 -0400
Subject: [PATCH 3/4] Adding docs for DepthOfCoverage and ValidationAmplicons

---
 .../coverage/DepthOfCoverageWalker.java       | 46 ++++++++++--
 .../validation/ValidationAmplicons.java       | 72 ++++++++++++++++---
 2 files changed, 104 insertions(+), 14 deletions(-)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java
index 90036407f..7fe16c9df 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java
@@ -51,14 +51,48 @@ import java.io.PrintStream;
 import java.util.*;
 
 /**
- * A parallelizable walker designed to quickly aggregate relevant coverage statistics across samples in the input
- * file. Assesses the mean and median granular coverages of each sample, and generates part of a cumulative
- * distribution of % bases and % targets covered for certain depths. The granularity of DOC can be set by command
- * line arguments.
+ * Toolbox for assessing sequence coverage by a wide array of metrics, partitioned by sample, read group, or library
  *
+ * <p>
+ * DepthOfCoverage processes a set of bam files to determine coverage at different levels of partitioning and
+ * aggregation. Coverage can be analyzed per locus, per interval, per gene, or in total; can be partitioned by
+ * sample, by read group, by technology, by center, or by library; and can be summarized by mean, median, quartiles,
+ * and/or percentage of bases covered to or beyond a threshold.
+ * Additionally, reads and bases can be filtered by mapping or base quality score.
+ * </p>
+ * <h2>Input</h2>
+ * <p>
+ * One or more bam files (with proper headers) to be analyzed for coverage statistics
+ * (Optional) A REFSEQ Rod to aggregate coverage to the gene level
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * Tables pertaining to different coverage summaries. Suffix on the table files declares the contents:
+ *  - no suffix: per locus coverage
+ *  - _summary: total, mean, median, quartiles, and threshold proportions, aggregated over all bases
+ *  - _statistics: coverage histograms (# locus with X coverage), aggregated over all bases
+ *  - _interval_summary: total, mean, median, quartiles, and threshold proportions, aggregated per interval
+ *  - _interval_statistics: 2x2 table of # of intervals covered to >= X depth in >=Y samples
+ *  - _gene_summary: total, mean, median, quartiles, and threshold proportions, aggregated per gene
+ *  - _gene_statistics: 2x2 table of # of genes covered to >= X depth in >= Y samples
+ *  - _cumulative_coverage_counts: coverage histograms (# locus with >= X coverage), aggregated over all bases
+ *  - _cumulative_coverage_proportions: proportions of loci with >= X coverage, aggregated over all bases
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T DepthOfCoverage \
+ *   -o file_name_base \
+ *   -I input_bams.list \
+ *   [-geneList refSeq.sorted.txt] \
+ *   [-pt readgroup] \
+ *   [-ct 4 -ct 6 -ct 10] \
+ *   [-L my_capture_genes.interval_list]
+ * </pre>
  *
- * @Author chartl
- * @Date Feb 22, 2010
  */
 // todo -- cache the map from sample names to means in the print functions, rather than regenerating each time
 // todo -- support for granular histograms for total depth; maybe n*[start,stop], bins*sqrt(n)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
index 61149e5d9..cd2891874 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
@@ -30,21 +30,77 @@ import java.util.LinkedList;
 import java.util.List;
 
 /**
- * Created by IntelliJ IDEA.
- * User: chartl
- * Date: 6/13/11
- * Time: 2:12 PM
- * To change this template use File | Settings | File Templates.
+ * Creates FASTA sequences for use in Sequenom or PCR utilities for site amplification and subsequent validation
+ *
+ * <p>
+ * ValidationAmplicons consumes a VCF and an Interval list and produces FASTA sequences from which PCR primers or probe
+ * sequences can be designed. In addition, ValidationAmplicons uses BWA to check for specificity of tracts of bases within
+ * the output amplicon, lower-casing non-specific tracts, allows for users to provide sites to mask out, and specifies
+ * reasons why the site may fail validation (nearby variation, for example).
+ * </p>
+ *
+ * <h2>Input</h2>
+ * <p>
+ * Requires a VCF containing alleles to design amplicons towards, a VCF of variants to mask out of the amplicons, and an
+ * interval list defining the size of the amplicons around the sites to be validated
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * Output is a FASTA-formatted file with some modifications at probe sites. For instance:
+ * <pre>
+ * >20:207414 INSERTION=1,VARIANT_TOO_NEAR_PROBE=1, 20_207414
+ * CCAACGTTAAGAAAGAGACATGCGACTGGGTgcggtggctcatgcctggaaccccagcactttgggaggccaaggtgggc[A/G*]gNNcacttgaggtcaggagtttgagaccagcctggccaacatggtgaaaccccgtctctactgaaaatacaaaagttagC
+ * >20:792122 Valid 20_792122
+ * TTTTTTTTTagatggagtctcgctcttatcgcccaggcNggagtgggtggtgtgatcttggctNactgcaacttctgcct[-/CCC*]cccaggttcaagtgattNtcctgcctcagccacctgagtagctgggattacaggcatccgccaccatgcctggctaatTT
+ * >20:994145 Valid 20_994145
+ * TCCATGGCCTCCCCCTGGCCCACGAAGTCCTCAGCCACCTCCTTCCTGGAGGGCTCAGCCAAAATCAGACTGAGGAAGAAG[AAG/-*]TGGTGGGCACCCACCTTCTGGCCTTCCTCAGCCCCTTATTCCTAGGACCAGTCCCCATCTAGGGGTCCTCACTGCCTCCC
+ * >20:1074230 SITE_IS_FILTERED=1, 20_1074230
+ * ACCTGATTACCATCAATCAGAACTCATTTCTGTTCCTATCTTCCACCCACAATTGTAATGCCTTTTCCATTTTAACCAAG[T/C*]ACTTATTATAtactatggccataacttttgcagtttgaggtatgacagcaaaaTTAGCATACATTTCATTTTCCTTCTTC
+ * >20:1084330 DELETION=1, 20_1084330
+ * CACGTTCGGcttgtgcagagcctcaaggtcatccagaggtgatAGTTTAGGGCCCTCTCAAGTCTTTCCNGTGCGCATGG[GT/AC*]CAGCCCTGGGCACCTGTNNNNNNNNNNNNNTGCTCATGGCCTTCTAGATTCCCAGGAAATGTCAGAGCTTTTCAAAGCCC
+ * </pre>
+ * are amplicon sequences resulting from running the tool. The flags (preceding the sequence itself) can be:
+ *
+ * Valid                     // amplicon is valid
+ * SITE_IS_FILTERED=1        // validation site is not marked 'PASS' or '.' in its filter field ("you are trying to validate a filtered variant")
+ * VARIANT_TOO_NEAR_PROBE=1  // there is a variant too near to the variant to be validated, potentially shifting the mass-spec peak
+ * MULTIPLE_PROBES=1,        // multiple variants to be validated found inside the same amplicon
+ * DELETION=6,INSERTION=5,   // 6 deletions and 5 insertions found inside the amplicon region (from the "mask" VCF), will be potentially difficult to validate
+ * DELETION=1,               // deletion found inside the amplicon region, could shift mass-spec peak
+ * START_TOO_CLOSE,          // variant is too close to the start of the amplicon region to give sequenom a good chance to find a suitable primer
+ * END_TOO_CLOSE,            // variant is too close to the end of the amplicon region to give sequenom a good chance to find a suitable primer
+ * NO_VARIANTS_FOUND,        // no variants found within the amplicon region
+ * INDEL_OVERLAPS_VALIDATION_SITE, // an insertion or deletion interferes directly with the site to be validated (i.e. insertion directly preceding or following, or a deletion that spans the site itself)
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T ValidationAmplicons
+ *      -R /humgen/1kg/reference/human_g1k_v37.fasta
+ *      -BTI ProbeIntervals
+ *      -ProbeIntervals:table interval_table.table
+ *      -ValidateAlleles:vcf sites_to_validate.vcf
+ *      -MaskAlleles:vcf mask_sites.vcf
+ *      --virtualPrimerSize 30
+ *      -o probes.fasta
+ * </pre>
+ *
+ * @author chartl
+ * @since July 2011
  */
 @Requires(value={DataSource.REFERENCE})
 public class ValidationAmplicons extends RodWalker<Integer,Integer> {
-    @Input(fullName = "ProbeIntervals", doc="Chris document me", required=true)
+    @Input(fullName = "ProbeIntervals", doc="A collection of intervals in table format with optional names that represent the "+
+                                            "intervals surrounding the probe sites amplicons should be designed for", required=true)
     RodBinding<TableFeature> probeIntervals;
 
-    @Input(fullName = "ValidateAlleles", doc="Chris document me", required=true)
+    @Input(fullName = "ValidateAlleles", doc="A VCF containing the sites and alleles you want to validate. Restricted to *BI-Allelic* sites", required=true)
     RodBinding<VariantContext> validateAlleles;
 
-    @Input(fullName = "MaskAlleles", doc="Chris document me", required=true)
+    @Input(fullName = "MaskAlleles", doc="A VCF containing the sites you want to MASK from the designed amplicon (e.g. by Ns or lower-cased bases)", required=true)
     RodBinding<VariantContext> maskAlleles;
 
 

From 09d099cadaa49b7eff1450ac1dd901e8a1ba6195 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Thu, 18 Aug 2011 20:57:02 -0400
Subject: [PATCH 4/4] Added GATKDocs to the UnifiedGenotyper.

---
 .../AlleleFrequencyCalculationModel.java      |  2 +
 .../GenotypeLikelihoodsCalculationModel.java  |  2 +
 .../genotyper/UnifiedArgumentCollection.java  | 41 +++++++--
 .../walkers/genotyper/UnifiedGenotyper.java   | 83 +++++++++++++++++--
 .../genotyper/UnifiedGenotyperEngine.java     |  3 +
 .../recalibration/CountCovariatesWalker.java  |  6 ++
 .../TableRecalibrationWalker.java             |  2 +
 .../VariantRecalibrator.java                  |  4 +
 8 files changed, 129 insertions(+), 14 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
index 83a8ce7d7..70f3c6a1a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
@@ -44,7 +44,9 @@ import java.util.Set;
 public abstract class AlleleFrequencyCalculationModel implements Cloneable {
 
     public enum Model {
+        /** The default model with the best performance in all cases */
         EXACT,
+        /** For posterity we have kept around the older GRID_SEARCH model, but this gives inferior results and shouldn't be used. */
         GRID_SEARCH
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
index 594c1dd28..60dfe4fe7 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
@@ -53,7 +53,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
     }
 
     public enum GENOTYPING_MODE {
+        /** the default; the Unified Genotyper will choose the most likely alternate allele */
         DISCOVERY,
+        /** only the alleles passed in from a VCF rod bound to the -alleles argument will be used for genotyping */
         GENOTYPE_GIVEN_ALLELES
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
index 1a76bfd07..e7f89bf08 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@@ -36,31 +36,54 @@ import java.io.File;
 
 public class UnifiedArgumentCollection {
 
-    // control the various models to be used
     @Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
     public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
 
+    /**
+     * Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
+     */
     @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false)
     public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
 
+    /**
+     * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
+     * het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
+     */
     @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
     public Double heterozygosity = DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY;
 
     @Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
     public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
 
+    /**
+     * Specifies how to determine the alternate allele to use for genotyping
+     */
     @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
     public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
 
     @Argument(fullName = "output_mode", shortName = "out_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
     public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
 
+    /**
+     * The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
+     * confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
+     * is the default). Note that the confidence (QUAL) values for multi-sample low-pass (e.g. 4x per sample) calling might
+     * be significantly smaller with the new EXACT model than with our older GRID_SEARCH model, as the latter tended to
+     * over-estimate the confidence; for low-pass calling we tend to use much smaller thresholds (e.g. 4).
+     */
     @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called", required = false)
     public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
 
+    /**
+     * The minimum phred-scaled Qscore threshold to emit low confidence calls. Genotypes with confidence >= this but less
+     * than the calling threshold are emitted but marked as filtered.
+     */
     @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false)
     public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
 
+    /**
+     * This argument is not enabled by default because it increases the runtime by an appreciable amount.
+     */
     @Argument(fullName = "computeSLOD", shortName = "sl", doc = "If provided, we will calculate the SLOD", required = false)
     public boolean COMPUTE_SLOD = false;
 
@@ -80,7 +103,6 @@ public class UnifiedArgumentCollection {
     @Argument(fullName = "abort_at_too_much_coverage", doc = "Don't call a site if the downsampled coverage is greater than this value", required = false)
     public int COVERAGE_AT_WHICH_TO_ABORT = -1;
 
-
     // control the various parameters to be used
     @Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
     public int MIN_BASE_QUALTY_SCORE = 17;
@@ -91,11 +113,17 @@ public class UnifiedArgumentCollection {
     @Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
     public Double MAX_DELETION_FRACTION = 0.05;
 
-
     // indel-related arguments
+    /**
+     * A candidate indel is genotyped (and potentially called) if there are this number of reads with a consensus indel at a site.
+     * Decreasing this value will increase sensitivity but at the cost of larger calling time and a larger number of false positives.
+     */
     @Argument(fullName = "min_indel_count_for_genotyping", shortName = "minIndelCnt", doc = "Minimum number of consensus indels required to trigger genotyping run", required = false)
     public int MIN_INDEL_COUNT_FOR_GENOTYPING = 5;
 
+    /**
+     * This argument informs the prior probability of having an indel at a site.
+     */
     @Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
     public double INDEL_HETEROZYGOSITY = 1.0/8000;
 
@@ -126,22 +154,23 @@ public class UnifiedArgumentCollection {
     @Hidden
     @Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
     public boolean OUTPUT_DEBUG_INDEL_INFO = false;
+
     @Hidden
     @Argument(fullName = "dovit", shortName = "dovit", doc = "Output indel debug info", required = false)
     public boolean dovit = false;
+
     @Hidden
     @Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false)
     public boolean GSA_PRODUCTION_ONLY = false;
+
     @Hidden
- 
     @Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false)
     public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL;
 
     @Hidden
-     @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
+    @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
     public boolean IGNORE_SNP_ALLELES = false;
 
-
     @Deprecated
     @Argument(fullName="output_all_callable_bases", shortName="all_bases", doc="Please use --output_mode EMIT_ALL_SITES instead" ,required=false)
     private Boolean ALL_BASES_DEPRECATED = false;   
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
index d31bb6fb9..8d2101d8f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@@ -45,11 +45,71 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import java.io.PrintStream;
 import java.util.*;
 
-
 /**
- * A variant caller which unifies the approaches of several disparate callers.  Works for single-sample and
- * multi-sample data.  The user can choose from several different incorporated calculation models.
+ * A variant caller which unifies the approaches of several disparate callers -- Works for single-sample and multi-sample data.
+ *
+ * <p>
+ * The GATK Unified Genotyper is a multiple-sample, technology-aware SNP and indel caller. It uses a Bayesian genotype
+ * likelihood model to estimate simultaneously the most likely genotypes and allele frequency in a population of N samples,
+ * emitting an accurate posterior probability of there being a segregating variant allele at each locus as well as for the
+ * genotype of each sample. The system can either emit just the variant sites or complete genotypes (which includes
+ * homozygous reference calls) satisfying some phred-scaled confidence value. The genotyper can make accurate calls on
+ * both single sample data and multi-sample data.
+ * </p>
+ * <h2>Input</h2>
+ * <p>
+ * The read data from which to make variant calls.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * A raw, unfiltered, highly sensitive callset in VCF format.
+ * </p>
+ *
+ * <h2>Example generic command for multi-sample SNP calling</h2>
+ * <pre>
+ * java -jar GenomeAnalysisTK.jar \
+ *   -R resources/Homo_sapiens_assembly18.fasta \
+ *   -T UnifiedGenotyper \
+ *   -I sample1.bam [-I sample2.bam ...] \
+ *   --dbsnp dbSNP.vcf \
+ *   -o snps.raw.vcf \
+ *   -stand_call_conf [50.0] \
+ *   -stand_emit_conf 10.0 \
+ *   -dcov [50] \
+ *   [-L targets.interval_list]
+ * </pre>
+ *
+ * <p>
+ * The above command will call all of the samples in your provided BAM files [-I arguments] together and produce a VCF file
+ * with sites and genotypes for all samples. The easiest way to get the dbSNP file is from the GATK resource bundle. Several
+ * arguments have parameters that should be chosen based on the average coverage per sample in your data. See the detailed
+ * argument descriptions below.
+ * </p>
+ *
+ * <h2>Example command for generating calls at all sites</h2>
+ * <pre>
+ * java -jar /path/to/GenomeAnalysisTK.jar \
+ *   -l INFO \
+ *   -R resources/Homo_sapiens_assembly18.fasta \
+ *   -T UnifiedGenotyper \
+ *   -I /DCC/ftp/pilot_data/data/NA12878/alignment/NA12878.SLX.maq.SRP000031.2009_08.bam \
+ *   -o my.vcf \
+ *   --output_mode EMIT_ALL_SITES
+ * </pre>
+ *
+ * <h2>Caveats</h2>
+ * <ul>
+ * <li>The system is under active and continuous development. All outputs, the underlying likelihood model, arguments, and
+ * file formats are likely to change.</li>
+ * <li>The system can be very aggressive in calling variants. In the 1000 genomes project for pilot 2 (deep coverage of ~35x)
+ * we expect the raw Qscore > 50 variants to contain at least ~10% FP calls. We use extensive post-calling filters to eliminate
+ * most of these FPs. Variant Quality Score Recalibration is a tool to perform this filtering.</li>
+ * <li>We only handle diploid genotypes</li>
+ * </ul>
+ *
  */
+
 @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
 @ReadFilters( {BadMateFilter.class, MappingQualityUnavailableReadFilter.class} )
 @Reference(window=@Window(start=-200,stop=200))
@@ -61,10 +121,9 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
     private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
 
     /**
-      * A dbSNP VCF file from which to annotate.
-      *
-      * rsIDs from this file are used to populate the ID column of the output.  Also, the DB INFO flag will be set when appropriate.
-      */
+     * rsIDs from this file are used to populate the ID column of the output.  Also, the DB INFO flag will be set when appropriate.
+     * dbSNP is not used in any way for the calculations themselves.
+     */
     @ArgumentCollection
     protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
     public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
@@ -72,7 +131,9 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
     public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
     public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
 
-    // control the output
+    /**
+     * A raw, unfiltered, highly sensitive callset in VCF format.
+     */
     @Output(doc="File to which variants should be written",required=true)
     protected VCFWriter writer = null;
 
@@ -82,9 +143,15 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
     @Argument(fullName = "metrics_file", shortName = "metrics", doc = "File to print any relevant callability metrics output", required = false)
     protected PrintStream metricsWriter = null;
 
+    /**
+     * Which annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available annotations.
+     */
     @Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
     protected List<String> annotationsToUse = new ArrayList<String>();
 
+    /**
+     * Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups.
+     */
     @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
     protected String[] annotationClassesToUse = { "Standard" };
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index b3f77fc06..06455df6d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -51,8 +51,11 @@ public class UnifiedGenotyperEngine {
     public static final String LOW_QUAL_FILTER_NAME = "LowQual";
 
     public enum OUTPUT_MODE {
+        /** the default */
         EMIT_VARIANTS_ONLY,
+        /** include confident reference sites */
         EMIT_ALL_CONFIDENT_SITES,
+        /** any callable site regardless of confidence */
         EMIT_ALL_SITES
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
index 5ffc61fe3..838842869 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
@@ -68,6 +68,8 @@ import java.util.Map;
  *
  * <h2>Input</h2>
  * <p>
+ * The input read data whose base quality scores need to be assessed.
+ * <p>
  * A database of known polymorphic sites to skip over.
  * </p>
  *
@@ -134,6 +136,10 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
 
     @Argument(fullName="list", shortName="ls", doc="List the available covariates and exit", required=false)
     private boolean LIST_ONLY = false;
+
+    /**
+     * See the -list argument to view available covariates.
+     */
     @Argument(fullName="covariate", shortName="cov", doc="Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required=false)
     private String[] COVARIATES = null;
     @Argument(fullName="standard_covs", shortName="standard", doc="Use the standard set of covariates in addition to the ones listed using the -cov argument", required=false)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
index 85166d30d..174e810c2 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
@@ -66,6 +66,8 @@ import java.util.regex.Pattern;
  *
  * <h2>Input</h2>
  * <p>
+ * The input read data whose base quality scores need to be recalibrated.
+ * <p>
  * The recalibration table file in CSV format that was generated by the CountCovariates walker.
  * </p>
  *
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
index d81a57aad..517c2362a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@@ -157,6 +157,10 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
      */
     @Argument(fullName="target_titv", shortName="titv", doc="The expected novel Ti/Tv ratio to use when calculating FDR tranches and for display on the optimization curve output figures. (approx 2.15 for whole genome experiments). ONLY USED FOR PLOTTING PURPOSES!", required=false)
     private double TARGET_TITV = 2.15;
+
+    /**
+     * See the input VCF file's INFO field for a list of all available annotations.
+     */
     @Argument(fullName="use_annotation", shortName="an", doc="The names of the annotations which should used for calculations", required=true)
     private String[] USE_ANNOTATIONS = null;