From a6f632874bd04c383e9172e61a6d312af5e74c4c Mon Sep 17 00:00:00 2001
From: Geraldine Van der Auwera <vdauwera@broadinstitute.org>
Date: Wed, 21 May 2014 12:35:22 -0400
Subject: [PATCH] Various documentation improvements

- Edited intervals merging docs for correctness & clarity
- Edited VQSR arg docs and made mode required (+added -mode SNP to VQSR tests)
- Moved PaperGenotyper to Toy Walkers to declutter the actually useful docs
- Moved GenotypeGVCFs to Variant Discovery category and clarified a few points
- Clarified that the -resource argument depends on using the -V:tag format
- Clarified how the pcr indel model works
- Added caveat for -U ALLOW_N_CIGAR_READS
- Added MathJax support for displaying equations in GATKDocs
- Updated HC example commands and caveats
---
 .../haplotypecaller/HaplotypeCaller.java      | 102 ++++++++++++++++--
 ...VariantRecalibratorArgumentCollection.java |  54 ++++++++--
 .../walkers/variantutils/GenotypeGVCFs.java   |  11 +-
 ...ntRecalibrationWalkersIntegrationTest.java |   6 ++
 .../arguments/GATKArgumentCollection.java     |   2 +-
 .../walkers/annotator/VariantAnnotator.java   |  54 ++++++----
 .../walkers/examples/GATKPaperGenotyper.java  |   2 +-
 .../IntervalArgumentCollection.java           |   9 +-
 settings/helpTemplates/common.html            |   8 ++
 9 files changed, 198 insertions(+), 50 deletions(-)
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java
index 000671fd3..ad77615bc 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java
@@ -101,8 +101,38 @@ import java.io.PrintStream;
 import java.util.*;
 
 /**
- * Call SNPs and indels simultaneously via local de-novo assembly of haplotypes in an active region. Haplotypes are evaluated using an affine gap penalty Pair HMM.
+ * Call SNPs and indels simultaneously via local re-assembly of haplotypes in an active region.
  *
+ * <p>The basic operation of the HaplotypeCaller proceeds as follows:   </p>
+ *
+ * <br />
+ * <h4>1. Define active regions </h4>
+ *
+ * <p>The program determines which regions of the genome it needs to operate on, based on the presence of significant
+ * evidence for variation.</p>
+ *
+ * <br />
+ * <h4>2. Determine haplotypes by re-assembly of the active region </h4>
+ *
+ * <p>For each ActiveRegion, the program builds a De Bruijn-like graph to reassemble the ActiveRegion, and identifies
+ * the possible haplotypes present in the data. The program then realigns each haplotype against the reference
+ * haplotype using the Smith-Waterman algorithm in order to identify potentially variant sites. </p>
+ *
+ * <br />
+ * <h4>3. Determine likelihoods of the haplotypes given the read data </h4>
+ *
+ * <p>For each ActiveRegion, the program performs a pairwise alignment of each read against each haplotype using the
+ * PairHMM algorithm. This produces a matrix of likelihoods of haplotypes given the read data. These likelihoods are
+ * then marginalized to obtain the likelihoods of alleles for each potentially variant site given the read data.   </p>
+ *
+ * <br />
+ * <h4>4. Assign sample genotypes </h4>
+ *
+ * <p>For each potentially variant site, the program applies Bayes’ rule, using the likelihoods of alleles given the
+ * read data to calculate the likelihoods of each genotype per sample given the read data observed for that
+ * sample. The most likely genotype is then assigned to the sample.    </p>
+ *
+ * <br />
  * <h3>Input</h3>
  * <p>
  * Input bam file(s) from which to make calls
@@ -114,23 +144,71 @@ import java.util.*;
  * </p>
  *
  * <h3>Examples</h3>
+ *
+ * <p>These are example commands that show how to run HaplotypeCaller for typical use cases. Square brackets ("[ ]")
+ * indicate optional arguments. Note that parameter values shown here may not be the latest recommended; see the
+ * Best Practices documentation for detailed recommendations. </p>
+ *
+ * <br />
+ * <h4>Single-sample all-sites calling on DNAseq (for GVCF-based cohort analysis workflow)</h4>
+ * <p>
+ * <pre>
+ *   java
+ *     -jar GenomeAnalysisTK.jar
+ *     -T HaplotypeCaller
+ *     -R reference/human_g1k_v37.fasta
+ *     -I sample1.bam \
+ *     --emitRefConfidence GVCF \
+ *     --variant_index_type LINEAR \
+ *     --variant_index_parameter 128000 \
+ *     [--dbsnp dbSNP.vcf] \
+ *     [-L targets.interval_list] \
+ *     -o output.raw.snps.indels.g.vcf
+ * </pre>
+ * </p>
+ *
+ * <h4>Variant-only calling on DNAseq</h4>
+ * <p>
  * <pre>
  *   java
  *     -jar GenomeAnalysisTK.jar
  *     -T HaplotypeCaller
  *     -R reference/human_g1k_v37.fasta
  *     -I sample1.bam [-I sample2.bam ...] \
- *     --dbsnp dbSNP.vcf \
- *     -stand_call_conf [50.0] \
- *     -stand_emit_conf 10.0 \
- *     [-L targets.interval_list]
+ *     [--dbsnp dbSNP.vcf] \
+ *     [-stand_call_conf 30] \
+ *     [-stand_emit_conf 10] \
+ *     [-L targets.interval_list] \
  *     -o output.raw.snps.indels.vcf
  * </pre>
+ * </p>
+ *
+ * <h4>Variant-only calling on RNAseq</h4>
+ * <p>
+ * <pre>
+ *   java
+ *     -jar GenomeAnalysisTK.jar
+ *     -T HaplotypeCaller
+ *     -R reference/human_g1k_v37.fasta
+ *     -I sample1.bam \
+ *     -recoverDanglingHeads \
+ *     -dontUseSoftClippedBases \
+ *     [--dbsnp dbSNP.vcf] \
+ *     -stand_call_conf 20 \
+ *     -stand_emit_conf 20 \
+ *     -o output.raw.snps.indels.vcf
+ * </pre>
+ * </p>
  *
  * <h3>Caveats</h3>
  * <ul>
- * <li>The system is under active and continuous development. All outputs, the underlying likelihood model, and command line arguments are likely to change often.</li>
- * <li>Currently the -ploidy parameter only support the default 2 (diploid). Eventually one will be able to change its value for haploid and polyploid analyses.</li>
+ * <li>Currently the -ploidy parameter only supports the default 2 (diploid). Eventually it will be possible to change
+ * its value in order to analyze data from non-diploid organisms.</li>
+ * <li>We have not yet fully tested the interaction between the GVCF-based calling or the multisample calling and the
+ * RNAseq-specific functionalities.
+ * Use those in combination at your own risk.</li>
+ * <li>Many users have reported issues running HaplotypeCaller with the -nct argument, so we recommend using Queue to
+ * parallelize HaplotypeCaller instead of multithreading.</li>
  * </ul>
  *
  * @author rpoplin
@@ -473,9 +551,13 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
     protected int minObservationsForKmerToBeSolid = 20;
 
     /**
-     * Which PCR indel error model should we use when calculating likelihoods?  If NONE is selected, then the default base
-     * insertion/deletion qualities will be used (or taken from the read if generated through the BaseRecalibrator).
-     * VERY IMPORTANT: when using PCR-free sequencing data we definitely recommend setting this argument to NONE.
+     * When calculating the likelihood of variants, we can try to correct for PCR errors that cause indel artifacts.
+     * The correction is based on the reference context, and acts specifically around repetitive sequences that tend
+     * to cause PCR errors. The variant likelihoods are penalized in increasing scale as the context around a
+     * putative indel is more repetitive (e.g. long homopolymer). The correction can be disabled by specifying
+     * '-pcrModel NONE'; in that case the default base insertion/deletion qualities will be used (or taken from the
+     * read if generated through the BaseRecalibrator). <b>VERY IMPORTANT: when using PCR-free sequencing data we
+     * definitely recommend setting this argument to NONE</b>.
      */
     @Advanced
     @Argument(fullName = "pcr_indel_model", shortName = "pcrModel", doc = "The PCR indel model to use", required = false)
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java
index 800e93a87..52a311511 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java
@@ -71,28 +71,52 @@ public class VariantRecalibratorArgumentCollection {
         if( input.equals("BOTH") ) { return Mode.BOTH; }
         throw new ReviewedGATKException("VariantRecalibrator mode string is unrecognized, input = " + input);
     }
-
-    @Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels (emitting SNPs untouched in the output VCF); and 3.) BOTH for recalibrating both SNPs and indels simultaneously (for testing purposes only, not recommended for general use).", required = false)
+    /**
+     * Use either SNP for recalibrating only SNPs (emitting indels untouched in the output VCF) or INDEL for indels (emitting SNPs untouched in the output VCF). There is also a BOTH option for recalibrating both SNPs and indels simultaneously, but this is meant for testing purposes only and should not be used in actual analyses.
+     */
+    @Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ", required = true)
     public VariantRecalibratorArgumentCollection.Mode MODE = VariantRecalibratorArgumentCollection.Mode.SNP;
 
+    /**
+     * This parameter determines the maximum number of Gaussians that should be used when building a positive model
+     * using the variational Bayes algorithm.
+     */
     @Advanced
-    @Argument(fullName="maxGaussians", shortName="mG", doc="The maximum number of Gaussians for the positive model to try during variational Bayes algorithm.", required=false)
+    @Argument(fullName="maxGaussians", shortName="mG", doc="Max number of Gaussians for the positive model", required=false)
     public int MAX_GAUSSIANS = 8;
 
+    /**
+     * This parameter determines the maximum number of Gaussians that should be used when building a negative model
+     * using the variational Bayes algorithm. The actual maximum used is the smaller value between the mG and mNG
+     * arguments, meaning that if -mG is smaller than -mNG, -mG will be used for both. Note that this number should
+     * be small (e.g. 4) to achieve the best results.
+     */
     @Advanced
-    @Argument(fullName="maxNegativeGaussians", shortName="mNG", doc="The maximum number of Gaussians for the negative model to try during variational Bayes algorithm.  The actual maximum used is the min of the mG and mNG arguments.  Note that this number should be small (like 4) to achieve the best results", required=false)
+    @Argument(fullName="maxNegativeGaussians", shortName="mNG", doc="Max number of Gaussians for the negative model", required=false)
     public int MAX_GAUSSIANS_FOR_NEGATIVE_MODEL = 2;
 
+    /**
+     * This parameter determines the maximum number of VBEM iterations to be performed in the variational Bayes algorithm.
+     * The procedure will normally end when convergence is detected.
+     */
     @Advanced
-    @Argument(fullName="maxIterations", shortName="mI", doc="The maximum number of VBEM iterations to be performed in variational Bayes algorithm. Procedure will normally end when convergence is detected.", required=false)
+    @Argument(fullName="maxIterations", shortName="mI", doc="Maximum number of VBEM iterations", required=false)
     public int MAX_ITERATIONS = 150;
 
+    /**
+     * This parameter determines the number of k-means iterations to perform in order to initialize the means of
+     * the Gaussians in the Gaussian mixture model.
+     */
     @Advanced
-    @Argument(fullName="numKMeans", shortName="nKM", doc="The number of k-means iterations to perform in order to initialize the means of the Gaussians in the Gaussian mixture model.", required=false)
+    @Argument(fullName="numKMeans", shortName="nKM", doc="Number of k-means iterations", required=false)
     public int NUM_KMEANS_ITERATIONS = 100;
 
+    /**
+     * If a variant has annotations more than -std standard deviations away from the mean, it won't be used for building
+     * the Gaussian mixture model.
+     */
     @Advanced
-    @Argument(fullName="stdThreshold", shortName="std", doc="If a variant has annotations more than -std standard deviations away from mean then don't use it for building the Gaussian mixture model.", required=false)
+    @Argument(fullName="stdThreshold", shortName="std", doc="Annotation value divergence threshold (number of standard deviations from the means) ", required=false)
     public double STD_THRESHOLD = 10.0;
 
     @Advanced
@@ -107,16 +131,26 @@ public class VariantRecalibratorArgumentCollection {
     @Argument(fullName="priorCounts", shortName="priorCounts", doc="The number of prior counts to use in the variational Bayes algorithm.", required=false)
     public double PRIOR_COUNTS = 20.0;
 
+    /**
+     * The number of variants to use in building the Gaussian mixture model. Training sets larger than this will be randomly downsampled.
+     */
     @Advanced
-    @Argument(fullName="maxNumTrainingData", shortName="maxNumTrainingData", doc="Maximum number of training data to be used in building the Gaussian mixture model. Training sets large than this will be randomly downsampled.", required=false)
+    @Argument(fullName="maxNumTrainingData", shortName="maxNumTrainingData", doc="Maximum number of training data", required=false)
     protected int MAX_NUM_TRAINING_DATA = 2500000;
 
+    /**
+     * This parameter determines the minimum number of variants that will be selected from the list of worst scoring
+     * variants to use for building the Gaussian mixture model of bad variants.
+     */
     @Advanced
-    @Argument(fullName="minNumBadVariants", shortName="minNumBad", doc="The minimum number of worst scoring variants to use when building the Gaussian mixture model of bad variants.", required=false)
+    @Argument(fullName="minNumBadVariants", shortName="minNumBad", doc="Minimum number of bad variants", required=false)
     public int MIN_NUM_BAD_VARIANTS = 1000;
 
+    /**
+     * Variants scoring lower than this threshold will be used to build the Gaussian model of bad variants.
+     */
     @Advanced
-    @Argument(fullName="badLodCutoff", shortName="badLodCutoff", doc="The LOD score below which to be used when building the Gaussian mixture model of bad variants.", required=false)
+    @Argument(fullName="badLodCutoff", shortName="badLodCutoff", doc="LOD score cutoff for selecting bad variants", required=false)
     public double BAD_LOD_CUTOFF = -5.0;
 
     /////////////////////////////
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java
index 47602b064..2374e06f2 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java
@@ -78,14 +78,14 @@ import java.util.*;
  * Genotypes any number of gVCF files that were produced by the Haplotype Caller into a single joint VCF file.
  *
  * <p>
- * GenotypeGVCFs merges gVCF records that were produced as part of the "single sample discovery" pipeline using
- * the '-ERC GVCF' mode of the Haplotype Caller.  This tool performs the multi-sample joint aggregation
+ * GenotypeGVCFs merges gVCF records that were produced as part of the reference model-based variant discovery pipeline (see documentation for more details) using
+ * the '-ERC GVCF' or '-ERC BP_RESOLUTION' mode of the HaplotypeCaller.  This tool performs the multi-sample joint aggregation
  * step and merges the records together in a sophisticated manner.
  *
  * At all positions of the target, this tool will combine all spanning records, produce correct genotype likelihoods,
  * re-genotype the newly merged record, and then re-annotate it.
  *
- * Note that this tool cannot work with just any gVCF files - they must have been produced with the Haplotype Caller,
+ * Note that this tool cannot work with just any gVCF files - they must have been produced with the HaplotypeCaller,
  * which uses a sophisticated reference model to produce accurate genotype likelihoods for every position in the target.
  *
  * <h3>Input</h3>
@@ -109,7 +109,7 @@ import java.util.*;
  * </pre>
  *
  */
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
 @Reference(window=@Window(start=-10,stop=10))
 public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWriter> implements AnnotatorCompatible, TreeReducible<VariantContextWriter> {
 
@@ -137,8 +137,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
     protected List<String> annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"InbreedingCoeff", "FisherStrand", "QualByDepth", "ChromosomeCounts", "GenotypeSummaries"}));
 
     /**
-     * rsIDs from this file are used to populate the ID column of the output.  Also, the DB INFO flag will be set when appropriate.
-     * dbSNP is not used in any way for the calculations themselves.
+     * The rsIDs from this file are used to populate the ID column of the output.  Also, the DB INFO flag will be set when appropriate. Note that dbSNP is not used in any way for the calculations themselves.
      */
     @ArgumentCollection
     protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
index ddacc93af..4cce675fd 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
@@ -124,6 +124,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
                         " -an QD -an HaplotypeScore -an HRun" +
                         " --trustAllPolymorphic" + // for speed
                         " -recalFile %s" +
+                        " -mode SNP" +
                         " -tranchesFile %s",
                 Arrays.asList(params.recalMD5, params.tranchesMD5));
         final List<File> outputFiles = executeTest("testVariantRecalibrator-"+params.inVCF, spec).getFirst();
@@ -139,6 +140,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
                         " --no_cmdline_in_header" +
                         " -input " + params.inVCF +
                         " -U LENIENT_VCF_PROCESSING -o %s" +
+                        " -mode SNP" +
                         " -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) +
                         " -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
                 Arrays.asList(params.cutVCFMD5));
@@ -163,6 +165,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
                         " -an QD -an HaplotypeScore -an MQ" +
                         " --trustAllPolymorphic" + // for speed
                         " -recalFile %s" +
+                        " -mode SNP" +
                         " -tranchesFile %s",
                 Arrays.asList(params.recalMD5, params.tranchesMD5));
         final List<File> outputFiles = executeTest("testVariantRecalibratorAggregate-"+params.inVCF, spec).getFirst();
@@ -178,6 +181,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
                         " --no_cmdline_in_header" +
                         " -input " + params.inVCF +
                         " -U LENIENT_VCF_PROCESSING -o %s" +
+                        " -mode SNP" +
                         " -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) +
                         " -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
                 Arrays.asList(params.cutVCFMD5));
@@ -212,6 +216,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
                         " -an QD -an ReadPosRankSum -an FS -an InbreedingCoeff " + // floats value
                         " -mG 2 "+
                         " -recalFile %s" +
+                        " -mode SNP" +
                         " -tranchesFile %s",
                 2,
                 Arrays.asList("bcf", "txt"),
@@ -229,6 +234,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
                         " --no_cmdline_in_header" +
                         " -input " + params.inVCF +
                         " -U LENIENT_VCF_PROCESSING -o %s" +
+                        " -mode SNP" +
                         " -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) +
                         " -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
                 Arrays.asList(params.cutVCFMD5));
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java
index f13936a1e..b88db9d13 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java
@@ -386,7 +386,7 @@ public class GATKArgumentCollection {
     public File sampleRenameMappingFile = null;
 
     /**
-     * For expert users only who know what they are doing. We do not support usage of this argument, so we may refuse to help you if you use it and something goes wrong.
+     * For expert users only who know what they are doing. We do not support usage of this argument, so we may refuse to help you if you use it and something goes wrong. The one exception to this rule is ALLOW_N_CIGAR_READS, which is necessary for RNAseq analysis.
      */
     @Argument(fullName = "unsafe", shortName = "U", doc = "Enable unsafe operations: nothing will be checked at runtime", required = false)
     public ValidationExclusion.TYPE unsafe;
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotator.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotator.java
index 51816d6b9..ed0d39761 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotator.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotator.java
@@ -92,7 +92,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
     protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
 
     /**
-     * The INFO field will be annotated with information on the most biologically-significant effect
+     * The INFO field will be annotated with information on the most biologically significant effect
      * listed in the SnpEff output file for each variant.
      */
     @Input(fullName="snpEffFile", shortName = "snpEffFile", doc="A SnpEff output file from which to add annotations", required=false)
@@ -107,9 +107,10 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
     public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
 
     /**
-      * If a record in the 'variant' track overlaps with a record from the provided comp track, the INFO field will be annotated
-      *  as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field).  Records that are filtered in the comp track will be ignored.
-      *  Note that 'dbSNP' has been special-cased (see the --dbsnp argument).
+      * If a record in the 'variant' track overlaps with a record from the provided comp track, the INFO field will be
+      * annotated as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field).
+      * Records that are filtered in the comp track will be ignored. Note that 'dbSNP' has been special-cased
+      * (see the --dbsnp argument).
       */
     @Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false)
     public List<RodBinding<VariantContext>> comps = Collections.emptyList();
@@ -118,12 +119,15 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
     /**
       * An external resource VCF file or files from which to annotate.
       *
-      * One can add annotations from one of the resource VCFs to the output.
-      * For example, if you want to annotate your 'variant' VCF with the AC field value from the rod bound to 'resource',
-      * you can specify '-E resource.AC' and records in the output VCF will be annotated with 'resource.AC=N' when a record exists in that rod at the given position.
-      * If multiple records in the rod overlap the given position, one is chosen arbitrarily.
+      * Use this option to add annotations from a resource file to the output.
+      * For example, if you want to annotate your callset with the AC field value from a VCF file named
+      * 'resource_file.vcf', you tag it with '-resource:my_resource resource_file.vcf' and you additionally specify
+      * '-E my_resource.AC' (-E is short for --expression, also documented on this page). In the resulting output
+      * VCF, any records for which there is a record at the same position in the resource file will be annotated with
+      * 'my_resource.AC=N'. Note that if there are multiple records in the resource file that overlap the given
+      * position, one is chosen randomly.
       */
-    @Input(fullName="resource", shortName = "resource", doc="external resource VCF file", required=false)
+    @Input(fullName="resource", shortName = "resource", doc="External resource VCF file", required=false)
     public List<RodBinding<VariantContext>> resources = Collections.emptyList();
     public List<RodBinding<VariantContext>> getResourceRodBindings() { return resources; }
 
@@ -144,8 +148,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
     protected List<String> annotationsToExclude = new ArrayList<>();
 
     /**
-     * If specified, all available annotations in the group will be applied. See the VariantAnnotator -list argument to view available groups.
-     * Keep in mind that RODRequiringAnnotations are not intended to be used as a group, because they require specific ROD inputs.
+     * If specified, all available annotations in the group will be applied. See the VariantAnnotator -list argument
+     * to view available groups. Keep in mind that RODRequiringAnnotations are not intended to be used as a group,
+     * because they require specific ROD inputs.
      */
     @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
     protected List<String> annotationGroupsToUse = new ArrayList<>();
@@ -153,29 +158,40 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
     /**
      * This option enables you to add annotations from one VCF to another.
      *
-     * For example, if you want to annotate your 'variant' VCF with the AC field value from the rod bound to 'resource',
-     * you can specify '-E resource.AC' and records in the output VCF will be annotated with 'resource.AC=N' when a record exists in that rod at the given position.
-     * If multiple records in the rod overlap the given position, one is chosen arbitrarily.
+     * For example, if you want to annotate your callset with the AC field value from a VCF file named
+     * 'resource_file.vcf', you tag it with '-resource:my_resource resource_file.vcf' (see the -resource argument, also
+     * documented on this page) and you specify '-E my_resource.AC'. In the resulting output VCF, any records for
+     * which there is a record at the same position in the resource file will be annotated with 'my_resource.AC=N'.
+     * Note that if there are multiple records in the resource file that overlap the given position, one is chosen
+     * randomly.
      */
-    @Argument(fullName="expression", shortName="E", doc="One or more specific expressions to apply to variant calls; see documentation for more details", required=false)
+    @Argument(fullName="expression", shortName="E", doc="One or more specific expressions to apply to variant calls", required=false)
     protected Set<String> expressionsToUse = new ObjectOpenHashSet();
 
     /**
-     * Note that the -XL argument can be used along with this one to exclude annotations.
+     * You can use the -XL argument in combination with this one to exclude specific annotations. Note that some
+     * annotations may not be actually applied if they are not applicable to the data provided or if they are
+     * unavailable to the tool (e.g. there are several annotations that are currently not hooked up to
+     * HaplotypeCaller). At present no error or warning message will be provided; the annotation will simply be
+     * skipped silently. You can check the output VCF header to see which annotations were actually applied (although
+     * this does not guarantee that the annotation was applied to all records in the VCF, since some annotations have
+     * additional requirements, e.g. minimum number of samples or heterozygous sites only -- see the documentation
+     * for individual annotations' requirements).
      */
     @Argument(fullName="useAllAnnotations", shortName="all", doc="Use all possible annotations (not for the faint of heart)", required=false)
     protected Boolean USE_ALL_ANNOTATIONS = false;
 
     /**
-     * Note that the --list argument requires a fully resolved and correct command-line to work. As a simpler alternative, you can use ListAnnotations (see Help Utilities).
+     * Note that the --list argument requires a fully resolved and correct command-line to work. As an alternative, you can use ListAnnotations (see Help Utilities).
      */
     @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit", required=false)
     protected Boolean LIST = false;
 
     /**
-     * By default, the dbSNP ID is added only when the ID field in the variant VCF is empty.
+     * By default, the dbSNP ID is added only when the ID field in the variant VCF is empty (not already annotated).
+     * This argument allows you to override that behavior. This is used in conjunction with the -dbsnp argument.
      */
-    @Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="In conjunction with the dbSNP binding, append the dbSNP ID even when the variant VCF already has the ID field populated", required=false)
+    @Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="Append the dbSNP ID even when the variant VCF already has the ID field populated", required=false)
     protected Boolean ALWAYS_APPEND_DBSNP_ID = false;
     public boolean alwaysAppendDbsnpId() { return ALWAYS_APPEND_DBSNP_ID; }
 
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/examples/GATKPaperGenotyper.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/examples/GATKPaperGenotyper.java
index dcd5747bc..8cfc14e41 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/examples/GATKPaperGenotyper.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/examples/GATKPaperGenotyper.java
@@ -49,7 +49,7 @@ import java.util.Comparator;
  *
  * @author aaron
  */
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_TOY, extraDocs = {CommandLineGATK.class} )
 public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements TreeReducible<Long> {
 
     public static final double HUMAN_SNP_HETEROZYGOSITY = 1e-3;
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/commandline/IntervalArgumentCollection.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/commandline/IntervalArgumentCollection.java
index e31dca30c..717a07708 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/commandline/IntervalArgumentCollection.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/commandline/IntervalArgumentCollection.java
@@ -53,16 +53,19 @@ public class IntervalArgumentCollection {
      * (e.g. -XL myFile.intervals).
      *
      * Additionally, you can also specify a ROD file (such as a VCF file) in order to exclude specific
-     * positions from the analysis based on the records present in the file (e.g. -L file.vcf).
+     * positions from the analysis based on the records present in the file (e.g. -XL file.vcf).
      * */
     @Input(fullName = "excludeIntervals", shortName = "XL", doc = "One or more genomic intervals to exclude from processing", required = false)
     public List<IntervalBinding<Feature>> excludeIntervals = null;
 
     /**
      * By default, the program will take the UNION of all intervals specified using -L and/or -XL. However, you can
-     * change this setting, for example if you want to take the INTERSECTION of the sets instead. E.g. to perform the
+     * change this setting for -L, for example if you want to take the INTERSECTION of the sets instead. E.g. to perform the
      * analysis on positions for which there is a record in a VCF, but restrict this to just those on chromosome 20,
-     * you would do -L chr20 -L file.vcf -isr INTERSECTION.
+     * you would do -L chr20 -L file.vcf -isr INTERSECTION. However, it is not possible to modify the merging approach
+     * for intervals passed using -XL (they will always be merged using UNION).
+     *
+     * Note that if you specify both -L and -XL, the -XL interval set will be subtracted from the -L interval set.
      */
     @Argument(fullName = "interval_set_rule", shortName = "isr", doc = "Set merging approach to use for combining interval inputs", required = false)
     public IntervalSetRule intervalSetRule = IntervalSetRule.UNION;
diff --git a/settings/helpTemplates/common.html b/settings/helpTemplates/common.html
index ff9df5eea..874f354ba 100644
--- a/settings/helpTemplates/common.html
+++ b/settings/helpTemplates/common.html
@@ -53,6 +53,14 @@
                 }
             </style>
 
+            <!-- MathJax / MathML support for displaying equations -->
+            <script type='text/x-mathjax-config'>
+		        MathJax.Hub.Config({
+  			        TeX: { equationNumbers: { autoNumber: 'AMS' } }
+		        });
+	        </script>
+            <script type='text/javascript' src='http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML'></script>
+
         </head>
         <body>
         <div class="navbar navbar-fixed-top">