From d87345cd1da0f7adc583b4d7024f32ae33c02a4b Mon Sep 17 00:00:00 2001
From: Geraldine Van der Auwera
Date: Fri, 27 May 2016 19:47:32 -0400
Subject: [PATCH] GATKDocs overhaul
- Fixed displaying of default values
- Removed code cruft
- Reorganized tooldoc categories and improved names
- Reorganized tools within categories where applicable
- Touched up various tool docs
- Switched default gatkdocs output to html
- Added parameter in aggregator pom to control output type
- Set gatkdocs publishing script to output php
- Deprecated GenotypeAndValidate walker
- Added back PhoneHome arguments with @Deprecated annotations
---
pom.xml | 3 +-
.../tools/walkers/annotator/ExcessHet.java | 2 +-
.../tools/walkers/bqsr/AnalyzeCovariates.java | 4 +-
.../tools/walkers/bqsr/BaseRecalibrator.java | 2 +-
.../walkers/cancer/AssignSomaticStatus.java | 4 +
.../AnnotatePopulationAFWalker.java | 4 +
.../walkers/cancer/contamination/ContEst.java | 2 +-
...AssemblyBasedCallerArgumentCollection.java | 59 +-
.../walkers/phasing/PhaseByTransmission.java | 2 +-
.../walkers/phasing/ReadBackedPhasing.java | 14 +-
.../SimulateReadsForVariants.java | 2 +-
.../validation/GenotypeAndValidate.java | 551 ------------------
.../ValidationSiteSelector.java | 7 +-
.../ApplyRecalibration.java | 21 +-
.../VariantDataManager.java | 2 +-
...VariantRecalibratorArgumentCollection.java | 3 +-
.../CalculateGenotypePosteriors.java | 2 +-
.../variantutils/RegenotypeVariants.java | 2 +-
.../gatk/engine/CommandLineGATK.java | 2 +-
.../arguments/GATKArgumentCollection.java | 126 ++--
.../filters/ReassignMappingQualityFilter.java | 2 +-
.../ReassignOneMappingQualityFilter.java | 2 +-
.../annotator/FractionInformativeReads.java | 4 +-
.../walkers/coverage/DepthOfCoverage.java | 2 +-
.../walkers/examples/GATKPaperGenotyper.java | 8 +-
.../walkers/filters/VariantFiltration.java | 2 +-
.../tools/walkers/indels/IndelRealigner.java | 2 +-
.../gatk/tools/walkers/qc/QCRef.java | 2 +-
.../tools/walkers/rnaseq/ASEReadCounter.java | 17 +-
.../walkers/varianteval/VariantEval.java | 2 +-
.../variantutils/GenotypeConcordance.java | 2 +-
.../variantutils/ValidateVariants.java | 2 +-
.../gatk/utils/DeprecatedToolChecks.java | 1 +
.../codecs/sampileup/SAMPileupCodec.java | 2 +-
.../utils/codecs/samread/SAMReadCodec.java | 2 +-
.../utils/help/DocumentedGATKFeature.java | 2 -
.../help/DocumentedGATKFeatureObject.java | 10 +-
.../gatk/utils/help/GATKDocUtils.java | 12 +-
.../gatk/utils/help/GATKDoclet.java | 20 +-
.../help/GenericDocumentationHandler.java | 21 +-
.../gatk/utils/help/HelpConstants.java | 43 +-
.../gatk/utils/help/HelpFormatter.java | 1 +
settings/helpTemplates/common.html | 35 +-
.../helpTemplates/generic.index.template.html | 31 +-
settings/helpTemplates/generic.template.html | 55 +-
45 files changed, 220 insertions(+), 876 deletions(-)
delete mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java
diff --git a/pom.xml b/pom.xml
index 728c772db..3da58fc29 100644
--- a/pom.xml
+++ b/pom.xml
@@ -32,6 +32,7 @@
false
-build-timestamp "${maven.build.timestamp}"
+ html
${gatk.basedir}/public/src/main/scripts/shell
${gatk.basedir}/public/src/main/assembly
@@ -768,7 +769,7 @@
false
true
private
- -build-timestamp "${maven.build.timestamp}" -absolute-version ${build.version} ${gatkdocs.include.hidden} -settings-dir ${gatk.basedir}/settings/helpTemplates -destination-dir ${project.build.directory}/gatkdocs
+ -build-timestamp "${maven.build.timestamp}" -absolute-version ${build.version} ${gatkdocs.include.hidden} -settings-dir ${gatk.basedir}/settings/helpTemplates -destination-dir ${project.build.directory}/gatkdocs -output-file-extension ${gatkdocs.extension}
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/ExcessHet.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/ExcessHet.java
index 1f83f3ede..063e63560 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/ExcessHet.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/ExcessHet.java
@@ -79,7 +79,7 @@ import java.util.*;
/**
* Phred-scaled p-value for exact test of excess heterozygosity
*
- * This annotation estimates excess heterozygosity in a population of samples. It is related to but distinct from InbreedingCoeff, which estimates evidence for inbreeding in a population. ExcessHet scales more reliably to large cohort sizes.
+ * This annotation estimates excess heterozygosity in a population of samples. It is related to but distinct from InbreedingCoeff, which estimates evidence for inbreeding in a population. ExcessHet scales more reliably to large cohort sizes.
*
* Statistical notes
* This annotation is a one-sided phred-scaled p-value using an exact test of the Hardy-Weinberg Equilibrium. The null hypothesis is that the number of heterozygotes follows the Hardy-Weinberg Equilibrium. The p-value is the probability of getting the same or more heterozygotes as was observed, given the null hypothesis.
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/AnalyzeCovariates.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/AnalyzeCovariates.java
index c4df9578a..45eb9e052 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/AnalyzeCovariates.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/AnalyzeCovariates.java
@@ -78,8 +78,8 @@ import java.util.Map;
/**
* Create plots to visualize base recalibration results
*
- *
- * This tool generates plots for visualizing the quality of a recalibration run (effected by BaseRecalibrator).
+ *
+ * This tool generates plots for visualizing the quality of a recalibration run (effected by BaseRecalibrator).
*
*
* Input
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/BaseRecalibrator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/BaseRecalibrator.java
index 0c807a161..4205ed2d2 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/BaseRecalibrator.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/bqsr/BaseRecalibrator.java
@@ -86,7 +86,7 @@ import java.util.Arrays;
import java.util.List;
/**
- * Generate base recalibration table to compensate for systematic errors in basecalling confidences
+ * Detect systematic errors in base quality scores
*
*
* Variant calling algorithms rely heavily on the quality scores assigned to the individual base calls in each sequence
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/AssignSomaticStatus.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/AssignSomaticStatus.java
index 8068e6c3a..3714ba5b4 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/AssignSomaticStatus.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/AssignSomaticStatus.java
@@ -54,6 +54,7 @@ package org.broadinstitute.gatk.tools.walkers.cancer;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.commandline.ArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.Output;
+import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
@@ -66,6 +67,8 @@ import org.broadinstitute.gatk.engine.SampleUtils;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.engine.GATKVCFUtils;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
+import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
+import org.broadinstitute.gatk.utils.help.HelpConstants;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
@@ -78,6 +81,7 @@ import java.util.*;
/**
* Assigns somatic status to a set of calls
*/
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class}, enable = false )
public class AssignSomaticStatus extends RodWalker implements TreeReducible {
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/AnnotatePopulationAFWalker.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/AnnotatePopulationAFWalker.java
index 2674a72ec..604cba9d3 100755
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/AnnotatePopulationAFWalker.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/AnnotatePopulationAFWalker.java
@@ -52,6 +52,9 @@
package org.broadinstitute.gatk.tools.walkers.cancer.contamination;
+import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
+import org.broadinstitute.gatk.utils.help.HelpConstants;
+import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.commandline.Input;
import org.broadinstitute.gatk.utils.commandline.Output;
@@ -78,6 +81,7 @@ import java.util.*;
/**
* Given a input VCF representing a collection of populations, split the input into each population, and annotate each record with population allele frequencies
*/
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class}, enable = false )
// @Requires(DataSource.SAMPLE) <- require the sample data when this works
public class AnnotatePopulationAFWalker extends RodWalker implements TreeReducible {
// control the output
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/ContEst.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/ContEst.java
index b9160d71f..a0639181f 100755
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/ContEst.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/contamination/ContEst.java
@@ -80,7 +80,7 @@ import java.util.*;
/**
* Estimate cross-sample contamination
*
- * This tool determine the percent contamination of an input bam by sample, by lane, or in aggregate across all the input reads.
+ * This tool determines the percent contamination of an input bam by sample, by lane, or in aggregate across all the input reads.
*
* Usage examples
* These are example commands that show how to run ContEst for typical use cases. Square brackets ("[ ]")
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java
index da2f684f1..bba75dbfe 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java
@@ -74,10 +74,12 @@ public class AssemblyBasedCallerArgumentCollection extends StandardCallerArgumen
public boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false;
/**
- * The reference confidence mode makes it possible to emit a per-bp or summarized confidence estimate for a site being strictly homozygous-reference.
+ * The reference confidence mode makes it possible to emit variant calls in GVCF format, which includes either a per-base
+ * pair (BP_RESOLUTION) or a summarized (GVCF) confidence estimate for each position being strictly homozygous-reference.
* See http://www.broadinstitute.org/gatk/guide/article?id=2940 for more details of how this works.
- * Note that if you set -ERC GVCF, you also need to set -variant_index_type LINEAR and -variant_index_parameter 128000 (with those exact values!).
- * This requirement is a temporary workaround for an issue with index compression.
+ * Note that if you use -ERC to emit a GVCF or BP_RESOLUTION output, you either
+ * need to give the output file the extension .g.vcf or set the parameters -variant_index_type LINEAR
+ * and -variant_index_parameter 128000 (with those exact values!). This has to do with index compression.
*/
@Advanced
@Argument(fullName="emitRefConfidence", shortName="ERC", doc="Mode for emitting reference confidence scores", required = false)
@@ -89,41 +91,39 @@ public class AssemblyBasedCallerArgumentCollection extends StandardCallerArgumen
}
/**
- * The assembled haplotypes and locally realigned reads will be written as BAM to this file if requested. Really
- * for debugging purposes only. Note that not every input read is emitted to the bam. To include trimmed, downsampled,
- * filtered and uninformative reads add the "--emitDroppedReads" argument.
+ * The assembled haplotypes and locally realigned reads will be written as BAM to this file if requested. This is
+ * intended to be used only for troubleshooting purposes, in specific areas where you want to better understand
+ * why the caller is making specific calls. Turning on this mode may result in serious performance cost for the
+ * caller, so we do NOT recommend using this argument systematically as it will significantly increase runtime.
*
- * Turning on this mode may result in serious performance cost for the caller. It's really only appropriate to
- * use in specific areas where you want to better understand why the caller is making specific calls.
+ * The candidate haplotypes (called or all, depending on mode) are emitted as single reads covering the entire
+ * active region, coming from sample "HC" and a special read group called "ArtificialHaplotype". This will increase
+ * the pileup depth compared to what would be expected from the reads only, especially in complex regions.
*
* The reads are written out containing an "HC" tag (integer) that encodes which haplotype each read best matches
* according to the haplotype caller's likelihood calculation. The use of this tag is primarily intended
* to allow good coloring of reads in IGV. Simply go to "Color Alignments By > Tag" and enter "HC" to more
- * easily see which reads go with these haplotype.
+ * easily see which reads go with these haplotypes. You can also tell IGV to group reads by sample, which will
+ * separate the potential haplotypes from the reads. These features are illustrated in
+ * this screenshot.
*
- * Note that the haplotypes (called or all, depending on mode) are emitted as single reads covering the entire
- * active region, coming from sample "HC" and a special read group called "ArtificialHaplotype". This will increase the
- * pileup depth compared to what would be expected from the reads only, especially in complex regions.
+ * Note that only reads that are actually informative about the haplotypes are emitted with the HC tag.
+ * By informative we mean that there's a meaningful difference in the likelihood of the read coming from one
+ * haplotype compared to the next best haplotype. When coloring reads by HC tag in IGV, uninformative reads will
+ * remain grey.
*
- * Note also that only reads that are actually informative about the haplotypes are emitted. By informative we mean
- * that there's a meaningful difference in the likelihood of the read coming from one haplotype compared to
- * its next best haplotype.
- *
- * If multiple BAMs are passed as input to the tool (as is common for M2), then they will be combined in the bamout
- * output and tagged with the appropriate sample names.
- *
- * The best way to visualize the output of this mode is with IGV. Tell IGV to color the alignments by tag,
- * and give it the "HC" tag, so you can see which reads support each haplotype. Finally, you can tell IGV
- * to group by sample, which will separate the potential haplotypes from the reads. All of this can be seen in
- * this screenshot
+ * Note also that not every input read is emitted to the bam in this mode. To include all trimmed, downsampled,
+ * filtered and uninformative reads, add the --emitDroppedReads argument.
*
+ * If multiple BAMs are passed as input to the tool (as is common for MuTect2), then they will be combined in the
+ * -bamout output and tagged with the appropriate sample names.
*/
@Advanced
@Output(fullName="bamOutput", shortName="bamout", doc="File to which assembled haplotypes should be written", required = false, defaultToStdout = false)
public GATKSAMFileWriter bamWriter = null;
/**
- * The type of BAM output we want to see. This determines whether HC will write out all of the haplotypes it
+ * The type of -bamout output we want to see. This determines whether HC will write out all of the haplotypes it
* considered (top 128 max) or just the ones that were selected as alleles and assigned to samples.
*/
@Advanced
@@ -131,8 +131,8 @@ public class AssemblyBasedCallerArgumentCollection extends StandardCallerArgumen
public HaplotypeBAMWriter.Type bamWriterType = HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES;
/**
- * Determines whether dropped reads will be tracked and emitted when a bamout is specified. Use this in combination
- * with a specific interval of interest to avoid accumulating a large number of reads int eh bamout.
+ * Determines whether dropped reads will be tracked and emitted when -bamout is specified. Use this in combination
+ * with a specific interval of interest to avoid accumulating a large number of reads in the -bamout file.
*/
@Advanced
@Argument(fullName="emitDroppedReads", shortName="edr", doc="Emit reads that are dropped for filtering, trimming, realignment failure", required = false)
@@ -140,9 +140,10 @@ public class AssemblyBasedCallerArgumentCollection extends StandardCallerArgumen
/**
* If set, certain "early exit" optimizations in HaplotypeCaller, which aim to save compute and time by skipping
- * calculations if an ActiveRegion is determined to contain no variants, will be disabled. This is most likely to be useful if
- * you're using the -bamout argument to examine the placement of reads following reassembly and are interested in seeing the mapping of
- * reads in regions with no variations. Setting the -forceActive and -dontTrimActiveRegions flags may also be necessary.
+ * calculations if an ActiveRegion is determined to contain no variants, will be disabled. This is most likely to be
+ * useful if you're using the -bamout argument to examine the placement of reads following reassembly
+ * and are interested in seeing the mapping of reads in regions with no variations. Setting the -forceActive
+ * and -dontTrimActiveRegions flags may also be helpful.
*/
@Advanced
@Argument(fullName = "disableOptimizations", shortName="disableOptimizations", doc="Don't skip calculations in ActiveRegions with no variants",
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhaseByTransmission.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhaseByTransmission.java
index 4da0b59cc..84fc325cb 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhaseByTransmission.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhaseByTransmission.java
@@ -140,7 +140,7 @@ import java.util.*;
*
*
*/
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
public class PhaseByTransmission extends RodWalker, HashMap> {
@ArgumentCollection
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasing.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasing.java
index 1bcd851c1..19928c3fd 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasing.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/ReadBackedPhasing.java
@@ -112,8 +112,20 @@ import static org.broadinstitute.gatk.engine.GATKVCFUtils.getVCFHeadersFromRods;
*
* Output
*
- * Phased VCF file.
+ * Phased VCF file using HP tags to link alleles at (possibly non-consecutive) genotypes of the same sample.
*
+ * Example
+ *
+ * GT:GQ:HP 0/1:99:17690409-1,17690409-2
+ * GT:GQ:HP:PQ 0/1:99:17690409-2,17690409-1:1258.14
+ *
+ * The second site's alternate allele (1) is on the same physical haplotype as the first site's reference allele (0),
+ * and vice versa [second site's 0 goes with first site's 1]. This is based on the fact that the HP pairs line up in
+ * reverse order between these two genotypes.
+ * In an old notation that RBP used to output in much earlier versions, the genotypes would have been: 0/1 and 1|0,
+ * respectively. This was changed because depending on the case it caused ambiguity, incompleteness, and possible
+ * inconsistency with trio-based phasing. In contrast, the HP tag is much more explicit for linking alleles, especially
+ * if the genotypes are non-consecutive.
*
* Usage example
*
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/simulatereads/SimulateReadsForVariants.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/simulatereads/SimulateReadsForVariants.java
index 84aac5f50..2024bb70a 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/simulatereads/SimulateReadsForVariants.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/simulatereads/SimulateReadsForVariants.java
@@ -106,7 +106,7 @@ import java.util.*;
*
*
*/
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class}, gotoDev = HelpConstants.EB)
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class})
@Reference(window=@Window(start=-200,stop=200))
public class SimulateReadsForVariants extends RodWalker {
private static Logger logger = Logger.getLogger(SimulateReadsForVariants.class);
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java
deleted file mode 100644
index ab2f38057..000000000
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
-* By downloading the PROGRAM you agree to the following terms of use:
-*
-* BROAD INSTITUTE
-* SOFTWARE LICENSE AGREEMENT
-* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
-*
-* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 ("BROAD") and the LICENSEE and is effective at the date the downloading is completed ("EFFECTIVE DATE").
-*
-* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
-* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
-* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
-*
-* 1. DEFINITIONS
-* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
-*
-* 2. LICENSE
-* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
-* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
-* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
-* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
-*
-* 3. PHONE-HOME FEATURE
-* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system ("PHONE-HOME") which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE'S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
-*
-* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
-* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
-* Copyright 2012-2016 Broad Institute, Inc.
-* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
-*
-* 5. INDEMNIFICATION
-* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
-*
-* 6. NO REPRESENTATIONS OR WARRANTIES
-* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
-*
-* 7. ASSIGNMENT
-* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
-*
-* 8. MISCELLANEOUS
-* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
-* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
-* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
-* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
-* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
-* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
-* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
-*/
-
-package org.broadinstitute.gatk.tools.walkers.validation;
-
-import htsjdk.variant.vcf.*;
-import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
-import org.broadinstitute.gatk.engine.walkers.*;
-import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
-import org.broadinstitute.gatk.utils.commandline.*;
-import org.broadinstitute.gatk.engine.CommandLineGATK;
-import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
-import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
-import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
-import org.broadinstitute.gatk.tools.walkers.genotyper.*;
-import org.broadinstitute.gatk.engine.SampleUtils;
-import org.broadinstitute.gatk.utils.help.HelpConstants;
-import org.broadinstitute.gatk.engine.GATKVCFUtils;
-import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
-import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
-import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
-import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
-import htsjdk.variant.variantcontext.VariantContext;
-import htsjdk.variant.variantcontext.VariantContextBuilder;
-import htsjdk.variant.variantcontext.writer.VariantContextWriter;
-
-import java.util.Map;
-import java.util.Set;
-
-import static org.broadinstitute.gatk.utils.IndelUtils.isInsideExtendedIndel;
-
-/**
- * Genotype and validate a dataset and the calls of another dataset using the Unified Genotyper
- *
- * Note that this is an old tool that makes use of the UnifiedGenotyper, which has since been
- * deprecated in favor of the HaplotypeCaller.
- *
- * Genotype and Validate is a tool to evaluate the quality of a dataset for calling SNPs
- * and Indels given a secondary (validation) data source. The data sources are BAM or VCF
- * files. You can use them interchangeably (i.e. a BAM to validate calls in a VCF or a VCF
- * to validate calls on a BAM).
- *
- *
- *
- * The simplest scenario is when you have a VCF of hand annotated SNPs and Indels, and you
- * want to know how well a particular technology performs calling these snps. With a
- * dataset (BAM file) generated by the technology in test, and the hand annotated VCF, you
- * can run GenotypeAndValidate to asses the accuracy of the calls with the new technology's
- * dataset.
- *
- *
- *
- * Another option is to validate the calls on a VCF file, using a deep coverage BAM file
- * that you trust the calls on. The GenotypeAndValidate walker will make calls using the
- * reads in the BAM file and take them as truth, then compare to the calls in the VCF file
- * and produce a truth table.
- *
- *
- *
- * Input
- *
- * A BAM file to make calls on and a VCF file to use as truth validation dataset.
- *
- * You also have the option to invert the roles of the files using the command line options listed below.
- *
- *
- * Output
- *
- * GenotypeAndValidate has two outputs. The truth table and the optional VCF file. The truth table is a
- * 2x2 table correlating what was called in the dataset with the truth of the call (whether it's a true
- * positive or a false positive). The table should look like this:
- *
- *
- *
- *
- * |
- * ALT |
- * REF |
- * Predictive Value |
- *
- *
- * | called alt |
- * True Positive (TP) |
- * False Positive (FP) |
- * Positive PV |
- *
- *
- * | called ref |
- * False Negative (FN) |
- * True Negative (TN) |
- * Negative PV |
- *
- *
- *
- *
- *
- * The positive predictive value (PPV) is the proportion of subjects with positive test results
- * who are correctly diagnosed.
- *
- *
- * The negative predictive value (NPV) is the proportion of subjects with a negative test result
- * who are correctly diagnosed.
- *
- *
- * The VCF file will contain only the variants that were called or not called, excluding the ones that
- * were uncovered or didn't pass the filters. This file is useful if you are trying to compare
- * the PPV and NPV of two different technologies on the exact same sites (so you can compare apples to
- * apples).
- *
- *
- *
- * Here is an example of an annotated VCF file (info field clipped for clarity)
- *
- *
- * #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878
- * 1 20568807 . C T 0 HapMapHet AC=1;AF=0.50;AN=2;DP=0;GV=T GT 0/1
- * 1 22359922 . T C 282 WG-CG-HiSeq AC=2;AF=0.50;GV=T;AN=4;DP=42 GT:AD:DP:GL:GQ 1/0 ./. 0/1:20,22:39:-72.79,-11.75,-67.94:99 ./.
- * 13 102391461 . G A 341 Indel;SnpCluster AC=1;GV=F;AF=0.50;AN=2;DP=45 GT:AD:DP:GL:GQ ./. ./. 0/1:32,13:45:-50.99,-13.56,-112.17:99 ./.
- * 1 175516757 . C G 655 SnpCluster,WG AC=1;AF=0.50;AN=2;GV=F;DP=74 GT:AD:DP:GL:GQ ./. ./. 0/1:52,22:67:-89.02,-20.20,-191.27:99 ./.
- *
- *
- *
- *
- * Additional Details
- *
- * -
- * You should always use -L on your VCF track, so that the GATK only looks at the sites on the VCF file.
- * This speeds up the process a lot.
- *
- * -
- * The total number of visited bases may be greater than the number of variants in the original
- * VCF file because of extended indels, as they trigger one call per new insertion or deletion.
- * (i.e. ACTG/- will count as 4 genotyper calls, but it's only one line in the VCF).
- *
- *
- *
- * Usage examples
- * Genotypes BAM file from new technology using the VCF as a truth dataset
- *
- * java
- * -jar GenomeAnalysisTK.jar \
- * -T GenotypeAndValidate \
- * -R reference.fasta \
- * -I myNewTechReads.bam \
- * -alleles handAnnotatedVCF.vcf \
- * -L handAnnotatedVCF.vcf \
- * -o output.vcf
- *
- *
- * Genotypes BAM file from new technology a BAM file as the truth dataset
- *
- * java
- * -jar GenomeAnalysisTK.jar \
- * -T GenotypeAndValidate \
- * -R reference.fasta \
- * -I myTruthDataset.bam \
- * -alleles callsToValidate.vcf \
- * -L callsToValidate.vcf \
- * -bt \
- * -o output.vcf
- *
- *
- */
-
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} )
-@Requires(value={DataSource.READS, DataSource.REFERENCE})
-@Allows(value={DataSource.READS, DataSource.REFERENCE})
-@By(DataSource.REFERENCE)
-@Reference(window=@Window(start=-200,stop=200))
-public class GenotypeAndValidate extends RodWalker implements TreeReducible {
-
- /**
- * The optional output file that will have all the variants used in the Genotype and Validation essay.
- * The new annotation `callStatus` will carry the value called in the validation VCF or BAM file."
- */
- @Output(doc="Output VCF file with annotated variants", required=false)
- protected VariantContextWriter vcfWriter = null;
-
- /**
- * The callset to be used as truth (default) or validated (if BAM file is set to truth).
- */
- @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype", required=true)
- public RodBinding alleles;
-
- /**
- * Makes the Unified Genotyper calls to the BAM file the truth dataset and validates the alleles ROD binding callset.
- */
- @Argument(fullName ="set_bam_truth", shortName ="bt", doc="Use the calls on the reads (bam file) as the truth dataset and validate the calls on the VCF", required=false)
- private boolean bamIsTruth = false;
-
- /**
- * The minimum base quality score necessary for a base to be considered when calling a genotype. This argument is passed to the Unified Genotyper.
- */
- @Argument(fullName="minimum_base_quality_score", shortName="mbq", doc="Minimum base quality score for calling a genotype", required=false)
- private int mbq = -1;
-
- /**
- * The maximum deletion fraction allowed in a site for calling a genotype. This argument is passed to the Unified Genotyper.
- */
- @Argument(fullName="maximum_deletion_fraction", shortName="deletions", doc="Maximum deletion fraction for calling a genotype", required=false)
- private double deletions = -1;
-
- /**
- * the minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. This argument is passed to the Unified Genotyper.
- */
- @Argument(fullName="standard_min_confidence_threshold_for_calling", shortName="stand_call_conf", doc="the minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls", required=false)
- private double callConf = -1;
-
- /**
- * the minimum phred-scaled Qscore threshold to emit low confidence calls. This argument is passed to the Unified Genotyper.
- */
- @Argument(fullName="standard_min_confidence_threshold_for_emitting", shortName="stand_emit_conf", doc="the minimum phred-scaled Qscore threshold to emit low confidence calls", required=false)
- private double emitConf = -1;
-
- /**
- * Only validate sites that have at least a given depth
- */
- @Argument(fullName="condition_on_depth", shortName="depth", doc="Condition validation on a minimum depth of coverage by the reads", required=false)
- private int minDepth = -1;
-
- /**
- * Print out discordance sites to standard out.
- */
- @Hidden
- @Argument(fullName ="print_interesting_sites", shortName ="print_interesting", doc="Print out interesting sites to standard out", required=false)
- private boolean printInterestingSites = false;
-
- private UnifiedGenotypingEngine snpEngine;
- private UnifiedGenotypingEngine indelEngine;
- private Set samples;
-
- private enum GVstatus {
- T, F, NONE
- }
-
- public static class CountedData {
- private long nAltCalledAlt = 0L;
- private long nAltCalledRef = 0L;
- private long nAltNotCalled = 0L;
- private long nRefCalledAlt = 0L;
- private long nRefCalledRef = 0L;
- private long nRefNotCalled = 0L;
- private long nNoStatusCalledAlt = 0L;
- private long nNoStatusCalledRef = 0L;
- private long nNoStatusNotCalled = 0L;
- private long nNotConfidentCalls = 0L;
- private long nUncovered = 0L;
-
- /**
- * Adds the values of other to this, returning this
- * @param other the other object
- */
- public void add(CountedData other) {
- nAltCalledAlt += other.nAltCalledAlt;
- nAltCalledRef += other.nAltCalledRef;
- nAltNotCalled += other.nAltNotCalled;
- nRefCalledAlt += other.nRefCalledAlt;
- nRefCalledRef += other.nRefCalledRef;
- nRefNotCalled += other.nRefNotCalled;
- nNoStatusCalledAlt += other.nNoStatusCalledAlt;
- nNoStatusCalledRef += other.nNoStatusCalledRef;
- nNoStatusNotCalled += other.nNoStatusNotCalled;
- nUncovered += other.nUncovered;
- nNotConfidentCalls += other.nNotConfidentCalls;
- }
- }
-
-
-
- //---------------------------------------------------------------------------------------------------------------
- //
- // initialize
- //
- //---------------------------------------------------------------------------------------------------------------
-
- public void initialize() {
-
- // Initialize VCF header
- if (vcfWriter != null) {
- Map header = GATKVCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName());
- samples = SampleUtils.getSampleList(header, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
- Set headerLines = VCFUtils.smartMergeHeaders(header.values(), true);
- headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate"));
- headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY));
- vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
- }
-
- // Filling in SNP calling arguments for UG
- UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
- uac.outputMode = OutputMode.EMIT_ALL_SITES;
- uac.alleles = alleles;
-
- // TODO -- if we change this tool to actually validate against the called allele, then this if statement is needed;
- // TODO -- for now, though, we need to be able to validate the right allele (because we only test isVariant below) [EB]
- //if (!bamIsTruth)
- uac.genotypingOutputMode = GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES;
-
- if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq;
- if (deletions >= 0)
- uac.MAX_DELETION_FRACTION = deletions;
- else
- uac.MAX_DELETION_FRACTION = 1.0;
- if (emitConf >= 0) uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf;
- if (callConf >= 0) uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = callConf;
-
- final GenomeAnalysisEngine toolkit = getToolkit();
- uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
- snpEngine = new UnifiedGenotypingEngine(uac,
- FixedAFCalculatorProvider.createThreadSafeProvider(toolkit, uac, logger),toolkit);
-
-
- // Adding the INDEL calling arguments for UG
- UnifiedArgumentCollection uac_indel = uac.clone();
- uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
- indelEngine = new UnifiedGenotypingEngine(uac_indel,
- FixedAFCalculatorProvider.createThreadSafeProvider(toolkit, uac, logger),toolkit);
-
- // make sure we have callConf set to the threshold set by the UAC so we can use it later.
- callConf = uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING;
- }
-
- //---------------------------------------------------------------------------------------------------------------
- //
- // map
- //
- //---------------------------------------------------------------------------------------------------------------
-
- public CountedData map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
-
- final CountedData counter = new CountedData();
-
- // For some reason RodWalkers get map calls with null trackers
- if( tracker == null )
- return counter;
-
- VariantContext vcComp = tracker.getFirstValue(alleles);
- if( vcComp == null )
- return counter;
-
- //todo - not sure I want this, may be misleading to filter extended indel events.
- if (isInsideExtendedIndel(vcComp, ref))
- return counter;
-
- // Do not operate on variants that are not covered to the optional minimum depth
- if (!context.hasReads() || (minDepth > 0 && context.getBasePileup().getBases().length < minDepth)) {
- counter.nUncovered = 1L;
- final GVstatus status = getGVstatus(vcComp);
- if ( status == GVstatus.T )
- counter.nAltNotCalled = 1L;
- else if ( status == GVstatus.F )
- counter.nRefNotCalled = 1L;
- else
- counter.nNoStatusNotCalled = 1L;
-
- return counter;
- }
-
- VariantCallContext call;
- if ( vcComp.isSNP() ) {
- call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0);
- } else if ( vcComp.isIndel() ) {
- call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0);
- } else if ( bamIsTruth ) {
- // assume it's a SNP if no variation is present; this is necessary so that we can test supposed monomorphic sites against the truth bam
- call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0);
- } else {
- logger.info("Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles());
- return counter;
- }
-
-
- boolean writeVariant = true;
-
- if (bamIsTruth) {
- if (call.confidentlyCalled) {
- // If truth is a confident REF call
- if (call.isVariant()) {
- if (vcComp.isVariant())
- counter.nAltCalledAlt = 1L;
- else {
- counter.nAltCalledRef = 1L;
- if ( printInterestingSites )
- System.out.println("Truth=ALT Call=REF at " + call.getChr() + ":" + call.getStart());
- }
- }
- // If truth is a confident ALT call
- else {
- if (vcComp.isVariant()) {
- counter.nRefCalledAlt = 1L;
- if ( printInterestingSites )
- System.out.println("Truth=REF Call=ALT at " + call.getChr() + ":" + call.getStart());
- } else
- counter.nRefCalledRef = 1L;
- }
- }
- else {
- counter.nNotConfidentCalls = 1L;
- if ( printInterestingSites )
- System.out.println("Truth is not confident at " + call.getChr() + ":" + call.getStart());
- writeVariant = false;
- }
- }
- else {
-// if (!vcComp.hasExtendedAttribute("GV"))
-// throw new UserException.BadInput("Variant has no GV annotation in the INFO field. " + vcComp.getChr() + ":" + vcComp.getStart());
-
- final GVstatus status = getGVstatus(vcComp);
- if (call.isCalledAlt(callConf)) {
- if ( status == GVstatus.T )
- counter.nAltCalledAlt = 1L;
- else if ( status == GVstatus.F ) {
- counter.nRefCalledAlt = 1L;
- if ( printInterestingSites )
- System.out.println("Truth=REF Call=ALT at " + call.getChr() + ":" + call.getStart());
- }
- else
- counter.nNoStatusCalledAlt = 1L;
- }
- else if (call.isCalledRef(callConf)) {
- if ( status == GVstatus.T ) {
- counter.nAltCalledRef = 1L;
- if ( printInterestingSites )
- System.out.println("Truth=ALT Call=REF at " + call.getChr() + ":" + call.getStart());
- }
- else if ( status == GVstatus.F )
- counter.nRefCalledRef = 1L;
-
- else
- counter.nNoStatusCalledRef = 1L;
- }
- else {
- counter.nNotConfidentCalls = 1L;
- if ( status == GVstatus.T )
- counter.nAltNotCalled = 1L;
- else if ( status == GVstatus.F )
- counter.nRefNotCalled = 1L;
- else
- counter.nNoStatusNotCalled = 1L;
-
- if ( printInterestingSites )
- System.out.println("Truth is not confident at " + call.getChr() + ":" + call.getStart());
- writeVariant = false;
- }
- }
-
- if (vcfWriter != null && writeVariant) {
- if (!vcComp.hasAttribute(GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY)) {
- vcfWriter.add(new VariantContextBuilder(vcComp).attribute(GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY, call.isCalledAlt(callConf) ? "ALT" : "REF").make());
- }
- else
- vcfWriter.add(vcComp);
- }
- return counter;
- }
-
- private GVstatus getGVstatus(final VariantContext vc) {
- return ( !vc.hasAttribute("GV") ) ? GVstatus.NONE : (vc.getAttribute("GV").equals("T") ? GVstatus.T : GVstatus.F);
- }
-
- //---------------------------------------------------------------------------------------------------------------
- //
- // reduce
- //
- //---------------------------------------------------------------------------------------------------------------
-
- public CountedData reduceInit() {
- return new CountedData();
- }
-
- public CountedData treeReduce( final CountedData sum1, final CountedData sum2) {
- sum2.add(sum1);
- return sum2;
- }
-
- public CountedData reduce( final CountedData mapValue, final CountedData reduceSum ) {
- reduceSum.add(mapValue);
- return reduceSum;
- }
-
- public void onTraversalDone( CountedData reduceSum ) {
- double ppv = 100 * ((double) reduceSum.nAltCalledAlt /( reduceSum.nAltCalledAlt + reduceSum.nRefCalledAlt));
- double npv = 100 * ((double) reduceSum.nRefCalledRef /( reduceSum.nRefCalledRef + reduceSum.nAltCalledRef));
- double sensitivity = 100 * ((double) reduceSum.nAltCalledAlt /( reduceSum.nAltCalledAlt + reduceSum.nAltCalledRef));
- double specificity = (reduceSum.nRefCalledRef + reduceSum.nRefCalledAlt > 0) ? 100 * ((double) reduceSum.nRefCalledRef /( reduceSum.nRefCalledRef + reduceSum.nRefCalledAlt)) : 100;
- logger.info(String.format("Resulting Truth Table Output\n\n" +
- "------------------------------------------------------------------\n" +
- "\t\t|\tALT\t|\tREF\t|\tNo Status\n" +
- "------------------------------------------------------------------\n" +
- "called alt\t|\t%d\t|\t%d\t|\t%d\n" +
- "called ref\t|\t%d\t|\t%d\t|\t%d\n" +
- "not called\t|\t%d\t|\t%d\t|\t%d\n" +
- "------------------------------------------------------------------\n" +
- "positive predictive value: %f%%\n" +
- "negative predictive value: %f%%\n" +
- "------------------------------------------------------------------\n" +
- "sensitivity: %f%%\n" +
- "specificity: %f%%\n" +
- "------------------------------------------------------------------\n" +
- "not confident: %d\n" +
- "not covered: %d\n" +
- "------------------------------------------------------------------\n", reduceSum.nAltCalledAlt, reduceSum.nRefCalledAlt, reduceSum.nNoStatusCalledAlt, reduceSum.nAltCalledRef, reduceSum.nRefCalledRef, reduceSum.nNoStatusCalledRef, reduceSum.nAltNotCalled, reduceSum.nRefNotCalled, reduceSum.nNoStatusNotCalled, ppv, npv, sensitivity, specificity, reduceSum.nNotConfidentCalls, reduceSum.nUncovered));
- }
-}
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/validationsiteselector/ValidationSiteSelector.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/validationsiteselector/ValidationSiteSelector.java
index 48a8ae837..35fe33d43 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/validationsiteselector/ValidationSiteSelector.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/validationsiteselector/ValidationSiteSelector.java
@@ -52,6 +52,7 @@
package org.broadinstitute.gatk.tools.walkers.validation.validationsiteselector;
import org.broadinstitute.gatk.utils.commandline.*;
+import org.broadinstitute.gatk.engine.GATKVCFUtils;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
@@ -60,11 +61,11 @@ import org.broadinstitute.gatk.engine.walkers.RodWalker;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.engine.SampleUtils;
import org.broadinstitute.gatk.utils.help.HelpConstants;
+import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
+
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
-import org.broadinstitute.gatk.engine.GATKVCFUtils;
-import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
@@ -134,7 +135,7 @@ import java.util.*;
*
*
*/
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
public class ValidationSiteSelector extends RodWalker {
public enum AF_COMPUTATION_MODE {
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/ApplyRecalibration.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/ApplyRecalibration.java
index a330a4790..d350af102 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/ApplyRecalibration.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/ApplyRecalibration.java
@@ -119,27 +119,27 @@ import java.util.regex.Pattern;
*
* Usage example for filtering SNPs
*
- * java -Xmx3g -jar GenomeAnalysisTK.jar \
+ * java -jar GenomeAnalysisTK.jar \
* -T ApplyRecalibration \
* -R reference.fasta \
- * -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
+ * -input raw_variants.vcf \
* --ts_filter_level 99.0 \
- * -tranchesFile path/to/output.tranches \
- * -recalFile path/to/output.recal \
+ * -tranchesFile output.tranches \
+ * -recalFile output.recal \
* -mode SNP \
* -o path/to/output.recalibrated.filtered.vcf
*
*
* Allele-specific usage
*
- * java -Xmx3g -jar GenomeAnalysisTK.jar \
+ * java -jar GenomeAnalysisTK.jar \
* -T ApplyRecalibration \
* -R reference.fasta \
- * -input rawVariants.withASannotations.vcf \
+ * -input raw_variants.withASannotations.vcf \
* -AS \
* --ts_filter_level 99.0 \
- * -tranchesFile path/to/output.AS.tranches \
- * -recalFile path/to/output.AS.recal \
+ * -tranchesFile output.AS.tranches \
+ * -recalFile output.AS.recal \
* -mode SNP \
* -o path/to/output.recalibrated.ASfiltered.vcf
*
@@ -153,12 +153,11 @@ import java.util.regex.Pattern;
* Caveats
*
*
- * - The tranche values used in the example above is only a general example. You should determine the level of sensitivity
+ * - The tranche values used in the example above are only meant to be a general example. You should determine the level of sensitivity
* that is appropriate for your specific project. Remember that higher sensitivity (more power to detect variants, yay!) comes
* at the cost of specificity (more false negatives, boo!). You have to choose at what point you want to set the tradeoff.
* - In order to create the tranche reporting plots (which are only generated for SNPs, not indels!) Rscript needs to be
- * in your environment PATH (this is the scripting version of R, not the interactive version).
- * See http://www.r-project.org for more info on how to download and install R.
+ * in your environment PATH (this is the scripting version of R, not the interactive version).
*
*/
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantDataManager.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantDataManager.java
index e57c42c5f..b1b19433c 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantDataManager.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantDataManager.java
@@ -110,7 +110,7 @@ public class VariantDataManager {
final double theSTD = standardDeviation(theMean, iii, true);
logger.info( annotationKeys.get(iii) + String.format(": \t mean = %.2f\t standard deviation = %.2f", theMean, theSTD) );
if( Double.isNaN(theMean) ) {
- throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See " + HelpConstants.forumPost("discussion/49/using-variant-annotator"));
+ throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations.");
}
foundZeroVarianceAnnotation = foundZeroVarianceAnnotation || (theSTD < 1E-5);
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java
index 98bc91cdd..83b5a3262 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java
@@ -203,7 +203,8 @@ public class VariantRecalibratorArgumentCollection {
/////////////////////////////
// Deprecated Arguments
- // Keeping them here is meant to provide users with error messages that are more informative than "arg not defined" when they use an argument that has been put out of service
+ // Keeping them here is meant to provide users with error messages that are more informative than "arg not defined"
+ // when they use an argument that has been put out of service
/////////////////////////////
@Hidden
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriors.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriors.java
index e887fe0bf..224caa97f 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriors.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriors.java
@@ -186,7 +186,7 @@ import java.util.*;
*
*
*/
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
public class CalculateGenotypePosteriors extends RodWalker {
/**
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/RegenotypeVariants.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/RegenotypeVariants.java
index 21e3e30e3..755bd087f 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/RegenotypeVariants.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/RegenotypeVariants.java
@@ -114,7 +114,7 @@ import java.util.Set;
*
*
*/
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
public class RegenotypeVariants extends RodWalker implements TreeReducible {
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/CommandLineGATK.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/CommandLineGATK.java
index 0092c30ca..83aab7006 100644
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/CommandLineGATK.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/CommandLineGATK.java
@@ -43,7 +43,7 @@ import java.net.InetAddress;
import java.util.*;
/**
- * All command line parameters accepted by all tools in the GATK.
+ * Command line parameters accepted by most if not all tools in the GATK
*
* Info for end users
*
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java
index 1b4548d38..737a46ba1 100644
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/arguments/GATKArgumentCollection.java
@@ -65,8 +65,12 @@ public class GATKArgumentCollection {
@Input(fullName = "input_file", shortName = "I", doc = "Input file containing sequence data (BAM or CRAM)", required = false)
public List samFiles = new ArrayList<>();
+ /**
+ * This emits a log entry (level INFO) containing the full list of sequence data files to be included in the analysis
+ * (including files inside .bam.list or .cram.list files).
+ */
@Advanced
- @Argument(fullName = "showFullBamList",doc="Emit a log entry (level INFO) containing the full list of sequence data files to be included in the analysis (including files inside .bam.list or .cram.list files).")
+ @Argument(fullName = "showFullBamList",doc="Emit list of input BAM/CRAM files to log")
public Boolean showFullBamList = false;
@Advanced
@@ -404,12 +408,18 @@ public class GATKArgumentCollection {
required = false)
public boolean disableAutoIndexCreationAndLockingWhenReadingRods = false;
+ /**
+ * FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.
+ */
@Hidden
- @Argument(fullName = "no_cmdline_in_header", shortName = "no_cmdline_in_header", doc = "Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.",
+ @Argument(fullName = "no_cmdline_in_header", shortName = "no_cmdline_in_header", doc = "Don't include the command line in the VCF header",
required = false)
public boolean disableCommandLineInVCF = false;
- @Argument(fullName = "sites_only", shortName = "sites_only", doc = "Just output sites without genotypes (i.e. only the first 8 columns of the VCF)",
+ /**
+ * This produces a VCF with only the first 8 columns of site-level information and without any sample-level info (genotypes etc).
+ */
+ @Argument(fullName = "sites_only", shortName = "sites_only", doc = "Output sites-only VCF",
required = false)
public boolean sitesOnlyVCF = false;
@@ -417,9 +427,9 @@ public class GATKArgumentCollection {
* The VCF specification permits missing records to be dropped from the end of FORMAT fields, so long as GT is always output.
* This option prevents GATK from performing that trimming.
*
- * For example, given a FORMAT of
GT:AD:DP:PL
, GATK will by default emit ./.
for a variant with
+ * For example, given a FORMAT of GT:AD:DP:PL, GATK will by default emit ./. for a variant with
* no reads present (ie, the AD, DP, and PL fields are trimmed). If you specify -writeFullFormat, this record
- * would be emitted as
./.:.:.:.
+ * would be emitted as ./.:.:.:.
*/
@Argument(fullName = "never_trim_vcf_format_field", shortName = "writeFullFormat", doc = "Always output all the records in VCF FORMAT fields, even if some are missing",
required = false)
@@ -435,14 +445,19 @@ public class GATKArgumentCollection {
minValue = 0, maxValue = 9, required = false)
public Integer bamCompression = null;
+ /**
+ * If provided, output BAM/CRAM files will be simplified to include only key reads for downstream variation
+ * discovery analyses (removing duplicates, PF-, non-primary reads), as well as stripping all extended tags from the
+ * kept reads except the read group identifier
+ */
@Advanced
@Argument(fullName = "simplifyBAM", shortName = "simplifyBAM",
- doc = "If provided, output BAM/CRAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier",
+ doc = "Strip down read content and tags",
required = false)
public boolean simplifyBAM = false;
@Advanced
- @Argument(fullName = "disable_bam_indexing", doc = "Turn off on-the-fly creation of indices for output BAM/CRAM files.",
+ @Argument(fullName = "disable_bam_indexing", doc = "Turn off on-the-fly creation of indices for output BAM/CRAM files",
required = false)
public boolean disableBAMIndexing = false;
@@ -500,69 +515,16 @@ public class GATKArgumentCollection {
// --------------------------------------------------------------------------------------------------------------
/**
- * Reads PED file-formatted tabular text files describing meta-data about the samples being
- * processed in the GATK.
- *
- *
- *
- * The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:
- *
- *
- * - Family ID
- * - Individual ID
- * - Paternal ID
- * - Maternal ID
- * - Sex (1=male; 2=female; other=unknown)
- * - Phenotype
- *
- *
- * The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
- * A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
- * quantitative trait or an affection status column: GATK will automatically detect which type
- * (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).
- *
- * If an individual's sex is unknown, then any character other than 1 or 2 can be used.
- *
- * You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
- * line will be ignored. Do not start any family IDs with this character therefore.
- *
- * Affection status should be coded:
- *
- *
- * - -9 missing
- * - 0 missing
- * - 1 unaffected
- * - 2 affected
- *
- *
- * If any value outside of -9,0,1,2 is detected than the samples are assumed
- * to phenotype values are interpreted as string phenotype values. In this case -9 uniquely
- * represents the missing value.
- *
- * Genotypes (column 7 onwards) cannot be specified to the GATK.
- *
- * For example, here are two individuals (one row = one person):
- *
- *
- * FAM001 1 0 0 1 2
- * FAM001 2 0 0 1 2
- *
- *
- * Each -ped argument can be tagged with NO_FAMILY_ID, NO_PARENTS, NO_SEX, NO_PHENOTYPE to
- * tell the GATK PED parser that the corresponding fields are missing from the ped file.
- *
- * Note that most GATK walkers do not use pedigree information. Walkers that require pedigree
- * data should clearly indicate so in their arguments and will throw errors if required pedigree
- * information is missing.
+ * Reads PED file-formatted tabular text files describing meta-data about the samples being
+ * processed in the GATK. See https://www.broadinstitute.org/gatk/guide/article?id=7696 for more information
+ * on format requirements. Note that most GATK tools do not use pedigree information; for those that do it
+ * is indicated in their documentation.
*/
@Argument(fullName="pedigree", shortName = "ped", doc="Pedigree files for samples",required=false)
public List pedigreeFiles = Collections.emptyList();
/**
- * Inline PED records (see -ped argument). Each -pedString STRING can contain one or more
+ * Inline PED records. Each -pedString STRING can contain one or more
* valid PED records (see -ped) separated by semi-colons. Supports all tags for each pedString
* as -ped supports
*/
@@ -572,7 +534,7 @@ public class GATKArgumentCollection {
/**
* How strict should we be in parsing the PED files?
*/
- @Argument(fullName="pedigreeValidationType", shortName = "pedValidationType", doc="Validation strictness for pedigree information",required=false)
+ @Argument(fullName="pedigreeValidationType", shortName = "pedValidationType", doc="Validation strictness for pedigree",required=false)
public PedigreeValidationType pedigreeValidationType = PedigreeValidationType.STRICT;
// --------------------------------------------------------------------------------------------------------------
@@ -614,17 +576,18 @@ public class GATKArgumentCollection {
* DYNAMIC_SEEK attempts to optimize for minimal seek time by choosing an appropriate strategy and parameter (user-supplied parameter is ignored)
* DYNAMIC_SIZE attempts to optimize for minimal index size by choosing an appropriate strategy and parameter (user-supplied parameter is ignored)
*
- * This argument is deprecated, using the output file ".g.vcf" extension will automatically set the appropriate value
+ * This argument is no longer necessary when producing GVCF files. Using the ".g.vcf" extension for the output file will automatically set the appropriate value.
*/
@Argument(fullName="variant_index_type",shortName = "variant_index_type",doc="Type of IndexCreator to use for VCF/BCF indices",required=false)
@Advanced
public GATKVCFIndexType variant_index_type = GATKVCFUtils.DEFAULT_INDEX_TYPE;
/**
- * This is either the bin width or the number of features per bin, depending on the indexing strategy
+ * This is either the bin width or the number of features per bin, depending on the indexing strategy.
*
- * This argument is deprecated, using the output file ".g.vcf" extension will automatically set the appropriate value
+ * This argument is no longer necessary when producing GVCF files. Using the ".g.vcf" extension for the output file will
+ * automatically set the appropriate value.
*/
- @Argument(fullName="variant_index_parameter",shortName = "variant_index_parameter",doc="Parameter to pass to the VCF/BCF IndexCreator",required=false)
+ @Argument(fullName="variant_index_parameter",shortName = "variant_index_parameter",doc="Parameter to pass to the VCF/BCF IndexCreator", required=false)
@Advanced
public int variant_index_parameter = GATKVCFUtils.DEFAULT_INDEX_PARAMETER;
@@ -639,5 +602,28 @@ public class GATKArgumentCollection {
@Argument(fullName = "reference_window_stop", shortName = "ref_win_stop", doc = "Reference window stop", minValue = 0, required = false)
@Advanced
public int reference_window_stop = DEFAULT_REFERENCE_WINDOW_STOP;
+
+ // --------------------------------------------------------------------------------------------------------------
+ //
+ // Deprecated Arguments
+ // Keeping them here is meant to provide users with error messages that are more informative than "arg not defined"
+ // when they use an argument that has been put out of service
+ //
+ // -------------------------------------------------------------------------------------------------------------
+
+ @Hidden
+ @Deprecated // argument definition changed to string since the original enum has been removed
+ @Argument(fullName = "phone_home", shortName = "et", doc="Run reporting mode", required = false)
+ public String phoneHomeType = "";
+
+ @Hidden
+ @Deprecated
+ @Argument(fullName = "gatk_key", shortName = "K", doc="GATK key file required to run with -et NO_ET", required = false)
+ public File gatkKeyFile = null;
+
+ @Hidden
+ @Deprecated
+ @Argument(fullName = "tag", shortName = "tag", doc="Tag to identify this GATK run as part of a group of runs", required = false)
+ public String tag = "NA";
}
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignMappingQualityFilter.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignMappingQualityFilter.java
index e68a31bb0..c29935d32 100644
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignMappingQualityFilter.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignMappingQualityFilter.java
@@ -29,7 +29,7 @@ import htsjdk.samtools.SAMRecord;
import org.broadinstitute.gatk.utils.commandline.Argument;
/**
- * Set the mapping quality of all reads to a given value.
+ * Set the mapping quality of all reads to a given value
*
*
* If a BAM file contains erroneous or missing mapping qualities (MAPQ), this read transformer will set all your
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignOneMappingQualityFilter.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignOneMappingQualityFilter.java
index 44d24de9a..9ebfefadb 100644
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignOneMappingQualityFilter.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/ReassignOneMappingQualityFilter.java
@@ -29,7 +29,7 @@ import htsjdk.samtools.SAMRecord;
import org.broadinstitute.gatk.utils.commandline.Argument;
/**
- * Set the mapping quality of reads with a given value to another given value.
+ * Set the mapping quality of reads with a given value to another given value
*
*
* This read transformer will change a certain read mapping quality to a different value without affecting reads that
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FractionInformativeReads.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FractionInformativeReads.java
index d2413e3f5..711bc560d 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FractionInformativeReads.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FractionInformativeReads.java
@@ -45,7 +45,7 @@ import java.util.List;
import java.util.Map;
/**
- * The fraction of reads that were deemed "informative" by the HaplotypeCaller over the entire cohort
+ * The fraction of reads deemed informative over the entire cohort
*
*
The FractionInformativeReads annotation produces a single fraction for each site: sum(AD)/sum(DP). The sum in the numerator
* is over all the samples in the cohort and all the alleles in each sample. The sum in the denominator is over all the samples.
@@ -53,7 +53,7 @@ import java.util.Map;
*
*
Caveats
*
- * - This annotation is generated by HaplotypeCaller or GenotypeGVCFs (it will not work when called from VariantAnnotator).
+ * - This annotation can be generated by HaplotypeCaller, MuTect2 or GenotypeGVCFs (it will not work when called from VariantAnnotator).
*
*
* Related annotations
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverage.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverage.java
index 282791d9e..942fadcd6 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverage.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/coverage/DepthOfCoverage.java
@@ -106,7 +106,7 @@ import java.util.*;
// todo -- alter logarithmic scaling to spread out bins more
// todo -- allow for user to set linear binning (default is logarithmic)
// todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class}, gotoDev = HelpConstants.MC)
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class})
@By(DataSource.REFERENCE)
@PartitionBy(PartitionType.NONE)
@Downsample(by= DownsampleType.NONE, toCoverage=Integer.MAX_VALUE)
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/examples/GATKPaperGenotyper.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/examples/GATKPaperGenotyper.java
index 69d0d5f2a..29c9dc129 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/examples/GATKPaperGenotyper.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/examples/GATKPaperGenotyper.java
@@ -44,12 +44,14 @@ import java.util.Arrays;
import java.util.Comparator;
/**
- * A simple Bayesian genotyper, that outputs a text based call format. Intended to be used only as an
- * example in the GATK publication.
+ * Simple Bayesian genotyper used in the original GATK paper
+ *
+ * This is a very simple implementation of a Bayesian genotyper that outputs a text-based call format. It was developed
+ * solely to serve as a toy example in the original GATK publication, and should NOT be used in actual analysis work.
*
* @author aaron
*/
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_TOY, extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
public class GATKPaperGenotyper extends LocusWalker implements TreeReducible {
public static final double HUMAN_SNP_HETEROZYGOSITY = 1e-3;
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java
index 1e950ece8..fb975c37b 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java
@@ -94,7 +94,7 @@ import java.util.*;
* separate commands.
*
*/
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VAREVAL, extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=-50,stop=50))
public class VariantFiltration extends RodWalker {
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java
index afc710c9d..31e29fdcb 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/indels/IndelRealigner.java
@@ -120,7 +120,7 @@ import java.util.*;
*
* @author ebanks
*/
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class})
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.ON_OUTPUT)
public class IndelRealigner extends ReadWalker {
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/qc/QCRef.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/qc/QCRef.java
index 818489cca..49983241d 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/qc/QCRef.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/qc/QCRef.java
@@ -63,7 +63,7 @@ import java.io.PrintStream;
*
*
*/
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_REFUTILS, extraDocs = {CommandLineGATK.class} )
public class QCRef extends RefWalker {
@Output
public PrintStream out;
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java
index 634566b25..db0bfa087 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java
@@ -110,30 +110,27 @@ public class ASEReadCounter extends LocusWalker {
/**
* If this argument is enabled, loci with total depth lower than this threshold after all filters have been applied
- * will be skipped. This is set to -1 by default to disable the evaluation and ignore this threshold.
+ * will be skipped. Set this to -1 (the default) to disable the evaluation and ignore this threshold.
*/
- @Argument(fullName = "minDepthOfNonFilteredBase", shortName = "minDepth", doc = "Minimum number of bases that pass filters", required = false, minValue = 0, maxValue = Integer.MAX_VALUE)
+ @Argument(fullName = "minDepthOfNonFilteredBase", shortName = "minDepth", doc = "Minimum number of bases that pass filters", required = false, minValue = -1, maxValue = Integer.MAX_VALUE)
public int minDepthOfNonFilteredBases = -1;
/**
* If this argument is enabled, reads with mapping quality values lower than this threshold will not be counted.
- * This is set to -1 by default to disable the evaluation and ignore this threshold.
+ * This can be set to -1 to disable the evaluation and ignore this threshold.
*/
- @Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum read mapping quality", required = false, minValue = 0, maxValue = Integer.MAX_VALUE)
+ @Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum read mapping quality", required = false, minValue = -1, maxValue = Integer.MAX_VALUE)
public int minMappingQuality = 0;
/**
* If this argument is enabled, bases with quality scores lower than this threshold will not be counted.
- * This is set to -1 by default to disable the evaluation and ignore this threshold.
+ * This can be set to -1 to disable the evaluation and ignore this threshold.
*/
- @Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum base quality", required = false, minValue = 0, maxValue = Byte.MAX_VALUE)
+ @Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum base quality", required = false, minValue = -1, maxValue = Byte.MAX_VALUE)
public byte minBaseQuality = 0;
/**
- * These options modify how the tool deals with overlapping read pairs.
- * COUNT_READS - Count all reads independently, even if they are from the same fragment.
- * COUNT_FRAGMENTS - Count all fragments, even if the reads that compose the fragment are not consistent at that base.
- * COUNT_FRAGMENTS_REQUIRE_SAME_BASE - Count all fragments, but only if the reads that compose the fragment are consistent at that base (default).
+ * These options modify how the tool deals with overlapping read pairs. The default value is COUNT_FRAGMENTS_REQUIRE_SAME_BASE.
*/
@Argument(fullName = "countOverlapReadsType", shortName = "overlap", doc = "Handling of overlapping reads from the same fragment", required = false)
public CoverageUtils.CountPileupType countType = CoverageUtils.CountPileupType.COUNT_FRAGMENTS_REQUIRE_SAME_BASE;
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/varianteval/VariantEval.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/varianteval/VariantEval.java
index b1855ae4d..1e97c388a 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/varianteval/VariantEval.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/varianteval/VariantEval.java
@@ -138,7 +138,7 @@ import java.util.*;
* command line, to rapidly ascertain whether it will work or not.
*
*/
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VAREVAL, extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=-50, stop=50))
@PartitionBy(PartitionType.NONE)
public class VariantEval extends RodWalker implements TreeReducible {
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeConcordance.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeConcordance.java
index c495eae7a..ccac49b24 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeConcordance.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeConcordance.java
@@ -204,7 +204,7 @@ import java.util.*;
*
*
*/
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VAREVAL, extraDocs = {CommandLineGATK.class} )
public class GenotypeConcordance extends RodWalker>,ConcordanceMetrics> {
/**
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java
index 6c9912f06..ce9e84c0f 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ValidateVariants.java
@@ -123,7 +123,7 @@ import java.util.*;
*
*
*/
-@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VALIDATION, extraDocs = {CommandLineGATK.class} )
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VAREVAL, extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=0,stop=100))
public class ValidateVariants extends RodWalker {
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/DeprecatedToolChecks.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/DeprecatedToolChecks.java
index 9a951f994..00097db21 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/DeprecatedToolChecks.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/DeprecatedToolChecks.java
@@ -58,6 +58,7 @@ public class DeprecatedToolChecks {
deprecatedGATKWalkers.put("LiftOverVariants","3.5 (use Picard LiftoverVCF instead; see documentation for usage)");
deprecatedGATKWalkers.put("FilterLiftedVariants","3.5 (use Picard LiftoverVCF instead; see documentation for usage)");
deprecatedGATKWalkers.put("ListAnnotations","3.5 (this tool was impractical; see the online documentation instead)");
+ deprecatedGATKWalkers.put("GenotypeAndValidate","3.6 (this tool was old and untested -- no direct replacement)");
}
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/sampileup/SAMPileupCodec.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/sampileup/SAMPileupCodec.java
index 993aa5fae..3571dde69 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/sampileup/SAMPileupCodec.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/sampileup/SAMPileupCodec.java
@@ -37,7 +37,7 @@ import java.util.regex.Pattern;
import static org.broadinstitute.gatk.utils.codecs.sampileup.SAMPileupFeature.VariantType;
/**
- * Decoder for SAM pileup data.
+ * Decoder for SAM pileup data
*
*
* From the SAMTools project documentation:
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/samread/SAMReadCodec.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/samread/SAMReadCodec.java
index 1078bf717..d83dee271 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/samread/SAMReadCodec.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/samread/SAMReadCodec.java
@@ -34,7 +34,7 @@ import htsjdk.tribble.readers.LineIterator;
import htsjdk.tribble.util.ParsingUtils;
/**
- * Decodes a simple SAM text string.
+ * Decodes a simple SAM text string
*
*
* Reads in the SAM text version of a BAM file as a ROD. For testing only
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeature.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeature.java
index c824407ff..6dd64fb3c 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeature.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeature.java
@@ -45,6 +45,4 @@ public @interface DocumentedGATKFeature {
public String summary() default "";
/** Are there links to other docs that we should include? CommandLineGATK.class for walkers, for example? */
public Class[] extraDocs() default {};
- /** Who is the go-to developer for operation/documentation issues? */
- public String gotoDev() default "NA";
}
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeatureObject.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeatureObject.java
index f0b659076..8106caa9a 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeatureObject.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/DocumentedGATKFeatureObject.java
@@ -36,20 +36,19 @@ class DocumentedGATKFeatureObject {
private final Class classToDoc;
/** Are we enabled? */
private final boolean enable;
- private final String groupName, summary, gotoDev;
+ private final String groupName, summary;
private final Class[] extraDocs;
- public DocumentedGATKFeatureObject(Class classToDoc, final boolean enable, final String groupName, final String summary, final Class[] extraDocs, final String gotoDev) {
+ public DocumentedGATKFeatureObject(Class classToDoc, final boolean enable, final String groupName, final String summary, final Class[] extraDocs) {
this.classToDoc = classToDoc;
this.enable = enable;
this.groupName = groupName;
this.summary = summary;
this.extraDocs = extraDocs;
- this.gotoDev = gotoDev;
}
- public DocumentedGATKFeatureObject(Class classToDoc, final String groupName, final String summary, final String gotoDev) {
- this(classToDoc, true, groupName, summary, new Class[]{}, gotoDev);
+ public DocumentedGATKFeatureObject(Class classToDoc, final String groupName, final String summary) {
+ this(classToDoc, true, groupName, summary, new Class[]{});
}
public Class getClassToDoc() { return classToDoc; }
@@ -57,5 +56,4 @@ class DocumentedGATKFeatureObject {
public String groupName() { return groupName; }
public String summary() { return summary; }
public Class[] extraDocs() { return extraDocs; }
- public String gotoDev() { return gotoDev; }
}
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDocUtils.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDocUtils.java
index 608a9803c..7512a0f99 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDocUtils.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDocUtils.java
@@ -30,14 +30,6 @@ public class GATKDocUtils {
* The URL root for RELEASED GATKDOC units
*/
public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = HelpConstants.GATK_DOCS_URL;
- /**
- * The URL root for STABLE GATKDOC units //TODO: do sthing with this or remove -- URL goes nowhere
- */
- public final static String URL_ROOT_FOR_STABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/stable/";
- /**
- * The URL root for UNSTABLE GATKDOC units //TODO: do sthing with this or remove -- URL goes nowhere
- */
- public final static String URL_ROOT_FOR_UNSTABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/unstable/";
/**
* Return the filename of the GATKDoc PHP that would be generated for Class. This
@@ -59,7 +51,7 @@ public class GATKDocUtils {
/**
* Returns a full URL http://etc/ linking to the documentation for class (assuming it
- * exists). Currently points to the RELEASE doc path only. //TODO: do sthing with other paths or remove ?
+ * exists). Currently points to the RELEASE doc path only.
*
* @param c
* @return
@@ -68,8 +60,6 @@ public class GATKDocUtils {
String classPath = phpFilenameForClass(c);
StringBuilder b = new StringBuilder();
b.append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath);
- //b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n");
- //b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n");
return b.toString();
}
}
\ No newline at end of file
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDoclet.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDoclet.java
index 6aaa249f9..e0bd7365e 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDoclet.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GATKDoclet.java
@@ -79,9 +79,9 @@ public abstract class GATKDoclet {
final private static String FORUM_KEY_PATH = "/local/gsa-engineering/gatkdocs_publisher/forum.key";
- final private static String OUTPUT_FILE_EXTENSION = "php";
+ final private static String OUTPUT_FILE_EXTENSION = "html";
- /** Controls the extension of the non-json output files, and also the HREFs to these files. Default: php */
+ /** Controls the extension of the non-json output files, and also the HREFs to these files. Default: html */
final private static String OUTPUT_FILE_EXTENSION_OPTION = "-output-file-extension";
// ----------------------------------------------------------------------
//
@@ -120,8 +120,7 @@ public abstract class GATKDoclet {
static {
STATIC_DOCS.add(new DocumentedGATKFeatureObject(FeatureCodec.class,
HelpConstants.DOCS_CAT_RODCODECS,
- "Tribble codecs for reading reference ordered data (ROD) files such as VCF or BED",
- "NA"));
+ "Codecs for reading resource files in reference ordered data (ROD) files such as BED"));
}
/**
@@ -264,7 +263,7 @@ public abstract class GATKDoclet {
List old = ForumAPIUtils.getPostedTools(forumKey);
for (String s : old)
- System.out.println(s);
+ { } // debug listing disabled (was: System.out.println(s)); explicit empty body keeps the printf below out of the loop
System.out.printf("Forum has %d items%n", old.size());
System.out.printf("Docs have %d items%n", docWorkUnits.size());
@@ -354,11 +353,11 @@ public abstract class GATKDoclet {
if (docClass.isAnnotationPresent(DocumentedGATKFeature.class)) {
DocumentedGATKFeature f = docClass.getAnnotation(DocumentedGATKFeature.class);
- return new DocumentedGATKFeatureObject(docClass, f.enable(), f.groupName(), f.summary(), f.extraDocs(), f.gotoDev());
+ return new DocumentedGATKFeatureObject(docClass, f.enable(), f.groupName(), f.summary(), f.extraDocs());
} else {
for (DocumentedGATKFeatureObject staticDocs : STATIC_DOCS) {
if (staticDocs.getClassToDoc().isAssignableFrom(docClass)) {
- return new DocumentedGATKFeatureObject(docClass, staticDocs.enable(), staticDocs.groupName(), staticDocs.summary(), staticDocs.extraDocs(), staticDocs.gotoDev());
+ return new DocumentedGATKFeatureObject(docClass, staticDocs.enable(), staticDocs.groupName(), staticDocs.summary(), staticDocs.extraDocs());
}
}
return null;
@@ -469,11 +468,12 @@ public abstract class GATKDoclet {
if (annotation.groupName().endsWith(" Tools")) supercatValue = "tools";
else if (annotation.groupName().endsWith(" Utilities")) supercatValue = "utilities";
else if (annotation.groupName().startsWith("Engine ")) supercatValue = "engine";
- else if (annotation.groupName().endsWith(" (DevZone)")) supercatValue = "dev";
+ else if (annotation.groupName().endsWith(" (Exclude)")) supercatValue = "exclude";
else supercatValue = "other";
- root.put("supercat", supercatValue);
-
+ //if (!supercatValue.contentEquals("exclude")) {
+ root.put("supercat", supercatValue);
+ //}
return root;
}
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GenericDocumentationHandler.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GenericDocumentationHandler.java
index a06edb68b..cb959d0a2 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GenericDocumentationHandler.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/GenericDocumentationHandler.java
@@ -117,8 +117,6 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH
for (Tag tag : toProcess.classDoc.tags()) {
root.put(tag.name(), tag.text());
}
-
- root.put("gotoDev", toProcess.annotation.gotoDev());
}
/**
@@ -190,7 +188,6 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH
argBindings.put("maxValue", "NA");
argBindings.put("minRecValue", "NA");
argBindings.put("maxRecValue", "NA");
- argBindings.put("defaultValue", "NA");
}
// Finalize argument bindings
args.get(kind).add(argBindings);
@@ -276,9 +273,9 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH
final Object instance = makeInstanceIfPossible(toProcess.clazz);
if (instance != null) {
final Object value = getFieldValue(instance, argumentSource.field.getName());
- if (value != null)
+ if (value != null) {
return value;
-
+ }
if (argumentSource.createsTypeDefault()) {
try { // handle the case where there's an implicit default
return argumentSource.typeDefaultDocString();
@@ -391,7 +388,7 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH
}
/**
- * Pretty prints value
+ * Pretty prints value. TODO: this is probably the place to fix the default value display problem.
*
* Assumes value != null
*
@@ -512,7 +509,7 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH
if (field.isAnnotationPresent(ArgumentCollection.class)) {
ClassDoc typeDoc = getRootDoc().classNamed(fieldDoc.type().qualifiedTypeName());
if (typeDoc == null)
- throw new ReviewedGATKException("Tried to get javadocs for ArgumentCollection field " + fieldDoc + " but could't find the class in the RootDoc");
+ throw new ReviewedGATKException("Tried to get javadocs for ArgumentCollection field " + fieldDoc + " but couldn't find the class in the RootDoc");
else {
FieldDoc result = getFieldDoc(typeDoc, name, false);
if (result != null)
@@ -563,7 +560,7 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH
/**
* Returns a human readable string that describes the Type type of a GATK argument.
*
- * This will include parameterized types, so that Set{T} shows up as Set(T) and not
+ * This will include parametrized types, so that Set{T} shows up as Set(T) and not
* just Set in the docs.
*
* @param type
@@ -644,9 +641,7 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH
FeatureManager manager = new FeatureManager();
List rodTypes = new ArrayList();
for (FeatureManager.FeatureDescriptor descriptor : manager.getByFeature(featureClass)) {
- rodTypes.add(String.format("%s",
- GATKDocUtils.phpFilenameForClass(descriptor.getCodecClass()),
- descriptor.getName()));
+ rodTypes.add(descriptor.getName());
}
root.put("rodTypes", Utils.join(", ", rodTypes));
@@ -658,6 +653,10 @@ public abstract class GenericDocumentationHandler extends DocumentedGATKFeatureH
root.put("summary", def.doc != null ? def.doc : "");
root.put("fulltext", fieldDoc.commentText());
+ // Does this argument interact with any others?
+ root.put("otherArgumentRequired", def.otherArgumentRequired != null ? def.otherArgumentRequired : "NA");
+ root.put("exclusiveOf", def.exclusiveOf != null ? def.exclusiveOf : "NA");
+
// What are our enum options?
if (def.validOptions != null) {
root.put("options", docForEnumArgument(source.field.getType()));
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpConstants.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpConstants.java
index 279af20ed..f9a23dd84 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpConstants.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpConstants.java
@@ -44,44 +44,21 @@ public class HelpConstants {
* The names get parsed to make supercategories in the doc index,
* so be careful when making big changes -- see GATKDoclet.java toMap()
*/
- public final static String DOCS_CAT_DATA = "Sequence Data Processing Tools";
- public final static String DOCS_CAT_QC = "Diagnostics and Quality Control Tools";
public final static String DOCS_CAT_ENGINE = "Engine Parameters (available to all tools)";
- public final static String DOCS_CAT_RF = "Read Filters";
- public final static String DOCS_CAT_REFUTILS = "Reference Utilities";
- public final static String DOCS_CAT_RODCODECS = "ROD Codecs";
- public final static String DOCS_CAT_USRERR = "User Exceptions (DevZone)";
- public final static String DOCS_CAT_VALIDATION = "Validation Utilities";
- public final static String DOCS_CAT_ANNOT = "Variant Annotations";
+ public final static String DOCS_CAT_QC = "Diagnostics and Quality Control Tools";
+ public final static String DOCS_CAT_DATA = "Sequence Data Processing Tools";
public final static String DOCS_CAT_VARDISC = "Variant Discovery Tools";
- public final static String DOCS_CAT_VARMANIP = "Variant Evaluation and Manipulation Tools";
- public final static String DOCS_CAT_TOY = "Toy Walkers (DevZone)";
- public final static String DOCS_CAT_HELPUTILS = "Help Utilities";
-
- public static String forumPost(String post) {
- return GATK_FORUM_URL + post;
- }
+ public final static String DOCS_CAT_VAREVAL = "Variant Evaluation Tools";
+ public final static String DOCS_CAT_VARMANIP = "Variant Manipulation Tools";
+ public final static String DOCS_CAT_ANNOT = "Annotation Modules";
+ public final static String DOCS_CAT_RF = "Read Filters";
+ public final static String DOCS_CAT_RODCODECS = "Resource File Codecs";
+ public final static String DOCS_CAT_REFUTILS = "Reference Utilities";
+ public final static String DOCS_CAT_USRERR = "User Exceptions (Exclude)";
+ public final static String DOCS_CAT_TOY = "Toy Examples (Exclude)";
public static String articlePost(Integer id) {
return GATK_ARTICLE_URL + "?id=" + id.toString();
}
- /**
- * Go-to developer name codes for tracking and display purposes. Only current team members should be in this list.
- * When someone leaves, their charges should be redistributed. The actual string should be closest to the dev's
- * abbreviated name or two/three-letter nickname as possible. The code can be something else if necessary to
- * disambiguate from other variable.
- */
- public final static String MC = "MC"; // Mauricio Carneiro
- public final static String EB = "EB"; // Eric Banks
- public final static String RP = "RP"; // Ryan Poplin
- public final static String GVDA = "GG"; // Geraldine Van der Auwera
- public final static String VRR = "VRR"; // Valentin Ruano-Rubio
- public final static String ALM = "ALM"; // Ami Levy-Moonshine
- public final static String BH = "BH"; // Bertrand Haas
- public final static String JoT = "JT"; // Joel Thibault
- public final static String DR = "DR"; // David Roazen
- public final static String KS = "KS"; // Khalid Shakir
-
-
}
\ No newline at end of file
diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpFormatter.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpFormatter.java
index f613b9431..e553ba02d 100644
--- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpFormatter.java
+++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/help/HelpFormatter.java
@@ -204,6 +204,7 @@ public class HelpFormatter {
builder.append(Utils.join("|",argumentDefinition.validOptions));
builder.append(")");
}
+
return builder.toString();
}
diff --git a/settings/helpTemplates/common.html b/settings/helpTemplates/common.html
index 54001626d..20e144400 100644
--- a/settings/helpTemplates/common.html
+++ b/settings/helpTemplates/common.html
@@ -1,30 +1,7 @@
-
-
<#--
- This file contains all the theming neccesary to present GATKDocs on the GATK website
- Included are the paths to our bootstrap assets as well as helper functions to generate relevant links
+ This file contains part of the theming necessary to present GATKDocs on the GATK website. Included are the
+ paths to our bootstrap assets as well as helper functions to generate relevant links. Styling is separated
+ out, so pages will be minimalistic html unless replacement styling is provided.
-->
@@ -37,8 +14,8 @@
Return to top
See also
- Guide Index |
- Tool Documentation Index |
+ GATK Documentation Index |
+ Tool Docs Index |
Support Forum
@@ -62,7 +39,7 @@
}
- <#assign seq = ["engine", "tools", "utilities", "other"]>
+ <#assign seq = ["engine", "tools", "other", "utilities"]>
<#list seq as supercat>
<#list groups?sort_by("name") as group>
diff --git a/settings/helpTemplates/generic.index.template.html b/settings/helpTemplates/generic.index.template.html
index 794e50dc6..490d12007 100644
--- a/settings/helpTemplates/generic.index.template.html
+++ b/settings/helpTemplates/generic.index.template.html
@@ -1,32 +1,9 @@
-
-
@@ -70,9 +47,9 @@
${version}
- <#assign seq = ["engine", "tools", "utilities", "other", "dev"]>
+ <#assign seq = ["engine", "tools", "other", "utilities"]>
<#list seq as supercat>
-
+
<#list groups?sort_by("name") as group>
<#if group.supercat == supercat>
<@emitGroup group=group/>
diff --git a/settings/helpTemplates/generic.template.html b/settings/helpTemplates/generic.template.html
index 0141c8673..09fb45f70 100644
--- a/settings/helpTemplates/generic.template.html
+++ b/settings/helpTemplates/generic.template.html
@@ -1,32 +1,9 @@
-
-
@@ -64,12 +41,14 @@
${arg.summary}
${arg.fulltext}
-
-
<#if arg.rodTypes != "NA">
-
${arg.name} binds reference ordered data. This argument supports ROD files of the following types: ${arg.rodTypes}
+
This argument supports reference-ordered data (ROD) files in the following formats: ${arg.rodTypes}
+ #if>
+ <#if arg.otherArgumentRequired != "NA">
+
Dependency: This argument requires that you also specify ${arg.otherArgumentRequired}.
+ #if>
+ <#if arg.exclusiveOf != "NA">
+
Exclusion: This argument cannot be used at the same time as ${arg.exclusiveOf}.
#if>
<#if arg.options?has_content>
@@ -88,7 +67,7 @@
#if>
#if>
${arg.type}
- <#if arg.defaultValue?is_number>
+ <#if arg.defaultValue?has_content>
${arg.defaultValue}
#if>
<#if arg.minValue?is_number>
@@ -140,12 +119,7 @@
${name}
${summary}
- <#-- using goto dev annotation instead, see above footer
- <#if author??>
-
Author
- ${author}
-
- #if> -->
+
<#if group?? >
Category
${group}
@@ -282,7 +256,6 @@
| Argument name(s) |
-
Default value |
Summary |
@@ -306,7 +279,7 @@
#if>
- <#-- List all of the -->
+ <#-- List all of the things -->
<#if arguments.all?size != 0>
<#-- Create the argument details -->
Argument details
@@ -317,10 +290,6 @@
#if>
<@footerInfo />
- <#-- Specify go-to developer (for internal use) -->
- <#if gotoDev??>
- GTD: ${gotoDev}
- #if>
<@footerClose />