diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 5acea12f6..9f90a1308 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -72,11 +72,11 @@ import java.util.Map; /** - * The depth of coverage of each VCF allele in this sample. + * The depth of coverage of each allele per sample * - * The AD and DP are complementary fields that are two important ways of thinking about the depth of the data for this + *
The AD and DP are complementary fields that are two important ways of thinking about the depth of the data for this * sample at this site. While the sample-level (FORMAT) DP field describes the total depth of reads that passed the - * Unified Genotyper's internal quality control metrics (like MAPQ > 17, for example), the AD values (one for each of + * caller's internal quality control metrics (like MAPQ > 17, for example), the AD values (one for each of * REF and ALT fields) is the unfiltered count of all reads that carried with them the * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the * power I have to determine the genotype of the sample at this site, while the AD tells me how many times @@ -86,10 +86,12 @@ import java.util.Map; * normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that * the AD isn't necessarily calculated exactly for indels. Only reads which are statistically favoring one allele over the other are counted. * Because of this fact, the sum of AD may be different than the individual sample depth, especially when there are - * many non-informatice reads. - * Because the AD includes reads and bases that were filtered by the Unified Genotyper and in case of indels is based on a statistical computation, + * many non-informative reads.
+ * + *Because the AD includes reads and bases that were filtered by the caller and in case of indels is based on a statistical computation, * one should not base assumptions about the underlying genotype based on it; - * instead, the genotype likelihoods (PLs) are what determine the genotype calls. + * instead, the genotype likelihoods (PLs) are what determine the genotype calls.
+ * */ public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index 48b3593c5..aa5b779da 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -70,7 +70,6 @@ import java.util.Map; /** * The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation { public MapRequires at least 10 samples in order to run. Only genotypes with sufficient quality (>10) will be taken into account.
*/ public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index c25cb6820..f9663d33e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -63,7 +63,11 @@ import java.util.List; import java.util.Map; /** - * Largest contiguous homopolymer run of the variant allele in either direction on the reference. Computed only for bi-allelic sites. + * Largest contiguous homopolymer run of the variant allele in either direction on the reference. + * + *Computed only for bi-allelic sites.
+ * + ** Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation * versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is * diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than * the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios. + *
+ * + *Note that this annotation can only be used with VariantAnnotator (not with UnifiedGenotyper or HaplotypeCaller).
*/ public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index cede1e5ee..c3a0618ef 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -65,7 +65,9 @@ import java.util.Map; /** - * Fraction of reads containing spanning deletions at this site. + * Fraction of reads containing spanning deletions at this site + * + *Note that this annotation is currently not compatible with HaplotypeCaller.
*/ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java index 2e0e759c2..d976592cb 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java @@ -66,6 +66,11 @@ import java.util.List; import java.util.Map; +/** + * Annotates variants that are composed of tandem repeats + * + *Note that this annotation is currently not compatible with HaplotypeCaller.
+ */ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements StandardAnnotation { private static final String STR_PRESENT = "STR"; private static final String REPEAT_UNIT_KEY = "RU"; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java index b3f5728a2..f29899f7f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -65,9 +65,9 @@ import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; /** - * Created by IntelliJ IDEA. - * User: rpoplin, lfran, ebanks - * Date: 11/14/11 + * Wittkowski transmission disequilibrium test + * + *Note that this annotation can only be used with VariantAnnotator (not with UnifiedGenotyper or HaplotypeCaller).
*/ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index e1972334b..dde49b7db 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -95,14 +95,14 @@ import java.util.List; * ** - *
* The input read data whose base quality scores need to be assessed. *
* A database of known polymorphic sites to skip over. *
* - ** A GATK Report file with many tables: *
* java -Xmx4g -jar GenomeAnalysisTK.jar \
* -T BaseRecalibrator \
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
index 5ab296a5f..ee2edee5a 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
@@ -146,38 +146,38 @@ public class RecalibrationArgumentCollection {
public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
/**
- * The context covariate will use a context of this size to calculate it's covariate value for base mismatches
+ * The context covariate will use a context of this size to calculate its covariate value for base mismatches. Must be between 1 and 13 (inclusive). Note that higher values will increase runtime and required Java heap size.
*/
- @Argument(fullName = "mismatches_context_size", shortName = "mcs", doc = "size of the k-mer context to be used for base mismatches", required = false)
+ @Argument(fullName = "mismatches_context_size", shortName = "mcs", doc = "Size of the k-mer context to be used for base mismatches", required = false)
public int MISMATCHES_CONTEXT_SIZE = 2;
/**
- * The context covariate will use a context of this size to calculate it's covariate value for base insertions and deletions
+ * The context covariate will use a context of this size to calculate its covariate value for base insertions and deletions. Must be between 1 and 13 (inclusive). Note that higher values will increase runtime and required Java heap size.
*/
- @Argument(fullName = "indels_context_size", shortName = "ics", doc = "size of the k-mer context to be used for base insertions and deletions", required = false)
+ @Argument(fullName = "indels_context_size", shortName = "ics", doc = "Size of the k-mer context to be used for base insertions and deletions", required = false)
public int INDELS_CONTEXT_SIZE = 3;
/**
* The cycle covariate will generate an error if it encounters a cycle greater than this value.
* This argument is ignored if the Cycle covariate is not used.
*/
- @Argument(fullName = "maximum_cycle_value", shortName = "maxCycle", doc = "the maximum cycle value permitted for the Cycle covariate", required = false)
+ @Argument(fullName = "maximum_cycle_value", shortName = "maxCycle", doc = "The maximum cycle value permitted for the Cycle covariate", required = false)
public int MAXIMUM_CYCLE_VALUE = 500;
/**
- * A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read for this default value. Negative value turns it off (default is off)
+ * A default base quality to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read with this default value. A negative value turns it off. [default is off]
*/
@Argument(fullName = "mismatches_default_quality", shortName = "mdq", doc = "default quality for the base mismatches covariate", required = false)
public byte MISMATCHES_DEFAULT_QUALITY = -1;
/**
- * A default base qualities to use as a prior (reported quality) in the insertion covariate model. This parameter is used for all reads without insertion quality scores for each base. (default is on)
+ * A default base quality to use as a prior (reported quality) in the insertion covariate model. This parameter is used for all reads without insertion quality scores for each base. [default is on]
*/
@Argument(fullName = "insertions_default_quality", shortName = "idq", doc = "default quality for the base insertions covariate", required = false)
public byte INSERTIONS_DEFAULT_QUALITY = 45;
/**
- * A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read for this default value. Negative value turns it off (default is off)
+ * A default base quality to use as a prior (reported quality) in the deletion covariate model. This value will replace all base qualities in the read with this default value. A negative value turns it off. [default is on]
*/
@Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false)
public byte DELETIONS_DEFAULT_QUALITY = 45;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java
index a8a765ddc..36da92b4f 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java
@@ -69,15 +69,15 @@ import java.util.Map;
*
* This is a test walker used for asserting that the ReduceReads procedure is not making blatant mistakes when compressing bam files.
*
- * Input
+ * Input
*
* Two BAM files (using -I) with different read group IDs
*
- * Output
+ * Output
*
* [Output description]
*
- * Examples
+ * Examples
*
* java
* -jar GenomeAnalysisTK.jar
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
index 3df2aef38..bc582fd49 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
@@ -86,17 +86,17 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
* shown to reduce a typical whole exome BAM file 100x. The higher the coverage, the bigger the
* savings in file size and performance of the downstream tools.
*
- * Input
+ * Input
*
* The BAM file to be compressed
*
*
- * Output
+ * Output
*
* The compressed (reduced) BAM file.
*
*
- * Examples
+ * Examples
*
* java -Xmx4g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/BaseCoverageDistribution.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/BaseCoverageDistribution.java
index 37e82a90c..9bd08a020 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/BaseCoverageDistribution.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/BaseCoverageDistribution.java
@@ -71,17 +71,17 @@ import java.util.Map;
*
* The BAM file and an optional interval list (works for WGS as well) *
* - ** A GATK Report with the coverage distribution per base * *
- ** java -Xmx4g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java index 8b9b37c18..e4310588e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java @@ -75,7 +75,7 @@ import java.util.*; * * * - *Input
+ *Input
**
* A modified VCF detailing each interval by sample *
* - ** java * -jar GenomeAnalysisTK.jar diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index b1a26b7a2..6b4d1f7a8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -63,6 +63,31 @@ import org.broadinstitute.sting.utils.help.HelpConstants; import java.io.PrintStream; +/** + * Outputs a list of intervals that are covered above a given threshold. + * + *The list can be used as an interval list for other walkers. Note that if the -uncovered argument is given, the tool will instead output intervals that fail the coverage threshold.
+ * + *Input
+ *+ * One or more BAM files. + *
+ * + *Output
+ *+ * List of covered (or uncovered) intervals. + *
+ * + *Example
+ *+ * java -Xmx2g -jar GenomeAnalysisTK.jar \ + * -T FindCoveredIntervals \ + * -R ref.fasta \ + * -I my_file.bam \ + * -o output.list + *+ * + */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.CONTIG) @ActiveRegionTraversalParameters(extension = 0, maxRegion = 50000) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 137a1cfa5..4347a1a84 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -86,17 +86,17 @@ import java.util.*; * both single sample data and multi-sample data. * * - *Input
+ *Input
** The read data from which to make variant calls. *
* - *Output
+ *Output
** A raw, unfiltered, highly sensitive callset in VCF format. *
* - *Example generic command for multi-sample SNP calling
+ *Example generic command for multi-sample SNP calling
** java -jar GenomeAnalysisTK.jar \ * -R resources/Homo_sapiens_assembly18.fasta \ @@ -117,7 +117,7 @@ import java.util.*; * argument descriptions below. * * - *Example command for generating calls at all sites
+ *Example command for generating calls at all sites
** java -jar /path/to/GenomeAnalysisTK.jar \ * -l INFO \ @@ -128,7 +128,7 @@ import java.util.*; * --output_mode EMIT_ALL_SITES ** - *Caveats
+ *Caveats
*
* Input bam file(s) from which to make calls *
* - ** VCF file with raw, unrecalibrated SNP and indel calls. *
* - ** java * -jar GenomeAnalysisTK.jar @@ -120,7 +120,7 @@ import java.util.*; * -o output.raw.snps.indels.vcf ** - *
* 2 variant files to resolve. *
* - ** A single consensus VCF. *
* - ** java -Xmx1g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index c7d24f475..d3a13df29 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -87,7 +87,7 @@ import java.io.IOException; import java.util.*; /** - * Performs local realignment of reads based on misalignments due to the presence of indels. + * Performs local realignment of reads to correct misalignments due to the presence of indels. * ** The local realignment tool is designed to consume one or more BAM files and to locally realign reads such that the number of mismatching bases @@ -100,39 +100,46 @@ import java.util.*; * indel suitable for standard variant discovery approaches. Unlike most mappers, this walker uses the full alignment context to determine whether an * appropriate alternate reference (i.e. indel) exists. Following local realignment, the GATK tool Unified Genotyper can be used to sensitively and * specifically identify indels. - *
+ *
*There are 2 steps to the realignment process: *
- *- Determining (small) suspicious intervals which are likely in need of realignment (see the RealignerTargetCreator tool)
*- Running the realigner over those intervals (IndelRealigner)
*- * An important note: the input bam(s), reference, and known indel file(s) should be the same ones used for the RealignerTargetCreator step. *
- * Another important note: because reads produced from the 454 technology inherently contain false indels, the realigner will not currently work with them - * (or with reads from similar technologies). + * For more details, see http://www.broadinstitute.org/gatk/guide/article?id=38 + *
* - *Input
+ *Input
** One or more aligned BAM files and optionally one or more lists of known indels. *
* - *Output
+ *Output
** A realigned version of your input BAM file(s). *
* - *Examples
+ *Example
** java -Xmx4g -jar GenomeAnalysisTK.jar \ - * -I input.bam \ - * -R ref.fasta \ * -T IndelRealigner \ + * -R ref.fasta \ + * -I input.bam \ * -targetIntervals intervalListFromRTC.intervals \ * -o realignedBam.bam \ * [-known /path/to/indels.vcf] \ * [-compress 0] (this argument recommended to speed up the process *if* this is only a temporary file; otherwise, use the default value) ** + *Caveats
+ * + *
* A bam file to left-align. *
* - ** A left-aligned bam. *
* - ** java -Xmx3g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 1ee04e317..caeb1e8d7 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -99,22 +99,22 @@ import java.util.TreeSet; * Important note 3: because reads produced from the 454 technology inherently contain false indels, the realigner will not currently work with them * (or with reads from similar technologies). This tool also ignores MQ0 reads and reads with consecutive indel operators in the CIGAR string. * - *Input
+ *Input
** One or more aligned BAM files and optionally one or more lists of known indels. *
* - *Output
+ *Output
** A list of target intervals to pass to the Indel Realigner. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ - * -I input.bam \ - * -R ref.fasta \ * -T RealignerTargetCreator \ + * -R ref.fasta \ + * -I input.bam \ * -o forIndelRealigner.intervals \ * [--known /path/to/indels.vcf] *@@ -143,7 +143,7 @@ public class RealignerTargetCreator extends RodWalker> known = Collections.emptyList(); /** - * Any two SNP calls and/or high entropy positions are considered clustered when they occur no more than this many basepairs apart. + * Any two SNP calls and/or high entropy positions are considered clustered when they occur no more than this many basepairs apart. Must be > 1. */ @Argument(fullName="windowSize", shortName="window", doc="window size for calculating entropy or SNP clusters", required=false) protected int windowSize = 10; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 54a324411..a4c1caf86 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -90,7 +90,7 @@ import java.util.*; * - In trios: If two individuals are missing, the remaining individual is phased if it is homozygous. No phasing probability is emitted.
*
*
* An VCF with genotypes recalibrated as most likely under the familial constraint and phased by descent where non ambiguous.. *
* - ** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java index eb2bb62ef..bb8c14ef7 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java @@ -65,17 +65,17 @@ import java.util.*; * [Functionality of this walker] * * - *Input
+ *Input
** [Input description] *
* - *Output
+ *Output
** [Output description] *
* - *Examples
+ *Examples
** java * -jar GenomeAnalysisTK.jar diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java index 7f2cdd3d0..c1b484542 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java @@ -86,17 +86,17 @@ import static org.broadinstitute.sting.utils.variant.GATKVCFUtils.getVCFHeadersF * Performs physical phasing of SNP calls, based on sequencing reads. * * - *Input
+ *Input
** VCF file of SNP calls, BAM file of sequence reads. *
* - *Output
+ *Output
** Phased VCF file. *
* - *Examples
+ *Examples
** java * -jar GenomeAnalysisTK.jar diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java index d6a814ee8..6af39c0b0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java @@ -99,14 +99,14 @@ import static org.broadinstitute.sting.utils.IndelUtils.isInsideExtendedIndel; * * * - *Input
+ *Input
** A BAM file to make calls on and a VCF file to use as truth validation dataset. * * You also have the option to invert the roles of the files using the command line options listed below. *
* - *Output
+ *Output
** GenotypeAndValidate has two outputs. The truth table and the optional VCF file. The truth table is a * 2x2 table correlating what was called in the dataset with the truth of the call (whether it's a true @@ -176,7 +176,7 @@ import static org.broadinstitute.sting.utils.IndelUtils.isInsideExtendedIndel; * * * - *
Examples
+ *Examples
**
- * Genotypes BAM file from new technology using the VCF as a truth dataset: diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java index 5c216928b..d587c305e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java @@ -85,17 +85,17 @@ import java.util.*; * * User can additionally restrict output to a particular type of variant (SNP, Indel, etc.) * - *
* * - *Input
+ *Input
** One or more variant sets to choose from. *
* - *Output
+ *Output
** A sites-only VCF with the desired number of randomly selected sites. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index f2120213a..22425e62e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -81,7 +81,7 @@ import java.util.*; * to the desired level but also has the information necessary to pull out more variants for a higher sensitivity but a * slightly lower quality level. * - *Input
+ *Input
** The input raw variants to be recalibrated. *
@@ -89,11 +89,11 @@ import java.util.*; *
* The tranches file that was generated by the VariantRecalibrator walker. * - *
Output
+ *Output
** A recalibrated VCF file in which each variant is annotated with its VQSLOD and filtered if the score is below the desired quality level. * - *
Examples
+ *Examples
** java -Xmx3g -jar GenomeAnalysisTK.jar \ * -T ApplyRecalibration \ diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 57d9c219c..99d926ea5 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -80,6 +80,7 @@ import java.util.*; * ** This walker is the first pass in a two-stage processing step. This walker is designed to be used in conjunction with ApplyRecalibration walker. + *
* ** The purpose of the variant recalibrator is to assign a well-calibrated probability to each variant call in a call set. @@ -91,24 +92,26 @@ import java.util.*; * error model can then be applied to both known and novel variation discovered in the call set of interest to evaluate the * probability that each call is real. The score that gets added to the INFO field of each variant is called the VQSLOD. It is * the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model. + *
* ** NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version). * See http://www.r-project.org for more info on how to download and install R. + *
* - *Input
+ *Input
** The input raw variants to be recalibrated. *
* Known, truth, and training sets to be used by the algorithm. How these various sets are used is described below. * - *
Output
+ *Output
** A recalibration table file in VCF format that is used by the ApplyRecalibration walker. *
* A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data. * - *
Example
+ *Example
** java -Xmx4g -jar GenomeAnalysisTK.jar \ * -T VariantRecalibrator \ @@ -152,7 +155,7 @@ public class VariantRecalibrator extends RodWalker> resource = Collections.emptyList(); ///////////////////////////// @@ -170,7 +173,7 @@ public class VariantRecalibrator extends RodWalker Input + * Input
** A variant set to regenotype. *
* - *Output
+ *Output
** A re-genotyped VCF. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java index 5469b38c8..a16fdcaa1 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java @@ -53,17 +53,17 @@ package org.broadinstitute.sting.utils.recalibration.covariates; * [Functionality of this walker] * * - *Input
+ *Input
** [Input description] *
* - *Output
+ *Output
** [Output description] *
* - *Examples
+ *Examples
** java * -jar GenomeAnalysisTK.jar diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java index bb55ed0c5..4267c1ffd 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java @@ -53,17 +53,17 @@ package org.broadinstitute.sting.utils.recalibration.covariates; * [Functionality of this walker] * * - *Input
+ *Input
** [Input description] *
* - *Output
+ *Output
** [Output description] *
* - *Examples
+ *Examples
** java * -jar GenomeAnalysisTK.jar diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java index 9ade37019..045b21527 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java @@ -53,17 +53,17 @@ package org.broadinstitute.sting.utils.recalibration.covariates; * [Functionality of this walker] * * - *Input
+ *Input
** [Input description] *
* - *Output
+ *Output
** [Output description] *
* - *Examples
+ *Examples
** java * -jar GenomeAnalysisTK.jar diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java index c93f68ef8..5a308928d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java @@ -74,10 +74,10 @@ public class DiffObjectsIntegrationTest extends WalkerTest { @DataProvider(name = "data") public Object[][] createData() { - new TestParams(privateTestDir + "diffTestMaster.vcf", privateTestDir + "diffTestTest.vcf", true, "aea3d5df32a2acd400da48d06b4dbc60"); - new TestParams(publicTestDir + "exampleBAM.bam", publicTestDir + "exampleBAM.simple.bam", true, "3f46f5a964f7c34015d972256fe49a35"); - new TestParams(privateTestDir + "diffTestMaster.vcf", privateTestDir + "diffTestTest.vcf", false, "e71e23e7ebfbe768e59527bc62f8918d"); - new TestParams(publicTestDir + "exampleBAM.bam", publicTestDir + "exampleBAM.simple.bam", false, "47bf16c27c9e2c657a7e1d13f20880c9"); + new TestParams(privateTestDir + "diffTestMaster.vcf", privateTestDir + "diffTestTest.vcf", true, "71869ddf9665773a842a9def4cc5f3c8"); + new TestParams(publicTestDir + "exampleBAM.bam", publicTestDir + "exampleBAM.simple.bam", true, "cec7c644c84ef9c96aacaed604d9ec9b"); + new TestParams(privateTestDir + "diffTestMaster.vcf", privateTestDir + "diffTestTest.vcf", false, "47546e03344103020e49d8037a7e0727"); + new TestParams(publicTestDir + "exampleBAM.bam", publicTestDir + "exampleBAM.simple.bam", false, "d27b37f7a366c8dacca5cd2590d3c6ce"); return TestParams.getTests(TestParams.class); } diff --git a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd index dc7a08287..4a49cf932 100644 --- 
a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd +++ b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd @@ -19,9 +19,11 @@ Medical and Population Genetics Program Maintainer: Kiran Garimella } \references{ -GSA wiki page: http://www.broadinstitute.org/gatk +GATK website: http://www.broadinstitute.org/gatk -GATK help forum: http://www.broadinstitute.org/gatk +GATK documentation guide: http://www.broadinstitute.org/gatk/guide + +GATK help forum: http://gatkforums.broadinstitute.org } \examples{ ## get script arguments in interactive and non-interactive mode diff --git a/public/doc/README b/public/doc/README index ec5fa8500..e70ced0df 100644 --- a/public/doc/README +++ b/public/doc/README @@ -59,7 +59,7 @@ index (.fasta.fai). Instructions for preparing input files are available here: -http://www.broadinstitute.org/gsa/wiki/index.php/Preparing_input_files +http://www.broadinstitute.org/gatk/guide/article?id=1204 The bundled 'resources' directory contains an example BAM and fasta. @@ -69,7 +69,7 @@ The GATK is distributed with a few standard analyses, including PrintReads, Pileup, and DepthOfCoverage. More information on the included walkers is available here: -http://www.broadinstitute.org/gsa/wiki/index.php/Built-in_walkers +http://www.broadinstitute.org/gatk/gatkdocs To print the reads of the included sample data, untar the package into the GenomeAnalysisTK directory and run the following command: @@ -81,6 +81,6 @@ java -jar GenomeAnalysisTK/GenomeAnalysisTK.jar \ Support ------- -Documentation for the GATK is available at http://www.broadinstitute.org/gsa/wiki. +Documentation for the GATK is available at http://www.broadinstitute.org/gatk/guide. For help using the GATK, developing analyses with the GATK, bug reports, -or feature requests, please email gsadevelopers@broadinstitute.org. 
+or feature requests, please visit our support forum at http://gatkforums.broadinstitute.org/ diff --git a/public/java/src/org/broadinstitute/sting/alignment/CheckAlignment.java b/public/java/src/org/broadinstitute/sting/alignment/CheckAlignment.java index 93b4d5e6f..d313f35ce 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/CheckAlignment.java +++ b/public/java/src/org/broadinstitute/sting/alignment/CheckAlignment.java @@ -42,9 +42,14 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Iterator; /** - * Validates consistency of the aligner interface by taking reads already aligned by BWA in a BAM file, stripping them + * Validates consistency of the aligner interface + * + *Validates consistency of the aligner interface by taking reads already aligned by BWA in a BAM file, stripping them * of their alignment data, realigning them, and making sure one of the best resulting realignments matches the original - * alignment from the input file. + * alignment from the input file.
+ * + *Caveat
+ *This tool requires that BWA be available on the java path.
* * @author mhanna * @version 0.1 diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index 08aa5f8b3..cf11bb61c 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -370,7 +370,7 @@ public abstract class CommandLineProgram { errorPrintf("------------------------------------------------------------------------------------------%n"); errorPrintf("A GATK RUNTIME ERROR has occurred (version %s):%n", CommandLineGATK.getVersionNumber()); errorPrintf("%n"); - errorPrintf("Please visit the wiki to see if this is a known problem%n"); + errorPrintf("Please check the documentation guide to see if this is a known problem%n"); errorPrintf("If not, please post the error, with stack trace, to the GATK forum%n"); printDocumentationReference(); if ( msg == null ) // some exceptions don't have detailed messages diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index a3e19b944..a9016708b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -206,7 +206,7 @@ public class GATKArgumentCollection { * Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. * Please be aware that one should only run recalibration with the covariates file created on the same input bam(s). 
*/ - @Input(fullName="BQSR", shortName="BQSR", required=false, doc="The input covariates table file which enables on-the-fly base quality score recalibration") + @Input(fullName="BQSR", shortName="BQSR", required=false, doc="The input covariates table file which enables on-the-fly base quality score recalibration (intended for use with BaseRecalibrator and PrintReads)") public File BQSR_RECAL_FILE = null; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java index 362cb202e..fcae3cc68 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java @@ -41,17 +41,17 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; * [Functionality of this walker] * * - *Input
+ *Input
** [Input description] *
* - *Output
+ *Output
** [Output description] *
* - *Examples
+ *Examples
** java * -jar GenomeAnalysisTK.jar diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java index e0166ab38..41ab59845 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java @@ -37,18 +37,18 @@ import org.broadinstitute.sting.commandline.Argument; * * * - *Input
+ *Input
** BAM file(s) *
* * - *Output
+ *Output
** BAM file(s) with all reads mapping qualities reassigned *
* - *Examples
+ *Examples
** java * -jar GenomeAnalysisTK.jar diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignOneMappingQualityFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignOneMappingQualityFilter.java index c894dd801..f31313a86 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignOneMappingQualityFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignOneMappingQualityFilter.java @@ -47,18 +47,18 @@ import org.broadinstitute.sting.commandline.Argument; * * * - *Input
+ *Input
** BAM file(s) *
* * - *Output
+ *Output
** BAM file(s) with one read mapping quality selectively reassigned as desired *
* - *Examples
+ *Examples
** java * -jar GenomeAnalysisTK.jar diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index 73c31ef66..6e7bc9805 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -46,7 +46,7 @@ import java.util.Map; /** - * The allele balance (fraction of ref bases over ref + alt bases) across all bialleleic het-called samples + * The allele balance (fraction of ref bases over ref + alt bases) across all biallelic het-called samples */ public class AlleleBalance extends InfoFieldAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 826dc9f22..fa3ab885d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -55,17 +55,17 @@ import java.util.*; * VariantAnnotator is a GATK tool for annotating variant calls based on their context. * The tool is modular; new annotations can be written easily without modifying VariantAnnotator itself. * - *Input
+ *Input
** A variant set to annotate and optionally one or more BAM files. *
* - *Output
+ *Output
** An annotated VCF. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ @@ -142,7 +142,8 @@ public class VariantAnnotator extends RodWalkerimplements Ann protected List annotationsToExclude = new ArrayList (); /** - * See the -list argument to view available groups. + * If specified, all available annotations in the group will be applied. See the VariantAnnotator -list argument to view available groups. + * Keep in mind that RODRequiringAnnotations are not intended to be used as a group, because they require specific ROD inputs. */ @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false) protected List annotationGroupsToUse = new ArrayList (); @@ -166,13 +167,13 @@ public class VariantAnnotator extends RodWalker implements Ann /** * Note that the --list argument requires a fully resolved and correct command-line to work. */ - @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit") + @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit", required=false) protected Boolean LIST = false; /** * By default, the dbSNP ID is added only when the ID field in the variant VCF is empty. 
*/ - @Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="In conjunction with the dbSNP binding, append the dbSNP ID even when the variant VCF already has the ID field populated") + @Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="In conjunction with the dbSNP binding, append the dbSNP ID even when the variant VCF already has the ID field populated", required=false) protected Boolean ALWAYS_APPEND_DBSNP_ID = false; public boolean alwaysAppendDbsnpId() { return ALWAYS_APPEND_DBSNP_ID; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java index 2e85fe8f9..4b96dbffb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java @@ -61,7 +61,7 @@ import static java.lang.Math.log10; * Note that this walker requires all input files produced by Beagle. * * - * Example
+ *Example
** java -Xmx4000m -jar dist/GenomeAnalysisTK.jar \ * -R reffile.fasta -T BeagleOutputToVCF \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java index 937c3abc0..618fda0df 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java @@ -57,7 +57,7 @@ import java.util.*; * Converts the input VCF into a format accepted by the Beagle imputation/analysis program. ** - *
Input
+ *Input
** A VCF with variants to convert to Beagle format *
@@ -70,7 +70,7 @@ import java.util.*; * Optional: A file with a list of markers * * - *Examples
+ *Examples
** java -Xmx2g -jar dist/GenomeAnalysisTK.jar -L 20 \ * -R reffile.fasta -T ProduceBeagleInput \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java index 0681ebf1e..a2efa626c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java @@ -70,12 +70,12 @@ import java.io.PrintStream; * * * - *Input
+ *Input
** A BAM file containing exactly one sample. *
* - *Output
+ *Output
**
*
* * - *- -o: a OutputFormatted (recommended BED) file with the callable status covering each base
@@ -83,7 +83,7 @@ import java.io.PrintStream; *Examples
+ *Examples
** -T CallableLociWalker \ * -I my.bam \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java index 3bd114aa1..61574d947 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java @@ -66,7 +66,7 @@ import java.util.*; * and/or percentage of bases covered to or beyond a threshold. * Additionally, reads and bases can be filtered by mapping or base quality score. * - *Input
+ *Input
** One or more bam files (with proper headers) to be analyzed for coverage statistics *
@@ -75,7 +75,7 @@ import java.util.*; ** (for information about creating the REFSEQ Rod, please consult the RefSeqCodec documentation) *
- *Output
+ *Output
** Tables pertaining to different coverage summaries. Suffix on the table files declares the contents: *
@@ -98,7 +98,7 @@ import java.util.*; * - _cumulative_coverage_proportions: proportions of loci with >= X coverage, aggregated over all bases *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java index 9a6ef61d8..2975df4a5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java @@ -44,21 +44,21 @@ import java.util.List; * Walks along reference and calculates the GC content for each interval. * * - *Input
+ *Input
** A reference file *
* - *Output
+ *Output
** GC content calculations per interval. *
* - *Examples
+ *Example
** java -Xmx2g -jar GenomeAnalysisTK.jar \ - * -R ref.fasta \ * -T GCContentByInterval \ + * -R ref.fasta \ * -o output.txt \ * -L input.intervals *diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/CoveredByNSamplesSites.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/CoveredByNSamplesSites.java index a5a8edb0c..169c2708b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/CoveredByNSamplesSites.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/CoveredByNSamplesSites.java @@ -50,17 +50,17 @@ import java.util.Collection; * CoveredByNSamplesSites is a GATK tool for filter out sites based on their coverage. * The sites that pass the filter are printed out to an intervals file. * - *Input
+ *Input
** A variant file and optionally min coverage and sample percentage values. *
* - *Output
+ *Output
** An intervals file. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java index 76f5478a4..86676ca54 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java @@ -49,12 +49,12 @@ import java.io.PrintStream; * Emits a GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate for each read * group in the input BAMs FOR ONLY THE FIRST OF PAIR READS. * - ** * - *Input
+ *Input
** Any number of BAM files *
* - *Output
+ *Output
** GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate. * @@ -82,7 +82,7 @@ import java.io.PrintStream; *
Examples
+ *Examples
** java * -jar GenomeAnalysisTK.jar diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java index de7ac3e41..0af1dbed5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java @@ -53,12 +53,12 @@ import java.util.Map; * the median statistics are well determined. It is safe to run it WG and it'll finish in an appropriate * timeframe. * - ** * - *Input
+ *Input
** Any number of BAM files *
* - *Output
+ *Output
** GATKReport containing read group, sample, library, platform, center, median insert size and median read length. * @@ -86,7 +86,7 @@ import java.util.Map; *
Examples
+ *Examples
** java * -jar GenomeAnalysisTK.jar diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java index ccad7f0b2..a269a94bc 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java @@ -49,17 +49,17 @@ import java.util.List; * * * - *Input
+ *Input
** A BAM file. *
* - *Output
+ *Output
** A human/R readable table of tab separated values with one column per sample and one row per read. *
* - *Examples
+ *Examples
** java * -jar GenomeAnalysisTK.jar diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 7ac59790c..c909eb2d5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -83,7 +83,7 @@ public class DiffEngine { DiffElement masterElt = master.getElement(name); DiffElement testElt = test.getElement(name); if ( masterElt == null && testElt == null ) { - throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name); + throw new ReviewedStingException("BUG: unexpectedly got two null elements for field: " + name); } else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value // todo -- should one of these be a special MISSING item? diffs.add(new Difference(masterElt, testElt)); @@ -283,8 +283,7 @@ public class DiffEngine { // now that we have a specific list of values we want to show, display them GATKReport report = new GATKReport(); final String tableName = "differences"; - // TODO for Geraldine -- link needs to be updated below - report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", 3); + report.addTable(tableName, "Summarized differences between the master and test files. 
See http://www.broadinstitute.org/gatk/guide/article?id=1299 for more information", 3); final GATKReportTable table = report.getTable(tableName); table.addColumn("Difference"); table.addColumn("NumberOfOccurrences"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java index d1903c2bb..6b5189dfd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java @@ -68,12 +68,12 @@ import java.util.List; * The reason for this system is that it allows you to compare two structured files -- such as BAMs and VCFs -- for common differences among them. This is primarily useful in regression testing or optimization, where you want to ensure that the differences are those that you expect and not any others. * * - ** + *Input
+ *Input
** The DiffObjectsWalker works with BAM or VCF files. *
* - *Output
+ *Output
** The DiffEngine system compares two hierarchical data structures for specific differences in the values of named * nodes. Suppose I have two trees: @@ -132,6 +132,10 @@ import java.util.List; [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000598.AC 1
Caveat
+ *Because this is a walker, it requires that you pass a reference file. However, the reference is not actually used, so it does not matter what you pass as reference.
+ * + * * @author Mark DePristo * @since 7/4/11 */ @@ -140,8 +144,7 @@ public class DiffObjects extends RodWalker{ /** * Writes out a file of the DiffEngine format: * - * TODO for Geraldine -- link needs to be updated below (and also in SelectVariants and RefSeqCodec GATK docs) - * http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine + * See http://www.broadinstitute.org/gatk/guide/article?id=1299 for details. */ @Output(doc="File to which results should be written",required=true) protected PrintStream out; @@ -169,7 +172,7 @@ public class DiffObjects extends RodWalker { @Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false) int MAX_OBJECTS_TO_READ = -1; - @Argument(fullName="maxRawDiffsToSummary", shortName="maxRawDiffsToSummary", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false) + @Argument(fullName="maxRawDiffsToSummarize", shortName="maxRawDiffsToSummarize", doc="Max. number of differences to include in the summary. -1 [default] means unlimited", required=false) int maxRawDiffsToSummary = -1; @Argument(fullName="doPairwise", shortName="doPairwise", doc="If provided, we will compute the minimum pairwise differences to summary, which can be extremely expensive", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java index e881315b9..d2f2e32b3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java @@ -60,17 +60,17 @@ import java.util.List; * 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions). 
* Reference bases for each interval will be output as a separate fasta sequence (named numerically in order). * - * Input
+ *Input
** The reference, requested intervals, and any number of variant rod files. *
* - *Output
+ *Output
** A fasta file representing the requested intervals. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java index f2f5fb5fe..fb7941fec 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java @@ -48,17 +48,17 @@ import java.io.PrintStream; * Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a * separate fasta sequence (named numerically in order). * - *Input
+ *Input
** The reference and requested intervals. *
* - *Output
+ *Output
** A fasta file representing the requested intervals. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java index 9fbaca14e..8883523d9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java @@ -38,7 +38,27 @@ import org.broadinstitute.sting.utils.help.HelpConstants; import java.io.PrintStream; /** - * Calculates basic statistics about the reference sequence itself + * Calculate basic statistics about the reference sequence itself + * + *These are very basic statistics: total number of bases and number of "regular" bases (i.e. A, C, T or G).
+ * + *Input
+ *+ * A FASTA reference file. + *
+ * + *Output
+ *+ * Base counts are written to file if an output file name is given (with -o), otherwise output to stdout. + *
+ * + *Example
+ *+ * java -Xmx2g -jar GenomeAnalysisTK.jar \ + * -T FastaStats \ + * -R ref.fasta \ + * [-o output.txt] + **/ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) public class FastaStats extends RefWalker{ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java index 61a847f4c..c59c61803 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java @@ -55,17 +55,17 @@ import java.util.*; * VariantFiltration is a GATK tool for hard-filtering variant calls based on certain criteria. * Records are hard-filtered by changing the value in the FILTER field to something other than PASS. * - * Input
+ *Input
** A variant set to filter. *
* - *Output
+ *Output
** A filtered VCF. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ @@ -114,7 +114,7 @@ public class VariantFiltration extends RodWalker{ * One can filter normally based on most fields (e.g. "GQ < 5.0"), but the GT (genotype) field is an exception. We have put in convenience * methods so that one can now filter out hets ("isHet == 1"), refs ("isHomRef == 1"), or homs ("isHomVar == 1"). */ - @Argument(fullName="genotypeFilterExpression", shortName="G_filter", doc="One or more expression used with FORMAT (sample/genotype-level) fields to filter (see wiki docs for more info)", required=false) + @Argument(fullName="genotypeFilterExpression", shortName="G_filter", doc="One or more expression used with FORMAT (sample/genotype-level) fields to filter (see documentation guide for more info)", required=false) protected ArrayList GENOTYPE_FILTER_EXPS = new ArrayList (); /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountBases.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountBases.java index 503cdb6d6..8b82e50a7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountBases.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountBases.java @@ -38,17 +38,17 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** * Walks over the input data set, calculating the number of bases seen for diagnostic purposes. * - * Input
+ *Input
** One or more BAM files. *
* - *Output
+ *Output
** Number of bases seen. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java index 3b8eba398..e7b6df623 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java @@ -45,9 +45,42 @@ import java.util.Collections; import java.util.List; /** - * Counts the number of contiguous regions the walker traverses over. Slower than it needs to be, but - * very useful since overlapping intervals get merged, so you can count the number of intervals the GATK merges down to. - * This was its very first use. + * Count contiguous regions in an interval list. + * + *When the GATK reads in intervals from an intervals list, any intervals that overlap each other get merged into + * a single interval spanning the original ones. For example, if you have the following intervals: + *
+ * They will be merged into a single interval: + *
- + * 20:1-2000 + *
- + * 20:1500-3000 + *
+ * + * This tool allows you to check, for a given list of intervals, how many separate intervals the GATK will actually + * distinguish at runtime. + * + * + *
- 20:1-3000
Input
+ *+ * One or more rod files containing intervals to check. + *
+ * + *Output
+ *+ * Number of separate intervals identified by GATK after merging overlapping intervals. + *
+ * + * You can use the -numOverlaps argument to find out how many cases you have of a specific number of overlaps. + * + *Example
+ *+ * java -Xmx2g -jar GenomeAnalysisTK.jar \ + * -T CountIntervals \ + * -R ref.fasta \ + * -o output.txt \ + * -check intervals.list + **/ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) public class CountIntervals extends RefWalker{ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java index f2bd791c1..d999dfebf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java @@ -42,33 +42,34 @@ import java.io.PrintStream; * Walks over the input data set, calculating the total number of covered loci for diagnostic purposes. * * - * Simplest example of a locus walker. + * This is the simplest example of a locus walker. + *
* - * - *Input
+ *Input
** One or more BAM files. *
* - *Output
+ *Output
*- * Number of loci traversed. + * Number of loci traversed. If an output file name is provided, then the result will be written to that file. + * Otherwise it will be sent to standard console output. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ - * -R ref.fasta \ * -T CountLoci \ - * -o output.txt \ + * -R ref.fasta \ * -I input.bam \ + * -o output.txt \ * [-L input.intervals] ** */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) public class CountLoci extends LocusWalkerimplements TreeReducible , NanoSchedulable { - @Output(doc="Write count to this file instead of STDOUT") + @Output PrintStream out; public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMales.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMales.java index 6fb4b84d6..7279a64a4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMales.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMales.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -37,12 +38,36 @@ import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.HelpConstants; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import java.io.PrintStream; + /** * Walks over the input data set, calculating the number of reads seen from male samples for diagnostic purposes. + * + * Input
+ *+ * One or more BAM files. + *
+ * + *Output
+ *+ * Number of reads seen from male samples. + *
+ * + *Examples
+ *+ * java -Xmx2g -jar GenomeAnalysisTK.jar \ + * -T CountMales \ + * -R ref.fasta \ + * -I samples.bam \ + * -o output.txt + **/ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS, DataSource.REFERENCE}) public class CountMales extends ReadWalker{ + @Output + public PrintStream out; + public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) { Sample sample = getSampleDB().getSample(read); return sample.getGender() == Gender.MALE ? 1 : 0; @@ -53,4 +78,8 @@ public class CountMales extends ReadWalker { public Integer reduce(Integer value, Integer sum) { return value + sum; } + + public void onTraversalDone( Integer c ) { + out.println(c); + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java index c01a1df89..65f82efe4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java @@ -53,22 +53,32 @@ import java.util.*; /** * Prints out counts of the number of reference ordered data objects encountered. * + * CountRods is a RODWalker, and so traverses the data by ROD. For example if the ROD passed to it is a VCF file, + * it will count the variants in the file.
* - *Input
+ *Note that this tool is different from CountRODsByRef which is a RefWalker, and so traverses the data by + * position along the reference. CountRODsByRef can count ROD elements (such as, but not limited to, variants) found + * at each position or within specific intervals if you use the -L argument (see CommandLineGATK).
+ * + *Both these tools are different from CountVariants in that they are more generic (they can also count RODs that + * are not variants) and CountVariants is more detailed, in that it computes additional statistics (type of variants + * being indels vs. SNPs etc).
+ * + *Input
** One or more rod files. *
* - *Output
+ *Output
** Number of rods seen. *
* - *Examples
+ *Example
** java -Xmx2g -jar GenomeAnalysisTK.jar \ - * -R ref.fasta \ * -T CountRODs \ + * -R ref.fasta \ * -o output.txt \ * --rod input.vcf *diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java index 303f1704f..594ca239d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java @@ -43,24 +43,34 @@ import java.util.Collections; import java.util.List; /** - * Prints out counts of the number of reference ordered data objects encountered. + * Prints out counts of the number of reference ordered data objects encountered along the reference. * + *CountRodsByRef is a RefWalker, and so traverses the data by position along the reference. It counts ROD + * elements (such as, but not limited to, variants) found at each position or within specific intervals if you use + * the -L argument (see CommandLineGATK).
* - *Input
+ *Note that this tool is different from the basic CountRODs, which is a RODWalker, and so traverses the data by + * ROD. For example if the ROD passed to it is a VCF file, CountRODs will simply count the variants in the file.
+ * + *Both these tools are different from CountVariants in that they are more generic (they can also count RODs that + * are not variants) and CountVariants is more detailed, in that it computes additional statistics (type of variants + * being indels vs. SNPs etc).
+ * + *Input
** One or more rod files. *
* - *Output
+ *Output
** Number of rods seen. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ - * -R ref.fasta \ * -T CountRODsByRef \ + * -R ref.fasta \ * -o output.txt \ * --rod input.vcf *diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadEvents.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadEvents.java index 8b0646092..cfb7325a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadEvents.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadEvents.java @@ -47,22 +47,22 @@ import java.util.Map; /** * Walks over the input data set, counting the number of read events (from the CIGAR operator) * - *Input
+ *Input
** One or more BAM files. *
* - *Output
+ *Output
*- * Number of reads events for each category + * Number of read events for each category, formatted as a GATKReport table. * - *
Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ - * -R ref.fasta \ * -T CountReadEvents \ - * -o output.grp \ + * -R ref.fasta \ * -I input.bam \ + * -o output.grp \ * [-L input.intervals] **/ @@ -70,7 +70,7 @@ import java.util.Map; @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS, DataSource.REFERENCE}) public class CountReadEvents extends ReadWalkerInput
+ *Input
** One or more variant sets to combine. *
* - *Output
+ *Output
** A combined VCF. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 65ec7a4f0..e6d3e6e94 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -60,17 +60,17 @@ import java.util.*; * place an indel at the left-most position this doesn't always happen, so this tool can be used to left-align them. * Note that this tool cannot handle anything other than bi-allelic, simple indels. Complex events are written out unchanged. * - *Input
+ *Input
** A variant set to left-align. *
* - *Output
+ *Output
** A left-aligned VCF. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java index 17aaa7513..9bbf728e1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java @@ -58,17 +58,17 @@ import java.util.*; * SelectHeaders can be used for this purpose. Given a single VCF file, one or more headers can be extracted from the * file (based on a complete header name or a pattern match). * - *Input
+ *Input
** A set of VCFs. *
* - *Output
+ *Output
** A header selected VCF. *
* - *Examples
+ *Examples
** Select only the FILTER, FORMAT, and INFO headers: * java -Xmx2g -jar GenomeAnalysisTK.jar \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 9c209ae2c..f72ce3bd6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -62,20 +62,20 @@ import java.util.*; * Given a single VCF file, one or more samples can be extracted from the file (based on a complete sample name or a * pattern match). Variants can be further selected by specifying criteria for inclusion, i.e. "DP > 1000" (depth of * coverage greater than 1000x), "AF < 0.25" (sites with allele frequency less than 0.25). These JEXL expressions are - * documented in the Using JEXL expressions section (http://www.broadinstitute.org/gsa/wiki/index.php/Using_JEXL_expressions). + * documented in the Using JEXL expressions section (http://www.broadinstitute.org/gatk/guide/article?id=1255). * One can optionally include concordance or discordance tracks for use in selecting overlapping variants. * - *Input
+ *Input
** A variant set to select from. *
* - *Output
+ *Output
** A selected VCF. *
* - *Examples
+ *Examples
** Select two samples out of a VCF with many samples: * java -Xmx2g -jar GenomeAnalysisTK.jar \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index a242f9310..d11cf5aee 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -60,12 +60,12 @@ import java.util.Set; * * If you are looking simply to test the adherence to the VCF specification, use --validationType NONE. * - *Input
+ *Input
** A variant set to validate. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 02089eb6c..0e2a04bf2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -55,12 +55,12 @@ import java.util.*; * default is soft-filtered by high no-call rate or low Hardy-Weinberg probability. * If you have .ped files, please first convert them to VCF format. * - *Input
+ *Input
** A validation VCF to annotate. *
* - *Output
+ *Output
** An annotated VCF. Additionally, a table like the following will be output: *
@@ -74,7 +74,7 @@ import java.util.*; ** * - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index b12f51a1e..444eb745c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -62,14 +62,13 @@ import java.util.*; * genotypes), NO-CALL (count of no-call genotypes), TYPE (the type of event), VAR (count of * non-reference genotypes), NSAMPLES (number of samples), NCALLED (number of called samples), * GQ (from the genotype field; works only for a file with a single sample), and MULTI-ALLELIC - * (is the record from a multi-allelic site). Note that this tool does not support capturing any - * GENOTYPE field values. If a VCF record is missing a value, then the tool by + * (is the record from a multi-allelic site). Note that if a VCF record is missing a value, then the tool by * default throws an error, but the special value NA can be emitted instead with * appropriate tool arguments. * * * - *Input
+ *Input
**
*
* * - *- A VCF file
@@ -77,12 +76,12 @@ import java.util.*; *Output
+ *Output
** A tab-delimited file containing the values of the requested fields in the VCF file *
* - *Examples
+ *Examples
** java -jar GenomeAnalysisTK.jar \ * -R reference.fasta diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index ffe61f76d..7c7f52803 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -62,17 +62,17 @@ import java.util.*; ** Note that there must be a Tribble feature/codec for the file format as well as an adaptor. * - *
Input
+ *Input
** A variant file to filter. *
* - *Output
+ *Output
** A VCF file. *
* - *Examples
+ *Examples
** java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ diff --git a/public/java/src/org/broadinstitute/sting/tools/CatVariants.java b/public/java/src/org/broadinstitute/sting/tools/CatVariants.java index 10fb606f9..e1dd2c255 100644 --- a/public/java/src/org/broadinstitute/sting/tools/CatVariants.java +++ b/public/java/src/org/broadinstitute/sting/tools/CatVariants.java @@ -35,6 +35,9 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.CommandLineProgram; +import org.broadinstitute.sting.gatk.CommandLineGATK; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.help.HelpConstants; import org.broadinstitute.variant.bcf2.BCF2Codec; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.variant.vcf.VCFCodec; @@ -51,12 +54,48 @@ import java.util.*; /** * - * Usage: java -cp dist/GenomeAnalysisTK.jar org.broadinstitute.sting.tools.CatVariants[sorted (optional)]"); - * The input files can be of type: VCF (ends in .vcf or .VCF)"); - * BCF2 (ends in .bcf or .BCF)"); - * Output file must be vcf or bcf file (.vcf or .bcf)"); - * If the input files are already sorted, the last argument can indicate that"); + * Concatenates VCF files of non-overlapped genome intervals, all with the same set of samples. + * + * + * The main purpose of this tool is to speed up the gather function when using scatter-gather parallelization. + * This tool concatenates the scattered output VCF files. It assumes that: + * - All the input VCFs (or BCFs) contain the same samples in the same order. + * - The variants in each input file are from non-overlapping (scattered) intervals. + * + * When the input files are already sorted based on the intervals start positions, use -assumeSorted. 
+ * + * Note: Currently the tool is more efficient when working with VCFs; we will work to make it as efficient for BCFs. + * + *
+ * + *Input
+ *+ * One or more variant sets to combine. They should be of non-overlapping genome intervals and with the same samples (in the same order). + * The input files should be 'name.vcf' or 'name.VCF' or 'name.bcf' or 'name.BCF'. + * If the files are ordered according to the appearance of intervals in the ref genome, then one can use the -assumeSorted flag. + *
+ * + *Output
+ *+ * A combined VCF. The output file should be 'name.vcf' or 'name.VCF'. + * </p> + * + * + *
Examples
+ *+ * java -cp dist/GenomeAnalysisTK.jar org.broadinstitute.sting.tools.CatVariants \ + * -R ref.fasta \ + * -V input1.vcf \ + * -V input2.vcf \ + * -out output.vcf \ + * -assumeSorted + *+ * + * @author Ami Levy Moonshine + * @since Jan 2012 */ + +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) public class CatVariants extends CommandLineProgram { // setup the logging system, used by some codecs private static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getRootLogger(); @@ -64,6 +103,14 @@ public class CatVariants extends CommandLineProgram { @Input(fullName = "reference", shortName = "R", doc = "genome reference file.fasta", required = true) private File refFile = null; + /** + * The VCF or BCF files to merge together + * + * CatVariants can take any number of -V arguments on the command line. Each -V argument + * will be included in the final merged output VCF. The order of arguments does not matter, but it runs more + * efficiently if they are sorted based on the intervals and the assumeSorted argument is used. 
+ * + */ @Input(fullName="variant", shortName="V", doc="Input VCF file/s named .vcf or .bcf", required = true) private List variant = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java index fb26f6c37..82ee76a81 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java @@ -45,8 +45,8 @@ import java.util.ArrayList; * * * - * Instructions for generating a RefSeq file for use with the RefSeq codec can be found on the Wiki here - * http://www.broadinstitute.org/gsa/wiki/index.php/RefSeq + * Instructions for generating a RefSeq file for use with the RefSeq codec can be found on the documentation guide here + * http://www.broadinstitute.org/gatk/guide/article?id=1329 *
*Usage
* The RefSeq Rod can be bound as any other rod, and is specified by REFSEQ, for example diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 8a8c76806..e20d285e1 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -171,7 +171,7 @@ class GATKResourcesBundle extends QScript { "CEUTrio.HiSeq.WGS.b37.bestPractices.phased",b37,true,false)) // - // example call set for wiki tutorial + // example call set for documentation guide tutorial // addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/exampleCalls/NA12878.HiSeq.WGS.bwa.cleaned.raw.b37.subset.vcf", "NA12878.HiSeq.WGS.bwa.cleaned.raw.subset", b37, true, true)) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala index 344f5fe5b..529615c24 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala @@ -31,7 +31,7 @@ import org.broadinstitute.sting.commandline.{Argument, Output, Input} /** * Basic snpEff support. - * See: http://www.broadinstitute.org/gsa/wiki/index.php/Adding_Genomic_Annotations_Using_SnpEff_and_VariantAnnotator + * See: http://www.broadinstitute.org/gatk/guide/article?id=50 */ class SnpEff extends JavaCommandLineFunction { javaMainClass = "ca.mcgill.mcb.pcingola.snpEffect.commandLine.SnpEff" diff --git a/settings/helpTemplates/generic.template.html b/settings/helpTemplates/generic.template.html index 587828d1e..b05ad65c0 100644 --- a/settings/helpTemplates/generic.template.html +++ b/settings/helpTemplates/generic.template.html @@ -130,7 +130,7 @@ #if>
-Introduction
+Overview
${description} <#-- Create references to additional capabilities if appropriate -->