From f972963918e3f80df26e49f5f24b925cfc364820 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Tue, 5 Mar 2013 13:58:50 -0500 Subject: [PATCH] Fixed issues raised by Appistry QA (mostly small fixes, corrections & clarifications to GATKDocs) GATK-73 updated docs for bqsr args GATK-9 differentiate CountRODs from CountRODsByRef GATK-76 generate GATKDoc for CatVariants GATK-4 made resource arg required GATK-10 added -o, some docs to CountMales; some docs to CountLoci GATK-11 fixed by MC's -o change; straightened out the docs. GATK-77 fixed references to wiki GATK-76 Added Ami's doc block GATK-14 Added note that these annotations can only be used with VariantAnnotator GATK-15 specified required=false for two arguments GATK-23 Added documentation block GATK-33 Added documentation GATK-34 Added documentation GATK-32 Corrected arg name and docstring in DiffObjects GATK-32 Added note to DO doc about reference (required but unused) GATK-29 Added doc block to CountIntervals GATK-31 Added @Output PrintStream to enable -o GATK-35 Touched up docs GATK-36 Touched up docs, specified verbosity is optional GATK-60 Corrected GContent annot module location in gatkdocs GATK-68 touched up docs and arg docstrings GATK-16 Added note of caution about calling RODRequiringAnnotations as a group GATK-61 Added run requirements (num samples, min genotype quality) Tweaked template and generic doc block formatting (h2 to h3 titles) GATK-62 Added a caveat to HR annot Made experimental annotation hidden GATK-75 Added setup info regarding BWA GATK-22 Clarified some argument requirements GATK-48 Clarified -G doc comments GATK-67 Added arg requirement GATK-58 Added annotation and usage docs GSATDG-96 Corrected doc Updated MD5 for DiffObjectsIntegrationTests (only change is link in table title) --- .../annotator/DepthPerAlleleBySample.java | 14 +++-- .../gatk/walkers/annotator/GCContent.java | 1 - .../gatk/walkers/annotator/HardyWeinberg.java | 2 + .../walkers/annotator/HomopolymerRun.java 
| 6 +- .../walkers/annotator/MVLikelihoodRatio.java | 6 ++ .../walkers/annotator/SpanningDeletions.java | 4 +- .../annotator/TandemRepeatAnnotator.java | 5 ++ .../TransmissionDisequilibriumTest.java | 6 +- .../gatk/walkers/bqsr/BaseRecalibrator.java | 6 +- .../bqsr/RecalibrationArgumentCollection.java | 16 +++--- .../compression/reducereads/CompareBAM.java | 6 +- .../compression/reducereads/ReduceReads.java | 6 +- .../targets/BaseCoverageDistribution.java | 6 +- .../diagnostics/targets/DiagnoseTargets.java | 6 +- .../targets/FindCoveredIntervals.java | 25 ++++++++ .../walkers/genotyper/UnifiedGenotyper.java | 15 ++--- .../haplotypecaller/HaplotypeCaller.java | 8 +-- .../haplotypecaller/HaplotypeResolver.java | 6 +- .../gatk/walkers/indels/IndelRealigner.java | 43 ++++++++------ .../gatk/walkers/indels/LeftAlignIndels.java | 6 +- .../indels/RealignerTargetCreator.java | 12 ++-- .../walkers/phasing/PhaseByTransmission.java | 6 +- .../gatk/walkers/phasing/PhasingUtils.java | 6 +- .../walkers/phasing/ReadBackedPhasing.java | 6 +- .../validation/GenotypeAndValidate.java | 6 +- .../ValidationSiteSelector.java | 6 +- .../ApplyRecalibration.java | 6 +- .../VariantRecalibrator.java | 13 +++-- .../variantutils/RegenotypeVariants.java | 6 +- .../covariates/ExperimentalCovariate.java | 6 +- .../covariates/RequiredCovariate.java | 6 +- .../covariates/StandardCovariate.java | 6 +- .../DiffObjectsIntegrationTest.java | 8 +-- .../utils/R/gsalib/man/gsalib-package.Rd | 6 +- public/doc/README | 8 +-- .../sting/alignment/CheckAlignment.java | 9 ++- .../sting/commandline/CommandLineProgram.java | 2 +- .../arguments/GATKArgumentCollection.java | 2 +- .../sting/gatk/examples/GATKDocsExample.java | 6 +- .../filters/ReassignMappingQualityFilter.java | 6 +- .../ReassignOneMappingQualityFilter.java | 6 +- .../gatk/walkers/annotator/AlleleBalance.java | 2 +- .../walkers/annotator/VariantAnnotator.java | 13 +++-- .../walkers/beagle/BeagleOutputToVCF.java | 2 +- 
.../walkers/beagle/ProduceBeagleInput.java | 4 +- .../gatk/walkers/coverage/CallableLoci.java | 6 +- .../walkers/coverage/DepthOfCoverage.java | 6 +- .../walkers/coverage/GCContentByInterval.java | 8 +-- .../diagnostics/CoveredByNSamplesSites.java | 6 +- .../diagnostics/ErrorRatePerCycle.java | 6 +- .../diagnostics/ReadGroupProperties.java | 6 +- .../diagnostics/ReadLengthDistribution.java | 6 +- .../gatk/walkers/diffengine/DiffEngine.java | 5 +- .../gatk/walkers/diffengine/DiffObjects.java | 13 +++-- .../fasta/FastaAlternateReferenceMaker.java | 6 +- .../walkers/fasta/FastaReferenceMaker.java | 6 +- .../sting/gatk/walkers/fasta/FastaStats.java | 22 ++++++- .../walkers/filters/VariantFiltration.java | 8 +-- .../sting/gatk/walkers/qc/CountBases.java | 6 +- .../sting/gatk/walkers/qc/CountIntervals.java | 39 ++++++++++++- .../sting/gatk/walkers/qc/CountLoci.java | 19 ++++--- .../sting/gatk/walkers/qc/CountMales.java | 29 ++++++++++ .../sting/gatk/walkers/qc/CountRODs.java | 18 ++++-- .../sting/gatk/walkers/qc/CountRODsByRef.java | 20 +++++-- .../gatk/walkers/qc/CountReadEvents.java | 14 ++--- .../sting/gatk/walkers/qc/CountReads.java | 6 +- .../gatk/walkers/qc/CountTerminusEvent.java | 13 +++-- .../sting/gatk/walkers/qc/FlagStat.java | 53 ++++++++--------- .../sting/gatk/walkers/qc/Pileup.java | 28 ++++++++- .../sting/gatk/walkers/qc/QCRef.java | 6 +- .../gatk/walkers/readutils/ClipReads.java | 6 +- .../gatk/walkers/readutils/PrintReads.java | 12 +++- .../validation/ValidationAmplicons.java | 6 +- .../gatk/walkers/varianteval/VariantEval.java | 6 +- .../walkers/variantutils/CombineVariants.java | 8 +-- .../variantutils/LeftAlignVariants.java | 6 +- .../walkers/variantutils/SelectHeaders.java | 6 +- .../walkers/variantutils/SelectVariants.java | 8 +-- .../variantutils/ValidateVariants.java | 4 +- .../VariantValidationAssessor.java | 6 +- .../walkers/variantutils/VariantsToTable.java | 9 ++- .../walkers/variantutils/VariantsToVCF.java | 6 +- 
.../sting/tools/CatVariants.java | 57 +++++++++++++++++-- .../utils/codecs/refseq/RefSeqCodec.java | 4 +- .../queue/qscripts/GATKResourcesBundle.scala | 2 +- .../queue/extensions/snpeff/SnpEff.scala | 2 +- settings/helpTemplates/generic.template.html | 2 +- 87 files changed, 550 insertions(+), 307 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 5acea12f6..9f90a1308 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -72,11 +72,11 @@ import java.util.Map; /** - * The depth of coverage of each VCF allele in this sample. + * The depth of coverage of each allele per sample * - * The AD and DP are complementary fields that are two important ways of thinking about the depth of the data for this + *

The AD and DP are complementary fields that are two important ways of thinking about the depth of the data for this * sample at this site. While the sample-level (FORMAT) DP field describes the total depth of reads that passed the - * Unified Genotyper's internal quality control metrics (like MAPQ > 17, for example), the AD values (one for each of + * caller's internal quality control metrics (like MAPQ > 17, for example), the AD values (one for each of * REF and ALT fields) is the unfiltered count of all reads that carried with them the * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the * power I have to determine the genotype of the sample at this site, while the AD tells me how many times @@ -86,10 +86,12 @@ import java.util.Map; * normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that * the AD isn't necessarily calculated exactly for indels. Only reads which are statistically favoring one allele over the other are counted. * Because of this fact, the sum of AD may be different than the individual sample depth, especially when there are - * many non-informatice reads. - * Because the AD includes reads and bases that were filtered by the Unified Genotyper and in case of indels is based on a statistical computation, + * many non-informative reads.

+ * + *

Because the AD includes reads and bases that were filtered by the caller and in case of indels is based on a statistical computation, * one should not base assumptions about the underlying genotype based on it; - * instead, the genotype likelihoods (PLs) are what determine the genotype calls. + * instead, the genotype likelihoods (PLs) are what determine the genotype calls.

+ * */ public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index 48b3593c5..aa5b779da 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -70,7 +70,6 @@ import java.util.Map; /** * The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site */ -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation { public Map annotate(final RefMetaDataTracker tracker, diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index 703810025..b349be285 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -69,6 +69,8 @@ import java.util.Map; /** * Phred-scaled P value of genotype-based (using GT field) test for Hardy-Weinberg test for disequilibrium + * + *

Requires at least 10 samples in order to run. Only genotypes with sufficient quality (>10) will be taken into account.

*/ public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index c25cb6820..f9663d33e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -63,7 +63,11 @@ import java.util.List; import java.util.Map; /** - * Largest contiguous homopolymer run of the variant allele in either direction on the reference. Computed only for bi-allelic sites. + * Largest contiguous homopolymer run of the variant allele in either direction on the reference. + * + *

Computed only for bi-allelic sites.

+ * + *

Note that this annotation is no longer supported, as we have found that it does not give satisfactory results. Use at your own risk!

*/ public class HomopolymerRun extends InfoFieldAnnotation { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index 19f32bae0..58d720899 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -65,10 +65,16 @@ import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; /** + * Likelihood of the site being a mendelian violation versus the likelihood of the site transmitting according to mendelian rules. + * + *

* Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation * versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is * diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than * the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios. + *

+ * + *

Note that this annotation can only be used with VariantAnnotator (not with UnifiedGenotyper or HaplotypeCaller).

*/ public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index cede1e5ee..c3a0618ef 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -65,7 +65,9 @@ import java.util.Map; /** - * Fraction of reads containing spanning deletions at this site. + * Fraction of reads containing spanning deletions at this site + * + *

Note that this annotation is currently not compatible with HaplotypeCaller.

*/ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java index 2e0e759c2..d976592cb 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java @@ -66,6 +66,11 @@ import java.util.List; import java.util.Map; +/** + * Annotates variants that are composed of tandem repeats + * + *

Note that this annotation is currently not compatible with HaplotypeCaller.

+ */ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements StandardAnnotation { private static final String STR_PRESENT = "STR"; private static final String REPEAT_UNIT_KEY = "RU"; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java index b3f5728a2..f29899f7f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -65,9 +65,9 @@ import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; /** - * Created by IntelliJ IDEA. - * User: rpoplin, lfran, ebanks - * Date: 11/14/11 + * Wittkowski transmission disequilibrium test + * + *

Note that this annotation can only be used with VariantAnnotator (not with UnifiedGenotyper or HaplotypeCaller).

*/ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index e1972334b..dde49b7db 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -95,14 +95,14 @@ import java.util.List; * *

* - *

Input

+ *

Input

*

* The input read data whose base quality scores need to be assessed. *

* A database of known polymorphic sites to skip over. *

* - *

Output

+ *

Output

*

* A GATK Report file with many tables: *

    @@ -116,7 +116,7 @@ import java.util.List; * The GATK Report is intended to be easy to read by humans or computers. Check out the documentation of the GATKReport to learn how to manipulate this table. *

    * - *

    Examples

    + *

    Examples

    *
      * java -Xmx4g -jar GenomeAnalysisTK.jar \
      *   -T BaseRecalibrator \
    diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
    index 5ab296a5f..ee2edee5a 100644
    --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
    +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
    @@ -146,38 +146,38 @@ public class RecalibrationArgumentCollection {
         public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
     
         /**
    -     * The context covariate will use a context of this size to calculate it's covariate value for base mismatches
    +     * The context covariate will use a context of this size to calculate its covariate value for base mismatches. Must be between 1 and 13 (inclusive). Note that higher values will increase runtime and required java heap size.
          */
    -    @Argument(fullName = "mismatches_context_size", shortName = "mcs", doc = "size of the k-mer context to be used for base mismatches", required = false)
    +    @Argument(fullName = "mismatches_context_size", shortName = "mcs", doc = "Size of the k-mer context to be used for base mismatches", required = false)
         public int MISMATCHES_CONTEXT_SIZE = 2;
     
         /**
    -     * The context covariate will use a context of this size to calculate it's covariate value for base insertions and deletions
    +     * The context covariate will use a context of this size to calculate its covariate value for base insertions and deletions. Must be between 1 and 13 (inclusive). Note that higher values will increase runtime and required java heap size.
          */
    -    @Argument(fullName = "indels_context_size", shortName = "ics", doc = "size of the k-mer context to be used for base insertions and deletions", required = false)
    +    @Argument(fullName = "indels_context_size", shortName = "ics", doc = "Size of the k-mer context to be used for base insertions and deletions", required = false)
         public int INDELS_CONTEXT_SIZE = 3;
     
         /**
          * The cycle covariate will generate an error if it encounters a cycle greater than this value.
          * This argument is ignored if the Cycle covariate is not used.
          */
    -    @Argument(fullName = "maximum_cycle_value", shortName = "maxCycle", doc = "the maximum cycle value permitted for the Cycle covariate", required = false)
    +    @Argument(fullName = "maximum_cycle_value", shortName = "maxCycle", doc = "The maximum cycle value permitted for the Cycle covariate", required = false)
         public int MAXIMUM_CYCLE_VALUE = 500;
     
         /**
    -     * A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read for this default value. Negative value turns it off (default is off)
    +     * A default base quality to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read with this default value. A negative value turns it off. [default is off]
          */
         @Argument(fullName = "mismatches_default_quality", shortName = "mdq", doc = "default quality for the base mismatches covariate", required = false)
         public byte MISMATCHES_DEFAULT_QUALITY = -1;
     
         /**
    -     * A default base qualities to use as a prior (reported quality) in the insertion covariate model. This parameter is used for all reads without insertion quality scores for each base. (default is on)
    +     * A default base quality to use as a prior (reported quality) in the insertion covariate model. This parameter is used for all reads without insertion quality scores for each base. [default is on]
          */
         @Argument(fullName = "insertions_default_quality", shortName = "idq", doc = "default quality for the base insertions covariate", required = false)
         public byte INSERTIONS_DEFAULT_QUALITY = 45;
     
         /**
    -     * A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read for this default value. Negative value turns it off (default is off)
    +     * A default base quality to use as a prior (reported quality) in the deletion covariate model. This parameter is used for all reads without deletion quality scores for each base. [default is on]
          */
         @Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false)
         public byte DELETIONS_DEFAULT_QUALITY = 45;
    diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java
    index a8a765ddc..36da92b4f 100644
    --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java
    +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java
    @@ -69,15 +69,15 @@ import java.util.Map;
      * 

    * This is a test walker used for asserting that the ReduceReads procedure is not making blatant mistakes when compressing bam files. *

    - *

    Input

    + *

    Input

    *

    * Two BAM files (using -I) with different read group IDs *

    - *

    Output

    + *

    Output

    *

    * [Output description] *

    - *

    Examples

    + *

    Examples

    *
      *    java
      *      -jar GenomeAnalysisTK.jar
    diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
    index e89158412..c2c154053 100644
    --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
    +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
    @@ -86,17 +86,17 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
      * shown to reduce a typical whole exome BAM file 100x. The higher the coverage, the bigger the
      * savings in file size and performance of the downstream tools.
      *
    - * 

    Input

    + *

    Input

    *

    * The BAM file to be compressed *

    * - *

    Output

    + *

    Output

    *

    * The compressed (reduced) BAM file. * *

    - *

    Examples

    + *

    Examples

    *
      * java -Xmx4g -jar GenomeAnalysisTK.jar \
      *   -R ref.fasta \
    diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/BaseCoverageDistribution.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/BaseCoverageDistribution.java
    index 37e82a90c..9bd08a020 100644
    --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/BaseCoverageDistribution.java
    +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/BaseCoverageDistribution.java
    @@ -71,17 +71,17 @@ import java.util.Map;
      *  
  1. includes reads with deletions in the loci (optionally can be turned off)
  2. *

    * - *

    Input

    + *

    Input

    *

    * The BAM file and an optional interval list (works for WGS as well) *

    * - *

    Output

    + *

    Output

    *

    * A GATK Report with the coverage distribution per base * *

    - *

    Examples

    + *

    Examples

    *
      * java -Xmx4g -jar GenomeAnalysisTK.jar \
      *   -R ref.fasta \
    diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java
    index 8b9b37c18..e4310588e 100644
    --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java
    +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java
    @@ -75,7 +75,7 @@ import java.util.*;
      * 

    *

    *

    - *

    Input

    + *

    Input

    *

    *

      *
    • A reference file
    • @@ -84,12 +84,12 @@ import java.util.*; *
    *

    *

    - *

    Output

    + *

    Output

    *

    * A modified VCF detailing each interval by sample *

    *

    - *

    Examples

    + *

    Examples

    *
      *    java
      *      -jar GenomeAnalysisTK.jar
    diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java
    index b1a26b7a2..6b4d1f7a8 100644
    --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java
    +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java
    @@ -63,6 +63,31 @@ import org.broadinstitute.sting.utils.help.HelpConstants;
     
     import java.io.PrintStream;
     
    +/**
    + * Outputs a list of intervals that are covered above a given threshold.
    + *
    + * 

    The list can be used as an interval list for other walkers. Note that if the -uncovered argument is given, the tool will instead output intervals that fail the coverage threshold.

    + * + *

    Input

    + *

    + * One or more BAM files. + *

    + * + *

    Output

    + *

    + * List of covered (or uncovered) intervals. + *

    + * + *

    Example

    + *
    + * java -Xmx2g -jar GenomeAnalysisTK.jar \
    + *   -T FindCoveredIntervals \
    + *   -R ref.fasta \
    + *   -I my_file.bam \
    + *   -o output.list
    + * 
    + * + */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.CONTIG) @ActiveRegionTraversalParameters(extension = 0, maxRegion = 50000) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 137a1cfa5..4347a1a84 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -86,17 +86,17 @@ import java.util.*; * both single sample data and multi-sample data. *

    * - *

    Input

    + *

    Input

    *

    * The read data from which to make variant calls. *

    * - *

    Output

    + *

    Output

    *

    * A raw, unfiltered, highly sensitive callset in VCF format. *

    * - *

    Example generic command for multi-sample SNP calling

    + *

    Example generic command for multi-sample SNP calling

    *
      * java -jar GenomeAnalysisTK.jar \
      *   -R resources/Homo_sapiens_assembly18.fasta \
    @@ -117,7 +117,7 @@ import java.util.*;
      * argument descriptions below.
      * 

    * - *

    Example command for generating calls at all sites

    + *

    Example command for generating calls at all sites

    *
      * java -jar /path/to/GenomeAnalysisTK.jar \
      *   -l INFO \
    @@ -128,7 +128,7 @@ import java.util.*;
      *   --output_mode EMIT_ALL_SITES
      * 
    * - *

    Caveats

    + *

    Caveats

    *
      *
    • The system is under active and continuous development. All outputs, the underlying likelihood model, arguments, and * file formats are likely to change.
    • @@ -167,7 +167,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif * Records that are filtered in the comp track will be ignored. * Note that 'dbSNP' has been special-cased (see the --dbsnp argument). */ - @Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false) + @Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false) public List> comps = Collections.emptyList(); public List> getCompRodBindings() { return comps; } @@ -205,7 +205,8 @@ public class UnifiedGenotyper extends LocusWalker, Unif protected List annotationsToExclude = new ArrayList(); /** - * Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups. + * If specified, all available annotations in the group will be applied. See the VariantAnnotator -list argument to view available groups. + * Keep in mind that RODRequiringAnnotations are not intended to be used as a group, because they require specific ROD inputs. */ @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false) protected String[] annotationClassesToUse = { "Standard" }; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 003b8197f..7948b93a9 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -96,17 +96,17 @@ import java.util.*; /** * Call SNPs and indels simultaneously via local de-novo assembly of haplotypes in an active region. Haplotypes are evaluated using an affine gap penalty Pair HMM. * - *

      Input

      + *

      Input

      *

      * Input bam file(s) from which to make calls *

      * - *

      Output

      + *

      Output

      *

      * VCF file with raw, unrecalibrated SNP and indel calls. *

      * - *

      Examples

      + *

      Examples

      *
        *   java
        *     -jar GenomeAnalysisTK.jar
      @@ -120,7 +120,7 @@ import java.util.*;
        *     -o output.raw.snps.indels.vcf
        * 
      * - *

      Caveats

      + *

      Caveats

      *
        *
      • The system is under active and continuous development. All outputs, the underlying likelihood model, and command line arguments are likely to change often.
      • *
      diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java index c7cc84b9c..4de9488e9 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java @@ -84,17 +84,17 @@ import java.util.*; * From that, it can resolve potential differences in variant calls that are inherently the same (or similar) variants. * Records are annotated with the set and status attributes. * - *

      Input

      + *

      Input

      *

      * 2 variant files to resolve. *

      * - *

      Output

      + *

      Output

      *

      * A single consensus VCF. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx1g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
      index c7d24f475..d3a13df29 100644
      --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
      +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
      @@ -87,7 +87,7 @@ import java.io.IOException;
       import java.util.*;
       
       /**
      - * Performs local realignment of reads based on misalignments due to the presence of indels.
      + * Performs local realignment of reads to correct misalignments due to the presence of indels.
        *
        * 

      * The local realignment tool is designed to consume one or more BAM files and to locally realign reads such that the number of mismatching bases @@ -100,39 +100,46 @@ import java.util.*; * indel suitable for standard variant discovery approaches. Unlike most mappers, this walker uses the full alignment context to determine whether an * appropriate alternate reference (i.e. indel) exists. Following local realignment, the GATK tool Unified Genotyper can be used to sensitively and * specifically identify indels. - *

      + *

      *
        There are 2 steps to the realignment process: *
      1. Determining (small) suspicious intervals which are likely in need of realignment (see the RealignerTargetCreator tool)
      2. *
      3. Running the realigner over those intervals (IndelRealigner)
      4. *
      - *

      - * An important note: the input bam(s), reference, and known indel file(s) should be the same ones used for the RealignerTargetCreator step. *

      - * Another important note: because reads produced from the 454 technology inherently contain false indels, the realigner will not currently work with them - * (or with reads from similar technologies). + * For more details, see http://www.broadinstitute.org/gatk/guide/article?id=38 + *

      * - *

      Input

      + *

      Input

      *

      * One or more aligned BAM files and optionally one or more lists of known indels. *

      * - *

      Output

      + *

      Output

      *

      * A realigned version of your input BAM file(s). *

      * - *

      Examples

      + *

      Example

      *
        * java -Xmx4g -jar GenomeAnalysisTK.jar \
      - *   -I input.bam \
      - *   -R ref.fasta \
        *   -T IndelRealigner \
      + *   -R ref.fasta \
      + *   -I input.bam \
        *   -targetIntervals intervalListFromRTC.intervals \
        *   -o realignedBam.bam \
        *   [-known /path/to/indels.vcf] \
        *   [-compress 0]    (this argument is recommended to speed up the process *if* this is only a temporary file; otherwise, use the default value)
        * 
      * + *

      Caveats

      + * + *
      • + * An important note: the input bam(s), reference, and known indel file(s) should be the same ones used for the RealignerTargetCreator step. + *
      • + * Another important note: because reads produced from the 454 technology inherently contain false indels, the realigner will not currently work with them + * (or with reads from similar technologies). + *
      + * * @author ebanks */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class} ) @@ -168,7 +175,7 @@ public class IndelRealigner extends ReadWalker { /** * The interval list output from the RealignerTargetCreator tool using the same bam(s), reference, and known indel file(s). */ - @Input(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true) + @Input(fullName="targetIntervals", shortName="targetIntervals", doc="Intervals file output from RealignerTargetCreator", required=true) protected IntervalBinding intervalsFile = null; /** @@ -203,7 +210,7 @@ public class IndelRealigner extends ReadWalker { * push the mismatch column to another position). This parameter is just a heuristic and should be adjusted based on your particular data set. */ @Advanced - @Argument(fullName="entropyThreshold", shortName="entropy", doc="percentage of mismatches at a locus to be considered having high entropy", required=false) + @Argument(fullName="entropyThreshold", shortName="entropy", doc="Percentage of mismatches at a locus to be considered having high entropy (0.0 < entropy <= 1.0)", required=false) protected double MISMATCH_THRESHOLD = 0.15; /** @@ -225,21 +232,21 @@ public class IndelRealigner extends ReadWalker { * For expert users only! */ @Advanced - @Argument(fullName="maxPositionalMoveAllowed", shortName="maxPosMove", doc="maximum positional move in basepairs that a read can be adjusted during realignment", required=false) + @Argument(fullName="maxPositionalMoveAllowed", shortName="maxPosMove", doc="Maximum positional move in basepairs that a read can be adjusted during realignment", required=false) protected int MAX_POS_MOVE_ALLOWED = 200; /** * For expert users only! If you need to find the optimal solution regardless of running time, use a higher number. 
*/ @Advanced - @Argument(fullName="maxConsensuses", shortName="maxConsensuses", doc="max alternate consensuses to try (necessary to improve performance in deep coverage)", required=false) + @Argument(fullName="maxConsensuses", shortName="maxConsensuses", doc="Max alternate consensuses to try (necessary to improve performance in deep coverage)", required=false) protected int MAX_CONSENSUSES = 30; /** * For expert users only! If you need to find the optimal solution regardless of running time, use a higher number. */ @Advanced - @Argument(fullName="maxReadsForConsensuses", shortName="greedy", doc="max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage)", required=false) + @Argument(fullName="maxReadsForConsensuses", shortName="greedy", doc="Max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage)", required=false) protected int MAX_READS_FOR_CONSENSUSES = 120; /** @@ -247,7 +254,7 @@ public class IndelRealigner extends ReadWalker { * If you need to allow more reads (e.g. with very deep coverage) regardless of memory, use a higher number. */ @Advanced - @Argument(fullName="maxReadsForRealignment", shortName="maxReads", doc="max reads allowed at an interval for realignment", required=false) + @Argument(fullName="maxReadsForRealignment", shortName="maxReads", doc="Max reads allowed at an interval for realignment", required=false) protected int MAX_READS = 20000; @Advanced @@ -263,7 +270,7 @@ public class IndelRealigner extends ReadWalker { * * Note that some GATK arguments do NOT work in conjunction with nWayOut (e.g. --disable_bam_indexing). 
*/ - @Argument(fullName="nWayOut", shortName="nWayOut", required=false, doc="Generate one output file for each input (-I) bam file") + @Argument(fullName="nWayOut", shortName="nWayOut", required=false, doc="Generate one output file for each input (-I) bam file (not compatible with -output)") protected String N_WAY_OUT = null; @Hidden diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java index ff21893f1..532d13690 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java @@ -68,17 +68,17 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; * placed at multiple positions and still represent the same haplotype. While a standard convention is to place an * indel at the left-most position this doesn't always happen, so this tool can be used to left-align them. * - *

      Input

      + *

      Input

      *

      * A bam file to left-align. *

      * - *

      Output

      + *

      Output

      *

      * A left-aligned bam. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx3g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
      index 1ee04e317..caeb1e8d7 100644
      --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
      +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
      @@ -99,22 +99,22 @@ import java.util.TreeSet;
        * Important note 3: because reads produced from the 454 technology inherently contain false indels, the realigner will not currently work with them
        * (or with reads from similar technologies).   This tool also ignores MQ0 reads and reads with consecutive indel operators in the CIGAR string.
        *
      - * 

      Input

      + *

      Input

      *

      * One or more aligned BAM files and optionally one or more lists of known indels. *

      * - *

      Output

      + *

      Output

      *

      * A list of target intervals to pass to the Indel Realigner. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
      - *   -I input.bam \
      - *   -R ref.fasta \
        *   -T RealignerTargetCreator \
      + *   -R ref.fasta \
      + *   -I input.bam \
        *   -o forIndelRealigner.intervals \
        *   [--known /path/to/indels.vcf]
        * 
      @@ -143,7 +143,7 @@ public class RealignerTargetCreator extends RodWalker> known = Collections.emptyList(); /** - * Any two SNP calls and/or high entropy positions are considered clustered when they occur no more than this many basepairs apart. + * Any two SNP calls and/or high entropy positions are considered clustered when they occur no more than this many basepairs apart. Must be > 1. */ @Argument(fullName="windowSize", shortName="window", doc="window size for calculating entropy or SNP clusters", required=false) protected int windowSize = 10; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 54a324411..a4c1caf86 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -90,7 +90,7 @@ import java.util.*; *
    • In trios: If two individuals are missing, the remaining individual is phased if it is homozygous. No phasing probability is emitted.
    • *
    * - *

    Input

    + *

    Input

    *

    *

      *
    • A VCF variant set containing trio(s) and/or parent/child pair(s).
    • @@ -108,12 +108,12 @@ import java.util.*; *
    *

    * - *

    Output

    + *

    Output

    *

    * A VCF with genotypes recalibrated as most likely under the familial constraint and phased by descent where non-ambiguous. *

    * - *

    Examples

    + *

    Examples

    *
      * java -Xmx2g -jar GenomeAnalysisTK.jar \
      *   -R ref.fasta \
    diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java
    index eb2bb62ef..bb8c14ef7 100644
    --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java
    +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java
    @@ -65,17 +65,17 @@ import java.util.*;
      * [Functionality of this walker]
      * 

    *

    - *

    Input

    + *

    Input

    *

    * [Input description] *

    *

    - *

    Output

    + *

    Output

    *

    * [Output description] *

    *

    - *

    Examples

    + *

    Examples

    *
      *    java
      *      -jar GenomeAnalysisTK.jar
    diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
    index 7f2cdd3d0..c1b484542 100644
    --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
    +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
    @@ -86,17 +86,17 @@ import static org.broadinstitute.sting.utils.variant.GATKVCFUtils.getVCFHeadersF
      * Performs physical phasing of SNP calls, based on sequencing reads.
      * 

    * - *

    Input

    + *

    Input

    *

    * VCF file of SNP calls, BAM file of sequence reads. *

    * - *

    Output

    + *

    Output

    *

    * Phased VCF file. *

    * - *

    Examples

    + *

    Examples

    *
      *    java
      *      -jar GenomeAnalysisTK.jar
    diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java
    index d6a814ee8..6af39c0b0 100644
    --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java
    +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java
    @@ -99,14 +99,14 @@ import static org.broadinstitute.sting.utils.IndelUtils.isInsideExtendedIndel;
      *  

    * * - *

    Input

    + *

    Input

    *

    * A BAM file to make calls on and a VCF file to use as a truth validation dataset. * * You also have the option to invert the roles of the files using the command line options listed below. *

    * - *

    Output

    + *

    Output

    *

    * GenotypeAndValidate has two outputs. The truth table and the optional VCF file. The truth table is a * 2x2 table correlating what was called in the dataset with the truth of the call (whether it's a true @@ -176,7 +176,7 @@ import static org.broadinstitute.sting.utils.IndelUtils.isInsideExtendedIndel; * * * - *

    Examples

    + *

    Examples

    *
      *
    1. * Genotypes BAM file from new technology using the VCF as a truth dataset: diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java index 5c216928b..d587c305e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java @@ -85,17 +85,17 @@ import java.util.*; * * User can additionally restrict output to a particular type of variant (SNP, Indel, etc.) * - *

      Input

      + *

      Input

      *

      * One or more variant sets to choose from. *

      * - *

      Output

      + *

      Output

      *

      * A sites-only VCF with the desired number of randomly selected sites. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
      index f2120213a..22425e62e 100644
      --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
      +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
      @@ -81,7 +81,7 @@ import java.util.*;
        * to the desired level but also has the information necessary to pull out more variants for a higher sensitivity but a
        * slightly lower quality level.
        *
      - * 

      Input

      + *

      Input

      *

      * The input raw variants to be recalibrated. *

      @@ -89,11 +89,11 @@ import java.util.*; *

      * The tranches file that was generated by the VariantRecalibrator walker. * - *

      Output

      + *

      Output

      *

      * A recalibrated VCF file in which each variant is annotated with its VQSLOD and filtered if the score is below the desired quality level. * - *

      Examples

      + *

      Examples

      *
        * java -Xmx3g -jar GenomeAnalysisTK.jar \
        *   -T ApplyRecalibration \
      diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
      index 57d9c219c..99d926ea5 100644
      --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
      +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
      @@ -80,6 +80,7 @@ import java.util.*;
        *
        * 

      * This walker is the first pass in a two-stage processing step. This walker is designed to be used in conjunction with the ApplyRecalibration walker. + *

      * *

      * The purpose of the variant recalibrator is to assign a well-calibrated probability to each variant call in a call set. @@ -91,24 +92,26 @@ import java.util.*; * error model can then be applied to both known and novel variation discovered in the call set of interest to evaluate the * probability that each call is real. The score that gets added to the INFO field of each variant is called the VQSLOD. It is * the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model. + *

      * *

      * NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version). * See http://www.r-project.org for more info on how to download and install R. + *

      * - *

      Input

      + *

      Input

      *

      * The input raw variants to be recalibrated. *

      * Known, truth, and training sets to be used by the algorithm. How these various sets are used is described below. * - *

      Output

      + *

      Output

      *

      * A recalibration table file in VCF format that is used by the ApplyRecalibration walker. *

      * A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data. * - *

      Example

      + *

      Example

      *
        * java -Xmx4g -jar GenomeAnalysisTK.jar \
        *   -T VariantRecalibrator \
      @@ -152,7 +155,7 @@ public class VariantRecalibrator extends RodWalker> resource = Collections.emptyList();
       
           /////////////////////////////
      @@ -170,7 +173,7 @@ public class VariantRecalibrator extends RodWalkerInput
      + * 

      Input

      *

      * A variant set to regenotype. *

      * - *

      Output

      + *

      Output

      *

      * A re-genotyped VCF. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java
      index 5469b38c8..a16fdcaa1 100644
      --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java
      +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java
      @@ -53,17 +53,17 @@ package org.broadinstitute.sting.utils.recalibration.covariates;
        * [Functionality of this walker]
        * 

      *

      - *

      Input

      + *

      Input

      *

      * [Input description] *

      *

      - *

      Output

      + *

      Output

      *

      * [Output description] *

      *

      - *

      Examples

      + *

      Examples

      *
        *    java
        *      -jar GenomeAnalysisTK.jar
      diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java
      index bb55ed0c5..4267c1ffd 100644
      --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java
      +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java
      @@ -53,17 +53,17 @@ package org.broadinstitute.sting.utils.recalibration.covariates;
        * [Functionality of this walker]
        * 

      *

      - *

      Input

      + *

      Input

      *

      * [Input description] *

      *

      - *

      Output

      + *

      Output

      *

      * [Output description] *

      *

      - *

      Examples

      + *

      Examples

      *
        *    java
        *      -jar GenomeAnalysisTK.jar
      diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java
      index 9ade37019..045b21527 100644
      --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java
      +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java
      @@ -53,17 +53,17 @@ package org.broadinstitute.sting.utils.recalibration.covariates;
        * [Functionality of this walker]
        * 

      *

      - *

      Input

      + *

      Input

      *

      * [Input description] *

      *

      - *

      Output

      + *

      Output

      *

      * [Output description] *

      *

      - *

      Examples

      + *

      Examples

      *
        *    java
        *      -jar GenomeAnalysisTK.jar
      diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java
      index c93f68ef8..5a308928d 100644
      --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java
      +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java
      @@ -74,10 +74,10 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
       
           @DataProvider(name = "data")
           public Object[][] createData() {
      -        new TestParams(privateTestDir + "diffTestMaster.vcf", privateTestDir + "diffTestTest.vcf", true, "aea3d5df32a2acd400da48d06b4dbc60");
      -        new TestParams(publicTestDir + "exampleBAM.bam", publicTestDir + "exampleBAM.simple.bam", true, "3f46f5a964f7c34015d972256fe49a35");
      -        new TestParams(privateTestDir + "diffTestMaster.vcf", privateTestDir + "diffTestTest.vcf", false, "e71e23e7ebfbe768e59527bc62f8918d");
      -        new TestParams(publicTestDir + "exampleBAM.bam", publicTestDir + "exampleBAM.simple.bam", false, "47bf16c27c9e2c657a7e1d13f20880c9");
      +        new TestParams(privateTestDir + "diffTestMaster.vcf", privateTestDir + "diffTestTest.vcf", true, "71869ddf9665773a842a9def4cc5f3c8");
      +        new TestParams(publicTestDir + "exampleBAM.bam", publicTestDir + "exampleBAM.simple.bam", true, "cec7c644c84ef9c96aacaed604d9ec9b");
      +        new TestParams(privateTestDir + "diffTestMaster.vcf", privateTestDir + "diffTestTest.vcf", false, "47546e03344103020e49d8037a7e0727");
      +        new TestParams(publicTestDir + "exampleBAM.bam", publicTestDir + "exampleBAM.simple.bam", false, "d27b37f7a366c8dacca5cd2590d3c6ce");
               return TestParams.getTests(TestParams.class);
           }
       
      diff --git a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd
      index dc7a08287..4a49cf932 100644
      --- a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd
      +++ b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd
      @@ -19,9 +19,11 @@ Medical and Population Genetics Program
       Maintainer: Kiran Garimella
       }
       \references{
      -GSA wiki page: http://www.broadinstitute.org/gatk
      +GATK website: http://www.broadinstitute.org/gatk
       
      -GATK help forum: http://www.broadinstitute.org/gatk
      +GATK documentation guide: http://www.broadinstitute.org/gatk/guide
      +
      +GATK help forum: http://gatkforums.broadinstitute.org
       }
       \examples{
       ## get script arguments in interactive and non-interactive mode
      diff --git a/public/doc/README b/public/doc/README
      index ec5fa8500..e70ced0df 100644
      --- a/public/doc/README
      +++ b/public/doc/README
      @@ -59,7 +59,7 @@ index (.fasta.fai).
       
       Instructions for preparing input files are available here:
       
      -http://www.broadinstitute.org/gsa/wiki/index.php/Preparing_input_files
      +http://www.broadinstitute.org/gatk/guide/article?id=1204
       
       The bundled 'resources' directory  contains an example BAM and fasta.
       
      @@ -69,7 +69,7 @@ The GATK is distributed with a few standard analyses, including PrintReads,
       Pileup, and DepthOfCoverage.  More information on the included walkers is
       available here:
       
      -http://www.broadinstitute.org/gsa/wiki/index.php/Built-in_walkers
      +http://www.broadinstitute.org/gatk/gatkdocs
       
       To print the reads of the included sample data, untar the package into
       the GenomeAnalysisTK directory and run the following command:
      @@ -81,6 +81,6 @@ java -jar GenomeAnalysisTK/GenomeAnalysisTK.jar \
       
       Support
       -------
      -Documentation for the GATK is available at http://www.broadinstitute.org/gsa/wiki.  
      +Documentation for the GATK is available at http://www.broadinstitute.org/gatk/guide.
       For help using the GATK, developing analyses with the GATK, bug reports, 
      -or feature requests, please email gsadevelopers@broadinstitute.org.
      +or feature requests, please visit our support forum at http://gatkforums.broadinstitute.org/
      diff --git a/public/java/src/org/broadinstitute/sting/alignment/CheckAlignment.java b/public/java/src/org/broadinstitute/sting/alignment/CheckAlignment.java
      index 93b4d5e6f..d313f35ce 100644
      --- a/public/java/src/org/broadinstitute/sting/alignment/CheckAlignment.java
      +++ b/public/java/src/org/broadinstitute/sting/alignment/CheckAlignment.java
      @@ -42,9 +42,14 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
       import java.util.Iterator;
       
       /**
      - * Validates consistency of the aligner interface by taking reads already aligned by BWA in a BAM file, stripping them
      + * Validates consistency of the aligner interface
      + *
      + * 

      Validates consistency of the aligner interface by taking reads already aligned by BWA in a BAM file, stripping them * of their alignment data, realigning them, and making sure one of the best resulting realignments matches the original - * alignment from the input file. + * alignment from the input file.

      + * + *

      Caveat

      + *

      This tool requires that BWA be available on the java path.

      * * @author mhanna * @version 0.1 diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index 08aa5f8b3..cf11bb61c 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -370,7 +370,7 @@ public abstract class CommandLineProgram { errorPrintf("------------------------------------------------------------------------------------------%n"); errorPrintf("A GATK RUNTIME ERROR has occurred (version %s):%n", CommandLineGATK.getVersionNumber()); errorPrintf("%n"); - errorPrintf("Please visit the wiki to see if this is a known problem%n"); + errorPrintf("Please check the documentation guide to see if this is a known problem%n"); errorPrintf("If not, please post the error, with stack trace, to the GATK forum%n"); printDocumentationReference(); if ( msg == null ) // some exceptions don't have detailed messages diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index a3e19b944..a9016708b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -206,7 +206,7 @@ public class GATKArgumentCollection { * Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. * Please be aware that one should only run recalibration with the covariates file created on the same input bam(s). 
*/ - @Input(fullName="BQSR", shortName="BQSR", required=false, doc="The input covariates table file which enables on-the-fly base quality score recalibration") + @Input(fullName="BQSR", shortName="BQSR", required=false, doc="The input covariates table file which enables on-the-fly base quality score recalibration (intended for use with BaseRecalibrator and PrintReads)") public File BQSR_RECAL_FILE = null; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java index 362cb202e..fcae3cc68 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java @@ -41,17 +41,17 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; * [Functionality of this walker] *

      * - *

      Input

      + *

      Input

      *

      * [Input description] *

      * - *

      Output

      + *

      Output

      *

      * [Output description] *

      * - *

      Examples

      + *

      Examples

      *
        *    java
        *      -jar GenomeAnalysisTK.jar
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java
      index e0166ab38..41ab59845 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java
      @@ -37,18 +37,18 @@ import org.broadinstitute.sting.commandline.Argument;
        *  

      * * - *

      Input

      + *

      Input

      *

      * BAM file(s) *

      * * - *

      Output

      + *

      Output

      *

      * BAM file(s) with all reads mapping qualities reassigned *

      * - *

      Examples

      + *

      Examples

      *
        *    java
        *      -jar GenomeAnalysisTK.jar
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignOneMappingQualityFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignOneMappingQualityFilter.java
      index c894dd801..f31313a86 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignOneMappingQualityFilter.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignOneMappingQualityFilter.java
      @@ -47,18 +47,18 @@ import org.broadinstitute.sting.commandline.Argument;
        *  

      * * - *

      Input

      + *

      Input

      *

      * BAM file(s) *

      * * - *

      Output

      + *

      Output

      *

      * BAM file(s) with one read mapping quality selectively reassigned as desired *

      * - *

      Examples

      + *

      Examples

      *
        *    java
        *      -jar GenomeAnalysisTK.jar
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java
      index 73c31ef66..6e7bc9805 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java
      @@ -46,7 +46,7 @@ import java.util.Map;
       
       
       /**
      - * The allele balance (fraction of ref bases over ref + alt bases) across all bialleleic het-called samples
      + * The allele balance (fraction of ref bases over ref + alt bases) across all biallelic het-called samples
        */
       public class AlleleBalance extends InfoFieldAnnotation {
       
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
      index 826dc9f22..fa3ab885d 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
      @@ -55,17 +55,17 @@ import java.util.*;
        * VariantAnnotator is a GATK tool for annotating variant calls based on their context.
        * The tool is modular; new annotations can be written easily without modifying VariantAnnotator itself.
        *
      - * 

      Input

      + *

      Input

      *

      * A variant set to annotate and optionally one or more BAM files. *

      * - *

      Output

      + *

      Output

      *

      * An annotated VCF. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      @@ -142,7 +142,8 @@ public class VariantAnnotator extends RodWalker implements Ann
           protected List annotationsToExclude = new ArrayList();
       
           /**
      -     * See the -list argument to view available groups.
      +     * If specified, all available annotations in the group will be applied. See the VariantAnnotator -list argument to view available groups.
      +     * Keep in mind that RODRequiringAnnotations are not intended to be used as a group, because they require specific ROD inputs.
            */
           @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
           protected List annotationGroupsToUse = new ArrayList();
      @@ -166,13 +167,13 @@ public class VariantAnnotator extends RodWalker implements Ann
           /**
            * Note that the --list argument requires a fully resolved and correct command-line to work.
            */
      -    @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
      +    @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit", required=false)
           protected Boolean LIST = false;
       
           /**
            * By default, the dbSNP ID is added only when the ID field in the variant VCF is empty.
            */
      -    @Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="In conjunction with the dbSNP binding, append the dbSNP ID even when the variant VCF already has the ID field populated")
      +    @Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="In conjunction with the dbSNP binding, append the dbSNP ID even when the variant VCF already has the ID field populated", required=false)
           protected Boolean ALWAYS_APPEND_DBSNP_ID = false;
           public boolean alwaysAppendDbsnpId() { return ALWAYS_APPEND_DBSNP_ID; }
       
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java
      index 2e85fe8f9..4b96dbffb 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java
      @@ -61,7 +61,7 @@ import static java.lang.Math.log10;
        * Note that this walker requires all input files produced by Beagle.
        *
        *
      - * 

      Example

      + *

      Example

      *
        *     java -Xmx4000m -jar dist/GenomeAnalysisTK.jar \
        *      -R reffile.fasta -T BeagleOutputToVCF \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java
      index 937c3abc0..618fda0df 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java
      @@ -57,7 +57,7 @@ import java.util.*;
        *  Converts the input VCF into a format accepted by the Beagle imputation/analysis program.
        * 

      * - *

      Input

      + *

      Input

      *

      * A VCF with variants to convert to Beagle format *

      @@ -70,7 +70,7 @@ import java.util.*; * Optional: A file with a list of markers *

      * - *

      Examples

      + *

      Examples

      *
        *     java -Xmx2g -jar dist/GenomeAnalysisTK.jar -L 20 \
        *      -R reffile.fasta -T ProduceBeagleInput \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java
      index 0681ebf1e..a2efa626c 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java
      @@ -70,12 +70,12 @@ import java.io.PrintStream;
        * 
        * 

      *

      - *

      Input

      + *

      Input

      *

      * A BAM file containing exactly one sample. *

      *

      - *

      Output

      + *

      Output

      *

      *

        *
      • -o: a OutputFormatted (recommended BED) file with the callable status covering each base
      • @@ -83,7 +83,7 @@ import java.io.PrintStream; *
      *

      *

      - *

      Examples

      + *

      Examples

      *
        *     -T CallableLociWalker \
        *     -I my.bam \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java
      index 3bd114aa1..61574d947 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java
      @@ -66,7 +66,7 @@ import java.util.*;
        * and/or percentage of bases covered to or beyond a threshold.
        * Additionally, reads and bases can be filtered by mapping or base quality score.
        *
      - * 

      Input

      + *

      Input

      *

      * One or more bam files (with proper headers) to be analyzed for coverage statistics *

      @@ -75,7 +75,7 @@ import java.util.*; *

      * (for information about creating the REFSEQ Rod, please consult the RefSeqCodec documentation) *

      - *

      Output

      + *

      Output

      *

      * Tables pertaining to different coverage summaries. Suffix on the table files declares the contents: *

      @@ -98,7 +98,7 @@ import java.util.*; * - _cumulative_coverage_proportions: proprotions of loci with >= X coverage, aggregated over all bases *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java
      index 9a6ef61d8..2975df4a5 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java
      @@ -44,21 +44,21 @@ import java.util.List;
        * Walks along reference and calculates the GC content for each interval.
        *
        *
      - * 

      Input

      + *

      Input

      *

      * A reference file *

      * - *

      Output

      + *

      Output

      *

      * GC content calculations per interval. *

      * - *

      Examples

      + *

      Example

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
      - *   -R ref.fasta \
        *   -T GCContentByInterval \
      + *   -R ref.fasta \
        *   -o output.txt \
        *   -L input.intervals
        * 
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/CoveredByNSamplesSites.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/CoveredByNSamplesSites.java index a5a8edb0c..169c2708b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/CoveredByNSamplesSites.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/CoveredByNSamplesSites.java @@ -50,17 +50,17 @@ import java.util.Collection; * CoveredByNSamplesSites is a GATK tool for filter out sites based on their coverage. * The sites that pass the filter are printed out to an intervals file. * - *

      Input

      + *

      Input

      *

      * A variant file and optionally min coverage and sample percentage values. *

      * - *

      Output

      + *

      Output

      *

      * An intervals file. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java
      index 76f5478a4..86676ca54 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java
      @@ -49,12 +49,12 @@ import java.io.PrintStream;
        * Emits a GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate for each read
        * group in the input BAMs FOR ONLY THE FIRST OF PAIR READS.
        *
      - * 

      Input

      + *

      Input

      *

      * Any number of BAM files *

      * - *

      Output

      + *

      Output

      *

      * GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate. * @@ -82,7 +82,7 @@ import java.io.PrintStream; *

      *

      * - *

      Examples

      + *

      Examples

      *
        *    java
        *      -jar GenomeAnalysisTK.jar
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java
      index de7ac3e41..0af1dbed5 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java
      @@ -53,12 +53,12 @@ import java.util.Map;
        * the median statistics are well determined.  It is safe to run it WG and it'll finish in an appropriate
        * timeframe.
        *
      - * 

      Input

      + *

      Input

      *

      * Any number of BAM files *

      * - *

      Output

      + *

      Output

      *

      * GATKReport containing read group, sample, library, platform, center, median insert size and median read length. * @@ -86,7 +86,7 @@ import java.util.Map; *

      *

      * - *

      Examples

      + *

      Examples

      *
        *    java
        *      -jar GenomeAnalysisTK.jar
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java
      index ccad7f0b2..a269a94bc 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java
      @@ -49,17 +49,17 @@ import java.util.List;
        *  

      * * - *

      Input

      + *

      Input

      *

      * A BAM file. *

      * - *

      Output

      + *

      Output

      *

      * A human/R readable table of tab separated values with one column per sample and one row per read. *

      * - *

      Examples

      + *

      Examples

      *
        *    java
        *      -jar GenomeAnalysisTK.jar
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java
      index 7ac59790c..c909eb2d5 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java
      @@ -83,7 +83,7 @@ public class DiffEngine {
                   DiffElement masterElt = master.getElement(name);
                   DiffElement testElt = test.getElement(name);
                   if ( masterElt == null && testElt == null ) {
      -                throw new ReviewedStingException("BUG: unexceptedly got two null elements for field: " + name);
      +                throw new ReviewedStingException("BUG: unexpectedly got two null elements for field: " + name);
                   } else if ( masterElt == null || testElt == null ) { // if either is null, we are missing a value
                       // todo -- should one of these be a special MISSING item?
                       diffs.add(new Difference(masterElt, testElt));
      @@ -283,8 +283,7 @@ public class DiffEngine {
               // now that we have a specific list of values we want to show, display them
               GATKReport report = new GATKReport();
               final String tableName = "differences";
      -        // TODO for Geraldine -- link needs to be updated below
      -        report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", 3);
      +        report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gatk/guide/article?id=1299 for more information", 3);
               final GATKReportTable table = report.getTable(tableName);
               table.addColumn("Difference");
               table.addColumn("NumberOfOccurrences");
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java
      index d1903c2bb..6b5189dfd 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java
      @@ -68,12 +68,12 @@ import java.util.List;
        *      The reason for this system is that it allows you to compare two structured files -- such as BAMs and VCFs -- for common differences among them.  This is primarily useful in regression testing or optimization, where you want to ensure that the differences are those that you expect and not any others.
        * 

      * - *

      Input

      + *

      Input

      *

      * The DiffObjectsWalker works with BAM or VCF files. *

      * - *

      Output

      + *

      Output

      *

      * The DiffEngine system compares to two hierarchical data structures for specific differences in the values of named * nodes. Suppose I have two trees: @@ -132,6 +132,10 @@ import java.util.List; [testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000598.AC 1

      * + *

      Caveat

      + *

      Because this is a walker, it requires that you pass a reference file. However the reference is not actually used, so it does not matter what you pass as reference.

      + * + * * @author Mark DePristo * @since 7/4/11 */ @@ -140,8 +144,7 @@ public class DiffObjects extends RodWalker { /** * Writes out a file of the DiffEngine format: * - * TODO for Geraldine -- link needs to be updated below (and also in SelectVariants and RefSeqCodec GATK docs) - * http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine + * See http://www.broadinstitute.org/gatk/guide/article?id=1299 for details. */ @Output(doc="File to which results should be written",required=true) protected PrintStream out; @@ -169,7 +172,7 @@ public class DiffObjects extends RodWalker { @Argument(fullName="maxObjectsToRead", shortName="motr", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false) int MAX_OBJECTS_TO_READ = -1; - @Argument(fullName="maxRawDiffsToSummary", shortName="maxRawDiffsToSummary", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false) + @Argument(fullName="maxRawDiffsToSummarize", shortName="maxRawDiffsToSummarize", doc="Max. number of differences to include in the summary. -1 [default] means unlimited", required=false) int maxRawDiffsToSummary = -1; @Argument(fullName="doPairwise", shortName="doPairwise", doc="If provided, we will compute the minimum pairwise differences to summary, which can be extremely expensive", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java index e881315b9..d2f2e32b3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java @@ -60,17 +60,17 @@ import java.util.List; * 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions). 
* Reference bases for each interval will be output as a separate fasta sequence (named numerically in order). * - *

      Input

      + *

      Input

      *

      * The reference, requested intervals, and any number of variant rod files. *

      * - *

      Output

      + *

      Output

      *

      * A fasta file representing the requested intervals. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java
      index f2f5fb5fe..fb7941fec 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java
      @@ -48,17 +48,17 @@ import java.io.PrintStream;
        * Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a
        * separate fasta sequence (named numerically in order).
        *
      - * 

      Input

      + *

      Input

      *

      * The reference and requested intervals. *

      * - *

      Output

      + *

      Output

      *

      * A fasta file representing the requested intervals. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java
      index 9fbaca14e..8883523d9 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java
      @@ -38,7 +38,27 @@ import org.broadinstitute.sting.utils.help.HelpConstants;
       import java.io.PrintStream;
       
       /**
      - * Calculates basic statistics about the reference sequence itself
      + * Calculate basic statistics about the reference sequence itself
      + *
      + * 

      These are very basic statistics: total number of bases and number of "regular" bases (i.e. A, C, T or G).

      + * + *

      Input

      + *

      + * A FASTA reference file. + *

      + * + *

      Output

      + *

      + * Base counts are written to file if an output file name is given (with -o), otherwise output to stdout. + *

      + * + *

      Example

      + *
      + * java -Xmx2g -jar GenomeAnalysisTK.jar \
      + *   -T FastaStats \
      + *   -R ref.fasta \
      + *   [-o output.txt]
      + * 
      */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) public class FastaStats extends RefWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java index 61a847f4c..c59c61803 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java @@ -55,17 +55,17 @@ import java.util.*; * VariantFiltration is a GATK tool for hard-filtering variant calls based on certain criteria. * Records are hard-filtered by changing the value in the FILTER field to something other than PASS. * - *

      Input

      + *

      Input

      *

      * A variant set to filter. *

      * - *

      Output

      + *

      Output

      *

      * A filtered VCF. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      @@ -114,7 +114,7 @@ public class VariantFiltration extends RodWalker {
            * One can filter normally based on most fields (e.g. "GQ < 5.0"), but the GT (genotype) field is an exception. We have put in convenience
            * methods so that one can now filter out hets ("isHet == 1"), refs ("isHomRef == 1"), or homs ("isHomVar == 1").
            */
      -    @Argument(fullName="genotypeFilterExpression", shortName="G_filter", doc="One or more expression used with FORMAT (sample/genotype-level) fields to filter (see wiki docs for more info)", required=false)
      +    @Argument(fullName="genotypeFilterExpression", shortName="G_filter", doc="One or more expression used with FORMAT (sample/genotype-level) fields to filter (see documentation guide for more info)", required=false)
           protected ArrayList GENOTYPE_FILTER_EXPS = new ArrayList();
       
           /**
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountBases.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountBases.java
      index 503cdb6d6..8b82e50a7 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountBases.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountBases.java
      @@ -38,17 +38,17 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
       /**
        * Walks over the input data set, calculating the number of bases seen for diagnostic purposes.
        *
      - * 

      Input

      + *

      Input

      *

      * One or more BAM files. *

      * - *

      Output

      + *

      Output

      *

      * Number of bases seen. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java
      index 3b8eba398..e7b6df623 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java
      @@ -45,9 +45,42 @@ import java.util.Collections;
       import java.util.List;
       
       /**
      - * Counts the number of contiguous regions the walker traverses over. Slower than it needs to be, but
      - * very useful since overlapping intervals get merged, so you can count the number of intervals the GATK merges down to.
      - * This was its very first use.
      + * Count contiguous regions in an interval list.
      + *
      + * 

      When the GATK reads in intervals from an intervals list, any intervals that overlap each other get merged into + * a single interval spanning the original ones. For example, if you have the following intervals: + *

      • + * 20:1-2000 + *
      • + * 20:1500-3000 + *
      + * They will be merged into a single interval: + *
      • 20:1-3000
      + * + * This tool allows you to check, for a given list of intervals, how many separate intervals the GATK will actually + * distinguish at runtime. + *

      + * + *

      Input

      + *

      + * One or more rod files containing intervals to check. + *

      + * + *

      Output

      + *

      + * Number of separate intervals identified by GATK after merging overlapping intervals. + *

      + * + * You can use the -numOverlaps argument to find out how many cases you have of a specific number of overlaps. + * + *

      Example

      + *
      + * java -Xmx2g -jar GenomeAnalysisTK.jar \
      + *   -T CountIntervals \
      + *   -R ref.fasta \
      + *   -o output.txt \
      + *   -check intervals.list
      + * 
      */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) public class CountIntervals extends RefWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java index f2bd791c1..d999dfebf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java @@ -42,33 +42,34 @@ import java.io.PrintStream; * Walks over the input data set, calculating the total number of covered loci for diagnostic purposes. * *

      - * Simplest example of a locus walker. + * This is the simplest example of a locus walker. + *

      * - * - *

      Input

      + *

      Input

      *

      * One or more BAM files. *

      * - *

      Output

      + *

      Output

      *

      - * Number of loci traversed. + * Number of loci traversed. If an output file name is provided, then the result will be written to that file. + * Otherwise it will be sent to standard console output. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
      - *   -R ref.fasta \
        *   -T CountLoci \
      - *   -o output.txt \
      + *   -R ref.fasta \
        *   -I input.bam \
      + *   -o output.txt \
        *   [-L input.intervals]
        * 
      * */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) public class CountLoci extends LocusWalker implements TreeReducible, NanoSchedulable { - @Output(doc="Write count to this file instead of STDOUT") + @Output PrintStream out; public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMales.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMales.java index 6fb4b84d6..7279a64a4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMales.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMales.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -37,12 +38,36 @@ import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.HelpConstants; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import java.io.PrintStream; + /** * Walks over the input data set, calculating the number of reads seen from male samples for diagnostic purposes. + * + *

      Input

      + *

      + * One or more BAM files. + *

      + * + *

      Output

      + *

      + * Number of reads seen from male samples. + *

      + * + *

      Examples

      + *
      + * java -Xmx2g -jar GenomeAnalysisTK.jar \
      + *   -T CountMales \
      + *   -R ref.fasta \
      + *   -I samples.bam \
      + *   -o output.txt
      + * 
      */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS, DataSource.REFERENCE}) public class CountMales extends ReadWalker { + @Output + public PrintStream out; + public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) { Sample sample = getSampleDB().getSample(read); return sample.getGender() == Gender.MALE ? 1 : 0; @@ -53,4 +78,8 @@ public class CountMales extends ReadWalker { public Integer reduce(Integer value, Integer sum) { return value + sum; } + + public void onTraversalDone( Integer c ) { + out.println(c); + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java index c01a1df89..65f82efe4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java @@ -53,22 +53,32 @@ import java.util.*; /** * Prints out counts of the number of reference ordered data objects encountered. * + *

      CountRODs is a RodWalker, and so traverses the data by ROD. For example, if the ROD passed to it is a VCF file, + * it will count the variants in the file.

      * - *

      Input

      + *

      Note that this tool is different from CountRODsByRef, which is a RefWalker, and so traverses the data by + * position along the reference. CountRODsByRef can count ROD elements (such as, but not limited to, variants) found + * at each position or within specific intervals if you use the -L argument (see CommandLineGATK).

      + * + *

      Both these tools are different from CountVariants in that they are more generic (they can also count RODs that + * are not variants) and CountVariants is more detailed, in that it computes additional statistics (type of variants + * being indels vs. SNPs etc).

      + * + *

      Input

      *

      * One or more rod files. *

      * - *

      Output

      + *

      Output

      *

      * Number of rods seen. *

      * - *

      Examples

      + *

      Example

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
      - *   -R ref.fasta \
        *   -T CountRODs \
      + *   -R ref.fasta \
        *   -o output.txt \
        *   --rod input.vcf
        * 
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java index 303f1704f..594ca239d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java @@ -43,24 +43,34 @@ import java.util.Collections; import java.util.List; /** - * Prints out counts of the number of reference ordered data objects encountered. + * Prints out counts of the number of reference ordered data objects encountered along the reference. * + *

      CountRODsByRef is a RefWalker, and so traverses the data by position along the reference. It counts ROD + * elements (such as, but not limited to, variants) found at each position or within specific intervals if you use + * the -L argument (see CommandLineGATK).

      * - *

      Input

      + *

      Note that this tool is different from the basic CountRODs, which is a RODWalker, and so traverses the data by + * ROD. For example, if the ROD passed to it is a VCF file, CountRODs will simply count the variants in the file.

      + * + *

      Both these tools are different from CountVariants in that they are more generic (they can also count RODs that + * are not variants) and CountVariants is more detailed, in that it computes additional statistics (type of variants + * being indels vs. SNPs etc).

      + * + *

      Input

      *

      * One or more rod files. *

      * - *

      Output

      + *

      Output

      *

      * Number of rods seen. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
      - *   -R ref.fasta \
        *   -T CountRODsByRef \
      + *   -R ref.fasta \
        *   -o output.txt \
        *   --rod input.vcf
        * 
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadEvents.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadEvents.java index 8b0646092..cfb7325a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadEvents.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadEvents.java @@ -47,22 +47,22 @@ import java.util.Map; /** * Walks over the input data set, counting the number of read events (from the CIGAR operator) * - *

      Input

      + *

      Input

      *

      * One or more BAM files. *

      * - *

      Output

      + *

      Output

      *

      - * Number of reads events for each category + * Number of read events for each category, formatted as a GATKReport table. * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
      - *   -R ref.fasta \
        *   -T CountReadEvents \
      - *   -o output.grp \
      + *   -R ref.fasta \
        *   -I input.bam \
      + *   -o output.grp \
        *   [-L input.intervals]
        * 
      */ @@ -70,7 +70,7 @@ import java.util.Map; @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS, DataSource.REFERENCE}) public class CountReadEvents extends ReadWalker> , Map>> { - @Output (doc = "GATKReport table output") + @Output PrintStream out; public Map> map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java index 1a3984014..825fcac90 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java @@ -44,17 +44,17 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; * --read-filter command line argument). Simplest example of a read-backed analysis. * * - *

      Input

      + *

      Input

      *

      * One or more BAM files. *

      * - *

      Output

      + *

      Output

      *

      * Number of reads seen. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountTerminusEvent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountTerminusEvent.java
      index 40b78588f..54562aa43 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountTerminusEvent.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountTerminusEvent.java
      @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.qc;
       
       import net.sf.samtools.CigarElement;
       import net.sf.samtools.CigarOperator;
      +import org.broadinstitute.sting.commandline.Output;
       import org.broadinstitute.sting.gatk.CommandLineGATK;
       import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
       import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
      @@ -39,22 +40,23 @@ import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
       import org.broadinstitute.sting.utils.help.HelpConstants;
       import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
       
      +import java.io.PrintStream;
       import java.util.List;
       
       /**
        * Walks over the input data set, counting the number of reads ending in insertions/deletions or soft-clips
        *
      - * 

      Input

      + *

      Input

      *

      * One or more BAM files. *

      * - *

      Output

      + *

      Output

      *

      * Number of reads ending in each category. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      @@ -67,6 +69,9 @@ import java.util.List;
       @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
       @Requires({DataSource.READS, DataSource.REFERENCE})
       public class CountTerminusEvent extends ReadWalker, Pair> {
      +    @Output
      +    public PrintStream out;
      +
           public Pair map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
               List cigarElements = read.getCigar().getCigarElements();
       
      @@ -94,6 +99,6 @@ public class CountTerminusEvent extends ReadWalker, Pair result) {
      -        System.out.println(String.format("\tReads ending in indels : %d\n\tReads ending in soft-clips: %d\n", result.getFirst(), result.getSecond()));
      +        out.println(String.format("\tReads ending in indels : %d\n\tReads ending in soft-clips: %d\n", result.getFirst(), result.getSecond()));
           }
       }
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/FlagStat.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/FlagStat.java
      index d0a3f3508..17fb4e322 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/FlagStat.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/FlagStat.java
      @@ -41,36 +41,31 @@ import java.io.PrintStream;
       import java.text.DecimalFormat;
       import java.text.NumberFormat;
       
      -
      -/*
      - * Copyright (c) 2009 The Broad Institute
      - *
      - * Permission is hereby granted, free of charge, to any person
      - * obtaining a copy of this software and associated documentation
      - * files (the "Software"), to deal in the Software without
      - * restriction, including without limitation the rights to use,
      - * copy, modify, merge, publish, distribute, sublicense, and/or sell
      - * copies of the Software, and to permit persons to whom the
      - * Software is furnished to do so, subject to the following
      - * conditions:
      - *
      - * The above copyright notice and this permission notice shall be
      - * included in all copies or substantial portions of the Software.
      - *
      - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
      - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
      - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
      - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
      - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
      - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
      - * OTHER DEALINGS IN THE SOFTWARE.
      - */
      -
       /**
      - * A reimplementation of the 'samtools flagstat' subcommand in the GATK.  Walks
      - * over all input data, accumulating statistics such as total number of reads,
      - * reads with QC failure flag set, number of duplicates, percentage mapped, etc.
      + * A reimplementation of the 'samtools flagstat' subcommand in the GATK
      + *
      + * 

      This tool walks over all input data, accumulating statistics such as total number of reads, + * reads with QC failure flag set, number of duplicates, percentage mapped, etc.

      + * + *

      Input

      + *

      + * A BAM file containing the sequence data. + *

      + * + *

      Output

      + *

      + * Resulting stats are written to file if an output file name is given (with -o), otherwise output to stdout. + *

      + * + *

      Example

      + *
      + * java -Xmx2g -jar GenomeAnalysisTK.jar \
      + *   -T FlagStat \
      + *   -R ref.fasta \
      + *   -I reads.bam \
      + *   [-o output.txt]
      + * 
      + * * @author aaron */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/Pileup.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/Pileup.java index 0790f2ced..bc98c670a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/Pileup.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/Pileup.java @@ -49,11 +49,33 @@ import java.util.Collections; import java.util.List; /** - * Prints the alignment in something similar to the samtools pileup format. Each line represents a genomic position, + * Emulates the samtools pileup command to print aligned reads + * + *

      Prints the alignment in something similar to the samtools pileup format. Each line represents a genomic position, * consisting of chromosome name, coordinate, reference base, read bases, and read qualities. * - * Associated command: + * Emulated command: * samtools pileup [-f in.ref.fasta] [-t in.ref_list] [-l in.site_list] [-iscg] [-T theta] [-N nHap] [-r pairDiffRate] + * + *

      Input

      + *

      + * A BAM file and the interval to print. + *

      + * + *

      Output

      + *

      + * Formatted pileup-style alignment of reads. + *

      + * + *

      Example

      + *
      + * java -Xmx2g -jar GenomeAnalysisTK.jar \
      + *   -T Pileup \
      + *   -R ref.fasta \
      + *   -I aligned_reads.bam \
      + *   -o output.txt
      + * 
      + * */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) public class Pileup extends LocusWalker implements TreeReducible, NanoSchedulable { @@ -68,7 +90,7 @@ public class Pileup extends LocusWalker implements TreeReducibl * and for each read in the pileup it has the read name, offset in the base string, read length, and read mapping quality. These per * read items are delimited with an '@' character. */ - @Argument(fullName="showVerbose",shortName="verbose",doc="Add an extra verbose section to the pileup output") + @Argument(fullName="showVerbose",shortName="verbose",doc="Add an extra verbose section to the pileup output", required=false) public boolean SHOW_VERBOSE = false; @Input(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRef.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRef.java index 395945f03..48bd6feba 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRef.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRef.java @@ -44,17 +44,17 @@ import java.io.PrintStream; * Quality control for the reference fasta * * - *

      Input

      + *

      Input

      *

      * One reference file only. And optionally -L intervals *

      * - *

      Output

      + *

      Output

      *

      * If ok, nothing, else will throw an exception at the site where there's been a problem *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/ClipReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/ClipReads.java
      index f7b125828..739da5a98 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/ClipReads.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/ClipReads.java
      @@ -88,12 +88,12 @@ import java.util.regex.Pattern;
        *
        * 

      * - *

      Input

      + *

      Input

      *

      * Any number of BAM files. *

      * - *

      Output

      + *

      Output

      *

      * A new BAM file containing all of the reads from the input BAMs with the user-specified clipping * operation applied to each read. @@ -145,7 +145,7 @@ import java.util.regex.Pattern; *

      *

      * - *

      Examples

      + *

      Examples

      *
        *     -T ClipReads -I my.bam -I your.bam -o my_and_your.clipped.bam -R Homo_sapiens_assembly18.fasta \
        *     -XF seqsToClip.fasta -X CCCCC -CT "1-5,11-15" -QT 10
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/PrintReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/PrintReads.java
      index 8a1178574..475f7a25d 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/PrintReads.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/PrintReads.java
      @@ -56,18 +56,24 @@ import java.util.*;
        * PrintReads can dynamically merge the contents of multiple input BAM files, resulting
        * in merged output sorted in coordinate order.  Can also optionally filter reads based on the
        * --read_filter command line argument.
      + * 

      * - *

      Input

      + *

      + * Note that when PrintReads is used as part of the Base Quality Score Recalibration workflow, + * it takes the --BQSR engine argument, which is listed under Inherited Arguments > CommandLineGATK below. + *

      + * + *

      Input

      *

      * One or more bam files. *

      * - *

      Output

      + *

      Output

      *

      * A single processed bam file. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
      index 45c5fe090..c75997e67 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
      @@ -65,13 +65,13 @@ import java.util.List;
        * reasons why the site may fail validation (nearby variation, for example).
        * 

      * - *

      Input

      + *

      Input

      *

      * Requires a VCF containing alleles to design amplicons towards, a VCF of variants to mask out of the amplicons, and an * interval list defining the size of the amplicons around the sites to be validated *

      * - *

      Output

      + *

      Output

      *

      * Output is a FASTA-formatted file with some modifications at probe sites. For instance: *

      @@ -100,7 +100,7 @@ import java.util.List;
        * INDEL_OVERLAPS_VALIDATION_SITE, // an insertion or deletion interferes directly with the site to be validated (i.e. insertion directly preceding or postceding, or a deletion that spans the site itself)
        * 

      * - *

      Examples

      + *

      Examples

      *
        *    java
        *      -jar GenomeAnalysisTK.jar
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java
      index a3e480bd0..06fa455be 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java
      @@ -77,12 +77,12 @@ import java.util.*;
        * evaluation and stratification modules, and by providing a framework that permits the easy development of new evaluation
        * and stratification modules.
        *
      - * 

      Input

      + *

      Input

      *

      * One or more variant sets to evaluate plus any number of comparison sets. *

      * - *

      Output

      + *

      Output

      *

      * Evaluation tables detailing the results of the eval modules which were applied. * For example: @@ -103,7 +103,7 @@ import java.util.*; *

      *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
      index e5fe46a07..436a973df 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
      @@ -68,7 +68,7 @@ import java.util.*;
        * can be exacted using JEXL expressions on the set attribute using SelectVariants.  If you want to extract just
        * the records in common between two VCFs, you would first run CombineVariants on the two files to generate a single
        * VCF and then run SelectVariants to extract the common records with -select 'set == "Intersection"', as worked out
      - * in the detailed example on the wiki.
      + * in the detailed example in the documentation guide.
        *
        * Note that CombineVariants supports multi-threaded parallelism (8/15/12).  This is particularly useful
        * when converting from VCF to BCF2, which can be expensive.  In this case each thread spends CPU time
      @@ -83,17 +83,17 @@ import java.util.*;
        *      max QUAL, which resulted in sometime strange downstream confusion
    2. * * - *

      Input

      + *

      Input

      *

      * One or more variant sets to combine. *

      * - *

      Output

      + *

      Output

      *

      * A combined VCF. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java
      index 65ec7a4f0..e6d3e6e94 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java
      @@ -60,17 +60,17 @@ import java.util.*;
        * place an indel at the left-most position this doesn't always happen, so this tool can be used to left-align them.
        * Note that this tool cannot handle anything other than bi-allelic, simple indels.  Complex events are written out unchanged.
        *
      - * 

      Input

      + *

      Input

      *

      * A variant set to left-align. *

      * - *

      Output

      + *

      Output

      *

      * A left-aligned VCF. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java
      index 17aaa7513..9bbf728e1 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java
      @@ -58,17 +58,17 @@ import java.util.*;
        * SelectHeaders can be used for this purpose. Given a single VCF file, one or more headers can be extracted from the
        * file (based on a complete header name or a pattern match).
        * 

      - *

      Input

      + *

      Input

      *

      * A set of VCFs. *

      *

      - *

      Output

      + *

      Output

      *

      * A header selected VCF. *

      *

      - *

      Examples

      + *

      Examples

      *
        * Select only the FILTER, FORMAT, and INFO headers:
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
      index 9c209ae2c..f72ce3bd6 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
      @@ -62,20 +62,20 @@ import java.util.*;
        * Given a single VCF file, one or more samples can be extracted from the file (based on a complete sample name or a
        * pattern match).  Variants can be further selected by specifying criteria for inclusion, i.e. "DP > 1000" (depth of
        * coverage greater than 1000x), "AF < 0.25" (sites with allele frequency less than 0.25).  These JEXL expressions are
      - * documented in the Using JEXL expressions section (http://www.broadinstitute.org/gsa/wiki/index.php/Using_JEXL_expressions).
      + * documented in the Using JEXL expressions section (http://www.broadinstitute.org/gatk/guide/article?id=1255).
        * One can optionally include concordance or discordance tracks for use in selecting overlapping variants.
        *
      - * 

      Input

      + *

      Input

      *

      * A variant set to select from. *

      * - *

      Output

      + *

      Output

      *

      * A selected VCF. *

      * - *

      Examples

      + *

      Examples

      *
        * Select two samples out of a VCF with many samples:
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
      index a242f9310..d11cf5aee 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
      @@ -60,12 +60,12 @@ import java.util.Set;
        *
        * If you are looking simply to test the adherence to the VCF specification, use --validationType NONE.
        *
      - * 

      Input

      + *

      Input

      *

      * A variant set to validate. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
      index 02089eb6c..0e2a04bf2 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
      @@ -55,12 +55,12 @@ import java.util.*;
        * default is soft-filtered by high no-call rate or low Hardy-Weinberg probability.
        * If you have .ped files, please first convert them to VCF format.
        *
      - * 

      Input

      + *

      Input

      *

      * A validation VCF to annotate. *

      * - *

      Output

      + *

      Output

      *

      * An annotated VCF. Additionally, a table like the following will be output: *

      @@ -74,7 +74,7 @@ import java.util.*;
        * 
      *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
      index b12f51a1e..444eb745c 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
      @@ -62,14 +62,13 @@ import java.util.*;
        * genotypes), NO-CALL (count of no-call genotypes), TYPE (the type of event), VAR (count of
        * non-reference genotypes), NSAMPLES (number of samples), NCALLED (number of called samples),
        * GQ (from the genotype field; works only for a file with a single sample), and MULTI-ALLELIC
      - * (is the record from a multi-allelic site).  Note that this tool does not support capturing any
      - * GENOTYPE field values.  If a VCF record is missing a value, then the tool by
      + * (is the record from a multi-allelic site).  Note that if a VCF record is missing a value, then the tool by
        * default throws an error, but the special value NA can be emitted instead with
        * appropriate tool arguments.
        *
        * 

      * - *

      Input

      + *

      Input

      *

      *

        *
      • A VCF file
      • @@ -77,12 +76,12 @@ import java.util.*; *
      *

      * - *

      Output

      + *

      Output

      *

      * A tab-delimited file containing the values of the requested fields in the VCF file *

      * - *

      Examples

      + *

      Examples

      *
        *     java -jar GenomeAnalysisTK.jar \
        *     -R reference.fasta
      diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java
      index ffe61f76d..7c7f52803 100644
      --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java
      +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java
      @@ -62,17 +62,17 @@ import java.util.*;
        * 

      * Note that there must be a Tribble feature/codec for the file format as well as an adaptor. * - *

      Input

      + *

      Input

      *

      * A variant file to filter. *

      * - *

      Output

      + *

      Output

      *

      * A VCF file. *

      * - *

      Examples

      + *

      Examples

      *
        * java -Xmx2g -jar GenomeAnalysisTK.jar \
        *   -R ref.fasta \
      diff --git a/public/java/src/org/broadinstitute/sting/tools/CatVariants.java b/public/java/src/org/broadinstitute/sting/tools/CatVariants.java
      index 10fb606f9..e1dd2c255 100644
      --- a/public/java/src/org/broadinstitute/sting/tools/CatVariants.java
      +++ b/public/java/src/org/broadinstitute/sting/tools/CatVariants.java
      @@ -35,6 +35,9 @@ import org.broadinstitute.sting.commandline.Argument;
       import org.broadinstitute.sting.commandline.Input;
       import org.broadinstitute.sting.commandline.Output;
       import org.broadinstitute.sting.commandline.CommandLineProgram;
      +import org.broadinstitute.sting.gatk.CommandLineGATK;
      +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
      +import org.broadinstitute.sting.utils.help.HelpConstants;
       import org.broadinstitute.variant.bcf2.BCF2Codec;
       import org.broadinstitute.sting.utils.collections.Pair;
       import org.broadinstitute.variant.vcf.VCFCodec;
      @@ -51,12 +54,48 @@ import java.util.*;
       
       /**
        *
      - * Usage: java -cp dist/GenomeAnalysisTK.jar org.broadinstitute.sting.tools.CatVariants    [sorted (optional)]");
      - * The input files can be of type: VCF (ends in .vcf or .VCF)");
      - *                                 BCF2 (ends in .bcf or .BCF)");
      - * Output file must be vcf or bcf file (.vcf or .bcf)");
      - * If the input files are already sorted, the last argument can indicate that");
      + * Concatenates VCF files of non-overlapping genome intervals, all with the same set of samples.
      + *
      + * 

      + * The main purpose of this tool is to speed up the gather function when using scatter-gather parallelization. + * This tool concatenates the scattered output VCF files. It assumes that: + * - All the input VCFs (or BCFs) contain the same samples in the same order. + * - The variants in each input file are from non-overlapping (scattered) intervals. + * + * When the input files are already sorted based on the intervals start positions, use -assumeSorted. + * + * Note: Currently the tool is more efficient when working with VCFs; we will work to make it as efficient for BCFs. + * + *

      + * + *

      Input

      + *

      + * One or more variant sets to combine. They should be of non-overlapping genome intervals and with the same samples (in the same order). + * The input files should be 'name.vcf' or 'name.VCF' or 'name.bcf' or 'name.BCF'. + * If the files are ordered according to the appearance of intervals in the ref genome, then one can use the -assumeSorted flag. + *

      + * + *

      Output

      + *

      + * A combined VCF. The output file should be 'name.vcf' or 'name.VCF'. + * </p> + * + * + *

      Examples

      + *
      + * java -cp dist/GenomeAnalysisTK.jar org.broadinstitute.sting.tools.CatVariants \
      + *    -R ref.fasta \
      + *    -V input1.vcf \
      + *    -V input2.vcf \
      + *    -out output.vcf \
      + *    -assumeSorted
      + * 
      + * + * @author Ami Levy Moonshine + * @since Jan 2012 */ + +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) public class CatVariants extends CommandLineProgram { // setup the logging system, used by some codecs private static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getRootLogger(); @@ -64,6 +103,14 @@ public class CatVariants extends CommandLineProgram { @Input(fullName = "reference", shortName = "R", doc = "genome reference file .fasta", required = true) private File refFile = null; + /** + * The VCF or BCF files to merge together + * + * CatVariants can take any number of -V arguments on the command line. Each -V argument + * will be included in the final merged output VCF. The order of arguments does not matter, but it runs more + * efficiently if they are sorted based on the intervals and the assumeSorted argument is used. + * + */ @Input(fullName="variant", shortName="V", doc="Input VCF file/s named .vcf or .bcf", required = true) private List variant = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java index fb26f6c37..82ee76a81 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java @@ -45,8 +45,8 @@ import java.util.ArrayList; *

      * *

      - * Instructions for generating a RefSeq file for use with the RefSeq codec can be found on the Wiki here - * http://www.broadinstitute.org/gsa/wiki/index.php/RefSeq + * Instructions for generating a RefSeq file for use with the RefSeq codec can be found in the documentation guide here + * http://www.broadinstitute.org/gatk/guide/article?id=1329 *

      *

      Usage

      * The RefSeq Rod can be bound as any other rod, and is specified by REFSEQ, for example diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 8a8c76806..e20d285e1 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -171,7 +171,7 @@ class GATKResourcesBundle extends QScript { "CEUTrio.HiSeq.WGS.b37.bestPractices.phased",b37,true,false)) // - // example call set for wiki tutorial + // example call set for documentation guide tutorial // addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/exampleCalls/NA12878.HiSeq.WGS.bwa.cleaned.raw.b37.subset.vcf", "NA12878.HiSeq.WGS.bwa.cleaned.raw.subset", b37, true, true)) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala index 344f5fe5b..529615c24 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala @@ -31,7 +31,7 @@ import org.broadinstitute.sting.commandline.{Argument, Output, Input} /** * Basic snpEff support. - * See: http://www.broadinstitute.org/gsa/wiki/index.php/Adding_Genomic_Annotations_Using_SnpEff_and_VariantAnnotator + * See: http://www.broadinstitute.org/gatk/guide/article?id=50 */ class SnpEff extends JavaCommandLineFunction { javaMainClass = "ca.mcgill.mcb.pcingola.snpEffect.commandLine.SnpEff" diff --git a/settings/helpTemplates/generic.template.html b/settings/helpTemplates/generic.template.html index 587828d1e..b05ad65c0 100644 --- a/settings/helpTemplates/generic.template.html +++ b/settings/helpTemplates/generic.template.html @@ -130,7 +130,7 @@
      -

      Introduction

      +

      Overview

      ${description} <#-- Create references to additional capabilities if appropriate -->