diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Coverage.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Coverage.java index 5138ac9af..5c48417ac 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Coverage.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Coverage.java @@ -70,10 +70,11 @@ import java.util.Map; /** * Total (unfiltered) depth over all samples. * - * While the sample-level (FORMAT) DP field describes the total depth of reads that passed the Unified Genotyper's + *

While the sample-level (FORMAT) DP field describes the total depth of reads that passed the caller's * internal quality control metrics (like MAPQ > 17, for example), the INFO field DP represents the unfiltered depth * over all samples. Note though that the DP is affected by downsampling (-dcov), so the max value one can obtain for * N samples with -dcov D is N * D + *

*/ public class Coverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 39fdcb707..7960a3ce2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -69,10 +69,15 @@ import java.util.*; /** - * Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation - * being seen on only the forward or only the reverse strand) in the reads? More bias is - * indicative of false positive calls. Note that the fisher strand test may not be - * calculated for certain complex indel cases or for multi-allelic sites. + * Phred-scaled p-value using Fisher's Exact Test to detect strand bias + * + *

Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation + * being seen on only the forward or only the reverse strand) in the reads. More bias is + * indicative of false positive calls. + *

+ * + *

Caveat

+ *

The Fisher Strand test may not be calculated for certain complex indel cases or for multi-allelic sites.

*/ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { private final static Logger logger = Logger.getLogger(FisherStrand.class); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index a4b1b1b49..827e39c11 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -68,7 +68,7 @@ import java.util.Map; /** - * GC content of the reference around this site + * GC content of the reference around the given site * *

The GC content is the number of GC bases relative to the total number of bases (# GC bases / # all bases) around this site on the reference.

* diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 447569643..0a4899f1c 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -61,7 +61,7 @@ import java.util.List; * User: rpoplin * Date: Nov 27, 2009 * - * A collection of the arguments that are common to both CovariateCounterWalker and TableRecalibrationWalker. + * A collection of the arguments that are used for BQSR. Used to be common to both CovariateCounterWalker and TableRecalibrationWalker. * This set of arguments will also be passed to the constructor of every Covariate when it is instantiated. */ @@ -131,14 +131,14 @@ public class RecalibrationArgumentCollection { public boolean RUN_WITHOUT_DBSNP = false; /** - * CountCovariates and TableRecalibration accept a --solid_recal_mode flag which governs how the recalibrator handles the + * BaseRecalibrator accepts a --solid_recal_mode flag which governs how the recalibrator handles the * reads which have had the reference inserted because of color space inconsistencies. */ @Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS") public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO; /** - * CountCovariates and TableRecalibration accept a --solid_nocall_strategy flag which governs how the recalibrator handles + * BaseRecalibrator accepts a --solid_nocall_strategy flag which governs how the recalibrator handles * no calls in the color space tag. 
Unfortunately because of the reference inserted bases mentioned above, reads with no calls in * their color space tag can not be recalibrated. */ diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationPerformance.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationPerformance.java index d0af08d90..271617059 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationPerformance.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationPerformance.java @@ -47,6 +47,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.*; @@ -55,18 +56,27 @@ import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.help.HelpConstants; import org.broadinstitute.sting.utils.recalibration.*; import java.io.*; /** + * Evaluate the performance of the base recalibration process + * + *

This tool aims to evaluate the results of the Base Quality Score Recalibration (BQSR) process.

+ * + *

Caveat

+ *

This tool is currently experimental. We provide neither documentation nor support for its operation.

+ * */ - +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class}) @PartitionBy(PartitionType.READ) public class RecalibrationPerformance extends RodWalker implements NanoSchedulable { - @Output(doc="Write output to this file") + @Output public PrintStream out; @Input(fullName="recal", shortName="recal", required=false, doc="The input covariates table file") diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/BaseCoverageDistribution.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/BaseCoverageDistribution.java index b70581dd3..53b7cebaa 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/BaseCoverageDistribution.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/BaseCoverageDistribution.java @@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -55,6 +56,8 @@ import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.help.HelpConstants; import java.io.PrintStream; import java.util.ArrayList; @@ -63,11 +66,11 @@ import 
java.util.LinkedList; import java.util.Map; /** - * Simple walker to plot the coverage distribution per base. + * Simple walker to plot the coverage distribution per base * *

* Features of this walker: - *

  • includes a smart counting of uncovered bases without visiting the uncovered loci.
  • + *
  • includes a smart counting of uncovered bases without visiting the uncovered loci
  • *
  • includes reads with deletions in the loci (optionally can be turned off)
  • *

    * @@ -91,10 +94,11 @@ import java.util.Map; * -fd \ * -o report.grp * - * User: carneiro - * Date: 1/27/13 - * Time: 11:16 AM + * + * @author carneiro + * @since 1/27/13 */ +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) public class BaseCoverageDistribution extends LocusWalker, Map>> { /** * The output GATK Report table diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index ce2869e94..ae6b56e19 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -82,7 +82,7 @@ import java.util.*; * * This helper class holds the data HashMap as well as submaps that represent the marginal distributions collapsed over all needed dimensions. * It also has static methods that are used to perform the various solid recalibration modes that attempt to correct the reference bias. - * This class holds the parsing methods that are shared between CountCovariates and TableRecalibration. + * This class holds the parsing methods that are shared between BaseRecalibrator and PrintReads. 
*/ public class RecalUtils { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java index 61574d947..29016af43 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java @@ -117,7 +117,7 @@ import java.util.*; // todo -- alter logarithmic scaling to spread out bins more // todo -- allow for user to set linear binning (default is logarithmic) // todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now -@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class} ) +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @By(DataSource.REFERENCE) @PartitionBy(PartitionType.NONE) @Downsample(by= DownsampleType.NONE, toCoverage=Integer.MAX_VALUE) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/CoveredByNSamplesSites.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/CoveredByNSamplesSites.java index 92034da70..506ef2c72 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/CoveredByNSamplesSites.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/CoveredByNSamplesSites.java @@ -29,12 +29,15 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import 
org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.help.HelpConstants; import org.broadinstitute.variant.variantcontext.Genotype; import org.broadinstitute.variant.variantcontext.GenotypesContext; import org.broadinstitute.variant.variantcontext.VariantContext; @@ -44,12 +47,15 @@ import java.io.*; import java.util.Collection; /** - * print intervals file with all the variant sites that have "most" ( >= 90% by default) of the samples with "good" (>= 10 by default)coverage ("most" and "good" can be set in the command line). + * Print intervals file with all the variant sites for which most of the samples have good coverage * *

    - * CoveredByNSamplesSites is a GATK tool for filter out sites based on their coverage. + * CoveredByNSamplesSites is a GATK tool for filtering out sites based on their coverage. * The sites that pass the filter are printed out to an intervals file. * + * See argument defaults for what constitutes "most" samples and "good" coverage. These parameters can be modified from the command line. + *

    + * *

    Input

    *

    * A variant file and optionally min coverage and sample percentage values. @@ -60,7 +66,7 @@ import java.util.Collection; * An intervals file. *

    * - *

    Examples

    + *

    Example

    *
      * java -Xmx2g -jar GenomeAnalysisTK.jar \
      *   -R ref.fasta \
    @@ -71,7 +77,7 @@ import java.util.Collection;
      * 
    * */ - +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @By(DataSource.REFERENCE_ORDERED_DATA) public class CoveredByNSamplesSites extends RodWalker implements TreeReducible { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java index 048c7ef77..35213af34 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -33,6 +34,8 @@ import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.help.HelpConstants; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.vcf.VCFHeader; @@ -41,29 +44,30 @@ import java.io.PrintStream; import java.util.*; /** - * A simple walker for performing genotype concordance calculations between two callsets. Outputs a GATK table with - * per-sample and aggregate counts and frequencies, a summary table for NRD/NRS, and a table for site allele overlaps. 
+ * Genotype concordance (per-sample and aggregate counts and frequencies, NRD/NRS and site allele overlaps) between two callsets * *

    - * Genotype concordance takes in two callsets (vcfs) and tabulates the number of sites which overlap and share alleles, + * GenotypeConcordance takes in two callsets (vcfs) and tabulates the number of sites which overlap and share alleles, * and for each sample, the genotype-by-genotype counts (for instance, the number of sites at which a sample was * called homozygous reference in the EVAL callset, but homozygous variant in the COMP callset). It outputs these * counts as well as convenient proportions (such as the proportion of het calls in the EVAL which were called REF in * the COMP) and metrics (such as NRD and NRS). * - *

    INPUT

    + *

    Input

    *

    * Genotype concordance requires two callsets (as it does a comparison): an EVAL and a COMP callset, specified via - * the -eval and -comp arguments - *

    + * the -eval and -comp arguments. + * * (Optional) Jexl expressions for genotype-level filtering of EVAL or COMP genotypes, specified via the -gfe and * -cfe arguments, respectively. + *

    * - *

    OUTPUT

    - * Genotype Concordance writes a GATK report to the specified (via -o) file, consisting of multiple tables of counts + *

    Output

    + * Genotype Concordance writes a GATK report to the specified file (via -o), consisting of multiple tables of counts * and proportions. These tables may be optionally moltenized via the -moltenize argument. * */ +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) public class GenotypeConcordance extends RodWalker>,ConcordanceMetrics> { /**