Made some documentation updates & fixes
--Mostly doc block tweaks --Added @DocumentedGATKFeature to some walkers that were undocumented because they were ending up in "uncategorized". Very important for GSA: if a walker is in public or protected, it HAS to be properly tagged-in. If it's not ready for the public, it should be in private.
This commit is contained in:
parent
c813259283
commit
95a9ed853d
|
|
@ -70,10 +70,11 @@ import java.util.Map;
|
|||
/**
|
||||
* Total (unfiltered) depth over all samples.
|
||||
*
|
||||
* While the sample-level (FORMAT) DP field describes the total depth of reads that passed the Unified Genotyper's
|
||||
* <p>While the sample-level (FORMAT) DP field describes the total depth of reads that passed the caller's
|
||||
* internal quality control metrics (like MAPQ > 17, for example), the INFO field DP represents the unfiltered depth
|
||||
* over all samples. Note though that the DP is affected by downsampling (-dcov), so the max value one can obtain for
|
||||
* N samples with -dcov D is N * D.
|
||||
* </p>
|
||||
*/
|
||||
public class Coverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
|
|
|
|||
|
|
@ -69,10 +69,15 @@ import java.util.*;
|
|||
|
||||
|
||||
/**
|
||||
* Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation
|
||||
* being seen on only the forward or only the reverse strand) in the reads? More bias is
|
||||
* indicative of false positive calls. Note that the fisher strand test may not be
|
||||
* calculated for certain complex indel cases or for multi-allelic sites.
|
||||
* Phred-scaled p-value using Fisher's Exact Test to detect strand bias
|
||||
*
|
||||
* <p>Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation
|
||||
* being seen on only the forward or only the reverse strand) in the reads. More bias is
|
||||
* indicative of false positive calls.
|
||||
* </p>
|
||||
*
|
||||
* <h3>Caveat</h3>
|
||||
* <p>The Fisher Strand test may not be calculated for certain complex indel cases or for multi-allelic sites.</p>
|
||||
*/
|
||||
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
private final static Logger logger = Logger.getLogger(FisherStrand.class);
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ import java.util.Map;
|
|||
|
||||
|
||||
/**
|
||||
* GC content of the reference around this site
|
||||
* GC content of the reference around the given site
|
||||
*
|
||||
* <p>The GC content is the number of GC bases relative to the total number of bases (# GC bases / # all bases) around this site on the reference.</p>
|
||||
*
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ import java.util.List;
|
|||
* User: rpoplin
|
||||
* Date: Nov 27, 2009
|
||||
*
|
||||
* A collection of the arguments that are common to both CovariateCounterWalker and TableRecalibrationWalker.
|
||||
* A collection of the arguments that are used for BQSR. These arguments were formerly common to both CovariateCounterWalker and TableRecalibrationWalker.
|
||||
* This set of arguments will also be passed to the constructor of every Covariate when it is instantiated.
|
||||
*/
|
||||
|
||||
|
|
@ -131,14 +131,14 @@ public class RecalibrationArgumentCollection {
|
|||
public boolean RUN_WITHOUT_DBSNP = false;
|
||||
|
||||
/**
|
||||
* CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
|
||||
* BaseRecalibrator accepts a --solid_recal_mode &lt;MODE&gt; flag which governs how the recalibrator handles the
|
||||
* reads which have had the reference inserted because of color space inconsistencies.
|
||||
*/
|
||||
@Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
|
||||
public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO;
|
||||
|
||||
/**
|
||||
* CountCovariates and TableRecalibration accept a --solid_nocall_strategy <MODE> flag which governs how the recalibrator handles
|
||||
* BaseRecalibrator accepts a --solid_nocall_strategy &lt;MODE&gt; flag which governs how the recalibrator handles
|
||||
* no calls in the color space tag. Unfortunately because of the reference inserted bases mentioned above, reads with no calls in
|
||||
* their color space tag cannot be recalibrated.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.*;
|
||||
|
|
@ -55,18 +56,27 @@ import org.broadinstitute.sting.gatk.report.GATKReport;
|
|||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.recalibration.*;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/**
|
||||
* Evaluate the performance of the base recalibration process
|
||||
*
|
||||
* <p>This tool aims to evaluate the results of the Base Quality Score Recalibration (BQSR) process.</p>
|
||||
*
|
||||
* <h3>Caveat</h3>
|
||||
* <p>This tool is currently experimental. We provide neither documentation nor support for its operation.</p>
|
||||
*
|
||||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
|
||||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
|
||||
@PartitionBy(PartitionType.READ)
|
||||
public class RecalibrationPerformance extends RodWalker<Integer, Integer> implements NanoSchedulable {
|
||||
|
||||
@Output(doc="Write output to this file")
|
||||
@Output
|
||||
public PrintStream out;
|
||||
|
||||
@Input(fullName="recal", shortName="recal", required=false, doc="The input covariates table file")
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
|||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -55,6 +56,8 @@ import org.broadinstitute.sting.gatk.report.GATKReport;
|
|||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -63,11 +66,11 @@ import java.util.LinkedList;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Simple walker to plot the coverage distribution per base.
|
||||
* Simple walker to plot the coverage distribution per base
|
||||
*
|
||||
* <p>
|
||||
* Features of this walker:
|
||||
* <li>includes a smart counting of uncovered bases without visiting the uncovered loci.</li>
|
||||
* <li>includes a smart counting of uncovered bases without visiting the uncovered loci</li>
|
||||
* <li>includes reads with deletions in the loci (optionally can be turned off)</li>
|
||||
* </p>
|
||||
*
|
||||
|
|
@ -91,10 +94,11 @@ import java.util.Map;
|
|||
* -fd \
|
||||
* -o report.grp
|
||||
* </pre>
|
||||
* User: carneiro
|
||||
* Date: 1/27/13
|
||||
* Time: 11:16 AM
|
||||
*
|
||||
* @author carneiro
|
||||
* @since 1/27/13
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
|
||||
public class BaseCoverageDistribution extends LocusWalker<ArrayList<Integer>, Map<Integer, ArrayList<Long>>> {
|
||||
/**
|
||||
* The output GATK Report table
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ import java.util.*;
|
|||
*
|
||||
* This helper class holds the data HashMap as well as submaps that represent the marginal distributions collapsed over all needed dimensions.
|
||||
* It also has static methods that are used to perform the various solid recalibration modes that attempt to correct the reference bias.
|
||||
* This class holds the parsing methods that are shared between CountCovariates and TableRecalibration.
|
||||
* This class holds the parsing methods that are shared between BaseRecalibrator and PrintReads.
|
||||
*/
|
||||
|
||||
public class RecalUtils {
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ import java.util.*;
|
|||
// todo -- alter logarithmic scaling to spread out bins more
|
||||
// todo -- allow for user to set linear binning (default is logarithmic)
|
||||
// todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
|
||||
@By(DataSource.REFERENCE)
|
||||
@PartitionBy(PartitionType.NONE)
|
||||
@Downsample(by= DownsampleType.NONE, toCoverage=Integer.MAX_VALUE)
|
||||
|
|
|
|||
|
|
@ -29,12 +29,15 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics;
|
|||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
|
@ -44,12 +47,15 @@ import java.io.*;
|
|||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* print intervals file with all the variant sites that have "most" ( >= 90% by default) of the samples with "good" (>= 10 by default)coverage ("most" and "good" can be set in the command line).
|
||||
* Print intervals file with all the variant sites for which most of the samples have good coverage
|
||||
*
|
||||
* <p>
|
||||
* CoveredByNSamplesSites is a GATK tool for filter out sites based on their coverage.
|
||||
* CoveredByNSamplesSites is a GATK tool for filtering out sites based on their coverage.
|
||||
* The sites that pass the filter are printed out to an intervals file.
|
||||
*
|
||||
* See argument defaults for what constitutes "most" samples and "good" coverage. These parameters can be modified from the command line.
|
||||
* </p>
|
||||
*
|
||||
* <h3>Input</h3>
|
||||
* <p>
|
||||
* A variant file and optionally min coverage and sample percentage values.
|
||||
|
|
@ -60,7 +66,7 @@ import java.util.Collection;
|
|||
* An intervals file.
|
||||
* </p>
|
||||
*
|
||||
* <h3>Examples</h3>
|
||||
* <h3>Example</h3>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
|
|
@ -71,7 +77,7 @@ import java.util.Collection;
|
|||
* </pre>
|
||||
*
|
||||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
|
||||
@By(DataSource.REFERENCE_ORDERED_DATA)
|
||||
public class CoveredByNSamplesSites extends RodWalker<GenomeLoc, Integer> implements TreeReducible<Integer> {
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -33,6 +34,8 @@ import org.broadinstitute.sting.gatk.report.GATKReport;
|
|||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
|
|
@ -41,29 +44,30 @@ import java.io.PrintStream;
|
|||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A simple walker for performing genotype concordance calculations between two callsets. Outputs a GATK table with
|
||||
* per-sample and aggregate counts and frequencies, a summary table for NRD/NRS, and a table for site allele overlaps.
|
||||
* Genotype concordance (per-sample and aggregate counts and frequencies, NRD/NRS and site allele overlaps) between two callsets
|
||||
*
|
||||
* <p>
|
||||
* Genotype concordance takes in two callsets (vcfs) and tabulates the number of sites which overlap and share alleles,
|
||||
* GenotypeConcordance takes in two callsets (vcfs) and tabulates the number of sites which overlap and share alleles,
|
||||
* and for each sample, the genotype-by-genotype counts (for instance, the number of sites at which a sample was
|
||||
* called homozygous reference in the EVAL callset, but homozygous variant in the COMP callset). It outputs these
|
||||
* counts as well as convenient proportions (such as the proportion of het calls in the EVAL which were called REF in
|
||||
* the COMP) and metrics (such as NRD and NRS).
|
||||
*
|
||||
* <h2> INPUT </h2>
|
||||
* <h3>Input</h3>
|
||||
* <p>
|
||||
* Genotype concordance requires two callsets (as it does a comparison): an EVAL and a COMP callset, specified via
|
||||
* the -eval and -comp arguments
|
||||
* <p>
|
||||
* the -eval and -comp arguments.
|
||||
*
|
||||
* (Optional) Jexl expressions for genotype-level filtering of EVAL or COMP genotypes, specified via the -gfe and
|
||||
* -cfe arguments, respectively.
|
||||
* </p>
|
||||
*
|
||||
* <h2> OUTPUT </h2>
|
||||
* Genotype Concordance writes a GATK report to the specified (via -o) file, consisting of multiple tables of counts
|
||||
* <h3>Output</h3>
|
||||
* Genotype Concordance writes a GATK report to the specified file (via -o), consisting of multiple tables of counts
|
||||
* and proportions. These tables may be optionally moltenized via the -moltenize argument.
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
|
||||
public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,VariantContext>>,ConcordanceMetrics> {
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue