Adding docs to more walkers

This commit is contained in:
Eric Banks 2011-08-16 16:26:26 -04:00
parent 335421820e
commit 946f5c53fe
4 changed files with 161 additions and 26 deletions

View File

@ -39,8 +39,7 @@ import org.simpleframework.xml.*;
public class DbsnpArgumentCollection {
/**
* A dbSNP VCF file. Variants in this track will be treated as "known" variants
* in tools using this track.
* A dbSNP VCF file.
*/
@Input(fullName="dbsnp", shortName = "D", doc="dbSNP file", required=false)
public RodBinding<VariantContext> dbsnp;

View File

@ -49,7 +49,34 @@ import java.util.*;
/**
* Annotates variant calls with context information. Users can specify which of the available annotations to use.
* Annotates variant calls with context information.
*
* <p>
* VariantAnnotator is a GATK tool for annotating variant calls based on their context.
* The tool is modular; new annotations can be written easily without modifying VariantAnnotator itself.
*
* <h2>Input</h2>
* <p>
* A variant set to annotate and optionally one or more BAM files.
* </p>
*
* <h2>Output</h2>
* <p>
* An annotated VCF.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T VariantAnnotator \
* -I input.bam \
* -o output.vcf \
* -A DepthOfCoverage \
* --variant input.vcf \
* --dbsnp dbsnp.vcf
* </pre>
*
*/
@Requires(value={})
@Allows(value={DataSource.READS, DataSource.REFERENCE})
@ -69,8 +96,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return snpEffFile; }
/**
* A dbSNP VCF file from which to annotate.
*
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
*/
@ArgumentCollection
@ -101,15 +126,25 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Output(doc="File to which variants should be written",required=true)
protected VCFWriter vcfWriter = null;
@Argument(fullName="sampleName", shortName="sample", doc="The sample (NA-ID) corresponding to the variant input (for non-VCF input only)", required=false)
protected String sampleName = null;
/**
* See the -list argument to view available annotations.
*/
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
protected List<String> annotationsToUse = new ArrayList<String>();
/**
* See the -list argument to view available groups.
*/
@Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
protected List<String> annotationGroupsToUse = new ArrayList<String>();
/**
* This option enables you to add annotations from one VCF to another.
*
* For example, if you want to annotate your 'variant' VCF with the AC field value from the rod bound to 'resource',
* you can specify '-E resource.AC' and records in the output VCF will be annotated with 'resource.AC=N' when a record exists in that rod at the given position.
* If multiple records in the rod overlap the given position, one is chosen arbitrarily.
*/
@Argument(fullName="expression", shortName="E", doc="One or more specific expressions to apply to variant calls; see documentation for more details", required=false)
protected List<String> expressionsToUse = new ArrayList<String>();
@ -127,8 +162,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
protected boolean indelsOnly = false;
private HashMap<String, String> nonVCFsampleName = new HashMap<String, String>();
private VariantAnnotatorEngine engine;
private Collection<VariantContext> indelBufferContext;
@ -164,12 +197,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
List<String> rodName = Arrays.asList(variantCollection.variants.getName());
Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
// add the non-VCF sample from the command-line, if applicable
if ( sampleName != null ) {
nonVCFsampleName.put(sampleName.toUpperCase(), "variant");
samples.add(sampleName.toUpperCase());
}
// if there are no valid samples, warn the user
if ( samples.size() == 0 ) {
logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired.");

View File

@ -45,6 +45,34 @@ import java.util.*;
/**
* Filters variant calls using a number of user-selectable, parameterizable criteria.
*
* <p>
* VariantFiltration is a GATK tool for hard-filtering variant calls based on certain criteria.
* Records are hard-filtered by changing the value in the FILTER field to something other than PASS.
*
* <h2>Input</h2>
* <p>
* A variant set to filter.
* </p>
*
* <h2>Output</h2>
* <p>
* A filtered VCF.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T VariantFiltration \
* -o output.vcf \
* --variant input.vcf \
* --filterExpression "AB < 0.2 || MQ0 > 50" \
* --filterName "Nov09filters" \
* --mask mask.vcf \
* --maskName InDel
* </pre>
*
*/
@Reference(window=@Window(start=-50,stop=50))
public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
@ -52,33 +80,65 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
/**
* Any variant which overlaps entries from the provided mask rod will be filtered.
*/
@Input(fullName="mask", doc="Input ROD mask", required=false)
public RodBinding<Feature> mask;
@Output(doc="File to which variants should be written", required=true)
protected VCFWriter writer = null;
@Argument(fullName="filterExpression", shortName="filter", doc="One or more expression used with INFO fields to filter (see wiki docs for more info)", required=false)
/**
* VariantFiltration accepts any number of JEXL expressions (so you can have two named filters by using
* --filterName One --filterExpression "X < 1" --filterName Two --filterExpression "X > 2").
*/
@Argument(fullName="filterExpression", shortName="filter", doc="One or more expression used with INFO fields to filter", required=false)
protected ArrayList<String> FILTER_EXPS = new ArrayList<String>();
@Argument(fullName="filterName", shortName="filterName", doc="Names to use for the list of filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false)
/**
* This name is put in the FILTER field for variants that get filtered. Note that there must be a 1-to-1 mapping between filter expressions and filter names.
*/
@Argument(fullName="filterName", shortName="filterName", doc="Names to use for the list of filters", required=false)
protected ArrayList<String> FILTER_NAMES = new ArrayList<String>();
/**
* Similar to the INFO field based expressions, but used on the FORMAT (genotype) fields instead.
* VariantFiltration will add the sample-level FT tag to the FORMAT field of filtered samples (this does not affect the record's FILTER tag).
* One can filter normally based on most fields (e.g. "GQ < 5.0"), but the GT (genotype) field is an exception. We have put in convenience
* methods so that one can now filter out hets ("isHet == 1"), refs ("isHomRef == 1"), or homs ("isHomVar == 1").
*/
@Argument(fullName="genotypeFilterExpression", shortName="G_filter", doc="One or more expression used with FORMAT (sample/genotype-level) fields to filter (see wiki docs for more info)", required=false)
protected ArrayList<String> GENOTYPE_FILTER_EXPS = new ArrayList<String>();
/**
* Names for the genotype-level filters; analogous to --filterName, but paired with the expressions used on the FORMAT (genotype) fields instead.
*/
@Argument(fullName="genotypeFilterName", shortName="G_filterName", doc="Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false)
protected ArrayList<String> GENOTYPE_FILTER_NAMES = new ArrayList<String>();
@Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster (see also --clusterWindowSize); [default:3]", required=false)
/**
* Works together with the --clusterWindowSize argument.
*/
@Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster", required=false)
protected Integer clusterSize = 3;
@Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0]", required=false)
/**
* Works together with the --clusterSize argument. To disable the clustered SNP filter, set this value to less than 1.
*/
@Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs", required=false)
protected Integer clusterWindow = 0;
@Argument(fullName="maskExtension", shortName="maskExtend", doc="How many bases beyond records from a provided 'mask' rod should variants be filtered; [default:0]", required=false)
@Argument(fullName="maskExtension", shortName="maskExtend", doc="How many bases beyond records from a provided 'mask' rod should variants be filtered", required=false)
protected Integer MASK_EXTEND = 0;
@Argument(fullName="maskName", shortName="maskName", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false)
@Argument(fullName="maskName", shortName="maskName", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call", required=false)
protected String MASK_NAME = "Mask";
@Argument(fullName="missingValuesInExpressionsShouldEvaluateAsFailing", doc="When evaluating the JEXL expressions, should missing values be considered failing the expression (by default they are considered passing)?", required=false)
/**
* By default, if JEXL cannot evaluate your expression for a particular record because one of the annotations is not present, the whole expression evaluates as PASSing.
* Use this argument to have it evaluate as failing filters instead for these cases.
*/
@Argument(fullName="missingValuesInExpressionsShouldEvaluateAsFailing", doc="When evaluating the JEXL expressions, missing values should be considered failing the expression", required=false)
protected Boolean FAIL_MISSING_VALUES = false;
// JEXL expressions for the filters

View File

@ -36,20 +36,61 @@ import java.util.*;
/**
* General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more)
*
* <p>
* Given a variant callset, it is common to calculate various quality control metrics. These metrics include the number of
* raw or filtered SNPs; ratio of transition mutations to transversions; concordance of a particular sample's calls
* to a genotyping chip; number of singletons per sample; etc. Furthermore, it is often useful to stratify these metrics
* by various criteria like functional class (missense, nonsense, silent), whether the site is a CpG site, the amino acid
* degeneracy of the site, etc. VariantEval facilitates these calculations in two ways: by providing several built-in
* evaluation and stratification modules, and by providing a framework that permits the easy development of new evaluation
* and stratification modules.
*
* <h2>Input</h2>
* <p>
* One or more variant sets to evaluate plus any number of comparison sets.
* </p>
*
* <h2>Output</h2>
* <p>
* Evaluation tables.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T VariantEval \
* -o output.eval.gatkreport \
* --eval:set1 set1.vcf \
* --eval:set2 set2.vcf \
* [--comp comp.vcf]
* </pre>
*
*/
@Reference(window=@Window(start=-50, stop=50))
public class VariantEvalWalker extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
// Output arguments
@Output
protected PrintStream out;
/**
* The variant file(s) to evaluate.
*/
@Input(fullName="eval", shortName = "eval", doc="Input evaluation file(s)", required=true)
public List<RodBinding<VariantContext>> evals;
/**
* The variant file(s) to compare against.
*/
@Input(fullName="comp", shortName = "comp", doc="Input comparison file(s)", required=false)
public List<RodBinding<VariantContext>> compsProvided = Collections.emptyList();
private List<RodBinding<VariantContext>> comps = new ArrayList<RodBinding<VariantContext>>();
/**
* dbSNP comparison VCF. By default, the dbSNP file is used to specify the set of "known" variants.
* Other sets can be specified with the -knownName (--known_names) argument.
*/
@ArgumentCollection
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
@ -67,6 +108,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
@Argument(fullName="sample", shortName="sn", doc="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context", required=false)
protected Set<String> SAMPLE_EXPRESSIONS;
/**
* List of rod tracks to be used for specifying "known" variants other than dbSNP.
*/
@Argument(shortName="knownName", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false)
protected HashSet<String> KNOWN_NAMES = new HashSet<String>();
List<RodBinding<VariantContext>> knowns = new ArrayList<RodBinding<VariantContext>>();
@ -81,7 +125,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
@Argument(fullName="onlyVariantsOfType", shortName="VT", doc="If provided, only variants of these types will be considered during the evaluation, in ", required=false)
protected Set<VariantContext.Type> typesToUse = null;
// Evaluator arguments
/**
* See the -list argument to view available modules.
*/
@Argument(fullName="evalModule", shortName="EV", doc="One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noE is specified)", required=false)
protected String[] MODULES_TO_USE = {};
@ -95,7 +141,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
@Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false)
protected double MIN_PHASE_QUALITY = 10.0;
@Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
/**
* This argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined.
*/
@Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations", required=false)
protected String FAMILY_STRUCTURE;
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)