Fixed typos and made some argument docs improvements

This commit is contained in:
Geraldine Van der Auwera 2015-07-29 18:57:59 -04:00
parent bb4c9fa1d3
commit 875c7ffa1a
2 changed files with 72 additions and 44 deletions

View File

@ -53,7 +53,7 @@ import java.util.*;
* Annotate variant calls with context information
*
* <p>
* This tool is designed to annotate variant calls based on their context (ass opposed to functional annotation).
* This tool is designed to annotate variant calls based on their context (as opposed to functional annotation).
* Various annotation modules are available; see the
* <a href="https://www.broadinstitute.org/gatk/guide/tooldocs/org_broadinstitute_gatk_tools_walkers_annotator_VariantAnnotator.php#VariantAnnotations">documentation</a>
* for a complete list.
@ -95,9 +95,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
/**
* The INFO field will be annotated with information on the most biologically significant effect
* listed in the SnpEff output file for each variant.
* listed for each variant in the SnpEff file.
*/
@Input(fullName="snpEffFile", shortName = "snpEffFile", doc="A SnpEff output file from which to add annotations", required=false)
@Input(fullName="snpEffFile", shortName = "snpEffFile", doc="SnpEff file from which to get annotations", required=false)
public RodBinding<VariantContext> snpEffFile;
/**
 * Accessor for the SnpEff rod binding.
 *
 * @return the value of the {@code snpEffFile} field
 */
public RodBinding<VariantContext> getSnpEffRodBinding() {
    return snpEffFile;
}
@ -114,7 +114,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
* Records that are filtered in the comp track will be ignored. Note that 'dbSNP' has been special-cased
* (see the --dbsnp argument).
*/
@Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false)
@Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false)
public List<RodBinding<VariantContext>> comps = Collections.emptyList();
/**
 * Accessor for the comparison rod bindings.
 *
 * @return the value of the {@code comps} field
 */
public List<RodBinding<VariantContext>> getCompRodBindings() {
    return comps;
}
@ -127,7 +127,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
* '-E my_resource.AC' (-E is short for --expression, also documented on this page). In the resulting output
* VCF, any records for which there is a record at the same position in the resource file will be annotated with
* 'my_resource.AC=N'. Note that if there are multiple records in the resource file that overlap the given
* position, one is chosen randomly.
* position, one is chosen randomly. Note also that this does not currently check for allele concordance;
* the match is based on position only.
*/
@Input(fullName="resource", shortName = "resource", doc="External resource VCF file", required=false)
public List<RodBinding<VariantContext>> resources = Collections.emptyList();
@ -184,20 +185,25 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
protected Boolean USE_ALL_ANNOTATIONS = false;
/**
* Note that the --list argument requires a fully resolved and correct command-line to work. As an alternative, you can use ListAnnotations (see Help Utilities).
* Note that the --list argument requires a fully resolved and correct command-line to work. As an alternative,
* you can use ListAnnotations (see Help Utilities).
*/
@Argument(fullName="list", shortName="ls", doc="List the available annotations and exit", required=false)
protected Boolean LIST = false;
/**
* By default, the dbSNP ID is added only when the ID field in the variant VCF is empty (not already annotated).
* This argument allows you to override that behavior. This is used in conjuction with the -dbsnp argument.
* By default, a dbSNP ID is added only when the ID field in the variant record is empty (not already annotated).
* This argument allows you to override that behavior, and appends the new ID to the existing one. This is used
* in conjunction with the -dbsnp argument.
*/
@Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="Append the dbSNP ID even when the variant VCF already has the ID field populated", required=false)
@Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="Add dbSNP ID even if one is already present", required=false)
protected Boolean ALWAYS_APPEND_DBSNP_ID = false;
/**
 * Accessor for the alwaysAppendDbsnpId argument.
 *
 * @return the value of the {@code ALWAYS_APPEND_DBSNP_ID} field
 */
public boolean alwaysAppendDbsnpId() {
    return ALWAYS_APPEND_DBSNP_ID;
}
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality threshold in order to annotate mendelian violation ratio")
/**
* The genotype quality (GQ) threshold above which the Mendelian violation ratio should be annotated.
*/
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="GQ threshold for annotating MV ratio")
public double minGenotypeQualityP = 0.0;
private VariantAnnotatorEngine engine;

View File

@ -57,9 +57,9 @@ import java.util.*;
* Combine variant records from different sources
*
* <p>CombineVariants reads in variant records from separate ROD (Reference-Ordered Data) sources and combines them into
* a single VCF. Any (unique) name can be used to bind your ROD and any number of sources can be input. This tool aims
* to fulfill two main possible use cases, reflected by the two combination options (MERGE and UNION), for merging
* records at the variant level (the first 8 fields of the VCF) or at the genotype level (the rest).</p>
* a single VCF. Any number of sources can be input. This tool aims to fulfill two main possible use cases, reflected
* by the two combination options (MERGE and UNION), for merging records at the variant level (the first 8 fields of
* the VCF) or at the genotype level (the rest).</p>
*
* <ul>
* <li><b>MERGE:</b> combines multiple variant records present at the same site in the different input sources into a
@ -71,6 +71,13 @@ import java.util.*;
* It uses the priority list (if provided) to emit a single record instance at every position represented in the input RODs.</li>
* </ul>
*
* <p>By default, the input sets will be named variants, variants2, variants3, and so on. You can override this by
* providing an explicit name tag for each input, using the syntax " -V:format,name". Each input tagged in this
* way will be labeled as such in the output (i.e., set=name rather than set=variants2). For example, you could specify
* a set of control samples as " -V:vcf,control my_control_samples.vcf", and the resulting VCF records would contain
* the annotation "set=control" in the INFO field. It is strongly recommended to provide explicit names in this way
* when a rod priority list is provided.</p>
*
* <p>CombineVariants will emit a record for every site that was present in any of your input VCF files, and will annotate
* (in the set attribute in the INFO field) whether the record had a PASS or FILTER status in each input ROD. In effect,
* CombineVariants always produces a union of the input VCFs. However, any part of the Venn of the merged VCFs
@ -136,20 +143,8 @@ import java.util.*;
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=-50,stop=50))
public class CombineVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
/**
* The VCF files to merge together
*
* variants can take any number of arguments on the command line. Each -V argument
* will be included in the final merged output VCF. If no explicit name is provided,
* the -V arguments will be named using the default algorithm: variants, variants2, variants3, etc.
* The user can override this by providing an explicit name -V:name,vcf for each -V argument,
* and each named argument will be labeled as such in the output (i.e., set=name rather than
* set=variants2). The order of arguments does not matter unless except for the naming, so
* if you provide an rod priority list and no explicit names than variants, variants2, etc
* are technically order dependent. It is strongly recommended to provide explicit names when
* a rod priority list is provided.
*/
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
@Input(fullName="variant", shortName = "V", doc="VCF files to merge together", required=true)
public List<RodBindingCollection<VariantContext>> variantCollections;
final private List<RodBinding<VariantContext>> variants = new ArrayList<>();
@ -167,48 +162,75 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
public GATKVariantContextUtils.MultipleAllelesMergeType multipleAllelesMergeType = GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE;
/**
* Used when taking the union of variants that contain genotypes. A complete priority list MUST be provided.
* Refers to the merging priority behavior described in the tool documentation regarding the choice of which record
* gets emitted when taking the union of variants that contain genotypes. The list must be passed as a
* comma-separated string listing the names of the variant input files. The list must be complete and include all
* variant inputs that are being provided to the tool. Use name tags for best results.
*/
@Argument(fullName="rod_priority_list", shortName="priority", doc="A comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted", required=false)
@Argument(fullName="rod_priority_list", shortName="priority", doc="Ordered list specifying priority for merging", required=false)
public String PRIORITY_STRING = null;
@Argument(fullName="printComplexMerges", shortName="printComplexMerges", doc="Print out interesting sites requiring complex compatibility merging", required=false)
@Argument(fullName="printComplexMerges", shortName="printComplexMerges", doc="Emit interesting sites requiring complex compatibility merging to file", required=false)
public boolean printComplexMerges = false;
@Argument(fullName="filteredAreUncalled", shortName="filteredAreUncalled", doc="If true, then filtered VCFs are treated as uncalled, so that filtered set annotations don't appear in the combined VCF", required=false)
/**
* If enabled, this flag causes filtered variants (i.e. variant records where the FILTER field is populated by
* something other than PASS or a dot) to be omitted from the output.
*/
@Argument(fullName="filteredAreUncalled", shortName="filteredAreUncalled", doc="Treat filtered variants as uncalled", required=false)
public boolean filteredAreUncalled = false;
/**
* Used to generate a sites-only file.
* If this flag is enabled, the INFO, FORMAT and sample-level (genotype) fields will not be emitted to the output file.
*/
@Argument(fullName="minimalVCF", shortName="minimalVCF", doc="If true, then the output VCF will contain no INFO or genotype FORMAT fields", required=false)
@Argument(fullName="minimalVCF", shortName="minimalVCF", doc="Emit a sites-only file", required=false)
public boolean minimalVCF = false;
@Argument(fullName="excludeNonVariants", shortName="env", doc="Don't include loci found to be non-variant after the combining procedure", required=false)
/**
* Exclude sites that do not contain any called ALT alleles in the merged callset. The evaluation is made after the
* merging procedure is complete.
*/
@Argument(fullName="excludeNonVariants", shortName="env", doc="Exclude sites where no variation is present after merging", required=false)
public boolean EXCLUDE_NON_VARIANTS = false;
/**
* Set to 'null' if you don't want the set field emitted.
* Key used in the INFO key=value tag emitted describing which set(s) the combined record came from
* (e.g. set=control). This provides the option to override the default naming, so instead of set=control you could
* have it be origin=control, or any other word you want that is not already an INFO field attribute. Set this to
* 'null' if you don't want the set attribute emitted at all.
*/
@Argument(fullName="setKey", shortName="setKey", doc="Key used in the INFO key=value tag emitted describing which set the combined VCF record came from", required=false)
@Argument(fullName="setKey", shortName="setKey", doc="Key name for the set attribute", required=false)
public String SET_KEY = "set";
/**
* This option allows the user to perform a simple merge (concatenation) to combine the VCFs, drastically reducing the runtime.
* This option allows you to perform a simple merge (concatenation) to combine the VCFs, drastically reducing
* runtime. Note that in many cases where you think you want to use this option, you may want to check out the
* CatVariants tool instead, because CatVariants provides the same functionality, but does so even more efficiently.
*/
@Argument(fullName="assumeIdenticalSamples", shortName="assumeIdenticalSamples", doc="If true, assume input VCFs have identical sample sets and disjoint calls", required=false)
@Argument(fullName="assumeIdenticalSamples", shortName="assumeIdenticalSamples", doc="Assume input VCFs have identical sample sets and disjoint calls", required=false)
public boolean ASSUME_IDENTICAL_SAMPLES = false;
@Argument(fullName="minimumN", shortName="minN", doc="Combine variants and output site only if the variant is present in at least N input files.", required=false)
/**
* Sites that are present in fewer than this number of inputs will be ignored. This is a convenient way to build
* a collection of common variants and exclude rare variants.
*/
@Argument(fullName="minimumN", shortName="minN", doc="Minimum number of input files the site must be observed in to be included", required=false)
public int minimumN = 1;
/**
* This option allows the suppression of the command line in the VCF header. This is most often usefully when combining variants for dozens or hundreds of smaller VCFs.
* By default, this tool writes the command line that was used in the header of the output VCF file. This flag
* enables you to override that behavior. This is most often useful when combining variants for dozens or
* hundreds of smaller VCFs iteratively, to avoid cluttering the header with a lot of command lines.
*/
@Argument(fullName="suppressCommandLineHeader", shortName="suppressCommandLineHeader", doc="If true, do not output the header containing the command line used", required=false)
@Argument(fullName="suppressCommandLineHeader", shortName="suppressCommandLineHeader", doc="Do not output the command line to the header", required=false)
public boolean SUPPRESS_COMMAND_LINE_HEADER = false;
@Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false)
/**
* By default, the INFO field of the merged variant record only contains the INFO field attributes for which all
* original overlapping records had the same values. Discordant attributes are therefore discarded. This flag allows you to
* override that behavior and simply copy over the INFO field contents of whichever record had the highest AC value.
*/
@Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="Use the INFO content of the record with the highest AC", required=false)
public boolean MERGE_INFO_WITH_MAX_AC = false;
private List<String> priority = null;
@ -224,7 +246,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
sitesOnlyVCF = ((VariantContextWriterStub)vcfWriter).getWriterOptions().contains(Options.DO_NOT_WRITE_GENOTYPES);
if ( sitesOnlyVCF ) logger.info("Pre-stripping genotypes for performance");
} else
logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites only output option");
logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites-only output option");
validateAnnotateUnionArguments();
@ -233,7 +255,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
if (genotypeMergeOption == null && !ASSUME_IDENTICAL_SAMPLES) {
if (!sampleNamesAreUnique)
throw new UserException("Duplicate sample names were discovered but no genotypemergeoption was supplied. " +
"To combine samples without merging specify --genotypemergeoption UNIQUIFY. Merging duplicate samples " +
"To combine samples without merging, specify --genotypemergeoption UNIQUIFY. Merging duplicate samples " +
"without specified priority is unsupported, but can be achieved by specifying --genotypemergeoption UNSORTED.");
else
genotypeMergeOption = GATKVariantContextUtils.GenotypeMergeType.UNSORTED;