Fixed typos and made some argument docs improvements
This commit is contained in:
parent
bb4c9fa1d3
commit
875c7ffa1a
|
|
@ -53,7 +53,7 @@ import java.util.*;
|
|||
* Annotate variant calls with context information
|
||||
*
|
||||
* <p>
|
||||
* This tool is designed to annotate variant calls based on their context (ass opposed to functional annotation).
|
||||
* This tool is designed to annotate variant calls based on their context (as opposed to functional annotation).
|
||||
* Various annotation modules are available; see the
|
||||
* <a href="https://www.broadinstitute.org/gatk/guide/tooldocs/org_broadinstitute_gatk_tools_walkers_annotator_VariantAnnotator.php#VariantAnnotations">documentation</a>
|
||||
* for a complete list.
|
||||
|
|
@ -95,9 +95,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
|
||||
/**
|
||||
* The INFO field will be annotated with information on the most biologically significant effect
|
||||
* listed in the SnpEff output file for each variant.
|
||||
* listed for each variant in the SnpEff file.
|
||||
*/
|
||||
@Input(fullName="snpEffFile", shortName = "snpEffFile", doc="A SnpEff output file from which to add annotations", required=false)
|
||||
@Input(fullName="snpEffFile", shortName = "snpEffFile", doc="SnpEff file from which to get annotations", required=false)
|
||||
public RodBinding<VariantContext> snpEffFile;
|
||||
public RodBinding<VariantContext> getSnpEffRodBinding() { return snpEffFile; }
|
||||
|
||||
|
|
@ -114,7 +114,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
* Records that are filtered in the comp track will be ignored. Note that 'dbSNP' has been special-cased
|
||||
* (see the --dbsnp argument).
|
||||
*/
|
||||
@Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false)
|
||||
@Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false)
|
||||
public List<RodBinding<VariantContext>> comps = Collections.emptyList();
|
||||
public List<RodBinding<VariantContext>> getCompRodBindings() { return comps; }
|
||||
|
||||
|
|
@ -127,7 +127,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
* '-E my_resource.AC' (-E is short for --expression, also documented on this page). In the resulting output
|
||||
* VCF, any records for which there is a record at the same position in the resource file will be annotated with
|
||||
* 'my_resource.AC=N'. Note that if there are multiple records in the resource file that overlap the given
|
||||
* position, one is chosen randomly.
|
||||
* position, one is chosen randomly. Note also that this does not currently check for allele concordance;
|
||||
* the match is based on position only.
|
||||
*/
|
||||
@Input(fullName="resource", shortName = "resource", doc="External resource VCF file", required=false)
|
||||
public List<RodBinding<VariantContext>> resources = Collections.emptyList();
|
||||
|
|
@ -184,20 +185,25 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
protected Boolean USE_ALL_ANNOTATIONS = false;
|
||||
|
||||
/**
|
||||
* Note that the --list argument requires a fully resolved and correct command-line to work. As an alternative, you can use ListAnnotations (see Help Utilities).
|
||||
* Note that the --list argument requires a fully resolved and correct command-line to work. As an alternative,
|
||||
* you can use ListAnnotations (see Help Utilities).
|
||||
*/
|
||||
@Argument(fullName="list", shortName="ls", doc="List the available annotations and exit", required=false)
|
||||
protected Boolean LIST = false;
|
||||
|
||||
/**
|
||||
* By default, the dbSNP ID is added only when the ID field in the variant VCF is empty (not already annotated).
|
||||
* This argument allows you to override that behavior. This is used in conjuction with the -dbsnp argument.
|
||||
* By default, a dbSNP ID is added only when the ID field in the variant record is empty (not already annotated).
|
||||
* This argument allows you to override that behavior, and appends the new ID to the existing one. This is used
|
||||
* in conjunction with the -dbsnp argument.
|
||||
*/
|
||||
@Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="Append the dbSNP ID even when the variant VCF already has the ID field populated", required=false)
|
||||
@Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="Add dbSNP ID even if one is already present", required=false)
|
||||
protected Boolean ALWAYS_APPEND_DBSNP_ID = false;
|
||||
public boolean alwaysAppendDbsnpId() { return ALWAYS_APPEND_DBSNP_ID; }
|
||||
|
||||
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality threshold in order to annotate mendelian violation ratio")
|
||||
/**
|
||||
* The genotype quality (GQ) threshold above which the Mendelian violation ratio should be annotated.
|
||||
*/
|
||||
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="GQ threshold for annotating MV ratio")
|
||||
public double minGenotypeQualityP = 0.0;
|
||||
|
||||
private VariantAnnotatorEngine engine;
|
||||
|
|
|
|||
|
|
@ -57,9 +57,9 @@ import java.util.*;
|
|||
* Combine variant records from different sources
|
||||
*
|
||||
* <p>CombineVariants reads in variant records from separate ROD (Reference-Ordered Data) sources and combines them into
|
||||
* a single VCF. Any (unique) name can be used to bind your ROD and any number of sources can be input. This tool aims
|
||||
* to fulfill two main possible use cases, reflected by the two combination options (MERGE and UNION), for merging
|
||||
* records at the variant level (the first 8 fields of the VCF) or at the genotype level (the rest).</p>
|
||||
* a single VCF. Any number of sources can be input. This tool aims to fulfill two main possible use cases, reflected
|
||||
* by the two combination options (MERGE and UNION), for merging records at the variant level (the first 8 fields of
|
||||
* the VCF) or at the genotype level (the rest).</p>
|
||||
*
|
||||
* <ul>
|
||||
* <li><b>MERGE:</b> combines multiple variant records present at the same site in the different input sources into a
|
||||
|
|
@ -71,6 +71,13 @@ import java.util.*;
|
|||
* It uses the priority list (if provided) to emit a single record instance at every position represented in the input RODs.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>By default, the input sets will be named variants, variants2, variants3, and so on. You can override this by
|
||||
* providing an explicit name tag for each input, using the syntax " -V:format,name". Each input tagged in this
|
||||
* way will be labeled as such in the output (i.e., set=name rather than set=variants2). For example, you could specify
|
||||
* a set of control samples as " -V:vcf,control my_control_samples.vcf", and the resulting VCF records would contain
|
||||
* the annotation "set=control" in the INFO field. It is strongly recommended to provide explicit names in this way
|
||||
* when a rod priority list is provided.</p>
|
||||
*
|
||||
* <p>CombineVariants will emit a record for every site that was present in any of your input VCF files, and will annotate
|
||||
* (in the set attribute in the INFO field) whether the record had a PASS or FILTER status in each input ROD. In effect,
|
||||
* CombineVariants always produces a union of the input VCFs. However, any part of the Venn of the merged VCFs
|
||||
|
|
@ -136,20 +143,8 @@ import java.util.*;
|
|||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
|
||||
@Reference(window=@Window(start=-50,stop=50))
|
||||
public class CombineVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||
/**
|
||||
* The VCF files to merge together
|
||||
*
|
||||
* variants can take any number of arguments on the command line. Each -V argument
|
||||
* will be included in the final merged output VCF. If no explicit name is provided,
|
||||
* the -V arguments will be named using the default algorithm: variants, variants2, variants3, etc.
|
||||
* The user can override this by providing an explicit name -V:name,vcf for each -V argument,
|
||||
* and each named argument will be labeled as such in the output (i.e., set=name rather than
|
||||
* set=variants2). The order of arguments does not matter unless except for the naming, so
|
||||
* if you provide an rod priority list and no explicit names than variants, variants2, etc
|
||||
* are technically order dependent. It is strongly recommended to provide explicit names when
|
||||
* a rod priority list is provided.
|
||||
*/
|
||||
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
|
||||
|
||||
@Input(fullName="variant", shortName = "V", doc="VCF files to merge together", required=true)
|
||||
public List<RodBindingCollection<VariantContext>> variantCollections;
|
||||
final private List<RodBinding<VariantContext>> variants = new ArrayList<>();
|
||||
|
||||
|
|
@ -167,48 +162,75 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
public GATKVariantContextUtils.MultipleAllelesMergeType multipleAllelesMergeType = GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE;
|
||||
|
||||
/**
|
||||
* Used when taking the union of variants that contain genotypes. A complete priority list MUST be provided.
|
||||
* Refers to the merging priority behavior described in the tool documentation regarding the choice of which record
|
||||
* gets emitted when taking the union of variants that contain genotypes. The list must be passed as a
|
||||
* comma-separated string listing the names of the variant input files. The list must be complete and include all
|
||||
* variant inputs that are being provided to the tool. Use name tags for best results.
|
||||
*/
|
||||
@Argument(fullName="rod_priority_list", shortName="priority", doc="A comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted", required=false)
|
||||
@Argument(fullName="rod_priority_list", shortName="priority", doc="Ordered list specifying priority for merging", required=false)
|
||||
public String PRIORITY_STRING = null;
|
||||
|
||||
@Argument(fullName="printComplexMerges", shortName="printComplexMerges", doc="Print out interesting sites requiring complex compatibility merging", required=false)
|
||||
@Argument(fullName="printComplexMerges", shortName="printComplexMerges", doc="Emit interesting sites requiring complex compatibility merging to file", required=false)
|
||||
public boolean printComplexMerges = false;
|
||||
|
||||
@Argument(fullName="filteredAreUncalled", shortName="filteredAreUncalled", doc="If true, then filtered VCFs are treated as uncalled, so that filtered set annotations don't appear in the combined VCF", required=false)
|
||||
/**
|
||||
* If enabled, this flag causes filtered variants (i.e. variant records where the FILTER field is populated by
|
||||
* something other than PASS or a dot) to be omitted from the output.
|
||||
*/
|
||||
@Argument(fullName="filteredAreUncalled", shortName="filteredAreUncalled", doc="Treat filtered variants as uncalled", required=false)
|
||||
public boolean filteredAreUncalled = false;
|
||||
|
||||
/**
|
||||
* Used to generate a sites-only file.
|
||||
* If this flag is enabled, the INFO, FORMAT and sample-level (genotype) fields will not be emitted to the output file.
|
||||
*/
|
||||
@Argument(fullName="minimalVCF", shortName="minimalVCF", doc="If true, then the output VCF will contain no INFO or genotype FORMAT fields", required=false)
|
||||
@Argument(fullName="minimalVCF", shortName="minimalVCF", doc="Emit a sites-only file", required=false)
|
||||
public boolean minimalVCF = false;
|
||||
|
||||
@Argument(fullName="excludeNonVariants", shortName="env", doc="Don't include loci found to be non-variant after the combining procedure", required=false)
|
||||
/**
|
||||
* Exclude sites that do not contain any called ALT alleles in the merged callset. The evaluation is made after the
|
||||
* merging procedure is complete.
|
||||
*/
|
||||
@Argument(fullName="excludeNonVariants", shortName="env", doc="Exclude sites where no variation is present after merging", required=false)
|
||||
public boolean EXCLUDE_NON_VARIANTS = false;
|
||||
|
||||
/**
|
||||
* Set to 'null' if you don't want the set field emitted.
|
||||
* Key used in the INFO key=value tag emitted describing which set(s) the combined record came from
|
||||
* (e.g. set=control). This provides the option to override the default naming, so instead of set=control you could
|
||||
* have it be origin=control, or any other word you want that is not already an INFO field attribute. Set this to
|
||||
* 'null' if you don't want the set attribute emitted at all.
|
||||
*/
|
||||
@Argument(fullName="setKey", shortName="setKey", doc="Key used in the INFO key=value tag emitted describing which set the combined VCF record came from", required=false)
|
||||
@Argument(fullName="setKey", shortName="setKey", doc="Key name for the set attribute", required=false)
|
||||
public String SET_KEY = "set";
|
||||
|
||||
/**
|
||||
* This option allows the user to perform a simple merge (concatenation) to combine the VCFs, drastically reducing the runtime.
|
||||
* This option allows you to perform a simple merge (concatenation) to combine the VCFs, drastically reducing
|
||||
* runtime. Note that in many cases where you think you want to use this option, you may want to check out the
|
||||
* CatVariants tool instead, because CatVariants provides the same functionality, but does so even more efficiently.
|
||||
*/
|
||||
@Argument(fullName="assumeIdenticalSamples", shortName="assumeIdenticalSamples", doc="If true, assume input VCFs have identical sample sets and disjoint calls", required=false)
|
||||
@Argument(fullName="assumeIdenticalSamples", shortName="assumeIdenticalSamples", doc="Assume input VCFs have identical sample sets and disjoint calls", required=false)
|
||||
public boolean ASSUME_IDENTICAL_SAMPLES = false;
|
||||
|
||||
@Argument(fullName="minimumN", shortName="minN", doc="Combine variants and output site only if the variant is present in at least N input files.", required=false)
|
||||
/**
|
||||
* Sites that are present in fewer than this number of inputs will be ignored. This is a convenient way to build
|
||||
* a collection of common variants and exclude rare variants.
|
||||
*/
|
||||
@Argument(fullName="minimumN", shortName="minN", doc="Minimum number of input files the site must be observed in to be included", required=false)
|
||||
public int minimumN = 1;
|
||||
|
||||
/**
|
||||
* This option allows the suppression of the command line in the VCF header. This is most often usefully when combining variants for dozens or hundreds of smaller VCFs.
|
||||
* By default, this tool writes the command line that was used in the header of the output VCF file. This flag
|
||||
* enables you to override that behavior. This is most often useful when combining variants for dozens or
|
||||
* hundreds of smaller VCFs iteratively, to avoid cluttering the header with a lot of command lines.
|
||||
*/
|
||||
@Argument(fullName="suppressCommandLineHeader", shortName="suppressCommandLineHeader", doc="If true, do not output the header containing the command line used", required=false)
|
||||
@Argument(fullName="suppressCommandLineHeader", shortName="suppressCommandLineHeader", doc="Do not output the command line to the header", required=false)
|
||||
public boolean SUPPRESS_COMMAND_LINE_HEADER = false;
|
||||
|
||||
@Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false)
|
||||
/**
|
||||
* By default, the INFO field of the merged variant record only contains the INFO field attributes for which all
|
||||
* original overlapping records had the same values. Discordant attributes are therefore discarded. This flag allows you to
|
||||
* override that behavior and simply copy over the INFO field contents of whichever record had the highest AC value.
|
||||
*/
|
||||
@Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="Use the INFO content of the record with the highest AC", required=false)
|
||||
public boolean MERGE_INFO_WITH_MAX_AC = false;
|
||||
|
||||
private List<String> priority = null;
|
||||
|
|
@ -224,7 +246,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
sitesOnlyVCF = ((VariantContextWriterStub)vcfWriter).getWriterOptions().contains(Options.DO_NOT_WRITE_GENOTYPES);
|
||||
if ( sitesOnlyVCF ) logger.info("Pre-stripping genotypes for performance");
|
||||
} else
|
||||
logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites only output option");
|
||||
logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites-only output option");
|
||||
|
||||
validateAnnotateUnionArguments();
|
||||
|
||||
|
|
@ -233,7 +255,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
if (genotypeMergeOption == null && !ASSUME_IDENTICAL_SAMPLES) {
|
||||
if (!sampleNamesAreUnique)
|
||||
throw new UserException("Duplicate sample names were discovered but no genotypemergeoption was supplied. " +
|
||||
"To combine samples without merging specify --genotypemergeoption UNIQUIFY. Merging duplicate samples " +
|
||||
"To combine samples without merging, specify --genotypemergeoption UNIQUIFY. Merging duplicate samples " +
|
||||
"without specified priority is unsupported, but can be achieved by specifying --genotypemergeoption UNSORTED.");
|
||||
else
|
||||
genotypeMergeOption = GATKVariantContextUtils.GenotypeMergeType.UNSORTED;
|
||||
|
|
|
|||
Loading…
Reference in New Issue