From 4d20c71e09d62afb9b8fed3b7273a33cd68156e3 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Tue, 6 Aug 2013 13:03:33 -0400 Subject: [PATCH 1/2] Improvements to various gatkdocs - Make -rod required - Document that contaminationFile is currently not functional with HC - Document liftover process more clearly - Document VariantEval combinations of ST and VE that are incompatible - Added a caveat about using MVLR from HC and UG. - Added caveat about not using -mte with -nt - Clarified masking options - Fixed docs based on Erics comments --- .../arguments/StandardCallerArgumentCollection.java | 2 +- .../gatk/walkers/annotator/MVLikelihoodRatio.java | 11 +++++++++-- .../sting/gatk/arguments/GATKArgumentCollection.java | 4 ++-- .../sting/gatk/walkers/filters/VariantFiltration.java | 11 +++++++++++ .../sting/gatk/walkers/qc/CountRODs.java | 2 +- .../sting/gatk/walkers/varianteval/VariantEval.java | 6 ++++++ .../walkers/variantutils/FilterLiftedVariants.java | 7 +++++++ .../gatk/walkers/variantutils/LiftoverVariants.java | 11 ++++++++--- 8 files changed, 45 insertions(+), 9 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java index 37606201c..c331451d5 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java @@ -173,7 +173,7 @@ public class StandardCallerArgumentCollection { /** * This argument specifies a file with two columns "sample" and "contamination" specifying the contamination level for those samples. - * Samples that do not appear in this file will be processed with CONTAMINATION_FRACTION + * Samples that do not appear in this file will be processed with CONTAMINATION_FRACTION. 
**/ @Advanced @Argument(fullName = "contamination_fraction_per_sample_file", shortName = "contaminationFile", doc = "Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be \"\" (Contamination is double) per line; No header.", required = false) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index ad974a083..4e6e87797 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -70,12 +70,19 @@ import java.util.*; *

Given a variant context, this tool uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation * versus the likelihood of the site transmitting according to mendelian rules.

* + *

Caveats

+ * + *

This tool assumes that the organism is diploid.

+ * *

Note that this annotation requires a valid ped file.

* - *

Caveat

- *

This tool assumes that the organism is diploid. When multiple trios are present, the annotation is simply the maximum + *

When multiple trios are present, the annotation is simply the maximum * of the likelihood ratios, rather than the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain * sites and many trios.

+ * + *

This annotation can only be used from the VariantAnnotator. + * If you attempt to use it from the UnifiedGenotyper, the run will fail with an error message to that effect. + * If you attempt to use it from the HaplotypeCaller, the run will complete successfully but the annotation will not be added to any variants.

*/ public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiringAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 509b875bb..174e434fe 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -332,9 +332,9 @@ public class GATKArgumentCollection { public int numberOfIOThreads = 0; /** - * Enable GATK to monitor its own threading efficiency, at a itsy-bitsy tiny + * Enable GATK to monitor its own threading efficiency, at an itsy-bitsy tiny * cost (< 0.1%) in runtime because of turning on the JavaBean. This is largely for - * debugging purposes. + * debugging purposes. Note that this argument is not compatible with -nt, it only works with -nct. */ @Argument(fullName = "monitorThreadEfficiency", shortName = "mte", doc = "Enable GATK threading efficiency monitoring", required = false) public Boolean monitorThreadEfficiency = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java index 83d4d81d0..1d07215ad 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java @@ -89,6 +89,8 @@ public class VariantFiltration extends RodWalker { /** * Any variant which overlaps entries from the provided mask rod will be filtered. If the user wants logic to be reversed, * i.e. filter variants that do not overlap with provided mask, then argument -filterNotInMask can be used. + * Note that it is up to the user to adapt the name of the mask to make it clear that the reverse logic was used + * (e.g. 
if masking against Hapmap, use -maskName=hapmap for the normal masking and -maskName=not_hapmap for the reverse masking). */ @Input(fullName="mask", shortName="mask", doc="Input ROD mask", required=false) public RodBinding mask; @@ -138,6 +140,13 @@ public class VariantFiltration extends RodWalker { @Argument(fullName="maskExtension", shortName="maskExtend", doc="How many bases beyond records from a provided 'mask' rod should variants be filtered", required=false) protected Integer MASK_EXTEND = 0; + + /** + * When using the -mask argument, the maskName will be annotated in the variant record. + * Note that when using the -filterNotInMask argument to reverse the masking logic, + * it is up to the user to adapt the name of the mask to make it clear that the reverse logic was used + * (e.g. if masking against Hapmap, use -maskName=hapmap for the normal masking and -maskName=not_hapmap for the reverse masking). + */ @Argument(fullName="maskName", shortName="maskName", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call", required=false) protected String MASK_NAME = "Mask"; @@ -145,6 +154,8 @@ public class VariantFiltration extends RodWalker { * By default, if the -mask argument is used, any variant falling in a mask will be filtered. * If this argument is used, logic is reversed, and variants falling outside a given mask will be filtered. * Use case is, for example, if we have an interval list or BED file with "good" sites. + * Note that it is up to the user to adapt the name of the mask to make it clear that the reverse logic was used + * (e.g. if masking against Hapmap, use -maskName=hapmap for the normal masking and -maskName=not_hapmap for the reverse masking). 
*/ @Argument(fullName="filterNotInMask", shortName="filterNotInMask", doc="Filter records NOT in given input mask.", required=false) protected boolean filterRecordsNotInMask = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java index 65f82efe4..7c2d19d30 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java @@ -92,7 +92,7 @@ public class CountRODs extends RodWalker> rods = Collections.emptyList(); @Argument(fullName = "verbose", shortName = "v", doc="If true, this tool will print out detailed information about the rods it finds and locations", required = false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java index 06fa455be..3aae056d9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java @@ -114,6 +114,12 @@ import java.util.*; * [--comp comp.vcf] * * + *

Caveat

+ * + *

Some stratifications and evaluators are incompatible with each other due to their respective memory requirements, such as AlleleCount and VariantSummary, or Sample and VariantSummary. + * If you specify such a combination, the program will output an error message and ask you to disable one of these options. + * We do not currently provide an exhaustive list of incompatible combinations, so we recommend trying out combinations that you are interested in on a dummy command line, to rapidly ascertain whether they will work.

+ * */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=-50, stop=50)) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index e61cda765..d26ab08f7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -46,6 +46,13 @@ import java.util.*; /** * Filters a lifted-over VCF file for ref bases that have been changed. + * + * "Lifting over" variants means adjusting variant calls from one reference to another. Specifically, the process adjusts the position of the call to match the corresponding position on the target reference. + * For example, if you have variants called from reads aligned to the hg19 reference, and you want to compare them to calls made based on the b37 reference, you need to liftover one of the callsets to the other reference. + * + * FilterLiftedVariants is intended to be the second of two processing steps for the liftover process. The first step is to run LiftoverVariants on your VCF file. + * The second step is to run FilterLiftedVariants on the output of LiftoverVariants. This will produce valid well-behaved VCF files, where you'll see that the contig names in the header have all been correctly replaced. 
+ * */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=0,stop=100)) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 0e38869c6..379b1c2a3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -56,9 +56,14 @@ import java.util.*; /** * Lifts a VCF file over from one build to another. * - * Important note: the resulting VCF is not guaranteed to be valid according to the official specification. The file could - * possibly be mis-sorted and the header may not be complete. LiftoverVariants is intended to be the first of two processing steps - * for the liftover; the second step, FilterLiftedVariants, will produce a valid well-behaved VCF file. + * "Lifting over" variants means adjusting variant calls from one reference to another. Specifically, the process adjusts the position of the call to match the corresponding position on the target reference. + * For example, if you have variants called from reads aligned to the hg19 reference, and you want to compare them to calls made based on the b37 reference, you need to liftover one of the callsets to the other reference. + * + * LiftoverVariants is intended to be the first of two processing steps for the liftover process. + * The second step is to run FilterLiftedVariants on the output of LiftoverVariants. This will produce valid well-behaved VCF files, where you'll see that the contig names in the header have all been correctly replaced. + * + * To be clear, the VCF resulting from the LiftoverVariants run is not guaranteed to be valid according to the official specification. 
The file could + * possibly be mis-sorted and the header may not be complete. That is why you need to run FilterLiftedVariants on it. */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) public class LiftoverVariants extends RodWalker { From a09831489b150424ba78f973028ffbe1e7d20fea Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Tue, 6 Aug 2013 16:45:11 -0400 Subject: [PATCH 2/2] Disabled emission of doc URLs for external codecs to avoid broken links --- .../gatk/refdata/tracks/FeatureManager.java | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java index 68e751521..60b6f4683 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java @@ -225,10 +225,20 @@ public class FeatureManager { docs.append(String.format(format, nameHeader, featureHeader, docHeader)); for ( final FeatureDescriptor descriptor : featureDescriptors ) { if ( requiredFeatureType.isAssignableFrom(descriptor.getFeatureClass()) ) { - String oneDoc = String.format(format, - descriptor.getName(), - descriptor.getSimpleFeatureName(), - GATKDocUtils.helpLinksToGATKDocs(descriptor.getCodecClass())); + final String DocURL = GATKDocUtils.helpLinksToGATKDocs(descriptor.getCodecClass()); + final String oneDoc; + if ( DocURL.contains("_sting_") ) { + oneDoc = String.format(format, + descriptor.getName(), + descriptor.getSimpleFeatureName(), + DocURL); + } else { + oneDoc = String.format(format, + descriptor.getName(), + descriptor.getSimpleFeatureName(), + "(this is an external codec and is not documented within GATK)"); + } + docs.append(oneDoc); } }