From 2b70f14740cb83b94e1f2d8982f7737d7ceefb67 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Sat, 27 Feb 2016 13:11:17 -0500 Subject: [PATCH] Misc documentation improvements Added caveat to VariantFiltration documentation Fixed PON creation example in M2 doc Improved MalformedReadFilter doc Updated N CIGAR error message --- .../gatk/tools/walkers/cancer/m2/MuTect2.java | 2 +- .../engine/filters/MalformedReadFilter.java | 52 +++++++++++-------- .../walkers/filters/VariantFiltration.java | 24 ++++++--- 3 files changed, 48 insertions(+), 30 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java index ec0038bab..3b4d31c9b 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java @@ -141,7 +141,7 @@ import static java.lang.Math.pow; *
  *   java
  *     -jar GenomeAnalysisTK.jar
- *     -T HaplotypeCaller
+ *     -T MuTect2
  *     -R reference.fasta
  *     -I:tumor normal1.bam \
  *     [--dbsnp dbSNP.vcf] \
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/MalformedReadFilter.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/MalformedReadFilter.java
index 4e267167e..eb524eba8 100644
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/MalformedReadFilter.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/MalformedReadFilter.java
@@ -32,17 +32,36 @@ import org.broadinstitute.gatk.engine.ReadProperties;
 import org.broadinstitute.gatk.utils.ValidationExclusion;
 import org.broadinstitute.gatk.engine.datasources.reads.SAMDataSource;
 import org.broadinstitute.gatk.utils.exceptions.UserException;
+import org.broadinstitute.gatk.utils.help.HelpConstants;
 
 /**
  * Filter out malformed reads
  *
  * 

This filter is applied automatically by all GATK tools in order to protect them from crashing on reads that are - * grossly malformed. There are a few issues (such as the absence of sequence bases) that will cause the run to fail with an - * error, but these cases can be preempted by setting flags that cause the problem reads to also be filtered.

+ * malformed. There are a few types of malformation (such as the absence of sequence bases) that are not filtered out + * by default and can cause errors, but these cases can be preempted by setting flags that cause the problem reads to + * also be filtered.

+ * + *

Criteria used by default

+ * + * + *

Optional criteria

+ * * *

Usage example

* - *

Set the malformed read filter to filter out reads that have no sequence bases

+ *

Set the malformed read filter to also filter out reads that have no stored sequence bases

*
  *     java -jar GenomeAnalysisTk.jar \
  *         -T ToolName \
@@ -200,26 +219,13 @@ public class MalformedReadFilter extends ReadFilter {
             if (! filterReadsWithNCigar && !allowNCigars) {
                 throw new UserException.UnsupportedCigarOperatorException(
                         CigarOperator.N,read,
-                        "Perhaps you are"
-                        + " trying to use RNA-Seq data?"
-                        + " While we are currently actively working to"
-                        + " support this data type unfortunately the"
-                        + " GATK cannot be used with this data in its"
-                        + " current form. You have the option of either"
-                        + " filtering out all reads with operator "
-                        + CigarOperator.N + " in their CIGAR string"
-                        + " (please add --"
-                        +  FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME
-                        + " to your command line) or"
-                        + " assume the risk of processing those reads as they"
-                        + " are including the pertinent unsafe flag (please add -U"
-                        + ' ' + ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS
-                        + " to your command line). Notice however that if you were"
-                        + " to choose the latter, an unspecified subset of the"
-                        + " analytical outputs of an unspecified subset of the tools"
-                        + " will become unpredictable. Consequently the GATK team"
-                        + " might well not be able to provide you with the usual support"
-                        + " with any issue regarding any output");
+                        "If you are working with RNA-Seq data, see " + HelpConstants.articlePost("3891") + " for guidance. "
+                        + "If you choose to disregard those instructions, or for other uses, you have the option of either "
+                        + "filtering out all reads with operator " + CigarOperator.N + " in their CIGAR string" + " (add --"
+                        +  FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME + " to your command line) or overriding this check (add -U " 
+                        + ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS + " to your command line). Notice however that the latter "
+                        + "is unsupported, so if you use it and encounter any problems, the GATK support team may not be able to help "
+                        + "you.");
             }
             return ! filterReadsWithNCigar;
         }
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java
index 3c61235e3..c4b6ea09e 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java
@@ -53,9 +53,13 @@ import java.util.*;
  * Filter variant calls based on INFO and FORMAT annotations
  *
  * 

- * This tool is designed for hard-filtering variant calls based on certain criteria. - * Records are hard-filtered by changing the value in the FILTER field to something other than PASS. Filtered records - * will be preserved in the output unless their removal is requested in the command line.

+ * This tool is designed for hard-filtering variant calls based on certain criteria. Records are hard-filtered + * by changing the value in the FILTER field to something other than PASS. Filtered records will be preserved + * in the output unless their removal is requested in the command line.

+ * + *

The most common way of specifying filtering criteria is by using JEXL queries. See the + * article on JEXL expressions in the + * documentation Guide for detailed information and examples.

* *

Input

*

@@ -75,10 +79,18 @@ import java.util.*; * -o output.vcf \ * --variant input.vcf \ * --filterExpression "AB < 0.2 || MQ0 > 50" \ - * --filterName "Nov09filters" \ - * --mask mask.vcf \ - * --maskName InDel + * --filterName "SomeFilterName" *

+ * + *

Caveat

+ *

When you run VariantFiltration with a command that includes multiple logical parts, each part of the command is applied + * individually to the original form of the VCF record. Say you ran a VariantFiltration command that includes three parts: one applies + * some genotype filters, another applies setFilterGtToNoCall (which changes sample genotypes to ./. whenever a sample has a + * genotype-level FT annotation), and yet another one filters sites based on whether any samples have a no-call there. You might + * think that such a command would allow you to filter sites based on sample-level annotations in one go. However, that would only + * work if the parts of the command were applied internally in series (like a pipeline), but that's not the case; they are applied + * in parallel to the same original record. So unfortunately, to achieve the desired result, these filters should be applied as + * separate commands.

* */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )