diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java index ec0038bab..3b4d31c9b 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java @@ -141,7 +141,7 @@ import static java.lang.Math.pow; *
* java * -jar GenomeAnalysisTK.jar - * -T HaplotypeCaller + * -T MuTect2 * -R reference.fasta * -I:tumor normal1.bam \ * [--dbsnp dbSNP.vcf] \ diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/MalformedReadFilter.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/MalformedReadFilter.java index 4e267167e..eb524eba8 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/MalformedReadFilter.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/MalformedReadFilter.java @@ -32,17 +32,36 @@ import org.broadinstitute.gatk.engine.ReadProperties; import org.broadinstitute.gatk.utils.ValidationExclusion; import org.broadinstitute.gatk.engine.datasources.reads.SAMDataSource; import org.broadinstitute.gatk.utils.exceptions.UserException; +import org.broadinstitute.gatk.utils.help.HelpConstants; /** * Filter out malformed reads * *This filter is applied automatically by all GATK tools in order to protect them from crashing on reads that are - * grossly malformed. There are a few issues (such as the absence of sequence bases) that will cause the run to fail with an - * error, but these cases can be preempted by setting flags that cause the problem reads to also be filtered.
+ * malformed. There are a few types of malformation (such as the absence of sequence bases) that are not filtered out + * by default and can cause errors, but these cases can be preempted by setting flags that cause the problem reads to + * also be filtered. + * + *Criteria used by default
+ *
* java -jar GenomeAnalysisTk.jar \
* -T ToolName \
@@ -200,26 +219,13 @@ public class MalformedReadFilter extends ReadFilter {
if (! filterReadsWithNCigar && !allowNCigars) {
throw new UserException.UnsupportedCigarOperatorException(
CigarOperator.N,read,
- "Perhaps you are"
- + " trying to use RNA-Seq data?"
- + " While we are currently actively working to"
- + " support this data type unfortunately the"
- + " GATK cannot be used with this data in its"
- + " current form. You have the option of either"
- + " filtering out all reads with operator "
- + CigarOperator.N + " in their CIGAR string"
- + " (please add --"
- + FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME
- + " to your command line) or"
- + " assume the risk of processing those reads as they"
- + " are including the pertinent unsafe flag (please add -U"
- + ' ' + ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS
- + " to your command line). Notice however that if you were"
- + " to choose the latter, an unspecified subset of the"
- + " analytical outputs of an unspecified subset of the tools"
- + " will become unpredictable. Consequently the GATK team"
- + " might well not be able to provide you with the usual support"
- + " with any issue regarding any output");
+ "If you are working with RNA-Seq data, see " + HelpConstants.articlePost("3891") + " for guidance. "
+ + "If you choose to disregard those instructions, or for other uses, you have the option of either "
+ + "filtering out all reads with operator " + CigarOperator.N + " in their CIGAR string" + " (add --"
+ + FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME + " to your command line) or overriding this check (add -U "
+ + ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS + " to your command line). Notice however that the latter "
+ + "is unsupported, so if you use it and encounter any problems, the GATK support team may not be able to help "
+ + "you.");
}
return ! filterReadsWithNCigar;
}
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java
index 3c61235e3..c4b6ea09e 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java
@@ -53,9 +53,13 @@ import java.util.*;
* Filter variant calls based on INFO and FORMAT annotations
*
*
- * This tool is designed for hard-filtering variant calls based on certain criteria.
- * Records are hard-filtered by changing the value in the FILTER field to something other than PASS. Filtered records
- * will be preserved in the output unless their removal is requested in the command line.
+ * This tool is designed for hard-filtering variant calls based on certain criteria. Records are hard-filtered
+ * by changing the value in the FILTER field to something other than PASS. Filtered records will be preserved
+ * in the output unless their removal is requested in the command line.
+ *
+ * The most common way of specifying filtering criteria is by using JEXL queries. See the
+ * article on JEXL expressions in the
+ * documentation Guide for detailed information and examples.
*
* Input
*
@@ -75,10 +79,18 @@ import java.util.*;
* -o output.vcf \
* --variant input.vcf \
* --filterExpression "AB < 0.2 || MQ0 > 50" \
- * --filterName "Nov09filters" \
- * --mask mask.vcf \
- * --maskName InDel
+ * --filterName "SomeFilterName"
*
+ *
+ * when you run VariantFiltration with a command that includes multiple logical parts, each part of the command is applied + * individually to the original form of the VCF record. Say you ran a VF command that includes three parts: one applies + * some genotype filters, another applies setFilterGtToNoCall (which changes sample genotypes to ./. whenever a sample has a + * genotype-level FT annotation), and yet another one filters sites based on whether any samples have a no-call there. You might + * think that such a command would allow you to filter sites based on sample-level annotations in one go. However, that would only + * work if the parts of the command were applied internally in series (like a pipeline) but that's not the case; they are applied + * in parallel to the same original record. So unfortunately, to achieve the desired result, these filters should be applied as + * separate commands.
* */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )