Merge pull request #1299 from broadinstitute/gvda_doc_updates

Documentation updates
2016-03-04 16:06:12 -08:00 · 2016-03-04 16:06:12 -08:00 · 7d2c56f681
parent 8c15d3ccd8 2b70f14740
commit 7d2c56f681
3 changed files with 48 additions and 30 deletions
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/cancer/m2/MuTect2.java
@ -141,7 +141,7 @@ import static java.lang.Math.pow;
 * <pre>
 *   java
 *     -jar GenomeAnalysisTK.jar
- *     -T HaplotypeCaller
+ *     -T MuTect2
 *     -R reference.fasta
 *     -I:tumor normal1.bam \
 *     [--dbsnp dbSNP.vcf] \
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/MalformedReadFilter.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/filters/MalformedReadFilter.java
@ -32,17 +32,36 @@ import org.broadinstitute.gatk.engine.ReadProperties;
 import org.broadinstitute.gatk.utils.ValidationExclusion;
 import org.broadinstitute.gatk.engine.datasources.reads.SAMDataSource;
 import org.broadinstitute.gatk.utils.exceptions.UserException;
+import org.broadinstitute.gatk.utils.help.HelpConstants;

 /**
 * Filter out malformed reads
 *
 * <p>This filter is applied automatically by all GATK tools in order to protect them from crashing on reads that are
- * grossly malformed. There are a few issues (such as the absence of sequence bases) that will cause the run to fail with an
- * error, but these cases can be preempted by setting flags that cause the problem reads to also be filtered.</p>
+ * malformed. There are a few types of malformation (such as the absence of sequence bases) that are not filtered out 
+ * by default and can cause errors, but these cases can be preempted by setting flags that cause the problem reads to 
+ * also be filtered.</p>
+ * 
+ * <h4>Criteria used by default</h4>
+ * <ul>
+ *      <li><b>Invalid Alignment Start:</b> Read alignment start is inconsistent with the read unmapped flag; either read is not flagged as 'unmapped', but alignment start is NO_ALIGNMENT_START, or read is not flagged as 'unmapped', but alignment start is -1.</li>
+ *      <li><b>Invalid Alignment End:</b> Read aligns to negative number of bases in the reference.</li>
+ *      <li><b>Alignment Disagrees With Header:</b> Read is aligned to nonexistent contig or read is aligned to a point after the end of the contig.</li>
+ *      <li><b>Missing or Undefined Read Group:</b> Either the RG tag is missing, it is not defined in the header, or required elements such as RGID are missing.</li>
+ *      <li><b>CIGAR Disagrees With Alignment:</b> Read has a valid alignment start, but the CIGAR string is empty.</li>
+ *      <li><b>CIGAR Is Not Supported:</b> Read CIGAR contains operators that are not supported (N which is treated separately).</li>
+ * </ul>
+ * 
+ * <h4>Optional criteria</h4>
+ * <ul>
+ *      <li><b>Mismatching Bases And Quals:</b> Read does not have the same number of bases and base qualities.</li>
+ *      <li><b>Bases Not Stored:</b> Read has no stored bases; the SEQ field contains '*' instead.</li>
+ *      <li><b>CIGAR With N Operator:</b> Read CIGAR contains N operator (typical of RNA-Seq data).</li>
+ * </ul>
 *
 * <h3>Usage example</h3>
 *
- * <h4>Set the malformed read filter to filter out reads that have no sequence bases</h4>
+ * <h4>Set the malformed read filter to also filter out reads that have no stored sequence bases</h4>
 * <pre>
 *     java -jar GenomeAnalysisTk.jar \
 *         -T ToolName \
@ -200,26 +219,13 @@ public class MalformedReadFilter extends ReadFilter {
            if (! filterReadsWithNCigar && !allowNCigars) {
                throw new UserException.UnsupportedCigarOperatorException(
                        CigarOperator.N,read,
-                        "Perhaps you are"
-                        + " trying to use RNA-Seq data?"
-                        + " While we are currently actively working to"
-                        + " support this data type unfortunately the"
-                        + " GATK cannot be used with this data in its"
-                        + " current form. You have the option of either"
-                        + " filtering out all reads with operator "
-                        + CigarOperator.N + " in their CIGAR string"
-                        + " (please add --"
-                        +  FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME
-                        + " to your command line) or"
-                        + " assume the risk of processing those reads as they"
-                        + " are including the pertinent unsafe flag (please add -U"
-                        + ' ' + ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS
-                        + " to your command line). Notice however that if you were"
-                        + " to choose the latter, an unspecified subset of the"
-                        + " analytical outputs of an unspecified subset of the tools"
-                        + " will become unpredictable. Consequently the GATK team"
-                        + " might well not be able to provide you with the usual support"
-                        + " with any issue regarding any output");
+                        "If you are working with RNA-Seq data, see " + HelpConstants.articlePost("3891") + " for guidance. "
+                        + "If you choose to disregard those instructions, or for other uses, you have the option of either "
+                        + "filtering out all reads with operator " + CigarOperator.N + " in their CIGAR string" + " (add --"
+                        +  FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME + " to your command line) or overriding this check (add -U " 
+                        + ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS + " to your command line). Notice however that the latter "
+                        + "is unsupported, so if you use it and encounter any problems, the GATK support team may not be able to help "
+                        + "you.");
            }
            return ! filterReadsWithNCigar;
        }
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java
@ -53,9 +53,13 @@ import java.util.*;
 * Filter variant calls based on INFO and FORMAT annotations
 *
 * <p>
- * This tool is designed for hard-filtering variant calls based on certain criteria.
- * Records are hard-filtered by changing the value in the FILTER field to something other than PASS. Filtered records
- * will be preserved in the output unless their removal is requested in the command line. </p>
+ * This tool is designed for hard-filtering variant calls based on certain criteria. Records are hard-filtered 
+ * by changing the value in the FILTER field to something other than PASS. Filtered records will be preserved 
+ * in the output unless their removal is requested in the command line. </p>
+ * 
+ * <p>The most common way of specifying filtering criteria is by using JEXL queries. See the 
+ * <a href='https://www.broadinstitute.org/gatk/guide/article?id=1255'> article on JEXL expressions</a> in the 
+ * documentation Guide for detailed information and examples.</p>
 *
 * <h3>Input</h3>
 * <p>
@ -75,10 +79,18 @@ import java.util.*;
 *   -o output.vcf \
 *   --variant input.vcf \
 *   --filterExpression "AB < 0.2 || MQ0 > 50" \
- *   --filterName "Nov09filters" \
- *   --mask mask.vcf \
- *   --maskName InDel
+ *   --filterName "SomeFilterName" 
 * </pre>
+ * 
+ * <h3>Caveat</h3>
+ * <p>When you run VariantFiltration with a command that includes multiple logical parts, each part of the command is applied 
+ * individually to the original form of the VCF record. Say you ran a VariantFiltration command that includes three parts: one applies 
+ * some genotype filters, another applies setFilterGtToNoCall (which changes sample genotypes to ./. whenever a sample has a 
+ * genotype-level FT annotation), and yet another one filters sites based on whether any samples have a no-call there. You might 
+ * think that such a command would allow you to filter sites based on sample-level annotations in one go. However, that would only 
+ * work if the parts of the command were applied internally in series (like a pipeline) but that's not the case; they are applied 
+ * in parallel to the same original record. So unfortunately, to achieve the desired result, these filters should be applied as 
+ * separate commands.</p>
 *
 */
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )