Merge pull request #1299 from broadinstitute/gvda_doc_updates
Documentation updates
This commit is contained in:
commit
7d2c56f681
|
|
@ -141,7 +141,7 @@ import static java.lang.Math.pow;
|
|||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T HaplotypeCaller
|
||||
* -T MuTect2
|
||||
* -R reference.fasta
|
||||
* -I:tumor normal1.bam \
|
||||
* [--dbsnp dbSNP.vcf] \
|
||||
|
|
|
|||
|
|
@ -32,17 +32,36 @@ import org.broadinstitute.gatk.engine.ReadProperties;
|
|||
import org.broadinstitute.gatk.utils.ValidationExclusion;
|
||||
import org.broadinstitute.gatk.engine.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
|
||||
/**
|
||||
* Filter out malformed reads
|
||||
*
|
||||
* <p>This filter is applied automatically by all GATK tools in order to protect them from crashing on reads that are
|
||||
* grossly malformed. There are a few issues (such as the absence of sequence bases) that will cause the run to fail with an
|
||||
* error, but these cases can be preempted by setting flags that cause the problem reads to also be filtered.</p>
|
||||
* malformed. There are a few types of malformation (such as the absence of sequence bases) that are not filtered out
|
||||
* by default and can cause errors, but these cases can be preempted by setting flags that cause the problem reads to
|
||||
* also be filtered.</p>
|
||||
*
|
||||
* <h4>Criteria used by default</h4>
|
||||
* <ul>
|
||||
* <li><b>Invalid Alignment Start:</b> Read alignment start is inconsistent with the read unmapped flag; the read is not flagged as 'unmapped', but its alignment start is either NO_ALIGNMENT_START or -1.</li>
|
||||
* <li><b>Invalid Alignment End:</b> Read aligns to negative number of bases in the reference.</li>
|
||||
* <li><b>Alignment Disagrees With Header:</b> Read is aligned to nonexistent contig or read is aligned to a point after the end of the contig.</li>
|
||||
* <li><b>Missing or Undefined Read Group:</b> Either the RG tag is missing, it is not defined in the header, or required elements such as RGID are missing.</li>
|
||||
* <li><b>Cigar Disagrees With Alignment:</b> Read has a valid alignment start, but the CIGAR string is empty.</li>
|
||||
* <li><b>CIGAR Is Not Supported:</b> Read CIGAR contains operators that are not supported (the N operator is treated separately).</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h4>Optional criteria</h4>
|
||||
* <ul>
|
||||
* <li><b>Mismatching Bases And Quals:</b> Read does not have the same number of bases and base qualities.</li>
|
||||
* <li><b>Bases Not Stored:</b> Read has no stored bases, i.e. it has '*' in the SEQ field instead.</li>
|
||||
* <li><b>CIGAR With N Operator:</b> Read CIGAR contains the N operator (typical of RNA-Seq data).</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h3>Usage example</h3>
|
||||
*
|
||||
* <h4>Set the malformed read filter to filter out reads that have no sequence bases</h4>
|
||||
* <h4>Set the malformed read filter to also filter out reads that have no stored sequence bases</h4>
|
||||
* <pre>
|
||||
* java -jar GenomeAnalysisTK.jar \
|
||||
* -T ToolName \
|
||||
|
|
@ -200,26 +219,13 @@ public class MalformedReadFilter extends ReadFilter {
|
|||
if (! filterReadsWithNCigar && !allowNCigars) {
|
||||
throw new UserException.UnsupportedCigarOperatorException(
|
||||
CigarOperator.N,read,
|
||||
"Perhaps you are"
|
||||
+ " trying to use RNA-Seq data?"
|
||||
+ " While we are currently actively working to"
|
||||
+ " support this data type unfortunately the"
|
||||
+ " GATK cannot be used with this data in its"
|
||||
+ " current form. You have the option of either"
|
||||
+ " filtering out all reads with operator "
|
||||
+ CigarOperator.N + " in their CIGAR string"
|
||||
+ " (please add --"
|
||||
+ FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME
|
||||
+ " to your command line) or"
|
||||
+ " assume the risk of processing those reads as they"
|
||||
+ " are including the pertinent unsafe flag (please add -U"
|
||||
+ ' ' + ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS
|
||||
+ " to your command line). Notice however that if you were"
|
||||
+ " to choose the latter, an unspecified subset of the"
|
||||
+ " analytical outputs of an unspecified subset of the tools"
|
||||
+ " will become unpredictable. Consequently the GATK team"
|
||||
+ " might well not be able to provide you with the usual support"
|
||||
+ " with any issue regarding any output");
|
||||
"If you are working with RNA-Seq data, see " + HelpConstants.articlePost("3891") + " for guidance. "
|
||||
+ "If you choose to disregard those instructions, or for other uses, you have the option of either "
|
||||
+ "filtering out all reads with operator " + CigarOperator.N + " in their CIGAR string" + " (add --"
|
||||
+ FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME + " to your command line) or overriding this check (add -U "
|
||||
+ ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS + " to your command line). Notice however that the latter "
|
||||
+ "is unsupported, so if you use it and encounter any problems, the GATK support team not be able to help "
|
||||
+ "you.");
|
||||
}
|
||||
return ! filterReadsWithNCigar;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,9 +53,13 @@ import java.util.*;
|
|||
* Filter variant calls based on INFO and FORMAT annotations
|
||||
*
|
||||
* <p>
|
||||
* This tool is designed for hard-filtering variant calls based on certain criteria.
|
||||
* Records are hard-filtered by changing the value in the FILTER field to something other than PASS. Filtered records
|
||||
* will be preserved in the output unless their removal is requested in the command line. </p>
|
||||
* This tool is designed for hard-filtering variant calls based on certain criteria. Records are hard-filtered
|
||||
* by changing the value in the FILTER field to something other than PASS. Filtered records will be preserved
|
||||
* in the output unless their removal is requested in the command line. </p>
|
||||
*
|
||||
* <p>The most common way of specifying filtering criteria is by using JEXL queries. See the
|
||||
* <a href='https://www.broadinstitute.org/gatk/guide/article?id=1255'> article on JEXL expressions</a> in the
|
||||
* documentation Guide for detailed information and examples.</p>
|
||||
*
|
||||
* <h3>Input</h3>
|
||||
* <p>
|
||||
|
|
@ -75,10 +79,18 @@ import java.util.*;
|
|||
* -o output.vcf \
|
||||
* --variant input.vcf \
|
||||
* --filterExpression "AB < 0.2 || MQ0 > 50" \
|
||||
* --filterName "Nov09filters" \
|
||||
* --mask mask.vcf \
|
||||
* --maskName InDel
|
||||
* --filterName "SomeFilterName"
|
||||
* </pre>
|
||||
*
|
||||
* <h3>Caveat</h3>
|
||||
* <p>When you run VariantFiltration with a command that includes multiple logical parts, each part of the command is applied
|
||||
* individually to the original form of the VCF record. Say you ran a VariantFiltration command that includes three parts: one applies
|
||||
* some genotype filters, another applies setFilterGtToNoCall (which changes sample genotypes to ./. whenever a sample has a
|
||||
* genotype-level FT annotation), and yet another one filters sites based on whether any samples have a no-call there. You might
|
||||
* think that such a command would allow you to filter sites based on sample-level annotations in one go. However, that would only
|
||||
* work if the parts of the command were applied internally in series (like a pipeline) but that's not the case; they are applied
|
||||
* in parallel to the same original record. So unfortunately, to achieve the desired result, these filters should be applied as
|
||||
* separate commands.</p>
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
|
||||
|
|
|
|||
Loading…
Reference in New Issue