Merge pull request #894 from broadinstitute/gg_ami_docs_license

Edited ASEReadCounter documentation
Geraldine Van der Auwera 2015-03-28 13:15:24 -04:00
commit 87b3dddb39
1 changed file with 55 additions and 35 deletions


@@ -44,25 +44,38 @@ import java.io.PrintStream;
import java.util.List;
/**
* Calculated allele counts in specific loci, to allow allele specific expression analysis.
* Calculate allele counts for allele-specific expression analysis
*
* <p>
* Calculated allele counts in specific loci after filtering based on mapping quality, base quality, depth of coverage, overlapping paired reads and deletion on the position.
* All thresholds and option can be set as command line arguments.
* The output of this tool is a tab delimited txt file, compatible with Mamba, a downstream tool (http://www.well.ox.ac.uk/~rivas/mamba/).
* A user can use -drf DuplicateRead to allow the tool to count also duplicated reads (a useful option for RNAseq data tools)
* This tool calculates allele counts at a set of given loci after applying filters that are tuned for enabling
* allele-specific expression (ASE) analysis. The filters operate on mapping quality, base quality, depth of coverage,
* overlapping paired reads and deletions overlapping the position. All thresholds and options are controlled by
* command-line arguments.
* </p>
*
* <h4>Notes</h4>
* <ul>
* <li>Like most GATK tools, this tool filters out duplicate reads by default. However, some ASE methods
* recommend including duplicate reads in the analysis, so the DuplicateRead filter can be disabled using the
* `-drf DuplicateRead` flag on the command line.</li>
* </ul>
* <h4>Caveats</h4>
* <ul>
* <li>This tool will only process biallelic sites. If your callset contains multiallelic sites, they will be ignored.
* Optionally, you can subset your callset to just biallelic variants using e.g.
* <a href="org_broadinstitute_gatk_tools_walkers_variantutils_SelectVariants.php">SelectVariants</a>
* with the option `-restrictAllelesTo BIALLELIC`.</li>
* </ul>
* <h3>Input</h3>
* <p>
* BAM files (with proper headers) to be analyzed for ASE
* </p>
* <p>
* a VCF file with specific sites to process
* </p>
* <ul>
* <li>BAM files (with proper headers) to be analyzed for ASE</li>
* <li>A VCF file with specific sites to process.</li>
* </ul>
* <h3>Output</h3>
* <p>
* Table (tab delimited file) with the specific allele counts
* A table of allele counts at the given sites. By default, it is formatted as a tab-delimited text file
* that is readable by R and compatible with <a href="http://www.well.ox.ac.uk/~rivas/mamba/">Mamba</a>,
* a downstream tool developed for allele-specific expression analysis.
* </p>
*
* <h3>Examples</h3>
@@ -72,11 +85,11 @@ import java.util.List;
* -T ASEReadCounter \
* -o file_name \
* -I input.bam \
* -sites sites.vcf
* -U ALLOW_N_CIGAR_READS
* -sites sites.vcf \
* -U ALLOW_N_CIGAR_READS \
* [-minDepth 10] \
* [--minMappingQuality 10] \
* [--minBaseQuality 2]
* [--minBaseQuality 2] \
* [-drf DuplicateRead]
* </pre>
*/
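The filtering behaviour the revised javadoc describes (mapping quality, base quality and depth thresholds, each disabled when set to -1, as detailed in the argument docs further down) amounts to a small per-site gate. A minimal plain-Java sketch of that idea, using hypothetical names (SiteCounterSketch, PileupBase, countSite) rather than anything from the GATK codebase:

    // Illustrative only: plain-Java sketch of the per-site gating described above.
    // SiteCounterSketch, PileupBase and countSite are hypothetical names, not GATK classes.
    import java.util.List;

    public class SiteCounterSketch {

        /** A stand-in for one read base observed at the site of interest. */
        record PileupBase(char base, int mappingQuality, int baseQuality) {}

        /** A threshold of -1 means "check disabled", mirroring the tool's argument defaults. */
        static boolean passes(int value, int threshold) {
            return threshold < 0 || value >= threshold;
        }

        /** Returns {refCount, altCount, filteredDepth}, or null when the site is skipped for low depth. */
        static int[] countSite(List<PileupBase> pileup, char ref, char alt,
                               int minMappingQuality, int minBaseQuality, int minDepth) {
            int refCount = 0, altCount = 0, filteredDepth = 0;
            for (PileupBase pb : pileup) {
                if (!passes(pb.mappingQuality(), minMappingQuality)) continue; // low MAPQ read
                if (!passes(pb.baseQuality(), minBaseQuality)) continue;       // low base quality
                filteredDepth++;
                if (pb.base() == ref) refCount++;
                else if (pb.base() == alt) altCount++;
                // bases matching neither allele are reported separately ("otherBases") by the real tool
            }
            if (minDepth >= 0 && filteredDepth < minDepth) return null;        // below -minDepth: skip site
            return new int[]{refCount, altCount, filteredDepth};
        }

        public static void main(String[] args) {
            List<PileupBase> pileup = List.of(
                    new PileupBase('A', 60, 30), new PileupBase('G', 60, 30),
                    new PileupBase('A', 5, 30));                                // dropped by -mmq 10
            int[] counts = countSite(pileup, 'A', 'G', 10, 2, -1);
            System.out.println("ref=" + counts[0] + " alt=" + counts[1] + " depth=" + counts[2]);
            // prints: ref=1 alt=1 depth=2
        }
    }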
@@ -91,38 +104,40 @@ public class ASEReadCounter extends LocusWalker<String, Integer> {
public RodBinding<VariantContext> sites;
/**
* Loci with total depth lower then this threshold will be skipped. This is set to -1 by default to disable the evaluation and ignore this threshold.
* If this argument is enabled, loci with total depth lower than this threshold after all filters have been applied
* will be skipped. This is set to -1 by default to disable the evaluation and ignore this threshold.
*/
@Argument(fullName = "minDepthOfNonFilteredBase", shortName = "minDepth", doc = "Minimum number of filtered-pass bases that we need to see for reporting this site", required = false, minValue = 0, maxValue = Integer.MAX_VALUE)
@Argument(fullName = "minDepthOfNonFilteredBase", shortName = "minDepth", doc = "Minimum number of bases that pass filters", required = false, minValue = 0, maxValue = Integer.MAX_VALUE)
public int minDepthOfNonFilteredBases = -1;
/**
* Reads with mapping quality values lower than this threshold will be skipped. This is set to -1 by default to disable the evaluation and ignore this threshold.
* If this argument is enabled, reads with mapping quality values lower than this threshold will not be counted.
* This is set to -1 by default to disable the evaluation and ignore this threshold.
*/
@Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth", required = false, minValue = 0, maxValue = Integer.MAX_VALUE)
@Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum read mapping quality", required = false, minValue = 0, maxValue = Integer.MAX_VALUE)
public int minMappingQuality = 0;
/**
* Bases with quality scores lower than this threshold will be skipped. This is set to -1 by default to disable the evaluation and ignore this threshold.
* If this argument is enabled, bases with quality scores lower than this threshold will not be counted.
* This is set to -1 by default to disable the evaluation and ignore this threshold.
*/
@Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum quality of bases to count towards depth", required = false, minValue = 0, maxValue = Byte.MAX_VALUE)
@Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum base quality", required = false, minValue = 0, maxValue = Byte.MAX_VALUE)
public byte minBaseQuality = 0;
/**
* There are few option to deal with an overlapping read pair.
* COUNT_READS - Count all reads independently (even if from the same fragment).
* COUNT_FRAGMENTS - Count all fragments (even if the reads that compose the fragment are not consistent at that base).
* COUNT_FRAGMENTS_REQUIRE_SAME_BASEQUIRE_SAME_BASE - Count all fragments (but only if the reads that compose the fragment are consistent at that base).
* defaults to COUNT_FRAGMENTS_REQUIRE_SAME_BASEQUIRE_SAME_BASE
* These options modify how the tool deals with overlapping read pairs.
* COUNT_READS - Count all reads independently, even if they are from the same fragment.
* COUNT_FRAGMENTS - Count all fragments, even if the reads that compose the fragment are not consistent at that base.
* COUNT_FRAGMENTS_REQUIRE_SAME_BASE - Count all fragments, but only if the reads that compose the fragment are consistent at that base (default).
*/
@Argument(fullName = "countOverlapReadsType", shortName = "overlap", doc = "How should overlapping reads from the same fragment be handled. Current options: COUNT_READS, COUNT_FRAGMENTS and COUNT_FRAGMENTS_REQUIRE_SAME_BASEQUIRE_SAME_BASE (default)", required = false)
@Argument(fullName = "countOverlapReadsType", shortName = "overlap", doc = "Handling of overlapping reads from the same fragment", required = false)
public CoverageUtils.CountPileupType countType = CoverageUtils.CountPileupType.COUNT_FRAGMENTS_REQUIRE_SAME_BASE;
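The three COUNT_* modes documented above differ only in how a mate pair that overlaps the site is tallied. A hedged, illustrative plain-Java sketch (OverlapSketch and contributions are made-up names, not the walker's internals):

    // Illustrative only: how each mode treats a mate pair whose reads both cover the site.
    public class OverlapSketch {

        enum OverlapMode { COUNT_READS, COUNT_FRAGMENTS, COUNT_FRAGMENTS_REQUIRE_SAME_BASE }

        /** Observations contributed at the site by one overlapping mate pair. */
        static int contributions(OverlapMode mode, char baseFromMate1, char baseFromMate2) {
            switch (mode) {
                case COUNT_READS:                       // each read counted independently
                    return 2;
                case COUNT_FRAGMENTS:                   // one count per fragment, even if the mates disagree
                    return 1;
                case COUNT_FRAGMENTS_REQUIRE_SAME_BASE: // fragment counted only when the mates agree (default)
                    return baseFromMate1 == baseFromMate2 ? 1 : 0;
                default:
                    throw new AssertionError(mode);
            }
        }

        public static void main(String[] args) {
            // Mates disagree at the site: 2 / 1 / 0 observations depending on the mode.
            for (OverlapMode mode : OverlapMode.values())
                System.out.println(mode + " -> " + contributions(mode, 'A', 'G'));
        }
    }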
/**
* Output file format (e.g. csv, table, rtable); defaults to r-readable table.
* Available options are csv, table, rtable. By default, the format is an r-readable table.
*/
@Argument(fullName = "outputFormat", doc = "The format of the output file, can be csv, table, rtable", required = false)
public String outputFormat = "rtable";
@Argument(fullName = "outputFormat", doc = "Format of the output file, can be CSV, TABLE, RTABLE", required = false)
public OUTPUT_FORMAT outputFormat = OUTPUT_FORMAT.RTABLE;
/**
* Consider a spanning deletion as contributing to coverage. Also enables deletion counts in per-base output.
@@ -135,9 +150,14 @@ public class ASEReadCounter extends LocusWalker<String, Integer> {
@Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false)
public boolean ignoreDeletionSites = false;
final String[] OUTPUT_FORMATS = {"table","rtable","csv"};
public String separator = "\t";
public enum OUTPUT_FORMAT{
TABLE,
RTABLE,
CSV
}
////////////////////////////////////////////////////////////////////////////////////
// STANDARD WALKER METHODS
////////////////////////////////////////////////////////////////////////////////////
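Switching outputFormat from a free-form String to the OUTPUT_FORMAT enum means unknown values can no longer reach the separator logic in the next hunk. A small, self-contained sketch of standard Java enum behaviour and the format-to-separator mapping; OutputFormatSketch and separatorFor are hypothetical names, and nothing here assumes how the GATK argument parser itself converts the string:

    // Illustrative only: OutputFormatSketch and separatorFor are hypothetical names.
    public class OutputFormatSketch {

        enum OUTPUT_FORMAT { TABLE, RTABLE, CSV }

        // TABLE and RTABLE are tab-delimited; CSV uses commas,
        // mirroring the separator selection in the hunk below.
        static String separatorFor(OUTPUT_FORMAT format) {
            return format == OUTPUT_FORMAT.CSV ? "," : "\t";
        }

        public static void main(String[] args) {
            // Plain-Java enum parsing is strict about names:
            System.out.println(OUTPUT_FORMAT.valueOf("CSV"));                     // CSV
            System.out.println(separatorFor(OUTPUT_FORMAT.RTABLE).equals("\t"));  // true
            // OUTPUT_FORMAT.valueOf("csv") would throw IllegalArgumentException;
            // whether the argument parser is case-sensitive here is not assumed.
        }
    }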
@@ -148,15 +168,15 @@ public class ASEReadCounter extends LocusWalker<String, Integer> {
// Check the output format
boolean goodOutputFormat = false;
for ( final String f : OUTPUT_FORMATS ) {
for ( final OUTPUT_FORMAT f : OUTPUT_FORMAT.values()) {
goodOutputFormat = goodOutputFormat || f.equals(outputFormat);
}
if ( ! goodOutputFormat ) {
throw new IllegalArgumentException("Improper output format. Can be one of table,rtable,csv. Was "+outputFormat);
throw new IllegalArgumentException("Improper output format. Can be one of TABLE, RTABLE, CSV. Was "+outputFormat);
}
if ( outputFormat.equals("csv") ) {
if ( outputFormat.equals(OUTPUT_FORMAT.CSV) ) {
separator = ",";
}
final String header = "contig"+separator+"position"+separator+"variantID"+separator+"refAllele"+separator+"altAllele"+separator+"refCount"+separator+"altCount"+separator+"totalCount"+separator+"lowMAPQDepth"+separator+"lowBaseQDepth"+separator+"rawDepth"+separator+"otherBases"+separator+"improperPairs";
@ -176,13 +196,13 @@ public class ASEReadCounter extends LocusWalker<String, Integer> {
final List<VariantContext> VCs = tracker.getValues(sites, context.getLocation());
if(VCs != null && VCs.size() > 1)
throw new UserException("more then one variant context in position: "+contig+":"+position);
throw new UserException("More then one variant context at position: "+contig+":"+position);
if(VCs == null || VCs.size() == 0)
return null;
final VariantContext vc = VCs.get(0);
if(!vc.isBiallelic()) {
logger.warn("ignore site: cannot run ASE on non-biallelic sites: " + vc.toString());
logger.warn("Ignoring site: cannot run ASE on non-biallelic sites: " + vc.toString());
return null;
}
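For the biallelic caveat in the javadoc above, the guard just shown is the per-site equivalent of pre-filtering the callset. A sketch of doing that filtering in code, assuming htsjdk's VariantContext is on the classpath (BiallelicFilterSketch is a made-up name; SelectVariants with -restrictAllelesTo BIALLELIC remains the documented route):

    // Illustrative only: keep biallelic records before handing sites to the tool.
    import htsjdk.variant.variantcontext.VariantContext;
    import java.util.List;
    import java.util.stream.Collectors;

    public class BiallelicFilterSketch {
        /** Keeps only biallelic sites; multiallelic records would otherwise be ignored with a warning. */
        static List<VariantContext> biallelicOnly(List<VariantContext> sites) {
            return sites.stream()
                    .filter(VariantContext::isBiallelic)   // same check the walker applies per site
                    .collect(Collectors.toList());
        }
    }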