Merge pull request #894 from broadinstitute/gg_ami_docs_license

Edited ASEReadCounter documentation
This commit is contained in:
Geraldine Van der Auwera 2015-03-28 13:15:24 -04:00
commit 87b3dddb39
1 changed file with 55 additions and 35 deletions

View File

@ -44,25 +44,38 @@ import java.io.PrintStream;
import java.util.List; import java.util.List;
/** /**
* Calculated allele counts in specific loci, to allow allele specific expression analysis. * Calculate allele counts for allele-specific expression analysis
* *
* <p> * <p>
* Calculated allele counts in specific loci after filtering based on mapping quality, base quality, depth of coverage, overlapping paired reads and deletion on the position. * This tool calculates allele counts at a set of given loci after applying filters that are tuned for enabling
* All thresholds and option can be set as command line arguments. * allele-specific expression (ASE) analysis. The filters operate on mapping quality, base quality, depth of coverage,
* The output of this tool is a tab delimited txt file, compatible with Mamba, a downstream tool (http://www.well.ox.ac.uk/~rivas/mamba/). * overlapping paired reads and deletions overlapping the position. All thresholds and options are controlled by
* A user can use -drf DuplicateRead to allow the tool to count also duplicated reads (a useful option for RNAseq data tools) * command-line arguments.
* </p> * </p>
* *
* <h4>Notes</h4>
* <ul>
 * <li>Like most GATK tools, this tool filters out duplicate reads by default. However, some ASE methods
 * recommend including duplicate reads in the analysis, so the DuplicateRead filter can be disabled using the
 * `-drf DuplicateRead` flag on the command line.</li>
* </ul>
* <h4>Caveats</h4>
* <ul>
* <li>This tool will only process biallelic sites. If your callset contains multiallelic sites, they will be ignored.
* Optionally, you can subset your callset to just biallelic variants using e.g.
* <a href="org_broadinstitute_gatk_tools_walkers_variantutils_SelectVariants.php">SelectVariants</a>
* with the option `-restrictAllelesTo BIALLELIC`.</li>
* </ul>
* <h3>Input</h3> * <h3>Input</h3>
* <p> * <ul>
* BAM files (with proper headers) to be analyzed for ASE * </p> * <li>BAM files (with proper headers) to be analyzed for ASE</li>
* <p> * <li>A VCF file with specific sites to process.</li>
* a VCF file with specific sites to process
* </p>
* </p></p> * </p></p>
* <h3>Output</h3> * <h3>Output</h3>
* <p> * <p>
* Table (tab delimited file) with the specific allele counts * A table of allele counts at the given sites. By default, it is formatted as a tab-delimited text file
* that is readable by R and compatible with <a href="http://www.well.ox.ac.uk/~rivas/mamba/">Mamba</a>,
* a downstream tool developed for allele-specific expression analysis.
* </p> * </p>
* *
* <h3>Examples</h3> * <h3>Examples</h3>
@ -72,11 +85,11 @@ import java.util.List;
* -T ASEReadCounter \ * -T ASEReadCounter \
* -o file_name \ * -o file_name \
* -I input.bam \ * -I input.bam \
* -sites sites.vcf * -sites sites.vcf \
* -U ALLOW_N_CIGAR_READS * -U ALLOW_N_CIGAR_READS \
* [-minDepth 10] \ * [-minDepth 10] \
* [--minMappingQuality 10] \ * [--minMappingQuality 10] \
* [--minBaseQuality 2] * [--minBaseQuality 2] \
* [-drf DuplicateRead] * [-drf DuplicateRead]
* </pre> * </pre>
*/ */
@ -91,38 +104,40 @@ public class ASEReadCounter extends LocusWalker<String, Integer> {
public RodBinding<VariantContext> sites; public RodBinding<VariantContext> sites;
/** /**
* Loci with total depth lower then this threshold will be skipped. This is set to -1 by default to disable the evaluation and ignore this threshold. * If this argument is enabled, loci with total depth lower than this threshold after all filters have been applied
* will be skipped. This is set to -1 by default to disable the evaluation and ignore this threshold.
*/ */
@Argument(fullName = "minDepthOfNonFilteredBase", shortName = "minDepth", doc = "Minimum number of filtered-pass bases that we need to see for reporting this site", required = false, minValue = 0, maxValue = Integer.MAX_VALUE) @Argument(fullName = "minDepthOfNonFilteredBase", shortName = "minDepth", doc = "Minimum number of bases that pass filters", required = false, minValue = 0, maxValue = Integer.MAX_VALUE)
public int minDepthOfNonFilteredBases = -1; public int minDepthOfNonFilteredBases = -1;
/** /**
* Reads with mapping quality values lower than this threshold will be skipped. This is set to -1 by default to disable the evaluation and ignore this threshold. * If this argument is enabled, reads with mapping quality values lower than this threshold will not be counted.
* This is set to -1 by default to disable the evaluation and ignore this threshold.
*/ */
@Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth", required = false, minValue = 0, maxValue = Integer.MAX_VALUE) @Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum read mapping quality", required = false, minValue = 0, maxValue = Integer.MAX_VALUE)
public int minMappingQuality = 0; public int minMappingQuality = 0;
/** /**
* Bases with quality scores lower than this threshold will be skipped. This is set to -1 by default to disable the evaluation and ignore this threshold. * If this argument is enabled, bases with quality scores lower than this threshold will not be counted.
* This is set to -1 by default to disable the evaluation and ignore this threshold.
*/ */
@Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum quality of bases to count towards depth", required = false, minValue = 0, maxValue = Byte.MAX_VALUE) @Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum base quality", required = false, minValue = 0, maxValue = Byte.MAX_VALUE)
public byte minBaseQuality = 0; public byte minBaseQuality = 0;
/** /**
* There are few option to deal with an overlapping read pair. * These options modify how the tool deals with overlapping read pairs.
* COUNT_READS - Count all reads independently (even if from the same fragment). * COUNT_READS - Count all reads independently, even if they are from the same fragment.
* COUNT_FRAGMENTS - Count all fragments (even if the reads that compose the fragment are not consistent at that base). * COUNT_FRAGMENTS - Count all fragments, even if the reads that compose the fragment are not consistent at that base.
* COUNT_FRAGMENTS_REQUIRE_SAME_BASEQUIRE_SAME_BASE - Count all fragments (but only if the reads that compose the fragment are consistent at that base). * COUNT_FRAGMENTS_REQUIRE_SAME_BASE - Count all fragments, but only if the reads that compose the fragment are consistent at that base (default).
* defaults to COUNT_FRAGMENTS_REQUIRE_SAME_BASEQUIRE_SAME_BASE
*/ */
@Argument(fullName = "countOverlapReadsType", shortName = "overlap", doc = "How should overlapping reads from the same fragment be handled. Current options: COUNT_READS, COUNT_FRAGMENTS and COUNT_FRAGMENTS_REQUIRE_SAME_BASEQUIRE_SAME_BASE (default)", required = false) @Argument(fullName = "countOverlapReadsType", shortName = "overlap", doc = "Handling of overlapping reads from the same fragment", required = false)
public CoverageUtils.CountPileupType countType = CoverageUtils.CountPileupType.COUNT_FRAGMENTS_REQUIRE_SAME_BASE; public CoverageUtils.CountPileupType countType = CoverageUtils.CountPileupType.COUNT_FRAGMENTS_REQUIRE_SAME_BASE;
/** /**
* Output file format (e.g. csv, table, rtable); defaults to r-readable table. * Available options are csv, table, rtable. By default, the format is an r-readable table.
*/ */
@Argument(fullName = "outputFormat", doc = "The format of the output file, can be csv, table, rtable", required = false) @Argument(fullName = "outputFormat", doc = "Format of the output file, can be CSV, TABLE, RTABLE", required = false)
public String outputFormat = "rtable"; public OUTPUT_FORMAT outputFormat = OUTPUT_FORMAT.RTABLE;
/** /**
* Consider a spanning deletion as contributing to coverage. Also enables deletion counts in per-base output. * Consider a spanning deletion as contributing to coverage. Also enables deletion counts in per-base output.
@ -135,9 +150,14 @@ public class ASEReadCounter extends LocusWalker<String, Integer> {
@Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false) @Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false)
public boolean ignoreDeletionSites = false; public boolean ignoreDeletionSites = false;
final String[] OUTPUT_FORMATS = {"table","rtable","csv"};
public String separator = "\t"; public String separator = "\t";
public enum OUTPUT_FORMAT{
TABLE,
RTABLE,
CSV
}
//////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////
// STANDARD WALKER METHODS // STANDARD WALKER METHODS
//////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////
@ -148,15 +168,15 @@ public class ASEReadCounter extends LocusWalker<String, Integer> {
// Check the output format // Check the output format
boolean goodOutputFormat = false; boolean goodOutputFormat = false;
for ( final String f : OUTPUT_FORMATS ) { for ( final OUTPUT_FORMAT f : OUTPUT_FORMAT.values()) {
goodOutputFormat = goodOutputFormat || f.equals(outputFormat); goodOutputFormat = goodOutputFormat || f.equals(outputFormat);
} }
if ( ! goodOutputFormat ) { if ( ! goodOutputFormat ) {
throw new IllegalArgumentException("Improper output format. Can be one of table,rtable,csv. Was "+outputFormat); throw new IllegalArgumentException("Improper output format. Can be one of TABLE, RTABLE, CSV. Was "+outputFormat);
} }
if ( outputFormat.equals("csv") ) { if ( outputFormat.equals(OUTPUT_FORMAT.CSV) ) {
separator = ","; separator = ",";
} }
final String header = "contig"+separator+"position"+separator+"variantID"+separator+"refAllele"+separator+"altAllele"+separator+"refCount"+separator+"altCount"+separator+"totalCount"+separator+"lowMAPQDepth"+separator+"lowBaseQDepth"+separator+"rawDepth"+separator+"otherBases"+separator+"improperPairs"; final String header = "contig"+separator+"position"+separator+"variantID"+separator+"refAllele"+separator+"altAllele"+separator+"refCount"+separator+"altCount"+separator+"totalCount"+separator+"lowMAPQDepth"+separator+"lowBaseQDepth"+separator+"rawDepth"+separator+"otherBases"+separator+"improperPairs";
@ -176,13 +196,13 @@ public class ASEReadCounter extends LocusWalker<String, Integer> {
final List<VariantContext> VCs = tracker.getValues(sites, context.getLocation()); final List<VariantContext> VCs = tracker.getValues(sites, context.getLocation());
if(VCs != null && VCs.size() > 1) if(VCs != null && VCs.size() > 1)
throw new UserException("more then one variant context in position: "+contig+":"+position); throw new UserException("More then one variant context at position: "+contig+":"+position);
if(VCs == null || VCs.size() == 0) if(VCs == null || VCs.size() == 0)
return null; return null;
final VariantContext vc = VCs.get(0); final VariantContext vc = VCs.get(0);
if(!vc.isBiallelic()) { if(!vc.isBiallelic()) {
logger.warn("ignore site: cannot run ASE on non-biallelic sites: " + vc.toString()); logger.warn("Ignoring site: cannot run ASE on non-biallelic sites: " + vc.toString());
return null; return null;
} }