From fadcbf68fd72d936f7b4532b57bf37ae98ea10d1 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 17 Aug 2011 09:39:33 -0400 Subject: [PATCH] Adding docs to QC walkers --- .../sting/gatk/walkers/PrintReadsWalker.java | 51 ++++++++++++++++--- .../coverage/GCContentByIntervalWalker.java | 24 ++++++++- .../gatk/walkers/qc/CountLociWalker.java | 24 +++++++++ .../gatk/walkers/qc/CountPairsWalker.java | 20 ++++++++ .../gatk/walkers/qc/CountReadsWalker.java | 24 +++++++++ .../sting/gatk/walkers/qc/CountRodWalker.java | 42 ++++++++++++--- 6 files changed, 169 insertions(+), 16 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index 7e1dcd707..fdfac6bf7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -40,26 +40,65 @@ import java.util.TreeSet; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; + /** - * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear - * in the input file. It can dynamically merge the contents of multiple input BAM files, resulting - * in merged output sorted in coordinate order. Can also optionally filter reads based on the --read-filter - * command line argument. + * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file. + * + *

+ * PrintReads can dynamically merge the contents of multiple input BAM files, resulting + * in merged output sorted in coordinate order. Can also optionally filter reads based on the + * --read_filter command line argument. + * + *

Input

+ *

+ * One or more bam files. + *

+ * + *

Output

+ *

+ * A single processed bam file. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T PrintReads \
+ *   -o output.bam \
+ *   -I input1.bam \
+ *   -I input2.bam \
+ *   --read_filter MappingQualityZero
+ * 
+ * */ @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) @Requires({DataSource.READS, DataSource.REFERENCE}) public class PrintReadsWalker extends ReadWalker { - /** an optional argument to dump the reads out to a BAM file */ + @Output(doc="Write output to this BAM filename instead of STDOUT") SAMFileWriter out; + @Argument(fullName = "readGroup", shortName = "readGroup", doc="Exclude all reads with this read group from the output", required = false) String readGroup = null; + + /** + * For example, --platform ILLUMINA or --platform 454. + */ @Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false) - String platform = null; // E.g. ILLUMINA, 454 + String platform = null; + @Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false) int nReadsToPrint = -1; + + /** + * Only reads from samples listed in the provided file(s) will be included in the output. + */ @Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false) public Set sampleFile = new TreeSet(); + + /** + * Only reads from the sample(s) will be included in the output. + */ @Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false) public Set sampleNames = new TreeSet(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java index a4944e939..5c2a967b9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java @@ -38,12 +38,32 @@ import java.util.List; /** * Walks along reference and calculates the GC content for each interval. + * + * + *

Input

+ *

+ * One or more BAM files. + *

+ * + *

Output

+ *

+ * GC content calculations per interval. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T GCContentByInterval \
+ *   -o output.txt \
+ *   -I input.bam \
+ *   -L input.intervals
+ * 
+ * */ @Allows(value = {DataSource.REFERENCE}) @Requires(value = {DataSource.REFERENCE}) - @By(DataSource.REFERENCE) - public class GCContentByIntervalWalker extends LocusWalker { @Output protected PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java index 0d68c8493..09113704a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java @@ -11,7 +11,31 @@ import java.io.PrintStream; /** * Walks over the input data set, calculating the total number of covered loci for diagnostic purposes. + * + *

* Simplest example of a locus walker. + * + * + *

Input

+ *

+ * One or more BAM files. + *

+ * + *

Output

+ *

+ * Number of loci traversed. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CountLoci \
+ *   -o output.txt \
+ *   -I input.bam \
+ *   [-L input.intervals]
+ * 
+ * */ public class CountLociWalker extends LocusWalker implements TreeReducible { @Output(doc="Write count to this file instead of STDOUT") diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java index 26fa9a258..e770418c1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java @@ -39,6 +39,26 @@ import java.util.List; * query name order. Breaks counts down by total pairs and number * of paired reads. * + * + *

Input

+ *

+ * One or more bam files. + *

+ * + *

Output

+ *

+ * Number of pairs seen. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CountPairs \
+ *   -o output.txt \
+ *   -I input.bam
+ * 
+ * * @author mhanna */ public class CountPairsWalker extends ReadPairWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java index 87c0409b9..9ce9c4eec 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java @@ -9,8 +9,32 @@ import org.broadinstitute.sting.gatk.walkers.Requires; /** * Walks over the input data set, calculating the number of reads seen for diagnostic purposes. + * + *

* Can also count the number of reads matching a given criterion using read filters (see the * --read-filter command line argument). Simplest example of a read-backed analysis. + * + * + *

Input

+ *

+ * One or more BAM files. + *

+ * + *

Output

+ *

+ * Number of reads seen. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CountReads \
+ *   -o output.txt \
+ *   -I input.bam \
+ *   [-L input.intervals]
+ * 
+ * */ @Requires({DataSource.READS, DataSource.REFERENCE}) public class CountReadsWalker extends ReadWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java index 8a03dea44..04d04c2c4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java @@ -27,8 +27,11 @@ package org.broadinstitute.sting.gatk.walkers.qc; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -41,23 +44,46 @@ import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.Pair; import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Collection; -import java.util.LinkedList; -import java.util.List; +import java.util.*; /** - * Prints out counts of the number of reference ordered data objects are - * each locus for debugging RodWalkers. + * Prints out counts of the number of reference ordered data objects encountered. + * + * + *

Input

+ *

+ * One or more rod files. + *

+ * + *

Output

+ *

+ * Number of rods seen. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CountRod \
+ *   -o output.txt \
+ *   --rod input.vcf
+ * 
+ * */ public class CountRodWalker extends RodWalker, Long>> implements TreeReducible, Long>> { @Output public PrintStream out; - @Argument(fullName = "verbose", shortName = "v", doc="If true, Countrod will print out detailed information about the rods it finds and locations", required = false) + /** + * One or more input rod files + */ + @Input(fullName="rod", shortName = "rod", doc="Input VCF file(s)", required=false) + public List> rods = Collections.emptyList(); + + @Argument(fullName = "verbose", shortName = "v", doc="If true, CountRod will print out detailed information about the rods it finds and locations", required = false) public boolean verbose = false; - @Argument(fullName = "showSkipped", shortName = "s", doc="If true, CountRod will print out the skippped locations", required = false) + @Argument(fullName = "showSkipped", shortName = "s", doc="If true, CountRod will print out the skipped locations", required = false) public boolean showSkipped = false; @Override