Adding docs to QC walkers

This commit is contained in:
Eric Banks 2011-08-17 09:39:33 -04:00
parent 5f3f46aad1
commit fadcbf68fd
6 changed files with 169 additions and 16 deletions

View File

@ -40,26 +40,65 @@ import java.util.TreeSet;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
/**
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear
* in the input file. It can dynamically merge the contents of multiple input BAM files, resulting
* in merged output sorted in coordinate order. Can also optionally filter reads based on the --read-filter
* command line argument.
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
*
* <p>
* PrintReads can dynamically merge the contents of multiple input BAM files, resulting
* in merged output sorted in coordinate order. It can also optionally filter reads based on the
* --read_filter command line argument.
* </p>
*
* <h2>Input</h2>
* <p>
* One or more BAM files.
* </p>
*
* <h2>Output</h2>
* <p>
* A single processed BAM file.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T PrintReads \
* -o output.bam \
* -I input1.bam \
* -I input2.bam \
* --read_filter MappingQualityZero
* </pre>
*
*/
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@Requires({DataSource.READS, DataSource.REFERENCE})
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/** an optional argument to dump the reads out to a BAM file */
@Output(doc="Write output to this BAM filename instead of STDOUT")
SAMFileWriter out;
@Argument(fullName = "readGroup", shortName = "readGroup", doc="Exclude all reads with this read group from the output", required = false)
String readGroup = null;
/**
* For example, --platform ILLUMINA or --platform 454.
*/
@Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false)
String platform = null; // E.g. ILLUMINA, 454
String platform = null;
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
int nReadsToPrint = -1;
/**
* Only reads from samples listed in the provided file(s) will be included in the output.
*/
@Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false)
public Set<File> sampleFile = new TreeSet<File>();
/**
* Only reads from the specified sample(s) will be included in the output.
*/
@Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
public Set<String> sampleNames = new TreeSet<String>();

View File

@ -38,12 +38,32 @@ import java.util.List;
/**
* Walks along the reference and calculates the GC content for each interval.
*
*
* <h2>Input</h2>
* <p>
* A reference file (no BAM input: this walker only allows reference data).
* </p>
*
* <h2>Output</h2>
* <p>
* GC content calculations per interval.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T GCContentByInterval \
* -o output.txt \
* -L input.intervals
* </pre>
*
*/
@Allows(value = {DataSource.REFERENCE})
@Requires(value = {DataSource.REFERENCE})
@By(DataSource.REFERENCE)
public class GCContentByIntervalWalker extends LocusWalker<Long, Long> {
@Output
protected PrintStream out;

View File

@ -11,7 +11,31 @@ import java.io.PrintStream;
/**
* Walks over the input data set, calculating the total number of covered loci for diagnostic purposes.
*
* <p>
* Simplest example of a locus walker.
* </p>
*
*
* <h2>Input</h2>
* <p>
* One or more BAM files.
* </p>
*
* <h2>Output</h2>
* <p>
* Number of loci traversed.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T CountLoci \
* -o output.txt \
* -I input.bam \
* [-L input.intervals]
* </pre>
*
*/
public class CountLociWalker extends LocusWalker<Integer, Long> implements TreeReducible<Long> {
@Output(doc="Write count to this file instead of STDOUT")

View File

@ -39,6 +39,26 @@ import java.util.List;
* query name order. Breaks counts down by total pairs and number
* of paired reads.
*
*
* <h2>Input</h2>
* <p>
* One or more BAM files.
* </p>
*
* <h2>Output</h2>
* <p>
* Number of pairs seen.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T CountPairs \
* -o output.txt \
* -I input.bam
* </pre>
*
* @author mhanna
*/
public class CountPairsWalker extends ReadPairWalker<Integer,Long> {

View File

@ -9,8 +9,32 @@ import org.broadinstitute.sting.gatk.walkers.Requires;
/**
* Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
*
* <p>
* Can also count the number of reads matching a given criterion using read filters (see the
* --read_filter command line argument). Simplest example of a read-backed analysis.
* </p>
*
*
* <h2>Input</h2>
* <p>
* One or more BAM files.
* </p>
*
* <h2>Output</h2>
* <p>
* Number of reads seen.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T CountReads \
* -o output.txt \
* -I input.bam \
* [-L input.intervals]
* </pre>
*
*/
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountReadsWalker extends ReadWalker<Integer, Integer> {

View File

@ -27,8 +27,11 @@ package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -41,23 +44,46 @@ import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.collections.Pair;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.*;
/**
* Prints out counts of the number of reference ordered data objects are
* each locus for debugging RodWalkers.
* Prints out counts of the number of reference ordered data objects encountered.
*
*
* <h2>Input</h2>
* <p>
* One or more rod files.
* </p>
*
* <h2>Output</h2>
* <p>
* Number of rods seen.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T CountRod \
* -o output.txt \
* --rod input.vcf
* </pre>
*
*/
public class CountRodWalker extends RodWalker<CountRodWalker.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>> {
@Output
public PrintStream out;
@Argument(fullName = "verbose", shortName = "v", doc="If true, Countrod will print out detailed information about the rods it finds and locations", required = false)
/**
* One or more input rod files
*/
@Input(fullName="rod", shortName = "rod", doc="Input VCF file(s)", required=false)
public List<RodBinding<Feature>> rods = Collections.emptyList();
@Argument(fullName = "verbose", shortName = "v", doc="If true, CountRod will print out detailed information about the rods it finds and locations", required = false)
public boolean verbose = false;
@Argument(fullName = "showSkipped", shortName = "s", doc="If true, CountRod will print out the skippped locations", required = false)
@Argument(fullName = "showSkipped", shortName = "s", doc="If true, CountRod will print out the skipped locations", required = false)
public boolean showSkipped = false;
@Override