From d1815f35592fd094ca67c80dad34433ead60d79b Mon Sep 17 00:00:00 2001 From: hanna Date: Fri, 11 Dec 2009 18:02:33 +0000 Subject: [PATCH] More documentation for walkers that I'm familiar with in the collection of core walkers. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2328 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/walkers/FixBAMSortOrderTag.java | 8 ++++---- .../broadinstitute/sting/gatk/walkers/FlagStatWalker.java | 8 +++----- .../broadinstitute/sting/gatk/walkers/PileupWalker.java | 7 +++++++ .../sting/gatk/walkers/PrintRODsWalker.java | 3 ++- .../sting/gatk/walkers/PrintReadsWalker.java | 8 ++++---- .../sting/gatk/walkers/SplitSamFileWalker.java | 4 ++++ 6 files changed, 24 insertions(+), 14 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/FixBAMSortOrderTag.java b/java/src/org/broadinstitute/sting/gatk/walkers/FixBAMSortOrderTag.java index c4064e1b8..ca74e7626 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/FixBAMSortOrderTag.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/FixBAMSortOrderTag.java @@ -22,10 +22,10 @@ import net.sf.samtools.SAMFileWriterFactory; import java.io.File; /** - * Created by IntelliJ IDEA. - * User: aaron - * Date: Oct 9, 2009 - * Time: 2:21:08 PM + * Fixes slightly corrupted BAM files by rewriting the input BAM file, altering + * the header by changing the sort order tag (SO) to coordinate sort order. Will NOT + * verify the contents of the file to ensure that the data is actually in coordinate sorted + * order. 
*/ public class FixBAMSortOrderTag extends ReadWalker { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java index 46787e020..caffc15d0 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java @@ -32,12 +32,10 @@ import java.text.NumberFormat; */ /** + * A reimplementation of the 'samtools flagstat' subcommand in the GATK. Walks + * over all input data, accumulating statistics such as total number of reads, + * reads with QC failure flag set, number of duplicates, percentage mapped, etc. * @author aaron - *

- * Class FlagStatWalker - *

- * This walker mirrors that stats that are generated by the flagstat - * command of samtools. */ @Requires({DataSource.READS}) public class FlagStatWalker extends ReadWalker { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index 03b5fca0a..29b845d83 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -51,6 +51,13 @@ import java.util.ArrayList; * Also at the read base column, a symbol '^' marks the start of a read segment which is a contiguous subsequence on the read * separated by 'N/S/H' CIGAR operations. The ASCII of the character following '^' minus 33 gives the mapping quality. * A symbol '$' marks the end of a read segment. + * + * @help.description Prints the alignment in the pileup format. In the pileup format, each line represents a genomic position, + * consisting of chromosome name, coordinate, reference base, read bases, read qualities and alignment mapping + * qualities. Information on match, mismatch, indel, strand, mapping quality and start and end of a read are all + * encoded at the read base column. At this column, a dot stands for a match to the reference base on the forward strand, + * a comma for a match on the reverse strand, 'ACGTN' for a mismatch on the forward strand and 'acgtn' for a mismatch on the + * reverse strand. 
*/ public class PileupWalker extends LocusWalker implements TreeReducible { @Argument(fullName="alwaysShowSecondBase",doc="If true, prints dummy bases for the second bases in the BAM file where they are missing",required=false) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java index 472a27dc2..39a0398dd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java @@ -32,7 +32,8 @@ import org.broadinstitute.sting.utils.genotype.Variation; import java.util.Iterator; /** - * PrintRODsWalker prints out all of the RODs that it sees (using the ROD's toString method) + * Prints out all of the RODs in the input data set. Data is rendered using the toString() method + * of the given ROD. */ public class PrintRODsWalker extends RodWalker { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index 5a885d88b..8b1fadcf1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -31,10 +31,10 @@ import org.broadinstitute.sting.utils.cmdLine.Argument; */ /** - * This walker prints out the reads from the BAM files provided to the traversal engines. - * It also supports the command line option '-outputBamFile filname', which outputs all the - * reads to a specified BAM file - * The walker now also optionally filters reads based on command line options. + * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear + * in the input file. It can dynamically merge the contents of multiple input BAM files, resulting + * in merged output sorted in coordinate order. Can also optionally filter reads based on the --read-filter + * command line argument. 
*/ @Requires({DataSource.READS, DataSource.REFERENCE}) public class PrintReadsWalker extends ReadWalker { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java index 4055ef90e..6f6311b9e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java @@ -32,6 +32,10 @@ import org.apache.log4j.Logger; import java.util.*; +/** + * Divides the input data set into separate BAM files, one for each sample in the input data set. The split + * files are named by concatenating the sample name to the end of the provided outputRoot command-line argument. + */ @WalkerName("SplitSamFile") @Requires({DataSource.READS}) public class SplitSamFileWalker extends ReadWalker> {