diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java index cc8b3401e..59b95f2ba 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java @@ -26,8 +26,8 @@ package org.broadinstitute.sting.gatk.walkers.qc; import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.commandline.Advanced; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.CommandLineGATK; @@ -48,39 +48,53 @@ import java.io.PrintStream; import java.util.Arrays; /** + * Read clipping statistics for all reads. + * + * Walks over the input reads, printing out statistics about the read length, number of clipping events, and length + * of the clipping to the output stream. + * + * Note: Ignores N's in the Cigar string. + * + *

Input

+ * One or more BAM files + * + *

Output

+ * A simple tabulated text file with read length and clipping statistics for every read (or every N reads if the "skip" + * option is used) + * * User: depristo * Date: May 5, 2010 * Time: 12:16:41 PM */ -/** - * Walks over the input reads, printing out statistics about the read length, number of clipping events, and length - * of the clipping to the output stream. - */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS}) public class ReadClippingStats extends ReadWalker { @Output protected PrintStream out; - @Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+ - "on mapped reads only, while unmapped reads will be discarded", required=false) - protected boolean MAPPED_ONLY = true; + /** + * When this flag is set, statistics will be collected on unmapped reads as well. The default behavior + * is to ignore unmapped reads. + */ + @Argument(fullName="include_unmapped", shortName="u", doc="Include unmapped reads in the analysis", required=false) + protected boolean INCLUDE_UNMAPPED = false; - @Argument(fullName="skip", shortName="skip", doc="When provided, only every skip reads are analyzed", required=false) + /** + * Print every read whose read number is divisible by SKIP, i.e. READ_NUMBER % SKIP == 0. First read in the file has read number = 1, + * second is 2, third is 3, ... A value of 1 means print every read. A value of 1000 means print every 1000th read.
+ */ + @Advanced + @Argument(fullName="skip", shortName="skip", doc="Do not print all reads, skip some.", required=false) protected int SKIP = 1; -// public void initialize() { -// -// } - public class ReadClippingInfo { SAMReadGroupRecord rg; int readLength, nClippingEvents, nClippedBases; } public ReadClippingInfo map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) { - if ( AlignmentUtils.isReadUnmapped(read) && MAPPED_ONLY) + if ( AlignmentUtils.isReadUnmapped(read) && !INCLUDE_UNMAPPED) return null; ReadClippingInfo info = new ReadClippingInfo(); @@ -89,24 +103,21 @@ public class ReadClippingStats extends ReadWalker