diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java
index cc8b3401e..59b95f2ba 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java
@@ -26,8 +26,8 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.CigarElement;
-import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMReadGroupRecord;
+import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
@@ -48,39 +48,53 @@ import java.io.PrintStream;
import java.util.Arrays;
/**
+ * Read clipping statistics for all reads.
+ *
+ * Walks over the input reads, printing out statistics about the read length, number of clipping events, and length
+ * of the clipping to the output stream.
+ *
+ * Note: Ignores N's in the Cigar string.
+ *
+ *
+ * Input
+ * One or more BAM files
+ *
+ * Output
+ * A simple tabulated text file with read length and clipping statistics for every read (or for every Nth read if
+ * the "skip" option is used)
+ *
* User: depristo
* Date: May 5, 2010
* Time: 12:16:41 PM
*/
-/**
- * Walks over the input reads, printing out statistics about the read length, number of clipping events, and length
- * of the clipping to the output stream.
- */
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS})
public class ReadClippingStats extends ReadWalker {
@Output
protected PrintStream out;
- @Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+
- "on mapped reads only, while unmapped reads will be discarded", required=false)
- protected boolean MAPPED_ONLY = true;
+ /**
+ * When this flag is set, statistics will be collected on unmapped reads as well. The default behavior
+ * is to ignore unmapped reads.
+ */
+ @Argument(fullName="include_unmapped", shortName="u", doc="Include unmapped reads in the analysis", required=false)
+ protected boolean INCLUDE_UNMAPPED = false;
- @Argument(fullName="skip", shortName="skip", doc="When provided, only every skip reads are analyzed", required=false)
+ /**
+ * Print every read whose read number is divisible by SKIP (i.e. READ_NUMBER % SKIP == 0). The first read in the file
+ * has read number 1, the second 2, the third 3, ... A value of 1 means print every read; a value of 1000 means print every 1000th read.
+ */
+ @Advanced
+ @Argument(fullName="skip", shortName="skip", doc="Do not print all reads, skip some.", required=false)
protected int SKIP = 1;
-// public void initialize() {
-//
-// }
-
public class ReadClippingInfo {
SAMReadGroupRecord rg;
int readLength, nClippingEvents, nClippedBases;
}
public ReadClippingInfo map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
- if ( AlignmentUtils.isReadUnmapped(read) && MAPPED_ONLY)
+ if ( AlignmentUtils.isReadUnmapped(read) && !INCLUDE_UNMAPPED)
return null;
ReadClippingInfo info = new ReadClippingInfo();
@@ -89,24 +103,21 @@ public class ReadClippingStats extends ReadWalker