From d1febb89c8921453480dcf6b323038db87d2fb7b Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 1 Jan 2014 14:26:26 -0500 Subject: [PATCH] Better documentation for ReadClippingStats walker * add overall walker GATKDocs * add explanation for skip parameter and make it advanced * reverse the logic on excluding unmapped reads for clarity * fix read length calculation to no longer include indels ps: I am not sure how useful this walker is (I didn't write it) but the skip logic is poor and calculates the entire statistic for the reads it is eventually going to skip. This would be an easy fix, but only worth our time if people actually use this. --- .../gatk/walkers/qc/ReadClippingStats.java | 54 +++++++++++-------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java index cc8b3401e..59b95f2ba 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStats.java @@ -26,8 +26,8 @@ package org.broadinstitute.sting.gatk.walkers.qc; import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.commandline.Advanced; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.CommandLineGATK; @@ -48,39 +48,53 @@ import java.io.PrintStream; import java.util.Arrays; /** + * Read clipping statistics for all reads. + * + * Walks over the input reads, printing out statistics about the read length, number of clipping events, and length + * of the clipping to the output stream. + * + * Note: Ignores N's in the Cigar string. + * + *

Input

+ * One or more BAM files + * + *

Output

+ * A simple tabulated text file with read length and clipping statistics for every read (or every Nth read if the "skip" + * option is used) + * * User: depristo * Date: May 5, 2010 * Time: 12:16:41 PM */ -/** - * Walks over the input reads, printing out statistics about the read length, number of clipping events, and length - * of the clipping to the output stream. - */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @Requires({DataSource.READS}) public class ReadClippingStats extends ReadWalker { @Output protected PrintStream out; - @Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+ - "on mapped reads only, while unmapped reads will be discarded", required=false) - protected boolean MAPPED_ONLY = true; + /** + * When this flag is set, statistics will be collected on unmapped reads as well. The default behavior + * is to ignore unmapped reads. + */ + @Argument(fullName="include_unmapped", shortName="u", doc="Include unmapped reads in the analysis", required=false) + protected boolean INCLUDE_UNMAPPED = false; - @Argument(fullName="skip", shortName="skip", doc="When provided, only every skip reads are analyzed", required=false) + /** + * print every read whose read number is divisible by SKIP. READ_NUMBER % SKIP == 0. First read in the file has read number = 1, + * second is 2, third is 3, ... A value of 1 means print every read. A value of 1000 means print every 1000th read. 
+ */ + @Advanced + @Argument(fullName="skip", shortName="skip", doc="Do not print all reads, skip some.", required=false) protected int SKIP = 1; -// public void initialize() { -// -// } - public class ReadClippingInfo { SAMReadGroupRecord rg; int readLength, nClippingEvents, nClippedBases; } public ReadClippingInfo map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) { - if ( AlignmentUtils.isReadUnmapped(read) && MAPPED_ONLY) + if ( AlignmentUtils.isReadUnmapped(read) && !INCLUDE_UNMAPPED) return null; ReadClippingInfo info = new ReadClippingInfo(); @@ -89,24 +103,21 @@ public class ReadClippingStats extends ReadWalker