From cdea744b953bdd5a27b07281650abe770d918ec8 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 2 Jul 2013 15:27:36 -0400 Subject: [PATCH] Improve -dcov documentation to address recent user confusion -Explicitly state that -dcov does not produce an unbiased random sampling from all available reads at each locus, and that instead it tries to maintain an even representation of reads from all alignment start positions (which, of course, is a form of bias) -Recommend -dfrac for users who want a true across-the-board unbiased random sampling --- .../sting/gatk/arguments/GATKArgumentCollection.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index b5113fdea..b38f0fc0b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -128,10 +128,12 @@ public class GATKArgumentCollection { @Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to. For locus-based traversals (eg., LocusWalkers and ActiveRegionWalkers)," + "this controls the maximum depth of coverage at each locus. For non-locus-based traversals (eg., ReadWalkers), " + - "this controls the maximum number of reads sharing the same alignment start position. Note that the " + - "coverage target is an approximate goal that is not guaranteed to be met exactly: the GATK's approach " + - "to downsampling is based on even representation of reads from all alignment start positions, and the " + - "downsampling algorithm will under some circumstances retain slightly more coverage than requested.", + "this controls the maximum number of reads sharing the same alignment start position. Note that this downsampling " + + "option does NOT produce an unbiased random sampling from all available reads at each locus: instead, the primary goal of " + + "the to-coverage downsampler is to maintain an even representation of reads from all alignment start positions " + + "when removing excess coverage. For a true across-the-board unbiased random sampling of reads, use -dfrac instead. " + + "Also note that the coverage target is an approximate goal that is not guaranteed to be met exactly: the downsampling " + + "algorithm will under some circumstances retain slightly more coverage than requested.", required = false) public Integer downsampleCoverage = null;