diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index 8826de232..864be55b7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -19,8 +19,19 @@ import java.util.Map; /** * Total (unfiltered) depth over all samples. * - * Affected by downsampling (-dcov) though, so the max value one can obtain for N samples with -dcov D - * is N * D + * This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample + * at this site. The DP field describes the total depth of reads that passed the Unified Genotyper's internal + * quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site. + * The AD values (one for each of REF and ALT fields) are the counts of all reads that carried with them the + * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the + * power I have to determine the genotype of the sample at this site, while the AD tells me how many times + * I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering + * the reads. If, for example, I believe there really is an A/T polymorphism at a site, then I would like + * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would + * normally be excluded from the statistical calculations going into GQ and QUAL. 
+ * + * Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with + * -dcov D is N * D */ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 1cd30c51d..5d706d9c5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -24,9 +24,9 @@ import java.util.Map; /** - * The depth of coverage of each VCF allele in this sample + * The depth of coverage of each VCF allele in this sample. * - * Complementary fields that two important ways of thinking about the depth of the data for this sample + * This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample * at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal * quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site. * The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the @@ -38,8 +38,8 @@ import java.util.Map; * normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that * the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that * are actually present and correctly left-aligned in the alignments themselves). 
Because of this fact and - * because the AD includes reads and bases that were filtered by the Unified Genotyper, one should not base - * assumptions about the underlying genotype based on it; instead, the genotype likelihoods (PLs) are what + * because the AD includes reads and bases that were filtered by the Unified Genotyper, one should not base + * assumptions about the underlying genotype on it; instead, the genotype likelihoods (PLs) are what * determine the genotype calls (see below). */ public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {