From 701ede2817372b6b2f65c18e40101b059c058c5c Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 27 Nov 2013 11:53:49 -0500 Subject: [PATCH] Add GC Content to DiagnoseTargets --- .../diagnosetargets/DiagnoseTargets.java | 12 ++++++++++-- .../diagnosetargets/IntervalStratification.java | 14 +++++++++++++- .../DiagnoseTargetsIntegrationTest.java | 4 ++-- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java index bde324e3c..fbf4b23c6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java @@ -89,7 +89,12 @@ import java.util.*; *

*

Output

*

- * A modified VCF detailing each interval by sample + * A modified VCF detailing each interval by sample and information for each interval according to the thresholds used. + * Interval information includes GC Content, average interval depth, callable status among others. + * + * If you use the --missing option, you can get as a second output a intervals file with the loci that have missing data. + * This file can then be used as input to QualifyMissingIntervals for full qualification and interpretation of why + * the data is missing. *

*

*

Examples

@@ -117,6 +122,7 @@ public class DiagnoseTargets extends LocusWalker { private static final String AVG_INTERVAL_DP_KEY = "IDP"; private static final String LOW_COVERAGE_LOCI = "LL"; private static final String ZERO_COVERAGE_LOCI = "ZL"; + private static final String GC_CONTENT_KEY = "GC"; @Output(doc = "File to which interval statistics should be written") @@ -161,7 +167,7 @@ public class DiagnoseTargets extends LocusWalker { // at this point, all intervals in intervalMap overlap with this locus, so update all of them for (IntervalStratification intervalStratification : intervalMap.values()) - intervalStratification.addLocus(context); + intervalStratification.addLocus(context, ref); return 1L; } @@ -276,6 +282,7 @@ public class DiagnoseTargets extends LocusWalker { attributes.put(VCFConstants.END_KEY, interval.getStop()); attributes.put(AVG_INTERVAL_DP_KEY, stats.averageCoverage(interval.size())); + attributes.put(GC_CONTENT_KEY, stats.gcContent()); vcb = vcb.attributes(attributes); vcb = vcb.genotypes(genotypes); @@ -391,6 +398,7 @@ public class DiagnoseTargets extends LocusWalker { // INFO fields for overall data headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY)); headerLines.add(new VCFInfoHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size.")); + headerLines.add(new VCFInfoHeaderLine(GC_CONTENT_KEY, 1, VCFHeaderLineType.Float, "GC Content of the interval")); headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode")); // FORMAT fields for each genotype diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java index 3b5a23d51..cd38d28c6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java @@ -47,6 +47,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.diagnosetargets; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -57,9 +58,13 @@ final class IntervalStratification extends AbstractStratification { private final Map samples; private final GenomeLoc interval; private List callableStatuses; + private long gcCount = 0; public IntervalStratification(Set samples, GenomeLoc interval, ThresHolder thresholds) { super(thresholds); + + assert interval != null && interval.size() > 0; // contracts + this.interval = interval; this.samples = new HashMap(samples.size()); for (String sample : samples) @@ -83,8 +88,11 @@ final class IntervalStratification extends AbstractStratification { * This takes the input and manages passing the data to the SampleStatistics and Locus Statistics * * @param context The alignment context given from the walker + * @param ref The reference context given from the walker */ - public void addLocus(AlignmentContext context) { + public void addLocus(final AlignmentContext context, final ReferenceContext ref) { + assert ref != null; // contracts + ReadBackedPileup pileup = context.getBasePileup(); Map samplePileups = pileup.getPileupsForSamples(samples.keySet()); @@ -99,7 +107,11 @@ final class IntervalStratification extends AbstractStratification { sampleStratification.addLocus(context.getLocation(), samplePileup); } + gcCount += (ref.getBase() == 'G' || ref.getBase() == 'C') ? 1 : 0; + } + public double gcContent() { + return (double) gcCount / interval.size(); } /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java index 52e385957..0259e9685 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java @@ -66,11 +66,11 @@ public class DiagnoseTargetsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testSingleSample() { - DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "1771e95aed2b3b240dc353f84e19847d"); + DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "6ca3d3917a7b65eaa877aa3658d80912"); } @Test(enabled = true) public void testMultiSample() { - DTTest("testMultiSample ", "-I " + multiSample, "c7f1691dbe5f121c4a79be823d3057e5"); + DTTest("testMultiSample ", "-I " + multiSample, "f50c6b9bef9f63f0a8b32ae9a9bdd51a"); } }