diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java
index bde324e3c..fbf4b23c6 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java
@@ -89,7 +89,12 @@ import java.util.*;
*
* Output
*
- * A modified VCF detailing each interval by sample
+ * A modified VCF detailing each interval by sample and information for each interval according to the thresholds used.
+ * Interval information includes GC Content, average interval depth, callable status among others.
+ *
+ * If you use the --missing option, you can get as a second output a intervals file with the loci that have missing data.
+ * This file can then be used as input to QualifyMissingIntervals for full qualification and interpretation of why
+ * the data is missing.
*
*
* Examples
@@ -117,6 +122,7 @@ public class DiagnoseTargets extends LocusWalker {
private static final String AVG_INTERVAL_DP_KEY = "IDP";
private static final String LOW_COVERAGE_LOCI = "LL";
private static final String ZERO_COVERAGE_LOCI = "ZL";
+ private static final String GC_CONTENT_KEY = "GC";
@Output(doc = "File to which interval statistics should be written")
@@ -161,7 +167,7 @@ public class DiagnoseTargets extends LocusWalker {
// at this point, all intervals in intervalMap overlap with this locus, so update all of them
for (IntervalStratification intervalStratification : intervalMap.values())
- intervalStratification.addLocus(context);
+ intervalStratification.addLocus(context, ref);
return 1L;
}
@@ -276,6 +282,7 @@ public class DiagnoseTargets extends LocusWalker {
attributes.put(VCFConstants.END_KEY, interval.getStop());
attributes.put(AVG_INTERVAL_DP_KEY, stats.averageCoverage(interval.size()));
+ attributes.put(GC_CONTENT_KEY, stats.gcContent());
vcb = vcb.attributes(attributes);
vcb = vcb.genotypes(genotypes);
@@ -391,6 +398,7 @@ public class DiagnoseTargets extends LocusWalker {
// INFO fields for overall data
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
headerLines.add(new VCFInfoHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
+ headerLines.add(new VCFInfoHeaderLine(GC_CONTENT_KEY, 1, VCFHeaderLineType.Float, "GC Content of the interval"));
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
// FORMAT fields for each genotype
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java
index 3b5a23d51..cd38d28c6 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java
@@ -47,6 +47,7 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.diagnosetargets;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@@ -57,9 +58,13 @@ final class IntervalStratification extends AbstractStratification {
private final Map samples;
private final GenomeLoc interval;
private List callableStatuses;
+ private long gcCount = 0;
public IntervalStratification(Set samples, GenomeLoc interval, ThresHolder thresholds) {
super(thresholds);
+
+ assert interval != null && interval.size() > 0; // contracts
+
this.interval = interval;
this.samples = new HashMap(samples.size());
for (String sample : samples)
@@ -83,8 +88,11 @@ final class IntervalStratification extends AbstractStratification {
* This takes the input and manages passing the data to the SampleStatistics and Locus Statistics
*
* @param context The alignment context given from the walker
+ * @param ref The reference context given from the walker
*/
- public void addLocus(AlignmentContext context) {
+ public void addLocus(final AlignmentContext context, final ReferenceContext ref) {
+ assert ref != null; // contracts
+
ReadBackedPileup pileup = context.getBasePileup();
Map samplePileups = pileup.getPileupsForSamples(samples.keySet());
@@ -99,7 +107,11 @@ final class IntervalStratification extends AbstractStratification {
sampleStratification.addLocus(context.getLocation(), samplePileup);
}
+ gcCount += (ref.getBase() == 'G' || ref.getBase() == 'C') ? 1 : 0;
+ }
+ public double gcContent() {
+ return (double) gcCount / interval.size();
}
/**
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java
index 52e385957..0259e9685 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java
@@ -66,11 +66,11 @@ public class DiagnoseTargetsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testSingleSample() {
- DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "1771e95aed2b3b240dc353f84e19847d");
+ DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "6ca3d3917a7b65eaa877aa3658d80912");
}
@Test(enabled = true)
public void testMultiSample() {
- DTTest("testMultiSample ", "-I " + multiSample, "c7f1691dbe5f121c4a79be823d3057e5");
+ DTTest("testMultiSample ", "-I " + multiSample, "f50c6b9bef9f63f0a8b32ae9a9bdd51a");
}
}