Merge pull request #434 from broadinstitute/mc_dt_gccontent

Add GC Content to DiagnoseTargets
This commit is contained in:
amilev 2013-12-04 09:42:26 -08:00
commit 0d94019bd6
3 changed files with 25 additions and 5 deletions

View File

@ -89,7 +89,12 @@ import java.util.*;
* <p/>
* <h3>Output</h3>
* <p>
* A modified VCF detailing each interval by sample
* A modified VCF detailing each interval by sample and information for each interval according to the thresholds used.
* Interval information includes GC Content, average interval depth, callable status among others.
*
* If you use the --missing option, you can get as a second output a intervals file with the loci that have missing data.
* This file can then be used as input to QualifyMissingIntervals for full qualification and interpretation of why
* the data is missing.
* </p>
* <p/>
* <h3>Examples</h3>
@ -117,6 +122,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
private static final String AVG_INTERVAL_DP_KEY = "IDP";
private static final String LOW_COVERAGE_LOCI = "LL";
private static final String ZERO_COVERAGE_LOCI = "ZL";
private static final String GC_CONTENT_KEY = "GC";
@Output(doc = "File to which interval statistics should be written")
@ -161,7 +167,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
// at this point, all intervals in intervalMap overlap with this locus, so update all of them
for (IntervalStratification intervalStratification : intervalMap.values())
intervalStratification.addLocus(context);
intervalStratification.addLocus(context, ref);
return 1L;
}
@ -276,6 +282,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
attributes.put(VCFConstants.END_KEY, interval.getStop());
attributes.put(AVG_INTERVAL_DP_KEY, stats.averageCoverage(interval.size()));
attributes.put(GC_CONTENT_KEY, stats.gcContent());
vcb = vcb.attributes(attributes);
vcb = vcb.genotypes(genotypes);
@ -391,6 +398,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
// INFO fields for overall data
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
headerLines.add(new VCFInfoHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
headerLines.add(new VCFInfoHeaderLine(GC_CONTENT_KEY, 1, VCFHeaderLineType.Float, "GC Content of the interval"));
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
// FORMAT fields for each genotype

View File

@ -47,6 +47,7 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.diagnosetargets;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -57,9 +58,13 @@ final class IntervalStratification extends AbstractStratification {
private final Map<String, AbstractStratification> samples;
private final GenomeLoc interval;
private List<CallableStatus> callableStatuses;
private long gcCount = 0;
public IntervalStratification(Set<String> samples, GenomeLoc interval, ThresHolder thresholds) {
super(thresholds);
assert interval != null && interval.size() > 0; // contracts
this.interval = interval;
this.samples = new HashMap<String, AbstractStratification>(samples.size());
for (String sample : samples)
@ -83,8 +88,11 @@ final class IntervalStratification extends AbstractStratification {
* This takes the input and manages passing the data to the SampleStatistics and Locus Statistics
*
* @param context The alignment context given from the walker
* @param ref The reference context given from the walker
*/
public void addLocus(AlignmentContext context) {
public void addLocus(final AlignmentContext context, final ReferenceContext ref) {
assert ref != null; // contracts
ReadBackedPileup pileup = context.getBasePileup();
Map<String, ReadBackedPileup> samplePileups = pileup.getPileupsForSamples(samples.keySet());
@ -99,7 +107,11 @@ final class IntervalStratification extends AbstractStratification {
sampleStratification.addLocus(context.getLocation(), samplePileup);
}
gcCount += (ref.getBase() == 'G' || ref.getBase() == 'C') ? 1 : 0;
}
public double gcContent() {
return (double) gcCount / interval.size();
}
/**

View File

@ -66,11 +66,11 @@ public class DiagnoseTargetsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testSingleSample() {
DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "1771e95aed2b3b240dc353f84e19847d");
DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "6ca3d3917a7b65eaa877aa3658d80912");
}
@Test(enabled = true)
public void testMultiSample() {
DTTest("testMultiSample ", "-I " + multiSample, "c7f1691dbe5f121c4a79be823d3057e5");
DTTest("testMultiSample ", "-I " + multiSample, "f50c6b9bef9f63f0a8b32ae9a9bdd51a");
}
}