diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java index b96e6cf1c..62477a87c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java @@ -33,7 +33,8 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; @@ -147,7 +148,7 @@ public class DiagnoseTargets extends LocusWalker { intervalListIterator = new PeekableIterator(getToolkit().getIntervals().iterator()); samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); // get all of the unique sample names for the VCF Header - vcfWriter.writeHeader(new VCFHeader(getHeaderInfo(), samples)); // initialize the VCF header + vcfWriter.writeHeader(new VCFHeader(ThresHolder.getHeaderInfo(), samples)); // initialize the VCF header } @Override @@ -289,34 +290,6 @@ public class DiagnoseTargets extends LocusWalker { } - /** - * Gets the header lines for the VCF writer - * - * @return A set of VCF header lines - */ - private static Set getHeaderInfo() { - Set headerLines = new HashSet(); - - // INFO fields for overall data - headerLines.add(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval")); - headerLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size.")); - headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode")); - - // FORMAT fields for each genotype - // todo -- find the appropriate VCF constants - headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size.")); - headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution.")); - headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution.")); - headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution.")); - - - // FILTER fields - for (CallableStatus stat : CallableStatus.values()) - headerLines.add(new VCFFilterHeaderLine(stat.name(), stat.description)); - - return headerLines; - } - /** * Function that process a set of statuses into strings * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java new file mode 100644 index 000000000..ac60f5f28 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; + +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; +import org.broadinstitute.sting.gatk.walkers.PartitionBy; +import org.broadinstitute.sting.gatk.walkers.PartitionType; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.activeregion.ActiveRegion; + +import java.io.PrintStream; + +@PartitionBy(PartitionType.CONTIG) +@ActiveRegionExtension(extension = 0, maxRegion = 50000) +public class FindCoveredIntervals extends ActiveRegionWalker { + @Output(required = true) + private PrintStream out; + + @Override + // Look to see if the region has sufficient coverage + public double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { + + int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup()); + + // note the linear probability scale + int coverageThreshold = 20; + return Math.min((double) depth / coverageThreshold, 1); + + } + + @Override + public GenomeLoc map(final ActiveRegion activeRegion, final RefMetaDataTracker tracker) { + if (activeRegion.isActive) + return activeRegion.getLocation(); + else + return null; + } + + @Override + public Long reduceInit() { + return 0L; + } + + @Override + public Long reduce(final GenomeLoc value, Long reduce) { + if (value != null) { + out.println(value.toString()); + return reduce++; + } else + return reduce; + } + + @Override + public void onTraversalDone(Long reduce) { + logger.info(String.format("Found %d intervals", reduce)); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java index 958a44d4a..25dda1da5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java @@ -146,7 +146,7 @@ class SampleStatistics { int locusIndex = locus.getStart() - interval.getStart(); int rawCoverage = pileup.depthOfCoverage(); - int coverage = pileup.getBaseAndMappingFilteredPileup(thresholds.getMinimumBaseQuality(), thresholds.getMinimumMappingQuality()).depthOfCoverage(); + int coverage = thresholds.getFilteredCoverage(pileup); LocusStatistics locusData = new LocusStatistics(coverage, rawCoverage); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/ThresHolder.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/ThresHolder.java index 17098567f..20d1dee62 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/ThresHolder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/ThresHolder.java @@ -24,6 +24,12 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +import java.util.HashSet; +import java.util.Set; + class ThresHolder { public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5); @@ -69,14 +75,6 @@ class ThresHolder { this.qualityStatusThreshold = qualityStatusThreshold; } - public int getMinimumBaseQuality() { - return minimumBaseQuality; - } - - public int getMinimumMappingQuality() { - return minimumMappingQuality; - } - public int getMinimumCoverage() { return minimumCoverage; } @@ -116,4 +114,37 @@ class ThresHolder { public double getQualityStatusThreshold() { return qualityStatusThreshold; } + + public int getFilteredCoverage(ReadBackedPileup pileup) { + return pileup.getBaseAndMappingFilteredPileup(minimumBaseQuality, minimumMappingQuality).depthOfCoverage(); + } + + /** + * Gets the header lines for the VCF writer + * + * @return A set of VCF header lines + */ + public static Set getHeaderInfo() { + Set headerLines = new HashSet(); + + // INFO fields for overall data + headerLines.add(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval")); + headerLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size.")); + headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode")); + + // FORMAT fields for each genotype + // todo -- find the appropriate VCF constants + headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size.")); + headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution.")); + headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution.")); + headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution.")); + + + // FILTER fields + for (CallableStatus stat : CallableStatus.values()) + headerLines.add(new VCFFilterHeaderLine(stat.name(), stat.description)); + + return headerLines; + } + }