From 1466396a31aa0d2014c59dfef691c500c6f0c7c8 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 25 Apr 2013 01:18:40 -0400 Subject: [PATCH 1/4] Diagnose target is outputting intervals out of order Problem ------- When the interval had no reads, it was being sent to the VCF before the intervals that just got processed, therefore violating the sort order of the VCF. Solution -------- Use a linked hash map, and make the insertion and removal all happen in one place regardless of having reads or not. Since the input is ordered, the output has to be ordered as well. Itemized changes -------------- * Clean up code duplication in LocusStratification and SampleStratification * Add number of uncovered sites and number of low covered sites to the VCF output. * Add new VCF format fields * Fix outputting multiple status when threshold is 0 (ratio must be GREATER THAN not equal to the threshold to get reported) [fixes #48780333] [fixes #48787311] --- .../AbstractStratification.java | 10 +++- .../diagnosetargets/DiagnoseTargets.java | 55 +++++++++---------- .../IntervalStratification.java | 5 +- .../diagnosetargets/LocusStratification.java | 13 +---- .../diagnosetargets/PluginUtils.java | 2 +- .../diagnosetargets/SampleStratification.java | 25 ++++++--- 6 files changed, 55 insertions(+), 55 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java index dca83af44..8b7f3dbf2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java @@ -63,6 +63,10 @@ abstract class AbstractStratification { private Map statusTally = null; protected ThresHolder thresholds; + public 
AbstractStratification(ThresHolder thresholds) { + this.thresholds = thresholds; + } + /** * Calculates the average "good" coverage of this sample. Good means "passes the base and * mapping quality requirements. @@ -120,7 +124,7 @@ abstract class AbstractStratification { /** - * Tally up all the callable status of all the loci in this sample. + * Tally up all the callable status of all elements of the stratification. * * @return a map of callable status and counts */ @@ -136,10 +140,10 @@ abstract class AbstractStratification { return statusTally; } - public static List queryStatus(List statList, AbstractStratification stratification) { + public List queryStatus(List statList) { List output = new LinkedList(); for (Metric stat : statList) { - final CallableStatus status = stat.status(stratification); + final CallableStatus status = stat.status(this); if (status != null) { output.add(status); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java index 32f87b973..32d866b0a 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java @@ -112,6 +112,9 @@ import java.util.*; public class DiagnoseTargets extends LocusWalker { private static final String AVG_INTERVAL_DP_KEY = "IDP"; + private static final String LOW_COVERAGE_LOCI = "LL"; + private static final String ZERO_COVERAGE_LOCI = "ZL"; + @Output(doc = "File to which interval statistics should be written") private VariantContextWriter vcfWriter = null; @@ -134,7 +137,7 @@ public class DiagnoseTargets extends LocusWalker { if (getToolkit().getIntervals() == null || getToolkit().getIntervals().isEmpty()) throw new UserException("This tool only works if you provide one or more 
intervals (use the -L argument). If you want to run whole genome, use -T DepthOfCoverage instead."); - intervalMap = new HashMap(INITIAL_HASH_SIZE); + intervalMap = new LinkedHashMap(INITIAL_HASH_SIZE); intervalListIterator = new PeekableIterator(getToolkit().getIntervals().iterator()); // get all of the unique sample names for the VCF Header @@ -151,8 +154,8 @@ public class DiagnoseTargets extends LocusWalker { // process and remove any intervals in the map that are don't overlap the current locus anymore // and add all new intervals that may overlap this reference locus - outputFinishedIntervals(refLocus, ref.getBase()); addNewOverlappingIntervals(refLocus); + outputFinishedIntervals(refLocus, ref.getBase()); // at this point, all intervals in intervalMap overlap with this locus, so update all of them for (IntervalStratification intervalStratification : intervalMap.values()) @@ -203,24 +206,17 @@ public class DiagnoseTargets extends LocusWalker { * @param refBase the reference allele */ private void outputFinishedIntervals(final GenomeLoc refLocus, final byte refBase) { - GenomeLoc interval = intervalListIterator.peek(); - - // output empty statistics for uncovered intervals - while (interval != null && interval.isBefore(refLocus)) { - final IntervalStratification stats = intervalMap.get(interval); - outputStatsToVCF(stats != null ? 
stats : createIntervalStatistic(interval), UNCOVERED_ALLELE); - if (stats != null) intervalMap.remove(interval); - intervalListIterator.next(); - interval = intervalListIterator.peek(); - } - - // remove any potential leftover interval in intervalMap (this will only happen when we have overlapping intervals) + // output any intervals that were finished + final List toRemove = new LinkedList(); for (GenomeLoc key : intervalMap.keySet()) { if (key.isBefore(refLocus)) { outputStatsToVCF(intervalMap.get(key), Allele.create(refBase, true)); - intervalMap.remove(key); + toRemove.add(key); } } + for (GenomeLoc key : toRemove) { + intervalMap.remove(key); + } } /** @@ -247,10 +243,21 @@ public class DiagnoseTargets extends LocusWalker { GenomeLoc interval = stats.getInterval(); - List alleles = new ArrayList(); - Map attributes = new HashMap(); - ArrayList genotypes = new ArrayList(); + final List alleles = new ArrayList(); + final Map attributes = new HashMap(); + final ArrayList genotypes = new ArrayList(); + for (String sample : samples) { + final GenotypeBuilder gb = new GenotypeBuilder(sample); + + SampleStratification sampleStat = stats.getSampleStatistics(sample); + gb.attribute(AVG_INTERVAL_DP_KEY, sampleStat.averageCoverage(interval.size())); + gb.attribute(LOW_COVERAGE_LOCI, sampleStat.getNLowCoveredLoci()); + gb.attribute(ZERO_COVERAGE_LOCI, sampleStat.getNUncoveredLoci()); + gb.filters(statusToStrings(stats.getSampleStatistics(sample).callableStatuses(), false)); + + genotypes.add(gb.make()); + } alleles.add(refAllele); alleles.add(SYMBOLIC_ALLELE); VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStop(), alleles); @@ -262,16 +269,6 @@ public class DiagnoseTargets extends LocusWalker { attributes.put(AVG_INTERVAL_DP_KEY, stats.averageCoverage(interval.size())); vcb = vcb.attributes(attributes); - for (String sample : samples) { - final GenotypeBuilder gb = new 
GenotypeBuilder(sample); - - SampleStratification sampleStat = stats.getSampleStatistics(sample); - gb.attribute(AVG_INTERVAL_DP_KEY, sampleStat.averageCoverage(interval.size())); - - gb.filters(statusToStrings(stats.getSampleStatistics(sample).callableStatuses(), false)); - - genotypes.add(gb.make()); - } vcb = vcb.genotypes(genotypes); vcfWriter.add(vcb.make()); @@ -345,6 +342,8 @@ public class DiagnoseTargets extends LocusWalker { // FORMAT fields for each genotype headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY)); headerLines.add(new VCFFormatHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average sample depth across the interval. Sum of the sample specific depth in all loci divided by interval size.")); + headerLines.add(new VCFFormatHeaderLine(LOW_COVERAGE_LOCI, 1, VCFHeaderLineType.Integer, "Number of loci for this sample, in this interval with low coverage (below the minimum coverage) but not zero.")); + headerLines.add(new VCFFormatHeaderLine(ZERO_COVERAGE_LOCI, 1, VCFHeaderLineType.Integer, "Number of loci for this sample, in this interval with zero coverage.")); // FILTER fields for (CallableStatus stat : CallableStatus.values()) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java index 6c20403d1..86e9d0142 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java @@ -56,11 +56,10 @@ import java.util.*; final class IntervalStratification extends AbstractStratification { private final Map samples; private final GenomeLoc interval; - private final ThresHolder thresholds; public IntervalStratification(Set samples, GenomeLoc interval, 
ThresHolder thresholds) { + super(thresholds); this.interval = interval; - this.thresholds = thresholds; this.samples = new HashMap(samples.size()); for (String sample : samples) this.samples.put(sample, new SampleStratification(interval, thresholds)); @@ -125,7 +124,7 @@ final class IntervalStratification extends AbstractStratification { } } - output.addAll(queryStatus(thresholds.intervalMetricList, this)); + output.addAll(queryStatus(thresholds.intervalMetricList)); return output; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/LocusStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/LocusStratification.java index d6acaf850..5902fce31 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/LocusStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/LocusStratification.java @@ -46,22 +46,20 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.diagnosetargets; -import java.util.LinkedList; import java.util.List; final class LocusStratification extends AbstractStratification { private long coverage; private long rawCoverage; - private final List locusStatisticsList; public LocusStratification(ThresHolder thresholds) { this(0,0,thresholds); } protected LocusStratification(int coverage, int rawCoverage, ThresHolder thresholds) { + super(thresholds); this.coverage = coverage; this.rawCoverage = rawCoverage; - this.locusStatisticsList = thresholds.locusMetricList; } @Override @@ -79,14 +77,7 @@ final class LocusStratification extends AbstractStratification { * @return a set of all statuses that apply */ public List callableStatuses() { - List output = new LinkedList(); - for (Metric stats : locusStatisticsList) { - CallableStatus status = stats.status(this); - if (status != null) { - output.add(status); - } - } - return output; + return 
queryStatus(thresholds.locusMetricList); } @Override diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/PluginUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/PluginUtils.java index 1085e8cac..7984ba7e7 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/PluginUtils.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/PluginUtils.java @@ -58,6 +58,6 @@ final class PluginUtils { final Map totals = sampleStratification.getStatusTally(); final int size = sampleStratification.getIntervalSize(); final int statusCount = totals.containsKey(CALL) ? totals.get(CALL) : 0; - return ( (double) statusCount / size) >= threshold ? CALL: null; + return ( (double) statusCount / size) > threshold ? CALL: null; } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java index b9ae1f3cf..49aa10cf6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java @@ -61,15 +61,14 @@ import java.util.List; final class SampleStratification extends AbstractStratification { private final GenomeLoc interval; private final ArrayList loci; - private final ThresHolder thresholds; private int nReads = -1; private int nBadMates = -1; public SampleStratification(final GenomeLoc interval, final ThresHolder thresholds) { + super(thresholds); this.interval = interval; this.loci = new ArrayList(interval.size()); - this.thresholds = thresholds; nReads = 0; nBadMates = 0; @@ -121,7 +120,7 @@ final class SampleStratification extends AbstractStratification { public Iterable 
callableStatuses() { final List output = new LinkedList(); - // get the tally of all the locus callable statuses + // get the sample statuses of all the Loci Metrics for (Metric locusStat : thresholds.locusMetricList) { final CallableStatus status = ((LocusMetric) locusStat).sampleStatus(this); if (status != null) { @@ -130,12 +129,7 @@ final class SampleStratification extends AbstractStratification { } // get the sample specific statitics statuses - for (Metric sampleStat : thresholds.sampleMetricList) { - final CallableStatus status = sampleStat.status(this); - if (status != null) { - output.add(status); - } - } + output.addAll(queryStatus(thresholds.sampleMetricList)); // special case, if there are no reads, then there is no sense reporting coverage gaps. if (output.contains(CallableStatus.NO_READS) && output.contains(CallableStatus.COVERAGE_GAPS)) @@ -159,4 +153,17 @@ final class SampleStratification extends AbstractStratification { read.setTemporaryAttribute("seen", true); } } + + public int getNLowCoveredLoci() { + return getCallableStatusCount(CallableStatus.LOW_COVERAGE); + } + + public int getNUncoveredLoci() { + return getCallableStatusCount(CallableStatus.COVERAGE_GAPS); + } + + private int getCallableStatusCount(CallableStatus status) { + final Integer x = getStatusTally().get(status); + return x == null ? 
0 : x; + } } From 3dbb86b05253ce407b540cb8fe6d1cd66cb92a0d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 26 Apr 2013 23:29:25 -0400 Subject: [PATCH 2/4] Outputting missing intervals in DiagnoseTargets Problem ------ Diagnose Targets identifies holes in the coverage of a targeted experiment, but it only reports them; it doesn't list the actual missing loci Solution ------ This commit implements an optional intervals file output listing the exact loci that did not pass filters Itemized changes -------------- * Cache callable statuses (to avoid recalculation) * Add functionality to output missing intervals * Implement new tool to qualify the missing intervals (QualifyMissingIntervals) by gc content, size, type of missing coverage and origin (coding sequence, intron, ...) --- .../AbstractStratification.java | 2 +- .../diagnosetargets/DiagnoseTargets.java | 79 ++++++++++++++++--- .../IntervalStratification.java | 9 ++- .../diagnosetargets/SampleStratification.java | 2 +- .../diagnosetargets/ThresHolder.java | 5 ++ 5 files changed, 85 insertions(+), 12 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java index 8b7f3dbf2..ceccdcb2e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java @@ -120,7 +120,7 @@ abstract class AbstractStratification { * * @return the callable status(es) for the whole object */ - public abstract Iterable callableStatuses(); + public abstract List callableStatuses(); /** diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java index 32d866b0a..a3ac21ae0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java @@ -65,6 +65,8 @@ import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.vcf.*; +import java.io.FileWriter; +import java.io.IOException; import java.util.*; /** @@ -122,13 +124,12 @@ public class DiagnoseTargets extends LocusWalker { @ArgumentCollection private ThresHolder thresholds = new ThresHolder(); - private Map intervalMap = null; // maps each interval => statistics + private Map intervalMap = null; // maps each interval => statistics private PeekableIterator intervalListIterator; // an iterator to go over all the intervals provided as we traverse the genome private Set samples = null; // all the samples being processed private static final Allele SYMBOLIC_ALLELE = Allele.create("
", false); // avoid creating the symbolic allele multiple times private static final Allele UNCOVERED_ALLELE = Allele.create("A", true); // avoid creating the 'fake' ref allele for uncovered intervals multiple times - - private static final int INITIAL_HASH_SIZE = 500000; + private static final int INITIAL_HASH_SIZE = 50; // enough room for potential overlapping intervals plus recently finished intervals @Override public void initialize() { @@ -149,7 +150,7 @@ public class DiagnoseTargets extends LocusWalker { } @Override - public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public Long map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { GenomeLoc refLocus = ref.getLocus(); // process and remove any intervals in the map that are don't overlap the current locus anymore @@ -187,7 +188,7 @@ public class DiagnoseTargets extends LocusWalker { * @param result number of loci processed by the walker */ @Override - public void onTraversalDone(Long result) { + public void onTraversalDone(final Long result) { for (GenomeLoc interval : intervalMap.keySet()) outputStatsToVCF(intervalMap.get(interval), UNCOVERED_ALLELE); @@ -197,6 +198,14 @@ public class DiagnoseTargets extends LocusWalker { intervalListIterator.next(); interval = intervalListIterator.peek(); } + + if (thresholds.missingTargets != null) { + try { + thresholds.missingTargets.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } } /** @@ -210,7 +219,11 @@ public class DiagnoseTargets extends LocusWalker { final List toRemove = new LinkedList(); for (GenomeLoc key : intervalMap.keySet()) { if (key.isBefore(refLocus)) { - outputStatsToVCF(intervalMap.get(key), Allele.create(refBase, true)); + final IntervalStratification intervalStats = intervalMap.get(key); + outputStatsToVCF(intervalStats, Allele.create(refBase, true)); + if (hasMissingLoci(intervalStats)) { + outputMissingInterval(intervalStats); + } 
toRemove.add(key); } } @@ -224,7 +237,7 @@ public class DiagnoseTargets extends LocusWalker { * * @param refLocus the current reference locus */ - private void addNewOverlappingIntervals(GenomeLoc refLocus) { + private void addNewOverlappingIntervals(final GenomeLoc refLocus) { GenomeLoc interval = intervalListIterator.peek(); while (interval != null && !interval.isPast(refLocus)) { intervalMap.put(interval, createIntervalStatistic(interval)); @@ -239,10 +252,9 @@ public class DiagnoseTargets extends LocusWalker { * @param stats The statistics of the interval * @param refAllele the reference allele */ - private void outputStatsToVCF(IntervalStratification stats, Allele refAllele) { + private void outputStatsToVCF(final IntervalStratification stats, final Allele refAllele) { GenomeLoc interval = stats.getInterval(); - final List alleles = new ArrayList(); final Map attributes = new HashMap(); final ArrayList genotypes = new ArrayList(); @@ -274,6 +286,55 @@ public class DiagnoseTargets extends LocusWalker { vcfWriter.add(vcb.make()); } + private boolean hasMissingStatuses(AbstractStratification stats) { + return !stats.callableStatuses().isEmpty(); + } + + private boolean hasMissingLoci(final IntervalStratification stats) { + return thresholds.missingTargets != null && hasMissingStatuses(stats); + } + + private void outputMissingInterval(final IntervalStratification stats) { + final GenomeLoc interval = stats.getInterval(); + final boolean missing[] = new boolean[interval.size()]; + Arrays.fill(missing, true); + for (AbstractStratification sample : stats.getElements()) { + if (hasMissingStatuses(sample)) { + int pos = 0; + for (AbstractStratification locus : sample.getElements()) { + if (locus.callableStatuses().isEmpty()) { + missing[pos] = false; + } + pos++; + } + } + } + int start = -1; + boolean insideMissing = false; + for (int i = 0; i < missing.length; i++) { + if (missing[i] && !insideMissing) { + start = interval.getStart() + i; + insideMissing = true; + } 
else if (!missing[i] && insideMissing) { + final int stop = interval.getStart() + i - 1; + outputMissingInterval(interval.getContig(), start, stop); + insideMissing = false; + } + } + if (insideMissing) { + outputMissingInterval(interval.getContig(), start, interval.getStop()); + } + } + + private void outputMissingInterval(final String contig, final int start, final int stop){ + final FileWriter out = thresholds.missingTargets; + try { + out.write(String.format("%s:%d-%d\n", contig, start, stop)); + } catch (IOException e) { + e.printStackTrace(); + } + } + /** * Function that process a set of statuses into strings * diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java index 86e9d0142..3b5a23d51 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java @@ -56,6 +56,7 @@ import java.util.*; final class IntervalStratification extends AbstractStratification { private final Map samples; private final GenomeLoc interval; + private List callableStatuses; public IntervalStratification(Set samples, GenomeLoc interval, ThresHolder thresholds) { super(thresholds); @@ -113,7 +114,13 @@ final class IntervalStratification extends AbstractStratification { * {@inheritDoc} */ @Override - public Iterable callableStatuses() { + public List callableStatuses() { + if (callableStatuses == null) + callableStatuses = calculateStatus(); + return callableStatuses; + } + + private List calculateStatus() { final List output = new LinkedList(); // check if any of the votes pass the threshold diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java index 49aa10cf6..0f84c7d22 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java @@ -117,7 +117,7 @@ final class SampleStratification extends AbstractStratification { * {@inheritDoc} */ @Override - public Iterable callableStatuses() { + public List callableStatuses() { final List output = new LinkedList(); // get the sample statuses of all the Loci Metrics diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java index b0c999460..8c5a75148 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java @@ -47,7 +47,9 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.diagnosetargets; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import java.io.FileWriter; import java.util.LinkedList; import java.util.List; @@ -114,6 +116,9 @@ final class ThresHolder { @Argument(fullName = "quality_status_threshold", shortName = "stQ", doc = "The proportion of the loci needed for calling POOR_QUALITY", required = false) public double qualityStatusThreshold = 0.50; + @Output(fullName = "missing_intervals", shortName = "missing", doc ="Produces a file with the intervals that don't pass filters", required = false) + public FileWriter missingTargets = null; + public final List locusMetricList = new LinkedList(); public final List sampleMetricList = new LinkedList(); public final List intervalMetricList = new 
LinkedList(); From 9eceae793a249f0025d5f1c18d803121c5564d4c Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 2 May 2013 13:41:25 -0400 Subject: [PATCH 3/4] Tool to manipulate intervals outside the GATK Performs basic set operations on intervals like union, intersect and difference between two or more intervals. Useful for techdev and QC purposes. --- .../diagnosetargets/DiagnoseTargets.java | 19 +++++-------------- .../diagnosetargets/ThresHolder.java | 4 ++-- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java index a3ac21ae0..4bd08294b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java @@ -65,8 +65,7 @@ import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.vcf.*; -import java.io.FileWriter; -import java.io.IOException; +import java.io.PrintStream; import java.util.*; /** @@ -200,11 +199,7 @@ public class DiagnoseTargets extends LocusWalker { } if (thresholds.missingTargets != null) { - try { - thresholds.missingTargets.close(); - } catch (IOException e) { - e.printStackTrace(); - } + thresholds.missingTargets.close(); } } @@ -326,13 +321,9 @@ public class DiagnoseTargets extends LocusWalker { } } - private void outputMissingInterval(final String contig, final int start, final int stop){ - final FileWriter out = thresholds.missingTargets; - try { - out.write(String.format("%s:%d-%d\n", contig, start, stop)); - } catch (IOException e) { - e.printStackTrace(); - } + private void outputMissingInterval(final String contig, final int start, final int 
stop) { + final PrintStream out = thresholds.missingTargets; + out.println(String.format("%s:%d-%d", contig, start, stop)); } /** diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java index 8c5a75148..ebe2192b4 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/ThresHolder.java @@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.diagnosetargets; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; -import java.io.FileWriter; +import java.io.PrintStream; import java.util.LinkedList; import java.util.List; @@ -117,7 +117,7 @@ final class ThresHolder { public double qualityStatusThreshold = 0.50; @Output(fullName = "missing_intervals", shortName = "missing", doc ="Produces a file with the intervals that don't pass filters", required = false) - public FileWriter missingTargets = null; + public PrintStream missingTargets = null; public final List locusMetricList = new LinkedList(); public final List sampleMetricList = new LinkedList(); From adcbf947bfd57b1fef9305dd73ef1f0b2d020b04 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 13 May 2013 11:28:44 -0400 Subject: [PATCH 4/4] Update MD5s and the Diagnose Target scala script --- .../diagnosetargets/DiagnoseTargetsIntegrationTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java index bac09f30d..52e385957 100644 --- 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargetsIntegrationTest.java @@ -66,11 +66,11 @@ public class DiagnoseTargetsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testSingleSample() { - DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "850304909477afa8c2a8f128d6eedde9"); + DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "1771e95aed2b3b240dc353f84e19847d"); } @Test(enabled = true) public void testMultiSample() { - DTTest("testMultiSample ", "-I " + multiSample, "bedd19bcf21d1a779f6706c0351c9d26"); + DTTest("testMultiSample ", "-I " + multiSample, "c7f1691dbe5f121c4a79be823d3057e5"); } }