diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java index dca83af44..8b7f3dbf2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/AbstractStratification.java @@ -63,6 +63,10 @@ abstract class AbstractStratification { private Map statusTally = null; protected ThresHolder thresholds; + public AbstractStratification(ThresHolder thresholds) { + this.thresholds = thresholds; + } + /** * Calculates the average "good" coverage of this sample. Good means "passes the base and * mapping quality requirements. @@ -120,7 +124,7 @@ abstract class AbstractStratification { /** - * Tally up all the callable status of all the loci in this sample. + * Tally up all the callable status of all elements of the stratification. * * @return a map of callable status and counts */ @@ -136,10 +140,10 @@ abstract class AbstractStratification { return statusTally; } - public static List queryStatus(List statList, AbstractStratification stratification) { + public List queryStatus(List statList) { List output = new LinkedList(); for (Metric stat : statList) { - final CallableStatus status = stat.status(stratification); + final CallableStatus status = stat.status(this); if (status != null) { output.add(status); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java index 32f87b973..32d866b0a 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/DiagnoseTargets.java @@ -112,6 +112,9 @@ import java.util.*; public class DiagnoseTargets extends LocusWalker { private static final String AVG_INTERVAL_DP_KEY = "IDP"; + private static final String LOW_COVERAGE_LOCI = "LL"; + private static final String ZERO_COVERAGE_LOCI = "ZL"; + @Output(doc = "File to which interval statistics should be written") private VariantContextWriter vcfWriter = null; @@ -134,7 +137,7 @@ public class DiagnoseTargets extends LocusWalker { if (getToolkit().getIntervals() == null || getToolkit().getIntervals().isEmpty()) throw new UserException("This tool only works if you provide one or more intervals (use the -L argument). If you want to run whole genome, use -T DepthOfCoverage instead."); - intervalMap = new HashMap(INITIAL_HASH_SIZE); + intervalMap = new LinkedHashMap(INITIAL_HASH_SIZE); intervalListIterator = new PeekableIterator(getToolkit().getIntervals().iterator()); // get all of the unique sample names for the VCF Header @@ -151,8 +154,8 @@ public class DiagnoseTargets extends LocusWalker { // process and remove any intervals in the map that are don't overlap the current locus anymore // and add all new intervals that may overlap this reference locus - outputFinishedIntervals(refLocus, ref.getBase()); addNewOverlappingIntervals(refLocus); + outputFinishedIntervals(refLocus, ref.getBase()); // at this point, all intervals in intervalMap overlap with this locus, so update all of them for (IntervalStratification intervalStratification : intervalMap.values()) @@ -203,24 +206,17 @@ public class DiagnoseTargets extends LocusWalker { * @param refBase the reference allele */ private void outputFinishedIntervals(final GenomeLoc refLocus, final byte refBase) { - GenomeLoc interval = intervalListIterator.peek(); - - // output empty statistics for uncovered intervals - while (interval != null && interval.isBefore(refLocus)) { - final IntervalStratification stats = intervalMap.get(interval); - outputStatsToVCF(stats != null ? stats : createIntervalStatistic(interval), UNCOVERED_ALLELE); - if (stats != null) intervalMap.remove(interval); - intervalListIterator.next(); - interval = intervalListIterator.peek(); - } - - // remove any potential leftover interval in intervalMap (this will only happen when we have overlapping intervals) + // output any intervals that were finished + final List toRemove = new LinkedList(); for (GenomeLoc key : intervalMap.keySet()) { if (key.isBefore(refLocus)) { outputStatsToVCF(intervalMap.get(key), Allele.create(refBase, true)); - intervalMap.remove(key); + toRemove.add(key); } } + for (GenomeLoc key : toRemove) { + intervalMap.remove(key); + } } /** @@ -247,10 +243,21 @@ public class DiagnoseTargets extends LocusWalker { GenomeLoc interval = stats.getInterval(); - List alleles = new ArrayList(); - Map attributes = new HashMap(); - ArrayList genotypes = new ArrayList(); + final List alleles = new ArrayList(); + final Map attributes = new HashMap(); + final ArrayList genotypes = new ArrayList(); + for (String sample : samples) { + final GenotypeBuilder gb = new GenotypeBuilder(sample); + + SampleStratification sampleStat = stats.getSampleStatistics(sample); + gb.attribute(AVG_INTERVAL_DP_KEY, sampleStat.averageCoverage(interval.size())); + gb.attribute(LOW_COVERAGE_LOCI, sampleStat.getNLowCoveredLoci()); + gb.attribute(ZERO_COVERAGE_LOCI, sampleStat.getNUncoveredLoci()); + gb.filters(statusToStrings(stats.getSampleStatistics(sample).callableStatuses(), false)); + + genotypes.add(gb.make()); + } alleles.add(refAllele); alleles.add(SYMBOLIC_ALLELE); VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStop(), alleles); @@ -262,16 +269,6 @@ public class DiagnoseTargets extends LocusWalker { attributes.put(AVG_INTERVAL_DP_KEY, stats.averageCoverage(interval.size())); vcb = vcb.attributes(attributes); - for (String sample : samples) { - final GenotypeBuilder gb = new GenotypeBuilder(sample); - - SampleStratification sampleStat = stats.getSampleStatistics(sample); - gb.attribute(AVG_INTERVAL_DP_KEY, sampleStat.averageCoverage(interval.size())); - - gb.filters(statusToStrings(stats.getSampleStatistics(sample).callableStatuses(), false)); - - genotypes.add(gb.make()); - } vcb = vcb.genotypes(genotypes); vcfWriter.add(vcb.make()); @@ -345,6 +342,8 @@ public class DiagnoseTargets extends LocusWalker { // FORMAT fields for each genotype headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY)); headerLines.add(new VCFFormatHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average sample depth across the interval. Sum of the sample specific depth in all loci divided by interval size.")); + headerLines.add(new VCFFormatHeaderLine(LOW_COVERAGE_LOCI, 1, VCFHeaderLineType.Integer, "Number of loci for this sample, in this interval with low coverage (below the minimum coverage) but not zero.")); + headerLines.add(new VCFFormatHeaderLine(ZERO_COVERAGE_LOCI, 1, VCFHeaderLineType.Integer, "Number of loci for this sample, in this interval with zero coverage.")); // FILTER fields for (CallableStatus stat : CallableStatus.values()) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java index 6c20403d1..86e9d0142 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/IntervalStratification.java @@ -56,11 +56,10 @@ import java.util.*; final class IntervalStratification extends AbstractStratification { private final Map samples; private final GenomeLoc interval; - private final ThresHolder thresholds; public IntervalStratification(Set samples, GenomeLoc interval, ThresHolder thresholds) { + super(thresholds); this.interval = interval; - this.thresholds = thresholds; this.samples = new HashMap(samples.size()); for (String sample : samples) this.samples.put(sample, new SampleStratification(interval, thresholds)); @@ -125,7 +124,7 @@ final class IntervalStratification extends AbstractStratification { } } - output.addAll(queryStatus(thresholds.intervalMetricList, this)); + output.addAll(queryStatus(thresholds.intervalMetricList)); return output; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/LocusStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/LocusStratification.java index d6acaf850..5902fce31 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/LocusStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/LocusStratification.java @@ -46,22 +46,20 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.diagnosetargets; -import java.util.LinkedList; import java.util.List; final class LocusStratification extends AbstractStratification { private long coverage; private long rawCoverage; - private final List locusStatisticsList; public LocusStratification(ThresHolder thresholds) { this(0,0,thresholds); } protected LocusStratification(int coverage, int rawCoverage, ThresHolder thresholds) { + super(thresholds); this.coverage = coverage; this.rawCoverage = rawCoverage; - this.locusStatisticsList = thresholds.locusMetricList; } @Override @@ -79,14 +77,7 @@ final class LocusStratification extends AbstractStratification { * @return a set of all statuses that apply */ public List callableStatuses() { - List output = new LinkedList(); - for (Metric stats : locusStatisticsList) { - CallableStatus status = stats.status(this); - if (status != null) { - output.add(status); - } - } - return output; + return queryStatus(thresholds.locusMetricList); } @Override diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/PluginUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/PluginUtils.java index 1085e8cac..7984ba7e7 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/PluginUtils.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/PluginUtils.java @@ -58,6 +58,6 @@ final class PluginUtils { final Map totals = sampleStratification.getStatusTally(); final int size = sampleStratification.getIntervalSize(); final int statusCount = totals.containsKey(CALL) ? totals.get(CALL) : 0; - return ( (double) statusCount / size) >= threshold ? CALL: null; + return ( (double) statusCount / size) > threshold ? CALL: null; } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java index b9ae1f3cf..49aa10cf6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/diagnosetargets/SampleStratification.java @@ -61,15 +61,14 @@ import java.util.List; final class SampleStratification extends AbstractStratification { private final GenomeLoc interval; private final ArrayList loci; - private final ThresHolder thresholds; private int nReads = -1; private int nBadMates = -1; public SampleStratification(final GenomeLoc interval, final ThresHolder thresholds) { + super(thresholds); this.interval = interval; this.loci = new ArrayList(interval.size()); - this.thresholds = thresholds; nReads = 0; nBadMates = 0; @@ -121,7 +120,7 @@ final class SampleStratification extends AbstractStratification { public Iterable callableStatuses() { final List output = new LinkedList(); - // get the tally of all the locus callable statuses + // get the sample statuses of all the Loci Metrics for (Metric locusStat : thresholds.locusMetricList) { final CallableStatus status = ((LocusMetric) locusStat).sampleStatus(this); if (status != null) { @@ -130,12 +129,7 @@ final class SampleStratification extends AbstractStratification { } // get the sample specific statitics statuses - for (Metric sampleStat : thresholds.sampleMetricList) { - final CallableStatus status = sampleStat.status(this); - if (status != null) { - output.add(status); - } - } + output.addAll(queryStatus(thresholds.sampleMetricList)); // special case, if there are no reads, then there is no sense reporting coverage gaps. if (output.contains(CallableStatus.NO_READS) && output.contains(CallableStatus.COVERAGE_GAPS)) @@ -159,4 +153,17 @@ final class SampleStratification extends AbstractStratification { read.setTemporaryAttribute("seen", true); } } + + public int getNLowCoveredLoci() { + return getCallableStatusCount(CallableStatus.LOW_COVERAGE); + } + + public int getNUncoveredLoci() { + return getCallableStatusCount(CallableStatus.COVERAGE_GAPS); + } + + private int getCallableStatusCount(CallableStatus status) { + final Integer x = getStatusTally().get(status); + return x == null ? 0 : x; + } }