From bdf5945dcca003bcdc4becdd48816ef354b42f97 Mon Sep 17 00:00:00 2001 From: Roger Zurawicki Date: Tue, 12 Jun 2012 16:49:52 -0400 Subject: [PATCH] Fixed bugs in DiagnoseTargets DT would not report bad mates! that has been fixed Signed-off-by: Mauricio Carneiro --- .../diagnostics/targets/DiagnoseTargets.java | 13 +++++---- .../diagnostics/targets/SampleStatistics.java | 27 ++++++++++++++----- .../diagnostics/targets/ThresHolder.java | 4 +-- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java index 62477a87c..d35a72602 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java @@ -250,6 +250,7 @@ public class DiagnoseTargets extends LocusWalker { private void outputStatsToVCF(IntervalStatistics stats, Allele refAllele) { GenomeLoc interval = stats.getInterval(); + List alleles = new ArrayList(); Map attributes = new HashMap(); ArrayList genotypes = new ArrayList(); @@ -265,7 +266,9 @@ public class DiagnoseTargets extends LocusWalker { attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage()); vcb = vcb.attributes(attributes); - + if (debug) { + System.out.printf("Output -- Interval: %s, Coverage: %.2f%n", stats.getInterval(), stats.averageCoverage()); + } for (String sample : samples) { Map infos = new HashMap(); SampleStatistics sampleStat = stats.getSample(sample); @@ -277,14 +280,14 @@ public class DiagnoseTargets extends LocusWalker { Set filters = new HashSet(); filters.addAll(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds))); + if (debug) { + System.out.printf("Found %d bad mates out of %d reads %n", sampleStat.getnBadMates(), sampleStat.getnReads()); + } genotypes.add(new Genotype(sample, null, VariantContext.NO_LOG10_PERROR, filters, infos, false)); } vcb = vcb.genotypes(genotypes); - if (debug) { - System.out.printf("Output -- Interval: %s, Coverage: %.2f%n", stats.getInterval(), stats.averageCoverage()); - } vcfWriter.add(vcb.make()); @@ -306,6 +309,6 @@ public class DiagnoseTargets extends LocusWalker { } private IntervalStatistics createIntervalStatistic(GenomeLoc interval) { - return new IntervalStatistics(samples, interval /*, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality*/); + return new IntervalStatistics(samples, interval); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java index 25dda1da5..ca6070630 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java @@ -104,19 +104,19 @@ class SampleStatistics { double intervalSize = interval.size(); - if ((nBadMates / nReads) > thresholds.getBadMateStatusThreshold()) + if (((double) nBadMates / nReads) >= thresholds.getBadMateStatusThreshold()) output.add(CallableStatus.BAD_MATE); - if ((totals.get(CallableStatus.COVERAGE_GAPS) / intervalSize) > thresholds.getCoverageStatusThreshold()) + if ((totals.get(CallableStatus.COVERAGE_GAPS) / intervalSize) >= thresholds.getCoverageStatusThreshold()) output.add(CallableStatus.COVERAGE_GAPS); - if ((totals.get(CallableStatus.LOW_COVERAGE) / intervalSize) > thresholds.getCoverageStatusThreshold()) + if ((totals.get(CallableStatus.LOW_COVERAGE) / intervalSize) >= thresholds.getCoverageStatusThreshold()) output.add(CallableStatus.LOW_COVERAGE); - if ((totals.get(CallableStatus.EXCESSIVE_COVERAGE) / intervalSize) > thresholds.getExcessiveCoverageThreshold()) + if ((totals.get(CallableStatus.EXCESSIVE_COVERAGE) / intervalSize) >= thresholds.getExcessiveCoverageThreshold()) output.add(CallableStatus.EXCESSIVE_COVERAGE); - if ((totals.get(CallableStatus.POOR_QUALITY) / intervalSize) > thresholds.getQualityStatusThreshold()) + if ((totals.get(CallableStatus.POOR_QUALITY) / intervalSize) >= thresholds.getQualityStatusThreshold()) output.add(CallableStatus.POOR_QUALITY); if (totals.get(CallableStatus.REF_N) > 0) @@ -161,7 +161,7 @@ class SampleStatistics { // Was this read already processed? if (read.getTemporaryAttribute("checkedBadMate") == null) { nReads++; - if (hasValidMate(read, thresholds)) + if (!hasValidMate(read, thresholds)) nBadMates++; read.setTemporaryAttribute("checkedBadMate", true); } @@ -254,7 +254,7 @@ class SampleStatistics { * reasonable insert size? * inverted? * same orientation? - * todo - same contig? + * same contig? * is pair mapped? * todo - is forced mate? * @@ -264,6 +264,10 @@ class SampleStatistics { if (!read.getReadPairedFlag()) return false; + // different contigs + if (read.getMateReferenceIndex() != read.getReferenceIndex()) + return false; + // unmapped if (read.getMateUnmappedFlag() || read.getReadUnmappedFlag()) return false; @@ -277,10 +281,19 @@ class SampleStatistics { read.getAlignmentStart() < read.getMateAlignmentStart()) return false; + // TODO note: IGV uses a different alorithm for insert size, there should be a common util class that does this for you // mates are too far apart if (Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) > thresholds.getMaximumInsertSize()) return false; return true; } + + public int getnReads() { + return nReads; + } + + public int getnBadMates() { + return nBadMates; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/ThresHolder.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/ThresHolder.java index 20d1dee62..ae4296449 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/ThresHolder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/ThresHolder.java @@ -129,12 +129,12 @@ class ThresHolder { // INFO fields for overall data headerLines.add(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval")); - headerLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size.")); + headerLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size.")); headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode")); // FORMAT fields for each genotype // todo -- find the appropriate VCF constants - headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size.")); + headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size.")); headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution.")); headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution.")); headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));