From 839b918f58fdc31a03d59d40f161ddbe30c3d4e7 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 9 Sep 2013 16:10:39 -0400 Subject: [PATCH 1/2] Length metric updates to QualifyMissingIntervals * add a length of the overlaping interval metric as per CSER request * standardized the distance metrics to be positive when fully overlapping and the longest off-target tail (as a negative number) when not overlapping * add gatkdocs to the tool (finally!) --- .../missing/QualifyMissingIntervals.java | 64 +++++++++++++++---- 1 file changed, 51 insertions(+), 13 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java index 8cc7bb8f3..9fabd6a37 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java @@ -76,10 +76,12 @@ import java.util.List; * * *

Input

@@ -89,7 +91,7 @@ import java.util.List; * *

Output

*

- * GC content calculations per interval. + * GC content, distance from the end of the target, coding sequence intersection, mapping and base quality averages and average depth per "missing" interval. *

* *

Example

@@ -108,12 +110,24 @@ import java.util.List; @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @By(DataSource.REFERENCE) public final class QualifyMissingIntervals extends LocusWalker implements NanoSchedulable { + /** + * A single GATKReport table with the qualifications on why the intervals passed by the -L argument were missing. + */ @Output protected PrintStream out; + /** + * List of targets used in the experiment. This file will be used to calculate the distance your missing + * intervals are to the targets (usually exons). Typically this is your hybrid selection targets file + * (e.g. Agilent exome target list) + */ @Argument(shortName = "targets", required = true) public String targetsFile; + /** + * List of coding sequence intervals (exons) if different from the targets file, to distinguish intervals + * that overlap the cds and intervals that don't. + */ @Argument(shortName = "cds", required = false) public String cdsFile = null; @@ -224,15 +238,18 @@ public final class QualifyMissingIntervals extends LocusWalker public void onTraversalDone(List> results) { for (Pair r : results) { - GenomeLoc interval = r.getFirst(); - Metrics metrics = r.getSecond(); + final GenomeLoc interval = r.getFirst(); + final Metrics metrics = r.getSecond(); + final List overlappingIntervals = target.getOverlapping(interval); + simpleReport.addRow( interval.toString(), metrics.gccontent(), metrics.baseQual(), metrics.mapQual(), metrics.depth(), - getPositionInTarget(interval), + getPositionInTarget(interval, overlappingIntervals), + getTargetSize(overlappingIntervals), cds.overlaps(interval), interval.size(), interpret(metrics, interval) @@ -242,13 +259,34 @@ public final class QualifyMissingIntervals extends LocusWalker out.close(); } - private int getPositionInTarget(GenomeLoc interval) { - final List hits = target.getOverlapping(interval); - int result = 0; - for (GenomeLoc hit : hits) { - result = interval.getStart() - hit.getStart(); // if there are multiple hits, we'll get the last one. + private int getPositionInTarget(final GenomeLoc interval, final List hits) { + if (hits.size() > 0) { + final GenomeLoc hit = hits.get(0); + + // interval is larger on both ends than the target -- return the maximum distance to either side as a negative number. (min of 2 negative numbers) + if (interval.getStart() < hit.getStart() && interval.getStop() > hit.getStop()) + return Math.min(interval.getStart() - hit.getStart(), + interval.getStop() - hit.getStop()); + + // interval is a left overlap -- return a negative number representing the distance between the two starts + else if (interval.getStart() < hit.getStart()) + return hit.getStart() - interval.getStart(); + + // interval is a right overlap -- return a negative number representing the distance between the two stops + else if (interval.getStop() > hit.getStop()) + return hit.getStop() - interval.getStop(); + + // interval is fully contained -- return the smallest distance to the edge of the target (left or right) as a positive number + else + return Math.min(Math.abs(hit.getStart() - interval.getStart()), + Math.abs(hit.getStop() - interval.getStop())); } - return result; + // if there is no overlapping interval, return int min value. + return Integer.MIN_VALUE; + } + + private int getTargetSize(final List overlappingIntervals) { + return overlappingIntervals.size() > 0 ? overlappingIntervals.get(0).size() : -1; } String interpret(final Metrics metrics, final GenomeLoc interval) { From 63ace685c90d4973c6cb144095db8502e8f602e9 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 10 Sep 2013 10:59:27 -0400 Subject: [PATCH 2/2] add unit tests --- .../missing/QualifyMissingIntervals.java | 23 +++++------ .../QualifyMissingIntervalsUnitTest.java | 38 ++++++++++++++++++- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java index 9fabd6a37..014ed6dcb 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java @@ -259,27 +259,24 @@ public final class QualifyMissingIntervals extends LocusWalker out.close(); } - private int getPositionInTarget(final GenomeLoc interval, final List hits) { - if (hits.size() > 0) { - final GenomeLoc hit = hits.get(0); + protected static int getPositionInTarget(final GenomeLoc interval, final List targets) { + if (targets.size() > 0) { + final GenomeLoc target = targets.get(0); // interval is larger on both ends than the target -- return the maximum distance to either side as a negative number. (min of 2 negative numbers) - if (interval.getStart() < hit.getStart() && interval.getStop() > hit.getStop()) - return Math.min(interval.getStart() - hit.getStart(), - interval.getStop() - hit.getStop()); + if (interval.getStart() < target.getStart() && interval.getStop() > target.getStop()) + return Math.min(target.getStart() - interval.getStart(), target.getStop() - interval.getStop()); // interval is a left overlap -- return a negative number representing the distance between the two starts - else if (interval.getStart() < hit.getStart()) - return hit.getStart() - interval.getStart(); + else if (interval.getStart() < target.getStart()) + return interval.getStart() - target.getStart(); // interval is a right overlap -- return a negative number representing the distance between the two stops - else if (interval.getStop() > hit.getStop()) - return hit.getStop() - interval.getStop(); + else if (interval.getStop() > target.getStop()) + return target.getStop() - interval.getStop(); // interval is fully contained -- return the smallest distance to the edge of the target (left or right) as a positive number - else - return Math.min(Math.abs(hit.getStart() - interval.getStart()), - Math.abs(hit.getStop() - interval.getStop())); + return Math.min(interval.getStart() - target.getStart(), target.getStop() - interval.getStop()); } // if there is no overlapping interval, return int min value. return Integer.MIN_VALUE; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervalsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervalsUnitTest.java index 7d6d05736..7ab891bd0 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervalsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervalsUnitTest.java @@ -46,6 +46,8 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.missing; +import it.unimi.dsi.fastutil.objects.ObjectArrayList; +import java.util.List; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc; @@ -57,7 +59,6 @@ import org.testng.annotations.Test; * User: carneiro * Date: 9/20/13 * Time: 3:59 PM - * To change this template use File | Settings | File Templates. */ public class QualifyMissingIntervalsUnitTest extends BaseTest { @Test(enabled = true) @@ -92,4 +93,39 @@ public class QualifyMissingIntervalsUnitTest extends BaseTest { for (Metrics m : array) Assert.assertEquals(tool.interpret(m, smallInterval), QualifyMissingIntervals.Interpretation.SMALL_INTERVAL.toString()); } + + @Test(enabled = true) + void testGetPositionInTarget () { + final UnvalidatingGenomeLoc target = new UnvalidatingGenomeLoc("a", 0, 30, 50); + final List targets = new ObjectArrayList<>(1); + targets.add(target); + + // left overlap + UnvalidatingGenomeLoc interval = new UnvalidatingGenomeLoc("a", 0, 10, 50); + Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), -20); + + // right overlap + interval = new UnvalidatingGenomeLoc("a", 0, 40, 60); + Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), -10); + + // interval > target with short right tail + interval = new UnvalidatingGenomeLoc("a", 0, 10, 60); + Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), -10); + + // interval > target with short left tail + interval = new UnvalidatingGenomeLoc("a", 0, 10, 80); + Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), -30); + + // interval < target with short right tail + interval = new UnvalidatingGenomeLoc("a", 0, 32, 40); + Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), 2); + + // interval < target with short left tail + interval = new UnvalidatingGenomeLoc("a", 0, 40, 42); + Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), 8); + + // no overlap + interval = new UnvalidatingGenomeLoc("a", 0, 40, 42); + Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, new ObjectArrayList()), Integer.MIN_VALUE); + } }