diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java
index 8cc7bb8f3..014ed6dcb 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java
@@ -76,10 +76,12 @@ import java.util.List;
*
* - Average Base Quality
* - Average Mapping Quality
+ * - Average Depth
* - GC Content
- * - Position in the target
- * - Coding Sequence / Intron
- * - Length of the uncovered area
+ * - Position in the target (Integer.MIN_VALUE if no overlap)
+ * - Length of the overlapping target (zero if no overlap)
+ * - Coding Sequence / Intron (optional)
+ * - Length of the uncovered interval
*
*
* Input
@@ -89,7 +91,7 @@ import java.util.List;
*
* Output
*
- * GC content calculations per interval.
+ * GC content, distance from the end of the target, coding sequence intersection, mapping and base quality averages and average depth per "missing" interval.
*
*
* Example
@@ -108,12 +110,24 @@ import java.util.List;
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
@By(DataSource.REFERENCE)
public final class QualifyMissingIntervals extends LocusWalker implements NanoSchedulable {
+ /**
+ * A single GATKReport table with the qualifications on why the intervals passed by the -L argument were missing.
+ */
@Output
protected PrintStream out;
+ /**
+ * List of targets used in the experiment. This file will be used to calculate the distance your missing
+ * intervals are to the targets (usually exons). Typically this is your hybrid selection targets file
+ * (e.g. Agilent exome target list)
+ */
@Argument(shortName = "targets", required = true)
public String targetsFile;
+ /**
+ * List of coding sequence intervals (exons) if different from the targets file, to distinguish intervals
+ * that overlap the cds and intervals that don't.
+ */
@Argument(shortName = "cds", required = false)
public String cdsFile = null;
@@ -224,15 +238,18 @@ public final class QualifyMissingIntervals extends LocusWalker
public void onTraversalDone(List> results) {
for (Pair r : results) {
- GenomeLoc interval = r.getFirst();
- Metrics metrics = r.getSecond();
+ final GenomeLoc interval = r.getFirst();
+ final Metrics metrics = r.getSecond();
+ final List overlappingIntervals = target.getOverlapping(interval);
+
simpleReport.addRow(
interval.toString(),
metrics.gccontent(),
metrics.baseQual(),
metrics.mapQual(),
metrics.depth(),
- getPositionInTarget(interval),
+ getPositionInTarget(interval, overlappingIntervals),
+ getTargetSize(overlappingIntervals),
cds.overlaps(interval),
interval.size(),
interpret(metrics, interval)
@@ -242,13 +259,31 @@ public final class QualifyMissingIntervals extends LocusWalker
out.close();
}
- private int getPositionInTarget(GenomeLoc interval) {
- final List hits = target.getOverlapping(interval);
- int result = 0;
- for (GenomeLoc hit : hits) {
- result = interval.getStart() - hit.getStart(); // if there are multiple hits, we'll get the last one.
+ protected static int getPositionInTarget(final GenomeLoc interval, final List targets) {
+ if (targets.size() > 0) {
+ final GenomeLoc target = targets.get(0);
+
+ // interval is larger on both ends than the target -- return the maximum distance to either side as a negative number. (min of 2 negative numbers)
+ if (interval.getStart() < target.getStart() && interval.getStop() > target.getStop())
+ return Math.min(target.getStart() - interval.getStart(), target.getStop() - interval.getStop());
+
+ // interval is a left overlap -- return a negative number representing the distance between the two starts
+ else if (interval.getStart() < target.getStart())
+ return interval.getStart() - target.getStart();
+
+ // interval is a right overlap -- return a negative number representing the distance between the two stops
+ else if (interval.getStop() > target.getStop())
+ return target.getStop() - interval.getStop();
+
+ // interval is fully contained -- return the smallest distance to the edge of the target (left or right) as a positive number
+ return Math.min(interval.getStart() - target.getStart(), target.getStop() - interval.getStop());
}
- return result;
+ // if there is no overlapping interval, return int min value.
+ return Integer.MIN_VALUE;
+ }
+
+ private int getTargetSize(final List overlappingIntervals) {
+ return overlappingIntervals.size() > 0 ? overlappingIntervals.get(0).size() : -1;
}
String interpret(final Metrics metrics, final GenomeLoc interval) {
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervalsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervalsUnitTest.java
index 7d6d05736..7ab891bd0 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervalsUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervalsUnitTest.java
@@ -46,6 +46,8 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.missing;
+import it.unimi.dsi.fastutil.objects.ObjectArrayList;
+import java.util.List;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
@@ -57,7 +59,6 @@ import org.testng.annotations.Test;
* User: carneiro
* Date: 9/20/13
* Time: 3:59 PM
- * To change this template use File | Settings | File Templates.
*/
public class QualifyMissingIntervalsUnitTest extends BaseTest {
@Test(enabled = true)
@@ -92,4 +93,39 @@ public class QualifyMissingIntervalsUnitTest extends BaseTest {
for (Metrics m : array)
Assert.assertEquals(tool.interpret(m, smallInterval), QualifyMissingIntervals.Interpretation.SMALL_INTERVAL.toString());
}
+
+ @Test(enabled = true)
+ void testGetPositionInTarget () {
+ final UnvalidatingGenomeLoc target = new UnvalidatingGenomeLoc("a", 0, 30, 50);
+ final List targets = new ObjectArrayList<>(1);
+ targets.add(target);
+
+ // left overlap
+ UnvalidatingGenomeLoc interval = new UnvalidatingGenomeLoc("a", 0, 10, 50);
+ Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), -20);
+
+ // right overlap
+ interval = new UnvalidatingGenomeLoc("a", 0, 40, 60);
+ Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), -10);
+
+ // interval > target with short right tail
+ interval = new UnvalidatingGenomeLoc("a", 0, 10, 60);
+ Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), -10);
+
+ // interval > target with short left tail
+ interval = new UnvalidatingGenomeLoc("a", 0, 10, 80);
+ Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), -30);
+
+ // interval < target with short right tail
+ interval = new UnvalidatingGenomeLoc("a", 0, 32, 40);
+ Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), 2);
+
+ // interval < target with short left tail
+ interval = new UnvalidatingGenomeLoc("a", 0, 40, 42);
+ Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), 8);
+
+ // no overlap
+ interval = new UnvalidatingGenomeLoc("a", 0, 40, 42);
+ Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, new ObjectArrayList()), Integer.MIN_VALUE);
+ }
}