Merge pull request #393 from broadinstitute/mc_qualify_updates_for_cser
Length metric updates to QualifyMissingIntervals
This commit is contained in:
commit
49d489cbf8
|
|
@ -76,10 +76,12 @@ import java.util.List;
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>Average Base Quality</li>
|
* <li>Average Base Quality</li>
|
||||||
* <li>Average Mapping Quality</li>
|
* <li>Average Mapping Quality</li>
|
||||||
|
* <li>Average Depth</li>
|
||||||
* <li>GC Content</li>
|
* <li>GC Content</li>
|
||||||
* <li>Position in the target</li>
|
* <li>Position in the target (Integer.MIN_VALUE if no overlap)</li>
|
||||||
* <li>Coding Sequence / Intron</li>
|
* <li>Length of the overlapping target (zero if no overlap)</li>
|
||||||
* <li>Length of the uncovered area</li>
|
* <li>Coding Sequence / Intron (optional)</li>
|
||||||
|
* <li>Length of the uncovered interval</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* <h3>Input</h3>
|
* <h3>Input</h3>
|
||||||
|
|
@ -89,7 +91,7 @@ import java.util.List;
|
||||||
*
|
*
|
||||||
* <h3>Output</h3>
|
* <h3>Output</h3>
|
||||||
* <p>
|
* <p>
|
||||||
* GC content calculations per interval.
|
* GC content, distance from the end of the target, coding sequence intersection, mapping and base quality averages and average depth per "missing" interval.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* <h3>Example</h3>
|
* <h3>Example</h3>
|
||||||
|
|
@ -108,12 +110,24 @@ import java.util.List;
|
||||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
|
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
|
||||||
@By(DataSource.REFERENCE)
|
@By(DataSource.REFERENCE)
|
||||||
public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics> implements NanoSchedulable {
|
public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics> implements NanoSchedulable {
|
||||||
|
/**
|
||||||
|
* A single GATKReport table with the qualifications on why the intervals passed by the -L argument were missing.
|
||||||
|
*/
|
||||||
@Output
|
@Output
|
||||||
protected PrintStream out;
|
protected PrintStream out;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List of targets used in the experiment. This file will be used to calculate the distance your missing
|
||||||
|
* intervals are to the targets (usually exons). Typically this is your hybrid selection targets file
|
||||||
|
* (e.g. Agilent exome target list)
|
||||||
|
*/
|
||||||
@Argument(shortName = "targets", required = true)
|
@Argument(shortName = "targets", required = true)
|
||||||
public String targetsFile;
|
public String targetsFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List of coding sequence intervals (exons) if different from the targets file, to distinguish intervals
|
||||||
|
* that overlap the cds and intervals that don't.
|
||||||
|
*/
|
||||||
@Argument(shortName = "cds", required = false)
|
@Argument(shortName = "cds", required = false)
|
||||||
public String cdsFile = null;
|
public String cdsFile = null;
|
||||||
|
|
||||||
|
|
@ -224,15 +238,18 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
||||||
|
|
||||||
public void onTraversalDone(List<Pair<GenomeLoc, Metrics>> results) {
|
public void onTraversalDone(List<Pair<GenomeLoc, Metrics>> results) {
|
||||||
for (Pair<GenomeLoc, Metrics> r : results) {
|
for (Pair<GenomeLoc, Metrics> r : results) {
|
||||||
GenomeLoc interval = r.getFirst();
|
final GenomeLoc interval = r.getFirst();
|
||||||
Metrics metrics = r.getSecond();
|
final Metrics metrics = r.getSecond();
|
||||||
|
final List<GenomeLoc> overlappingIntervals = target.getOverlapping(interval);
|
||||||
|
|
||||||
simpleReport.addRow(
|
simpleReport.addRow(
|
||||||
interval.toString(),
|
interval.toString(),
|
||||||
metrics.gccontent(),
|
metrics.gccontent(),
|
||||||
metrics.baseQual(),
|
metrics.baseQual(),
|
||||||
metrics.mapQual(),
|
metrics.mapQual(),
|
||||||
metrics.depth(),
|
metrics.depth(),
|
||||||
getPositionInTarget(interval),
|
getPositionInTarget(interval, overlappingIntervals),
|
||||||
|
getTargetSize(overlappingIntervals),
|
||||||
cds.overlaps(interval),
|
cds.overlaps(interval),
|
||||||
interval.size(),
|
interval.size(),
|
||||||
interpret(metrics, interval)
|
interpret(metrics, interval)
|
||||||
|
|
@ -242,13 +259,31 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
||||||
out.close();
|
out.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
private int getPositionInTarget(GenomeLoc interval) {
|
protected static int getPositionInTarget(final GenomeLoc interval, final List<GenomeLoc> targets) {
|
||||||
final List<GenomeLoc> hits = target.getOverlapping(interval);
|
if (targets.size() > 0) {
|
||||||
int result = 0;
|
final GenomeLoc target = targets.get(0);
|
||||||
for (GenomeLoc hit : hits) {
|
|
||||||
result = interval.getStart() - hit.getStart(); // if there are multiple hits, we'll get the last one.
|
// interval is larger on both ends than the target -- return the maximum distance to either side as a negative number. (min of 2 negative numbers)
|
||||||
|
if (interval.getStart() < target.getStart() && interval.getStop() > target.getStop())
|
||||||
|
return Math.min(target.getStart() - interval.getStart(), target.getStop() - interval.getStop());
|
||||||
|
|
||||||
|
// interval is a left overlap -- return a negative number representing the distance between the two starts
|
||||||
|
else if (interval.getStart() < target.getStart())
|
||||||
|
return interval.getStart() - target.getStart();
|
||||||
|
|
||||||
|
// interval is a right overlap -- return a negative number representing the distance between the two stops
|
||||||
|
else if (interval.getStop() > target.getStop())
|
||||||
|
return target.getStop() - interval.getStop();
|
||||||
|
|
||||||
|
// interval is fully contained -- return the smallest distance to the edge of the target (left or right) as a positive number
|
||||||
|
return Math.min(interval.getStart() - target.getStart(), target.getStop() - interval.getStop());
|
||||||
}
|
}
|
||||||
return result;
|
// if there is no overlapping interval, return int min value.
|
||||||
|
return Integer.MIN_VALUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getTargetSize(final List<GenomeLoc> overlappingIntervals) {
|
||||||
|
return overlappingIntervals.size() > 0 ? overlappingIntervals.get(0).size() : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
String interpret(final Metrics metrics, final GenomeLoc interval) {
|
String interpret(final Metrics metrics, final GenomeLoc interval) {
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.missing;
|
package org.broadinstitute.sting.gatk.walkers.diagnostics.missing;
|
||||||
|
|
||||||
|
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
|
||||||
|
import java.util.List;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
|
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
|
||||||
|
|
@ -57,7 +59,6 @@ import org.testng.annotations.Test;
|
||||||
* User: carneiro
|
* User: carneiro
|
||||||
* Date: 9/20/13
|
* Date: 9/20/13
|
||||||
* Time: 3:59 PM
|
* Time: 3:59 PM
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
*/
|
||||||
public class QualifyMissingIntervalsUnitTest extends BaseTest {
|
public class QualifyMissingIntervalsUnitTest extends BaseTest {
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
|
|
@ -92,4 +93,39 @@ public class QualifyMissingIntervalsUnitTest extends BaseTest {
|
||||||
for (Metrics m : array)
|
for (Metrics m : array)
|
||||||
Assert.assertEquals(tool.interpret(m, smallInterval), QualifyMissingIntervals.Interpretation.SMALL_INTERVAL.toString());
|
Assert.assertEquals(tool.interpret(m, smallInterval), QualifyMissingIntervals.Interpretation.SMALL_INTERVAL.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
void testGetPositionInTarget () {
|
||||||
|
final UnvalidatingGenomeLoc target = new UnvalidatingGenomeLoc("a", 0, 30, 50);
|
||||||
|
final List<GenomeLoc> targets = new ObjectArrayList<>(1);
|
||||||
|
targets.add(target);
|
||||||
|
|
||||||
|
// left overlap
|
||||||
|
UnvalidatingGenomeLoc interval = new UnvalidatingGenomeLoc("a", 0, 10, 50);
|
||||||
|
Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), -20);
|
||||||
|
|
||||||
|
// right overlap
|
||||||
|
interval = new UnvalidatingGenomeLoc("a", 0, 40, 60);
|
||||||
|
Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), -10);
|
||||||
|
|
||||||
|
// interval > target with short right tail
|
||||||
|
interval = new UnvalidatingGenomeLoc("a", 0, 10, 60);
|
||||||
|
Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), -10);
|
||||||
|
|
||||||
|
// interval > target with short left tail
|
||||||
|
interval = new UnvalidatingGenomeLoc("a", 0, 10, 80);
|
||||||
|
Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), -30);
|
||||||
|
|
||||||
|
// interval < target with short right tail
|
||||||
|
interval = new UnvalidatingGenomeLoc("a", 0, 32, 40);
|
||||||
|
Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), 2);
|
||||||
|
|
||||||
|
// interval < target with short left tail
|
||||||
|
interval = new UnvalidatingGenomeLoc("a", 0, 40, 42);
|
||||||
|
Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, targets), 8);
|
||||||
|
|
||||||
|
// no overlap
|
||||||
|
interval = new UnvalidatingGenomeLoc("a", 0, 40, 42);
|
||||||
|
Assert.assertEquals(QualifyMissingIntervals.getPositionInTarget(interval, new ObjectArrayList<GenomeLoc>()), Integer.MIN_VALUE);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue