From e5106c9924ef4190e72b06b4adb75f0aa79f219a Mon Sep 17 00:00:00 2001 From: andrewk Date: Tue, 24 Nov 2009 18:07:23 +0000 Subject: [PATCH] Hybrid selection performance statistics now include counts of the number of adjacent baits (0,1,2) using OverlapDetector and optionally include assayed bait quantities input via interval lists. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2143 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/HybSelPerformanceWalker.java | 48 ++++++++++++++----- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HybSelPerformanceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HybSelPerformanceWalker.java index b3a9b67e8..522cfcb17 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HybSelPerformanceWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HybSelPerformanceWalker.java @@ -5,13 +5,13 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.By; import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; - + import java.util.List; import java.util.Collection; import java.io.IOException; @@ -41,6 +41,9 @@ public class HybSelPerformanceWalker extends LocusWalker= 2) { sum.hitTwice = true; sum.positionsOver2x++;} - if (value >= 10) { sum.positionsOver10x++; } - if (value >= 20) { sum.positionsOver20x++; } - if (value >= 30) { sum.positionsOver30x++; } + public TargetInfo reduce(Integer depth, TargetInfo sum) { + sum.counts += depth; + if (depth >= 2) { sum.hitTwice = true; sum.positionsOver2x++;} + if (depth >= 10) { sum.positionsOver10x++; } + if (depth >= 20) { sum.positionsOver20x++; } + if (depth >= 30) { sum.positionsOver30x++; } return sum; } @@ -125,7 +128,7 @@ public class HybSelPerformanceWalker extends LocusWalker> results) { - out.println("location\tlength\tgc\tavg_coverage\tnormalized_coverage\thit_twice\tfreestanding\tboosted\tbases_over_2x\tbases_over_10x\tbases_over_20x\tbases_over_30x\tgene_name"); + out.println("location\tlength\tgc\tavg_coverage\tnormalized_coverage\thit_twice\tfreestanding\tboosted\tbases_over_2x\tbases_over_10x\tbases_over_20x\tbases_over_30x\tgene_name\tbait_quantity\tadjacent_baits"); // first zip through and build an overlap detector of all the intervals, so later // we can calculate if this interval is free-standing @@ -143,8 +146,22 @@ public class HybSelPerformanceWalker extends LocusWalker bait_quant = new OverlapDetector(0,0); + if (BAIT_QUANT_FILE != null) { + IntervalList il = IntervalList.fromFile(BAIT_QUANT_FILE); + List l = il.getIntervals(); + bait_quant.addAll(l, l); + } - + // Create a detector of adjacent baits + OverlapDetector adjacent_bait_detector = new OverlapDetector(-1,-1); + for(Pair pair : results) { + GenomeLoc target = pair.getFirst(); + Interval interval = makeInterval(target); + adjacent_bait_detector.addLhs(interval, interval); + } // now zip through and calculate the total average coverage long totalCoverage = 0; @@ -185,16 +202,21 @@ public class HybSelPerformanceWalker extends LocusWalker bait_quant_hits = bait_quant.getOverlaps(targetInterval); + String bait_quant_string = (bait_quant_hits.size() == 1) ? bait_quant_hits.iterator().next().getName() : "0"; + if (bait_quant_hits.size() <= 1) { out.printf("Warning: multiple bait quantity intervals detected; perhaps bait quantity interval lengths don't match primary interval list specified with -L"); } + int adjacent_baits = adjacent_bait_detector.getOverlaps(targetInterval).size() - 1; - - out.printf("%s:%d-%d\t%d\t%6.4f\t%6.4f\t%6.4f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s\n", + out.printf("%s:%d-%d\t%d\t%6.4f\t%6.4f\t%6.4f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t%d\n", target.getContig(), target.getStart(), target.getStop(), length, gc, avgCoverage, normCoverage, ((ti.hitTwice)?1:0), ((freestanding)?1:0), ((boosted)?1:0), ti.positionsOver2x, ti.positionsOver10x, ti.positionsOver20x, ti.positionsOver30x, - geneName + geneName, + bait_quant_string, + adjacent_baits );