From 6eb87bf58a60e65abf89f7096ecbe06bef790ee4 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 6 Oct 2011 15:57:49 -0400 Subject: [PATCH] RTC now caches all intervals as GenomeLocs (which is expected to take < 1Gb whole genome based on back of the envelope calculations with Matt) so that 1) we don't have to worry about emitting outside of the leaves in the hierarchical reductions and 2) we can emit the intervals in sorted order which is a big performance plus for the realigner. Integration tests change only because intervals whose start=stop are now printed as chr:start instead of chr:start-stop. --- .../indels/RealignerTargetCreator.java | 60 ++++++++++--------- ...RealignerTargetCreatorIntegrationTest.java | 4 +- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index e83667e8c..e1333b7f2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -50,6 +50,7 @@ import java.io.PrintStream; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.TreeSet; /** * Emits intervals for the Local Indel Realigner to target for realignment. @@ -253,9 +254,12 @@ public class RealignerTargetCreator extends RodWalker 1 ) + else sum.right = value; - else { - if ( sum.left.isReportableEvent() ) - out.println(sum.left.toString()); - sum.left = value; - } } else { if ( canBeMerged(sum.right, value) ) sum.right = mergeEvents(sum.right, value); else { if ( sum.right.isReportableEvent() ) - out.println(sum.right.toString()); + sum.intervals.add(sum.right.getLoc()); sum.right = value; } } @@ -355,18 +351,26 @@ public class RealignerTargetCreator extends RodWalker intervals = new TreeSet(); public EventPair(Event left, Event right) { this.left = left; this.right = right; } + + public EventPair(Event left, Event right, TreeSet set1, TreeSet set2) { + this.left = left; + this.right = right; + intervals.addAll(set1); + intervals.addAll(set2); + } } class Event { public int furthestStopPos; - public GenomeLoc loc; - public int eventStartPos; + private GenomeLoc loc; + private int eventStartPos; private int eventStopPos; private EVENT_TYPE type; private ArrayList pointEvents = new ArrayList(); @@ -421,8 +425,8 @@ public class RealignerTargetCreator extends RodWalker= 0 && eventStopPos - eventStartPos < maxIntervalSize; } - public String toString() { - return String.format("%s:%d-%d", loc.getContig(), eventStartPos, eventStopPos); + public GenomeLoc getLoc() { + return getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), eventStartPos, eventStopPos); } } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java index 1873ccbe2..fdb310428 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java @@ -13,13 +13,13 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( "-T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000 -o %s", 1, - Arrays.asList("e7accfa58415d6da80383953b1a3a986")); + Arrays.asList("3f0b63a393104d0c4158c7d1538153b8")); executeTest("test standard", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( "-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s", 1, - Arrays.asList("0367d39a122c8ac0899fb868a82ef728")); + Arrays.asList("5085054c78e256432dc75c85a9ac631c")); executeTest("test dbsnp", spec2); WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(