From dde9fd8a1569a2ad8f6329118fddca0732aaa554 Mon Sep 17 00:00:00 2001 From: aaron Date: Wed, 10 Mar 2010 22:54:58 +0000 Subject: [PATCH] some rods-for-reads cleaning and performance improvements. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2979 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/GenomeAnalysisEngine.java | 6 +-- .../ManagingReferenceOrderedView.java | 9 ++-- .../ReadBasedReferenceOrderedView.java | 2 +- .../datasources/providers/RodLocusView.java | 4 +- .../ReferenceOrderedDataSource.java | 41 +++++++++++-------- .../sting/utils/MergingIterator.java | 10 ++--- .../providers/ReferenceOrderedViewTest.java | 6 +-- .../ReferenceOrderedDataPoolTest.java | 19 +++++---- 8 files changed, 51 insertions(+), 46 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 473bd6eb3..4cb029a46 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -266,7 +266,7 @@ public class GenomeAnalysisEngine { validateSuppliedReferenceOrderedDataAgainstWalker(my_walker, rods); - rodDataSources = getReferenceOrderedDataSources(rods); + rodDataSources = getReferenceOrderedDataSources(my_walker, rods); } /** @@ -744,10 +744,10 @@ public class GenomeAnalysisEngine { * @param rods the reference order data to execute using * @return A list of reference-ordered data sources. */ - private List getReferenceOrderedDataSources(List> rods) { + private List getReferenceOrderedDataSources(Walker walker, List> rods) { List dataSources = new ArrayList(); for (ReferenceOrderedData rod : rods) - dataSources.add(new ReferenceOrderedDataSource(rod)); + dataSources.add(new ReferenceOrderedDataSource(walker, rod)); return dataSources; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java index fa226f152..af723bfc4 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java @@ -2,8 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; -import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.utils.GenomeLoc; import java.util.List; @@ -38,7 +37,7 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView { */ public ManagingReferenceOrderedView( ShardDataProvider provider ) { for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) - states.add( new ReferenceOrderedDataState( dataSource, ((FlashBackIterator)dataSource.seek(provider.getShard()) )) ); + states.add( new ReferenceOrderedDataState( dataSource, (dataSource.seek(provider.getShard()) )) ); provider.register(this); } @@ -74,9 +73,9 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView { */ class ReferenceOrderedDataState { public final ReferenceOrderedDataSource dataSource; - public final FlashBackIterator iterator; + public final LocationAwareSeekableRODIterator iterator; - public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, FlashBackIterator iterator ) { + public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator ) { this.dataSource = dataSource; this.iterator = iterator; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java index c4c9ef38b..f99ef3a3c 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java @@ -102,7 +102,7 @@ class WindowedData { states = new ArrayList(); if (provider != null && provider.getReferenceOrderedData() != null) for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData()) - states.add(new RMDDataState(dataSource, ((FlashBackIterator) dataSource.seek(GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), rec.getAlignmentStart()))))); + states.add(new RMDDataState(dataSource, (LocationAwareSeekableRODIterator)dataSource.seek(GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), rec.getAlignmentStart())))); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index cf129d074..0205fed4f 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MergingIterator; @@ -68,7 +68,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { // grab the ROD iterator from the data source, and compute the first location in this shard, forwarding // the iterator to immediately before it, so that it can be added to the merging iterator primed for // next() to return the first real ROD in this shard - FlashBackIterator it = (FlashBackIterator)dataSource.seek(provider.getShard()); + LocationAwareSeekableRODIterator it = dataSource.seek(provider.getShard()); it.seekForward(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart()-1)); states.add(new ReferenceOrderedDataState(dataSource,it)); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index f23e59dac..bfd8482dd 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -1,9 +1,14 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; +import org.broadinstitute.sting.gatk.refdata.IntervalRod; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; @@ -40,9 +45,9 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * Create a new reference-ordered data source. * @param rod */ - public ReferenceOrderedDataSource( ReferenceOrderedData rod) { + public ReferenceOrderedDataSource( Walker walker, ReferenceOrderedData rod) { this.rod = rod; - this.iteratorPool = new ReferenceOrderedDataPool( rod ); + this.iteratorPool = new ReferenceOrderedDataPool( walker, rod ); } /** @@ -66,9 +71,9 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * @param shard Shard that points to the selected position. * @return Iterator through the data. */ - public Iterator seek( Shard shard ) { + public LocationAwareSeekableRODIterator seek( Shard shard ) { DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream(); - FlashBackIterator RODIterator = iteratorPool.iterator(dataStreamSegment); + LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment); return RODIterator; } @@ -81,7 +86,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { */ public Iterator seek(GenomeLoc loc) { DataStreamSegment dataStreamSegment = new MappedStreamSegment(loc); - FlashBackIterator RODIterator = iteratorPool.iterator(dataStreamSegment); + LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment); return RODIterator; } @@ -90,7 +95,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * Close the specified iterator, returning it to the pool. * @param iterator Iterator to close. */ - public void close( FlashBackIterator iterator ) { + public void close( LocationAwareSeekableRODIterator iterator ) { this.iteratorPool.release(iterator); } @@ -99,9 +104,11 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { /** * A pool of reference-ordered data iterators. */ -class ReferenceOrderedDataPool extends ResourcePool { +class ReferenceOrderedDataPool extends ResourcePool { private final ReferenceOrderedData rod; - public ReferenceOrderedDataPool( ReferenceOrderedData rod ) { + boolean flashbackData = false; + public ReferenceOrderedDataPool( Walker walker, ReferenceOrderedData rod ) { + if (walker instanceof ReadWalker) flashbackData = true; // && (rod.getType() != IntervalRod.class) this.rod = rod; } @@ -110,8 +117,8 @@ class ReferenceOrderedDataPool extends ResourcePool resources ) { + public LocationAwareSeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List resources ) { if(segment instanceof MappedStreamSegment) { GenomeLoc position = ((MappedStreamSegment)segment).getFirstLocation(); - for( FlashBackIterator RODIterator : resources ) { + for( LocationAwareSeekableRODIterator RODIterator : resources ) { if( (RODIterator.position() == null && RODIterator.hasNext()) || (RODIterator.position() != null && RODIterator.position().isBefore(position)) ) return RODIterator; - if (RODIterator.position() != null && RODIterator.canFlashBackTo(position)) { - RODIterator.flashBackTo(position); + if (RODIterator.position() != null && RODIterator instanceof FlashBackIterator && ((FlashBackIterator)RODIterator).canFlashBackTo(position)) { + ((FlashBackIterator)RODIterator).flashBackTo(position); return RODIterator; } @@ -151,15 +158,15 @@ class ReferenceOrderedDataPool extends ResourcePool, Iterable queue = new PriorityQueue(); private class Element implements Comparable { - public FlashBackIterator it = null; - //public E value = null; + public LocationAwareSeekableRODIterator it = null; public GenomeLoc nextLoc = null; public Element(Iterator it) { - if ( it instanceof FlashBackIterator) { - this.it = (FlashBackIterator)it; + if ( it instanceof LocationAwareSeekableRODIterator) { + this.it = (LocationAwareSeekableRODIterator)it; if ( ! it.hasNext() ) throw new StingException("Iterator is empty"); update(); } else { diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java index 931f23457..858cff690 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewTest.java @@ -67,7 +67,7 @@ public class ReferenceOrderedViewTest extends BaseTest { public void testSingleBinding() { File file = new File(testDir + "TabularDataTest.dat"); ReferenceOrderedData rod = new ReferenceOrderedData("tableTest", file, TabularROD.class); - ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(rod); + ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(null,rod); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); @@ -90,9 +90,9 @@ public class ReferenceOrderedViewTest extends BaseTest { File file = new File(testDir + "TabularDataTest.dat"); ReferenceOrderedData rod1 = new ReferenceOrderedData("tableTest1", file, TabularROD.class); - ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(rod1); + ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(null,rod1); ReferenceOrderedData rod2 = new ReferenceOrderedData("tableTest2", file, TabularROD.class); - ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(rod2); + ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(null,rod2); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java index f5f2fe01e..d40782f0d 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolTest.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -55,8 +56,8 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testCreateSingleIterator() { - ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod); - FlashBackIterator iterator = (FlashBackIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); + ResourcePool iteratorPool = new ReferenceOrderedDataPool(null,rod); + LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); @@ -76,11 +77,11 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testCreateMultipleIterators() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); - FlashBackIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(null,rod); + LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); // Create a new iterator at position 2. - FlashBackIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) ); + LocationAwareSeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); @@ -126,8 +127,8 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testIteratorConservation() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); - FlashBackIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(null,rod); + LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); @@ -161,8 +162,8 @@ public class ReferenceOrderedDataPoolTest extends BaseTest { @Test public void testIteratorCreation() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); - FlashBackIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(null,rod); + LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());