Some rods-for-reads cleanup and performance improvements.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2979 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-03-10 22:54:58 +00:00
parent 4f4555c80f
commit dde9fd8a15
8 changed files with 51 additions and 46 deletions

View File

@ -266,7 +266,7 @@ public class GenomeAnalysisEngine {
validateSuppliedReferenceOrderedDataAgainstWalker(my_walker, rods); validateSuppliedReferenceOrderedDataAgainstWalker(my_walker, rods);
rodDataSources = getReferenceOrderedDataSources(rods); rodDataSources = getReferenceOrderedDataSources(my_walker, rods);
} }
/** /**
@ -744,10 +744,10 @@ public class GenomeAnalysisEngine {
* @param rods the reference order data to execute using * @param rods the reference order data to execute using
* @return A list of reference-ordered data sources. * @return A list of reference-ordered data sources.
*/ */
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) { private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(Walker walker, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>(); List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
for (ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods) for (ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods)
dataSources.add(new ReferenceOrderedDataSource(rod)); dataSources.add(new ReferenceOrderedDataSource(walker, rod));
return dataSources; return dataSources;
} }

View File

@ -2,8 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.List; import java.util.List;
@ -38,7 +37,7 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
*/ */
public ManagingReferenceOrderedView( ShardDataProvider provider ) { public ManagingReferenceOrderedView( ShardDataProvider provider ) {
for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() )
states.add( new ReferenceOrderedDataState( dataSource, ((FlashBackIterator)dataSource.seek(provider.getShard()) )) ); states.add( new ReferenceOrderedDataState( dataSource, (dataSource.seek(provider.getShard()) )) );
provider.register(this); provider.register(this);
} }
@ -74,9 +73,9 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
*/ */
class ReferenceOrderedDataState { class ReferenceOrderedDataState {
public final ReferenceOrderedDataSource dataSource; public final ReferenceOrderedDataSource dataSource;
public final FlashBackIterator iterator; public final LocationAwareSeekableRODIterator iterator;
public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, FlashBackIterator iterator ) { public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator ) {
this.dataSource = dataSource; this.dataSource = dataSource;
this.iterator = iterator; this.iterator = iterator;
} }

View File

@ -102,7 +102,7 @@ class WindowedData {
states = new ArrayList<RMDDataState>(); states = new ArrayList<RMDDataState>();
if (provider != null && provider.getReferenceOrderedData() != null) if (provider != null && provider.getReferenceOrderedData() != null)
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData()) for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
states.add(new RMDDataState(dataSource, ((FlashBackIterator) dataSource.seek(GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), rec.getAlignmentStart()))))); states.add(new RMDDataState(dataSource, (LocationAwareSeekableRODIterator)dataSource.seek(GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), rec.getAlignmentStart()))));
} }
/** /**

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MergingIterator; import org.broadinstitute.sting.utils.MergingIterator;
@ -68,7 +68,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
// grab the ROD iterator from the data source, and compute the first location in this shard, forwarding // grab the ROD iterator from the data source, and compute the first location in this shard, forwarding
// the iterator to immediately before it, so that it can be added to the merging iterator primed for // the iterator to immediately before it, so that it can be added to the merging iterator primed for
// next() to return the first real ROD in this shard // next() to return the first real ROD in this shard
FlashBackIterator it = (FlashBackIterator)dataSource.seek(provider.getShard()); LocationAwareSeekableRODIterator it = dataSource.seek(provider.getShard());
it.seekForward(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart()-1)); it.seekForward(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart()-1));
states.add(new ReferenceOrderedDataState(dataSource,it)); states.add(new ReferenceOrderedDataState(dataSource,it));

View File

@ -1,9 +1,14 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources; package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
@ -40,9 +45,9 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* Create a new reference-ordered data source. * Create a new reference-ordered data source.
* @param rod * @param rod
*/ */
public ReferenceOrderedDataSource( ReferenceOrderedData rod) { public ReferenceOrderedDataSource( Walker walker, ReferenceOrderedData rod) {
this.rod = rod; this.rod = rod;
this.iteratorPool = new ReferenceOrderedDataPool( rod ); this.iteratorPool = new ReferenceOrderedDataPool( walker, rod );
} }
/** /**
@ -66,9 +71,9 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* @param shard Shard that points to the selected position. * @param shard Shard that points to the selected position.
* @return Iterator through the data. * @return Iterator through the data.
*/ */
public Iterator seek( Shard shard ) { public LocationAwareSeekableRODIterator seek( Shard shard ) {
DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream(); DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream();
FlashBackIterator RODIterator = iteratorPool.iterator(dataStreamSegment); LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
return RODIterator; return RODIterator;
} }
@ -81,7 +86,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
*/ */
public Iterator seek(GenomeLoc loc) { public Iterator seek(GenomeLoc loc) {
DataStreamSegment dataStreamSegment = new MappedStreamSegment(loc); DataStreamSegment dataStreamSegment = new MappedStreamSegment(loc);
FlashBackIterator RODIterator = iteratorPool.iterator(dataStreamSegment); LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
return RODIterator; return RODIterator;
} }
@ -90,7 +95,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* Close the specified iterator, returning it to the pool. * Close the specified iterator, returning it to the pool.
* @param iterator Iterator to close. * @param iterator Iterator to close.
*/ */
public void close( FlashBackIterator iterator ) { public void close( LocationAwareSeekableRODIterator iterator ) {
this.iteratorPool.release(iterator); this.iteratorPool.release(iterator);
} }
@ -99,9 +104,11 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
/** /**
* A pool of reference-ordered data iterators. * A pool of reference-ordered data iterators.
*/ */
class ReferenceOrderedDataPool extends ResourcePool<FlashBackIterator, FlashBackIterator> { class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
private final ReferenceOrderedData<? extends ReferenceOrderedDatum> rod; private final ReferenceOrderedData<? extends ReferenceOrderedDatum> rod;
public ReferenceOrderedDataPool( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod ) { boolean flashbackData = false;
public ReferenceOrderedDataPool( Walker walker, ReferenceOrderedData<? extends ReferenceOrderedDatum> rod ) {
if (walker instanceof ReadWalker) flashbackData = true; // && (rod.getType() != IntervalRod.class)
this.rod = rod; this.rod = rod;
} }
@ -110,8 +117,8 @@ class ReferenceOrderedDataPool extends ResourcePool<FlashBackIterator, FlashBack
* to be completely independent of any other iterator. * to be completely independent of any other iterator.
* @return The newly created resource. * @return The newly created resource.
*/ */
public FlashBackIterator createNewResource() { public LocationAwareSeekableRODIterator createNewResource() {
return new FlashBackIterator(rod.iterator()); return (flashbackData) ? new FlashBackIterator(rod.iterator()) : rod.iterator();
} }
/** /**
@ -121,17 +128,17 @@ class ReferenceOrderedDataPool extends ResourcePool<FlashBackIterator, FlashBack
* @param resources @{inheritedDoc} * @param resources @{inheritedDoc}
* @return @{inheritedDoc} * @return @{inheritedDoc}
*/ */
public FlashBackIterator selectBestExistingResource( DataStreamSegment segment, List<FlashBackIterator> resources ) { public LocationAwareSeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List<LocationAwareSeekableRODIterator> resources ) {
if(segment instanceof MappedStreamSegment) { if(segment instanceof MappedStreamSegment) {
GenomeLoc position = ((MappedStreamSegment)segment).getFirstLocation(); GenomeLoc position = ((MappedStreamSegment)segment).getFirstLocation();
for( FlashBackIterator RODIterator : resources ) { for( LocationAwareSeekableRODIterator RODIterator : resources ) {
if( (RODIterator.position() == null && RODIterator.hasNext()) || if( (RODIterator.position() == null && RODIterator.hasNext()) ||
(RODIterator.position() != null && RODIterator.position().isBefore(position)) ) (RODIterator.position() != null && RODIterator.position().isBefore(position)) )
return RODIterator; return RODIterator;
if (RODIterator.position() != null && RODIterator.canFlashBackTo(position)) { if (RODIterator.position() != null && RODIterator instanceof FlashBackIterator && ((FlashBackIterator)RODIterator).canFlashBackTo(position)) {
RODIterator.flashBackTo(position); ((FlashBackIterator)RODIterator).flashBackTo(position);
return RODIterator; return RODIterator;
} }
@ -151,15 +158,15 @@ class ReferenceOrderedDataPool extends ResourcePool<FlashBackIterator, FlashBack
/** /**
* In this case, the iterator is the resource. Pass it through. * In this case, the iterator is the resource. Pass it through.
*/ */
public FlashBackIterator createIteratorFromResource( DataStreamSegment segment, FlashBackIterator resource ) { public LocationAwareSeekableRODIterator createIteratorFromResource( DataStreamSegment segment, LocationAwareSeekableRODIterator resource ) {
return resource; return resource;
} }
/** /**
* kill the buffers in the iterator * kill the buffers in the iterator
*/ */
public void closeResource( FlashBackIterator resource ) { public void closeResource( LocationAwareSeekableRODIterator resource ) {
resource.close(); if (resource instanceof FlashBackIterator) ((FlashBackIterator)resource).close();
} }
} }

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.utils; package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import java.util.*; import java.util.*;
@ -10,13 +9,12 @@ public class MergingIterator implements Iterator<RODRecordList>, Iterable<RODRec
PriorityQueue<Element> queue = new PriorityQueue<Element>(); PriorityQueue<Element> queue = new PriorityQueue<Element>();
private class Element implements Comparable<Element> { private class Element implements Comparable<Element> {
public FlashBackIterator it = null; public LocationAwareSeekableRODIterator it = null;
//public E value = null;
public GenomeLoc nextLoc = null; public GenomeLoc nextLoc = null;
public Element(Iterator<RODRecordList> it) { public Element(Iterator<RODRecordList> it) {
if ( it instanceof FlashBackIterator) { if ( it instanceof LocationAwareSeekableRODIterator) {
this.it = (FlashBackIterator)it; this.it = (LocationAwareSeekableRODIterator)it;
if ( ! it.hasNext() ) throw new StingException("Iterator is empty"); if ( ! it.hasNext() ) throw new StingException("Iterator is empty");
update(); update();
} else { } else {

View File

@ -67,7 +67,7 @@ public class ReferenceOrderedViewTest extends BaseTest {
public void testSingleBinding() { public void testSingleBinding() {
File file = new File(testDir + "TabularDataTest.dat"); File file = new File(testDir + "TabularDataTest.dat");
ReferenceOrderedData rod = new ReferenceOrderedData("tableTest", file, TabularROD.class); ReferenceOrderedData rod = new ReferenceOrderedData("tableTest", file, TabularROD.class);
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(rod); ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(null,rod);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
@ -90,9 +90,9 @@ public class ReferenceOrderedViewTest extends BaseTest {
File file = new File(testDir + "TabularDataTest.dat"); File file = new File(testDir + "TabularDataTest.dat");
ReferenceOrderedData rod1 = new ReferenceOrderedData("tableTest1", file, TabularROD.class); ReferenceOrderedData rod1 = new ReferenceOrderedData("tableTest1", file, TabularROD.class);
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(rod1); ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(null,rod1);
ReferenceOrderedData rod2 = new ReferenceOrderedData("tableTest2", file, TabularROD.class); ReferenceOrderedData rod2 = new ReferenceOrderedData("tableTest2", file, TabularROD.class);
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(rod2); ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(null,rod2);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources; package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.junit.Before; import org.junit.Before;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
@ -55,8 +56,8 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test @Test
public void testCreateSingleIterator() { public void testCreateSingleIterator() {
ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod); ResourcePool iteratorPool = new ReferenceOrderedDataPool(null,rod);
FlashBackIterator iterator = (FlashBackIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -76,11 +77,11 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test @Test
public void testCreateMultipleIterators() { public void testCreateMultipleIterators() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(null,rod);
FlashBackIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
// Create a new iterator at position 2. // Create a new iterator at position 2.
FlashBackIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) ); LocationAwareSeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -126,8 +127,8 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test @Test
public void testIteratorConservation() { public void testIteratorConservation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(null,rod);
FlashBackIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -161,8 +162,8 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test @Test
public void testIteratorCreation() { public void testIteratorCreation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(null,rod);
FlashBackIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());