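Rods-for-reads cleanup and performance improvements: ROD data sources now hand out LocationAwareSeekableRODIterator, wrapping it in a FlashBackIterator only for read walkers.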

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2979 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-03-10 22:54:58 +00:00
parent 4f4555c80f
commit dde9fd8a15
8 changed files with 51 additions and 46 deletions

View File

@ -266,7 +266,7 @@ public class GenomeAnalysisEngine {
validateSuppliedReferenceOrderedDataAgainstWalker(my_walker, rods);
rodDataSources = getReferenceOrderedDataSources(rods);
rodDataSources = getReferenceOrderedDataSources(my_walker, rods);
}
/**
@ -744,10 +744,10 @@ public class GenomeAnalysisEngine {
* @param walker the walker being run; passed through to each data source
* @param rods the reference order data to execute using
* @return A list of reference-ordered data sources.
*/
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(Walker walker, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
    // One data source per ROD, in the same order as the input list.
    List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>(rods.size());
    for (ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods) {
        dataSources.add(new ReferenceOrderedDataSource(walker, rod));
    }
    return dataSources;
}

View File

@ -2,8 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.List;
@ -38,7 +37,7 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
*/
public ManagingReferenceOrderedView( ShardDataProvider provider ) {
    // Seek one iterator per ROD data source to the provider's shard and track it.
    for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) {
        states.add( new ReferenceOrderedDataState( dataSource, dataSource.seek(provider.getShard()) ) );
    }
    provider.register(this);
}
@ -74,9 +73,9 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
*/
class ReferenceOrderedDataState {
// The data source the iterator was obtained from.
public final ReferenceOrderedDataSource dataSource;
// Iterator over that data source.
public final LocationAwareSeekableRODIterator iterator;

public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator ) {
    this.dataSource = dataSource;
    this.iterator = iterator;
}

View File

@ -102,7 +102,7 @@ class WindowedData {
states = new ArrayList<RMDDataState>();
if (provider != null && provider.getReferenceOrderedData() != null)
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
states.add(new RMDDataState(dataSource, ((FlashBackIterator) dataSource.seek(GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), rec.getAlignmentStart())))));
states.add(new RMDDataState(dataSource, (LocationAwareSeekableRODIterator)dataSource.seek(GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), rec.getAlignmentStart()))));
}
/**

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MergingIterator;
@ -68,7 +68,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
// grab the ROD iterator from the data source, and compute the first location in this shard, forwarding
// the iterator to immediately before it, so that it can be added to the merging iterator primed for
// next() to return the first real ROD in this shard
FlashBackIterator it = (FlashBackIterator)dataSource.seek(provider.getShard());
LocationAwareSeekableRODIterator it = dataSource.seek(provider.getShard());
it.seekForward(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart()-1));
states.add(new ReferenceOrderedDataState(dataSource,it));

View File

@ -1,9 +1,14 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
@ -40,9 +45,9 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* Create a new reference-ordered data source.
* @param walker the walker this source will feed; may be null. It is handed to the
*               iterator pool, which uses it to decide whether iterators are
*               wrapped in a FlashBackIterator.
* @param rod the reference-ordered data to iterate over
*/
public ReferenceOrderedDataSource( Walker walker, ReferenceOrderedData rod) {
    this.rod = rod;
    this.iteratorPool = new ReferenceOrderedDataPool( walker, rod );
}
/**
@ -66,9 +71,9 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* @param shard Shard that points to the selected position.
* @return Iterator through the data.
*/
public Iterator seek( Shard shard ) {
public LocationAwareSeekableRODIterator seek( Shard shard ) {
DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream();
FlashBackIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
return RODIterator;
}
@ -81,7 +86,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
*/
public LocationAwareSeekableRODIterator seek(GenomeLoc loc) {
    // Return the pool's own iterator type (matching seek(Shard)) rather than a raw
    // Iterator, so callers no longer need to downcast the result.
    DataStreamSegment dataStreamSegment = new MappedStreamSegment(loc);
    return iteratorPool.iterator(dataStreamSegment);
}
@ -90,7 +95,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* Close the specified iterator, returning it to the pool.
* @param iterator Iterator to close.
*/
public void close( FlashBackIterator iterator ) {
public void close( LocationAwareSeekableRODIterator iterator ) {
this.iteratorPool.release(iterator);
}
@ -99,9 +104,11 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
/**
* A pool of reference-ordered data iterators.
*/
class ReferenceOrderedDataPool extends ResourcePool<FlashBackIterator, FlashBackIterator> {
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
private final ReferenceOrderedData<? extends ReferenceOrderedDatum> rod;
// True when new iterators should be wrapped in a FlashBackIterator; fixed at construction.
final boolean flashbackData;

public ReferenceOrderedDataPool( Walker walker, ReferenceOrderedData<? extends ReferenceOrderedDatum> rod ) {
    // Only read walkers get flash-back iterators; a null walker yields false.
    // Original author's note on a possible extra condition: && (rod.getType() != IntervalRod.class)
    this.flashbackData = walker instanceof ReadWalker;
    this.rod = rod;
}
@ -110,8 +117,8 @@ class ReferenceOrderedDataPool extends ResourcePool<FlashBackIterator, FlashBack
* to be completely independent of any other iterator.
* @return The newly created resource.
*/
public FlashBackIterator createNewResource() {
return new FlashBackIterator(rod.iterator());
public LocationAwareSeekableRODIterator createNewResource() {
return (flashbackData) ? new FlashBackIterator(rod.iterator()) : rod.iterator();
}
/**
@ -121,17 +128,17 @@ class ReferenceOrderedDataPool extends ResourcePool<FlashBackIterator, FlashBack
* @param resources @{inheritedDoc}
* @return @{inheritedDoc}
*/
public FlashBackIterator selectBestExistingResource( DataStreamSegment segment, List<FlashBackIterator> resources ) {
public LocationAwareSeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List<LocationAwareSeekableRODIterator> resources ) {
if(segment instanceof MappedStreamSegment) {
GenomeLoc position = ((MappedStreamSegment)segment).getFirstLocation();
for( FlashBackIterator RODIterator : resources ) {
for( LocationAwareSeekableRODIterator RODIterator : resources ) {
if( (RODIterator.position() == null && RODIterator.hasNext()) ||
(RODIterator.position() != null && RODIterator.position().isBefore(position)) )
return RODIterator;
if (RODIterator.position() != null && RODIterator.canFlashBackTo(position)) {
RODIterator.flashBackTo(position);
if (RODIterator.position() != null && RODIterator instanceof FlashBackIterator && ((FlashBackIterator)RODIterator).canFlashBackTo(position)) {
((FlashBackIterator)RODIterator).flashBackTo(position);
return RODIterator;
}
@ -151,15 +158,15 @@ class ReferenceOrderedDataPool extends ResourcePool<FlashBackIterator, FlashBack
/**
* In this case, the iterator is the resource. Pass it through.
*/
public FlashBackIterator createIteratorFromResource( DataStreamSegment segment, FlashBackIterator resource ) {
public LocationAwareSeekableRODIterator createIteratorFromResource( DataStreamSegment segment, LocationAwareSeekableRODIterator resource ) {
return resource;
}
/**
* kill the buffers in the iterator
*/
public void closeResource( FlashBackIterator resource ) {
resource.close();
public void closeResource( LocationAwareSeekableRODIterator resource ) {
if (resource instanceof FlashBackIterator) ((FlashBackIterator)resource).close();
}
}

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import java.util.*;
@ -10,13 +9,12 @@ public class MergingIterator implements Iterator<RODRecordList>, Iterable<RODRec
PriorityQueue<Element> queue = new PriorityQueue<Element>();
private class Element implements Comparable<Element> {
public FlashBackIterator it = null;
//public E value = null;
public LocationAwareSeekableRODIterator it = null;
public GenomeLoc nextLoc = null;
public Element(Iterator<RODRecordList> it) {
if ( it instanceof FlashBackIterator) {
this.it = (FlashBackIterator)it;
if ( it instanceof LocationAwareSeekableRODIterator) {
this.it = (LocationAwareSeekableRODIterator)it;
if ( ! it.hasNext() ) throw new StingException("Iterator is empty");
update();
} else {

View File

@ -67,7 +67,7 @@ public class ReferenceOrderedViewTest extends BaseTest {
public void testSingleBinding() {
File file = new File(testDir + "TabularDataTest.dat");
ReferenceOrderedData rod = new ReferenceOrderedData("tableTest", file, TabularROD.class);
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(rod);
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(null,rod);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
@ -90,9 +90,9 @@ public class ReferenceOrderedViewTest extends BaseTest {
File file = new File(testDir + "TabularDataTest.dat");
ReferenceOrderedData rod1 = new ReferenceOrderedData("tableTest1", file, TabularROD.class);
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(rod1);
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(null,rod1);
ReferenceOrderedData rod2 = new ReferenceOrderedData("tableTest2", file, TabularROD.class);
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(rod2);
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(null,rod2);
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
@ -55,8 +56,8 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testCreateSingleIterator() {
ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod);
FlashBackIterator iterator = (FlashBackIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
ResourcePool iteratorPool = new ReferenceOrderedDataPool(null,rod);
LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -76,11 +77,11 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testCreateMultipleIterators() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
FlashBackIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(null,rod);
LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
// Create a new iterator at position 2.
FlashBackIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) );
LocationAwareSeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -126,8 +127,8 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testIteratorConservation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
FlashBackIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(null,rod);
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -161,8 +162,8 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testIteratorCreation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
FlashBackIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(null,rod);
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());