some rods-for-reads cleaning and performance improvements.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2979 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4f4555c80f
commit
dde9fd8a15
|
|
@ -266,7 +266,7 @@ public class GenomeAnalysisEngine {
|
|||
|
||||
validateSuppliedReferenceOrderedDataAgainstWalker(my_walker, rods);
|
||||
|
||||
rodDataSources = getReferenceOrderedDataSources(rods);
|
||||
rodDataSources = getReferenceOrderedDataSources(my_walker, rods);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -744,10 +744,10 @@ public class GenomeAnalysisEngine {
|
|||
* @param rods the reference-ordered data to execute using
|
||||
* @return A list of reference-ordered data sources.
|
||||
*/
|
||||
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(Walker walker, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||
for (ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods)
|
||||
dataSources.add(new ReferenceOrderedDataSource(rod));
|
||||
dataSources.add(new ReferenceOrderedDataSource(walker, rod));
|
||||
return dataSources;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,8 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
|
|||
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
|
|
@ -38,7 +37,7 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
|
|||
*/
|
||||
public ManagingReferenceOrderedView( ShardDataProvider provider ) {
|
||||
for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() )
|
||||
states.add( new ReferenceOrderedDataState( dataSource, ((FlashBackIterator)dataSource.seek(provider.getShard()) )) );
|
||||
states.add( new ReferenceOrderedDataState( dataSource, (dataSource.seek(provider.getShard()) )) );
|
||||
|
||||
provider.register(this);
|
||||
}
|
||||
|
|
@ -74,9 +73,9 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
|
|||
*/
|
||||
class ReferenceOrderedDataState {
|
||||
public final ReferenceOrderedDataSource dataSource;
|
||||
public final FlashBackIterator iterator;
|
||||
public final LocationAwareSeekableRODIterator iterator;
|
||||
|
||||
public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, FlashBackIterator iterator ) {
|
||||
public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator ) {
|
||||
this.dataSource = dataSource;
|
||||
this.iterator = iterator;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ class WindowedData {
|
|||
states = new ArrayList<RMDDataState>();
|
||||
if (provider != null && provider.getReferenceOrderedData() != null)
|
||||
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
|
||||
states.add(new RMDDataState(dataSource, ((FlashBackIterator) dataSource.seek(GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), rec.getAlignmentStart())))));
|
||||
states.add(new RMDDataState(dataSource, (LocationAwareSeekableRODIterator)dataSource.seek(GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), rec.getAlignmentStart()))));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
|
|||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MergingIterator;
|
||||
|
|
@ -68,7 +68,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
// grab the ROD iterator from the data source, and compute the first location in this shard, forwarding
|
||||
// the iterator to immediately before it, so that it can be added to the merging iterator primed for
|
||||
// next() to return the first real ROD in this shard
|
||||
FlashBackIterator it = (FlashBackIterator)dataSource.seek(provider.getShard());
|
||||
LocationAwareSeekableRODIterator it = dataSource.seek(provider.getShard());
|
||||
it.seekForward(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart()-1));
|
||||
|
||||
states.add(new ReferenceOrderedDataState(dataSource,it));
|
||||
|
|
|
|||
|
|
@ -1,9 +1,14 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
|
|
@ -40,9 +45,9 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
* Create a new reference-ordered data source.
|
||||
* @param rod
|
||||
*/
|
||||
public ReferenceOrderedDataSource( ReferenceOrderedData rod) {
|
||||
public ReferenceOrderedDataSource( Walker walker, ReferenceOrderedData rod) {
|
||||
this.rod = rod;
|
||||
this.iteratorPool = new ReferenceOrderedDataPool( rod );
|
||||
this.iteratorPool = new ReferenceOrderedDataPool( walker, rod );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -66,9 +71,9 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
* @param shard Shard that points to the selected position.
|
||||
* @return Iterator through the data.
|
||||
*/
|
||||
public Iterator seek( Shard shard ) {
|
||||
public LocationAwareSeekableRODIterator seek( Shard shard ) {
|
||||
DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream();
|
||||
FlashBackIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
|
||||
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
|
||||
return RODIterator;
|
||||
}
|
||||
|
||||
|
|
@ -81,7 +86,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
*/
|
||||
public Iterator seek(GenomeLoc loc) {
|
||||
DataStreamSegment dataStreamSegment = new MappedStreamSegment(loc);
|
||||
FlashBackIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
|
||||
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
|
||||
return RODIterator;
|
||||
}
|
||||
|
||||
|
|
@ -90,7 +95,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
* Close the specified iterator, returning it to the pool.
|
||||
* @param iterator Iterator to close.
|
||||
*/
|
||||
public void close( FlashBackIterator iterator ) {
|
||||
public void close( LocationAwareSeekableRODIterator iterator ) {
|
||||
this.iteratorPool.release(iterator);
|
||||
}
|
||||
|
||||
|
|
@ -99,9 +104,11 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
/**
|
||||
* A pool of reference-ordered data iterators.
|
||||
*/
|
||||
class ReferenceOrderedDataPool extends ResourcePool<FlashBackIterator, FlashBackIterator> {
|
||||
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
|
||||
private final ReferenceOrderedData<? extends ReferenceOrderedDatum> rod;
|
||||
public ReferenceOrderedDataPool( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod ) {
|
||||
boolean flashbackData = false;
|
||||
public ReferenceOrderedDataPool( Walker walker, ReferenceOrderedData<? extends ReferenceOrderedDatum> rod ) {
|
||||
if (walker instanceof ReadWalker) flashbackData = true; // && (rod.getType() != IntervalRod.class)
|
||||
this.rod = rod;
|
||||
}
|
||||
|
||||
|
|
@ -110,8 +117,8 @@ class ReferenceOrderedDataPool extends ResourcePool<FlashBackIterator, FlashBack
|
|||
* to be completely independent of any other iterator.
|
||||
* @return The newly created resource.
|
||||
*/
|
||||
public FlashBackIterator createNewResource() {
|
||||
return new FlashBackIterator(rod.iterator());
|
||||
public LocationAwareSeekableRODIterator createNewResource() {
|
||||
return (flashbackData) ? new FlashBackIterator(rod.iterator()) : rod.iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -121,17 +128,17 @@ class ReferenceOrderedDataPool extends ResourcePool<FlashBackIterator, FlashBack
|
|||
* @param resources {@inheritDoc}
|
||||
* @return {@inheritDoc}
|
||||
*/
|
||||
public FlashBackIterator selectBestExistingResource( DataStreamSegment segment, List<FlashBackIterator> resources ) {
|
||||
public LocationAwareSeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List<LocationAwareSeekableRODIterator> resources ) {
|
||||
if(segment instanceof MappedStreamSegment) {
|
||||
GenomeLoc position = ((MappedStreamSegment)segment).getFirstLocation();
|
||||
|
||||
for( FlashBackIterator RODIterator : resources ) {
|
||||
for( LocationAwareSeekableRODIterator RODIterator : resources ) {
|
||||
|
||||
if( (RODIterator.position() == null && RODIterator.hasNext()) ||
|
||||
(RODIterator.position() != null && RODIterator.position().isBefore(position)) )
|
||||
return RODIterator;
|
||||
if (RODIterator.position() != null && RODIterator.canFlashBackTo(position)) {
|
||||
RODIterator.flashBackTo(position);
|
||||
if (RODIterator.position() != null && RODIterator instanceof FlashBackIterator && ((FlashBackIterator)RODIterator).canFlashBackTo(position)) {
|
||||
((FlashBackIterator)RODIterator).flashBackTo(position);
|
||||
return RODIterator;
|
||||
}
|
||||
|
||||
|
|
@ -151,15 +158,15 @@ class ReferenceOrderedDataPool extends ResourcePool<FlashBackIterator, FlashBack
|
|||
/**
|
||||
* In this case, the iterator is the resource. Pass it through.
|
||||
*/
|
||||
public FlashBackIterator createIteratorFromResource( DataStreamSegment segment, FlashBackIterator resource ) {
|
||||
public LocationAwareSeekableRODIterator createIteratorFromResource( DataStreamSegment segment, LocationAwareSeekableRODIterator resource ) {
|
||||
return resource;
|
||||
}
|
||||
|
||||
/**
|
||||
* kill the buffers in the iterator
|
||||
*/
|
||||
public void closeResource( FlashBackIterator resource ) {
|
||||
resource.close();
|
||||
public void closeResource( LocationAwareSeekableRODIterator resource ) {
|
||||
if (resource instanceof FlashBackIterator) ((FlashBackIterator)resource).close();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -10,13 +9,12 @@ public class MergingIterator implements Iterator<RODRecordList>, Iterable<RODRec
|
|||
PriorityQueue<Element> queue = new PriorityQueue<Element>();
|
||||
|
||||
private class Element implements Comparable<Element> {
|
||||
public FlashBackIterator it = null;
|
||||
//public E value = null;
|
||||
public LocationAwareSeekableRODIterator it = null;
|
||||
public GenomeLoc nextLoc = null;
|
||||
|
||||
public Element(Iterator<RODRecordList> it) {
|
||||
if ( it instanceof FlashBackIterator) {
|
||||
this.it = (FlashBackIterator)it;
|
||||
if ( it instanceof LocationAwareSeekableRODIterator) {
|
||||
this.it = (LocationAwareSeekableRODIterator)it;
|
||||
if ( ! it.hasNext() ) throw new StingException("Iterator is empty");
|
||||
update();
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ public class ReferenceOrderedViewTest extends BaseTest {
|
|||
public void testSingleBinding() {
|
||||
File file = new File(testDir + "TabularDataTest.dat");
|
||||
ReferenceOrderedData rod = new ReferenceOrderedData("tableTest", file, TabularROD.class);
|
||||
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(rod);
|
||||
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(null,rod);
|
||||
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
|
||||
|
||||
|
|
@ -90,9 +90,9 @@ public class ReferenceOrderedViewTest extends BaseTest {
|
|||
File file = new File(testDir + "TabularDataTest.dat");
|
||||
|
||||
ReferenceOrderedData rod1 = new ReferenceOrderedData("tableTest1", file, TabularROD.class);
|
||||
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(rod1);
|
||||
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(null,rod1);
|
||||
ReferenceOrderedData rod2 = new ReferenceOrderedData("tableTest2", file, TabularROD.class);
|
||||
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(rod2);
|
||||
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(null,rod2);
|
||||
|
||||
|
||||
Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
|
@ -55,8 +56,8 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testCreateSingleIterator() {
|
||||
ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod);
|
||||
FlashBackIterator iterator = (FlashBackIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
ResourcePool iteratorPool = new ReferenceOrderedDataPool(null,rod);
|
||||
LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
|
||||
|
|
@ -76,11 +77,11 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testCreateMultipleIterators() {
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
|
||||
FlashBackIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(null,rod);
|
||||
LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
|
||||
// Create a new iterator at position 2.
|
||||
FlashBackIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) );
|
||||
LocationAwareSeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
|
||||
|
|
@ -126,8 +127,8 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testIteratorConservation() {
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
|
||||
FlashBackIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(null,rod);
|
||||
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
|
||||
|
|
@ -161,8 +162,8 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testIteratorCreation() {
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
|
||||
FlashBackIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(null,rod);
|
||||
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
|
||||
|
|
|
|||
Loading…
Reference in New Issue