From d61a5261c11f05cee471b80ccb2d180c71ea6dc7 Mon Sep 17 00:00:00 2001 From: hanna Date: Thu, 21 May 2009 20:09:32 +0000 Subject: [PATCH] Better integration of reference-ordered data into the data sharding system. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@779 348d0f76-0448-11de-a6fe-93d51630548a --- .../providers/ReferenceOrderedView.java | 84 ++++++++ .../providers/ShardDataProvider.java | 31 ++- .../gatk/dataSources/providers/View.java | 23 ++ .../simpleDataSources/IteratorPool.java | 106 ++++++++++ .../ReferenceMetaDataSource.java | 125 ----------- .../ReferenceOrderedDataSource.java | 75 +++++++ .../sting/gatk/executive/MicroScheduler.java | 19 +- .../gatk/refdata/ReferenceOrderedData.java | 10 + .../sting/gatk/traversals/TraverseLoci.java | 6 +- .../simpleDataSources/IteratorPoolTest.java | 197 ++++++++++++++++++ .../gatk/traversals/TraverseReadsTest.java | 4 +- 11 files changed, 548 insertions(+), 132 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceOrderedView.java create mode 100755 java/src/org/broadinstitute/sting/gatk/dataSources/providers/View.java create mode 100755 java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/IteratorPool.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/ReferenceMetaDataSource.java create mode 100755 java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/ReferenceOrderedDataSource.java create mode 100755 java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/IteratorPoolTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceOrderedView.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceOrderedView.java new file mode 100755 index 000000000..bf8602366 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceOrderedView.java @@ -0,0 +1,84 @@ +package org.broadinstitute.sting.gatk.dataSources.providers; + +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.utils.GenomeLoc; + +import java.util.List; +import java.util.ArrayList; +/** + * User: hanna + * Date: May 21, 2009 + * Time: 2:49:17 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * A view into the reference-ordered data in the provider. + */ +public class ReferenceOrderedView implements View { + /** + * The provider that's supplying our backing data. + */ + private final ShardDataProvider provider; + + /** + * The data sources along with their current states. + */ + private List states = new ArrayList(); + + /** + * Create a new view of reference-ordered data. + * @param provider + */ + public ReferenceOrderedView( ShardDataProvider provider ) { + this.provider = provider; + for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) + states.add( new ReferenceOrderedDataState( dataSource, (ReferenceOrderedData.RODIterator)dataSource.seek(provider.getShard()) ) ); + + provider.register(this); + } + + /** + * Gets an object which can track the reference-ordered data at every locus. + * @param loc Locus at which to track. + * @return A tracker containing information about this locus. + */ + public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) { + RefMetaDataTracker tracks = new RefMetaDataTracker(); + for (ReferenceOrderedDataState state: states ) + tracks.bind( state.dataSource.getName(), state.iterator.seekForward(loc) ); + return tracks; + } + + /** + * Closes the current view. + */ + public void close() { + for( ReferenceOrderedDataState state: states ) + state.dataSource.close( state.iterator ); + + // Clear out the existing data so that post-close() accesses to this data will fail-fast. + states = null; + } + + /** + * Models the traversal state of a given ROD lane. + */ + private class ReferenceOrderedDataState { + public final ReferenceOrderedDataSource dataSource; + public final ReferenceOrderedData.RODIterator iterator; + + public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, ReferenceOrderedData.RODIterator iterator ) { + this.dataSource = dataSource; + this.iterator = iterator; + } + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ShardDataProvider.java index a05c62e04..1ee0c10dc 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ShardDataProvider.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.NullSAMIterator; import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource; +import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.GenomeLoc; @@ -11,6 +12,7 @@ import net.sf.samtools.SAMRecord; import java.io.File; import java.util.ArrayList; +import java.util.List; /** * User: hanna * Date: May 8, 2009 @@ -29,6 +31,11 @@ import java.util.ArrayList; * tries to assemble as much as possible with it. */ public class ShardDataProvider { + /** + * An ArrayList of all the views that are examining this data. + */ + private List registeredViews = new ArrayList(); + /** * The shard over which we're providing data. */ @@ -44,6 +51,11 @@ public class ShardDataProvider { */ private final ReferenceProvider referenceProvider; + /** + * Sources of reference-ordered data. + */ + private final List referenceOrderedData; + /** * Retrieves the shard associated with this data provider. * @return The shard associated with this data provider. @@ -77,6 +89,15 @@ public class ShardDataProvider { return reads; } + /** + * Gets a window into the reference-ordered data. Package protected so that only + * views can access it. + * @return List of reference-ordered data sources. + */ + List getReferenceOrderedData() { + return referenceOrderedData; + } + /** * Gets the reference base associated with this particular point on the genome. * @param genomeLoc Region for which to retrieve the base. GenomeLoc must represent a 1-base region. @@ -101,11 +122,12 @@ public class ShardDataProvider { * @param reads A window into the reads for a given region. * @param reference A getter for a section of the reference. */ - public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference ) { + public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference, List rods) { this.shard = shard; // Provide basic reads information. this.reads = (reads != null) ? reads.seek( shard ) : new NullSAMIterator(new Reads(new ArrayList())); this.referenceProvider = (reference != null) ? new ReferenceProvider(reference,shard) : null; + this.referenceOrderedData = rods; } /** @@ -117,12 +139,19 @@ public class ShardDataProvider { this.shard = shard; this.reads = reads; this.referenceProvider = null; + this.referenceOrderedData = null; + } + + void register( View view ) { + this.registeredViews.add(view); } /** * Retire this shard. */ public void close() { + for( View view: registeredViews ) + view.close(); reads.close(); } } diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/View.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/View.java new file mode 100755 index 000000000..1db38e456 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/View.java @@ -0,0 +1,23 @@ +package org.broadinstitute.sting.gatk.dataSources.providers; +/** + * User: hanna + * Date: May 21, 2009 + * Time: 3:14:56 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * Represents a view into given data. + */ +public interface View { + /** + * Inform this view that the data provided to it no longer exists. + */ + public void close(); +} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/IteratorPool.java b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/IteratorPool.java new file mode 100755 index 000000000..01cf6abcc --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/IteratorPool.java @@ -0,0 +1,106 @@ +package org.broadinstitute.sting.gatk.dataSources.simpleDataSources; + +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; + +import java.util.List; +import java.util.ArrayList; +/** + * User: hanna + * Date: May 21, 2009 + * Time: 10:55:26 AM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * A pool of open iterators. Currently highly specialized to RODs, but could theoretically be + * generalized to a pool of arbitrary seekable, closeable iterators. Not thread-safe. + */ +class IteratorPool { + private final ReferenceOrderedData rod; + + /** + * All iterators of this reference-ordered data. + */ + private List allIterators = new ArrayList(); + + /** + * All iterators that are not currently in service. + */ + private List availableIterators = new ArrayList(); + + /** + * Create a new iterator pool given the current ROD. + * @param rod Reference-ordered data. + */ + public IteratorPool( ReferenceOrderedData rod ) { + this.rod = rod; + } + + /** + * Get an iterator whose position is before the specified location. Create a new one if none exists. + * @param position Target position for the iterator. + * @return + */ + public ReferenceOrderedData.RODIterator iterator( GenomeLoc position ) { + // Grab the first iterator in the list whose position is before the requested position. + ReferenceOrderedData.RODIterator selectedIterator = null; + for( ReferenceOrderedData.RODIterator iterator: availableIterators ) { + if( (iterator.position() == null && iterator.hasNext()) || + (iterator.position() != null && iterator.position().isBefore(position)) ) { + selectedIterator = iterator; + break; + } + } + + // No iterator found? Create another. It is expected that + // each iterator created will have its own file handle. + if( selectedIterator == null ) { + selectedIterator = rod.iterator(); + allIterators.add(selectedIterator); + } + + // Remove the iterator from the list of available iterators. + if( availableIterators.contains(selectedIterator) ) + availableIterators.remove(selectedIterator); + + return selectedIterator; + } + + /** + * Close the given iterator, returning it to the pool. + * @param iterator Iterator to return to the pool. + */ + public void close( ReferenceOrderedData.RODIterator iterator ) { + if( !allIterators.contains(iterator) ) + throw new StingException("Iterator does not belong to the given pool."); + availableIterators.add(iterator); + } + + /** + * Operating stats...get the number of total iterators. Package-protected + * for unit testing. + * @return An integer number of total iterators. + */ + int numIterators() { + return allIterators.size(); + } + + /** + * Operating stats...get the number of available iterators. Package-protected + * for unit testing. + * @return An integer number of available iterators. + */ + int numAvailableIterators() { + return availableIterators.size(); + } + +} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/ReferenceMetaDataSource.java b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/ReferenceMetaDataSource.java deleted file mode 100644 index 065c2add5..000000000 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/ReferenceMetaDataSource.java +++ /dev/null @@ -1,125 +0,0 @@ -package org.broadinstitute.sting.gatk.dataSources.simpleDataSources; - -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.dataSources.shards.Shard; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.StingException; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; - -/** - * - * User: aaron - * Date: Apr 6, 2009 - * Time: 4:33:10 PM - * - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ - - -/** - * @author aaron - * @version 1.0 - * @date Apr 6, 2009 - *

- * Class ReferenceMetaDataSource - *

- * A descriptions should go here. Blame aaron if it's missing. - */ -public class ReferenceMetaDataSource implements SimpleDataSource { - - // our enumerated types - public enum RODTYPE { - DBSNP, HAPMAP - } - - // these could go on the stack, but a heap copy isn't too bad - private List myData = null; - private List.RODIterator> rodIters = null; - private List> rods = null; - - /** - * Prepare the list of reference ordered data iterators for each of the rods - * - * @return A list of ROD iterators for getting data from each ROD - */ - protected List.RODIterator> initializeRODs() { - // set up reference ordered data - rodIters = new ArrayList.RODIterator>(); - for (ReferenceOrderedData data : rods) { - rodIters.add(data.iterator()); - } - return rodIters; - } - - /** - * Builds a list of the reference ordered datum at loc from each of the iterators. This function - * assumes you are accessing the data in order. You can't use this function for random access. Each - * successive call moves you along the file, consuming all data before loc. - * - * @param rodIters Iterators to access the RODs - * @param loc The location to get the rods at - * @return A list of ReferenceOrderDatum at loc. ROD without a datum at loc will be null in the list - */ - protected List getReferenceOrderedDataAtLocus(List.RODIterator> rodIters, - final GenomeLoc loc) { - List data = new ArrayList(); - for (ReferenceOrderedData.RODIterator iter : rodIters) { - data.add(iter.seekForward(loc)); - } - return data; - } - - /** - * Query the data source for a region of interest, specified by the genome location. - * The iterator will generate successive calls - * - * @param shard the genome location to extract data for - * @return an iterator of the appropriate type, that is limited by the region - */ - public Iterator seek(Shard shard) { - if (shard.getShardType() == Shard.ShardType.LOCUS) { - myData = getReferenceOrderedDataAtLocus(rodIters, shard.getGenomeLoc()); - return myData.iterator(); - } else { - throw new StingException("ReferenceMetaDataSource can only take LocusShards"); - } - } - - public ReferenceMetaDataSource(HashMap files) { - /* - // setup a rod list - List> rods = new ArrayList>(); - - // cycle through the passed in rod's - - Set fileNames = files.keySet(); - for (String file : fileNames) { - switch (files.get(file)) { - - case DBSNP: { - ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File(file), rodDbSNP.class); - //dbsnp.testMe(); - rods.add(dbsnp); // { gff, dbsnp }; - } - case HAPMAP: { - ReferenceOrderedData hapmap = new ReferenceOrderedData(new File(file), HapMapAlleleFrequenciesROD.class); - //dbsnp.testMe(); - rods.add(hapmap); // { gff, dbsnp }; - } - } - } - */ - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/ReferenceOrderedDataSource.java new file mode 100755 index 000000000..ed75a7aca --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/ReferenceOrderedDataSource.java @@ -0,0 +1,75 @@ +package org.broadinstitute.sting.gatk.dataSources.simpleDataSources; + +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; +import org.broadinstitute.sting.gatk.dataSources.shards.Shard; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; + +import java.util.List; +import java.util.ArrayList; +import java.util.Iterator; +/** + * User: hanna + * Date: May 21, 2009 + * Time: 10:04:12 AM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * A data source which provides a single type of reference-ordered data. + */ +public class ReferenceOrderedDataSource implements SimpleDataSource { + /** + * The reference-ordered data itself. + */ + private final ReferenceOrderedData rod; + + /** + * A pool of iterators for navigating through the genome. + */ + private IteratorPool iteratorPool = null; + + /** + * Create a new reference-ordered data source. + * @param rod + */ + public ReferenceOrderedDataSource( ReferenceOrderedData rod) { + this.rod = rod; + this.iteratorPool = new IteratorPool( rod ); + } + + /** + * Return the name of the underlying reference-ordered data. + * @return Name of the underlying rod. + */ + public String getName() { + return this.rod.getName(); + } + + /** + * Seek to the specified position and return an iterator through the data. + * @param shard Shard that points to the selected position. + * @return Iterator through the data. + */ + public Iterator seek( Shard shard ) { + ReferenceOrderedData.RODIterator iterator = iteratorPool.iterator(shard.getGenomeLoc()); + return iterator; + } + + /** + * Close the specified iterator, returning it to the pool. + * @param iterator Iterator to close. + */ + public void close( ReferenceOrderedData.RODIterator iterator ) { + this.iteratorPool.close(iterator); + } +} + + diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index ee0fa5bd7..95032dfad 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource; +import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.traversals.TraverseReads; @@ -24,6 +25,9 @@ import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import java.io.File; import java.io.FileNotFoundException; import java.util.List; +import java.util.Map; +import java.util.HashMap; +import java.util.ArrayList; /** * Created by IntelliJ IDEA. @@ -45,6 +49,7 @@ public abstract class MicroScheduler { protected final IndexedFastaSequenceFile reference; private final SAMDataSource reads; + private final List rods; /** * MicroScheduler factory function. Create a microscheduler appropriate for reducing the @@ -78,6 +83,7 @@ public abstract class MicroScheduler { this.reads = getReadsDataSource( reads ); this.reference = openReferenceSequenceFile( refFile ); + this.rods = getReferenceOrderedDataSources( rods ); } /** @@ -141,7 +147,7 @@ public abstract class MicroScheduler { * @return An accessor for all the data in this shard. */ protected ShardDataProvider getShardDataProvider( Shard shard ) { - return new ShardDataProvider( shard, reads, reference ); + return new ShardDataProvider( shard, reads, reference, rods ); } /** @@ -163,6 +169,17 @@ public abstract class MicroScheduler { return dataSource; } + /** + * Open the reference-ordered data sources. + * @return A list of reference-ordered data sources. + */ + private List getReferenceOrderedDataSources( List> rods) { + List dataSources = new ArrayList(); + for( ReferenceOrderedData rod: rods ) + dataSources.add( new ReferenceOrderedDataSource(rod) ); + return dataSources; + } + /** * Opens a reference sequence file paired with an index. * @param refFile Handle to a reference sequence file. Non-null. diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java index 921193ce1..26085aa18 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java @@ -303,6 +303,16 @@ public class ReferenceOrderedData implements return prev; } + /** + * Returns the current position of this iterator. + * @return Current position of the iterator, or null if no position exists. + */ + public GenomeLoc position() { + if( prev != null ) + return prev.getLocation(); + return null; + } + /** * Seeks forward in the file until we reach (or cross) a record at contig / pos * If we don't find anything and cross beyond contig / pos, we return null diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index 8c08b83ae..0e0b91474 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.dataSources.providers.SeekableLocusContextQueue; import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextQueue; import org.broadinstitute.sting.gatk.dataSources.providers.IterableLocusContextQueue; +import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceOrderedView; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -27,8 +28,6 @@ import java.io.File; * A simple, short-term solution to iterating over all reference positions over a series of * genomic locations. Simply overloads the superclass traverse function to go over the entire * interval's reference positions. - * mhanna - Added better data source integration. - * TODO: Gain confidence in this implementation and remove the original. */ public class TraverseLoci extends TraversalEngine { @@ -62,6 +61,7 @@ public class TraverseLoci extends TraversalEngine { LocusIterator locusIterator = null; LocusContextQueue locusContextQueue = null; + ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider ); DataSource dataSource = WalkerManager.getWalkerDataSource(walker); switch( dataSource ) { @@ -85,7 +85,7 @@ public class TraverseLoci extends TraversalEngine { TraversalStatistics.nRecords++; // Iterate forward to get all reference ordered data covering this locus - final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus( site ); + final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(site); LocusContext locus = locusContextQueue.seek( site ).peek(); char refBase = dataProvider.getReferenceBase( site ); diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/IteratorPoolTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/IteratorPoolTest.java new file mode 100755 index 000000000..4ed02d88b --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/IteratorPoolTest.java @@ -0,0 +1,197 @@ +package org.broadinstitute.sting.gatk.dataSources.simpleDataSources; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.Assert; +import static org.junit.Assert.assertTrue; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.TabularROD; + +import java.io.File; +import java.io.FileNotFoundException; +/** + * User: hanna + * Date: May 21, 2009 + * Time: 11:03:04 AM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * Test the contents and number of iterators in the pool. + */ + +public class IteratorPoolTest extends BaseTest { + + private static File sequenceFile = new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"); + + private ReferenceOrderedData rod = null; + + private final GenomeLoc testSite1 = new GenomeLoc("chrM",10); + private final GenomeLoc testSite2 = new GenomeLoc("chrM",20); + private final GenomeLoc testSite3 = new GenomeLoc("chrM",30); + + @BeforeClass + public static void init() throws FileNotFoundException { + GenomeLoc.setupRefContigOrdering(new IndexedFastaSequenceFile(sequenceFile)); + TabularROD.setDelimiter(TabularROD.DEFAULT_DELIMITER, TabularROD.DEFAULT_DELIMITER_REGEX); + } + + @Before + public void setUp() { + File file = new File(testDir + "TabularDataTest.dat"); + rod = new ReferenceOrderedData("tableTest", file, TabularROD.class); + } + + @Test + public void testCreateSingleIterator() { + IteratorPool iteratorPool = new IteratorPool(rod); + ReferenceOrderedData.RODIterator iterator = (ReferenceOrderedData.RODIterator)iteratorPool.iterator( testSite1 ); + + Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); + Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); + + TabularROD datum = (TabularROD)iterator.next(); + + assertTrue(datum.getLocation().equals(testSite1)); + assertTrue(datum.get("COL1").equals("A")); + assertTrue(datum.get("COL2").equals("B")); + assertTrue(datum.get("COL3").equals("C")); + + iteratorPool.close(iterator); + + Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); + Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators()); + } + + @Test + public void testCreateMultipleIterators() { + IteratorPool iteratorPool = new IteratorPool(rod); + ReferenceOrderedData.RODIterator iterator1 = (ReferenceOrderedData.RODIterator)iteratorPool.iterator( testSite1 ); + + // Create a new iterator at position 2. + ReferenceOrderedData.RODIterator iterator2 = iteratorPool.iterator( testSite2 ); + + Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); + Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); + + // Test out-of-order access: first iterator2, then iterator1. + // Ugh...first call to a region needs to be a seek. + TabularROD datum = (TabularROD)iterator2.seekForward(testSite2); + assertTrue(datum.getLocation().equals(testSite2)); + assertTrue(datum.get("COL1").equals("C")); + assertTrue(datum.get("COL2").equals("D")); + assertTrue(datum.get("COL3").equals("E")); + + datum = (TabularROD)iterator1.next(); + assertTrue(datum.getLocation().equals(testSite1)); + assertTrue(datum.get("COL1").equals("A")); + assertTrue(datum.get("COL2").equals("B")); + assertTrue(datum.get("COL3").equals("C")); + + // Advance iterator2, and make sure both iterator's contents are still correct. + datum = (TabularROD)iterator2.next(); + assertTrue(datum.getLocation().equals(testSite3)); + assertTrue(datum.get("COL1").equals("F")); + assertTrue(datum.get("COL2").equals("G")); + assertTrue(datum.get("COL3").equals("H")); + + datum = (TabularROD)iterator1.next(); + assertTrue(datum.getLocation().equals(testSite2)); + assertTrue(datum.get("COL1").equals("C")); + assertTrue(datum.get("COL2").equals("D")); + assertTrue(datum.get("COL3").equals("E")); + + // Cleanup, and make sure the number of iterators dies appropriately. + iteratorPool.close(iterator1); + + Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); + Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators()); + + iteratorPool.close(iterator2); + + Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); + Assert.assertEquals("Number of available iterators in the pool is incorrect", 2, iteratorPool.numAvailableIterators()); + } + + @Test + public void testIteratorConservation() { + IteratorPool iteratorPool = new IteratorPool(rod); + ReferenceOrderedData.RODIterator iterator = (ReferenceOrderedData.RODIterator)iteratorPool.iterator( testSite1 ); + + Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); + Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); + + TabularROD datum = (TabularROD)iterator.next(); + assertTrue(datum.getLocation().equals(testSite1)); + assertTrue(datum.get("COL1").equals("A")); + assertTrue(datum.get("COL2").equals("B")); + assertTrue(datum.get("COL3").equals("C")); + + iteratorPool.close(iterator); + + // Create another iterator after the current iterator. + iterator = iteratorPool.iterator(testSite3); + + // Make sure that the previously acquired iterator was reused. + Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); + Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); + + datum = (TabularROD)iterator.seekForward(testSite3); + assertTrue(datum.getLocation().equals(testSite3)); + assertTrue(datum.get("COL1").equals("F")); + assertTrue(datum.get("COL2").equals("G")); + assertTrue(datum.get("COL3").equals("H")); + + iteratorPool.close(iterator); + + Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); + Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators()); + } + + @Test + public void testIteratorCreation() { + IteratorPool iteratorPool = new IteratorPool(rod); + ReferenceOrderedData.RODIterator iterator = (ReferenceOrderedData.RODIterator)iteratorPool.iterator( testSite3 ); + + Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); + Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); + + TabularROD datum = (TabularROD)iterator.seekForward(testSite3); + assertTrue(datum.getLocation().equals(testSite3)); + assertTrue(datum.get("COL1").equals("F")); + assertTrue(datum.get("COL2").equals("G")); + assertTrue(datum.get("COL3").equals("H")); + + iteratorPool.close(iterator); + + // Create another iterator after the current iterator. + iterator = iteratorPool.iterator(testSite1); + + // Make sure that the previously acquired iterator was reused. + Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); + Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators()); + + datum = (TabularROD)iterator.next(); + assertTrue(datum.getLocation().equals(testSite1)); + assertTrue(datum.get("COL1").equals("A")); + assertTrue(datum.get("COL2").equals("B")); + assertTrue(datum.get("COL3").equals("C")); + + iteratorPool.close(iterator); + + Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); + Assert.assertEquals("Number of available iterators in the pool is incorrect", 2, iteratorPool.numAvailableIterators()); + } + +} diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java index 98c62ffe3..d1023c8cd 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java @@ -135,7 +135,7 @@ public class TraverseReadsTest extends BaseTest { fail("Shard == null"); } - ShardDataProvider dataProvider = new ShardDataProvider(shard,dataSource,null); + ShardDataProvider dataProvider = new ShardDataProvider(shard,dataSource,null,null); accumulator = traversalEngine.traverse(countReadWalker, shard, dataProvider, accumulator); dataProvider.close(); @@ -183,7 +183,7 @@ public class TraverseReadsTest extends BaseTest { fail("Shard == null"); } - ShardDataProvider dataProvider = new ShardDataProvider(shard,dataSource,null); + ShardDataProvider dataProvider = new ShardDataProvider(shard,dataSource,null,null); accumulator = traversalEngine.traverse(countReadWalker, shard, dataProvider, accumulator); dataProvider.close(); }