Better integration of reference-ordered data into the data sharding system.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@779 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0d58e4ccc9
commit
d61a5261c1
|
|
@ -0,0 +1,84 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 21, 2009
|
||||
* Time: 2:49:17 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A view into the reference-ordered data in the provider.
|
||||
*/
|
||||
public class ReferenceOrderedView implements View {
|
||||
/**
|
||||
* The provider that's supplying our backing data.
|
||||
*/
|
||||
private final ShardDataProvider provider;
|
||||
|
||||
/**
|
||||
* The data sources along with their current states.
|
||||
*/
|
||||
private List<ReferenceOrderedDataState> states = new ArrayList<ReferenceOrderedDataState>();
|
||||
|
||||
/**
|
||||
* Create a new view of reference-ordered data.
|
||||
* @param provider
|
||||
*/
|
||||
public ReferenceOrderedView( ShardDataProvider provider ) {
|
||||
this.provider = provider;
|
||||
for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() )
|
||||
states.add( new ReferenceOrderedDataState( dataSource, (ReferenceOrderedData.RODIterator)dataSource.seek(provider.getShard()) ) );
|
||||
|
||||
provider.register(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an object which can track the reference-ordered data at every locus.
|
||||
* @param loc Locus at which to track.
|
||||
* @return A tracker containing information about this locus.
|
||||
*/
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
|
||||
RefMetaDataTracker tracks = new RefMetaDataTracker();
|
||||
for (ReferenceOrderedDataState state: states )
|
||||
tracks.bind( state.dataSource.getName(), state.iterator.seekForward(loc) );
|
||||
return tracks;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the current view.
|
||||
*/
|
||||
public void close() {
|
||||
for( ReferenceOrderedDataState state: states )
|
||||
state.dataSource.close( state.iterator );
|
||||
|
||||
// Clear out the existing data so that post-close() accesses to this data will fail-fast.
|
||||
states = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Models the traversal state of a given ROD lane.
|
||||
*/
|
||||
private class ReferenceOrderedDataState {
|
||||
public final ReferenceOrderedDataSource dataSource;
|
||||
public final ReferenceOrderedData.RODIterator iterator;
|
||||
|
||||
public ReferenceOrderedDataState( ReferenceOrderedDataSource dataSource, ReferenceOrderedData.RODIterator iterator ) {
|
||||
this.dataSource = dataSource;
|
||||
this.iterator = iterator;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
|||
import org.broadinstitute.sting.gatk.iterators.NullSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -11,6 +12,7 @@ import net.sf.samtools.SAMRecord;
|
|||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 8, 2009
|
||||
|
|
@ -29,6 +31,11 @@ import java.util.ArrayList;
|
|||
* tries to assemble as much as possible with it.
|
||||
*/
|
||||
public class ShardDataProvider {
|
||||
/**
|
||||
* An ArrayList of all the views that are examining this data.
|
||||
*/
|
||||
private List<View> registeredViews = new ArrayList<View>();
|
||||
|
||||
/**
|
||||
* The shard over which we're providing data.
|
||||
*/
|
||||
|
|
@ -44,6 +51,11 @@ public class ShardDataProvider {
|
|||
*/
|
||||
private final ReferenceProvider referenceProvider;
|
||||
|
||||
/**
|
||||
* Sources of reference-ordered data.
|
||||
*/
|
||||
private final List<ReferenceOrderedDataSource> referenceOrderedData;
|
||||
|
||||
/**
|
||||
* Retrieves the shard associated with this data provider.
|
||||
* @return The shard associated with this data provider.
|
||||
|
|
@ -77,6 +89,15 @@ public class ShardDataProvider {
|
|||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a window into the reference-ordered data. Package protected so that only
|
||||
* views can access it.
|
||||
* @return List of reference-ordered data sources.
|
||||
*/
|
||||
List<ReferenceOrderedDataSource> getReferenceOrderedData() {
|
||||
return referenceOrderedData;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the reference base associated with this particular point on the genome.
|
||||
* @param genomeLoc Region for which to retrieve the base. GenomeLoc must represent a 1-base region.
|
||||
|
|
@ -101,11 +122,12 @@ public class ShardDataProvider {
|
|||
* @param reads A window into the reads for a given region.
|
||||
* @param reference A getter for a section of the reference.
|
||||
*/
|
||||
public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference ) {
|
||||
public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference, List<ReferenceOrderedDataSource> rods) {
|
||||
this.shard = shard;
|
||||
// Provide basic reads information.
|
||||
this.reads = (reads != null) ? reads.seek( shard ) : new NullSAMIterator(new Reads(new ArrayList<File>()));
|
||||
this.referenceProvider = (reference != null) ? new ReferenceProvider(reference,shard) : null;
|
||||
this.referenceOrderedData = rods;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -117,12 +139,19 @@ public class ShardDataProvider {
|
|||
this.shard = shard;
|
||||
this.reads = reads;
|
||||
this.referenceProvider = null;
|
||||
this.referenceOrderedData = null;
|
||||
}
|
||||
|
||||
void register( View view ) {
|
||||
this.registeredViews.add(view);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retire this shard.
|
||||
*/
|
||||
public void close() {
|
||||
for( View view: registeredViews )
|
||||
view.close();
|
||||
reads.close();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,23 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 21, 2009
|
||||
* Time: 3:14:56 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Represents a view into given data.
|
||||
*/
|
||||
public interface View {
    /**
     * Tells this view that the data it was looking at no longer exists.
     */
    void close();
}
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 21, 2009
|
||||
* Time: 10:55:26 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A pool of open iterators. Currently highly specialized to RODs, but could theoretically be
|
||||
* generalized to a pool of arbitrary seekable, closeable iterators. Not thread-safe.
|
||||
*/
|
||||
class IteratorPool {
|
||||
private final ReferenceOrderedData<? extends ReferenceOrderedDatum> rod;
|
||||
|
||||
/**
|
||||
* All iterators of this reference-ordered data.
|
||||
*/
|
||||
private List<ReferenceOrderedData.RODIterator> allIterators = new ArrayList<ReferenceOrderedData.RODIterator>();
|
||||
|
||||
/**
|
||||
* All iterators that are not currently in service.
|
||||
*/
|
||||
private List<ReferenceOrderedData.RODIterator> availableIterators = new ArrayList<ReferenceOrderedData.RODIterator>();
|
||||
|
||||
/**
|
||||
* Create a new iterator pool given the current ROD.
|
||||
* @param rod Reference-ordered data.
|
||||
*/
|
||||
public IteratorPool( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod ) {
|
||||
this.rod = rod;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an iterator whose position is before the specified location. Create a new one if none exists.
|
||||
* @param position Target position for the iterator.
|
||||
* @return
|
||||
*/
|
||||
public ReferenceOrderedData.RODIterator iterator( GenomeLoc position ) {
|
||||
// Grab the first iterator in the list whose position is before the requested position.
|
||||
ReferenceOrderedData.RODIterator selectedIterator = null;
|
||||
for( ReferenceOrderedData.RODIterator iterator: availableIterators ) {
|
||||
if( (iterator.position() == null && iterator.hasNext()) ||
|
||||
(iterator.position() != null && iterator.position().isBefore(position)) ) {
|
||||
selectedIterator = iterator;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// No iterator found? Create another. It is expected that
|
||||
// each iterator created will have its own file handle.
|
||||
if( selectedIterator == null ) {
|
||||
selectedIterator = rod.iterator();
|
||||
allIterators.add(selectedIterator);
|
||||
}
|
||||
|
||||
// Remove the iterator from the list of available iterators.
|
||||
if( availableIterators.contains(selectedIterator) )
|
||||
availableIterators.remove(selectedIterator);
|
||||
|
||||
return selectedIterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the given iterator, returning it to the pool.
|
||||
* @param iterator Iterator to return to the pool.
|
||||
*/
|
||||
public void close( ReferenceOrderedData.RODIterator iterator ) {
|
||||
if( !allIterators.contains(iterator) )
|
||||
throw new StingException("Iterator does not belong to the given pool.");
|
||||
availableIterators.add(iterator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Operating stats...get the number of total iterators. Package-protected
|
||||
* for unit testing.
|
||||
* @return An integer number of total iterators.
|
||||
*/
|
||||
int numIterators() {
|
||||
return allIterators.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Operating stats...get the number of available iterators. Package-protected
|
||||
* for unit testing.
|
||||
* @return An integer number of available iterators.
|
||||
*/
|
||||
int numAvailableIterators() {
|
||||
return availableIterators.size();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,125 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 6, 2009
|
||||
* Time: 4:33:10 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 6, 2009
|
||||
* <p/>
|
||||
* Class ReferenceMetaDataSource
|
||||
* <p/>
|
||||
* A description should go here. Blame aaron if it's missing.
|
||||
*/
|
||||
public class ReferenceMetaDataSource implements SimpleDataSource {
|
||||
|
||||
// our enumerated types
|
||||
public enum RODTYPE {
|
||||
DBSNP, HAPMAP
|
||||
}
|
||||
|
||||
// these could go on the stack, but a heap copy isn't too bad
|
||||
private List<ReferenceOrderedDatum> myData = null;
|
||||
private List<ReferenceOrderedData<? extends ReferenceOrderedDatum>.RODIterator> rodIters = null;
|
||||
private List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods = null;
|
||||
|
||||
/**
|
||||
* Prepare the list of reference ordered data iterators for each of the rods
|
||||
*
|
||||
* @return A list of ROD iterators for getting data from each ROD
|
||||
*/
|
||||
protected List<ReferenceOrderedData<? extends ReferenceOrderedDatum>.RODIterator> initializeRODs() {
|
||||
// set up reference ordered data
|
||||
rodIters = new ArrayList<ReferenceOrderedData<? extends ReferenceOrderedDatum>.RODIterator>();
|
||||
for (ReferenceOrderedData<? extends ReferenceOrderedDatum> data : rods) {
|
||||
rodIters.add(data.iterator());
|
||||
}
|
||||
return rodIters;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a list of the reference ordered datum at loc from each of the iterators. This function
|
||||
* assumes you are accessing the data in order. You can't use this function for random access. Each
|
||||
* successive call moves you along the file, consuming all data before loc.
|
||||
*
|
||||
* @param rodIters Iterators to access the RODs
|
||||
* @param loc The location to get the rods at
|
||||
* @return A list of ReferenceOrderDatum at loc. ROD without a datum at loc will be null in the list
|
||||
*/
|
||||
protected List<ReferenceOrderedDatum> getReferenceOrderedDataAtLocus(List<ReferenceOrderedData<? extends ReferenceOrderedDatum>.RODIterator> rodIters,
|
||||
final GenomeLoc loc) {
|
||||
List<ReferenceOrderedDatum> data = new ArrayList<ReferenceOrderedDatum>();
|
||||
for (ReferenceOrderedData<? extends ReferenceOrderedDatum>.RODIterator iter : rodIters) {
|
||||
data.add(iter.seekForward(loc));
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
/**
|
||||
* Query the data source for a region of interest, specified by the genome location.
|
||||
* The iterator will generate successive calls
|
||||
*
|
||||
* @param shard the genome location to extract data for
|
||||
* @return an iterator of the appropriate type, that is limited by the region
|
||||
*/
|
||||
public Iterator<ReferenceOrderedDatum> seek(Shard shard) {
|
||||
if (shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||
myData = getReferenceOrderedDataAtLocus(rodIters, shard.getGenomeLoc());
|
||||
return myData.iterator();
|
||||
} else {
|
||||
throw new StingException("ReferenceMetaDataSource can only take LocusShards");
|
||||
}
|
||||
}
|
||||
|
||||
public ReferenceMetaDataSource(HashMap<String, RODTYPE> files) {
|
||||
/*
|
||||
// setup a rod list
|
||||
List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods = new ArrayList<ReferenceOrderedData<? extends ReferenceOrderedDatum>>();
|
||||
|
||||
// cycle through the passed in rod's
|
||||
|
||||
Set<String> fileNames = files.keySet();
|
||||
for (String file : fileNames) {
|
||||
switch (files.get(file)) {
|
||||
|
||||
case DBSNP: {
|
||||
ReferenceOrderedData<rodDbSNP> dbsnp = new ReferenceOrderedData<rodDbSNP>(new File(file), rodDbSNP.class);
|
||||
//dbsnp.testMe();
|
||||
rods.add(dbsnp); // { gff, dbsnp };
|
||||
}
|
||||
case HAPMAP: {
|
||||
ReferenceOrderedData<HapMapAlleleFrequenciesROD> hapmap = new ReferenceOrderedData<HapMapAlleleFrequenciesROD>(new File(file), HapMapAlleleFrequenciesROD.class);
|
||||
//dbsnp.testMe();
|
||||
rods.add(hapmap); // { gff, dbsnp };
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 21, 2009
|
||||
* Time: 10:04:12 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A data source which provides a single type of reference-ordered data.
|
||||
*/
|
||||
public class ReferenceOrderedDataSource implements SimpleDataSource {
|
||||
/**
|
||||
* The reference-ordered data itself.
|
||||
*/
|
||||
private final ReferenceOrderedData<? extends ReferenceOrderedDatum> rod;
|
||||
|
||||
/**
|
||||
* A pool of iterators for navigating through the genome.
|
||||
*/
|
||||
private IteratorPool iteratorPool = null;
|
||||
|
||||
/**
|
||||
* Create a new reference-ordered data source.
|
||||
* @param rod
|
||||
*/
|
||||
public ReferenceOrderedDataSource( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod) {
|
||||
this.rod = rod;
|
||||
this.iteratorPool = new IteratorPool( rod );
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the name of the underlying reference-ordered data.
|
||||
* @return Name of the underlying rod.
|
||||
*/
|
||||
public String getName() {
|
||||
return this.rod.getName();
|
||||
}
|
||||
|
||||
/**
|
||||
* Seek to the specified position and return an iterator through the data.
|
||||
* @param shard Shard that points to the selected position.
|
||||
* @return Iterator through the data.
|
||||
*/
|
||||
public Iterator seek( Shard shard ) {
|
||||
ReferenceOrderedData.RODIterator iterator = iteratorPool.iterator(shard.getGenomeLoc());
|
||||
return iterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the specified iterator, returning it to the pool.
|
||||
* @param iterator Iterator to close.
|
||||
*/
|
||||
public void close( ReferenceOrderedData.RODIterator iterator ) {
|
||||
this.iteratorPool.close(iterator);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
|||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraverseReads;
|
||||
|
|
@ -24,6 +25,9 @@ import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -45,6 +49,7 @@ public abstract class MicroScheduler {
|
|||
protected final IndexedFastaSequenceFile reference;
|
||||
|
||||
private final SAMDataSource reads;
|
||||
private final List<ReferenceOrderedDataSource> rods;
|
||||
|
||||
/**
|
||||
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the
|
||||
|
|
@ -78,6 +83,7 @@ public abstract class MicroScheduler {
|
|||
|
||||
this.reads = getReadsDataSource( reads );
|
||||
this.reference = openReferenceSequenceFile( refFile );
|
||||
this.rods = getReferenceOrderedDataSources( rods );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -141,7 +147,7 @@ public abstract class MicroScheduler {
|
|||
* @return An accessor for all the data in this shard.
|
||||
*/
|
||||
protected ShardDataProvider getShardDataProvider( Shard shard ) {
|
||||
return new ShardDataProvider( shard, reads, reference );
|
||||
return new ShardDataProvider( shard, reads, reference, rods );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -163,6 +169,17 @@ public abstract class MicroScheduler {
|
|||
return dataSource;
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the reference-ordered data sources.
|
||||
* @return A list of reference-ordered data sources.
|
||||
*/
|
||||
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources( List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||
for( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod: rods )
|
||||
dataSources.add( new ReferenceOrderedDataSource(rod) );
|
||||
return dataSources;
|
||||
}
|
||||
|
||||
/**
|
||||
* Opens a reference sequence file paired with an index.
|
||||
* @param refFile Handle to a reference sequence file. Non-null.
|
||||
|
|
|
|||
|
|
@ -303,6 +303,16 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
return prev;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current position of this iterator.
|
||||
* @return Current position of the iterator, or null if no position exists.
|
||||
*/
|
||||
public GenomeLoc position() {
|
||||
if( prev != null )
|
||||
return prev.getLocation();
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Seeks forward in the file until we reach (or cross) a record at contig / pos
|
||||
* If we don't find anything and cross beyond contig / pos, we return null
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
|||
import org.broadinstitute.sting.gatk.dataSources.providers.SeekableLocusContextQueue;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextQueue;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.IterableLocusContextQueue;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceOrderedView;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -27,8 +28,6 @@ import java.io.File;
|
|||
* A simple, short-term solution to iterating over all reference positions over a series of
|
||||
* genomic locations. Simply overloads the superclass traverse function to go over the entire
|
||||
* interval's reference positions.
|
||||
* mhanna - Added better data source integration.
|
||||
* TODO: Gain confidence in this implementation and remove the original.
|
||||
*/
|
||||
public class TraverseLoci extends TraversalEngine {
|
||||
|
||||
|
|
@ -62,6 +61,7 @@ public class TraverseLoci extends TraversalEngine {
|
|||
|
||||
LocusIterator locusIterator = null;
|
||||
LocusContextQueue locusContextQueue = null;
|
||||
ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
||||
|
||||
DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
|
||||
switch( dataSource ) {
|
||||
|
|
@ -85,7 +85,7 @@ public class TraverseLoci extends TraversalEngine {
|
|||
TraversalStatistics.nRecords++;
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this locus
|
||||
final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus( site );
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(site);
|
||||
|
||||
LocusContext locus = locusContextQueue.seek( site ).peek();
|
||||
char refBase = dataProvider.getReferenceBase( site );
|
||||
|
|
|
|||
|
|
@ -0,0 +1,197 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
|
||||
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.junit.Assert;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.TabularROD;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 21, 2009
|
||||
* Time: 11:03:04 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test the contents and number of iterators in the pool.
|
||||
*/
|
||||
|
||||
public class IteratorPoolTest extends BaseTest {
|
||||
|
||||
private static File sequenceFile = new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta");
|
||||
|
||||
private ReferenceOrderedData<? extends ReferenceOrderedDatum> rod = null;
|
||||
|
||||
private final GenomeLoc testSite1 = new GenomeLoc("chrM",10);
|
||||
private final GenomeLoc testSite2 = new GenomeLoc("chrM",20);
|
||||
private final GenomeLoc testSite3 = new GenomeLoc("chrM",30);
|
||||
|
||||
@BeforeClass
|
||||
public static void init() throws FileNotFoundException {
|
||||
GenomeLoc.setupRefContigOrdering(new IndexedFastaSequenceFile(sequenceFile));
|
||||
TabularROD.setDelimiter(TabularROD.DEFAULT_DELIMITER, TabularROD.DEFAULT_DELIMITER_REGEX);
|
||||
}
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
File file = new File(testDir + "TabularDataTest.dat");
|
||||
rod = new ReferenceOrderedData("tableTest", file, TabularROD.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateSingleIterator() {
|
||||
IteratorPool iteratorPool = new IteratorPool(rod);
|
||||
ReferenceOrderedData.RODIterator iterator = (ReferenceOrderedData.RODIterator)iteratorPool.iterator( testSite1 );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
|
||||
|
||||
TabularROD datum = (TabularROD)iterator.next();
|
||||
|
||||
assertTrue(datum.getLocation().equals(testSite1));
|
||||
assertTrue(datum.get("COL1").equals("A"));
|
||||
assertTrue(datum.get("COL2").equals("B"));
|
||||
assertTrue(datum.get("COL3").equals("C"));
|
||||
|
||||
iteratorPool.close(iterator);
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateMultipleIterators() {
|
||||
IteratorPool iteratorPool = new IteratorPool(rod);
|
||||
ReferenceOrderedData.RODIterator iterator1 = (ReferenceOrderedData.RODIterator)iteratorPool.iterator( testSite1 );
|
||||
|
||||
// Create a new iterator at position 2.
|
||||
ReferenceOrderedData.RODIterator iterator2 = iteratorPool.iterator( testSite2 );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
|
||||
|
||||
// Test out-of-order access: first iterator2, then iterator1.
|
||||
// Ugh...first call to a region needs to be a seek.
|
||||
TabularROD datum = (TabularROD)iterator2.seekForward(testSite2);
|
||||
assertTrue(datum.getLocation().equals(testSite2));
|
||||
assertTrue(datum.get("COL1").equals("C"));
|
||||
assertTrue(datum.get("COL2").equals("D"));
|
||||
assertTrue(datum.get("COL3").equals("E"));
|
||||
|
||||
datum = (TabularROD)iterator1.next();
|
||||
assertTrue(datum.getLocation().equals(testSite1));
|
||||
assertTrue(datum.get("COL1").equals("A"));
|
||||
assertTrue(datum.get("COL2").equals("B"));
|
||||
assertTrue(datum.get("COL3").equals("C"));
|
||||
|
||||
// Advance iterator2, and make sure both iterator's contents are still correct.
|
||||
datum = (TabularROD)iterator2.next();
|
||||
assertTrue(datum.getLocation().equals(testSite3));
|
||||
assertTrue(datum.get("COL1").equals("F"));
|
||||
assertTrue(datum.get("COL2").equals("G"));
|
||||
assertTrue(datum.get("COL3").equals("H"));
|
||||
|
||||
datum = (TabularROD)iterator1.next();
|
||||
assertTrue(datum.getLocation().equals(testSite2));
|
||||
assertTrue(datum.get("COL1").equals("C"));
|
||||
assertTrue(datum.get("COL2").equals("D"));
|
||||
assertTrue(datum.get("COL3").equals("E"));
|
||||
|
||||
// Cleanup, and make sure the number of iterators dies appropriately.
|
||||
iteratorPool.close(iterator1);
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators());
|
||||
|
||||
iteratorPool.close(iterator2);
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 2, iteratorPool.numAvailableIterators());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIteratorConservation() {
|
||||
IteratorPool iteratorPool = new IteratorPool(rod);
|
||||
ReferenceOrderedData.RODIterator iterator = (ReferenceOrderedData.RODIterator)iteratorPool.iterator( testSite1 );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
|
||||
|
||||
TabularROD datum = (TabularROD)iterator.next();
|
||||
assertTrue(datum.getLocation().equals(testSite1));
|
||||
assertTrue(datum.get("COL1").equals("A"));
|
||||
assertTrue(datum.get("COL2").equals("B"));
|
||||
assertTrue(datum.get("COL3").equals("C"));
|
||||
|
||||
iteratorPool.close(iterator);
|
||||
|
||||
// Create another iterator after the current iterator.
|
||||
iterator = iteratorPool.iterator(testSite3);
|
||||
|
||||
// Make sure that the previously acquired iterator was reused.
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
|
||||
|
||||
datum = (TabularROD)iterator.seekForward(testSite3);
|
||||
assertTrue(datum.getLocation().equals(testSite3));
|
||||
assertTrue(datum.get("COL1").equals("F"));
|
||||
assertTrue(datum.get("COL2").equals("G"));
|
||||
assertTrue(datum.get("COL3").equals("H"));
|
||||
|
||||
iteratorPool.close(iterator);
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIteratorCreation() {
|
||||
IteratorPool iteratorPool = new IteratorPool(rod);
|
||||
ReferenceOrderedData.RODIterator iterator = (ReferenceOrderedData.RODIterator)iteratorPool.iterator( testSite3 );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
|
||||
|
||||
TabularROD datum = (TabularROD)iterator.seekForward(testSite3);
|
||||
assertTrue(datum.getLocation().equals(testSite3));
|
||||
assertTrue(datum.get("COL1").equals("F"));
|
||||
assertTrue(datum.get("COL2").equals("G"));
|
||||
assertTrue(datum.get("COL3").equals("H"));
|
||||
|
||||
iteratorPool.close(iterator);
|
||||
|
||||
// Create another iterator after the current iterator.
|
||||
iterator = iteratorPool.iterator(testSite1);
|
||||
|
||||
// Make sure that the previously acquired iterator was reused.
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators());
|
||||
|
||||
datum = (TabularROD)iterator.next();
|
||||
assertTrue(datum.getLocation().equals(testSite1));
|
||||
assertTrue(datum.get("COL1").equals("A"));
|
||||
assertTrue(datum.get("COL2").equals("B"));
|
||||
assertTrue(datum.get("COL3").equals("C"));
|
||||
|
||||
iteratorPool.close(iterator);
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 2, iteratorPool.numAvailableIterators());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -135,7 +135,7 @@ public class TraverseReadsTest extends BaseTest {
|
|||
fail("Shard == null");
|
||||
}
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,dataSource,null);
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,dataSource,null,null);
|
||||
accumulator = traversalEngine.traverse(countReadWalker, shard, dataProvider, accumulator);
|
||||
dataProvider.close();
|
||||
|
||||
|
|
@ -183,7 +183,7 @@ public class TraverseReadsTest extends BaseTest {
|
|||
fail("Shard == null");
|
||||
}
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,dataSource,null);
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,dataSource,null,null);
|
||||
accumulator = traversalEngine.traverse(countReadWalker, shard, dataProvider, accumulator);
|
||||
dataProvider.close();
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue