Pooling resources to lower memory consumption.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@962 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
87ba8b3451
commit
dc6a9ca196
|
|
@ -130,7 +130,7 @@ public class GATKArgumentCollection {
|
|||
|
||||
@Element(required=false)
|
||||
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation (LENIENT|SILENT|STRICT)", required = false)
|
||||
public String strictnessLevel = "strict";
|
||||
public String strictnessLevel = "silent";
|
||||
|
||||
@Element(required=false)
|
||||
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false)
|
||||
|
|
|
|||
|
|
@ -8,8 +8,6 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.IntervalRodIterator;
|
||||
import org.broadinstitute.sting.gatk.traversals.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
|
@ -112,7 +110,7 @@ public class GenomeAnalysisEngine {
|
|||
logger.info("Strictness is " + strictness);
|
||||
|
||||
// perform validation steps that are common to all the engines
|
||||
genericEngineSetup(strictness);
|
||||
genericEngineSetup();
|
||||
|
||||
// parse out any genomic location they've provided
|
||||
//List<GenomeLoc> locationsList = setupIntervalRegion();
|
||||
|
|
@ -154,7 +152,7 @@ public class GenomeAnalysisEngine {
|
|||
ValidationStringency strictness = getValidationStringency();
|
||||
|
||||
logger.info("Strictness is " + strictness);
|
||||
genericEngineSetup(strictness);
|
||||
genericEngineSetup();
|
||||
|
||||
// store the results of the walker run
|
||||
walkerReturn = engine.traverse(my_walker);
|
||||
|
|
@ -177,13 +175,13 @@ public class GenomeAnalysisEngine {
|
|||
// create the MicroScheduler
|
||||
if( argCollection.walkAllLoci )
|
||||
Utils.scareUser("Argument --all_loci is deprecated. Please annotate your walker with @By(DataSource.REFERENCE) to perform a by-reference traversal.");
|
||||
microScheduler = MicroScheduler.create(my_walker, new Reads(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
||||
microScheduler = MicroScheduler.create(my_walker, extractSourceInfoFromArguments(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
||||
engine = microScheduler.getTraversalEngine();
|
||||
}
|
||||
else if (my_walker instanceof ReadWalker) {
|
||||
if (argCollection.referenceFile == null)
|
||||
Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given"));
|
||||
microScheduler = MicroScheduler.create(my_walker, new Reads(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
||||
microScheduler = MicroScheduler.create(my_walker, extractSourceInfoFromArguments(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
||||
engine = microScheduler.getTraversalEngine();
|
||||
}
|
||||
|
||||
|
|
@ -193,11 +191,11 @@ public class GenomeAnalysisEngine {
|
|||
|
||||
/**
|
||||
* commands that get executed for each engine, regardless of the type
|
||||
*
|
||||
* @param strictness our current strictness level
|
||||
*/
|
||||
private void genericEngineSetup(ValidationStringency strictness) {
|
||||
engine.setStrictness(strictness);
|
||||
private void genericEngineSetup() {
|
||||
Reads sourceInfo = extractSourceInfoFromArguments(argCollection);
|
||||
|
||||
engine.setStrictness(sourceInfo.getValidationStringency());
|
||||
|
||||
engine.setMaxReads(argCollection.maximumEngineIterations);
|
||||
engine.setFilterZeroMappingQualityReads(argCollection.filterZeroMappingQualityReads);
|
||||
|
|
@ -207,7 +205,7 @@ public class GenomeAnalysisEngine {
|
|||
engine.setLocation(parseIntervalRegion(argCollection.intervals, false));
|
||||
}
|
||||
|
||||
engine.setReadFilters(new Reads(argCollection));
|
||||
engine.setReadFilters(sourceInfo);
|
||||
|
||||
engine.setThreadedIO(argCollection.enabledThreadedIO);
|
||||
engine.setWalkOverAllSites(argCollection.walkAllLoci);
|
||||
|
|
@ -233,6 +231,21 @@ public class GenomeAnalysisEngine {
|
|||
return locs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Bundles all the source information about the reads into a unified data structure.
|
||||
* @param argCollection The collection of arguments passed to the engine.
|
||||
* @return The reads object providing reads source info.
|
||||
*/
|
||||
private Reads extractSourceInfoFromArguments( GATKArgumentCollection argCollection ) {
|
||||
return new Reads( argCollection.samFiles,
|
||||
getValidationStringency(),
|
||||
argCollection.downsampleFraction,
|
||||
argCollection.downsampleCoverage,
|
||||
argCollection.maximumReadSorts,
|
||||
!argCollection.unsafe,
|
||||
argCollection.filterZeroMappingQualityReads );
|
||||
}
|
||||
|
||||
private void validateInputsAgainstWalker(Walker walker,
|
||||
GATKArgumentCollection arguments,
|
||||
List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||
|
|
@ -275,12 +288,12 @@ public class GenomeAnalysisEngine {
|
|||
* @return the validation stringency
|
||||
*/
|
||||
private ValidationStringency getValidationStringency() {
|
||||
ValidationStringency strictness;
|
||||
ValidationStringency strictness = ValidationStringency.SILENT;
|
||||
try {
|
||||
strictness = Enum.valueOf(ValidationStringency.class, argCollection.strictnessLevel);
|
||||
strictness = Enum.valueOf(ValidationStringency.class, argCollection.strictnessLevel.toUpperCase().trim());
|
||||
}
|
||||
catch (IllegalArgumentException ex) {
|
||||
strictness = ValidationStringency.STRICT;
|
||||
logger.debug("Unable to parse strictness from command line. Assuming SILENT");
|
||||
}
|
||||
return strictness;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.List;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 14, 2009
|
||||
|
|
@ -25,7 +27,7 @@ import java.util.List;
|
|||
*/
|
||||
public class Reads {
|
||||
private List<File> readsFiles = null;
|
||||
|
||||
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
|
||||
private Double downsamplingFraction = null;
|
||||
private Integer downsampleToCoverage = null;
|
||||
private Integer maxOnFlySorts = null;
|
||||
|
|
@ -40,6 +42,14 @@ public class Reads {
|
|||
return readsFiles;
|
||||
}
|
||||
|
||||
/**
|
||||
* How strict should validation be?
|
||||
* @return Stringency of validation.
|
||||
*/
|
||||
public SAMFileReader.ValidationStringency getValidationStringency() {
|
||||
return validationStringency;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the fraction of reads to downsample.
|
||||
* @return Downsample fraction.
|
||||
|
|
@ -88,14 +98,27 @@ public class Reads {
|
|||
* Extract the command-line arguments having to do with reads input
|
||||
* files and store them in an easy-to-work-with package. Constructor
|
||||
* is package protected.
|
||||
* @param arguments GATK parsed command-line arguments.
|
||||
* @param samFiles list of reads files.
|
||||
* @param strictness Stringency of reads file parsing.
|
||||
* @param downsampleFraction fraction of reads to downsample.
|
||||
* @param downsampleCoverage downsampling per-locus.
|
||||
* @param maxOnFlySorts how many sorts to perform on-the-fly.
|
||||
* @param beSafe Whether to enable safety checking.
|
||||
* @param filterZeroMappingQualityReads whether to filter zero mapping quality reads.
|
||||
*/
|
||||
Reads( GATKArgumentCollection arguments ) {
|
||||
this.readsFiles = arguments.samFiles;
|
||||
if (arguments.downsampleFraction != null) downsamplingFraction = arguments.downsampleFraction;
|
||||
if (arguments.downsampleCoverage != null) downsampleToCoverage = arguments.downsampleCoverage;
|
||||
if (arguments.maximumReadSorts != null) maxOnFlySorts = arguments.maximumReadSorts;
|
||||
if (arguments.filterZeroMappingQualityReads != null) filterZeroMappingQualityReads = arguments.filterZeroMappingQualityReads;
|
||||
beSafe = !arguments.unsafe;
|
||||
Reads( List<File> samFiles,
|
||||
SAMFileReader.ValidationStringency strictness,
|
||||
Double downsampleFraction,
|
||||
Integer downsampleCoverage,
|
||||
Integer maxOnFlySorts,
|
||||
Boolean beSafe,
|
||||
Boolean filterZeroMappingQualityReads ) {
|
||||
this.readsFiles = samFiles;
|
||||
this.validationStringency = strictness;
|
||||
this.downsamplingFraction = downsampleFraction;
|
||||
this.downsampleToCoverage = downsampleCoverage;
|
||||
this.maxOnFlySorts = maxOnFlySorts;
|
||||
this.beSafe = beSafe;
|
||||
this.filterZeroMappingQualityReads = filterZeroMappingQualityReads;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,107 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 21, 2009
|
||||
* Time: 10:55:26 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A pool of open iterators. Currently highly specialized to RODs, but could theoretically be
|
||||
* generalized to a pool of arbitrary seekable, closeable iterators. Not thread-safe.
|
||||
*/
|
||||
class IteratorPool {
|
||||
private final ReferenceOrderedData<? extends ReferenceOrderedDatum> rod;
|
||||
|
||||
/**
|
||||
* All iterators of this reference-ordered data.
|
||||
*/
|
||||
private List<RODIterator> allIterators = new ArrayList<RODIterator>();
|
||||
|
||||
/**
|
||||
* All iterators that are not currently in service.
|
||||
*/
|
||||
private List<RODIterator> availableIterators = new ArrayList<RODIterator>();
|
||||
|
||||
/**
|
||||
* Create a new iterator pool given the current ROD.
|
||||
* @param rod Reference-ordered data.
|
||||
*/
|
||||
public IteratorPool( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod ) {
|
||||
this.rod = rod;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an iterator whose position is before the specified location. Create a new one if none exists.
|
||||
* @param position Target position for the iterator.
|
||||
* @return
|
||||
*/
|
||||
public RODIterator iterator( GenomeLoc position ) {
|
||||
// Grab the first iterator in the list whose position is before the requested position.
|
||||
RODIterator selectedIterator = null;
|
||||
for( RODIterator iterator: availableIterators ) {
|
||||
if( (iterator.position() == null && iterator.hasNext()) ||
|
||||
(iterator.position() != null && iterator.position().isBefore(position)) ) {
|
||||
selectedIterator = iterator;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// No iterator found? Create another. It is expected that
|
||||
// each iterator created will have its own file handle.
|
||||
if( selectedIterator == null ) {
|
||||
selectedIterator = rod.iterator();
|
||||
allIterators.add(selectedIterator);
|
||||
}
|
||||
|
||||
// Remove the iterator from the list of available iterators.
|
||||
if( availableIterators.contains(selectedIterator) )
|
||||
availableIterators.remove(selectedIterator);
|
||||
|
||||
return selectedIterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the given iterator, returning it to the pool.
|
||||
* @param iterator Iterator to return to the pool.
|
||||
*/
|
||||
public void close( RODIterator iterator ) {
|
||||
if( !allIterators.contains(iterator) )
|
||||
throw new StingException("Iterator does not belong to the given pool.");
|
||||
availableIterators.add(iterator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Operating stats...get the number of total iterators. Package-protected
|
||||
* for unit testing.
|
||||
* @return An integer number of total iterators.
|
||||
*/
|
||||
int numIterators() {
|
||||
return allIterators.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Operating stats...get the number of available iterators. Package-protected
|
||||
* for unit testing.
|
||||
* @return An integer number of available iterators.
|
||||
*/
|
||||
int numAvailableIterators() {
|
||||
return availableIterators.size();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -5,11 +5,9 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
|||
import org.broadinstitute.sting.gatk.refdata.RODIterator;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 21, 2009
|
||||
|
|
@ -30,20 +28,20 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
/**
|
||||
* The reference-ordered data itself.
|
||||
*/
|
||||
private final ReferenceOrderedData<? extends ReferenceOrderedDatum> rod;
|
||||
private final ReferenceOrderedData rod;
|
||||
|
||||
/**
|
||||
* A pool of iterators for navigating through the genome.
|
||||
*/
|
||||
private IteratorPool iteratorPool = null;
|
||||
private ReferenceOrderedDataPool iteratorPool = null;
|
||||
|
||||
/**
|
||||
* Create a new reference-ordered data source.
|
||||
* @param rod
|
||||
*/
|
||||
public ReferenceOrderedDataSource( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod) {
|
||||
public ReferenceOrderedDataSource( ReferenceOrderedData rod) {
|
||||
this.rod = rod;
|
||||
this.iteratorPool = new IteratorPool( rod );
|
||||
this.iteratorPool = new ReferenceOrderedDataPool( rod );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -69,7 +67,59 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
* @param iterator Iterator to close.
|
||||
*/
|
||||
public void close( RODIterator iterator ) {
|
||||
this.iteratorPool.close(iterator);
|
||||
this.iteratorPool.release(iterator);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* A pool of reference-ordered data iterators.
|
||||
*/
|
||||
class ReferenceOrderedDataPool extends ResourcePool<RODIterator,RODIterator> {
|
||||
private final ReferenceOrderedData<? extends ReferenceOrderedDatum> rod;
|
||||
|
||||
public ReferenceOrderedDataPool( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod ) {
|
||||
this.rod = rod;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new iterator from the existing reference-ordered data. This new iterator is expected
|
||||
* to be completely independent of any other iterator.
|
||||
* @param position @{inheritedDoc}
|
||||
* @return The newly created resource.
|
||||
*/
|
||||
public RODIterator createNewResource( GenomeLoc position ) {
|
||||
return rod.iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the best existing ROD iterator from the pool. In this case, the best existing ROD is defined as
|
||||
* the first one encountered that is at or before the given position.
|
||||
* @param position @{inheritedDoc}
|
||||
* @param resources @{inheritedDoc}
|
||||
* @return @{inheritedDoc}
|
||||
*/
|
||||
public RODIterator selectBestExistingResource( GenomeLoc position, List<RODIterator> resources ) {
|
||||
for( RODIterator iterator: resources ) {
|
||||
if( (iterator.position() == null && iterator.hasNext()) ||
|
||||
(iterator.position() != null && iterator.position().isBefore(position)) )
|
||||
return iterator;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* In this case, the iterator is the resource. Pass it through.
|
||||
*/
|
||||
public RODIterator createIteratorFromResource( GenomeLoc position, RODIterator resource ) {
|
||||
return resource;
|
||||
}
|
||||
|
||||
/**
|
||||
* Don't worry about closing the resource; let the file handles expire naturally for the moment.
|
||||
*/
|
||||
public void closeResource( RODIterator resource ) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,152 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 21, 2009
|
||||
* Time: 10:55:26 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A pool of open resources, all of which can create a closeable iterator.
|
||||
*/
|
||||
abstract class ResourcePool <T,I extends Iterator> {
|
||||
/**
|
||||
* All iterators of this reference-ordered data.
|
||||
*/
|
||||
private List<T> allResources = new ArrayList<T>();
|
||||
|
||||
/**
|
||||
* All iterators that are not currently in service.
|
||||
*/
|
||||
private List<T> availableResources = new ArrayList<T>();
|
||||
|
||||
/**
|
||||
* Which iterators are assigned to which pools.
|
||||
*/
|
||||
private Map<I,T> resourceAssignments = new HashMap<I,T>();
|
||||
|
||||
/**
|
||||
* Get an iterator whose position is before the specified location. Create a new one if none exists.
|
||||
* @param position Target position for the iterator.
|
||||
* @return An iterator that can traverse the selected region. Should be able to iterate concurrently with other
|
||||
* iterators from tihs pool.
|
||||
*/
|
||||
public I iterator( GenomeLoc position ) {
|
||||
// Grab the first iterator in the list whose position is before the requested position.
|
||||
T selectedResource = null;
|
||||
synchronized(this) {
|
||||
selectedResource = selectBestExistingResource( position, availableResources );
|
||||
|
||||
// Remove the iterator from the list of available iterators.
|
||||
if( selectedResource != null )
|
||||
availableResources.remove(selectedResource);
|
||||
}
|
||||
|
||||
// No iterator found? Create another. It is expected that
|
||||
// each iterator created will have its own file handle.
|
||||
if( selectedResource == null ) {
|
||||
selectedResource = createNewResource(position);
|
||||
addNewResource( selectedResource );
|
||||
}
|
||||
|
||||
I iterator = createIteratorFromResource( position, selectedResource );
|
||||
|
||||
// Make a note of this assignment for proper releasing later.
|
||||
resourceAssignments.put( iterator, selectedResource );
|
||||
|
||||
return iterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Release the lock on the given iterator, returning it to the pool.
|
||||
* @param iterator Iterator to return to the pool.
|
||||
*/
|
||||
public void release( I iterator ) {
|
||||
synchronized(this) {
|
||||
// Find and remove the resource from the list of allocated resources.
|
||||
T resource = resourceAssignments.get( iterator );
|
||||
resourceAssignments.remove(resource);
|
||||
|
||||
// Return the resource to the pool.
|
||||
if( !allResources.contains(resource) )
|
||||
throw new StingException("Iterator does not belong to the given pool.");
|
||||
availableResources.add(resource);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a resource to the list of available resources. Useful if derived classes
|
||||
* want to seed the pool with a set of at a given time (like at initialization).
|
||||
* @param resource The new resource to add.
|
||||
*/
|
||||
protected void addNewResource( T resource ) {
|
||||
synchronized(this) {
|
||||
allResources.add(resource);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* If no appropriate resources are found in the pool, the system can create a new resource.
|
||||
* Delegate the creation of the resource to the subclass.
|
||||
* @param position Position for the new resource. This information may or may not inform the new resource.
|
||||
* @return The new resource created.
|
||||
*/
|
||||
protected abstract T createNewResource( GenomeLoc position );
|
||||
|
||||
/**
|
||||
* Find the most appropriate resource to acquire the specified data.
|
||||
* @param position The data over which the resource is required.
|
||||
* @param availableResources A list of candidate resources to evaluate.
|
||||
* @return The best choice of the availableResources, or null if no resource meets the criteria.
|
||||
*/
|
||||
protected abstract T selectBestExistingResource( GenomeLoc position, List<T> availableResources );
|
||||
|
||||
/**
|
||||
* Create an iterator over the specified resource.
|
||||
* @param position The bounds of iteration. The first element of the iterator through the last element should all
|
||||
* be in the range described by position.
|
||||
* @return A new iterator over the given data.
|
||||
*/
|
||||
protected abstract I createIteratorFromResource( GenomeLoc position, T resource );
|
||||
|
||||
/**
|
||||
* Retire this resource from service.
|
||||
* @param resource The resource to retire.
|
||||
*/
|
||||
protected abstract void closeResource(T resource);
|
||||
|
||||
/**
|
||||
* Operating stats...get the number of total iterators. Package-protected
|
||||
* for unit testing.
|
||||
* @return An integer number of total iterators.
|
||||
*/
|
||||
int numIterators() {
|
||||
return allResources.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Operating stats...get the number of available iterators. Package-protected
|
||||
* for unit testing.
|
||||
* @return An integer number of available iterators.
|
||||
*/
|
||||
int numAvailableIterators() {
|
||||
return availableResources.size();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
|
||||
|
||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
|
|
@ -18,6 +19,7 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
|
|
@ -54,22 +56,8 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
private boolean intoUnmappedReads = false;
|
||||
private int readsSeenAtLastPos = 0;
|
||||
|
||||
|
||||
/**
|
||||
* package protected getter and setter for the iterator generator
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
IteratorGenerator getIterGen() {
|
||||
return iterGen;
|
||||
}
|
||||
|
||||
void setIterGen( IteratorGenerator iterGen ) {
|
||||
this.iterGen = iterGen;
|
||||
}
|
||||
|
||||
// where we get out iterators from
|
||||
private IteratorGenerator iterGen;
|
||||
// A pool of SAM iterators.
|
||||
private SAMIteratorPool iteratorPool = null;
|
||||
|
||||
/**
|
||||
* constructor, given sam files
|
||||
|
|
@ -91,8 +79,15 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
|
||||
}
|
||||
}
|
||||
iterGen = new MSR2IteratorGenerator(reads, byReads);
|
||||
iteratorPool = new SAMIteratorPool(reads,byReads);
|
||||
}
|
||||
|
||||
/**
|
||||
* For unit testing, add a custom iterator pool.
|
||||
* @param iteratorPool Custom mock iterator pool.
|
||||
*/
|
||||
void setResourcePool( SAMIteratorPool iteratorPool ) {
|
||||
this.iteratorPool = iteratorPool;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -105,11 +100,8 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
*
|
||||
* @return an iterator for that region
|
||||
*/
|
||||
public StingSAMIterator seekLocus( GenomeLoc location ) throws SimpleDataSourceLoadException {
|
||||
// make a merging iterator for this record
|
||||
CloseableIterator<SAMRecord> iter = iterGen.seek(location);
|
||||
// return the iterator
|
||||
return StingSAMIteratorAdapter.adapt(reads, iter);
|
||||
public StingSAMIterator seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException {
|
||||
return iteratorPool.iterator(location);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -153,7 +145,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
* @return SAM file header.
|
||||
*/
|
||||
public SAMFileHeader getHeader() {
|
||||
return this.iterGen.getHeader();
|
||||
return iteratorPool.getHeader();
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -169,16 +161,16 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
private BoundedReadIterator seekRead( ReadShard shard ) throws SimpleDataSourceLoadException {
|
||||
|
||||
BoundedReadIterator bound = null;
|
||||
CloseableIterator<SAMRecord> iter = null;
|
||||
StingSAMIterator iter = null;
|
||||
|
||||
if (!intoUnmappedReads) {
|
||||
if (lastReadPos == null) {
|
||||
lastReadPos = new GenomeLoc(iterGen.getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
|
||||
iter = iterGen.seek(lastReadPos);
|
||||
lastReadPos = new GenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
|
||||
iter = iteratorPool.iterator(lastReadPos);
|
||||
return InitialReadIterator(shard.getSize(), iter);
|
||||
} else {
|
||||
lastReadPos.setStop(-1);
|
||||
iter = iterGen.seek(lastReadPos);
|
||||
iter = iteratorPool.iterator(lastReadPos);
|
||||
bound = fastMappedReadSeek(shard.getSize(), StingSAMIteratorAdapter.adapt(reads, iter));
|
||||
}
|
||||
}
|
||||
|
|
@ -187,8 +179,8 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
if (iter != null) {
|
||||
iter.close();
|
||||
}
|
||||
iter = iterGen.seek(null);
|
||||
bound = toUnmappedReads(shard.getSize(), (PeekingStingIterator) iter);
|
||||
iter = iteratorPool.iterator(null);
|
||||
bound = toUnmappedReads(shard.getSize(), iter);
|
||||
}
|
||||
if (bound == null) {
|
||||
shard.signalDone();
|
||||
|
|
@ -219,39 +211,41 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
* @return the bounded iterator that you can use to get the intervaled reads from
|
||||
* @throws SimpleDataSourceLoadException
|
||||
*/
|
||||
<U extends PeekingStingIterator> BoundedReadIterator toUnmappedReads( long readCount, U iter ) throws SimpleDataSourceLoadException {
|
||||
BoundedReadIterator toUnmappedReads( long readCount, StingSAMIterator iter ) throws SimpleDataSourceLoadException {
|
||||
PeekableIterator<SAMRecord> peekable = new PeekableIterator<SAMRecord>(iter);
|
||||
|
||||
int count = 0;
|
||||
int cnt = 0;
|
||||
SAMRecord d = null;
|
||||
while (iter.hasNext()) {
|
||||
d = iter.peek();
|
||||
while (peekable.hasNext()) {
|
||||
d = peekable.peek();
|
||||
int x = d.getReferenceIndex();
|
||||
if (x < 0)
|
||||
// we have the magic read that starts the unmapped read segment!
|
||||
break;
|
||||
cnt++;
|
||||
iter.next();
|
||||
peekable.next();
|
||||
}
|
||||
|
||||
// check to see what happened, did we run out of reads?
|
||||
if (!iter.hasNext()) {
|
||||
if (!peekable.hasNext()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// now walk until we've taken the unmapped read count
|
||||
while (iter.hasNext() && count < this.readsTaken) {
|
||||
iter.next();
|
||||
while (peekable.hasNext() && count < this.readsTaken) {
|
||||
peekable.next();
|
||||
count++;
|
||||
}
|
||||
|
||||
// check to see what happened, did we run out of reads?
|
||||
if (!iter.hasNext()) {
|
||||
if (!peekable.hasNext()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// we're not out of unmapped reads, so increment our read cout
|
||||
this.readsTaken += readCount;
|
||||
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount);
|
||||
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, peekable), readCount);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -293,7 +287,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
readsTaken = readCount;
|
||||
readsSeenAtLastPos = 0;
|
||||
lastReadPos.setStop(-1);
|
||||
CloseableIterator<SAMRecord> ret = iterGen.seek(lastReadPos);
|
||||
CloseableIterator<SAMRecord> ret = iteratorPool.iterator(lastReadPos);
|
||||
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, ret), readCount);
|
||||
}
|
||||
}
|
||||
|
|
@ -360,42 +354,81 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
return bound;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
class SAMIteratorPool extends ResourcePool<SamFileHeaderMerger,StingSAMIterator> {
|
||||
/**
|
||||
* Source information about the reads.
|
||||
*/
|
||||
protected Reads reads;
|
||||
|
||||
/**
|
||||
* Is this a by-reads traversal or a by-locus?
|
||||
*/
|
||||
protected boolean byReads;
|
||||
|
||||
/**
|
||||
* File header for the combined file.
|
||||
*/
|
||||
protected SAMFileHeader header;
|
||||
|
||||
/**
|
||||
* iterator generator
|
||||
* <p/>
|
||||
* This class generates iterators for the SAMDataSource. This class was introduced for testing purposes,
|
||||
* since it became increasingly hard to test the SAM data source code. The class defines two abstraact
|
||||
* methods:
|
||||
* <p/>
|
||||
* -seek( GenomeLoc ) which returns an iterator seeked to the genome loc, and if null is passed to the default
|
||||
* location (which is implementation specific).
|
||||
* <p/>
|
||||
* -getHeader(), which returns a SAMFileHeader for the specified IteratorGenerator. I hope we can phase this
|
||||
* method out, since it doesn't seem necessary, and it would be much cleaner with out it.
|
||||
*/
|
||||
abstract class IteratorGenerator {
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static Logger logger = Logger.getLogger(SAMDataSource.class);
|
||||
protected static Logger logger = Logger.getLogger(SAMIteratorPool.class);
|
||||
|
||||
public SAMIteratorPool( Reads reads, boolean byReads ) {
|
||||
this.reads = reads;
|
||||
this.byReads = byReads;
|
||||
|
||||
SamFileHeaderMerger merger = createNewResource( null );
|
||||
this.header = merger.getMergedHeader();
|
||||
// Add this resource to the pool.
|
||||
this.addNewResource( merger );
|
||||
}
|
||||
|
||||
/**
|
||||
* seek to a location
|
||||
*
|
||||
* @param seekTo the genome loc to seek to
|
||||
*
|
||||
* @return StingSAMIterator
|
||||
* Get the combined header for all files in the iterator pool.
|
||||
*/
|
||||
public abstract CloseableIterator<SAMRecord> seek( GenomeLoc seekTo );
|
||||
public SAMFileHeader getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the merged header
|
||||
*
|
||||
* @return the merged header
|
||||
*/
|
||||
public abstract SAMFileHeader getHeader();
|
||||
protected SamFileHeaderMerger selectBestExistingResource( GenomeLoc position, List<SamFileHeaderMerger> mergers) {
|
||||
if( mergers.size() == 0 )
|
||||
return null;
|
||||
return mergers.get(0);
|
||||
}
|
||||
|
||||
protected SamFileHeaderMerger createNewResource( GenomeLoc position ) {
|
||||
return createHeaderMerger( reads, SAMFileHeader.SortOrder.coordinate );
|
||||
}
|
||||
|
||||
protected StingSAMIterator createIteratorFromResource( GenomeLoc loc, SamFileHeaderMerger headerMerger ) {
|
||||
final MergingSamRecordIterator2 iterator = new MergingSamRecordIterator2(headerMerger, reads);
|
||||
|
||||
if( loc != null ) {
|
||||
if (byReads)
|
||||
iterator.queryContained(loc.getContig(), (int) loc.getStart(), (int) loc.getStop());
|
||||
else
|
||||
iterator.queryOverlapping(loc.getContig(), (int) loc.getStart(), (int) loc.getStop());
|
||||
}
|
||||
|
||||
return new StingSAMIterator() {
|
||||
public Reads getSourceInfo() { return reads; }
|
||||
public void close() {
|
||||
iterator.close();
|
||||
release(this);
|
||||
}
|
||||
public Iterator<SAMRecord> iterator() { return this; }
|
||||
public boolean hasNext() { return iterator.hasNext(); }
|
||||
public SAMRecord next() { return iterator.next(); }
|
||||
public void remove() { throw new UnsupportedOperationException("Can't remove from a StingSAMIterator"); }
|
||||
};
|
||||
}
|
||||
|
||||
protected void closeResource( SamFileHeaderMerger resource ) {
|
||||
for( SAMFileReader reader: resource.getReaders() )
|
||||
reader.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Load a SAM/BAM, given an input file.
|
||||
|
|
@ -404,7 +437,7 @@ abstract class IteratorGenerator {
|
|||
*
|
||||
* @return a SAMFileReader for the file, null if we're attempting to read a list
|
||||
*/
|
||||
protected static SAMFileReader initializeSAMFile( final File samFile, SAMFileReader.ValidationStringency strictness ) {
|
||||
protected SAMFileReader initializeSAMFile( final File samFile, SAMFileReader.ValidationStringency strictness ) {
|
||||
if (samFile.toString().endsWith(".list")) {
|
||||
return null;
|
||||
} else {
|
||||
|
|
@ -425,11 +458,11 @@ abstract class IteratorGenerator {
|
|||
* @return a list of SAMFileReaders that represent the stored file names
|
||||
* @throws SimpleDataSourceLoadException if there's a problem loading the files
|
||||
*/
|
||||
protected static List<SAMFileReader> GetReaderList( Reads reads, SAMFileReader.ValidationStringency strictness ) throws SimpleDataSourceLoadException {
|
||||
protected List<SAMFileReader> GetReaderList( Reads reads ) throws SimpleDataSourceLoadException {
|
||||
// right now this is pretty damn heavy, it copies the file list into a reader list every time
|
||||
List<SAMFileReader> lst = new ArrayList<SAMFileReader>();
|
||||
for (File f : reads.getReadsFiles()) {
|
||||
SAMFileReader reader = initializeSAMFile(f, strictness);
|
||||
SAMFileReader reader = initializeSAMFile(f, reads.getValidationStringency());
|
||||
|
||||
if (reader.getFileHeader().getReadGroups().size() < 1) {
|
||||
//logger.warn("Setting header in reader " + f.getName());
|
||||
|
|
@ -453,60 +486,8 @@ abstract class IteratorGenerator {
|
|||
*
|
||||
* @return a SamFileHeaderMerger that includes the set of SAM files we were created with
|
||||
*/
|
||||
protected static SamFileHeaderMerger createHeaderMerger( Reads reads, SAMFileReader.ValidationStringency strictness, SAMFileHeader.SortOrder SORT_ORDER ) {
|
||||
List<SAMFileReader> lst = GetReaderList(reads, strictness);
|
||||
protected SamFileHeaderMerger createHeaderMerger( Reads reads, SAMFileHeader.SortOrder SORT_ORDER ) {
|
||||
List<SAMFileReader> lst = GetReaderList(reads);
|
||||
return new SamFileHeaderMerger(lst, SORT_ORDER, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* MSR2IteratorGenerator
|
||||
* <p/>
|
||||
* generates a MerginsSAMIterator2, given a genomic location. The constructor takes the reads structure,
|
||||
* and a flag indicating if we're dealing with reads or loci (to determine the correct query function).
|
||||
*/
|
||||
class MSR2IteratorGenerator extends IteratorGenerator {
|
||||
/** our read pile */
|
||||
private Reads reads;
|
||||
|
||||
private SamFileHeaderMerger header;
|
||||
|
||||
// How strict should we be with SAM/BAM parsing?
|
||||
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
|
||||
|
||||
// are we by reads or by loci
|
||||
protected boolean byReads = true;
|
||||
|
||||
/** our SAM data files */
|
||||
private final SAMFileHeader.SortOrder sortOrder = SAMFileHeader.SortOrder.coordinate;
|
||||
|
||||
public MSR2IteratorGenerator( Reads reads, boolean byReads ) {
|
||||
this.reads = reads;
|
||||
this.header = IteratorGenerator.createHeaderMerger(reads, strictness, sortOrder);
|
||||
this.byReads = byReads;
|
||||
}
|
||||
|
||||
public CloseableIterator<SAMRecord> seek( GenomeLoc seekTo ) {
|
||||
SamFileHeaderMerger mg = createHeaderMerger(reads, strictness, sortOrder);
|
||||
MergingSamRecordIterator2 iter = new MergingSamRecordIterator2(mg, reads);
|
||||
if (seekTo != null) {
|
||||
if (byReads)
|
||||
iter.queryContained(seekTo.getContig(), (int) seekTo.getStart(), (int) seekTo.getStop());
|
||||
else
|
||||
iter.queryOverlapping(seekTo.getContig(), (int) seekTo.getStart(), (int) seekTo.getStop());
|
||||
}
|
||||
return iter;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the merged header
|
||||
*
|
||||
* @return the merged header
|
||||
*/
|
||||
public SAMFileHeader getHeader() {
|
||||
return header.getMergedHeader();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -55,9 +55,6 @@ public class BoundedReadIterator implements StingSAMIterator {
|
|||
// our unmapped read flag
|
||||
private boolean doNotUseThatUnmappedReadPile = false;
|
||||
|
||||
// are we open
|
||||
private boolean isOpen = false;
|
||||
|
||||
// the next read we've buffered
|
||||
private SAMRecord record = null;
|
||||
|
||||
|
|
@ -67,10 +64,6 @@ public class BoundedReadIterator implements StingSAMIterator {
|
|||
* @param readCount
|
||||
*/
|
||||
public BoundedReadIterator(StingSAMIterator iter, long readCount) {
|
||||
if (iter != null) {
|
||||
isOpen = true;
|
||||
|
||||
}
|
||||
this.iterator = iter;
|
||||
this.readCount = readCount;
|
||||
}
|
||||
|
|
@ -103,7 +96,7 @@ public class BoundedReadIterator implements StingSAMIterator {
|
|||
* @return
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
if (isOpen && iterator.hasNext() && currentCount < readCount) {
|
||||
if (iterator.hasNext() && currentCount < readCount) {
|
||||
record = iterator.next();
|
||||
++currentCount;
|
||||
if (record.getAlignmentStart() == 0 && doNotUseThatUnmappedReadPile) {
|
||||
|
|
@ -111,9 +104,6 @@ public class BoundedReadIterator implements StingSAMIterator {
|
|||
}
|
||||
return true;
|
||||
} else {
|
||||
if (isOpen) {
|
||||
close();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -137,7 +127,6 @@ public class BoundedReadIterator implements StingSAMIterator {
|
|||
* close the iterator
|
||||
*/
|
||||
public void close() {
|
||||
isOpen = false;
|
||||
iterator.close();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -93,7 +93,6 @@ public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>,
|
|||
}
|
||||
final SAMRecordComparator comparator = getComparator();
|
||||
for (final SAMFileReader reader : samHeaderMerger.getReaders()) {
|
||||
//reader.close();
|
||||
Iterator<SAMRecord> recordIter = reader.query(contig, start, stop, contained);
|
||||
final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, recordIter, comparator);
|
||||
addIfNotEmpty(iterator);
|
||||
|
|
@ -257,13 +256,12 @@ public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>,
|
|||
|
||||
|
||||
/**
|
||||
* closes all the file handles for the readers....DO THIS or you will run out of handles
|
||||
* closes all open iterators....DO THIS or you will run out of handles
|
||||
* with sharding.
|
||||
*/
|
||||
public void close() {
|
||||
for (SAMFileReader reader : samHeaderMerger.getReaders()) {
|
||||
reader.close();
|
||||
}
|
||||
for( ComparableSamRecordIterator iterator: pq )
|
||||
iterator.close();
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import java.io.FileNotFoundException;
|
|||
* Test the contents and number of iterators in the pool.
|
||||
*/
|
||||
|
||||
public class IteratorPoolTest extends BaseTest {
|
||||
public class ReferenceOrderedDataPoolTest extends BaseTest {
|
||||
|
||||
private static File sequenceFile = new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta");
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ public class IteratorPoolTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testCreateSingleIterator() {
|
||||
IteratorPool iteratorPool = new IteratorPool(rod);
|
||||
ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod);
|
||||
RODIterator iterator = (RODIterator)iteratorPool.iterator( testSite1 );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
|
|
@ -69,7 +69,7 @@ public class IteratorPoolTest extends BaseTest {
|
|||
assertTrue(datum.get("COL2").equals("B"));
|
||||
assertTrue(datum.get("COL3").equals("C"));
|
||||
|
||||
iteratorPool.close(iterator);
|
||||
iteratorPool.release(iterator);
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators());
|
||||
|
|
@ -77,7 +77,7 @@ public class IteratorPoolTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testCreateMultipleIterators() {
|
||||
IteratorPool iteratorPool = new IteratorPool(rod);
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
|
||||
RODIterator iterator1 = (RODIterator)iteratorPool.iterator( testSite1 );
|
||||
|
||||
// Create a new iterator at position 2.
|
||||
|
|
@ -114,12 +114,12 @@ public class IteratorPoolTest extends BaseTest {
|
|||
assertTrue(datum.get("COL3").equals("E"));
|
||||
|
||||
// Cleanup, and make sure the number of iterators dies appropriately.
|
||||
iteratorPool.close(iterator1);
|
||||
iteratorPool.release(iterator1);
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators());
|
||||
|
||||
iteratorPool.close(iterator2);
|
||||
iteratorPool.release(iterator2);
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 2, iteratorPool.numAvailableIterators());
|
||||
|
|
@ -127,7 +127,7 @@ public class IteratorPoolTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testIteratorConservation() {
|
||||
IteratorPool iteratorPool = new IteratorPool(rod);
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
|
||||
RODIterator iterator = (RODIterator)iteratorPool.iterator( testSite1 );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
|
|
@ -139,7 +139,7 @@ public class IteratorPoolTest extends BaseTest {
|
|||
assertTrue(datum.get("COL2").equals("B"));
|
||||
assertTrue(datum.get("COL3").equals("C"));
|
||||
|
||||
iteratorPool.close(iterator);
|
||||
iteratorPool.release(iterator);
|
||||
|
||||
// Create another iterator after the current iterator.
|
||||
iterator = iteratorPool.iterator(testSite3);
|
||||
|
|
@ -154,7 +154,7 @@ public class IteratorPoolTest extends BaseTest {
|
|||
assertTrue(datum.get("COL2").equals("G"));
|
||||
assertTrue(datum.get("COL3").equals("H"));
|
||||
|
||||
iteratorPool.close(iterator);
|
||||
iteratorPool.release(iterator);
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators());
|
||||
|
|
@ -162,7 +162,7 @@ public class IteratorPoolTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testIteratorCreation() {
|
||||
IteratorPool iteratorPool = new IteratorPool(rod);
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
|
||||
RODIterator iterator = (RODIterator)iteratorPool.iterator( testSite3 );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
|
|
@ -174,7 +174,7 @@ public class IteratorPoolTest extends BaseTest {
|
|||
assertTrue(datum.get("COL2").equals("G"));
|
||||
assertTrue(datum.get("COL3").equals("H"));
|
||||
|
||||
iteratorPool.close(iterator);
|
||||
iteratorPool.release(iterator);
|
||||
|
||||
// Create another iterator after the current iterator.
|
||||
iterator = iteratorPool.iterator(testSite1);
|
||||
|
|
@ -189,7 +189,7 @@ public class IteratorPoolTest extends BaseTest {
|
|||
assertTrue(datum.get("COL2").equals("B"));
|
||||
assertTrue(datum.get("COL3").equals("C"));
|
||||
|
||||
iteratorPool.close(iterator);
|
||||
iteratorPool.release(iterator);
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 2, iteratorPool.numAvailableIterators());
|
||||
|
|
@ -22,6 +22,7 @@ import org.junit.Test;
|
|||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
@ -76,7 +77,7 @@ public class SAMByReadsTest extends BaseTest {
|
|||
/** Test out that we can shard the file and iterate over every read */
|
||||
@Test
|
||||
public void testToUnmappedReads() {
|
||||
ArtificialIteratorGenerator gen = new ArtificialIteratorGenerator(1,10,100,1000);
|
||||
ArtificialResourcePool gen = new ArtificialResourcePool(1,10,100,1000);
|
||||
GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
try {
|
||||
int unmappedReadsSeen = 0;
|
||||
|
|
@ -109,7 +110,7 @@ public class SAMByReadsTest extends BaseTest {
|
|||
/** Test out that we can shard the file and iterate over every read */
|
||||
@Test
|
||||
public void testShardingOfReadsSize14() {
|
||||
ArtificialIteratorGenerator gen = new ArtificialIteratorGenerator(1,10,100,1000);
|
||||
ArtificialResourcePool gen = new ArtificialResourcePool(1,10,100,1000);
|
||||
GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
targetReadCount = 14;
|
||||
try {
|
||||
|
|
@ -118,7 +119,7 @@ public class SAMByReadsTest extends BaseTest {
|
|||
SAMDataSource data = new SAMDataSource(reads,true);
|
||||
|
||||
|
||||
data.setIterGen(gen);
|
||||
data.setResourcePool(gen);
|
||||
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS, gen.getHeader().getSequenceDictionary(), targetReadCount);
|
||||
while (shardStrategy.hasNext()) {
|
||||
|
||||
|
|
@ -159,7 +160,7 @@ public class SAMByReadsTest extends BaseTest {
|
|||
/** Test out that we can shard the file and iterate over every read */
|
||||
@Test
|
||||
public void testShardingOfReadsSize25() {
|
||||
ArtificialIteratorGenerator gen = new ArtificialIteratorGenerator(1,10,100,1000);
|
||||
ArtificialResourcePool gen = new ArtificialResourcePool(1,10,100,1000);
|
||||
GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
targetReadCount = 25;
|
||||
try {
|
||||
|
|
@ -168,7 +169,7 @@ public class SAMByReadsTest extends BaseTest {
|
|||
SAMDataSource data = new SAMDataSource(reads,true);
|
||||
|
||||
|
||||
data.setIterGen(gen);
|
||||
data.setResourcePool(gen);
|
||||
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS, gen.getHeader().getSequenceDictionary(), targetReadCount);
|
||||
while (shardStrategy.hasNext()) {
|
||||
|
||||
|
|
@ -212,28 +213,25 @@ public class SAMByReadsTest extends BaseTest {
|
|||
/**
|
||||
* use this to inject into SAMDataSource for testing
|
||||
*/
|
||||
class ArtificialIteratorGenerator extends IteratorGenerator {
|
||||
class ArtificialResourcePool extends SAMIteratorPool {
|
||||
// How strict should we be with SAM/BAM parsing?
|
||||
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
|
||||
|
||||
// the header
|
||||
private SAMFileHeader header;
|
||||
private int endingChr;
|
||||
private int startingChr;
|
||||
private int readCount;
|
||||
private int readSize;
|
||||
/** our SAM data files */
|
||||
private final SAMFileHeader.SortOrder sortOrder = SAMFileHeader.SortOrder.coordinate;
|
||||
|
||||
public ArtificialIteratorGenerator( int startingChr, int endingChr, int readCount, int readSize) {
|
||||
public ArtificialResourcePool( int startingChr, int endingChr, int readCount, int readSize) {
|
||||
super( new Reads(Collections.<File>emptyList()),true );
|
||||
header = ArtificialSamUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + readSize);
|
||||
|
||||
}
|
||||
|
||||
public CloseableIterator<SAMRecord> seek( GenomeLoc seekTo ) {
|
||||
@Override
|
||||
public StingSAMIterator iterator( GenomeLoc loc ) {
|
||||
ArtificialSAMQueryIterator iter = ArtificialSamUtils.queryReadIterator(1, 10, 100, 1000);
|
||||
if (seekTo != null) {
|
||||
iter.queryContained(seekTo.getContig(), (int)seekTo.getStart(), (int)seekTo.getStop());
|
||||
if (loc != null) {
|
||||
iter.queryContained(loc.getContig(), (int)loc.getStart(), (int)loc.getStop());
|
||||
}
|
||||
return iter;
|
||||
}
|
||||
|
|
@ -244,6 +242,6 @@ class ArtificialIteratorGenerator extends IteratorGenerator {
|
|||
* @return the merged header
|
||||
*/
|
||||
public SAMFileHeader getHeader() {
|
||||
return this.header;
|
||||
return this.header;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue