Added support for the -M option in traversals.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@935 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-06-08 15:12:24 +00:00
parent e2ed56dc96
commit a8a2d0eab9
13 changed files with 214 additions and 157 deletions

View File

@ -105,8 +105,8 @@ public class GATKArgumentCollection {
public Boolean walkAllLoci = false;
@Element(required=false)
@Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of reads to process before exiting", required = false)
public String maximumReads = "-1";
@Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of iterations to process before exiting, the lower bound is zero. Intended only for testing", required = false)
public Integer maximumEngineIterations = -1;
@Element(required=false)
@Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false)
@ -215,7 +215,7 @@ public class GATKArgumentCollection {
if (!other.samFiles.equals(this.samFiles)) {
return false;
}
if (!other.maximumReads.equals(this.maximumReads)) {
if (!other.maximumEngineIterations.equals(this.maximumEngineIterations)) {
return false;
}
if (!other.strictnessLevel.equals(this.strictnessLevel)) {

View File

@ -122,7 +122,7 @@ public class GenomeAnalysisEngine {
locs = GenomeLocSortedSet.createSetFromList(locationsList);
// excute the microscheduler, storing the results
walkerReturn = microScheduler.execute(my_walker, locs);
walkerReturn = microScheduler.execute(my_walker, locs, argCollection.maximumEngineIterations);
}
@ -199,7 +199,7 @@ public class GenomeAnalysisEngine {
private void genericEngineSetup(ValidationStringency strictness) {
engine.setStrictness(strictness);
engine.setMaxReads(Integer.parseInt(argCollection.maximumReads));
engine.setMaxReads(argCollection.maximumEngineIterations);
engine.setFilterZeroMappingQualityReads(argCollection.filterZeroMappingQualityReads);
// we default interval files over the genome region string

View File

@ -52,8 +52,9 @@ public class ExpGrowthLocusShardStrategy extends LocusShardStrategy {
*
* @param dic the seq dictionary
*/
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize) {
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, long limitByCount) {
super(dic);
this.limitingFactor = limitByCount;
this.baseSize = startSize;
currentExp = 0;
}
@ -76,8 +77,9 @@ public class ExpGrowthLocusShardStrategy extends LocusShardStrategy {
* @param startSize the starting size of the shard
* @param lst locations to iterate from
*/
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst) {
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst, long limitByCount) {
super(dic, lst);
this.limitingFactor = limitByCount;
this.baseSize = startSize;
this.currentExp = 0;
}

View File

@ -51,8 +51,9 @@ class LinearLocusShardStrategy extends LocusShardStrategy {
*
* @param dic the seq dictionary
*/
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize) {
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, long limitByCount) {
super(dic);
this.limitingFactor = limitByCount;
this.nextShardSize = startSize;
}
@ -73,8 +74,9 @@ class LinearLocusShardStrategy extends LocusShardStrategy {
* @param startSize the starting size of the shard
* @param lst locations to iterate from
*/
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst) {
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst, long limitByCount) {
super(dic, lst);
this.limitingFactor = limitByCount;
this.nextShardSize = startSize;
}

View File

@ -51,14 +51,18 @@ public abstract class LocusShardStrategy implements ShardStrategy {
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(LocusShardStrategy.class);
/** the number of iterations before we stop */
protected long limitingFactor = -1;
private boolean stopDueToLimitingFactor = false;
/**
* the constructor, taking a seq dictionary to parse out contigs
*
* @param dic the seq dictionary
*/
LocusShardStrategy(SAMSequenceDictionary dic) {
LocusShardStrategy( SAMSequenceDictionary dic ) {
this.dic = dic;
limitingFactor = -1;
mLoc = new GenomeLoc(0, 0, 0);
if (dic.getSequences().size() > 0) {
nextContig = true;
@ -70,12 +74,13 @@ public abstract class LocusShardStrategy implements ShardStrategy {
*
* @param old the old strategy
*/
LocusShardStrategy(LocusShardStrategy old) {
LocusShardStrategy( LocusShardStrategy old ) {
this.dic = old.dic;
this.mLoc = old.mLoc;
this.seqLoc = old.seqLoc;
this.lastGenomeLocSize = old.lastGenomeLocSize;
this.nextContig = old.nextContig;
this.limitingFactor = old.limitingFactor;
}
@ -85,7 +90,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
* @param dic the seq dictionary
* @param intervals file
*/
LocusShardStrategy(SAMSequenceDictionary dic, GenomeLocSortedSet intervals) {
LocusShardStrategy( SAMSequenceDictionary dic, GenomeLocSortedSet intervals ) {
this.dic = dic;
this.intervals = intervals.clone();
// set the starting point to the beginning interval
@ -132,6 +137,15 @@ public abstract class LocusShardStrategy implements ShardStrategy {
long proposedSize = nextShardSize();
long nextStart = mLoc.getStop() + 1;
if (this.limitingFactor > 0) {
if (proposedSize < limitingFactor) {
limitingFactor = limitingFactor - proposedSize;
} else {
proposedSize = limitingFactor;
this.stopDueToLimitingFactor = true;
}
}
// if we don't have an interval set, use the non interval based approach. Simple, eh?
if (this.intervals == null) {
return nonIntervaledNext(length, proposedSize, nextStart);
@ -148,8 +162,8 @@ public abstract class LocusShardStrategy implements ShardStrategy {
*
* @return the shard that represents this data
*/
private Shard intervaledNext(long proposedSize) {
if ((this.intervals == null) || (intervals.isEmpty())) {
private Shard intervaledNext( long proposedSize ) {
if (( this.intervals == null ) || ( intervals.isEmpty() )) {
throw new StingException("LocusShardStrategy: genomic regions list is empty in next() function.");
}
@ -175,12 +189,12 @@ public abstract class LocusShardStrategy implements ShardStrategy {
*
* @return the shard to return to the user
*/
private Shard nonIntervaledNext(long length, long proposedSize, long nextStart) {
private Shard nonIntervaledNext( long length, long proposedSize, long nextStart ) {
// can we fit it into the current seq size?
if (nextStart + proposedSize - 1 < length) {
lastGenomeLocSize = proposedSize;
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1);
return LocusShard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1));
return LocusShard.toShard(mLoc);
}
// else we can't make it in the current location, we have to stitch one together
else {
@ -207,7 +221,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
private void jumpContig() {
++seqLoc;
if (!(seqLoc < dic.getSequences().size())) {
if (!( seqLoc < dic.getSequences().size() )) {
nextContig = false;
return;
}
@ -223,11 +237,14 @@ public abstract class LocusShardStrategy implements ShardStrategy {
* @return
*/
public boolean hasNext() {
if (this.stopDueToLimitingFactor) {
return false;
}
// if we don't have an interval file, use the non interval based approach.
if (this.intervals == null) {
return nextContig;
} else {
return (this.intervals.size() > 0);
return ( this.intervals.size() > 0 );
}
}

View File

@ -54,14 +54,19 @@ public class ReadShardStrategy implements ShardStrategy {
// our hasnext flag
boolean hasNext = true;
// our limiting factor
long limitedSize = -1;
boolean stopDueToLimitingFactor = false;
/**
* the default constructor
* @param dic the sequence dictionary to use
* @param size the read count to iterate over
*/
ReadShardStrategy(SAMSequenceDictionary dic, long size) {
ReadShardStrategy(SAMSequenceDictionary dic, long size, long limitedSize) {
this.dic = dic;
readCount = size;
readCount = size;
this.limitedSize = limitedSize;
}
/**
@ -69,10 +74,24 @@ public class ReadShardStrategy implements ShardStrategy {
* @return
*/
/**
 * Is there another shard available from this strategy?
 *
 * @return false once the -M iteration limit has been consumed; otherwise the
 *         strategy's normal end-of-data flag.
 */
public boolean hasNext() {
    // The -M limiting factor takes precedence over the normal end-of-data flag.
    return !stopDueToLimitingFactor && hasNext;
}
/**
 * Produce the next read shard, shrinking the final shard so the total number
 * of reads handed out never exceeds the -M limit.
 *
 * @return a ReadShard covering at most readCount reads.
 */
public Shard next() {
    if (limitedSize > 0) {
        boolean budgetCoversFullShard = limitedSize > readCount;
        if (budgetCoversFullShard) {
            // Spend one full shard's worth of the remaining budget.
            limitedSize -= readCount;
        } else {
            // Budget exhausted: emit a final, possibly smaller shard and stop.
            readCount = limitedSize;
            limitedSize = 0;
            stopDueToLimitingFactor = true;
        }
    }
    return new ReadShard((int) readCount, this);
}

View File

@ -50,13 +50,27 @@ public class ShardStrategyFactory {
* @return
*/
/**
 * Get a new shatter strategy with no iteration limit.
 *
 * @param strat        what's our strategy - SHATTER_STRATEGY type
 * @param dic          the seq dictionary
 * @param startingSize the starting size
 * @return the requested shard strategy
 */
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) {
    // Delegate to the limit-aware overload; a non-positive count disables the -M cap.
    final long noLimit = -1L;
    return ShardStrategyFactory.shatter(strat, dic, startingSize, noLimit);
}
/**
* get a new shatter strategy
*
* @param strat what's our strategy - SHATTER_STRATEGY type
* @param dic the seq dictionary
* @param startingSize the starting size
* @return
*/
/**
 * Get a new shatter strategy, limiting the traversal to at most limitByCount
 * iterations (the -M command line option).
 *
 * @param strat        what's our strategy - SHATTER_STRATEGY type
 * @param dic          the seq dictionary
 * @param startingSize the starting size
 * @param limitByCount maximum iteration count; non-positive means unlimited
 * @return the requested shard strategy
 * @throws StingException if the strategy is INTERVAL (incompatible with -M) or unimplemented
 */
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, long limitByCount) {
    switch (strat) {
        case LINEAR:
            return new LinearLocusShardStrategy(dic, startingSize, limitByCount);
        case EXPONENTIAL:
            return new ExpGrowthLocusShardStrategy(dic, startingSize, limitByCount);
        case READS:
            return new ReadShardStrategy(dic, startingSize, limitByCount);
        case INTERVAL:
            // fixed typo: "trategy" -> "strategy" in the user-facing message
            throw new StingException("Requested strategy: " + strat + " doesn't work with the limiting count (-M) command line option");
        default:
            throw new StingException("Strategy: " + strat + " isn't implemented for this type of shatter request");
    }
@ -73,11 +87,24 @@ public class ShardStrategyFactory {
* @return
*/
/**
 * Get a new interval-aware shatter strategy with no iteration limit.
 *
 * @param strat        what's our strategy - SHATTER_STRATEGY type
 * @param dic          the seq dictionary
 * @param startingSize the starting size
 * @param lst          locations to iterate from
 * @return the requested shard strategy
 */
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst) {
    // Use the uppercase L suffix: the original "-1l" reads as "-11" at a glance.
    return ShardStrategyFactory.shatter(strat, dic, startingSize, lst, -1L);
}
/**
* get a new shatter strategy
*
* @param strat what's our strategy - SHATTER_STRATEGY type
* @param dic the seq dictionary
* @param startingSize the starting size
* @return
*/
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst, long limitDataCount) {
switch (strat) {
case LINEAR:
return new LinearLocusShardStrategy(dic, startingSize, lst);
return new LinearLocusShardStrategy(dic, startingSize, lst, limitDataCount);
case EXPONENTIAL:
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst);
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst, limitDataCount);
case INTERVAL:
case READS:
return new IntervalShardStrategy(startingSize, lst);

View File

@ -45,82 +45,69 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
*/
private static final int MAX_OUTSTANDING_OUTPUT_MERGES = 50;
/**
* Manage currently running threads.
*/
/** Manage currently running threads. */
private ExecutorService threadPool;
private Queue<Shard> traverseTasks = new LinkedList<Shard>();
private Queue<TreeReduceTask> reduceTasks = new LinkedList<TreeReduceTask>();
private Queue<OutputMerger> outputMergeTasks = new LinkedList<OutputMerger>();
/**
* How many total tasks were in the queue at the start of run.
*/
private int totalTraversals = 0;
/** How many total tasks were in the queue at the start of run. */
private int totalTraversals = 0;
/**
* How many shard traversals have run to date?
*/
/** How many shard traversals have run to date? */
private int totalCompletedTraversals = 0;
/**
* What is the total time spent traversing shards?
*/
/** What is the total time spent traversing shards? */
private long totalShardTraverseTime = 0;
/**
* What is the total time spent tree reducing shard output?
*/
/** What is the total time spent tree reducing shard output? */
private long totalTreeReduceTime = 0;
/**
* How many tree reduces have been completed?
*/
/** How many tree reduces have been completed? */
private long totalCompletedTreeReduces = 0;
/**
* What is the total time spent merging output?
*/
/** What is the total time spent merging output? */
private long totalOutputMergeTime = 0;
/**
* Create a new hierarchical microscheduler to process the given reads and reference.
* @param reads Reads file(s) to process.
* @param refFile Reference for driving the traversal.
*
* @param reads Reads file(s) to process.
* @param refFile Reference for driving the traversal.
* @param nThreadsToUse maximum number of threads to use to do the work
*/
protected HierarchicalMicroScheduler( Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse ) {
super( walker, reads, refFile, rods );
super(walker, reads, refFile, rods);
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
try {
MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
ObjectName name = new ObjectName("org.broadinstitute.sting.gatk.executive:type=HierarchicalMicroScheduler");
mbs.registerMBean(this,name);
mbs.registerMBean(this, name);
}
catch( JMException ex ) {
catch (JMException ex) {
throw new StingException("Unable to register microscheduler with JMX", ex);
}
}
public Object execute( Walker walker, GenomeLocSortedSet intervals ) {
public Object execute( Walker walker, GenomeLocSortedSet intervals, Integer maxIterations ) {
// Fast fail for walkers not supporting TreeReducible interface.
if( !(walker instanceof TreeReducible) )
if (!( walker instanceof TreeReducible ))
throw new IllegalArgumentException("Hierarchical microscheduler only works with TreeReducible walkers");
ShardStrategy shardStrategy = getShardStrategy( walker, reference, intervals );
ReduceTree reduceTree = new ReduceTree( this );
ShardStrategy shardStrategy = getShardStrategy(walker, reference, intervals, maxIterations);
ReduceTree reduceTree = new ReduceTree(this);
walker.initialize();
for(Shard shard: shardStrategy)
for (Shard shard : shardStrategy)
traverseTasks.add(shard);
totalTraversals = traverseTasks.size();
while( isShardTraversePending() || isTreeReducePending() ) {
while (isShardTraversePending() || isTreeReducePending()) {
// Too many files sitting around taking up space? Merge them.
if( isMergeLimitExceeded() )
if (isMergeLimitExceeded())
mergeExistingOutput();
// Wait for the next slot in the queue to become free.
@ -128,10 +115,10 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
// Pick the next most appropriate task and run it. In the interest of
// memory conservation, hierarchical reduces always run before traversals.
if( isTreeReduceReady() )
queueNextTreeReduce( walker );
else if( isShardTraversePending() )
queueNextShardTraverse( walker, reduceTree );
if (isTreeReduceReady())
queueNextTreeReduce(walker);
else if (isShardTraversePending())
queueNextShardTraverse(walker, reduceTree);
}
// Merge any lingering output files. If these files aren't ready,
@ -144,10 +131,10 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
try {
result = reduceTree.getResult().get();
}
catch(Exception ex) {
throw new StingException("Unable to retrieve result", ex );
catch (Exception ex) {
throw new StingException("Unable to retrieve result", ex);
}
traversalEngine.printOnTraversalDone(result);
walker.onTraversalDone(result);
@ -156,6 +143,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/**
* Returns true if there are unscheduled shard traversal waiting to run.
*
* @return true if a shard traversal is waiting; false otherwise.
*/
protected boolean isShardTraversePending() {
@ -165,10 +153,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/**
* Returns true if there are tree reduces that can be run without
* blocking.
*
* @return true if a tree reduce is ready; false otherwise.
*/
protected boolean isTreeReduceReady() {
if( reduceTasks.size() == 0 )
if (reduceTasks.size() == 0)
return false;
return reduceTasks.peek().isReadyForReduce();
}
@ -177,6 +166,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
* Returns true if there are tree reduces that need to be run before
* the computation is complete. Returns true if any entries are in the queue,
* blocked or otherwise.
*
* @return true if a tree reduce is pending; false otherwise.
*/
protected boolean isTreeReducePending() {
@ -186,17 +176,18 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/**
* Returns whether the maximum number of files is sitting in the temp directory
* waiting to be merged back in.
*
* @return True if the merging needs to take priority. False otherwise.
*/
protected boolean isMergeLimitExceeded() {
if( outputMergeTasks.size() < MAX_OUTSTANDING_OUTPUT_MERGES )
if (outputMergeTasks.size() < MAX_OUTSTANDING_OUTPUT_MERGES)
return false;
// If any of the first MAX_OUTSTANDING merges aren't ready, the merge limit
// has not been exceeded.
OutputMerger[] outputMergers = outputMergeTasks.toArray( new OutputMerger[0] );
for( int i = 0; i < MAX_OUTSTANDING_OUTPUT_MERGES; i++ ) {
if( !outputMergers[i].isComplete() )
OutputMerger[] outputMergers = outputMergeTasks.toArray(new OutputMerger[0]);
for (int i = 0; i < MAX_OUTSTANDING_OUTPUT_MERGES; i++) {
if (!outputMergers[i].isComplete())
return false;
}
@ -207,10 +198,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/**
* Returns whether there is output waiting to be merged into the global output
* streams right now.
*
* @return True if this output is ready to be merged. False otherwise.
*/
protected boolean isOutputMergeReady() {
if( outputMergeTasks.size() > 0 )
if (outputMergeTasks.size() > 0)
return outputMergeTasks.peek().isComplete();
else
return false;
@ -224,105 +216,99 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
long startTime = System.currentTimeMillis();
OutputTracker outputTracker = GenomeAnalysisEngine.instance.getOutputTracker();
while( isOutputMergeReady() )
outputMergeTasks.remove().mergeInto( outputTracker.getGlobalOutStream(), outputTracker.getGlobalErrStream() );
while (isOutputMergeReady())
outputMergeTasks.remove().mergeInto(outputTracker.getGlobalOutStream(), outputTracker.getGlobalErrStream());
long endTime = System.currentTimeMillis();
totalOutputMergeTime += (endTime - startTime);
totalOutputMergeTime += ( endTime - startTime );
}
/**
* Merge any output that hasn't yet been taken care of by the blocking thread.
*/
/** Merge any output that hasn't yet been taken care of by the blocking thread. */
protected void mergeRemainingOutput() {
long startTime = System.currentTimeMillis();
OutputTracker outputTracker = GenomeAnalysisEngine.instance.getOutputTracker();
while( outputMergeTasks.size() > 0 ) {
while (outputMergeTasks.size() > 0) {
OutputMerger outputMerger = outputMergeTasks.remove();
synchronized(outputMerger) {
if( !outputMerger.isComplete() )
synchronized (outputMerger) {
if (!outputMerger.isComplete())
outputMerger.waitForOutputComplete();
}
outputMerger.mergeInto( outputTracker.getGlobalOutStream(), outputTracker.getGlobalErrStream() );
outputMerger.mergeInto(outputTracker.getGlobalOutStream(), outputTracker.getGlobalErrStream());
}
long endTime = System.currentTimeMillis();
totalOutputMergeTime += (endTime - startTime);
totalOutputMergeTime += ( endTime - startTime );
}
/**
* Queues the next traversal of a walker from the traversal tasks queue.
* @param walker Walker to apply to the dataset.
*
* @param walker Walker to apply to the dataset.
* @param reduceTree Tree of reduces to which to add this shard traverse.
*/
protected Future queueNextShardTraverse( Walker walker, ReduceTree reduceTree ) {
if( traverseTasks.size() == 0 )
throw new IllegalStateException( "Cannot traverse; no pending traversals exist.");
if (traverseTasks.size() == 0)
throw new IllegalStateException("Cannot traverse; no pending traversals exist.");
Shard shard = traverseTasks.remove();
OutputMerger outputMerger = new OutputMerger();
ShardTraverser traverser = new ShardTraverser( this,
getTraversalEngine(),
walker,
shard,
getShardDataProvider(shard),
outputMerger );
ShardTraverser traverser = new ShardTraverser(this,
getTraversalEngine(),
walker,
shard,
getShardDataProvider(shard),
outputMerger);
Future traverseResult = threadPool.submit(traverser);
// Add this traverse result to the reduce tree. The reduce tree will call a callback to throw its entries on the queue.
reduceTree.addEntry( traverseResult );
reduceTree.addEntry(traverseResult);
// No more data? Let the reduce tree know so it can finish processing what it's got.
if( !isShardTraversePending() )
if (!isShardTraversePending())
reduceTree.complete();
outputMergeTasks.add(outputMerger);
outputMergeTasks.add(outputMerger);
return traverseResult;
}
/**
* Pulls the next reduce from the queue and runs it.
*/
/** Pulls the next reduce from the queue and runs it. */
protected void queueNextTreeReduce( Walker walker ) {
if( reduceTasks.size() == 0 )
throw new IllegalStateException( "Cannot reduce; no pending reduces exist.");
if (reduceTasks.size() == 0)
throw new IllegalStateException("Cannot reduce; no pending reduces exist.");
TreeReduceTask reducer = reduceTasks.remove();
reducer.setWalker( (TreeReducible)walker );
reducer.setWalker((TreeReducible) walker);
threadPool.submit( reducer );
threadPool.submit(reducer);
}
/**
* Blocks until a free slot appears in the thread queue.
*/
/** Blocks until a free slot appears in the thread queue. */
protected void waitForFreeQueueSlot() {
ThreadPoolMonitor monitor = new ThreadPoolMonitor();
synchronized(monitor) {
threadPool.submit( monitor );
synchronized (monitor) {
threadPool.submit(monitor);
monitor.watch();
}
}
/**
* Callback for adding reduce tasks to the run queue.
*
* @return A new, composite future of the result of this reduce.
*/
public Future notifyReduce( Future lhs, Future rhs ) {
TreeReduceTask reducer = new TreeReduceTask( new TreeReducer( this, lhs, rhs ) );
TreeReduceTask reducer = new TreeReduceTask(new TreeReducer(this, lhs, rhs));
reduceTasks.add(reducer);
return reducer;
return reducer;
}
/**
* A small wrapper class that provides the TreeReducer interface along with the FutureTask semantics.
*/
/** A small wrapper class that provides the TreeReducer interface along with the FutureTask semantics. */
private class TreeReduceTask extends FutureTask {
private TreeReducer treeReducer = null;
@ -332,7 +318,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
}
public void setWalker( TreeReducible walker ) {
treeReducer.setWalker( walker );
treeReducer.setWalker(walker);
}
public boolean isReadyForReduce() {
@ -342,6 +328,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/**
* Used by the ShardTraverser to report time consumed traversing a given shard.
*
* @param shardTraversalTime Elapsed time traversing a given shard.
*/
synchronized void reportShardTraverseTime( long shardTraversalTime ) {
@ -351,6 +338,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/**
* Used by the TreeReducer to report time consumed reducing two shards.
*
* @param treeReduceTime Elapsed time reducing two shards.
*/
synchronized void reportTreeReduceTime( long treeReduceTime ) {
@ -359,69 +347,51 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public int getTotalNumberOfShards() {
return totalTraversals;
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public int getRemainingNumberOfShards() {
return traverseTasks.size();
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public int getNumberOfTasksInReduceQueue() {
return reduceTasks.size();
return reduceTasks.size();
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public int getNumberOfTasksInIOQueue() {
return outputMergeTasks.size();
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public long getTotalShardTraverseTimeMillis() {
return totalShardTraverseTime;
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public long getAvgShardTraverseTimeMillis() {
if( totalCompletedTraversals == 0 )
if (totalCompletedTraversals == 0)
return 0;
return totalShardTraverseTime / totalCompletedTraversals;
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public long getTotalTreeReduceTimeMillis() {
return totalTreeReduceTime;
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public long getAvgTreeReduceTimeMillis() {
if( totalCompletedTreeReduces == 0 )
if (totalCompletedTreeReduces == 0)
return 0;
return totalTreeReduceTime / totalCompletedTreeReduces;
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public long getTotalOutputMergeTimeMillis() {
return totalOutputMergeTime;
}

View File

@ -31,9 +31,10 @@ public class LinearMicroScheduler extends MicroScheduler {
*
* @param walker Computation to perform over dataset.
* @param locations Subset of the dataset over which to walk.
* @param maxIterations the maximum number of iterations we're to perform
*/
public Object execute(Walker walker, GenomeLocSortedSet locations) {
ShardStrategy shardStrategy = getShardStrategy(walker, reference, locations);
public Object execute(Walker walker, GenomeLocSortedSet locations, Integer maxIterations) {
ShardStrategy shardStrategy = getShardStrategy(walker, reference, locations, maxIterations);
walker.initialize();
Accumulator accumulator = Accumulator.create(walker);

View File

@ -99,9 +99,10 @@ public abstract class MicroScheduler {
* Walks a walker over the given list of intervals.
* @param walker Computation to perform over dataset.
* @param intervals A list of intervals over which to walk. Null for whole dataset.
* @param maxIterations the maximum number of iterations we're to perform
* @return the return type of the walker
*/
public abstract Object execute(Walker walker, GenomeLocSortedSet intervals);
public abstract Object execute(Walker walker, GenomeLocSortedSet intervals, Integer maxIterations);
/**
* Get the sharding strategy given a driving data source.
@ -110,7 +111,10 @@ public abstract class MicroScheduler {
* @param intervals Intervals to use when limiting sharding.
* @return Sharding strategy for this driving data source.
*/
protected ShardStrategy getShardStrategy(Walker walker, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) {
protected ShardStrategy getShardStrategy(Walker walker,
ReferenceSequenceFile drivingDataSource,
GenomeLocSortedSet intervals,
Integer maxIterations) {
ShardStrategy shardStrategy = null;
ShardStrategyFactory.SHATTER_STRATEGY shardType;
if (walker instanceof LocusWalker) {
@ -122,11 +126,11 @@ public abstract class MicroScheduler {
shardStrategy = ShardStrategyFactory.shatter(shardType,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE,
intervals);
intervals, maxIterations);
} else
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE);
SHARD_SIZE, maxIterations);
} else if (walker instanceof ReadWalker) {
@ -136,11 +140,11 @@ public abstract class MicroScheduler {
shardStrategy = ShardStrategyFactory.shatter(shardType,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE,
intervals);
intervals, maxIterations);
} else {
shardStrategy = ShardStrategyFactory.shatter(shardType,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE);
SHARD_SIZE, maxIterations);
}
} else
throw new StingException("Unable to support walker of type" + walker.getClass().getName());

View File

@ -73,7 +73,7 @@ public class GATKArgumentCollectionTest extends BaseTest {
List<File> input = new ArrayList<File>();
input.add(new File("test.file"));
collect.samFiles = input;
collect.maximumReads = "-1";
collect.maximumEngineIterations = -1;
collect.strictnessLevel = "strict";
collect.referenceFile = new File("referenceFile".toLowerCase());
collect.analysisName = "analysisName".toLowerCase();

View File

@ -58,7 +58,7 @@ public class LinearLocusShardStrategyTest extends BaseTest {
@Test
public void testSetup() {
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500);
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500, -1);
int counter = 0;
while(strat.hasNext()) {
Shard d = strat.next();
@ -71,7 +71,7 @@ public class LinearLocusShardStrategyTest extends BaseTest {
@Test
public void testAdjustSize() {
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500);
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500, -1);
strat.adjustNextShardSize(1000);
int counter = 0;
while(strat.hasNext()) {
@ -86,7 +86,7 @@ public class LinearLocusShardStrategyTest extends BaseTest {
@Test
public void testUnevenSplit() {
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600);
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600, -1);
int counter = 0;
while(strat.hasNext()) {
Shard d = strat.next();
@ -100,4 +100,19 @@ public class LinearLocusShardStrategyTest extends BaseTest {
}
assertTrue(counter == 10);
}
@Test
public void testDashMOption() {
    // Shard size 600 but -M limit 200: expect a single shard trimmed to 200 loci.
    LinearLocusShardStrategy strategy = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600, 200);
    int shardsSeen = 0;
    while (strategy.hasNext()) {
        Shard shard = strategy.next();
        assertTrue(shard instanceof LocusShard);
        // Inclusive bounds: a 200-locus shard spans stop - start == 199.
        assertTrue(shard.getGenomeLoc().getStop() - shard.getGenomeLoc().getStart() == 199);
        ++shardsSeen;
    }
    assertTrue(shardsSeen == 1);
}
}

View File

@ -129,7 +129,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
// setup the test files
fl.add(new File(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam"));
Reads reads = new Reads(fl);
Reads reads = new Reads(fl);
ArrayList<Integer> readcountPerShard = new ArrayList<Integer>();
ArrayList<Integer> readcountPerShard2 = new ArrayList<Integer>();