added support for the -M option in traversals.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@935 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e2ed56dc96
commit
a8a2d0eab9
|
|
@ -105,8 +105,8 @@ public class GATKArgumentCollection {
|
||||||
public Boolean walkAllLoci = false;
|
public Boolean walkAllLoci = false;
|
||||||
|
|
||||||
@Element(required=false)
|
@Element(required=false)
|
||||||
@Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of reads to process before exiting", required = false)
|
@Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of iterations to process before exiting, the lower bound is zero. Intended only for testing", required = false)
|
||||||
public String maximumReads = "-1";
|
public Integer maximumEngineIterations = -1;
|
||||||
|
|
||||||
@Element(required=false)
|
@Element(required=false)
|
||||||
@Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false)
|
@Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false)
|
||||||
|
|
@ -215,7 +215,7 @@ public class GATKArgumentCollection {
|
||||||
if (!other.samFiles.equals(this.samFiles)) {
|
if (!other.samFiles.equals(this.samFiles)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!other.maximumReads.equals(this.maximumReads)) {
|
if (!other.maximumEngineIterations.equals(this.maximumEngineIterations)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!other.strictnessLevel.equals(this.strictnessLevel)) {
|
if (!other.strictnessLevel.equals(this.strictnessLevel)) {
|
||||||
|
|
|
||||||
|
|
@ -122,7 +122,7 @@ public class GenomeAnalysisEngine {
|
||||||
locs = GenomeLocSortedSet.createSetFromList(locationsList);
|
locs = GenomeLocSortedSet.createSetFromList(locationsList);
|
||||||
|
|
||||||
// execute the microscheduler, storing the results
|
// execute the microscheduler, storing the results
|
||||||
walkerReturn = microScheduler.execute(my_walker, locs);
|
walkerReturn = microScheduler.execute(my_walker, locs, argCollection.maximumEngineIterations);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -199,7 +199,7 @@ public class GenomeAnalysisEngine {
|
||||||
private void genericEngineSetup(ValidationStringency strictness) {
|
private void genericEngineSetup(ValidationStringency strictness) {
|
||||||
engine.setStrictness(strictness);
|
engine.setStrictness(strictness);
|
||||||
|
|
||||||
engine.setMaxReads(Integer.parseInt(argCollection.maximumReads));
|
engine.setMaxReads(argCollection.maximumEngineIterations);
|
||||||
engine.setFilterZeroMappingQualityReads(argCollection.filterZeroMappingQualityReads);
|
engine.setFilterZeroMappingQualityReads(argCollection.filterZeroMappingQualityReads);
|
||||||
|
|
||||||
// we default interval files over the genome region string
|
// we default interval files over the genome region string
|
||||||
|
|
|
||||||
|
|
@ -52,8 +52,9 @@ public class ExpGrowthLocusShardStrategy extends LocusShardStrategy {
|
||||||
*
|
*
|
||||||
* @param dic the seq dictionary
|
* @param dic the seq dictionary
|
||||||
*/
|
*/
|
||||||
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize) {
|
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, long limitByCount) {
|
||||||
super(dic);
|
super(dic);
|
||||||
|
this.limitingFactor = limitByCount;
|
||||||
this.baseSize = startSize;
|
this.baseSize = startSize;
|
||||||
currentExp = 0;
|
currentExp = 0;
|
||||||
}
|
}
|
||||||
|
|
@ -76,8 +77,9 @@ public class ExpGrowthLocusShardStrategy extends LocusShardStrategy {
|
||||||
* @param startSize the starting size of the shard
|
* @param startSize the starting size of the shard
|
||||||
* @param lst locations to iterate from
|
* @param lst locations to iterate from
|
||||||
*/
|
*/
|
||||||
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst) {
|
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst, long limitByCount) {
|
||||||
super(dic, lst);
|
super(dic, lst);
|
||||||
|
this.limitingFactor = limitByCount;
|
||||||
this.baseSize = startSize;
|
this.baseSize = startSize;
|
||||||
this.currentExp = 0;
|
this.currentExp = 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -51,8 +51,9 @@ class LinearLocusShardStrategy extends LocusShardStrategy {
|
||||||
*
|
*
|
||||||
* @param dic the seq dictionary
|
* @param dic the seq dictionary
|
||||||
*/
|
*/
|
||||||
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize) {
|
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, long limitByCount) {
|
||||||
super(dic);
|
super(dic);
|
||||||
|
this.limitingFactor = limitByCount;
|
||||||
this.nextShardSize = startSize;
|
this.nextShardSize = startSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -73,8 +74,9 @@ class LinearLocusShardStrategy extends LocusShardStrategy {
|
||||||
* @param startSize the starting size of the shard
|
* @param startSize the starting size of the shard
|
||||||
* @param lst locations to iterate from
|
* @param lst locations to iterate from
|
||||||
*/
|
*/
|
||||||
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst) {
|
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst, long limitByCount) {
|
||||||
super(dic, lst);
|
super(dic, lst);
|
||||||
|
this.limitingFactor = limitByCount;
|
||||||
this.nextShardSize = startSize;
|
this.nextShardSize = startSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,9 @@ public abstract class LocusShardStrategy implements ShardStrategy {
|
||||||
/** our logger, which we want to capture anything logged from this class */
|
/** our logger, which we want to capture anything logged from this class */
|
||||||
private static Logger logger = Logger.getLogger(LocusShardStrategy.class);
|
private static Logger logger = Logger.getLogger(LocusShardStrategy.class);
|
||||||
|
|
||||||
|
/** the number of iterations before we stop */
|
||||||
|
protected long limitingFactor = -1;
|
||||||
|
private boolean stopDueToLimitingFactor = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* the constructor, taking a seq dictionary to parse out contigs
|
* the constructor, taking a seq dictionary to parse out contigs
|
||||||
|
|
@ -59,6 +62,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
|
||||||
*/
|
*/
|
||||||
LocusShardStrategy( SAMSequenceDictionary dic ) {
|
LocusShardStrategy( SAMSequenceDictionary dic ) {
|
||||||
this.dic = dic;
|
this.dic = dic;
|
||||||
|
limitingFactor = -1;
|
||||||
mLoc = new GenomeLoc(0, 0, 0);
|
mLoc = new GenomeLoc(0, 0, 0);
|
||||||
if (dic.getSequences().size() > 0) {
|
if (dic.getSequences().size() > 0) {
|
||||||
nextContig = true;
|
nextContig = true;
|
||||||
|
|
@ -76,6 +80,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
|
||||||
this.seqLoc = old.seqLoc;
|
this.seqLoc = old.seqLoc;
|
||||||
this.lastGenomeLocSize = old.lastGenomeLocSize;
|
this.lastGenomeLocSize = old.lastGenomeLocSize;
|
||||||
this.nextContig = old.nextContig;
|
this.nextContig = old.nextContig;
|
||||||
|
this.limitingFactor = old.limitingFactor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -132,6 +137,15 @@ public abstract class LocusShardStrategy implements ShardStrategy {
|
||||||
long proposedSize = nextShardSize();
|
long proposedSize = nextShardSize();
|
||||||
long nextStart = mLoc.getStop() + 1;
|
long nextStart = mLoc.getStop() + 1;
|
||||||
|
|
||||||
|
if (this.limitingFactor > 0) {
|
||||||
|
if (proposedSize < limitingFactor) {
|
||||||
|
limitingFactor = limitingFactor - proposedSize;
|
||||||
|
} else {
|
||||||
|
proposedSize = limitingFactor;
|
||||||
|
this.stopDueToLimitingFactor = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// if we don't have an interval set, use the non interval based approach. Simple, eh?
|
// if we don't have an interval set, use the non interval based approach. Simple, eh?
|
||||||
if (this.intervals == null) {
|
if (this.intervals == null) {
|
||||||
return nonIntervaledNext(length, proposedSize, nextStart);
|
return nonIntervaledNext(length, proposedSize, nextStart);
|
||||||
|
|
@ -180,7 +194,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
|
||||||
if (nextStart + proposedSize - 1 < length) {
|
if (nextStart + proposedSize - 1 < length) {
|
||||||
lastGenomeLocSize = proposedSize;
|
lastGenomeLocSize = proposedSize;
|
||||||
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1);
|
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1);
|
||||||
return LocusShard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1));
|
return LocusShard.toShard(mLoc);
|
||||||
}
|
}
|
||||||
// else we can't make it in the current location, we have to stitch one together
|
// else we can't make it in the current location, we have to stitch one together
|
||||||
else {
|
else {
|
||||||
|
|
@ -223,6 +237,9 @@ public abstract class LocusShardStrategy implements ShardStrategy {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
|
if (this.stopDueToLimitingFactor) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
// if we don't have an interval file, use the non interval based approach.
|
// if we don't have an interval file, use the non interval based approach.
|
||||||
if (this.intervals == null) {
|
if (this.intervals == null) {
|
||||||
return nextContig;
|
return nextContig;
|
||||||
|
|
|
||||||
|
|
@ -54,14 +54,19 @@ public class ReadShardStrategy implements ShardStrategy {
|
||||||
// our hasnext flag
|
// our hasnext flag
|
||||||
boolean hasNext = true;
|
boolean hasNext = true;
|
||||||
|
|
||||||
|
// our limiting factor
|
||||||
|
long limitedSize = -1;
|
||||||
|
boolean stopDueToLimitingFactor = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* the default constructor
|
* the default constructor
|
||||||
* @param dic the sequence dictionary to use
|
* @param dic the sequence dictionary to use
|
||||||
* @param size the read count to iterate over
|
* @param size the read count to iterate over
|
||||||
*/
|
*/
|
||||||
ReadShardStrategy(SAMSequenceDictionary dic, long size) {
|
ReadShardStrategy(SAMSequenceDictionary dic, long size, long limitedSize) {
|
||||||
this.dic = dic;
|
this.dic = dic;
|
||||||
readCount = size;
|
readCount = size;
|
||||||
|
this.limitedSize = limitedSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -69,10 +74,24 @@ public class ReadShardStrategy implements ShardStrategy {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
|
if (stopDueToLimitingFactor) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return hasNext;
|
return hasNext;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Shard next() {
|
public Shard next() {
|
||||||
|
if (limitedSize > 0) {
|
||||||
|
if (limitedSize > readCount) {
|
||||||
|
limitedSize = limitedSize - readCount;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
readCount = limitedSize;
|
||||||
|
limitedSize = 0;
|
||||||
|
stopDueToLimitingFactor = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return new ReadShard((int)readCount, this);
|
return new ReadShard((int)readCount, this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -50,13 +50,27 @@ public class ShardStrategyFactory {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) {
|
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) {
|
||||||
|
return ShardStrategyFactory.shatter(strat, dic, startingSize, -1L);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get a new shatter strategy
|
||||||
|
*
|
||||||
|
* @param strat what's our strategy - SHATTER_STRATEGY type
|
||||||
|
* @param dic the seq dictionary
|
||||||
|
* @param startingSize the starting size
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, long limitByCount) {
|
||||||
switch (strat) {
|
switch (strat) {
|
||||||
case LINEAR:
|
case LINEAR:
|
||||||
return new LinearLocusShardStrategy(dic, startingSize);
|
return new LinearLocusShardStrategy(dic, startingSize, limitByCount);
|
||||||
case EXPONENTIAL:
|
case EXPONENTIAL:
|
||||||
return new ExpGrowthLocusShardStrategy(dic, startingSize);
|
return new ExpGrowthLocusShardStrategy(dic, startingSize, limitByCount);
|
||||||
case READS:
|
case READS:
|
||||||
return new ReadShardStrategy(dic, startingSize);
|
return new ReadShardStrategy(dic, startingSize, limitByCount);
|
||||||
|
case INTERVAL:
|
||||||
|
throw new StingException("Requested trategy: " + strat + " doesn't work with the limiting count (-M) command line option");
|
||||||
default:
|
default:
|
||||||
throw new StingException("Strategy: " + strat + " isn't implemented for this type of shatter request");
|
throw new StingException("Strategy: " + strat + " isn't implemented for this type of shatter request");
|
||||||
}
|
}
|
||||||
|
|
@ -73,11 +87,24 @@ public class ShardStrategyFactory {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst) {
|
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst) {
|
||||||
|
return ShardStrategyFactory.shatter(strat, dic, startingSize, lst, -1l);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get a new shatter strategy
|
||||||
|
*
|
||||||
|
* @param strat what's our strategy - SHATTER_STRATEGY type
|
||||||
|
* @param dic the seq dictionary
|
||||||
|
* @param startingSize the starting size
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst, long limitDataCount) {
|
||||||
switch (strat) {
|
switch (strat) {
|
||||||
case LINEAR:
|
case LINEAR:
|
||||||
return new LinearLocusShardStrategy(dic, startingSize, lst);
|
return new LinearLocusShardStrategy(dic, startingSize, lst, limitDataCount);
|
||||||
case EXPONENTIAL:
|
case EXPONENTIAL:
|
||||||
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst);
|
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst, limitDataCount);
|
||||||
case INTERVAL:
|
case INTERVAL:
|
||||||
case READS:
|
case READS:
|
||||||
return new IntervalShardStrategy(startingSize, lst);
|
return new IntervalShardStrategy(startingSize, lst);
|
||||||
|
|
|
||||||
|
|
@ -45,47 +45,34 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
*/
|
*/
|
||||||
private static final int MAX_OUTSTANDING_OUTPUT_MERGES = 50;
|
private static final int MAX_OUTSTANDING_OUTPUT_MERGES = 50;
|
||||||
|
|
||||||
/**
|
/** Manage currently running threads. */
|
||||||
* Manage currently running threads.
|
|
||||||
*/
|
|
||||||
private ExecutorService threadPool;
|
private ExecutorService threadPool;
|
||||||
|
|
||||||
private Queue<Shard> traverseTasks = new LinkedList<Shard>();
|
private Queue<Shard> traverseTasks = new LinkedList<Shard>();
|
||||||
private Queue<TreeReduceTask> reduceTasks = new LinkedList<TreeReduceTask>();
|
private Queue<TreeReduceTask> reduceTasks = new LinkedList<TreeReduceTask>();
|
||||||
private Queue<OutputMerger> outputMergeTasks = new LinkedList<OutputMerger>();
|
private Queue<OutputMerger> outputMergeTasks = new LinkedList<OutputMerger>();
|
||||||
|
|
||||||
/**
|
/** How many total tasks were in the queue at the start of run. */
|
||||||
* How many total tasks were in the queue at the start of run.
|
|
||||||
*/
|
|
||||||
private int totalTraversals = 0;
|
private int totalTraversals = 0;
|
||||||
|
|
||||||
/**
|
/** How many shard traversals have run to date? */
|
||||||
* How many shard traversals have run to date?
|
|
||||||
*/
|
|
||||||
private int totalCompletedTraversals = 0;
|
private int totalCompletedTraversals = 0;
|
||||||
|
|
||||||
/**
|
/** What is the total time spent traversing shards? */
|
||||||
* What is the total time spent traversing shards?
|
|
||||||
*/
|
|
||||||
private long totalShardTraverseTime = 0;
|
private long totalShardTraverseTime = 0;
|
||||||
|
|
||||||
/**
|
/** What is the total time spent tree reducing shard output? */
|
||||||
* What is the total time spent tree reducing shard output?
|
|
||||||
*/
|
|
||||||
private long totalTreeReduceTime = 0;
|
private long totalTreeReduceTime = 0;
|
||||||
|
|
||||||
/**
|
/** How many tree reduces have been completed? */
|
||||||
* How many tree reduces have been completed?
|
|
||||||
*/
|
|
||||||
private long totalCompletedTreeReduces = 0;
|
private long totalCompletedTreeReduces = 0;
|
||||||
|
|
||||||
/**
|
/** What is the total time spent merging output? */
|
||||||
* What is the total time spent merging output?
|
|
||||||
*/
|
|
||||||
private long totalOutputMergeTime = 0;
|
private long totalOutputMergeTime = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new hierarchical microscheduler to process the given reads and reference.
|
* Create a new hierarchical microscheduler to process the given reads and reference.
|
||||||
|
*
|
||||||
* @param reads Reads file(s) to process.
|
* @param reads Reads file(s) to process.
|
||||||
* @param refFile Reference for driving the traversal.
|
* @param refFile Reference for driving the traversal.
|
||||||
* @param nThreadsToUse maximum number of threads to use to do the work
|
* @param nThreadsToUse maximum number of threads to use to do the work
|
||||||
|
|
@ -104,12 +91,12 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Object execute( Walker walker, GenomeLocSortedSet intervals ) {
|
public Object execute( Walker walker, GenomeLocSortedSet intervals, Integer maxIterations ) {
|
||||||
// Fast fail for walkers not supporting TreeReducible interface.
|
// Fast fail for walkers not supporting TreeReducible interface.
|
||||||
if (!( walker instanceof TreeReducible ))
|
if (!( walker instanceof TreeReducible ))
|
||||||
throw new IllegalArgumentException("Hierarchical microscheduler only works with TreeReducible walkers");
|
throw new IllegalArgumentException("Hierarchical microscheduler only works with TreeReducible walkers");
|
||||||
|
|
||||||
ShardStrategy shardStrategy = getShardStrategy( walker, reference, intervals );
|
ShardStrategy shardStrategy = getShardStrategy(walker, reference, intervals, maxIterations);
|
||||||
ReduceTree reduceTree = new ReduceTree(this);
|
ReduceTree reduceTree = new ReduceTree(this);
|
||||||
|
|
||||||
walker.initialize();
|
walker.initialize();
|
||||||
|
|
@ -156,6 +143,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if there are unscheduled shard traversals waiting to run.
|
* Returns true if there are unscheduled shard traversals waiting to run.
|
||||||
|
*
|
||||||
* @return true if a shard traversal is waiting; false otherwise.
|
* @return true if a shard traversal is waiting; false otherwise.
|
||||||
*/
|
*/
|
||||||
protected boolean isShardTraversePending() {
|
protected boolean isShardTraversePending() {
|
||||||
|
|
@ -165,6 +153,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
/**
|
/**
|
||||||
* Returns true if there are tree reduces that can be run without
|
* Returns true if there are tree reduces that can be run without
|
||||||
* blocking.
|
* blocking.
|
||||||
|
*
|
||||||
* @return true if a tree reduce is ready; false otherwise.
|
* @return true if a tree reduce is ready; false otherwise.
|
||||||
*/
|
*/
|
||||||
protected boolean isTreeReduceReady() {
|
protected boolean isTreeReduceReady() {
|
||||||
|
|
@ -177,6 +166,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
* Returns true if there are tree reduces that need to be run before
|
* Returns true if there are tree reduces that need to be run before
|
||||||
* the computation is complete. Returns true if any entries are in the queue,
|
* the computation is complete. Returns true if any entries are in the queue,
|
||||||
* blocked or otherwise.
|
* blocked or otherwise.
|
||||||
|
*
|
||||||
* @return true if a tree reduce is pending; false otherwise.
|
* @return true if a tree reduce is pending; false otherwise.
|
||||||
*/
|
*/
|
||||||
protected boolean isTreeReducePending() {
|
protected boolean isTreeReducePending() {
|
||||||
|
|
@ -186,6 +176,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
/**
|
/**
|
||||||
* Returns whether the maximum number of files is sitting in the temp directory
|
* Returns whether the maximum number of files is sitting in the temp directory
|
||||||
* waiting to be merged back in.
|
* waiting to be merged back in.
|
||||||
|
*
|
||||||
* @return True if the merging needs to take priority. False otherwise.
|
* @return True if the merging needs to take priority. False otherwise.
|
||||||
*/
|
*/
|
||||||
protected boolean isMergeLimitExceeded() {
|
protected boolean isMergeLimitExceeded() {
|
||||||
|
|
@ -207,6 +198,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
/**
|
/**
|
||||||
* Returns whether there is output waiting to be merged into the global output
|
* Returns whether there is output waiting to be merged into the global output
|
||||||
* streams right now.
|
* streams right now.
|
||||||
|
*
|
||||||
* @return True if this output is ready to be merged. False otherwise.
|
* @return True if this output is ready to be merged. False otherwise.
|
||||||
*/
|
*/
|
||||||
protected boolean isOutputMergeReady() {
|
protected boolean isOutputMergeReady() {
|
||||||
|
|
@ -232,9 +224,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
totalOutputMergeTime += ( endTime - startTime );
|
totalOutputMergeTime += ( endTime - startTime );
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** Merge any output that hasn't yet been taken care of by the blocking thread. */
|
||||||
* Merge any output that hasn't yet been taken care of by the blocking thread.
|
|
||||||
*/
|
|
||||||
protected void mergeRemainingOutput() {
|
protected void mergeRemainingOutput() {
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
|
|
||||||
|
|
@ -255,6 +245,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Queues the next traversal of a walker from the traversal tasks queue.
|
* Queues the next traversal of a walker from the traversal tasks queue.
|
||||||
|
*
|
||||||
* @param walker Walker to apply to the dataset.
|
* @param walker Walker to apply to the dataset.
|
||||||
* @param reduceTree Tree of reduces to which to add this shard traverse.
|
* @param reduceTree Tree of reduces to which to add this shard traverse.
|
||||||
*/
|
*/
|
||||||
|
|
@ -286,9 +277,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
return traverseResult;
|
return traverseResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** Pulls the next reduce from the queue and runs it. */
|
||||||
* Pulls the next reduce from the queue and runs it.
|
|
||||||
*/
|
|
||||||
protected void queueNextTreeReduce( Walker walker ) {
|
protected void queueNextTreeReduce( Walker walker ) {
|
||||||
if (reduceTasks.size() == 0)
|
if (reduceTasks.size() == 0)
|
||||||
throw new IllegalStateException("Cannot reduce; no pending reduces exist.");
|
throw new IllegalStateException("Cannot reduce; no pending reduces exist.");
|
||||||
|
|
@ -298,9 +287,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
threadPool.submit(reducer);
|
threadPool.submit(reducer);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** Blocks until a free slot appears in the thread queue. */
|
||||||
* Blocks until a free slot appears in the thread queue.
|
|
||||||
*/
|
|
||||||
protected void waitForFreeQueueSlot() {
|
protected void waitForFreeQueueSlot() {
|
||||||
ThreadPoolMonitor monitor = new ThreadPoolMonitor();
|
ThreadPoolMonitor monitor = new ThreadPoolMonitor();
|
||||||
synchronized (monitor) {
|
synchronized (monitor) {
|
||||||
|
|
@ -311,6 +298,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Callback for adding reduce tasks to the run queue.
|
* Callback for adding reduce tasks to the run queue.
|
||||||
|
*
|
||||||
* @return A new, composite future of the result of this reduce.
|
* @return A new, composite future of the result of this reduce.
|
||||||
*/
|
*/
|
||||||
public Future notifyReduce( Future lhs, Future rhs ) {
|
public Future notifyReduce( Future lhs, Future rhs ) {
|
||||||
|
|
@ -320,9 +308,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/** A small wrapper class that provides the TreeReducer interface along with the FutureTask semantics. */
|
||||||
* A small wrapper class that provides the TreeReducer interface along with the FutureTask semantics.
|
|
||||||
*/
|
|
||||||
private class TreeReduceTask extends FutureTask {
|
private class TreeReduceTask extends FutureTask {
|
||||||
private TreeReducer treeReducer = null;
|
private TreeReducer treeReducer = null;
|
||||||
|
|
||||||
|
|
@ -342,6 +328,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used by the ShardTraverser to report time consumed traversing a given shard.
|
* Used by the ShardTraverser to report time consumed traversing a given shard.
|
||||||
|
*
|
||||||
* @param shardTraversalTime Elapsed time traversing a given shard.
|
* @param shardTraversalTime Elapsed time traversing a given shard.
|
||||||
*/
|
*/
|
||||||
synchronized void reportShardTraverseTime( long shardTraversalTime ) {
|
synchronized void reportShardTraverseTime( long shardTraversalTime ) {
|
||||||
|
|
@ -351,6 +338,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used by the TreeReducer to report time consumed reducing two shards.
|
* Used by the TreeReducer to report time consumed reducing two shards.
|
||||||
|
*
|
||||||
* @param treeReduceTime Elapsed time reducing two shards.
|
* @param treeReduceTime Elapsed time reducing two shards.
|
||||||
*/
|
*/
|
||||||
synchronized void reportTreeReduceTime( long treeReduceTime ) {
|
synchronized void reportTreeReduceTime( long treeReduceTime ) {
|
||||||
|
|
@ -359,69 +347,51 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** {@inheritDoc} */
|
||||||
* {@inheritDoc}
|
|
||||||
*/
|
|
||||||
public int getTotalNumberOfShards() {
|
public int getTotalNumberOfShards() {
|
||||||
return totalTraversals;
|
return totalTraversals;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** {@inheritDoc} */
|
||||||
* {@inheritDoc}
|
|
||||||
*/
|
|
||||||
public int getRemainingNumberOfShards() {
|
public int getRemainingNumberOfShards() {
|
||||||
return traverseTasks.size();
|
return traverseTasks.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** {@inheritDoc} */
|
||||||
* {@inheritDoc}
|
|
||||||
*/
|
|
||||||
public int getNumberOfTasksInReduceQueue() {
|
public int getNumberOfTasksInReduceQueue() {
|
||||||
return reduceTasks.size();
|
return reduceTasks.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** {@inheritDoc} */
|
||||||
* {@inheritDoc}
|
|
||||||
*/
|
|
||||||
public int getNumberOfTasksInIOQueue() {
|
public int getNumberOfTasksInIOQueue() {
|
||||||
return outputMergeTasks.size();
|
return outputMergeTasks.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** {@inheritDoc} */
|
||||||
* {@inheritDoc}
|
|
||||||
*/
|
|
||||||
public long getTotalShardTraverseTimeMillis() {
|
public long getTotalShardTraverseTimeMillis() {
|
||||||
return totalShardTraverseTime;
|
return totalShardTraverseTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** {@inheritDoc} */
|
||||||
* {@inheritDoc}
|
|
||||||
*/
|
|
||||||
public long getAvgShardTraverseTimeMillis() {
|
public long getAvgShardTraverseTimeMillis() {
|
||||||
if (totalCompletedTraversals == 0)
|
if (totalCompletedTraversals == 0)
|
||||||
return 0;
|
return 0;
|
||||||
return totalShardTraverseTime / totalCompletedTraversals;
|
return totalShardTraverseTime / totalCompletedTraversals;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** {@inheritDoc} */
|
||||||
* {@inheritDoc}
|
|
||||||
*/
|
|
||||||
public long getTotalTreeReduceTimeMillis() {
|
public long getTotalTreeReduceTimeMillis() {
|
||||||
return totalTreeReduceTime;
|
return totalTreeReduceTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** {@inheritDoc} */
|
||||||
* {@inheritDoc}
|
|
||||||
*/
|
|
||||||
public long getAvgTreeReduceTimeMillis() {
|
public long getAvgTreeReduceTimeMillis() {
|
||||||
if (totalCompletedTreeReduces == 0)
|
if (totalCompletedTreeReduces == 0)
|
||||||
return 0;
|
return 0;
|
||||||
return totalTreeReduceTime / totalCompletedTreeReduces;
|
return totalTreeReduceTime / totalCompletedTreeReduces;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** {@inheritDoc} */
|
||||||
* {@inheritDoc}
|
|
||||||
*/
|
|
||||||
public long getTotalOutputMergeTimeMillis() {
|
public long getTotalOutputMergeTimeMillis() {
|
||||||
return totalOutputMergeTime;
|
return totalOutputMergeTime;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -31,9 +31,10 @@ public class LinearMicroScheduler extends MicroScheduler {
|
||||||
*
|
*
|
||||||
* @param walker Computation to perform over dataset.
|
* @param walker Computation to perform over dataset.
|
||||||
* @param locations Subset of the dataset over which to walk.
|
* @param locations Subset of the dataset over which to walk.
|
||||||
|
* @param maxIterations the maximum number of iterations we're to perform
|
||||||
*/
|
*/
|
||||||
public Object execute(Walker walker, GenomeLocSortedSet locations) {
|
public Object execute(Walker walker, GenomeLocSortedSet locations, Integer maxIterations) {
|
||||||
ShardStrategy shardStrategy = getShardStrategy(walker, reference, locations);
|
ShardStrategy shardStrategy = getShardStrategy(walker, reference, locations, maxIterations);
|
||||||
|
|
||||||
walker.initialize();
|
walker.initialize();
|
||||||
Accumulator accumulator = Accumulator.create(walker);
|
Accumulator accumulator = Accumulator.create(walker);
|
||||||
|
|
|
||||||
|
|
@ -99,9 +99,10 @@ public abstract class MicroScheduler {
|
||||||
* Walks a walker over the given list of intervals.
|
* Walks a walker over the given list of intervals.
|
||||||
* @param walker Computation to perform over dataset.
|
* @param walker Computation to perform over dataset.
|
||||||
* @param intervals A list of intervals over which to walk. Null for whole dataset.
|
* @param intervals A list of intervals over which to walk. Null for whole dataset.
|
||||||
|
* @param maxIterations the maximum number of iterations we're to perform
|
||||||
* @return the return type of the walker
|
* @return the return type of the walker
|
||||||
*/
|
*/
|
||||||
public abstract Object execute(Walker walker, GenomeLocSortedSet intervals);
|
public abstract Object execute(Walker walker, GenomeLocSortedSet intervals, Integer maxIterations);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the sharding strategy given a driving data source.
|
* Get the sharding strategy given a driving data source.
|
||||||
|
|
@ -110,7 +111,10 @@ public abstract class MicroScheduler {
|
||||||
* @param intervals Intervals to use when limiting sharding.
|
* @param intervals Intervals to use when limiting sharding.
|
||||||
* @return Sharding strategy for this driving data source.
|
* @return Sharding strategy for this driving data source.
|
||||||
*/
|
*/
|
||||||
protected ShardStrategy getShardStrategy(Walker walker, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) {
|
protected ShardStrategy getShardStrategy(Walker walker,
|
||||||
|
ReferenceSequenceFile drivingDataSource,
|
||||||
|
GenomeLocSortedSet intervals,
|
||||||
|
Integer maxIterations) {
|
||||||
ShardStrategy shardStrategy = null;
|
ShardStrategy shardStrategy = null;
|
||||||
ShardStrategyFactory.SHATTER_STRATEGY shardType;
|
ShardStrategyFactory.SHATTER_STRATEGY shardType;
|
||||||
if (walker instanceof LocusWalker) {
|
if (walker instanceof LocusWalker) {
|
||||||
|
|
@ -122,11 +126,11 @@ public abstract class MicroScheduler {
|
||||||
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
||||||
drivingDataSource.getSequenceDictionary(),
|
drivingDataSource.getSequenceDictionary(),
|
||||||
SHARD_SIZE,
|
SHARD_SIZE,
|
||||||
intervals);
|
intervals, maxIterations);
|
||||||
} else
|
} else
|
||||||
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
|
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
|
||||||
drivingDataSource.getSequenceDictionary(),
|
drivingDataSource.getSequenceDictionary(),
|
||||||
SHARD_SIZE);
|
SHARD_SIZE, maxIterations);
|
||||||
|
|
||||||
} else if (walker instanceof ReadWalker) {
|
} else if (walker instanceof ReadWalker) {
|
||||||
|
|
||||||
|
|
@ -136,11 +140,11 @@ public abstract class MicroScheduler {
|
||||||
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
||||||
drivingDataSource.getSequenceDictionary(),
|
drivingDataSource.getSequenceDictionary(),
|
||||||
SHARD_SIZE,
|
SHARD_SIZE,
|
||||||
intervals);
|
intervals, maxIterations);
|
||||||
} else {
|
} else {
|
||||||
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
||||||
drivingDataSource.getSequenceDictionary(),
|
drivingDataSource.getSequenceDictionary(),
|
||||||
SHARD_SIZE);
|
SHARD_SIZE, maxIterations);
|
||||||
}
|
}
|
||||||
} else
|
} else
|
||||||
throw new StingException("Unable to support walker of type" + walker.getClass().getName());
|
throw new StingException("Unable to support walker of type" + walker.getClass().getName());
|
||||||
|
|
|
||||||
|
|
@ -73,7 +73,7 @@ public class GATKArgumentCollectionTest extends BaseTest {
|
||||||
List<File> input = new ArrayList<File>();
|
List<File> input = new ArrayList<File>();
|
||||||
input.add(new File("test.file"));
|
input.add(new File("test.file"));
|
||||||
collect.samFiles = input;
|
collect.samFiles = input;
|
||||||
collect.maximumReads = "-1";
|
collect.maximumEngineIterations = -1;
|
||||||
collect.strictnessLevel = "strict";
|
collect.strictnessLevel = "strict";
|
||||||
collect.referenceFile = new File("referenceFile".toLowerCase());
|
collect.referenceFile = new File("referenceFile".toLowerCase());
|
||||||
collect.analysisName = "analysisName".toLowerCase();
|
collect.analysisName = "analysisName".toLowerCase();
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,7 @@ public class LinearLocusShardStrategyTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSetup() {
|
public void testSetup() {
|
||||||
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500);
|
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500, -1);
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
while(strat.hasNext()) {
|
while(strat.hasNext()) {
|
||||||
Shard d = strat.next();
|
Shard d = strat.next();
|
||||||
|
|
@ -71,7 +71,7 @@ public class LinearLocusShardStrategyTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAdjustSize() {
|
public void testAdjustSize() {
|
||||||
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500);
|
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500, -1);
|
||||||
strat.adjustNextShardSize(1000);
|
strat.adjustNextShardSize(1000);
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
while(strat.hasNext()) {
|
while(strat.hasNext()) {
|
||||||
|
|
@ -86,7 +86,7 @@ public class LinearLocusShardStrategyTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testUnevenSplit() {
|
public void testUnevenSplit() {
|
||||||
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600);
|
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600, -1);
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
while(strat.hasNext()) {
|
while(strat.hasNext()) {
|
||||||
Shard d = strat.next();
|
Shard d = strat.next();
|
||||||
|
|
@ -100,4 +100,19 @@ public class LinearLocusShardStrategyTest extends BaseTest {
|
||||||
}
|
}
|
||||||
assertTrue(counter == 10);
|
assertTrue(counter == 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDashMOption() {
|
||||||
|
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600, 200);
|
||||||
|
int counter = 0;
|
||||||
|
while(strat.hasNext()) {
|
||||||
|
Shard d = strat.next();
|
||||||
|
assertTrue(d instanceof LocusShard);
|
||||||
|
assertTrue((d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart()) == 199);
|
||||||
|
++counter;
|
||||||
|
}
|
||||||
|
assertTrue(counter == 1);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue