-Replaced adaptive and linear strategies with an adaptive linear strategy
-Added the exponential growth strategy -Added factory code that allows you to transition between strategies, so if you want to move from linear to exp at a point, and then back when you've hit a runtime threshold, it will take care of it for you. -Changed the code to return a Shard instead of a GenomeLoc git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@313 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6369d23b43
commit
08fddd43af
|
|
@ -0,0 +1,78 @@
|
||||||
|
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* User: aaron
|
||||||
|
* Date: Apr 6, 2009
|
||||||
|
* Time: 8:23:19 PM
|
||||||
|
*
|
||||||
|
* The Broad Institute
|
||||||
|
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||||
|
* This software and its documentation are copyright 2009 by the
|
||||||
|
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||||
|
*
|
||||||
|
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||||
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author aaron
|
||||||
|
* @version 1.0
|
||||||
|
* @date Apr 6, 2009
|
||||||
|
* <p/>
|
||||||
|
* Class ExpGrowthShardStrategy
|
||||||
|
* <p/>
|
||||||
|
* An exponential growth strategy: the next shard size is the base size raised to the current exponent
|
||||||
|
*/
|
||||||
|
public class ExpGrowthShardStrategy extends ShardStrategy {
|
||||||
|
|
||||||
|
// fixed size
|
||||||
|
private long baseSize = 100000;
|
||||||
|
private long currentExp = 1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* the constructor, taking a seq dictionary to parse out contigs
|
||||||
|
*
|
||||||
|
* @param dic the seq dictionary
|
||||||
|
*/
|
||||||
|
ExpGrowthShardStrategy(SAMSequenceDictionary dic, long startSize) {
|
||||||
|
super(dic);
|
||||||
|
this.baseSize = startSize;
|
||||||
|
currentExp = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* the constructor, taking a seq dictionary to parse out contigs
|
||||||
|
*
|
||||||
|
* @param strat the shatter to convert from
|
||||||
|
*/
|
||||||
|
ExpGrowthShardStrategy(ShardStrategy strat) {
|
||||||
|
super(strat);
|
||||||
|
this.baseSize = strat.nextShardSize();
|
||||||
|
currentExp = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* set the next shards size
|
||||||
|
*
|
||||||
|
* @param size adjust the next size to this
|
||||||
|
*/
|
||||||
|
public void adjustNextShardSize(long size) {
|
||||||
|
baseSize = size;
|
||||||
|
currentExp = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is how the various shards strategies implements their approach
|
||||||
|
*
|
||||||
|
* @return the next shard size
|
||||||
|
*/
|
||||||
|
protected long nextShardSize() {
|
||||||
|
return (long) Math.floor(Math.pow((double) baseSize, (double) currentExp));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -6,7 +6,7 @@ import net.sf.samtools.SAMSequenceDictionary;
|
||||||
*
|
*
|
||||||
* User: aaron
|
* User: aaron
|
||||||
* Date: Apr 6, 2009
|
* Date: Apr 6, 2009
|
||||||
* Time: 8:23:19 PM
|
* Time: 7:18:19 PM
|
||||||
*
|
*
|
||||||
* The Broad Institute
|
* The Broad Institute
|
||||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||||
|
|
@ -24,13 +24,13 @@ import net.sf.samtools.SAMSequenceDictionary;
|
||||||
* @version 1.0
|
* @version 1.0
|
||||||
* @date Apr 6, 2009
|
* @date Apr 6, 2009
|
||||||
* <p/>
|
* <p/>
|
||||||
* Class LinearShard
|
* Class AdaptiveShard
|
||||||
* <p/>
|
* <p/>
|
||||||
* A descriptions should go here. Blame aaron if it's missing.
|
* allows you to change the sharding length as you traverse
|
||||||
*/
|
*/
|
||||||
public class LinearShardStrategy extends ShardStrategy {
|
class LinearShardStrategy extends ShardStrategy {
|
||||||
|
|
||||||
// fixed size
|
// default the next size to 100,000
|
||||||
private long nextShardSize = 100000;
|
private long nextShardSize = 100000;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -43,6 +43,26 @@ public class LinearShardStrategy extends ShardStrategy {
|
||||||
this.nextShardSize = startSize;
|
this.nextShardSize = startSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* the conversion constructor, taking an existing strategy to continue from
|
||||||
|
*
|
||||||
|
* @param strat the strategy to convert from
|
||||||
|
*/
|
||||||
|
LinearShardStrategy(ShardStrategy strat) {
|
||||||
|
super(strat);
|
||||||
|
this.nextShardSize = strat.nextShardSize();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* set the next shards size
|
||||||
|
*
|
||||||
|
* @param size adjust the next size to this
|
||||||
|
*/
|
||||||
|
public void adjustNextShardSize(long size) {
|
||||||
|
nextShardSize = size;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This is how the various shards strategies implements their approach
|
* This is how the various shards strategies implements their approach
|
||||||
*
|
*
|
||||||
|
|
@ -51,4 +71,5 @@ public class LinearShardStrategy extends ShardStrategy {
|
||||||
protected long nextShardSize() {
|
protected long nextShardSize() {
|
||||||
return nextShardSize;
|
return nextShardSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,55 @@
|
||||||
|
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* User: aaron
|
||||||
|
* Date: Apr 7, 2009
|
||||||
|
* Time: 1:19:49 PM
|
||||||
|
*
|
||||||
|
* The Broad Institute
|
||||||
|
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||||
|
* This software and its documentation are copyright 2009 by the
|
||||||
|
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||||
|
*
|
||||||
|
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||||
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author aaron
|
||||||
|
* @version 1.0
|
||||||
|
* @date Apr 7, 2009
|
||||||
|
* <p/>
|
||||||
|
* Class Shard
|
||||||
|
* <p/>
|
||||||
|
* This is the base class for shards. Right now it does little more than
|
||||||
|
* wrap GenomeLoc (actually nothing more), but it's good to have the class
|
||||||
|
* in place so it's easier to change guts later.
|
||||||
|
*/
|
||||||
|
public class Shard {
|
||||||
|
|
||||||
|
// currently our location
|
||||||
|
final GenomeLoc mLoc;
|
||||||
|
|
||||||
|
public Shard(GenomeLoc loc) {
|
||||||
|
this.mLoc = loc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return the genome location represented by this shard */
|
||||||
|
public GenomeLoc getGenomeLoc() {
|
||||||
|
return mLoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* return a shard representing the passed in GenomeLoc
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public static Shard toShard(GenomeLoc loc) {
|
||||||
|
return new Shard(loc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -29,11 +29,11 @@ import java.util.Iterator;
|
||||||
* <p/>
|
* <p/>
|
||||||
* The shard interface, which controls how data is divided
|
* The shard interface, which controls how data is divided
|
||||||
*/
|
*/
|
||||||
public abstract class ShardStrategy implements Iterator<GenomeLoc> {
|
public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard> {
|
||||||
|
|
||||||
// this stores the seq dictionary, which is a reference for the
|
// this stores the seq dictionary, which is a reference for the
|
||||||
// lengths and names of contigs, which you need to generate an iterative stratagy
|
// lengths and names of contigs, which you need to generate an iterative stratagy
|
||||||
protected static SAMSequenceDictionary dic = null;
|
protected final SAMSequenceDictionary dic;
|
||||||
|
|
||||||
// the current genome location
|
// the current genome location
|
||||||
protected GenomeLoc mLoc = null;
|
protected GenomeLoc mLoc = null;
|
||||||
|
|
@ -61,20 +61,53 @@ public abstract class ShardStrategy implements Iterator<GenomeLoc> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* the copy constructor,
|
||||||
|
*
|
||||||
|
* @param old the old strategy
|
||||||
|
*/
|
||||||
|
ShardStrategy(ShardStrategy old) {
    // plain field-by-field assignment from the old strategy, so the new one
    // carries the same values the old one held at this moment

    // the sequence dictionary used to look up contigs
    dic = old.dic;

    // the position state of the old strategy
    seqLoc = old.seqLoc;
    mLoc = old.mLoc;
    nextContig = old.nextContig;
    lastGenomeLocSize = old.lastGenomeLocSize;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* Abstract methods that each strategy has to implement
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* set the next shards size
|
||||||
|
*
|
||||||
|
* @param size adjust the next size to this
|
||||||
|
*/
|
||||||
|
public abstract void adjustNextShardSize(long size);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This is how the various shards strategies implements their approach
|
* This is how the various shards strategies implements their approach
|
||||||
*
|
*
|
||||||
* @return the next shard size
|
* @return the next shard size
|
||||||
*/
|
*/
|
||||||
protected abstract long nextShardSize();
|
abstract long nextShardSize();
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* Concrete methods that each strategy does not have to implement
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the next shard, based on the return size of nextShardSize
|
* get the next shard, based on the return size of nextShardSize
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public GenomeLoc next() {
|
public Shard next() {
|
||||||
// lets get some background info on the problem
|
// lets get some background info on the problem
|
||||||
long length = dic.getSequence(seqLoc).getSequenceLength();
|
long length = dic.getSequence(seqLoc).getSequenceLength();
|
||||||
long proposedSize = nextShardSize();
|
long proposedSize = nextShardSize();
|
||||||
|
|
@ -83,7 +116,7 @@ public abstract class ShardStrategy implements Iterator<GenomeLoc> {
|
||||||
if (nextStart + proposedSize < length) {
|
if (nextStart + proposedSize < length) {
|
||||||
lastGenomeLocSize = proposedSize;
|
lastGenomeLocSize = proposedSize;
|
||||||
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize);
|
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize);
|
||||||
return new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize);
|
return Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize));
|
||||||
}
|
}
|
||||||
// else we can't make it in the current location, we have to stitch one together
|
// else we can't make it in the current location, we have to stitch one together
|
||||||
else {
|
else {
|
||||||
|
|
@ -92,7 +125,7 @@ public abstract class ShardStrategy implements Iterator<GenomeLoc> {
|
||||||
|
|
||||||
// move to the next contig
|
// move to the next contig
|
||||||
jumpContig();
|
jumpContig();
|
||||||
return new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, lastGenomeLocSize);
|
return Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, lastGenomeLocSize));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -123,4 +156,16 @@ public abstract class ShardStrategy implements Iterator<GenomeLoc> {
|
||||||
public void remove() {
|
public void remove() {
|
||||||
throw new UnsupportedOperationException("Can not remove records from a shard iterator!");
|
throw new UnsupportedOperationException("Can not remove records from a shard iterator!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* to be for-each(able), we must implement this method
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public Iterator<Shard> iterator() {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary;
|
||||||
*/
|
*/
|
||||||
public class ShardStrategyFactory {
|
public class ShardStrategyFactory {
|
||||||
public enum SHATTER_STRATEGY {
|
public enum SHATTER_STRATEGY {
|
||||||
ADAPTIVE, LINEAR
|
LINEAR, EXPONENTIAL
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -42,25 +42,34 @@ public class ShardStrategyFactory {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) {
|
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) {
|
||||||
ShardStrategy d = null;
|
|
||||||
switch (strat) {
|
switch (strat) {
|
||||||
case ADAPTIVE:
|
case LINEAR:
|
||||||
d = new AdaptiveShardStrategy(dic, startingSize);
|
return new LinearShardStrategy(dic, startingSize);
|
||||||
|
case EXPONENTIAL:
|
||||||
|
return new ExpGrowthShardStrategy(dic, startingSize);
|
||||||
default:
|
default:
|
||||||
d = new LinearShardStrategy(dic, startingSize); // default
|
throw new RuntimeException("Strategy: " + strat + " isn't implemented");
|
||||||
}
|
}
|
||||||
return d;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* if you know what you want
|
* convert between types
|
||||||
*
|
*
|
||||||
* @param dic the seq dictionary
|
* @param strat the strategy
|
||||||
* @param startingSize the starting size
|
* @param convertFrom convert from this strategy
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
static public AdaptiveShardStrategy getAdaptiveShard(SAMSequenceDictionary dic, long startingSize) {
|
static public ShardStrategy transitionToShardStrategy(SHATTER_STRATEGY strat, ShardStrategy convertFrom) {
|
||||||
return new AdaptiveShardStrategy(dic, startingSize);
|
switch (strat) {
|
||||||
|
case LINEAR:
|
||||||
|
return new LinearShardStrategy(convertFrom);
|
||||||
|
case EXPONENTIAL:
|
||||||
|
return new ExpGrowthShardStrategy(convertFrom);
|
||||||
|
default:
|
||||||
|
throw new RuntimeException("Strategy: " + strat + " isn't implemented");
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue