-Replaced adaptive and linear strategies with an adaptive linear strategy

-Added the exponential growth strategy
-Added factory code that allows you to transition between strategies, so if you want to move from linear to exp at a point, and then back when you've hit a runtime threshold, it will take care of it for you.
-Changed the code to return a Shard instead of a GenomeLoc

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@313 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-04-07 19:37:38 +00:00
parent 6369d23b43
commit 08fddd43af
5 changed files with 230 additions and 22 deletions

View File

@ -0,0 +1,78 @@
package org.broadinstitute.sting.gatk.dataSources.shards;
import net.sf.samtools.SAMSequenceDictionary;
/**
*
* User: aaron
* Date: Apr 6, 2009
* Time: 8:23:19 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date Apr 6, 2009
* <p/>
* Class ExpGrowthShardStrategy
* <p/>
* A sharding strategy whose shard size is a base size raised to an exponent
*/
public class ExpGrowthShardStrategy extends ShardStrategy {

    /**
     * Upper bound on any size this strategy hands out.  Without a bound, the
     * double-to-long cast in nextShardSize() saturates at Long.MAX_VALUE after a
     * few growth steps, and ShardStrategy.next() then computes
     * "nextStart + proposedSize", which would wrap around.  Half of Long.MAX_VALUE
     * leaves headroom for that addition as long as the start position itself is
     * below the same bound.
     */
    private static final long MAX_SHARD_SIZE = Long.MAX_VALUE / 2;

    // the base of the exponential; with the exponent at 1 this is the shard size itself
    private long baseSize = 100000;

    // the exponent applied to baseSize for the next shard; advanced once per size handed out
    private long currentExp = 1;

    /**
     * the constructor, taking a seq dictionary to parse out contigs
     *
     * @param dic       the seq dictionary
     * @param startSize the size of the first shard, which is also the base that
     *                  is raised to a growing exponent for the following shards
     */
    ExpGrowthShardStrategy(SAMSequenceDictionary dic, long startSize) {
        super(dic);
        this.baseSize = startSize;
        this.currentExp = 1;
    }

    /**
     * the conversion constructor: the state of the given strategy is handed to the
     * superclass copy constructor, and the size that strategy reports for its next
     * shard becomes the base size here.  Growth restarts from an exponent of 1.
     *
     * @param strat the strategy to convert from
     */
    ExpGrowthShardStrategy(ShardStrategy strat) {
        super(strat);
        this.baseSize = strat.nextShardSize();
        this.currentExp = 1;
    }

    /**
     * set the next shards size.  This also restarts the growth: the next shard
     * has exactly the given size, and the exponent climbs again from there.
     *
     * @param size adjust the next size to this
     */
    public void adjustNextShardSize(long size) {
        baseSize = size;
        currentExp = 1;
    }

    /**
     * This is how the various shards strategies implements their approach.
     * <p/>
     * Returns baseSize raised to the current exponent and then advances the
     * exponent, so successive calls yield baseSize, baseSize^2, baseSize^3, ...
     * Previously the exponent was never advanced, so every call returned baseSize
     * and the strategy did not grow at all.
     * <p/>
     * Note that this call changes state: every invocation consumes one growth
     * step.  ShardStrategy.next() asks for the size once per shard, and the
     * conversion constructors ask once on the strategy that is being replaced.
     * <p/>
     * Once the value reaches MAX_SHARD_SIZE it is pinned there and the exponent
     * stops advancing.
     *
     * @return the next shard size, never larger than MAX_SHARD_SIZE
     */
    protected long nextShardSize() {
        double grown = Math.floor(Math.pow((double) baseSize, (double) currentExp));
        if (grown >= (double) MAX_SHARD_SIZE) {
            // already at the ceiling; raising the exponent further changes nothing
            return MAX_SHARD_SIZE;
        }
        currentExp++;
        return (long) grown;
    }
}

View File

@ -6,7 +6,7 @@ import net.sf.samtools.SAMSequenceDictionary;
* *
* User: aaron * User: aaron
* Date: Apr 6, 2009 * Date: Apr 6, 2009
* Time: 8:23:19 PM * Time: 7:18:19 PM
* *
* The Broad Institute * The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT * SOFTWARE COPYRIGHT NOTICE AGREEMENT
@ -24,13 +24,13 @@ import net.sf.samtools.SAMSequenceDictionary;
* @version 1.0 * @version 1.0
* @date Apr 6, 2009 * @date Apr 6, 2009
* <p/> * <p/>
* Class LinearShard * Class AdaptiveShard
* <p/> * <p/>
* A descriptions should go here. Blame aaron if it's missing. * allows you to change the sharding length as you traverse
*/ */
public class LinearShardStrategy extends ShardStrategy { class LinearShardStrategy extends ShardStrategy {
// fixed size // default the next size to 100,000
private long nextShardSize = 100000; private long nextShardSize = 100000;
/** /**
@ -43,6 +43,26 @@ public class LinearShardStrategy extends ShardStrategy {
this.nextShardSize = startSize; this.nextShardSize = startSize;
} }
/**
* the constructor, taking a seq dictionary to parse out contigs
*
* @param strat the shatter to convert from
*/
LinearShardStrategy(ShardStrategy strat) {
super(strat);
this.nextShardSize = strat.nextShardSize();
}
/**
 * replace the shard size; nextShardSize() reports this value from now on
 *
 * @param size the new shard size
 */
public void adjustNextShardSize(long size) {
    this.nextShardSize = size;
}
/** /**
* This is how the various shards strategies implements their approach * This is how the various shards strategies implements their approach
* *
@ -51,4 +71,5 @@ public class LinearShardStrategy extends ShardStrategy {
protected long nextShardSize() { protected long nextShardSize() {
return nextShardSize; return nextShardSize;
} }
} }

View File

@ -0,0 +1,55 @@
package org.broadinstitute.sting.gatk.dataSources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
*
* User: aaron
* Date: Apr 7, 2009
* Time: 1:19:49 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date Apr 7, 2009
* <p/>
* Class Shard
* <p/>
* This is the base class for shards. Right now it does little more than
* wrap GenomeLoc (actually nothing more), but it's good to have the class
* in place so it's easier to change guts later.
*/
public class Shard {

    // the genome location wrapped by this shard; assigned once at construction
    final GenomeLoc mLoc;

    /**
     * factory-style alternative to the constructor
     *
     * @param loc the genome location to wrap
     * @return a new shard wrapping the passed in GenomeLoc
     */
    public static Shard toShard(GenomeLoc loc) {
        return new Shard(loc);
    }

    /**
     * create a shard around a genome location
     *
     * @param loc the genome location this shard represents; stored as-is, not copied
     */
    public Shard(GenomeLoc loc) {
        mLoc = loc;
    }

    /** @return the genome location this shard was created with */
    public GenomeLoc getGenomeLoc() {
        return this.mLoc;
    }
}

View File

@ -29,11 +29,11 @@ import java.util.Iterator;
* <p/> * <p/>
* The shard interface, which controls how data is divided * The shard interface, which controls how data is divided
*/ */
public abstract class ShardStrategy implements Iterator<GenomeLoc> { public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard> {
// this stores the seq dictionary, which is a reference for the // this stores the seq dictionary, which is a reference for the
// lengths and names of contigs, which you need to generate an iterative stratagy // lengths and names of contigs, which you need to generate an iterative stratagy
protected static SAMSequenceDictionary dic = null; protected final SAMSequenceDictionary dic;
// the current genome location // the current genome location
protected GenomeLoc mLoc = null; protected GenomeLoc mLoc = null;
@ -61,20 +61,53 @@ public abstract class ShardStrategy implements Iterator<GenomeLoc> {
} }
} }
/**
 * the copy constructor: takes over the sequence dictionary and the position
 * fields (mLoc, seqLoc, lastGenomeLocSize, nextContig) of another strategy.
 * Object fields are shared by reference with the old strategy, not duplicated.
 *
 * @param old the strategy whose state is copied
 */
ShardStrategy(ShardStrategy old) {
    dic = old.dic;
    mLoc = old.mLoc;
    seqLoc = old.seqLoc;
    lastGenomeLocSize = old.lastGenomeLocSize;
    nextContig = old.nextContig;
}
/**
*
* Abstract methods that each strategy has to implement
*
*/
/**
* set the next shards size
*
* @param size adjust the next size to this
*/
public abstract void adjustNextShardSize(long size);
/** /**
* This is how the various shards strategies implements their approach * This is how the various shards strategies implements their approach
* *
* @return the next shard size * @return the next shard size
*/ */
protected abstract long nextShardSize(); abstract long nextShardSize();
/**
*
* Concrete methods that each strategy does not have to implement
*
*/
/** /**
* get the next shard, based on the return size of nextShardSize * get the next shard, based on the return size of nextShardSize
* *
* @return * @return
*/ */
public GenomeLoc next() { public Shard next() {
// lets get some background info on the problem // lets get some background info on the problem
long length = dic.getSequence(seqLoc).getSequenceLength(); long length = dic.getSequence(seqLoc).getSequenceLength();
long proposedSize = nextShardSize(); long proposedSize = nextShardSize();
@ -83,7 +116,7 @@ public abstract class ShardStrategy implements Iterator<GenomeLoc> {
if (nextStart + proposedSize < length) { if (nextStart + proposedSize < length) {
lastGenomeLocSize = proposedSize; lastGenomeLocSize = proposedSize;
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize); mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize);
return new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize); return Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize));
} }
// else we can't make it in the current location, we have to stitch one together // else we can't make it in the current location, we have to stitch one together
else { else {
@ -92,7 +125,7 @@ public abstract class ShardStrategy implements Iterator<GenomeLoc> {
// move to the next contig // move to the next contig
jumpContig(); jumpContig();
return new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, lastGenomeLocSize); return Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, lastGenomeLocSize));
} }
} }
@ -123,4 +156,16 @@ public abstract class ShardStrategy implements Iterator<GenomeLoc> {
public void remove() { public void remove() {
throw new UnsupportedOperationException("Can not remove records from a shard iterator!"); throw new UnsupportedOperationException("Can not remove records from a shard iterator!");
} }
/**
* to be for-each(able), we must implement this method.
*
* The strategy is its own iterator, so this hands back the same object rather
* than a fresh one: a second for-each loop over the same strategy does not
* start over, it carries on from wherever the previous iteration stopped.
*
* @return this strategy itself
*/
public Iterator<Shard> iterator() {
return this;
}
} }

View File

@ -30,7 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary;
*/ */
public class ShardStrategyFactory { public class ShardStrategyFactory {
public enum SHATTER_STRATEGY { public enum SHATTER_STRATEGY {
ADAPTIVE, LINEAR LINEAR, EXPONENTIAL
} }
/** /**
@ -42,25 +42,34 @@ public class ShardStrategyFactory {
* @return * @return
*/ */
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) { static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) {
ShardStrategy d = null;
switch (strat) { switch (strat) {
case ADAPTIVE: case LINEAR:
d = new AdaptiveShardStrategy(dic, startingSize); return new LinearShardStrategy(dic, startingSize);
case EXPONENTIAL:
return new ExpGrowthShardStrategy(dic, startingSize);
default: default:
d = new LinearShardStrategy(dic, startingSize); // default throw new RuntimeException("Strategy: " + strat + " isn't implemented");
} }
return d;
} }
/** /**
* if you know what you want * convert between types
* *
* @param dic the seq dictionary * @param strat the strategy
* @param startingSize the starting size * @param convertFrom convert from this strategy
* @return * @return
*/ */
static public AdaptiveShardStrategy getAdaptiveShard(SAMSequenceDictionary dic, long startingSize) { static public ShardStrategy transitionToShardStrategy(SHATTER_STRATEGY strat, ShardStrategy convertFrom) {
return new AdaptiveShardStrategy(dic, startingSize); switch (strat) {
case LINEAR:
return new LinearShardStrategy(convertFrom);
case EXPONENTIAL:
return new ExpGrowthShardStrategy(convertFrom);
default:
throw new RuntimeException("Strategy: " + strat + " isn't implemented");
}
} }
} }