diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ExpGrowthShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ExpGrowthShardStrategy.java new file mode 100644 index 000000000..07a11fbb8 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ExpGrowthShardStrategy.java @@ -0,0 +1,78 @@ +package org.broadinstitute.sting.gatk.dataSources.shards; + +import net.sf.samtools.SAMSequenceDictionary; + +/** + * + * User: aaron + * Date: Apr 6, 2009 + * Time: 8:23:19 PM + * + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + */ + + +/** + * @author aaron + * @version 1.0 + * @date Apr 6, 2009 + *

+ * Class ExpGrowthShardStrategy + *

+ * A linear strategy, very very similar to adaptive + */ +public class ExpGrowthShardStrategy extends ShardStrategy { + + // fixed size + private long baseSize = 100000; + private long currentExp = 1; + + /** + * the constructor, taking a seq dictionary to parse out contigs + * + * @param dic the seq dictionary + */ + ExpGrowthShardStrategy(SAMSequenceDictionary dic, long startSize) { + super(dic); + this.baseSize = startSize; + currentExp = 1; + } + + /** + * the constructor, taking a seq dictionary to parse out contigs + * + * @param strat the shatter to convert from + */ + ExpGrowthShardStrategy(ShardStrategy strat) { + super(strat); + this.baseSize = strat.nextShardSize(); + currentExp = 1; + } + + /** + * set the next shards size + * + * @param size adjust the next size to this + */ + public void adjustNextShardSize(long size) { + baseSize = size; + currentExp = 1; + } + + /** + * This is how the various shards strategies implements their approach + * + * @return the next shard size + */ + protected long nextShardSize() { + return (long) Math.floor(Math.pow((double) baseSize, (double) currentExp)); + } + +} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LinearShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LinearShardStrategy.java index c550d1157..c435fb36e 100644 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LinearShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LinearShardStrategy.java @@ -6,7 +6,7 @@ import net.sf.samtools.SAMSequenceDictionary; * * User: aaron * Date: Apr 6, 2009 - * Time: 8:23:19 PM + * Time: 7:18:19 PM * * The Broad Institute * SOFTWARE COPYRIGHT NOTICE AGREEMENT @@ -24,13 +24,13 @@ import net.sf.samtools.SAMSequenceDictionary; * @version 1.0 * @date Apr 6, 2009 *

- * Class LinearShard + * Class LinearShardStrategy *

- * A descriptions should go here. Blame aaron if it's missing. + * allows you to change the sharding length as you traverse */ -public class LinearShardStrategy extends ShardStrategy { +class LinearShardStrategy extends ShardStrategy { - // fixed size + // default the next size to 100,000 private long nextShardSize = 100000; /** @@ -43,6 +43,26 @@ public class LinearShardStrategy extends ShardStrategy { this.nextShardSize = startSize; } + /** + * the constructor, taking a seq dictionary to parse out contigs + * + * @param strat the shatter to convert from + */ + LinearShardStrategy(ShardStrategy strat) { + super(strat); + this.nextShardSize = strat.nextShardSize(); + } + + + /** + * set the next shards size + * + * @param size adjust the next size to this + */ + public void adjustNextShardSize(long size) { + nextShardSize = size; + } + /** * This is how the various shards strategies implements their approach * @@ -51,4 +71,5 @@ public class LinearShardStrategy extends ShardStrategy { protected long nextShardSize() { return nextShardSize; } + } diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/Shard.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/Shard.java new file mode 100644 index 000000000..d23375d68 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/Shard.java @@ -0,0 +1,55 @@ +package org.broadinstitute.sting.gatk.dataSources.shards; + +import org.broadinstitute.sting.utils.GenomeLoc; + +/** + * + * User: aaron + * Date: Apr 7, 2009 + * Time: 1:19:49 PM + * + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+ * + */ + + +/** + * @author aaron + * @version 1.0 + * @date Apr 7, 2009 + *

+ * Class Shard + *

+ * This is the base class for shards. Right now it does little more then + * wrap GenomeLoc (actually nothing more), but it's good to have the class + * in place so it's easier to change guts later. + */ +public class Shard { + + // currently our location + final GenomeLoc mLoc; + + public Shard(GenomeLoc loc) { + this.mLoc = loc; + } + + /** @return the genome location represented by this shard */ + public GenomeLoc getGenomeLoc() { + return mLoc; + } + + /** + * return a shard representing the passed in GenomeLoc + * + * @return + */ + public static Shard toShard(GenomeLoc loc) { + return new Shard(loc); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategy.java index 3246a56ad..3ba7e46c6 100644 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategy.java @@ -29,11 +29,11 @@ import java.util.Iterator; *

* The shard interface, which controls how data is divided */ -public abstract class ShardStrategy implements Iterator { +public abstract class ShardStrategy implements Iterator, Iterable { // this stores the seq dictionary, which is a reference for the // lengths and names of contigs, which you need to generate an iterative stratagy - protected static SAMSequenceDictionary dic = null; + protected final SAMSequenceDictionary dic; // the current genome location protected GenomeLoc mLoc = null; @@ -61,20 +61,53 @@ public abstract class ShardStrategy implements Iterator { } } + /** + * the copy constructor, + * + * @param old the old strategy + */ + ShardStrategy(ShardStrategy old) { + this.dic = old.dic; + this.mLoc = old.mLoc; + this.seqLoc = old.seqLoc; + this.lastGenomeLocSize = old.lastGenomeLocSize; + this.nextContig = old.nextContig; + } + + /** + * + * Abstract methods that each strategy has to implement + * + */ + + /** + * set the next shards size + * + * @param size adjust the next size to this + */ + public abstract void adjustNextShardSize(long size); + /** * This is how the various shards strategies implements their approach * * @return the next shard size */ - protected abstract long nextShardSize(); + abstract long nextShardSize(); + + + /** + * + * Concrete methods that each strategy does not have to implement + * + */ /** * get the next shard, based on the return size of nextShardSize * * @return */ - public GenomeLoc next() { + public Shard next() { // lets get some background info on the problem long length = dic.getSequence(seqLoc).getSequenceLength(); long proposedSize = nextShardSize(); @@ -83,7 +116,7 @@ public abstract class ShardStrategy implements Iterator { if (nextStart + proposedSize < length) { lastGenomeLocSize = proposedSize; mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize); - return new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize); + return 
Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize)); } // else we can't make it in the current location, we have to stitch one together else { @@ -92,7 +125,7 @@ public abstract class ShardStrategy implements Iterator { // move to the next contig jumpContig(); - return new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, lastGenomeLocSize); + return Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, lastGenomeLocSize)); } } @@ -123,4 +156,16 @@ public abstract class ShardStrategy implements Iterator { public void remove() { throw new UnsupportedOperationException("Can not remove records from a shard iterator!"); } + + + /** + * to be for-each(able), we must implement this method + * + * @return + */ + public Iterator iterator() { + return this; + } + + } diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java index 407604a08..41c608026 100644 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java @@ -30,7 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary; */ public class ShardStrategyFactory { public enum SHATTER_STRATEGY { - ADAPTIVE, LINEAR + LINEAR, EXPONENTIAL } /** @@ -42,25 +42,34 @@ public class ShardStrategyFactory { * @return */ static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) { - ShardStrategy d = null; switch (strat) { - case ADAPTIVE: - d = new AdaptiveShardStrategy(dic, startingSize); + case LINEAR: + return new LinearShardStrategy(dic, startingSize); + case EXPONENTIAL: + return new ExpGrowthShardStrategy(dic, startingSize); default: - d = new LinearShardStrategy(dic, startingSize); // default + throw new RuntimeException("Strategy: " + strat 
+ " isn't implemented"); } - return d; + } /** - * if you know what you want + * convert between types * - * @param dic the seq dictionary - * @param startingSize the starting size + * @param strat the strategy + * @param convertFrom convert from this strategy * @return */ - static public AdaptiveShardStrategy getAdaptiveShard(SAMSequenceDictionary dic, long startingSize) { - return new AdaptiveShardStrategy(dic, startingSize); + static public ShardStrategy transitionToShardStrategy(SHATTER_STRATEGY strat, ShardStrategy convertFrom) { + switch (strat) { + case LINEAR: + return new LinearShardStrategy(convertFrom); + case EXPONENTIAL: + return new ExpGrowthShardStrategy(convertFrom); + default: + throw new RuntimeException("Strategy: " + strat + " isn't implemented"); + + } } }