-Replaced adaptive and linear strategies with an adaptive linear strategy

-Added the exponential growth strategy -Added factory code that allows you to transition between strategies, so if you want to move from linear to exp at a point, and then back when you've hit a runtime threshold, it will take care of it for you. -Changed the code to return a Shard instead of a GenomeLoc git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@313 348d0f76-0448-11de-a6fe-93d51630548a
2009-04-07 19:37:38 +00:00 · 2009-04-07 19:37:38 +00:00 · 08fddd43af
parent 6369d23b43
commit 08fddd43af
5 changed files with 230 additions and 22 deletions
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ExpGrowthShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ExpGrowthShardStrategy.java
@ -0,0 +1,78 @@
+package org.broadinstitute.sting.gatk.dataSources.shards;
+
+import net.sf.samtools.SAMSequenceDictionary;
+
+/**
+ *
+ * User: aaron
+ * Date: Apr 6, 2009
+ * Time: 8:23:19 PM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT 
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ */
+
+
+/**
+ * @author aaron
+ * @version 1.0
+ * @date Apr 6, 2009
+ * <p/>
+ * Class ExpGrowthShardStrategy
+ * <p/>
+ * A shard strategy whose shard size is baseSize raised to the power currentExp.
+ */
+public class ExpGrowthShardStrategy extends ShardStrategy {
+
+    // base of the size calculation (defaults to 100,000) and the exponent applied to it
+    private long baseSize = 100000;
+    private long currentExp = 1; // NOTE(review): only ever assigned 1 in this class, so the size never grows -- confirm intent
+
+    /**
+     * Creates a strategy over the given sequence dictionary.
+     * @param dic       the seq dictionary, passed to the ShardStrategy constructor
+     * @param startSize the initial base size
+     */
+    ExpGrowthShardStrategy(SAMSequenceDictionary dic, long startSize) {
+        super(dic);
+        this.baseSize = startSize;
+        currentExp = 1;
+    }
+
+    /**
+     * Conversion constructor: passes strat to the ShardStrategy copy constructor.
+     *
+     * @param strat the strategy to convert from; its nextShardSize() becomes the base size
+     */
+    ExpGrowthShardStrategy(ShardStrategy strat) {
+        super(strat);
+        this.baseSize = strat.nextShardSize();
+        currentExp = 1;
+    }
+
+    /**
+     * Sets the base size and resets the exponent to 1.
+     *
+     * @param size the new base size
+     */
+    public void adjustNextShardSize(long size) {
+        baseSize = size;
+        currentExp = 1;
+    }
+
+    /**
+     * Computes the size of the next shard.
+     *
+     * @return floor(baseSize ^ currentExp), cast to long
+     */
+    protected long nextShardSize() {
+        return (long) Math.floor(Math.pow((double) baseSize, (double) currentExp));
+    }
+
+}
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LinearShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LinearShardStrategy.java
@ -6,7 +6,7 @@ import net.sf.samtools.SAMSequenceDictionary;
 *
 * User: aaron
 * Date: Apr 6, 2009
- * Time: 8:23:19 PM
+ * Time: 7:18:19 PM
 *
 * The Broad Institute
 * SOFTWARE COPYRIGHT NOTICE AGREEMENT 
@ -24,13 +24,13 @@ import net.sf.samtools.SAMSequenceDictionary;
 * @version 1.0
 * @date Apr 6, 2009
 * <p/>
- * Class LinearShard
+ * Class AdaptiveShard
 * <p/>
- * A descriptions should go here. Blame aaron if it's missing.
+ * allows you to change the sharding length as you traverse
 */
-public class LinearShardStrategy extends ShardStrategy {
+class LinearShardStrategy extends ShardStrategy {

-    // fixed size
+    // default the next size to 100,000
    private long nextShardSize = 100000;

    /**
@ -43,6 +43,26 @@ public class LinearShardStrategy extends ShardStrategy {
        this.nextShardSize = startSize;
    }

+    /**
+     * Conversion constructor: passes strat to the ShardStrategy copy constructor.
+     *
+     * @param strat the strategy to convert from; its nextShardSize() becomes this strategy's size
+     */
+    LinearShardStrategy(ShardStrategy strat) {
+        super(strat);
+        this.nextShardSize = strat.nextShardSize();
+    }
+
+
+    /**
+     * Sets the size that nextShardSize() will report from now on.
+     *
+     * @param size the new shard size
+     */
+    public void adjustNextShardSize(long size) {
+        nextShardSize = size;
+    }
+
    /**
     * This is how the various shards strategies implements their approach
     *
@ -51,4 +71,5 @@ public class LinearShardStrategy extends ShardStrategy {
    protected long nextShardSize() {
        return nextShardSize;
    }
+
 }
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/Shard.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/Shard.java
@ -0,0 +1,55 @@
+package org.broadinstitute.sting.gatk.dataSources.shards;
+
+import org.broadinstitute.sting.utils.GenomeLoc;
+
+/**
+ *
+ * User: aaron
+ * Date: Apr 7, 2009
+ * Time: 1:19:49 PM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT 
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ */
+
+
+/**
+ * @author aaron
+ * @version 1.0
+ * @date Apr 7, 2009
+ * <p/>
+ * Class Shard
+ * <p/>
+ * This is the base class for shards.  Right now it does little more than
+ * wrap GenomeLoc (actually nothing more), but it's good to have the class
+ * in place so it's easier to change guts later.
+ */
+public class Shard {
+
+    // the genome location this shard wraps; assigned once in the constructor
+    final GenomeLoc mLoc;
+
+    public Shard(GenomeLoc loc) {
+        this.mLoc = loc; // the reference is stored as given, no copy is made
+    }
+
+    /** @return the genome location represented by this shard */
+    public GenomeLoc getGenomeLoc() {
+        return mLoc;
+    }
+
+    /**
+     * Creates a shard representing the passed in GenomeLoc.
+     * @param loc the genome location to wrap
+     * @return a new Shard wrapping loc
+     */
+    public static Shard toShard(GenomeLoc loc) {
+        return new Shard(loc);
+    }
+}
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategy.java
@ -29,11 +29,11 @@ import java.util.Iterator;
 * <p/>
 * The shard interface, which controls how data is divided
 */
-public abstract class ShardStrategy implements Iterator<GenomeLoc> {
+public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard> {

    // this stores the seq dictionary, which is a reference for the
    // lengths and names of contigs, which you need to generate an iterative stratagy
-    protected static SAMSequenceDictionary dic = null;
+    protected final SAMSequenceDictionary dic;

    // the current genome location
    protected GenomeLoc mLoc = null;
@ -61,20 +61,53 @@ public abstract class ShardStrategy implements Iterator<GenomeLoc> {
        }
    }

+    /**
+     * Copy constructor: takes dic, mLoc, seqLoc, lastGenomeLocSize and nextContig from another strategy.
+     *
+     * @param old the strategy whose field values are copied
+     */
+    ShardStrategy(ShardStrategy old) {
+        this.dic = old.dic;
+        this.mLoc = old.mLoc;
+        this.seqLoc = old.seqLoc;
+        this.lastGenomeLocSize = old.lastGenomeLocSize;
+        this.nextContig = old.nextContig;
+    }
+
+    /**
+     *
+     * Abstract methods that each strategy has to implement
+     *
+     */
+
+    /**
+     * set the next shards size
+     *
+     * @param size adjust the next size to this
+     */
+    public abstract void adjustNextShardSize(long size);
+

    /**
     * This is how the various shards strategies implements their approach
     *
     * @return the next shard size
     */
-    protected abstract long nextShardSize();
+    abstract long nextShardSize();
+
+
+    /**
+     *
+     * Concrete methods that each strategy does not have to implement
+     *
+     */

    /**
     * get the next shard, based on the return size of nextShardSize
     *
     * @return
     */
-    public GenomeLoc next() {
+    public Shard next() {
        // lets get some background info on the problem
        long length = dic.getSequence(seqLoc).getSequenceLength();
        long proposedSize = nextShardSize();
@ -83,7 +116,7 @@ public abstract class ShardStrategy implements Iterator<GenomeLoc> {
        if (nextStart + proposedSize < length) {
            lastGenomeLocSize = proposedSize;
            mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize);
-            return new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize);
+            return Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize));
        }
        // else we can't make it in the current location, we have to stitch one together
        else {
@ -92,7 +125,7 @@ public abstract class ShardStrategy implements Iterator<GenomeLoc> {

            // move to the next contig
            jumpContig();
-            return new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, lastGenomeLocSize);
+            return Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, lastGenomeLocSize));
        }

    }
@ -123,4 +156,16 @@ public abstract class ShardStrategy implements Iterator<GenomeLoc> {
    public void remove() {
        throw new UnsupportedOperationException("Can not remove records from a shard iterator!");
    }
+
+
+    /**
+     * to be for-each(able), we must implement this method
+     *
+     * @return this strategy itself; every call hands back the same object, not a fresh iterator
+     */
+    public Iterator<Shard> iterator() {
+        return this;
+    }
+
+
 }
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java
@ -30,7 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary;
 */
 public class ShardStrategyFactory {
    public enum SHATTER_STRATEGY {
-        ADAPTIVE, LINEAR
+        LINEAR, EXPONENTIAL
    }

    /**
@ -42,25 +42,34 @@ public class ShardStrategyFactory {
     * @return
     */
    static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) {
-        ShardStrategy d = null;
        switch (strat) {
-            case ADAPTIVE:
-                d = new AdaptiveShardStrategy(dic, startingSize);
+            case LINEAR:
+                return new LinearShardStrategy(dic, startingSize);
+            case EXPONENTIAL:
+                return new ExpGrowthShardStrategy(dic, startingSize);
            default:
-                d = new LinearShardStrategy(dic, startingSize); // default
+                throw new RuntimeException("Strategy: " + strat + " isn't implemented");
        }
-        return d;
+
    }

    /**
-     * if you know what you want
+     * convert between types
     *
-     * @param dic          the seq dictionary
-     * @param startingSize the starting size
+     * @param strat       the strategy
+     * @param convertFrom convert from this strategy
     * @return
     */
-    static public AdaptiveShardStrategy getAdaptiveShard(SAMSequenceDictionary dic, long startingSize) {
-        return new AdaptiveShardStrategy(dic, startingSize);
+    static public ShardStrategy transitionToShardStrategy(SHATTER_STRATEGY strat, ShardStrategy convertFrom) {
+        switch (strat) { // each case builds the requested strategy through its conversion constructor
+            case LINEAR:
+                return new LinearShardStrategy(convertFrom);
+            case EXPONENTIAL:
+                return new ExpGrowthShardStrategy(convertFrom);
+            default: // any enum constant without a case above is rejected
+                throw new RuntimeException("Strategy: " + strat + " isn't implemented");
+
+        }
+    }

 }