diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategy.java
new file mode 100755
index 000000000..70838af86
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategy.java
@@ -0,0 +1,65 @@
+package org.broadinstitute.sting.gatk.dataSources.shards;
+
+import net.sf.samtools.SAMSequenceDictionary;
+import org.broadinstitute.sting.utils.GenomeLoc;
+
+import java.util.List;
+
+/**
+ *
+ * User: aaron
+ * Date: May 14, 2009
+ * Time: 3:28:50 PM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ */
+
+
+/**
+ * @author aaron
+ * @version 1.0
+ * @date May 14, 2009
+ *
+ * Class IntervalShardStrategy
+ *
+ * This class knows how to shard on a genome loc boundary. It guarantees
+ * a one-to-one mapping between a GenomeLoc and the shard that is returned.
+ */
+public class IntervalShardStrategy extends LocusShardStrategy {
+    /**
+     * the constructor; both arguments are handed straight to the LocusShardStrategy constructor
+     * @param dic       the seq dictionary
+     * @param intervals the genome locations to shard over
+     */
+    IntervalShardStrategy(SAMSequenceDictionary dic, List<GenomeLoc> intervals) {
+        super(dic, intervals);
+    }
+
+    /**
+     * Proposes the next shard size as stop minus start of the current interval.
+     * @return the next shard size
+     * @throws IllegalStateException if there is no current interval to measure
+     */
+    protected long nextShardSize() {
+        GenomeLoc current = this.getCurrentInterval();
+        if (current == null) {
+            throw new IllegalStateException("IntervalShardStrategy: there is no current interval to size the next shard from");
+        }
+        return current.getStop() - current.getStart();
+    }
+
+    /**
+     * Does nothing: this strategy takes its shard size from the current interval instead.
+     * @param size the requested shard size (ignored)
+     */
+    public void adjustNextShardSize(long size) {
+        // intentionally empty, see the method comment
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShardStrategy.java
index 5ddfaebaa..5907fd7f4 100755
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShardStrategy.java
@@ -143,7 +143,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
if (this.intervals == null) {
return nonIntervaledNext(length, proposedSize, nextStart);
} else {
- return intervaledNext(length, proposedSize, nextStart);
+ return intervaledNext(proposedSize, nextStart);
}
}
@@ -151,16 +151,15 @@ public abstract class LocusShardStrategy implements ShardStrategy {
/**
* Interval based next processing
*
- * @param length the length of the sequence
* @param proposedSize the proposed size
* @param nextStart where we start from
* @return the shard that represents this data
*/
- private Shard intervaledNext(long length, long proposedSize, long nextStart) {
+ private Shard intervaledNext(long proposedSize, long nextStart) {
// get the current genome location
GenomeLoc loc = intervals.get(currentInterval);
- if (nextStart + proposedSize > loc.getStop()) {
- // we need to move the next interval
+ if (nextStart + proposedSize >= loc.getStop()) {
+ // we need to get the rest of the current loc in a shard (return it), and move to the next location
proposedSize = loc.getStop() - nextStart;
lastGenomeLocSize = proposedSize;
@@ -265,4 +264,16 @@ public abstract class LocusShardStrategy implements ShardStrategy {
return this;
}
+    /**
+     * this allows a shard strategy to get the current interval. It's kind of a hack, but for the
+     * IntervalShardStrategy it was the best approach.
+     * @return the current interval, or null when there is no interval list or the index is outside of it
+     */
+    protected GenomeLoc getCurrentInterval() {
+        if (this.intervals == null || currentInterval < 0 || currentInterval >= intervals.size()) {
+            return null;
+        }
+        return intervals.get(currentInterval);
+    }
+
}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java
index a5b0d3745..c4d1b63c6 100644
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java
@@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.dataSources.shards;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.StingException;
import java.util.List;
@@ -35,7 +36,7 @@ import java.util.List;
*/
public class ShardStrategyFactory {
public enum SHATTER_STRATEGY {
- LINEAR, EXPONENTIAL, READS
+ LINEAR, EXPONENTIAL, READS, INTERVAL
}
/** our log, which we want to capture anything from this class */
@@ -59,7 +60,7 @@ public class ShardStrategyFactory {
case READS:
return new ReadShardStrategy(dic, startingSize);
default:
- throw new RuntimeException("Strategy: " + strat + " isn't implemented");
+ throw new StingException("Strategy: " + strat + " isn't implemented for this type of shatter request");
}
}
@@ -78,7 +79,7 @@ public class ShardStrategyFactory {
case EXPONENTIAL:
return new ExpGrowthLocusShardStrategy(convertFrom);
default:
- throw new RuntimeException("Strategy: " + strat + " isn't implemented");
+ throw new StingException("Strategy: " + strat + " isn't implemented");
}
}
@@ -100,15 +101,17 @@ public class ShardStrategyFactory {
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst);
case READS:
// return new ReadShardStrategy(dic, startingSize);
- throw new RuntimeException("Strategy: " + strat + " isn't implemented for intervals");
+ throw new StingException("Strategy: " + strat + " isn't implemented for intervals");
+ case INTERVAL:
+ return new IntervalShardStrategy(dic, lst);
default:
- throw new RuntimeException("Strategy: " + strat + " isn't implemented");
+ throw new StingException("Strategy: " + strat + " isn't implemented");
}
}
/**
- * convert between types
+ * setup a reads shattering strategy
*
* @param readCount the number of reads to include in each shard
* @return
diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategyTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategyTest.java
new file mode 100755
index 000000000..38ffca578
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategyTest.java
@@ -0,0 +1,142 @@
+package org.broadinstitute.sting.gatk.dataSources.shards;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.fail;
+import net.sf.samtools.SAMSequenceDictionary;
+import net.sf.samtools.SAMSequenceRecord;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
+import org.junit.*;
+
+import java.io.File;
+import java.util.ArrayList;
+
+/**
+ *
+ * User: aaron
+ * Date: May 14, 2009
+ * Time: 3:52:57 PM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ */
+
+
+/**
+ * @author aaron
+ * @version 1.0
+ * @date May 14, 2009
+ *
+ * Class IntervalShardStrategyTest
+ *
+ * IntervalShardStrategy tests
+ */
+public class IntervalShardStrategyTest extends BaseTest {
+    /** the reference sequence file whose sequence dictionary drives the test */
+    private static FastaSequenceFile2 seq;
+
+    /**
+     * This function (because of the @BeforeClass tag) gets called only once ever,
+     * before any tests are run; it opens the reference fasta under seqLocation.
+     */
+    @BeforeClass
+    public static void doBeforeAnyTests() {
+        seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
+    }
+
+    /**
+     * Called once, after all of the tests in this class have run (@AfterClass).
+     * There is currently nothing to tear down.
+     */
+    @AfterClass
+    public static void doAfterAllTests() {
+    }
+
+    /**
+     * Called before every test case method (@Before).
+     * There is currently no per-test setup.
+     */
+    @Before
+    public void doForEachTest() {
+    }
+
+    /**
+     * Called after every test case method (@After).
+     * There is currently no per-test tear down.
+     */
+    @After
+    public void undoForEachTest() {
+    }
+
+    /**
+     * Builds a list of evenly spaced intervals on one contig, shards over them with the
+     * INTERVAL strategy, and checks that every shard matches its interval, in order, and
+     * that exactly one shard is produced per interval.
+     */
+    @Test
+    public void testIntervalGenomeCycle() throws InterruptedException {
+        logger.warn("Executing testIntervalGenomeCycle");
+        SAMSequenceDictionary dic = seq.getSequenceDictionary();
+
+        // lay the fake intervals out along the sequence at index 1 of the dictionary
+        SAMSequenceRecord s = dic.getSequence(1);
+        int stop = s.getSequenceLength();
+        int size = 10000;
+        int location = 1;
+
+        GenomeLoc.setupRefContigOrdering(dic);
+        // keep track of the number of genome locs we build
+        int genomeLocs = 0;
+        ArrayList<GenomeLoc> locations = new ArrayList<GenomeLoc>();
+        try {
+            while (location + size < stop) {
+                // lets make up some fake locations
+                GenomeLoc gl = new GenomeLoc(s.getSequenceName(), location, location + size - 1);
+                logger.debug("loc = " + location);
+
+                // let's move the location up, with a size space
+                location += (size * 2);
+
+                // add our current location to the list
+                locations.add(gl);
+
+                // add another genome location
+                ++genomeLocs;
+            }
+        } catch (Exception e) {
+            // a partially built interval list would make the comparison below meaningless
+            e.printStackTrace();
+            fail("testIntervalGenomeCycle: unable to build the interval list: " + e);
+        }
+        logger.debug("Location count = " + genomeLocs);
+        ShardStrategy strategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL, seq.getSequenceDictionary(), 0, locations);
+        int shardCount = 0;
+        try {
+            for (Shard sh : strategy) {
+                if (shardCount >= locations.size()) {
+                    fail("Shard strategy returned more shards than the " + genomeLocs + " intervals it was given");
+                }
+                GenomeLoc l = sh.getGenomeLoc();
+                GenomeLoc truth = locations.get(shardCount);
+                if (l.compareTo(truth) != 0) {
+                    String truthStr = truth.getContig() + ":" + truth.getStart() + ":" + truth.getStop();
+                    String lStr = l.getContig() + ":" + l.getStart() + ":" + l.getStop();
+                    fail("Genome loc " + truthStr + " doesn't equal " + lStr);
+                }
+                shardCount++;
+            }
+            // expected value first, so a failure message reads correctly
+            assertEquals(genomeLocs, shardCount);
+        } catch (Exception e) {
+            e.printStackTrace();
+            fail("testIntervalGenomeCycle: no exception expected");
+        }
+    }
+}
diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactoryTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactoryTest.java
index a2d69ed62..88f4d069f 100755
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactoryTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactoryTest.java
@@ -124,11 +124,10 @@ public class ShardStrategyFactoryTest extends BaseTest {
int size = 10000;
int location = 1;
GenomeLoc.setupRefContigOrdering(dic);
- logger.debug("done to sleep");
// keep track of the number of genome locs we build
int genomeLocs = 0;
ArrayList locations = new ArrayList();
- logger.debug("done to sleep2");
+
try {
while (location + size < stop) {
logger.debug("s = " + s.getSequenceName() + " " + location + " " + size);