diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ExpGrowthLocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ExpGrowthLocusShardStrategy.java
index de8d47993..f80142ce5 100755
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ExpGrowthLocusShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ExpGrowthLocusShardStrategy.java
@@ -6,20 +6,29 @@ import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.List;
-/**
+/*
+ * Copyright (c) 2009 The Broad Institute
*
- * User: aaron
- * Date: Apr 6, 2009
- * Time: 8:23:19 PM
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
*
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT
- * This software and its documentation are copyright 2009 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever. Neither
- * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
*
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
*/
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalReadShard.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShard.java
similarity index 81%
rename from java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalReadShard.java
rename to java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShard.java
index 21c70bb78..7530cb672 100755
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalReadShard.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShard.java
@@ -1,6 +1,5 @@
package org.broadinstitute.sting.gatk.dataSources.shards;
-import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.GenomeLoc;
@@ -31,17 +30,18 @@ import org.broadinstitute.sting.utils.GenomeLoc;
/**
* @author aaron
- *
- * Class IntervalReadShard
- *
- * This is the read shard that knowns about genomic intervals
+ *
+ * Class IntervalShard
+ *
+ * the base interval shard. All interval shards are generally the same,
+ * but must return their ShardType individually.
*/
-public class IntervalReadShard implements Shard {
+public class IntervalShard implements Shard {
- /** a collection of genomic locations to interate over */
+ /** the genomic location to iterate over */
private GenomeLoc mSet;
- IntervalReadShard(GenomeLoc myLocation) {
+ IntervalShard(GenomeLoc myLocation) {
mSet = myLocation.clone();
}
@@ -55,7 +55,7 @@ public class IntervalReadShard implements Shard {
*
- * @return READ, indicating the shard type
+ * @return INTERVAL, indicating the shard type
*/
- public ShardType getShardType() {
- return Shard.ShardType.READ;
+ public Shard.ShardType getShardType() {
+ return ShardType.INTERVAL;
}
}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LinearLocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LinearLocusShardStrategy.java
index 5d91c0319..a11791439 100755
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LinearLocusShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LinearLocusShardStrategy.java
@@ -6,20 +6,29 @@ import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.List;
-/**
+/*
+ * Copyright (c) 2009 The Broad Institute
*
- * User: aaron
- * Date: Apr 6, 2009
- * Time: 7:18:19 PM
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
*
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT
- * This software and its documentation are copyright 2009 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever. Neither
- * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
*
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
*/
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusIntervalShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusIntervalShardStrategy.java
deleted file mode 100755
index c7f5a6291..000000000
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusIntervalShardStrategy.java
+++ /dev/null
@@ -1,66 +0,0 @@
-package org.broadinstitute.sting.gatk.dataSources.shards;
-
-import net.sf.samtools.SAMSequenceDictionary;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.GenomeLocSortedSet;
-
-import java.util.List;
-
-/**
- *
- * User: aaron
- * Date: May 14, 2009
- * Time: 3:28:50 PM
- *
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT
- * This software and its documentation are copyright 2009 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever. Neither
- * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
- *
- */
-
-
-/**
- * @author aaron
- * @version 1.0
- * @date May 14, 2009
- *
- * Class LocusWindowShardStrategy
- *
- * This function knows how to shard on a genome loc boundry. It guarantees
- * a one-to-one mapping between a GenomeLoc and shard.
- */
-public class LocusIntervalShardStrategy extends LocusShardStrategy {
- /**
- * the constructor, taking a seq dictionary to parse out contigs
- *
- * @param dic the seq dictionary
- * @param intervals file
- */
- LocusIntervalShardStrategy(SAMSequenceDictionary dic, GenomeLocSortedSet intervals) {
- super(dic, intervals);
- }
-
- /**
- * This is how the various shards strategies implements their approach, adjusting this value
- *
- * @return the next shard size
- */
- protected long nextShardSize() {
- long nextSize = this.getCurrentInterval().getStop() - this.getCurrentInterval().getStart();
- return nextSize;
- }
-
- /**
- * set the next shards size
- *
- * @param size adjust the next size to this
- */
- public void adjustNextShardSize(long size) {
- //To change body of implemented methods use File | Settings | File Templates.
- }
-
-}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShardStrategy.java
index 084dc2946..c663da43e 100755
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShardStrategy.java
@@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.StingException;
import java.util.Iterator;
-import java.util.List;
/**
*
* User: aaron
@@ -159,11 +158,11 @@ public abstract class LocusShardStrategy implements ShardStrategy {
if (loc.getStop() - loc.getStart() <= proposedSize) {
intervals.removeRegion(loc);
- return new IntervalReadShard(loc);
+ return new IntervalShard(loc);
} else {
GenomeLoc subLoc = new GenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStart() + proposedSize - 1);
intervals.removeRegion(subLoc);
- return new IntervalReadShard(subLoc);
+ return new IntervalShard(subLoc);
}
}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadIntervalShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadIntervalShardStrategy.java
deleted file mode 100755
index 0dbde857d..000000000
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadIntervalShardStrategy.java
+++ /dev/null
@@ -1,118 +0,0 @@
-package org.broadinstitute.sting.gatk.dataSources.shards;
-
-import net.sf.samtools.SAMSequenceDictionary;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.GenomeLocSortedSet;
-import org.broadinstitute.sting.utils.StingException;
-
-import java.util.Iterator;
-import java.util.List;
-
-/**
- *
- * User: aaron
- * Date: May 21, 2009
- * Time: 4:13:53 PM
- *
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT
- * This software and its documentation are copyright 2009 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever. Neither
- * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
- *
- */
-
-
-/**
- * @author aaron
- *
- * Class ReadByIntervalShardStrategy
- *
- * Impliments the sharding strategy for reads, given a list
- * of genomic locations. Shards returned will be bounded by the interval,
- * but each provided interval may be split into a number of smaller regions.
- */
-public class ReadIntervalShardStrategy implements ShardStrategy {
-
- /** our storage of the genomic locations they'd like to shard over */
- private final GenomeLocSortedSet regions;
-
- /** their prefered size of the shard, we can modify this based on what we see in the shards */
- private long size;
-
- /** the sequence dictionary we'll use to lookup the contigs */
- private final SAMSequenceDictionary dict;
-
- /**
- * change the recommended shard size for the next shard we generate. The code will do it's
- * best to respect this value, but there are no guarantees.
- *
- * @param size the next recommended shard size.
- */
- public void adjustNextShardSize(long size) {
- this.size = size;
- }
-
- /**
- * the default constructor
- *
- * @param dict the sequence dictionary to use
- * @param size the read count to iterate over
- */
- ReadIntervalShardStrategy(SAMSequenceDictionary dict, long size, GenomeLocSortedSet locations) {
- if (locations == null || locations.isEmpty()) {
- throw new StingException("ReadIntervalShardStrategy: genomic regions list is empty.");
- }
- this.regions = locations.clone();
- this.size = size;
- this.dict = dict;
- }
-
- /**
- * returns true if there are additional shards
- * @return false if we're done processing shards
- */
- public boolean hasNext() {
- return (!regions.isEmpty());
- }
-
- /**
- * gets the next Shard
- * @return the next shard
- */
- public Shard next() {
- if ((this.regions == null) || (regions.isEmpty())) {
- throw new StingException("ReadIntervalShardStrategy: genomic regions list is empty in next() function.");
- }
-
- // get the first region in the list
- GenomeLoc loc = regions.iterator().next();
-
- if (loc.getStop() - loc.getStart() <= this.size) {
- regions.removeRegion(loc);
- return new IntervalReadShard(loc);
- } else {
- GenomeLoc subLoc = new GenomeLoc(loc.getContigIndex(),loc.getStart(),loc.getStart()+size-1);
- regions.removeRegion(subLoc);
- return new IntervalReadShard(subLoc);
- }
-
- }
-
- /**
- * we don't support the remove command
- */
- public void remove() {
- throw new UnsupportedOperationException("ShardStrategies don't support remove()");
- }
-
- /**
- * makes the ReadIntervalShard iterable, i.e. usable in a for loop.
- * @return
- */
- public Iterator iterator() {
- return this;
- }
-}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShardStrategy.java
index 3e4ebc62e..ffa2f2685 100755
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShardStrategy.java
@@ -4,16 +4,29 @@ import net.sf.samtools.SAMSequenceDictionary;
import java.util.Iterator;
-/**
+/*
+ * Copyright (c) 2009 The Broad Institute
*
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT
- * This software and its documentation are copyright 2009 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
*
- * This software is supplied without any warranty or guaranteed support whatsoever. Neither
- * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
*
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
*/
@@ -60,7 +73,7 @@ public class ReadShardStrategy implements ShardStrategy {
}
public Shard next() {
- return new ReadShard((int)readCount, this);
+ return new ReadShard((int)readCount, this);
}
public void remove() {
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/Shard.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/Shard.java
index 0c94b6e37..bed3ffae1 100644
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/Shard.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/Shard.java
@@ -30,7 +30,7 @@ import java.io.Serializable;
*/
public interface Shard extends Serializable {
enum ShardType {
- READ, LOCUS
+ READ, LOCUS, INTERVAL
}
/** @return the genome location represented by this shard */
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java
index 3677bcb75..f9b4b3364 100644
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java
@@ -2,12 +2,9 @@ package org.broadinstitute.sting.gatk.dataSources.shards;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
-import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
-import java.util.List;
-
/**
*
* User: aaron
@@ -81,10 +78,9 @@ public class ShardStrategyFactory {
return new LinearLocusShardStrategy(dic, startingSize, lst);
case EXPONENTIAL:
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst);
- case READS:
- return new ReadIntervalShardStrategy(dic, startingSize, lst);
case INTERVAL:
- return new LocusIntervalShardStrategy(dic, lst);
+ case READS:
+ return new IntervalShardStrategy(startingSize, lst);
default:
throw new StingException("Strategy: " + strat + " isn't implemented");
}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java
index cb9db9df2..5571028a3 100755
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java
@@ -148,14 +148,16 @@ public class SAMDataSource implements SimpleDataSource {
reads.getDownsamplingFraction(),
reads.getMaxOnTheFlySorts(),
reads.getSafetyChecking());
- } else if (shard.getShardType() == Shard.ShardType.LOCUS) {
+ } else if (shard.getShardType() == Shard.ShardType.LOCUS ||
+ shard.getShardType() == Shard.ShardType.INTERVAL) {
iterator = seekLocus(shard.getGenomeLoc());
iterator = TraversalEngine.applyDecoratingIterators(false,
iterator,
reads.getDownsamplingFraction(),
reads.getMaxOnTheFlySorts(),
reads.getSafetyChecking());
- } else {
+ }
+ else {
throw new StingException("seek: Unknown shard type");
}
diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
index 90bd909a3..c85bff529 100755
--- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
+++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
@@ -38,11 +38,9 @@ import java.util.ArrayList;
* To change this template use File | Settings | File Templates.
*/
-/**
- * Shards and schedules data in manageable chunks.
- */
+/** Shards and schedules data in manageable chunks. */
public abstract class MicroScheduler {
- private static long SHARD_SIZE = 100000L;
+ private static long SHARD_SIZE = 100000L;
protected static Logger logger = Logger.getLogger(MicroScheduler.class);
@@ -59,14 +57,13 @@ public abstract class MicroScheduler {
* @param nThreadsToUse Number of threads to utilize.
* @return The best-fit microscheduler.
*/
- public static MicroScheduler create( Walker walker, Reads reads, File ref, List> rods, int nThreadsToUse ) {
- if( walker instanceof TreeReducible && nThreadsToUse > 1 ) {
+ public static MicroScheduler create(Walker walker, Reads reads, File ref, List> rods, int nThreadsToUse) {
+ if (walker instanceof TreeReducible && nThreadsToUse > 1) {
logger.info("Creating hierarchical microscheduler");
- return new HierarchicalMicroScheduler( walker, reads, ref, rods, nThreadsToUse );
- }
- else {
+ return new HierarchicalMicroScheduler(walker, reads, ref, rods, nThreadsToUse);
+ } else {
logger.info("Creating linear microscheduler");
- return new LinearMicroScheduler( walker, reads, ref, rods );
+ return new LinearMicroScheduler(walker, reads, ref, rods);
}
}
@@ -75,16 +72,16 @@ public abstract class MicroScheduler {
* @param reads The reads.
* @param refFile File pointer to the reference.
*/
- protected MicroScheduler( Walker walker, Reads reads, File refFile, List> rods ) {
+ protected MicroScheduler(Walker walker, Reads reads, File refFile, List> rods) {
if (walker instanceof ReadWalker) {
traversalEngine = new TraverseReads(reads.getReadsFiles(), refFile, rods);
} else {
traversalEngine = new TraverseLoci(reads.getReadsFiles(), refFile, rods);
}
- this.reads = getReadsDataSource( reads );
- this.reference = openReferenceSequenceFile( refFile );
- this.rods = getReferenceOrderedDataSources( rods );
+ this.reads = getReadsDataSource(reads);
+ this.reference = openReferenceSequenceFile(refFile);
+ this.rods = getReferenceOrderedDataSources(rods);
}
/**
@@ -102,7 +99,7 @@ public abstract class MicroScheduler {
* @param intervals A list of intervals over which to walk. Null for whole dataset.
* @return the return type of the walker
*/
- public abstract Object execute( Walker walker, GenomeLocSortedSet intervals);
+ public abstract Object execute(Walker walker, GenomeLocSortedSet intervals);
/**
* Get the sharding strategy given a driving data source.
@@ -111,35 +108,42 @@ public abstract class MicroScheduler {
* @param intervals Intervals to use when limiting sharding.
* @return Sharding strategy for this driving data source.
*/
- protected ShardStrategy getShardStrategy( Walker walker, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals ) {
+ protected ShardStrategy getShardStrategy(Walker walker, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) {
ShardStrategy shardStrategy = null;
+ ShardStrategyFactory.SHATTER_STRATEGY shardType;
+ if (walker instanceof LocusWalker) {
+ if (intervals != null) {
+ shardType = (walker.isReduceByInterval()) ?
+ ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL :
+ ShardStrategyFactory.SHATTER_STRATEGY.LINEAR;
- if( walker instanceof LocusWalker ) {
- if( intervals != null ) {
- ShardStrategyFactory.SHATTER_STRATEGY shardType = (walker.isReduceByInterval()) ?
- ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL :
- ShardStrategyFactory.SHATTER_STRATEGY.LINEAR;
+ shardStrategy = ShardStrategyFactory.shatter(shardType,
+ drivingDataSource.getSequenceDictionary(),
+ SHARD_SIZE,
+ intervals);
+ } else
+ shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
+ drivingDataSource.getSequenceDictionary(),
+ SHARD_SIZE);
- shardStrategy = ShardStrategyFactory.shatter( shardType,
- drivingDataSource.getSequenceDictionary(),
- SHARD_SIZE,
- intervals );
+ } else if (walker instanceof ReadWalker) {
+
+ shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS;
+
+ if (intervals != null) {
+ shardStrategy = ShardStrategyFactory.shatter(shardType,
+ drivingDataSource.getSequenceDictionary(),
+ SHARD_SIZE,
+ intervals);
+ } else {
+ shardStrategy = ShardStrategyFactory.shatter(shardType,
+ drivingDataSource.getSequenceDictionary(),
+ SHARD_SIZE);
}
- else
- shardStrategy = ShardStrategyFactory.shatter( ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
- drivingDataSource.getSequenceDictionary(),
- SHARD_SIZE );
-
- }
- else if( walker instanceof ReadWalker ) {
- shardStrategy = ShardStrategyFactory.shatter( ShardStrategyFactory.SHATTER_STRATEGY.READS,
- drivingDataSource.getSequenceDictionary(),
- SHARD_SIZE );
- }
- else
+ } else
throw new StingException("Unable to support walker of type" + walker.getClass().getName());
- return shardStrategy;
+ return shardStrategy;
}
/**
@@ -147,20 +151,20 @@ public abstract class MicroScheduler {
* @param shard The section of data to view.
* @return An accessor for all the data in this shard.
*/
- protected ShardDataProvider getShardDataProvider( Shard shard ) {
- return new ShardDataProvider( shard, reads, reference, rods );
+ protected ShardDataProvider getShardDataProvider(Shard shard) {
+ return new ShardDataProvider(shard, reads, reference, rods);
}
/**
* Gets a data source for the given set of reads.
* @return A data source for the given set of reads.
*/
- private SAMDataSource getReadsDataSource( Reads reads ) {
+ private SAMDataSource getReadsDataSource(Reads reads) {
// By reference traversals are happy with no reads. Make sure that case is handled.
- if( reads.getReadsFiles().size() == 0 )
+ if (reads.getReadsFiles().size() == 0)
return null;
- SAMDataSource dataSource = new SAMDataSource( reads );
+ SAMDataSource dataSource = new SAMDataSource(reads);
// Side effect: initialize the traversal engine with reads data.
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
@@ -174,10 +178,10 @@ public abstract class MicroScheduler {
* Open the reference-ordered data sources.
* @return A list of reference-ordered data sources.
*/
- private List getReferenceOrderedDataSources( List> rods) {
+ private List getReferenceOrderedDataSources(List> rods) {
List dataSources = new ArrayList();
- for( ReferenceOrderedData extends ReferenceOrderedDatum> rod: rods )
- dataSources.add( new ReferenceOrderedDataSource(rod) );
+ for (ReferenceOrderedData extends ReferenceOrderedDatum> rod : rods)
+ dataSources.add(new ReferenceOrderedDataSource(rod));
return dataSources;
}
@@ -186,12 +190,12 @@ public abstract class MicroScheduler {
* @param refFile Handle to a reference sequence file. Non-null.
* @return A thread-safe file wrapper.
*/
- private IndexedFastaSequenceFile openReferenceSequenceFile( File refFile ) {
+ private IndexedFastaSequenceFile openReferenceSequenceFile(File refFile) {
IndexedFastaSequenceFile ref = null;
try {
ref = new IndexedFastaSequenceFile(refFile);
}
- catch( FileNotFoundException ex ) {
+ catch (FileNotFoundException ex) {
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
}
GenomeLoc.setupRefContigOrdering(ref);
diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java
index 7f7c842bb..4646de4d9 100755
--- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java
+++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java
@@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.dataSources.providers.ReadView;
import org.broadinstitute.sting.gatk.dataSources.providers.ReadReferenceView;
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
+import org.broadinstitute.sting.gatk.dataSources.shards.IntervalShard;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
@@ -79,7 +80,12 @@ public class TraverseReads extends TraversalEngine {
ShardDataProvider dataProvider,
T sum) {
- logger.debug(String.format("TraverseReads.traverse Genomic interval is %s", ((ReadShard) shard).getSize()));
+ if (shard instanceof ReadShard) {
+ logger.debug(String.format("TraverseReads.traverse Genomic interval is %s", ((ReadShard) shard).getSize()));
+ } else if (shard instanceof IntervalShard) {
+ logger.debug(String.format("TraverseReads.traverse Genomic interval is %s", ((IntervalShard) shard).getGenomeLoc()));
+ }
+
if (!(walker instanceof ReadWalker))
throw new IllegalArgumentException("Walker isn't a read walker!");
diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ReadIntervalShardStrategyTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategyTest.java
similarity index 77%
rename from java/test/org/broadinstitute/sting/gatk/dataSources/shards/ReadIntervalShardStrategyTest.java
rename to java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategyTest.java
index 0b6351ba7..4330ea46a 100755
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ReadIntervalShardStrategyTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategyTest.java
@@ -44,7 +44,7 @@ import net.sf.samtools.SAMFileHeader;
*
- * Tests the ReadIntervalShardStrategy class
+ * Tests the IntervalShardStrategy class
*/
-public class ReadIntervalShardStrategyTest extends BaseTest {
+public class IntervalShardStrategyTest extends BaseTest {
private GenomeLocSortedSet mSortedSet = null;
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
@@ -60,19 +60,21 @@ public class ReadIntervalShardStrategyTest extends BaseTest {
@Test(expected = StingException.class)
public void testExceptionOnEmpty() {
- ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet);
+ IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
}
@Test
public void testSingleChromosomeFunctionality() {
GenomeLoc loc = new GenomeLoc(1, 1, 1000);
mSortedSet.add(loc);
- ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet);
+ IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
int counter = 0;
+ Shard d = null;
while (strat.hasNext()) {
- Shard d = strat.next();
+ d = strat.next();
counter++;
}
+ assertTrue(d instanceof IntervalShard);
assertEquals(10, counter);
}
@@ -82,12 +84,14 @@ public class ReadIntervalShardStrategyTest extends BaseTest {
GenomeLoc loc = new GenomeLoc(x, 1, 1000);
mSortedSet.add(loc);
}
- ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet);
+ IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
int counter = 0;
+ Shard d = null;
while (strat.hasNext()) {
- Shard d = strat.next();
+ d = strat.next();
counter++;
}
+ assertTrue(d instanceof IntervalShard);
assertEquals(50, counter);
}
@@ -97,7 +101,7 @@ public class ReadIntervalShardStrategyTest extends BaseTest {
GenomeLoc loc = new GenomeLoc(x, 1, 1000);
mSortedSet.add(loc);
}
- ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 789, mSortedSet);
+ IntervalShardStrategy strat = new IntervalShardStrategy(789, mSortedSet);
int counter = 0;
while (strat.hasNext()) {
Shard d = strat.next();
@@ -113,11 +117,28 @@ public class ReadIntervalShardStrategyTest extends BaseTest {
assertEquals(10, counter);
}
+
+ @Test
+ public void testInfiniteShardSize() {
+ for (int x = 0; x < 5; x++) {
+ GenomeLoc loc = new GenomeLoc(x, 1, 1000);
+ mSortedSet.add(loc);
+ }
+ IntervalShardStrategy strat = new IntervalShardStrategy(Long.MAX_VALUE, mSortedSet);
+ int counter = 0;
+ while (strat.hasNext()) {
+ Shard d = strat.next();
+ assertEquals(1000, d.getGenomeLoc().getStop());
+ counter++;
+ }
+ assertEquals(5, counter);
+ }
+
@Test(expected = UnsupportedOperationException.class)
public void testRemove() {
GenomeLoc loc = new GenomeLoc(1, 1, 1000);
mSortedSet.add(loc);
- ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet);
+ IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
strat.remove();
}
diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalReadShardTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardTest.java
similarity index 85%
rename from java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalReadShardTest.java
rename to java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardTest.java
index ed17fd4b9..d727661b7 100755
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalReadShardTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardTest.java
@@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.dataSources.shards;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
import org.junit.Before;
import org.junit.Test;
@@ -42,9 +41,9 @@ import net.sf.samtools.SAMFileHeader;
*
- * Tests for the IntervalReadShard class.
+ * Tests for the IntervalShard class.
*/
-public class IntervalReadShardTest extends BaseTest {
+public class IntervalShardTest extends BaseTest {
- private IntervalReadShard shard = null;
+ private IntervalShard intervalShard = null;
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
private static final int NUMBER_OF_CHROMOSOMES = 5;
private static final int STARTING_CHROMOSOME = 1;
@@ -59,15 +58,15 @@ public class IntervalReadShardTest extends BaseTest {
@Test
public void simpleReturn() {
GenomeLoc loc = new GenomeLoc(1, 1, 100);
- shard = new IntervalReadShard(loc);
- assertTrue(shard.getGenomeLoc().equals(loc));
+ intervalShard = new IntervalShard(loc);
+ assertTrue(intervalShard.getGenomeLoc().equals(loc));
}
@Test
public void ensureNotReference() {
GenomeLoc loc = new GenomeLoc(1, 1, 100);
- shard = new IntervalReadShard(loc);
- assertTrue(shard.getGenomeLoc() != loc && shard.getGenomeLoc().equals(loc));
+ intervalShard = new IntervalShard(loc);
+ assertTrue(intervalShard.getGenomeLoc() != loc && intervalShard.getGenomeLoc().equals(loc));
}
}
diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/LocusIntervalShardStrategyTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/LinearLocusShardStrategyTest.java
similarity index 56%
rename from java/test/org/broadinstitute/sting/gatk/dataSources/shards/LocusIntervalShardStrategyTest.java
rename to java/test/org/broadinstitute/sting/gatk/dataSources/shards/LinearLocusShardStrategyTest.java
index 69c55d46e..00b45e731 100755
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/LocusIntervalShardStrategyTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/LinearLocusShardStrategyTest.java
@@ -8,6 +8,7 @@ import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertTrue;
import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMSequenceDictionary;
/*
@@ -38,42 +39,65 @@ import net.sf.samtools.SAMFileHeader;
/**
* @author aaron
*
- * Class LocusIntervalShardStrategyTest
+ * Class LinearLocusShardStrategyTest
*
- * Tests the LocusIntervalShardStrategy class.
+ * Tests for the LinearLocusShardStrategy class.
*/
-public class LocusIntervalShardStrategyTest extends BaseTest {
+public class LinearLocusShardStrategyTest extends BaseTest {
+
private GenomeLocSortedSet mSortedSet = null;
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
private static final int NUMBER_OF_CHROMOSOMES = 5;
private static final int STARTING_CHROMOSOME = 1;
private static final int CHROMOSOME_SIZE = 1000;
- private LocusIntervalShardStrategy strat = null;
@Before
public void setup() {
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
- mSortedSet = new GenomeLocSortedSet();
}
@Test
- public void testOneToOneness() {
- for (int x = 0; x < 100; x++) {
- GenomeLoc loc = new GenomeLoc(0,(x*10)+1, (x*10)+8);
- mSortedSet.add(loc);
- }
- strat = new LocusIntervalShardStrategy(header.getSequenceDictionary(),mSortedSet);
+ public void testSetup() {
+ LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500);
int counter = 0;
- while (strat.hasNext()) {
+ while(strat.hasNext()) {
+ Shard d = strat.next();
+ assertTrue(d instanceof LocusShard);
+ assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 499);
++counter;
- GenomeLoc loc = strat.next().getGenomeLoc();
- long stop = loc.getStop();
- long start = loc.getStart();
- long length = stop - start;
- assertTrue(length == 7);
}
- assertTrue(counter == 100);
-
+ assertTrue(counter == 10);
}
+ @Test
+ public void testAdjustSize() {
+ LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500);
+ strat.adjustNextShardSize(1000);
+ int counter = 0;
+ while(strat.hasNext()) {
+ Shard d = strat.next();
+ assertTrue(d instanceof LocusShard);
+ assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 999);
+ ++counter;
+ }
+ assertTrue(counter == 5);
+ }
+
+
+ @Test
+ public void testUnevenSplit() {
+ LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600);
+ int counter = 0;
+ while(strat.hasNext()) {
+ Shard d = strat.next();
+ assertTrue(d instanceof LocusShard);
+ if (counter % 2 == 0) {
+ assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 599);
+ } else {
+ assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 399);
+ }
+ ++counter;
+ }
+ assertTrue(counter == 10);
+ }
}
diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactoryTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactoryTest.java
index af1eccd5c..8141a4660 100755
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactoryTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactoryTest.java
@@ -2,20 +2,14 @@ package org.broadinstitute.sting.gatk.dataSources.shards;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.fail;
-import net.sf.samtools.SAMSequenceDictionary;
-import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
-import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
import org.junit.*;
import static org.junit.Assert.assertTrue;
-import java.io.File;
-import java.util.ArrayList;
-
/**
*
* User: aaron
@@ -63,7 +57,7 @@ public class ShardStrategyFactoryTest extends BaseTest {
GenomeLoc l = new GenomeLoc(0,1,100);
set.add(l);
ShardStrategy st = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,header.getSequenceDictionary(),100,set);
- assertTrue(st instanceof ReadIntervalShardStrategy);
+ assertTrue(st instanceof IntervalShardStrategy);
}
@Test