diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index 8801875cc..062c05b31 100755
--- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -35,6 +35,8 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
+import org.broadinstitute.sting.gatk.datasources.shards.Shard;
+import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShardStrategy;
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
@@ -571,11 +573,26 @@ public class GenomeAnalysisEngine {
ReferenceSequenceFile drivingDataSource,
GenomeLocSortedSet intervals,
Integer maxIterations) {
- long SHARD_SIZE = 100000L;
+ if(!readsDataSource.hasIndex()) {
+ if(!getArguments().unsafe || intervals != null)
+ throw new StingException("The GATK cannot currently process unindexed BAM files");
+
+ Shard.ShardType shardType;
+ if(walker instanceof LocusWalker)
+ shardType = Shard.ShardType.LOCUS;
+ else if(walker instanceof ReadWalker || walker instanceof DuplicateWalker)
+ shardType = Shard.ShardType.READ;
+ else
+ throw new StingException("The GATK cannot currently process unindexed BAM files");
+
+ return new MonolithicShardStrategy(shardType);
+ }
ShardStrategy shardStrategy = null;
ShardStrategyFactory.SHATTER_STRATEGY shardType;
+ long SHARD_SIZE = 100000L;
+
if (walker instanceof LocusWalker) {
if (walker instanceof RodWalker) SHARD_SIZE *= 1000;
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java
index b1eae4a0a..ad9ed191b 100755
--- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java
@@ -97,10 +97,11 @@ public class LocusReferenceView extends ReferenceView {
windowStop = 0;
}
- long expandedStart = getWindowStart( bounds );
- long expandedStop = getWindowStop( bounds );
-
- initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
+ if(bounds != null) {
+ long expandedStart = getWindowStart( bounds );
+ long expandedStop = getWindowStop( bounds );
+ initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
+ }
}
/**
@@ -120,7 +121,16 @@ public class LocusReferenceView extends ReferenceView {
validateLocation( genomeLoc );
GenomeLoc window = GenomeLocParser.createGenomeLoc( genomeLoc.getContig(), getWindowStart(genomeLoc), getWindowStop(genomeLoc) );
- char[] bases = StringUtil.bytesToString( referenceSequence.getBases(), (int)(window.getStart() - getWindowStart(bounds)), (int)window.size() ).toCharArray();
+ char[] bases = null;
+
+ if(bounds != null) {
+ bases = StringUtil.bytesToString( referenceSequence.getBases(), (int)(window.getStart() - getWindowStart(bounds)), (int)window.size() ).toCharArray();
+ }
+ else {
+ if(referenceSequence == null || referenceSequence.getContigIndex() != genomeLoc.getContigIndex())
+ referenceSequence = reference.getSequence(genomeLoc.getContig());
+ bases = StringUtil.bytesToString( referenceSequence.getBases(), (int)window.getStart()-1, (int)window.size()).toCharArray();
+ }
return new ReferenceContext( genomeLoc, window, bases );
}
@@ -139,11 +149,10 @@ public class LocusReferenceView extends ReferenceView {
* @param genomeLoc location to verify.
*/
private void validateLocation( GenomeLoc genomeLoc ) throws InvalidPositionException {
- //
if( !genomeLoc.isSingleBP() )
throw new InvalidPositionException(
String.format("Requested position larger than one base; start = %d, stop = %d", genomeLoc.getStart(), genomeLoc.getStop()));
- if( !bounds.containsP(genomeLoc) )
+ if( bounds != null && !bounds.containsP(genomeLoc) )
throw new InvalidPositionException(
String.format("Requested position %s not within interval %s", genomeLoc, bounds));
}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java
index 4e02f777d..576fb32eb 100755
--- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java
@@ -4,7 +4,6 @@ import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.Reads;
-import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
@@ -112,7 +111,7 @@ public abstract class LocusView extends LocusIterator implements View {
* @return True if another locus context is bounded by this shard.
*/
protected boolean hasNextLocus() {
- return nextLocus != null && !nextLocus.getLocation().isPast(shard.getGenomeLoc());
+ return nextLocus != null && (shard.getGenomeLoc() == null || !nextLocus.getLocation().isPast(shard.getGenomeLoc()));
}
/**
@@ -121,7 +120,7 @@ public abstract class LocusView extends LocusIterator implements View {
* @throw NoSuchElementException if the next element is missing.
*/
protected AlignmentContext nextLocus() {
- if( nextLocus == null || nextLocus.getLocation().isPast(shard.getGenomeLoc()) )
+ if( nextLocus == null || (shard.getGenomeLoc() != null && nextLocus.getLocation().isPast(shard.getGenomeLoc())) )
throw new NoSuchElementException("No more elements remain in locus context queue.");
// Cache the current and apply filtering.
@@ -132,7 +131,7 @@ public abstract class LocusView extends LocusIterator implements View {
nextLocus = loci.next();
if( sourceInfo.getDownsampleToCoverage() != null )
current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() );
- if( nextLocus.getLocation().isPast(shard.getGenomeLoc()) )
+ if( shard.getGenomeLoc() != null && nextLocus.getLocation().isPast(shard.getGenomeLoc()) )
nextLocus = null;
}
else
@@ -149,13 +148,16 @@ public abstract class LocusView extends LocusIterator implements View {
if( loci.hasNext() )
nextLocus = loci.next();
- // Iterate past cruft at the beginning to the first locus in the shard.
- while( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLoc()) && loci.hasNext() )
- nextLocus = loci.next();
+ // If the location of this shard is available, trim the data stream to match the shard.
+ if(shard.getGenomeLoc() != null) {
+ // Iterate past cruft at the beginning to the first locus in the shard.
+ while( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLoc()) && loci.hasNext() )
+ nextLocus = loci.next();
- // If nothing in the shard was found, indicate that by setting nextAlignmentContext to null.
- if( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLoc()) )
- nextLocus = null;
+ // If nothing in the shard was found, indicate that by setting nextLocus to null.
+ if( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLoc()) )
+ nextLocus = null;
+ }
}
/**
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java
index d993016d5..b49350379 100755
--- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java
@@ -61,4 +61,13 @@ public class LocusShard implements Shard {
public static LocusShard toShard(GenomeLoc loc) {
return new LocusShard(loc);
}
+
+ /**
+ * String representation of this shard; delegates to mLoc.toString(), so mLoc must be non-null.
+ * @return The string form of mLoc (presumably the shard's boundaries -- confirm against the mLoc declaration).
+ */
+ @Override
+ public String toString() {
+ return mLoc.toString();
+ }
}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java
new file mode 100644
index 000000000..e84d91faf
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java
@@ -0,0 +1,51 @@
+package org.broadinstitute.sting.gatk.datasources.shards;
+
+import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.GenomeLoc;
+
+/**
+ * A single, monolithic shard bridging all available data.
+ * @author mhanna
+ * @version 0.1
+ */
+public class MonolithicShard implements Shard {
+    /**
+     * Kind of data this shard spans (LOCUS or READ); assigned once in the constructor and never changed.
+     */
+    private final ShardType shardType;
+
+    /**
+     * Creates a new monolithic shard of the given type.
+     * @param shardType Type of the shard. Only LOCUS and READ are accepted; any other value (including null) raises a StingException.
+     */
+    public MonolithicShard(ShardType shardType) {
+        if(shardType != ShardType.LOCUS && shardType != ShardType.READ)
+            throw new StingException("Invalid shard type for monolithic shard: " + shardType);
+        this.shardType = shardType;
+    }
+
+    /**
+     * Returns null, indicating that (in this case) the entire genome is covered; callers must handle a null location.
+     * @return null.
+     */
+    public GenomeLoc getGenomeLoc() {
+        return null;
+    }
+
+    /**
+     * Reports the type of monolithic shard.
+     * @return The LOCUS or READ type supplied at construction.
+     */
+    public ShardType getShardType() {
+        return shardType;
+    }
+
+    /**
+     * String representation of this shard.
+     * @return The constant label "entire genome".
+     */
+    @Override
+    public String toString() {
+        return "entire genome";
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java
new file mode 100644
index 000000000..b90f3d147
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java
@@ -0,0 +1,76 @@
+package org.broadinstitute.sting.gatk.datasources.shards;
+
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.StingException;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * Create a giant shard representing all the data in the input BAM(s).
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class MonolithicShardStrategy implements ShardStrategy {
+ /**
+ * The single shard associated with this sharding strategy.
+ */
+ private MonolithicShard shard;
+
+ /**
+ * Create a new shard strategy for shards of the given type.
+ * @param shardType The shard type.
+ */
+ public MonolithicShardStrategy(Shard.ShardType shardType) {
+ shard = new MonolithicShard(shardType);
+ }
+
+ /**
+ * Convenience for using in a foreach loop. Will NOT create a new, reset instance of the iterator;
+ * will only return another copy of the active iterator.
+ * @return A copy of this.
+ */
+ public Iterator
* seek
@@ -210,7 +222,11 @@ public class SAMDataSource implements SimpleDataSource {
*
* @return an iterator for that region
*/
- private StingSAMIterator seekRead( ReadShard shard ) throws SimpleDataSourceLoadException {
+ private StingSAMIterator seekRead( Shard shard ) throws SimpleDataSourceLoadException {
+ if(shard instanceof MonolithicShard)
+ return createIterator(new EntireStream());
+
+ ReadShard readShard = (ReadShard)shard;
StingSAMIterator iter = null;
// If there are no entries in the sequence dictionary, there can't possibly be any unmapped reads. Force state to 'unmapped'.
@@ -221,22 +237,22 @@ public class SAMDataSource implements SimpleDataSource {
if (lastReadPos == null) {
lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
iter = createIterator(new MappedStreamSegment(lastReadPos));
- return InitialReadIterator(shard.getSize(), iter);
+ return InitialReadIterator(readShard.getSize(), iter);
} else {
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
- iter = fastMappedReadSeek(shard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(lastReadPos))));
+ iter = fastMappedReadSeek(readShard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(lastReadPos))));
}
if (intoUnmappedReads && !includeUnmappedReads)
- shard.signalDone();
+ readShard.signalDone();
}
if (intoUnmappedReads && includeUnmappedReads) {
if (iter != null)
iter.close();
- iter = toUnmappedReads(shard.getSize());
+ iter = toUnmappedReads(readShard.getSize());
if (!iter.hasNext())
- shard.signalDone();
+ readShard.signalDone();
}
return iter;
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java
index b2ac4ea71..b10c73adf 100644
--- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java
@@ -44,18 +44,24 @@ import java.util.List;
* @version 0.1
*/
class SAMResourcePool extends ResourcePool