diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShardStrategy.java
index 60049d374..6585c24b9 100755
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShardStrategy.java
@@ -29,7 +29,7 @@ import java.util.List;
*
* Interface Shard
*
- * The shard interface, which controls how data is divided
+ * The shard interface, which controls how data is divided for loci
*/
public abstract class LocusShardStrategy implements ShardStrategy {
@@ -66,7 +66,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
*/
LocusShardStrategy(SAMSequenceDictionary dic) {
this.dic = dic;
- mLoc = new GenomeLoc(0,0,0);
+ mLoc = new GenomeLoc(0, 0, 0);
if (dic.getSequences().size() > 0) {
nextContig = true;
}
@@ -110,19 +110,11 @@ public abstract class LocusShardStrategy implements ShardStrategy {
*/
/**
- * set the next shards size
- *
- * @param size adjust the next size to this
- */
- public abstract void adjustNextShardSize(long size);
-
-
- /**
- * This is how the various shards strategies implements their approach
+ * This is how the various shard strategies implement their approach, adjusting this value
*
* @return the next shard size
*/
- abstract long nextShardSize();
+ protected abstract long nextShardSize();
/**
@@ -132,8 +124,6 @@ public abstract class LocusShardStrategy implements ShardStrategy {
*/
-
-
/**
* get the next shard, based on the return size of nextShardSize
*
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategy.java
index 30a3fd6ed..dd7908e0b 100644
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategy.java
@@ -28,4 +28,13 @@ import java.util.Iterator;
* class, but not this will be an interface to accomidate read based sharding
*/
public interface ShardStrategy extends Iterator, Iterable {
+
+ /**
+ * set the size to be used for the next shard
+ *
+ * @param size the size the next shard should be adjusted to
+ */
+ public abstract void adjustNextShardSize(long size);
+
+
}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java
index bb773caaa..39af328f0 100644
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java
@@ -108,7 +108,7 @@ public class ShardStrategyFactory {
* @param readCount the number of reads to include in each shard
* @return
*/
- static public ShardStrategy shatterByReadCount(long readCount) {
+ static public ShardStrategy shatterByReadCount(SAMSequenceDictionary dic, long readCount) {
return null;
}
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java
new file mode 100755
index 000000000..0260de781
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java
@@ -0,0 +1,116 @@
+package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
+
+import edu.mit.broad.picard.sam.SamFileHeaderMerger;
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMFileReader;
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
+import org.broadinstitute.sting.utils.GenomeLoc;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * User: aaron
+ * Date: Mar 26, 2009
+ * Time: 2:36:16 PM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ */
+public class SAMDataSource implements SimpleDataSource {
+ /** the sort order handed to the header merger when the SAM file headers are merged */
+ private final SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
+
+ /** our log, which captures anything logged from this class */
+ protected static Logger logger = Logger.getLogger(SAMDataSource.class);
+
+ // locus mode (overlapping query) vs. read mode (contained query); nothing in this class sets it to true
+ private boolean locusMode = false;
+
+ // How strict should we be with SAM/BAM parsing?
+ protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.STRICT;
+
+ // the SAM files backing this data source; seek() opens a reader for each of them
+ private final List samFileList = new ArrayList();
+
+ /**
+ * constructor, given a list of sam file names; each one is checked for readability and stored
+ * @param samFiles the list of sam files
+ * @throws SimpleDataSourceLoadException if any of the named files cannot be read
+ */
+ public SAMDataSource(List samFiles) throws SimpleDataSourceLoadException {
+ for (String fileName : samFiles) {
+ File smFile = new File(fileName);
+ if (!smFile.canRead()) {
+ throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + fileName);
+ }
+ samFileList.add(smFile);
+
+ }
+
+ //SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(samFileList, SORT_ORDER);
+ }
+
+ /** open a reader on the given file using our validation stringency; returns null for names ending in ".list" */
+ protected SAMFileReader initializeSAMFile(final File samFile) {
+ if (samFile.toString().endsWith(".list")) {
+ return null;
+ } else {
+ SAMFileReader samReader = new SAMFileReader(samFile, true);
+ samReader.setValidationStringency(strictness);
+
+ final SAMFileHeader header = samReader.getFileHeader();
+ logger.info(String.format("Sort order is: " + header.getSortOrder()));
+
+ return samReader;
+ }
+ }
+
+ /**
+ * seek: open a reader for every SAM file, merge their headers, and query the
+ * merged iterator over the contig, start and stop of the given location.
+ *
+ * @param location the genome location to extract data for
+ * @return an iterator for that region
+ * @throws SimpleDataSourceLoadException if a reader could not be created for one of the files
+ */
+ public MergingSamRecordIterator2 seek(GenomeLoc location) throws SimpleDataSourceLoadException {
+
+ // NOTE: this is heavy; a new reader is opened for every file on each call, and none are closed in this method
+ List lst = new ArrayList();
+ for (File f : this.samFileList) {
+ SAMFileReader reader = initializeSAMFile(f);
+ if (reader == null) {
+ throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + f);
+ }
+ lst.add(reader);
+ }
+
+ // now merge the headers
+ SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(lst, SORT_ORDER);
+
+ // make a merging iterator for this record
+ MergingSamRecordIterator2 iter = new MergingSamRecordIterator2(headerMerger);
+
+ // NOTE(review): start/stop are narrowed to int below -- confirm they can never exceed the int range
+ // locus mode asks for reads overlapping the interval, read mode for reads contained in it
+ if (locusMode) {
+ iter.queryOverlapping(location.getContig(), (int) location.getStart(), (int) location.getStop());
+ } else {
+ iter.queryContained(location.getContig(), (int) location.getStart(), (int) location.getStop());
+ }
+
+ // return the iterator
+ return iter;
+ }
+
+
+
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroManager.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroManager.java
index c4ce0de6e..d07dea89b 100644
--- a/java/src/org/broadinstitute/sting/gatk/executive/MicroManager.java
+++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroManager.java
@@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
-import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMBAMDataSource;
+import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextProvider;
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceProvider;
@@ -68,10 +68,10 @@ public class MicroManager {
SHARD_SIZE );
ReferenceIterator refIter = new ReferenceIterator(ref);
- SAMBAMDataSource dataSource = null;
+ SAMDataSource dataSource = null;
try {
- dataSource = new SAMBAMDataSource( Arrays.asList( new String[] { reads.getCanonicalPath() } ) );
+ dataSource = new SAMDataSource( Arrays.asList( new String[] { reads.getCanonicalPath() } ) );
}
catch( SimpleDataSourceLoadException ex ) {
throw new RuntimeException( ex );
diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java
index 955aa9363..653b8facd 100755
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java
@@ -87,7 +87,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
try {
- SAMBAMDataSource data = new SAMBAMDataSource(fl);
+ SAMDataSource data = new SAMDataSource(fl);
for (Shard sh : strat) {
int readCount = 0;
count++;
@@ -136,7 +136,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
int count = 0;
try {
- SAMBAMDataSource data = new SAMBAMDataSource(fl);
+ SAMDataSource data = new SAMDataSource(fl);
for (Shard sh : strat) {
int readCount = 0;
count++;
@@ -171,7 +171,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
logger.debug("Pile two:");
try {
- SAMBAMDataSource data = new SAMBAMDataSource(fl);
+ SAMDataSource data = new SAMDataSource(fl);
for (Shard sh : strat) {
int readCount = 0;
count++;
diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java
index d613898d1..85b8ed066 100755
--- a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java
@@ -6,7 +6,7 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
-import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMBAMDataSource;
+import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
@@ -83,7 +83,7 @@ public class BoundedReadIteratorTest extends BaseTest {
long shardReadCount = 0;
try {
- SAMBAMDataSource data = new SAMBAMDataSource(fl);
+ SAMDataSource data = new SAMDataSource(fl);
// make sure we have a shard
if (!strat.hasNext()) {