diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index f661a8803..88f6ae024 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -13,9 +14,12 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.lang.reflect.Type; +import java.util.Iterator; import java.util.List; +import java.util.Scanner; /** * User: hanna * Date: May 21, 2009 @@ -68,19 +72,24 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { boolean flashbackData ) { this.fileDescriptor = fileDescriptor; this.builder = builder; + + // TODO: Unify the two blocks of code below by creating a ReferenceOrderedDataPool base class of a coherent type (not RMDTrack for one and SeekableIterator for the other). if (fileDescriptor.getStorageType() != RMDTriplet.RMDStorageType.STREAM) { iteratorPool = new ReferenceOrderedQueryDataPool(fileDescriptor, builder, referenceSequenceDictionary, genomeLocParser); - header = ((ReferenceOrderedQueryDataPool)iteratorPool).getHeader(); + this.header = ((ReferenceOrderedQueryDataPool)iteratorPool).getHeader(); this.sequenceDictionary = ((ReferenceOrderedQueryDataPool)iteratorPool).getSequenceDictionary(); } else { - RMDTrack track = builder.createInstanceOfTrack(fileDescriptor); - header = track.getHeader(); - this.sequenceDictionary = track.getSequenceDictionary(); - iteratorPool = new ReferenceOrderedDataPool(track,referenceSequenceDictionary,genomeLocParser,flashbackData); + iteratorPool = new ReferenceOrderedDataPool(fileDescriptor, + builder, + referenceSequenceDictionary, + genomeLocParser, + flashbackData); + this.header = ((ReferenceOrderedDataPool)iteratorPool).getHeader(); + this.sequenceDictionary = ((ReferenceOrderedDataPool)iteratorPool).getSequenceDictionary(); } } @@ -165,12 +174,54 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * A pool of reference-ordered data iterators. */ class ReferenceOrderedDataPool extends ResourcePool { - private final RMDTrack track; + // the reference-ordered data itself. + private final RMDTriplet fileDescriptor; + + // our tribble track builder + private final RMDTrackBuilder builder; + + /** + * The header from this RMD, if present. + */ + private final Object header; + + /** + * The sequence dictionary from this ROD. If no sequence dictionary is present, this dictionary will be the same as the reference's. + */ + private final SAMSequenceDictionary sequenceDictionary; + boolean flashbackData = false; - public ReferenceOrderedDataPool( RMDTrack track, SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, boolean flashbackData ) { + public ReferenceOrderedDataPool(RMDTriplet fileDescriptor,RMDTrackBuilder builder,SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser,boolean flashbackData) { super(sequenceDictionary,genomeLocParser); - this.track = track; + this.fileDescriptor = fileDescriptor; + this.builder = builder; this.flashbackData = flashbackData; + + // prepopulate one RMDTrack + LocationAwareSeekableRODIterator iterator = createNewResource(); + this.addNewResource(iterator); + + // Pull the proper header and sequence dictionary from the prepopulated track. + //this.header = iterator.getHeader(); + //this.sequenceDictionary = iterator.getSequenceDictionary(); + this.header = null; + this.sequenceDictionary = null; + } + + /** + * Gets the header used by this resource pool. + * @return Header used by this resource pool. + */ + public Object getHeader() { + return header; + } + + /** + * Gets the sequence dictionary built into the ROD index file. + * @return Sequence dictionary from the index file. + */ + public SAMSequenceDictionary getSequenceDictionary() { + return sequenceDictionary; } /** @@ -179,7 +230,9 @@ class ReferenceOrderedDataPool extends ResourcePool 0) + throw new ReviewedStingException("BUG: Tried to create multiple iterators over streaming ROD interface"); + LocationAwareSeekableRODIterator iter = new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,builder.createInstanceOfTrack(fileDescriptor).getIterator()); return (flashbackData) ? new FlashBackIterator(iter) : iter; } @@ -281,8 +334,8 @@ class ReferenceOrderedQueryDataPool extends ResourcePool availableResources) { - for (RMDTrack reader : availableResources) - if (reader != null) return reader; + for (RMDTrack reader : availableResources) + if (reader != null) return reader; return null; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java index 0a3168b56..3b9f8243f 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java @@ -114,11 +114,11 @@ public class RMDTrack { } public CloseableIterator query(GenomeLoc interval) throws IOException { - return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); + return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName()); } public CloseableIterator query(GenomeLoc interval, boolean contained) throws IOException { - return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); + return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName()); } public CloseableIterator query(String contig, int start, int stop) throws IOException { diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java index 555e429b8..c720cf4a3 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java @@ -19,6 +19,7 @@ import org.testng.annotations.Test; import java.io.File; import java.io.FileNotFoundException; +import java.util.Collections; import net.sf.picard.reference.IndexedFastaSequenceFile; /** @@ -40,7 +41,8 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; public class ReferenceOrderedDataPoolUnitTest extends BaseTest { - private RMDTrack rod = null; + private RMDTriplet triplet = null; + private RMDTrackBuilder builder = null; private IndexedFastaSequenceFile seq; private GenomeLocParser genomeLocParser; @@ -62,13 +64,13 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @BeforeMethod public void setUp() { String fileName = testDir + "TabularDataTest.dat"; - RMDTrackBuilder builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null); - rod = builder.createInstanceOfTrack(new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE)); + triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE); + builder = new RMDTrackBuilder(Collections.singletonList(triplet),seq.getSequenceDictionary(),genomeLocParser,null); } @Test public void testCreateSingleIterator() { - ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); + ResourcePool iteratorPool = new ReferenceOrderedDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false); LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); @@ -89,17 +91,17 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @Test public void testCreateMultipleIterators() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); - LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); + ReferenceOrderedQueryDataPool iteratorPool = new ReferenceOrderedQueryDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser); + LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new EntireStream() ); // Create a new iterator at position 2. - LocationAwareSeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) ); + LocationAwareSeekableRODIterator iterator2 = iteratorPool.iterator( new EntireStream() ); Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect"); Assert.assertEquals(iteratorPool.numAvailableIterators(), 0, "Number of available iterators in the pool is incorrect"); // Test out-of-order access: first iterator2, then iterator1. - // Ugh...first call to a region needs to be a seek. + // Ugh...first call to a region needs to be a seek. TableFeature datum = (TableFeature)iterator2.seekForward(testSite2).get(0).getUnderlyingObject(); assertTrue(datum.getLocation().equals(testSite2)); assertTrue(datum.get("COL1").equals("C")); @@ -139,7 +141,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @Test public void testIteratorConservation() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false); LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); @@ -171,40 +173,4 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); Assert.assertEquals(iteratorPool.numAvailableIterators(), 1, "Number of available iterators in the pool is incorrect"); } - - @Test - public void testIteratorCreation() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); - LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); - - Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); - Assert.assertEquals(iteratorPool.numAvailableIterators(), 0, "Number of available iterators in the pool is incorrect"); - - TableFeature datum = (TableFeature)iterator.seekForward(testSite3).get(0).getUnderlyingObject(); - assertTrue(datum.getLocation().equals(testSite3)); - assertTrue(datum.get("COL1").equals("F")); - assertTrue(datum.get("COL2").equals("G")); - assertTrue(datum.get("COL3").equals("H")); - - iteratorPool.release(iterator); - - // Create another iterator after the current iterator. - iterator = iteratorPool.iterator(new MappedStreamSegment(testSite1) ); - - // Make sure that the previously acquired iterator was reused. - Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect"); - Assert.assertEquals(iteratorPool.numAvailableIterators(), 1, "Number of available iterators in the pool is incorrect"); - - datum = (TableFeature)iterator.next().get(0).getUnderlyingObject(); - assertTrue(datum.getLocation().equals(testSite1)); - assertTrue(datum.get("COL1").equals("A")); - assertTrue(datum.get("COL2").equals("B")); - assertTrue(datum.get("COL3").equals("C")); - - iteratorPool.release(iterator); - - Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect"); - Assert.assertEquals(iteratorPool.numAvailableIterators(), 2, "Number of available iterators in the pool is incorrect"); - } - }