Bug fixes for streaming Tribble data in via /dev/stdin.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4935 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2011-01-05 02:43:04 +00:00
parent 23dbc5ccf3
commit 0982d35f5b
3 changed files with 77 additions and 58 deletions

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
@ -13,9 +14,12 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.lang.reflect.Type; import java.lang.reflect.Type;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Scanner;
/** /**
* User: hanna * User: hanna
* Date: May 21, 2009 * Date: May 21, 2009
@ -68,19 +72,24 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
boolean flashbackData ) { boolean flashbackData ) {
this.fileDescriptor = fileDescriptor; this.fileDescriptor = fileDescriptor;
this.builder = builder; this.builder = builder;
// TODO: Unify the two blocks of code below by creating a ReferenceOrderedDataPool base class of a coherent type (not RMDTrack for one and SeekableIterator for the other).
if (fileDescriptor.getStorageType() != RMDTriplet.RMDStorageType.STREAM) { if (fileDescriptor.getStorageType() != RMDTriplet.RMDStorageType.STREAM) {
iteratorPool = new ReferenceOrderedQueryDataPool(fileDescriptor, iteratorPool = new ReferenceOrderedQueryDataPool(fileDescriptor,
builder, builder,
referenceSequenceDictionary, referenceSequenceDictionary,
genomeLocParser); genomeLocParser);
header = ((ReferenceOrderedQueryDataPool)iteratorPool).getHeader(); this.header = ((ReferenceOrderedQueryDataPool)iteratorPool).getHeader();
this.sequenceDictionary = ((ReferenceOrderedQueryDataPool)iteratorPool).getSequenceDictionary(); this.sequenceDictionary = ((ReferenceOrderedQueryDataPool)iteratorPool).getSequenceDictionary();
} }
else { else {
RMDTrack track = builder.createInstanceOfTrack(fileDescriptor); iteratorPool = new ReferenceOrderedDataPool(fileDescriptor,
header = track.getHeader(); builder,
this.sequenceDictionary = track.getSequenceDictionary(); referenceSequenceDictionary,
iteratorPool = new ReferenceOrderedDataPool(track,referenceSequenceDictionary,genomeLocParser,flashbackData); genomeLocParser,
flashbackData);
this.header = ((ReferenceOrderedDataPool)iteratorPool).getHeader();
this.sequenceDictionary = ((ReferenceOrderedDataPool)iteratorPool).getSequenceDictionary();
} }
} }
@ -165,12 +174,54 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* A pool of reference-ordered data iterators. * A pool of reference-ordered data iterators.
*/ */
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> { class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
private final RMDTrack track; // the reference-ordered data itself.
private final RMDTriplet fileDescriptor;
// our tribble track builder
private final RMDTrackBuilder builder;
/**
* The header from this RMD, if present.
*/
private final Object header;
/**
* The sequence dictionary from this ROD. If no sequence dictionary is present, this dictionary will be the same as the reference's.
*/
private final SAMSequenceDictionary sequenceDictionary;
boolean flashbackData = false; boolean flashbackData = false;
public ReferenceOrderedDataPool( RMDTrack track, SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, boolean flashbackData ) { public ReferenceOrderedDataPool(RMDTriplet fileDescriptor,RMDTrackBuilder builder,SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser,boolean flashbackData) {
super(sequenceDictionary,genomeLocParser); super(sequenceDictionary,genomeLocParser);
this.track = track; this.fileDescriptor = fileDescriptor;
this.builder = builder;
this.flashbackData = flashbackData; this.flashbackData = flashbackData;
// prepopulate one RMDTrack
LocationAwareSeekableRODIterator iterator = createNewResource();
this.addNewResource(iterator);
// Pull the proper header and sequence dictionary from the prepopulated track.
//this.header = iterator.getHeader();
//this.sequenceDictionary = iterator.getSequenceDictionary();
this.header = null;
this.sequenceDictionary = null;
}
/**
* Gets the header used by this resource pool.
* NOTE(review): the constructor of this pool currently assigns {@code null} to the header
* (the lookup from the prepopulated iterator is commented out), so callers should expect
* {@code null} here until that is restored -- confirm this is intended.
* @return Header used by this resource pool; may be {@code null}.
*/
public Object getHeader() {
return header;
}
/**
* Gets the sequence dictionary built into the ROD index file.
* @return Sequence dictionary from the index file.
*/
public SAMSequenceDictionary getSequenceDictionary() {
return sequenceDictionary;
} }
/** /**
@ -179,7 +230,9 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
* @return The newly created resource. * @return The newly created resource.
*/ */
public LocationAwareSeekableRODIterator createNewResource() { public LocationAwareSeekableRODIterator createNewResource() {
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.getIterator()); if(numIterators() > 0)
throw new ReviewedStingException("BUG: Tried to create multiple iterators over streaming ROD interface");
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,builder.createInstanceOfTrack(fileDescriptor).getIterator());
return (flashbackData) ? new FlashBackIterator(iter) : iter; return (flashbackData) ? new FlashBackIterator(iter) : iter;
} }
@ -281,8 +334,8 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<RMDTrack,LocationAwareS
@Override @Override
protected RMDTrack selectBestExistingResource(DataStreamSegment segment, List<RMDTrack> availableResources) { protected RMDTrack selectBestExistingResource(DataStreamSegment segment, List<RMDTrack> availableResources) {
for (RMDTrack reader : availableResources) for (RMDTrack reader : availableResources)
if (reader != null) return reader; if (reader != null) return reader;
return null; return null;
} }

View File

@ -114,11 +114,11 @@ public class RMDTrack {
} }
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException { public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
} }
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException { public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
} }
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException { public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {

View File

@ -19,6 +19,7 @@ import org.testng.annotations.Test;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.util.Collections;
import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile;
/** /**
@ -40,7 +41,8 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
public class ReferenceOrderedDataPoolUnitTest extends BaseTest { public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
private RMDTrack rod = null; private RMDTriplet triplet = null;
private RMDTrackBuilder builder = null;
private IndexedFastaSequenceFile seq; private IndexedFastaSequenceFile seq;
private GenomeLocParser genomeLocParser; private GenomeLocParser genomeLocParser;
@ -62,13 +64,13 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
@BeforeMethod @BeforeMethod
public void setUp() { public void setUp() {
String fileName = testDir + "TabularDataTest.dat"; String fileName = testDir + "TabularDataTest.dat";
RMDTrackBuilder builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null); triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE);
rod = builder.createInstanceOfTrack(new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE)); builder = new RMDTrackBuilder(Collections.singletonList(triplet),seq.getSequenceDictionary(),genomeLocParser,null);
} }
@Test @Test
public void testCreateSingleIterator() { public void testCreateSingleIterator() {
ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); ResourcePool iteratorPool = new ReferenceOrderedDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false);
LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
@ -89,17 +91,17 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
@Test @Test
public void testCreateMultipleIterators() { public void testCreateMultipleIterators() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); ReferenceOrderedQueryDataPool iteratorPool = new ReferenceOrderedQueryDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser);
LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new EntireStream() );
// Create a new iterator at position 2. // Create a new iterator at position 2.
LocationAwareSeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) ); LocationAwareSeekableRODIterator iterator2 = iteratorPool.iterator( new EntireStream() );
Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect"); Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect");
Assert.assertEquals(iteratorPool.numAvailableIterators(), 0, "Number of available iterators in the pool is incorrect"); Assert.assertEquals(iteratorPool.numAvailableIterators(), 0, "Number of available iterators in the pool is incorrect");
// Test out-of-order access: first iterator2, then iterator1. // Test out-of-order access: first iterator2, then iterator1.
// Ugh...first call to a region needs to be a seek. // Ugh...first call to a region needs to be a seek.
TableFeature datum = (TableFeature)iterator2.seekForward(testSite2).get(0).getUnderlyingObject(); TableFeature datum = (TableFeature)iterator2.seekForward(testSite2).get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite2)); assertTrue(datum.getLocation().equals(testSite2));
assertTrue(datum.get("COL1").equals("C")); assertTrue(datum.get("COL1").equals("C"));
@ -139,7 +141,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
@Test @Test
public void testIteratorConservation() { public void testIteratorConservation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false); ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false);
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
@ -171,40 +173,4 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
Assert.assertEquals(iteratorPool.numAvailableIterators(), 1, "Number of available iterators in the pool is incorrect"); Assert.assertEquals(iteratorPool.numAvailableIterators(), 1, "Number of available iterators in the pool is incorrect");
} }
/**
* Checks the pool's iterator bookkeeping when a second request targets a different site than the first.
* Acquires an iterator for testSite3, verifies the datum found there, releases it, then requests an
* iterator for testSite1 and verifies both the datum and the pool's iterator counts after each step.
*/
@Test
public void testIteratorCreation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
// Exactly one iterator should exist, and it is checked out (none available).
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
Assert.assertEquals(iteratorPool.numAvailableIterators(), 0, "Number of available iterators in the pool is incorrect");
// Seek to testSite3 and verify the columns of the first record returned there.
TableFeature datum = (TableFeature)iterator.seekForward(testSite3).get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite3));
assertTrue(datum.get("COL1").equals("F"));
assertTrue(datum.get("COL2").equals("G"));
assertTrue(datum.get("COL3").equals("H"));
iteratorPool.release(iterator);
// Request an iterator for testSite1 after the testSite3 iterator has been released.
iterator = iteratorPool.iterator(new MappedStreamSegment(testSite1) );
// The assertions below expect a second iterator to exist (2 total, 1 available), i.e. the
// released iterator was NOT reused for this request.
// NOTE(review): presumably because testSite1 precedes the released iterator's position -- confirm.
Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect");
Assert.assertEquals(iteratorPool.numAvailableIterators(), 1, "Number of available iterators in the pool is incorrect");
// The new iterator's first record should be the one at testSite1.
datum = (TableFeature)iterator.next().get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite1));
assertTrue(datum.get("COL1").equals("A"));
assertTrue(datum.get("COL2").equals("B"));
assertTrue(datum.get("COL3").equals("C"));
iteratorPool.release(iterator);
// After the final release both iterators are back in the pool and available.
Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect");
Assert.assertEquals(iteratorPool.numAvailableIterators(), 2, "Number of available iterators in the pool is incorrect");
}
} }