Bug fixes for streaming Tribble data in via /dev/stdin.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4935 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2011-01-05 02:43:04 +00:00
parent 23dbc5ccf3
commit 0982d35f5b
3 changed files with 77 additions and 58 deletions

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -13,9 +14,12 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.lang.reflect.Type;
import java.util.Iterator;
import java.util.List;
import java.util.Scanner;
/**
* User: hanna
* Date: May 21, 2009
@ -68,19 +72,24 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
boolean flashbackData ) {
this.fileDescriptor = fileDescriptor;
this.builder = builder;
// TODO: Unify the two blocks of code below by creating a ReferenceOrderedDataPool base class of a coherent type (not RMDTrack for one and SeekableIterator for the other).
if (fileDescriptor.getStorageType() != RMDTriplet.RMDStorageType.STREAM) {
iteratorPool = new ReferenceOrderedQueryDataPool(fileDescriptor,
builder,
referenceSequenceDictionary,
genomeLocParser);
header = ((ReferenceOrderedQueryDataPool)iteratorPool).getHeader();
this.header = ((ReferenceOrderedQueryDataPool)iteratorPool).getHeader();
this.sequenceDictionary = ((ReferenceOrderedQueryDataPool)iteratorPool).getSequenceDictionary();
}
else {
RMDTrack track = builder.createInstanceOfTrack(fileDescriptor);
header = track.getHeader();
this.sequenceDictionary = track.getSequenceDictionary();
iteratorPool = new ReferenceOrderedDataPool(track,referenceSequenceDictionary,genomeLocParser,flashbackData);
iteratorPool = new ReferenceOrderedDataPool(fileDescriptor,
builder,
referenceSequenceDictionary,
genomeLocParser,
flashbackData);
this.header = ((ReferenceOrderedDataPool)iteratorPool).getHeader();
this.sequenceDictionary = ((ReferenceOrderedDataPool)iteratorPool).getSequenceDictionary();
}
}
@ -165,12 +174,54 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* A pool of reference-ordered data iterators.
*/
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
private final RMDTrack track;
// the reference-ordered data itself.
private final RMDTriplet fileDescriptor;
// our tribble track builder
private final RMDTrackBuilder builder;
/**
* The header from this RMD, if present.
*/
private final Object header;
/**
* The sequence dictionary from this ROD. If no sequence dictionary is present, this dictionary will be the same as the reference's.
*/
private final SAMSequenceDictionary sequenceDictionary;
boolean flashbackData = false;
/**
 * Builds a pool of iterators over a reference-ordered data source and eagerly
 * creates and registers the pool's first iterator.
 *
 * NOTE(review): header and sequenceDictionary are assigned null at the end of this
 * constructor (the assignments that would read them from the prepopulated iterator are
 * commented out), so getHeader() and getSequenceDictionary() on this pool return null.
 *
 * @param fileDescriptor     descriptor handed to the builder whenever a new track is created.
 * @param builder            track builder used by createNewResource() to instantiate the track.
 * @param sequenceDictionary sequence dictionary forwarded to the superclass constructor.
 * @param genomeLocParser    parser forwarded to the superclass constructor.
 * @param flashbackData      when true, createNewResource() wraps iterators in a FlashBackIterator.
 */
public ReferenceOrderedDataPool( RMDTrack track, SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, boolean flashbackData ) {
public ReferenceOrderedDataPool(RMDTriplet fileDescriptor,RMDTrackBuilder builder,SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser,boolean flashbackData) {
super(sequenceDictionary,genomeLocParser);
this.track = track;
this.fileDescriptor = fileDescriptor;
this.builder = builder;
this.flashbackData = flashbackData;
// Prepopulate the pool with one iterator. This must run after fileDescriptor, builder and
// flashbackData are assigned, because createNewResource() reads all three.
LocationAwareSeekableRODIterator iterator = createNewResource();
this.addNewResource(iterator);
// Pull the proper header and sequence dictionary from the prepopulated track.
// NOTE(review): the two assignments below are disabled; both fields are set to null instead.
//this.header = iterator.getHeader();
//this.sequenceDictionary = iterator.getSequenceDictionary();
this.header = null;
this.sequenceDictionary = null;
}
/**
 * Gets the header used by this resource pool.
 * NOTE(review): the constructor visible in this file assigns null to the header field,
 * so this currently returns null; confirm that callers tolerate a null header.
 * @return Header used by this resource pool; may be null.
 */
public Object getHeader() {
return header;
}
/**
 * Gets the sequence dictionary stored on this resource pool.
 * NOTE(review): the constructor visible in this file assigns null to the sequenceDictionary
 * field, so this currently returns null rather than a dictionary read from the ROD;
 * confirm that callers tolerate a null dictionary.
 * @return Sequence dictionary held by this pool; may be null.
 */
public SAMSequenceDictionary getSequenceDictionary() {
return sequenceDictionary;
}
/**
@ -179,7 +230,9 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
* @return The newly created resource.
*/
public LocationAwareSeekableRODIterator createNewResource() {
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.getIterator());
// Only one iterator may ever exist over the streaming ROD; a request for a second one
// is reported as an internal bug rather than served.
if(numIterators() > 0)
throw new ReviewedStingException("BUG: Tried to create multiple iterators over streaming ROD interface");
// Instantiate a track from the file descriptor and wrap its iterator in a SeekableRODIterator.
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,builder.createInstanceOfTrack(fileDescriptor).getIterator());
// Wrap in a FlashBackIterator only when flashback was requested at construction time.
return (flashbackData) ? new FlashBackIterator(iter) : iter;
}
@ -281,8 +334,8 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<RMDTrack,LocationAwareS
/**
 * Picks an existing track to serve a request: the first non-null entry of
 * availableResources is returned. The segment argument is not consulted.
 * @param segment            requested segment; unused by this implementation.
 * @param availableResources candidate tracks currently available.
 * @return the first non-null track, or null if there is none.
 */
@Override
protected RMDTrack selectBestExistingResource(DataStreamSegment segment, List<RMDTrack> availableResources) {
for (RMDTrack reader : availableResources)
if (reader != null) return reader;
for (RMDTrack reader : availableResources)
if (reader != null) return reader;
return null;
}

View File

@ -114,11 +114,11 @@ public class RMDTrack {
}
/**
 * Queries the underlying reader using the contig, start and stop of the given interval and
 * wraps the result in a FeatureToGATKFeatureIterator labelled with this track's name.
 * @param interval the region to query.
 * @return an iterator of GATKFeature objects for the queried region.
 * @throws IOException declared on this method; presumably raised by the reader query.
 */
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
}
/**
 * Queries the underlying reader using the contig, start and stop of the given interval and
 * wraps the result in a FeatureToGATKFeatureIterator labelled with this track's name.
 * NOTE(review): the contained flag is never read in this body; the reader call is the same
 * as in query(GenomeLoc). Confirm whether containment filtering is intended here.
 * @param interval  the region to query.
 * @param contained currently ignored by this implementation.
 * @return an iterator of GATKFeature objects for the queried region.
 * @throws IOException declared on this method; presumably raised by the reader query.
 */
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
}
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {

View File

@ -19,6 +19,7 @@ import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Collections;
import net.sf.picard.reference.IndexedFastaSequenceFile;
/**
@ -40,7 +41,8 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
private RMDTrack rod = null;
private RMDTriplet triplet = null;
private RMDTrackBuilder builder = null;
private IndexedFastaSequenceFile seq;
private GenomeLocParser genomeLocParser;
@ -62,13 +64,13 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
/**
 * Per-test fixture: describes the tabular test file (testDir + "TabularDataTest.dat") as a
 * "Table" triplet with FILE storage and creates the track builder that the tests hand to
 * the pools under test.
 */
@BeforeMethod
public void setUp() {
String fileName = testDir + "TabularDataTest.dat";
RMDTrackBuilder builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null);
rod = builder.createInstanceOfTrack(new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE));
triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE);
builder = new RMDTrackBuilder(Collections.singletonList(triplet),seq.getSequenceDictionary(),genomeLocParser,null);
}
@Test
public void testCreateSingleIterator() {
ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
ResourcePool iteratorPool = new ReferenceOrderedDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false);
LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
@ -89,17 +91,17 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
@Test
public void testCreateMultipleIterators() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
ReferenceOrderedQueryDataPool iteratorPool = new ReferenceOrderedQueryDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser);
LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new EntireStream() );
// Create a new iterator at position 2.
LocationAwareSeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) );
LocationAwareSeekableRODIterator iterator2 = iteratorPool.iterator( new EntireStream() );
Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect");
Assert.assertEquals(iteratorPool.numAvailableIterators(), 0, "Number of available iterators in the pool is incorrect");
// Test out-of-order access: first iterator2, then iterator1.
// Ugh...first call to a region needs to be a seek.
// Ugh...first call to a region needs to be a seek.
TableFeature datum = (TableFeature)iterator2.seekForward(testSite2).get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite2));
assertTrue(datum.get("COL1").equals("C"));
@ -139,7 +141,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
@Test
public void testIteratorConservation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false);
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
@ -171,40 +173,4 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
Assert.assertEquals(iteratorPool.numAvailableIterators(), 1, "Number of available iterators in the pool is incorrect");
}
/**
 * Acquires an iterator for testSite3, reads the expected row (F, G, H), releases it, then
 * requests an iterator for testSite1 and checks that the pool grows to two iterators and
 * that the row read at testSite1 is (A, B, C).
 */
@Test
public void testIteratorCreation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
// One iterator exists and it is checked out.
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
Assert.assertEquals(iteratorPool.numAvailableIterators(), 0, "Number of available iterators in the pool is incorrect");
TableFeature datum = (TableFeature)iterator.seekForward(testSite3).get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite3));
assertTrue(datum.get("COL1").equals("F"));
assertTrue(datum.get("COL2").equals("G"));
assertTrue(datum.get("COL3").equals("H"));
iteratorPool.release(iterator);
// Request another iterator, this time for testSite1, after the first one has been released.
iterator = iteratorPool.iterator(new MappedStreamSegment(testSite1) );
// The pool is expected to hold two iterators now, one of them available: the released
// iterator was not handed back for this request and a second one was created instead.
Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect");
Assert.assertEquals(iteratorPool.numAvailableIterators(), 1, "Number of available iterators in the pool is incorrect");
datum = (TableFeature)iterator.next().get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite1));
assertTrue(datum.get("COL1").equals("A"));
assertTrue(datum.get("COL2").equals("B"));
assertTrue(datum.get("COL3").equals("C"));
iteratorPool.release(iterator);
// After the second release both iterators are back in the pool.
Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect");
Assert.assertEquals(iteratorPool.numAvailableIterators(), 2, "Number of available iterators in the pool is incorrect");
}
}