Bug fixes for streaming in Tribble data via /dev/stdin.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4935 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
23dbc5ccf3
commit
0982d35f5b
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
|||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -13,9 +14,12 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Type;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Scanner;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 21, 2009
|
||||
|
|
@ -68,19 +72,24 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
boolean flashbackData ) {
|
||||
this.fileDescriptor = fileDescriptor;
|
||||
this.builder = builder;
|
||||
|
||||
// TODO: Unify the two blocks of code below by creating a ReferenceOrderedDataPool base class of a coherent type (not RMDTrack for one and SeekableIterator for the other).
|
||||
if (fileDescriptor.getStorageType() != RMDTriplet.RMDStorageType.STREAM) {
|
||||
iteratorPool = new ReferenceOrderedQueryDataPool(fileDescriptor,
|
||||
builder,
|
||||
referenceSequenceDictionary,
|
||||
genomeLocParser);
|
||||
header = ((ReferenceOrderedQueryDataPool)iteratorPool).getHeader();
|
||||
this.header = ((ReferenceOrderedQueryDataPool)iteratorPool).getHeader();
|
||||
this.sequenceDictionary = ((ReferenceOrderedQueryDataPool)iteratorPool).getSequenceDictionary();
|
||||
}
|
||||
else {
|
||||
RMDTrack track = builder.createInstanceOfTrack(fileDescriptor);
|
||||
header = track.getHeader();
|
||||
this.sequenceDictionary = track.getSequenceDictionary();
|
||||
iteratorPool = new ReferenceOrderedDataPool(track,referenceSequenceDictionary,genomeLocParser,flashbackData);
|
||||
iteratorPool = new ReferenceOrderedDataPool(fileDescriptor,
|
||||
builder,
|
||||
referenceSequenceDictionary,
|
||||
genomeLocParser,
|
||||
flashbackData);
|
||||
this.header = ((ReferenceOrderedDataPool)iteratorPool).getHeader();
|
||||
this.sequenceDictionary = ((ReferenceOrderedDataPool)iteratorPool).getSequenceDictionary();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -165,12 +174,54 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
* A pool of reference-ordered data iterators.
|
||||
*/
|
||||
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
|
||||
private final RMDTrack track;
|
||||
// the reference-ordered data itself.
|
||||
private final RMDTriplet fileDescriptor;
|
||||
|
||||
// our tribble track builder
|
||||
private final RMDTrackBuilder builder;
|
||||
|
||||
/**
|
||||
* The header from this RMD, if present.
|
||||
*/
|
||||
private final Object header;
|
||||
|
||||
/**
|
||||
* The sequence dictionary from this ROD. If no sequence dictionary is present, this dictionary will be the same as the reference's.
|
||||
*/
|
||||
private final SAMSequenceDictionary sequenceDictionary;
|
||||
|
||||
boolean flashbackData = false;
|
||||
public ReferenceOrderedDataPool( RMDTrack track, SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, boolean flashbackData ) {
|
||||
public ReferenceOrderedDataPool(RMDTriplet fileDescriptor,RMDTrackBuilder builder,SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser,boolean flashbackData) {
|
||||
super(sequenceDictionary,genomeLocParser);
|
||||
this.track = track;
|
||||
this.fileDescriptor = fileDescriptor;
|
||||
this.builder = builder;
|
||||
this.flashbackData = flashbackData;
|
||||
|
||||
// prepopulate the pool with a single iterator
|
||||
LocationAwareSeekableRODIterator iterator = createNewResource();
|
||||
this.addNewResource(iterator);
|
||||
|
||||
// TODO: pull the proper header and sequence dictionary from the prepopulated track. The lookups below are commented out, so both fields are currently set to null.
|
||||
//this.header = iterator.getHeader();
|
||||
//this.sequenceDictionary = iterator.getSequenceDictionary();
|
||||
this.header = null;
|
||||
this.sequenceDictionary = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header used by this resource pool.
|
||||
* @return Header used by this resource pool.
|
||||
*/
|
||||
public Object getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the sequence dictionary built into the ROD index file.
|
||||
* @return Sequence dictionary from the index file.
|
||||
*/
|
||||
public SAMSequenceDictionary getSequenceDictionary() {
|
||||
return sequenceDictionary;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -179,7 +230,9 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
|
|||
* @return The newly created resource.
|
||||
*/
|
||||
public LocationAwareSeekableRODIterator createNewResource() {
|
||||
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.getIterator());
|
||||
if(numIterators() > 0)
|
||||
throw new ReviewedStingException("BUG: Tried to create multiple iterators over streaming ROD interface");
|
||||
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,builder.createInstanceOfTrack(fileDescriptor).getIterator());
|
||||
return (flashbackData) ? new FlashBackIterator(iter) : iter;
|
||||
}
|
||||
|
||||
|
|
@ -281,8 +334,8 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<RMDTrack,LocationAwareS
|
|||
|
||||
@Override
|
||||
protected RMDTrack selectBestExistingResource(DataStreamSegment segment, List<RMDTrack> availableResources) {
|
||||
for (RMDTrack reader : availableResources)
|
||||
if (reader != null) return reader;
|
||||
for (RMDTrack reader : availableResources)
|
||||
if (reader != null) return reader;
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -114,11 +114,11 @@ public class RMDTrack {
|
|||
}
|
||||
|
||||
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
|
||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
|
||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
|
||||
}
|
||||
|
||||
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
|
||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
|
||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
|
||||
}
|
||||
|
||||
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ import org.testng.annotations.Test;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.Collections;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
/**
|
||||
|
|
@ -40,7 +41,8 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|||
|
||||
public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
||||
|
||||
private RMDTrack rod = null;
|
||||
private RMDTriplet triplet = null;
|
||||
private RMDTrackBuilder builder = null;
|
||||
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
|
@ -62,13 +64,13 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
|||
@BeforeMethod
|
||||
public void setUp() {
|
||||
String fileName = testDir + "TabularDataTest.dat";
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null);
|
||||
rod = builder.createInstanceOfTrack(new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE));
|
||||
triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE);
|
||||
builder = new RMDTrackBuilder(Collections.singletonList(triplet),seq.getSequenceDictionary(),genomeLocParser,null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateSingleIterator() {
|
||||
ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
ResourcePool iteratorPool = new ReferenceOrderedDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
|
||||
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
|
||||
|
|
@ -89,17 +91,17 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testCreateMultipleIterators() {
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
ReferenceOrderedQueryDataPool iteratorPool = new ReferenceOrderedQueryDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser);
|
||||
LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new EntireStream() );
|
||||
|
||||
// Create a new iterator at position 2.
|
||||
LocationAwareSeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) );
|
||||
LocationAwareSeekableRODIterator iterator2 = iteratorPool.iterator( new EntireStream() );
|
||||
|
||||
Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect");
|
||||
Assert.assertEquals(iteratorPool.numAvailableIterators(), 0, "Number of available iterators in the pool is incorrect");
|
||||
|
||||
// Test out-of-order access: first iterator2, then iterator1.
|
||||
// Ugh...first call to a region needs to be a seek.
|
||||
// Ugh...first call to a region needs to be a seek.
|
||||
TableFeature datum = (TableFeature)iterator2.seekForward(testSite2).get(0).getUnderlyingObject();
|
||||
assertTrue(datum.getLocation().equals(testSite2));
|
||||
assertTrue(datum.get("COL1").equals("C"));
|
||||
|
|
@ -139,7 +141,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testIteratorConservation() {
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
|
||||
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
|
||||
|
|
@ -171,40 +173,4 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
|||
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
|
||||
Assert.assertEquals(iteratorPool.numAvailableIterators(), 1, "Number of available iterators in the pool is incorrect");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIteratorCreation() {
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
|
||||
|
||||
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
|
||||
Assert.assertEquals(iteratorPool.numAvailableIterators(), 0, "Number of available iterators in the pool is incorrect");
|
||||
|
||||
TableFeature datum = (TableFeature)iterator.seekForward(testSite3).get(0).getUnderlyingObject();
|
||||
assertTrue(datum.getLocation().equals(testSite3));
|
||||
assertTrue(datum.get("COL1").equals("F"));
|
||||
assertTrue(datum.get("COL2").equals("G"));
|
||||
assertTrue(datum.get("COL3").equals("H"));
|
||||
|
||||
iteratorPool.release(iterator);
|
||||
|
||||
// Create another iterator after the current iterator.
|
||||
iterator = iteratorPool.iterator(new MappedStreamSegment(testSite1) );
|
||||
|
||||
// Make sure that a new iterator was created instead of reusing the previously released one.
|
||||
Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect");
|
||||
Assert.assertEquals(iteratorPool.numAvailableIterators(), 1, "Number of available iterators in the pool is incorrect");
|
||||
|
||||
datum = (TableFeature)iterator.next().get(0).getUnderlyingObject();
|
||||
assertTrue(datum.getLocation().equals(testSite1));
|
||||
assertTrue(datum.get("COL1").equals("A"));
|
||||
assertTrue(datum.get("COL2").equals("B"));
|
||||
assertTrue(datum.get("COL3").equals("C"));
|
||||
|
||||
iteratorPool.release(iterator);
|
||||
|
||||
Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect");
|
||||
Assert.assertEquals(iteratorPool.numAvailableIterators(), 2, "Number of available iterators in the pool is incorrect");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue