Bug fixes in streaming in Tribble data via /dev/stdin.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4935 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
23dbc5ccf3
commit
0982d35f5b
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
@ -13,9 +14,12 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.lang.reflect.Type;
|
import java.lang.reflect.Type;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Scanner;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 21, 2009
|
* Date: May 21, 2009
|
||||||
|
|
@ -68,19 +72,24 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
||||||
boolean flashbackData ) {
|
boolean flashbackData ) {
|
||||||
this.fileDescriptor = fileDescriptor;
|
this.fileDescriptor = fileDescriptor;
|
||||||
this.builder = builder;
|
this.builder = builder;
|
||||||
|
|
||||||
|
// TODO: Unify the two blocks of code below by creating a ReferenceOrderedDataPool base class of a coherent type (not RMDTrack for one and SeekableIterator for the other).
|
||||||
if (fileDescriptor.getStorageType() != RMDTriplet.RMDStorageType.STREAM) {
|
if (fileDescriptor.getStorageType() != RMDTriplet.RMDStorageType.STREAM) {
|
||||||
iteratorPool = new ReferenceOrderedQueryDataPool(fileDescriptor,
|
iteratorPool = new ReferenceOrderedQueryDataPool(fileDescriptor,
|
||||||
builder,
|
builder,
|
||||||
referenceSequenceDictionary,
|
referenceSequenceDictionary,
|
||||||
genomeLocParser);
|
genomeLocParser);
|
||||||
header = ((ReferenceOrderedQueryDataPool)iteratorPool).getHeader();
|
this.header = ((ReferenceOrderedQueryDataPool)iteratorPool).getHeader();
|
||||||
this.sequenceDictionary = ((ReferenceOrderedQueryDataPool)iteratorPool).getSequenceDictionary();
|
this.sequenceDictionary = ((ReferenceOrderedQueryDataPool)iteratorPool).getSequenceDictionary();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
RMDTrack track = builder.createInstanceOfTrack(fileDescriptor);
|
iteratorPool = new ReferenceOrderedDataPool(fileDescriptor,
|
||||||
header = track.getHeader();
|
builder,
|
||||||
this.sequenceDictionary = track.getSequenceDictionary();
|
referenceSequenceDictionary,
|
||||||
iteratorPool = new ReferenceOrderedDataPool(track,referenceSequenceDictionary,genomeLocParser,flashbackData);
|
genomeLocParser,
|
||||||
|
flashbackData);
|
||||||
|
this.header = ((ReferenceOrderedDataPool)iteratorPool).getHeader();
|
||||||
|
this.sequenceDictionary = ((ReferenceOrderedDataPool)iteratorPool).getSequenceDictionary();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -165,12 +174,54 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
||||||
* A pool of reference-ordered data iterators.
|
* A pool of reference-ordered data iterators.
|
||||||
*/
|
*/
|
||||||
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
|
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
|
||||||
private final RMDTrack track;
|
// the reference-ordered data itself.
|
||||||
|
private final RMDTriplet fileDescriptor;
|
||||||
|
|
||||||
|
// our tribble track builder
|
||||||
|
private final RMDTrackBuilder builder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The header from this RMD, if present.
|
||||||
|
*/
|
||||||
|
private final Object header;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The sequence dictionary from this ROD. If no sequence dictionary is present, this dictionary will be the same as the reference's.
|
||||||
|
*/
|
||||||
|
private final SAMSequenceDictionary sequenceDictionary;
|
||||||
|
|
||||||
boolean flashbackData = false;
|
boolean flashbackData = false;
|
||||||
public ReferenceOrderedDataPool( RMDTrack track, SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, boolean flashbackData ) {
|
public ReferenceOrderedDataPool(RMDTriplet fileDescriptor,RMDTrackBuilder builder,SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser,boolean flashbackData) {
|
||||||
super(sequenceDictionary,genomeLocParser);
|
super(sequenceDictionary,genomeLocParser);
|
||||||
this.track = track;
|
this.fileDescriptor = fileDescriptor;
|
||||||
|
this.builder = builder;
|
||||||
this.flashbackData = flashbackData;
|
this.flashbackData = flashbackData;
|
||||||
|
|
||||||
|
// prepopulate one RMDTrack
|
||||||
|
LocationAwareSeekableRODIterator iterator = createNewResource();
|
||||||
|
this.addNewResource(iterator);
|
||||||
|
|
||||||
|
// TODO: pull the header and sequence dictionary from the prepopulated track; the lookups below are disabled, so both fields are currently left null.
|
||||||
|
//this.header = iterator.getHeader();
|
||||||
|
//this.sequenceDictionary = iterator.getSequenceDictionary();
|
||||||
|
this.header = null;
|
||||||
|
this.sequenceDictionary = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the header used by this resource pool.
|
||||||
|
* @return Header used by this resource pool.
|
||||||
|
*/
|
||||||
|
public Object getHeader() {
|
||||||
|
return header;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the sequence dictionary built into the ROD index file.
|
||||||
|
* @return Sequence dictionary from the index file.
|
||||||
|
*/
|
||||||
|
public SAMSequenceDictionary getSequenceDictionary() {
|
||||||
|
return sequenceDictionary;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -179,7 +230,9 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
|
||||||
* @return The newly created resource.
|
* @return The newly created resource.
|
||||||
*/
|
*/
|
||||||
public LocationAwareSeekableRODIterator createNewResource() {
|
public LocationAwareSeekableRODIterator createNewResource() {
|
||||||
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.getIterator());
|
if(numIterators() > 0)
|
||||||
|
throw new ReviewedStingException("BUG: Tried to create multiple iterators over streaming ROD interface");
|
||||||
|
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,builder.createInstanceOfTrack(fileDescriptor).getIterator());
|
||||||
return (flashbackData) ? new FlashBackIterator(iter) : iter;
|
return (flashbackData) ? new FlashBackIterator(iter) : iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -281,8 +334,8 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<RMDTrack,LocationAwareS
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected RMDTrack selectBestExistingResource(DataStreamSegment segment, List<RMDTrack> availableResources) {
|
protected RMDTrack selectBestExistingResource(DataStreamSegment segment, List<RMDTrack> availableResources) {
|
||||||
for (RMDTrack reader : availableResources)
|
for (RMDTrack reader : availableResources)
|
||||||
if (reader != null) return reader;
|
if (reader != null) return reader;
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -114,11 +114,11 @@ public class RMDTrack {
|
||||||
}
|
}
|
||||||
|
|
||||||
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
|
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
|
||||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
|
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
|
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
|
||||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
|
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
|
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@ import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
/**
|
/**
|
||||||
|
|
@ -40,7 +41,8 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
||||||
|
|
||||||
private RMDTrack rod = null;
|
private RMDTriplet triplet = null;
|
||||||
|
private RMDTrackBuilder builder = null;
|
||||||
|
|
||||||
private IndexedFastaSequenceFile seq;
|
private IndexedFastaSequenceFile seq;
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
@ -62,13 +64,13 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
public void setUp() {
|
public void setUp() {
|
||||||
String fileName = testDir + "TabularDataTest.dat";
|
String fileName = testDir + "TabularDataTest.dat";
|
||||||
RMDTrackBuilder builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null);
|
triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE);
|
||||||
rod = builder.createInstanceOfTrack(new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE));
|
builder = new RMDTrackBuilder(Collections.singletonList(triplet),seq.getSequenceDictionary(),genomeLocParser,null);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCreateSingleIterator() {
|
public void testCreateSingleIterator() {
|
||||||
ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
|
ResourcePool iteratorPool = new ReferenceOrderedDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||||
LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||||
|
|
||||||
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
|
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
|
||||||
|
|
@ -89,17 +91,17 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCreateMultipleIterators() {
|
public void testCreateMultipleIterators() {
|
||||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
|
ReferenceOrderedQueryDataPool iteratorPool = new ReferenceOrderedQueryDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser);
|
||||||
LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new EntireStream() );
|
||||||
|
|
||||||
// Create a new iterator at position 2.
|
// Create a second iterator over the entire stream.
|
||||||
LocationAwareSeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) );
|
LocationAwareSeekableRODIterator iterator2 = iteratorPool.iterator( new EntireStream() );
|
||||||
|
|
||||||
Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect");
|
Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect");
|
||||||
Assert.assertEquals(iteratorPool.numAvailableIterators(), 0, "Number of available iterators in the pool is incorrect");
|
Assert.assertEquals(iteratorPool.numAvailableIterators(), 0, "Number of available iterators in the pool is incorrect");
|
||||||
|
|
||||||
// Test out-of-order access: first iterator2, then iterator1.
|
// Test out-of-order access: first iterator2, then iterator1.
|
||||||
// Ugh...first call to a region needs to be a seek.
|
// Ugh...first call to a region needs to be a seek.
|
||||||
TableFeature datum = (TableFeature)iterator2.seekForward(testSite2).get(0).getUnderlyingObject();
|
TableFeature datum = (TableFeature)iterator2.seekForward(testSite2).get(0).getUnderlyingObject();
|
||||||
assertTrue(datum.getLocation().equals(testSite2));
|
assertTrue(datum.getLocation().equals(testSite2));
|
||||||
assertTrue(datum.get("COL1").equals("C"));
|
assertTrue(datum.get("COL1").equals("C"));
|
||||||
|
|
@ -139,7 +141,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIteratorConservation() {
|
public void testIteratorConservation() {
|
||||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
|
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||||
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||||
|
|
||||||
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
|
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
|
||||||
|
|
@ -171,40 +173,4 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
|
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
|
||||||
Assert.assertEquals(iteratorPool.numAvailableIterators(), 1, "Number of available iterators in the pool is incorrect");
|
Assert.assertEquals(iteratorPool.numAvailableIterators(), 1, "Number of available iterators in the pool is incorrect");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testIteratorCreation() {
|
|
||||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod,seq.getSequenceDictionary(),genomeLocParser,false);
|
|
||||||
LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
|
|
||||||
|
|
||||||
Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect");
|
|
||||||
Assert.assertEquals(iteratorPool.numAvailableIterators(), 0, "Number of available iterators in the pool is incorrect");
|
|
||||||
|
|
||||||
TableFeature datum = (TableFeature)iterator.seekForward(testSite3).get(0).getUnderlyingObject();
|
|
||||||
assertTrue(datum.getLocation().equals(testSite3));
|
|
||||||
assertTrue(datum.get("COL1").equals("F"));
|
|
||||||
assertTrue(datum.get("COL2").equals("G"));
|
|
||||||
assertTrue(datum.get("COL3").equals("H"));
|
|
||||||
|
|
||||||
iteratorPool.release(iterator);
|
|
||||||
|
|
||||||
// Create another iterator after the current iterator.
|
|
||||||
iterator = iteratorPool.iterator(new MappedStreamSegment(testSite1) );
|
|
||||||
|
|
||||||
// Make sure that the previously acquired iterator was reused.
|
|
||||||
Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect");
|
|
||||||
Assert.assertEquals(iteratorPool.numAvailableIterators(), 1, "Number of available iterators in the pool is incorrect");
|
|
||||||
|
|
||||||
datum = (TableFeature)iterator.next().get(0).getUnderlyingObject();
|
|
||||||
assertTrue(datum.getLocation().equals(testSite1));
|
|
||||||
assertTrue(datum.get("COL1").equals("A"));
|
|
||||||
assertTrue(datum.get("COL2").equals("B"));
|
|
||||||
assertTrue(datum.get("COL3").equals("C"));
|
|
||||||
|
|
||||||
iteratorPool.release(iterator);
|
|
||||||
|
|
||||||
Assert.assertEquals(iteratorPool.numIterators(), 2, "Number of iterators in the pool is incorrect");
|
|
||||||
Assert.assertEquals(iteratorPool.numAvailableIterators(), 2, "Number of available iterators in the pool is incorrect");
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue