fixes for parallel processing problems with Tribble, a small bug in the resource pool, and some more documentation.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3349 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6868ce988f
commit
2c55ac1374
|
|
@ -1,12 +1,13 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import org.broad.tribble.FeatureReader;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.QueryableTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -41,7 +42,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
/**
|
||||
* A pool of iterators for navigating through the genome.
|
||||
*/
|
||||
private final ReferenceOrderedDataPool iteratorPool;
|
||||
private final ResourcePool<?,LocationAwareSeekableRODIterator> iteratorPool;
|
||||
|
||||
/**
|
||||
* Create a new reference-ordered data source.
|
||||
|
|
@ -49,8 +50,10 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
*/
|
||||
public ReferenceOrderedDataSource( Walker walker, RMDTrack rod) {
|
||||
this.rod = rod;
|
||||
if (rod.supportsQuery()) iteratorPool = null;
|
||||
else iteratorPool = new ReferenceOrderedDataPool( walker, rod );
|
||||
if (rod.supportsQuery())
|
||||
iteratorPool = new ReferenceOrderedQueryDataPool(new TribbleRMDTrackBuilder(), rod);
|
||||
else
|
||||
iteratorPool = new ReferenceOrderedDataPool( walker, rod );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -75,11 +78,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
* @return Iterator through the data.
|
||||
*/
|
||||
public LocationAwareSeekableRODIterator seek( Shard shard ) {
|
||||
if (iteratorPool == null) // use query
|
||||
return getQuery(shard.getGenomeLocs() == null || shard.getGenomeLocs().size() == 0 ? null : shard.getGenomeLocs());
|
||||
DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream();
|
||||
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
|
||||
return RODIterator;
|
||||
return iteratorPool.iterator(dataStreamSegment);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -90,30 +90,17 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
* @return Iterator through the data.
|
||||
*/
|
||||
public LocationAwareSeekableRODIterator seek(GenomeLoc loc) {
|
||||
if (iteratorPool == null) // use query
|
||||
return getQuery(loc == null ? null : Arrays.asList(loc));
|
||||
DataStreamSegment dataStreamSegment = loc != null ? new MappedStreamSegment(loc) : new EntireStream();
|
||||
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
|
||||
return RODIterator;
|
||||
return iteratorPool.iterator(dataStreamSegment);
|
||||
}
|
||||
|
||||
/**
|
||||
* assuming the ROD is a queryable ROD, use that interface to get an iterator to the selected region
|
||||
* @param loc the region to query for
|
||||
* @return a LocationAwareSeekableRODIterator over the selected region
|
||||
*/
|
||||
private LocationAwareSeekableRODIterator getQuery(List<GenomeLoc> loc) {
|
||||
if (loc == null) // for the mono shard case
|
||||
return new SeekableRODIterator(rod.getIterator());
|
||||
return new StitchingLocationAwareSeekableRODIterator(loc,(QueryableTrack)rod);
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the specified iterator, returning it to the pool.
|
||||
* @param iterator Iterator to close.
|
||||
*/
|
||||
public void close( LocationAwareSeekableRODIterator iterator ) {
|
||||
if (iteratorPool != null) iteratorPool.release(iterator);
|
||||
iteratorPool.release(iterator);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -189,78 +176,54 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
|
|||
}
|
||||
|
||||
/**
|
||||
* stitch together the multiple calls to seek (since shards can have multiple intervals now)
|
||||
* on the underlying Tribble track into one seamless iteration
|
||||
* a data pool for the new query based RODs
|
||||
*/
|
||||
class StitchingLocationAwareSeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||
class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureReader, LocationAwareSeekableRODIterator> {
|
||||
|
||||
// the list of intervals we're iterating over
|
||||
private final LinkedList<GenomeLoc> locationList;
|
||||
// the reference-ordered data itself.
|
||||
private final RMDTrack rod;
|
||||
|
||||
// The reference-ordered data itself.
|
||||
private final QueryableTrack rod;
|
||||
// our tribble track builder
|
||||
private final TribbleRMDTrackBuilder builder;
|
||||
|
||||
// the current iterator
|
||||
private SeekableRODIterator iterator;
|
||||
|
||||
StitchingLocationAwareSeekableRODIterator(List<GenomeLoc> list, QueryableTrack rmd) {
|
||||
rod = rmd;
|
||||
locationList = new LinkedList<GenomeLoc>();
|
||||
locationList.addAll(list);
|
||||
fetchNextInterval();
|
||||
public ReferenceOrderedQueryDataPool( TribbleRMDTrackBuilder builder, RMDTrack rod ) {
|
||||
this.rod = rod;
|
||||
this.builder = builder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc peekNextLocation() {
|
||||
if (iterator == null) return null;
|
||||
return iterator.peekNextLocation();
|
||||
protected FeatureReader createNewResource() {
|
||||
return builder.createFeatureReader(rod.getType(),rod.getFile());
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc position() {
|
||||
if (iterator == null) return null;
|
||||
return iterator.position();
|
||||
protected FeatureReader selectBestExistingResource(DataStreamSegment segment, List<FeatureReader> availableResources) {
|
||||
for (FeatureReader reader : availableResources)
|
||||
if (reader != null) return reader;
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public RODRecordList seekForward(GenomeLoc interval) {
|
||||
RODRecordList list = iterator.seekForward(interval);
|
||||
if (list == null) { // we were unable to seek the current interval to the location
|
||||
fetchNextInterval();
|
||||
list = iterator.seekForward(interval);
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (iterator == null) return false;
|
||||
return iterator.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public RODRecordList next() {
|
||||
if (!hasNext()) throw new IllegalStateException("StitchingLocationAwareSeekableRODIterator: We do not have a next");
|
||||
RODRecordList list = iterator.next();
|
||||
if (!iterator.hasNext()) fetchNextInterval();
|
||||
return list;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("\"Thou shall not remove()!\" - Software Engineering Team");
|
||||
}
|
||||
|
||||
private void fetchNextInterval() {
|
||||
if (locationList != null && locationList.size() > 0) {
|
||||
GenomeLoc loc = locationList.getFirst();
|
||||
locationList.removeFirst();
|
||||
if (rod == null) throw new StingException("Unable to query(), target rod is null, next location = " + ((locationList != null) ? locationList.getFirst() : "null"));
|
||||
try {
|
||||
iterator = new SeekableRODIterator(rod.query(loc));
|
||||
} catch (IOException e) {
|
||||
throw new StingException("Unable to query iterator with location " + loc + " and rod name of " + ((RMDTrack)rod).getName());
|
||||
protected LocationAwareSeekableRODIterator createIteratorFromResource(DataStreamSegment position, FeatureReader resource) {
|
||||
try {
|
||||
if (position instanceof MappedStreamSegment) {
|
||||
GenomeLoc pos = ((MappedStreamSegment) position).locus;
|
||||
//System.err.println("Querying position1 " + pos.getContig() + " start " + pos.getStart() + " stop " + pos.getStop());
|
||||
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(resource.query(pos.getContig(), (int) pos.getStart(), (int) pos.getStop()),rod.getName()));
|
||||
} else {
|
||||
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(resource.iterator(),rod.getName()));
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new StingException("Unable to create iterator for rod named " + rod.getName());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void closeResource(FeatureReader resource) {
|
||||
try {
|
||||
resource.close();
|
||||
} catch (IOException e) {
|
||||
throw new StingException("Unable to close reader for rod named " + rod.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,26 +50,27 @@ abstract class ResourcePool <T,I extends Iterator> {
|
|||
public I iterator( DataStreamSegment segment ) {
|
||||
// Grab the first iterator in the list whose position is before the requested position.
|
||||
T selectedResource = null;
|
||||
synchronized(this) {
|
||||
selectedResource = selectBestExistingResource( segment, availableResources );
|
||||
synchronized (this) {
|
||||
selectedResource = selectBestExistingResource(segment, availableResources);
|
||||
|
||||
// No iterator found? Create another. It is expected that
|
||||
// each iterator created will have its own file handle.
|
||||
if( selectedResource == null ) {
|
||||
if (selectedResource == null) {
|
||||
selectedResource = createNewResource();
|
||||
addNewResource( selectedResource );
|
||||
addNewResource(selectedResource);
|
||||
}
|
||||
|
||||
// Remove the iterator from the list of available iterators.
|
||||
availableResources.remove(selectedResource);
|
||||
|
||||
|
||||
I iterator = createIteratorFromResource(segment, selectedResource);
|
||||
|
||||
// Make a note of this assignment for proper releasing later.
|
||||
resourceAssignments.put(iterator, selectedResource);
|
||||
|
||||
return iterator;
|
||||
}
|
||||
|
||||
I iterator = createIteratorFromResource( segment, selectedResource );
|
||||
|
||||
// Make a note of this assignment for proper releasing later.
|
||||
resourceAssignments.put( iterator, selectedResource );
|
||||
|
||||
return iterator;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -73,6 +73,7 @@ public abstract class GenotypeWriterStorage<T extends GenotypeWriter> implements
|
|||
this.stream = null;
|
||||
writer = GenotypeWriterFactory.create(stub.getFormat(), file);
|
||||
Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader());
|
||||
// TODO: this line is a problem, creating with an empty hashset eliminates any genotype FORMAT fields in the calls (besides GT)
|
||||
GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet<VCFHeaderLine>());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -64,11 +64,11 @@ public class VCFGenotypeWriterStorage extends GenotypeWriterStorage<VCFGenotypeW
|
|||
* @param target Target stream for the temporary storage. May not be null.
|
||||
*/
|
||||
public void mergeInto(VCFGenotypeWriter target) {
|
||||
VCFReader reader = new VCFReader(file);
|
||||
// make sure we pass false to the reader, so that it doesn't create an index on disk
|
||||
VCFReader reader = new VCFReader(file,false);
|
||||
while ( reader.hasNext() )
|
||||
target.addRecord(reader.next());
|
||||
reader.close();
|
||||
|
||||
file.delete();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,46 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class PeekableRODIterator
|
||||
* <p/>
|
||||
* the methods attached to a peekable ROD iterator
|
||||
*/
|
||||
public interface PeekableRODIterator extends Iterator<List<ReferenceOrderedDatum>> {
|
||||
public GenomeLoc peekNextLocation();
|
||||
|
||||
public RODRecordList seekForward(GenomeLoc interval);
|
||||
}
|
||||
|
||||
|
|
@ -30,7 +30,6 @@ public class PushbackIterator<T> implements Iterator<T>, Iterable<T> {
|
|||
/**
|
||||
* Retrieves, but does not remove, the head of this iterator.
|
||||
* @return T the next element in the iterator
|
||||
* @throws NoSuchElementException - if the iterator doesn't have a next element
|
||||
*/
|
||||
public T element() {
|
||||
T x = next();
|
||||
|
|
@ -40,7 +39,6 @@ public class PushbackIterator<T> implements Iterator<T>, Iterable<T> {
|
|||
|
||||
/**
|
||||
* @return the next element in the iteration.
|
||||
* @throws NoSuchElementException - iteration has no more elements.
|
||||
*/
|
||||
public T next() {
|
||||
if (pushedElement != null) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
|
|
@ -78,7 +79,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
|||
// This implementation tracks the query history and makes next() illegal after a seekforward query of length > 1,
|
||||
// but re-enables next() again after a length-1 query.
|
||||
|
||||
public SeekableRODIterator(Iterator<GATKFeature> it) {
|
||||
public SeekableRODIterator(CloseableIterator<GATKFeature> it) {
|
||||
this.it = new PushbackIterator<GATKFeature>(it);
|
||||
records = new LinkedList<GATKFeature>();
|
||||
// the following is a trick: we would like the iterator to know the actual name assigned to
|
||||
|
|
@ -169,7 +170,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
|||
|
||||
if ( r.getLocation().getStart() < curr_position )
|
||||
throw new StingException("LocationAwareSeekableRODIterator: track "+r.getName() +
|
||||
" is out of coordinate order on contig "+r.getLocation().getContig());
|
||||
" is out of coordinate order on contig "+r.getLocation() + " compared to " + curr_contig + ":" + curr_position);
|
||||
|
||||
if ( r.getLocation().getStart() > curr_position ) break; // next record starts after the current position; we do not need it yet
|
||||
|
||||
|
|
@ -334,4 +335,8 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
|||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if (this.it != null) ((CloseableIterator)this.it.getUnderlyingIterator()).close();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broad.tribble.FeatureReader;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
|
|
@ -64,7 +65,7 @@ public class FeatureReaderTrack extends RMDTrack implements QueryableTrack {
|
|||
* but other more advanced tracks support the query interface
|
||||
*/
|
||||
@Override
|
||||
public Iterator<GATKFeature> getIterator() {
|
||||
public CloseableIterator<GATKFeature> getIterator() {
|
||||
try {
|
||||
return new FeatureToGATKFeatureIterator(reader.iterator(),this.getName());
|
||||
} catch (IOException e) {
|
||||
|
|
@ -83,22 +84,22 @@ public class FeatureReaderTrack extends RMDTrack implements QueryableTrack {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Iterator<GATKFeature> query(GenomeLoc interval) throws IOException {
|
||||
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
|
||||
return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
|
||||
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
|
||||
return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop(), contained),this.getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
|
||||
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
|
||||
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
|
||||
public CloseableIterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
|
||||
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop, contained),this.getName());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
|
|
@ -37,9 +38,9 @@ import java.util.Iterator;
|
|||
* a decorator interface for tracks that are queryable
|
||||
*/
|
||||
public interface QueryableTrack {
|
||||
public Iterator<GATKFeature> query(final GenomeLoc interval) throws IOException;
|
||||
public Iterator<GATKFeature> query(final GenomeLoc interval, final boolean contained) throws IOException;
|
||||
public Iterator<GATKFeature> query(final String contig, final int start, final int stop) throws IOException;
|
||||
public Iterator<GATKFeature> query(final String contig, final int start, final int stop, final boolean contained) throws IOException;
|
||||
public CloseableIterator<GATKFeature> query(final GenomeLoc interval) throws IOException;
|
||||
public CloseableIterator<GATKFeature> query(final GenomeLoc interval, final boolean contained) throws IOException;
|
||||
public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop) throws IOException;
|
||||
public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop, final boolean contained) throws IOException;
|
||||
public void close();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -73,7 +74,7 @@ public abstract class RMDTrack {
|
|||
* @return how to get an iterator of the underlying data. This is all a track has to support,
|
||||
* but other more advanced tracks support the query interface
|
||||
*/
|
||||
public abstract Iterator<GATKFeature> getIterator();
|
||||
public abstract CloseableIterator<GATKFeature> getIterator();
|
||||
|
||||
/**
|
||||
* helper function for determining if we are the same track
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
|
||||
|
|
@ -64,7 +65,7 @@ public class RODRMDTrack extends RMDTrack {
|
|||
* but other more advanced tracks support the query interface
|
||||
*/
|
||||
@Override
|
||||
public Iterator<GATKFeature> getIterator() {
|
||||
public CloseableIterator<GATKFeature> getIterator() {
|
||||
return new GATKFeatureIterator(data.iterator());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,11 +26,10 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.tracks.builders;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.FeatureReader;
|
||||
import org.broad.tribble.*;
|
||||
import org.broad.tribble.index.linear.LinearIndex;
|
||||
import org.broad.tribble.index.linear.LinearIndexCreator;
|
||||
import org.broad.tribble.readers.BasicFeatureReader;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureReaderTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
|
||||
|
|
@ -53,16 +52,6 @@ import java.util.Map;
|
|||
* This class keeps track of the available codecs, and knows how to put together a track
|
||||
* that gets iterators from the FeatureReader using Tribble.
|
||||
*
|
||||
* Here's an example run command to find SNPs 200 base pairs up and downstream of the target file.
|
||||
*
|
||||
* java -jar dist/GenomeAnalysisTK.jar \
|
||||
* -R /broad/1KG/reference/human_b36_both.fasta \
|
||||
* -L 1:1863 \
|
||||
* -L MT:16520 \
|
||||
* -db /humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_129_b36.rod \
|
||||
* -dbw 200 \
|
||||
* -l INFO \
|
||||
* -T DbSNPWindowCounter
|
||||
*/
|
||||
public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implements RMDTrackBuilder {
|
||||
/**
|
||||
|
|
@ -120,11 +109,11 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
|||
// check to see if the input file has an index
|
||||
if (requireIndex(inputFile)) {
|
||||
logger.warn("Creating Tribble Index for file " + inputFile);
|
||||
LinearIndex index = createIndex(inputFile, this.createByType(targetClass));
|
||||
reader = new FeatureReader(inputFile,index, this.createByType(targetClass));
|
||||
LinearIndex index = createIndex(inputFile, this.createByType(targetClass), true);
|
||||
reader = new BasicFeatureReader(inputFile,index, this.createByType(targetClass));
|
||||
}
|
||||
else {
|
||||
reader = new FeatureReader(inputFile,this.createByType(targetClass));
|
||||
reader = new BasicFeatureReader(inputFile,this.createByType(targetClass));
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Unable to create reader with file " + inputFile, e);
|
||||
|
|
@ -138,18 +127,19 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
|||
* create an index for the input file
|
||||
* @param inputFile the input file
|
||||
* @param codec the codec to use
|
||||
* @param onDisk write the index to disk?
|
||||
* @return a linear index for the specified type
|
||||
* @throws IOException if we cannot write the index file
|
||||
*/
|
||||
public static LinearIndex createIndex(File inputFile, FeatureCodec codec) throws IOException {
|
||||
public static LinearIndex createIndex(File inputFile, FeatureCodec codec, boolean onDisk) throws IOException {
|
||||
LinearIndexCreator create = new LinearIndexCreator(inputFile, codec);
|
||||
|
||||
// if we can write the index, we should, but if not just create it in memory
|
||||
File indexFile = new File(inputFile.getAbsoluteFile() + linearIndexExtension);
|
||||
if (indexFile.getParentFile().canWrite() && (!indexFile.exists() || indexFile.canWrite()))
|
||||
if (indexFile.getParentFile().canWrite() && (!indexFile.exists() || indexFile.canWrite()) && onDisk)
|
||||
return create.createIndex();
|
||||
else {
|
||||
logger.info("Unable to write to location " + indexFile + " for index file, creating index in memory only");
|
||||
if (onDisk) logger.info("Unable to write to location " + indexFile + " for index file, creating index in memory only");
|
||||
return create.createIndex(null);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.refdata.utils;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broad.tribble.Feature;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
|
@ -36,11 +37,11 @@ import java.util.Iterator;
|
|||
*
|
||||
* a wrapper on Tribble feature iterators so that they produce GATKFeatures (which produce GenomeLocs)
|
||||
*/
|
||||
public class FeatureToGATKFeatureIterator implements Iterator<GATKFeature> {
|
||||
private final Iterator<Feature> iterator;
|
||||
public class FeatureToGATKFeatureIterator implements CloseableIterator<GATKFeature> {
|
||||
private final CloseableIterator<Feature> iterator;
|
||||
private final String name;
|
||||
|
||||
public FeatureToGATKFeatureIterator(Iterator<Feature> iter, String name) {
|
||||
public FeatureToGATKFeatureIterator(CloseableIterator<Feature> iter, String name) {
|
||||
this.name = name;
|
||||
this.iterator = iter;
|
||||
}
|
||||
|
|
@ -59,4 +60,9 @@ public class FeatureToGATKFeatureIterator implements Iterator<GATKFeature> {
|
|||
public void remove() {
|
||||
throw new UnsupportedOperationException("Why does Iterator have this method? We always throw an exception here");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
this.iterator.close();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.refdata.utils;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
|
@ -36,7 +37,7 @@ import java.util.Iterator;
|
|||
*
|
||||
* Takes a RODatum iterator and makes it an iterator of GATKFeatures. Shazam!
|
||||
*/
|
||||
public class GATKFeatureIterator implements Iterator<GATKFeature> {
|
||||
public class GATKFeatureIterator implements CloseableIterator<GATKFeature> {
|
||||
private final Iterator<ReferenceOrderedDatum> iter;
|
||||
public GATKFeatureIterator(Iterator<ReferenceOrderedDatum> iter) {
|
||||
this.iter = iter;
|
||||
|
|
@ -56,4 +57,9 @@ public class GATKFeatureIterator implements Iterator<GATKFeature> {
|
|||
public void remove() {
|
||||
throw new UnsupportedOperationException("Remove not supported");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
// do nothing, our underlying iterator doesn't support this
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.utils;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
|
|
@ -13,11 +14,10 @@ import java.util.List;
|
|||
* <p/>
|
||||
* combine iteration with a position aware interface
|
||||
*/
|
||||
public interface LocationAwareSeekableRODIterator extends Iterator<RODRecordList> {
|
||||
public interface LocationAwareSeekableRODIterator extends CloseableIterator<RODRecordList> {
|
||||
public GenomeLoc peekNextLocation();
|
||||
|
||||
public GenomeLoc position();
|
||||
|
||||
public RODRecordList seekForward(GenomeLoc interval);
|
||||
|
||||
public RODRecordList seekForward(GenomeLoc interval);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.sequenom;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -73,7 +74,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
|||
ReferenceOrderedData snp_mask;
|
||||
if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) {
|
||||
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
|
||||
Iterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPFeature.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator();
|
||||
CloseableIterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPFeature.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator();
|
||||
snpMaskIterator = new SeekableRODIterator(iter);
|
||||
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broad.tribble.FeatureIterator;
|
||||
import org.broad.tribble.FeatureReader;
|
||||
import org.broad.tribble.dbsnp.DbSNPCodec;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.util.CloseableTribbleIterator;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -49,7 +51,7 @@ public class DbSNPWindowCounter extends LocusWalker<Integer, Long> {
|
|||
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
FeatureIterator<DbSNPFeature> dbSNPs;
|
||||
CloseableTribbleIterator<DbSNPFeature> dbSNPs;
|
||||
|
||||
// our upstream and downstream window locations
|
||||
int windowStart = (int)Math.max(context.getLocation().getStart()-windowSize,0);
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
|||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* An extension of eth GenotypeWriter interface with support
|
||||
* An extension of the GenotypeWriter interface with support
|
||||
* for adding header lines.
|
||||
*
|
||||
* @author mhanna
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ import java.util.zip.GZIPInputStream;
|
|||
|
||||
import org.broad.tribble.FeatureReader;
|
||||
import org.broad.tribble.index.linear.LinearIndex;
|
||||
import org.broad.tribble.readers.BasicFeatureReader;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
|
@ -45,7 +46,16 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
* @param vcfFile the vcf file to read
|
||||
*/
|
||||
public VCFReader(File vcfFile) {
|
||||
initialize(vcfFile, null);
|
||||
initialize(vcfFile, null, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a VCF reader, given a VCF file and a choice of where its index is created
|
||||
*
|
||||
* @param vcfFile the vcf file to open
|
||||
* @param createIndexOnDisk do we create the index on disk? passed through to initialize()
*/
|
||||
public VCFReader(File vcfFile, boolean createIndexOnDisk) {
|
||||
// no line transform is supplied (null); only the index-on-disk choice differs from the single-argument constructor
initialize(vcfFile, null, createIndexOnDisk);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -54,22 +64,21 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
* @param vcfFile the vcf file to read
|
||||
*/
|
||||
public VCFReader(File vcfFile, VCFCodec.LineTransform transform) {
|
||||
initialize(vcfFile, transform);
|
||||
initialize(vcfFile, transform, true);
|
||||
}
|
||||
|
||||
private void initialize(File vcfFile, VCFCodec.LineTransform transform) {
|
||||
/**
|
||||
* initialize the VCF reader
|
||||
* @param vcfFile the VCF file to open
|
||||
* @param transform the line transformer to use, if any
|
||||
* @param createIndexOnDisk do we need to create an index on disk?
|
||||
*/
|
||||
private void initialize(File vcfFile, VCFCodec.LineTransform transform, boolean createIndexOnDisk) {
|
||||
VCFCodec codec = new VCFCodec();
|
||||
LinearIndex index = null;
|
||||
if (TribbleRMDTrackBuilder.requireIndex(vcfFile)) {
|
||||
try {
|
||||
index = TribbleRMDTrackBuilder.createIndex(vcfFile, new VCFCodec());
|
||||
} catch (IOException e) {
|
||||
throw new StingException("Unable to make required index for file " + vcfFile + " do you have write permissions to the directory?");
|
||||
}
|
||||
}
|
||||
LinearIndex index = createIndex(vcfFile, createIndexOnDisk);
|
||||
if (transform != null) codec.setTransformer(transform);
|
||||
try {
|
||||
vcfReader = new FeatureReader(vcfFile,index,codec);
|
||||
vcfReader = new BasicFeatureReader(vcfFile,index,codec);
|
||||
iterator= vcfReader.iterator();
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Unable to read VCF File from " + vcfFile, e);
|
||||
|
|
@ -79,6 +88,24 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
mHeader = codec.getHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* create an index for the given VCF file, if one is required:
|
||||
* @param vcfFile the vcf file
|
||||
* @param createIndexOnDisk do we create the index on disk (or only in memory?)
|
||||
* @return an instance of an index, or null when TribbleRMDTrackBuilder.requireIndex(vcfFile) returns false
* @throws StingException if creating the index fails with an IOException; the IOException is attached as the cause
|
||||
*/
|
||||
private LinearIndex createIndex(File vcfFile, boolean createIndexOnDisk) {
|
||||
// stays null (and is returned as null) when no index is required
LinearIndex index = null;
|
||||
if (TribbleRMDTrackBuilder.requireIndex(vcfFile)) {
|
||||
try {
|
||||
index = TribbleRMDTrackBuilder.createIndex(vcfFile, new VCFCodec(), createIndexOnDisk);
|
||||
} catch (IOException e) {
|
||||
// pass the IOException along as the cause so the underlying failure is not lost
throw new StingException("Unable to make required index for file " + vcfFile + " do you have write permissions to the directory?", e);
|
||||
}
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
|
||||
/** @return true if we have another VCF record to return */
|
||||
public boolean hasNext() {
|
||||
|
|
@ -110,6 +137,9 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* close the files
|
||||
*/
|
||||
public void close() {
|
||||
if (vcfReader != null) try {
|
||||
vcfReader.close();
|
||||
|
|
|
|||
|
|
@ -142,6 +142,11 @@ class FakePeekingRODIterator implements LocationAwareSeekableRODIterator {
|
|||
public void remove() {
|
||||
// removal is not supported by this fake iterator: every call throws IllegalStateException
throw new IllegalStateException("GRRR");
|
||||
}
|
||||
|
||||
/** Close this iterator; intentionally a no-op (the body is empty). */
@Override
|
||||
public void close() {
|
||||
// nothing to do
|
||||
}
|
||||
}
|
||||
|
||||
class FakeRODRecordList extends AbstractList<GATKFeature> implements RODRecordList {
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import net.sf.picard.filter.SamRecordFilter;
|
|||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
|
|
@ -17,6 +18,7 @@ import org.junit.Test;
|
|||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
|
@ -47,7 +49,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
reads.setMaxPileupSize(MAX_READS);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = new LocusIteratorByState(records.iterator(), reads);
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads);
|
||||
|
||||
// inject the testing version of the locus iterator watcher
|
||||
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
|
||||
|
|
@ -73,7 +75,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
reads.setMaxPileupSize(MAX_READS);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = new LocusIteratorByState(records.iterator(), reads);
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads);
|
||||
|
||||
// inject the testing version of the locus iterator watcher
|
||||
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
|
||||
|
|
@ -102,4 +104,32 @@ class TestReads extends Reads {
|
|||
public void setMaxPileupSize(int maxSize) {
|
||||
// store the supplied limit in the maximumReadsAtLocus field, replacing any previous value
this.maximumReadsAtLocus = maxSize;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Adapts a plain Iterator to the CloseableIterator interface.
 * hasNext() and next() are forwarded to the wrapped iterator, close() does nothing,
 * and remove() always throws UnsupportedOperationException.
 */
class FakeCloseableIterator<T> implements CloseableIterator<T> {
|
||||
// the wrapped iterator that hasNext() and next() delegate to
Iterator<T> iterator;
|
||||
|
||||
/**
 * @param it the iterator to wrap; the reference is stored as-is, not copied
 */
public FakeCloseableIterator(Iterator<T> it) {
|
||||
iterator = it;
|
||||
}
|
||||
|
||||
/** No-op: returns immediately without touching the wrapped iterator. */
@Override
|
||||
public void close() {
|
||||
return;
|
||||
}
|
||||
|
||||
/** @return whatever the wrapped iterator's hasNext() returns */
@Override
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext();
|
||||
}
|
||||
|
||||
/** @return the next element of the wrapped iterator */
@Override
|
||||
public T next() {
|
||||
return iterator.next();
|
||||
}
|
||||
|
||||
/** Removal is not supported: always throws UnsupportedOperationException. */
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Don't remove!");
|
||||
}
|
||||
}
|
||||
|
|
@ -61,7 +61,7 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest {
|
|||
public void testBuilderIndexUnwriteable() {
|
||||
File vcfFile = new File(validationDataLocation + "/ROD_validation/mixedup.vcf");
|
||||
try {
|
||||
builder.createIndex(vcfFile,new VCFCodec());
|
||||
builder.createIndex(vcfFile,new VCFCodec(), true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail("Unable to make index because of IO exception " + e.getMessage());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -186,6 +186,11 @@ class FakeSeekableRODIterator implements LocationAwareSeekableRODIterator {
|
|||
public void remove() {
|
||||
// removal is not supported by this fake iterator: every call throws IllegalStateException
throw new IllegalStateException("GRRR");
|
||||
}
|
||||
|
||||
/** Close this iterator; intentionally a no-op (the body is empty). */
@Override
|
||||
public void close() {
|
||||
// nothing to do
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +0,0 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="org.broad" module="tribble" revision="79" status="integration" publication="20100507124200" />
|
||||
</ivy-module>
|
||||
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="org.broad" module="tribble" revision="80" status="integration" publication="20100512124200" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue