fixes for parallel processing problems with Tribble, a small bug in the resource pool, and some more documentation.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3349 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-05-12 06:13:26 +00:00
parent 6868ce988f
commit 2c55ac1374
26 changed files with 204 additions and 203 deletions

View File

@ -1,12 +1,13 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broad.tribble.FeatureReader;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.QueryableTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -41,7 +42,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
/**
* A pool of iterators for navigating through the genome.
*/
private final ReferenceOrderedDataPool iteratorPool;
private final ResourcePool<?,LocationAwareSeekableRODIterator> iteratorPool;
/**
* Create a new reference-ordered data source.
@ -49,8 +50,10 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
*/
public ReferenceOrderedDataSource( Walker walker, RMDTrack rod) {
this.rod = rod;
if (rod.supportsQuery()) iteratorPool = null;
else iteratorPool = new ReferenceOrderedDataPool( walker, rod );
if (rod.supportsQuery())
iteratorPool = new ReferenceOrderedQueryDataPool(new TribbleRMDTrackBuilder(), rod);
else
iteratorPool = new ReferenceOrderedDataPool( walker, rod );
}
/**
@ -75,11 +78,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* @return Iterator through the data.
*/
public LocationAwareSeekableRODIterator seek( Shard shard ) {
if (iteratorPool == null) // use query
return getQuery(shard.getGenomeLocs() == null || shard.getGenomeLocs().size() == 0 ? null : shard.getGenomeLocs());
DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream();
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
return RODIterator;
return iteratorPool.iterator(dataStreamSegment);
}
/**
@ -90,30 +90,17 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* @return Iterator through the data.
*/
public LocationAwareSeekableRODIterator seek(GenomeLoc loc) {
if (iteratorPool == null) // use query
return getQuery(loc == null ? null : Arrays.asList(loc));
DataStreamSegment dataStreamSegment = loc != null ? new MappedStreamSegment(loc) : new EntireStream();
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
return RODIterator;
return iteratorPool.iterator(dataStreamSegment);
}
/**
* assuming the ROD is a queryable ROD, use that interface to get an iterator to the selected region
* @param loc the region to query for
* @return a LocationAwareSeekableRODIterator over the selected region
*/
private LocationAwareSeekableRODIterator getQuery(List<GenomeLoc> loc) {
if (loc == null) // for the mono shard case
return new SeekableRODIterator(rod.getIterator());
return new StitchingLocationAwareSeekableRODIterator(loc,(QueryableTrack)rod);
}
/**
* Close the specified iterator, returning it to the pool.
* @param iterator Iterator to close.
*/
public void close( LocationAwareSeekableRODIterator iterator ) {
if (iteratorPool != null) iteratorPool.release(iterator);
iteratorPool.release(iterator);
}
}
@ -189,78 +176,54 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
}
/**
* stitch together the multiple calls to seek (since shards can have multiple intervals now)
* on the underlying Tribble track into one seamless iteration
* a data pool for the new query based RODs
*/
class StitchingLocationAwareSeekableRODIterator implements LocationAwareSeekableRODIterator {
class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureReader, LocationAwareSeekableRODIterator> {
// the list of intervals we're iterating over
private final LinkedList<GenomeLoc> locationList;
// the reference-ordered data itself.
private final RMDTrack rod;
// The reference-ordered data itself.
private final QueryableTrack rod;
// our tribble track builder
private final TribbleRMDTrackBuilder builder;
// the current iterator
private SeekableRODIterator iterator;
StitchingLocationAwareSeekableRODIterator(List<GenomeLoc> list, QueryableTrack rmd) {
rod = rmd;
locationList = new LinkedList<GenomeLoc>();
locationList.addAll(list);
fetchNextInterval();
public ReferenceOrderedQueryDataPool( TribbleRMDTrackBuilder builder, RMDTrack rod ) {
this.rod = rod;
this.builder = builder;
}
@Override
public GenomeLoc peekNextLocation() {
if (iterator == null) return null;
return iterator.peekNextLocation();
protected FeatureReader createNewResource() {
return builder.createFeatureReader(rod.getType(),rod.getFile());
}
@Override
public GenomeLoc position() {
if (iterator == null) return null;
return iterator.position();
protected FeatureReader selectBestExistingResource(DataStreamSegment segment, List<FeatureReader> availableResources) {
for (FeatureReader reader : availableResources)
if (reader != null) return reader;
return null;
}
@Override
public RODRecordList seekForward(GenomeLoc interval) {
RODRecordList list = iterator.seekForward(interval);
if (list == null) { // we were unable to seek the current interval to the location
fetchNextInterval();
list = iterator.seekForward(interval);
}
return list;
}
@Override
public boolean hasNext() {
if (iterator == null) return false;
return iterator.hasNext();
}
@Override
public RODRecordList next() {
if (!hasNext()) throw new IllegalStateException("StitchingLocationAwareSeekableRODIterator: We do not have a next");
RODRecordList list = iterator.next();
if (!iterator.hasNext()) fetchNextInterval();
return list;
}
@Override
public void remove() {
throw new UnsupportedOperationException("\"Thou shall not remove()!\" - Software Engineering Team");
}
private void fetchNextInterval() {
if (locationList != null && locationList.size() > 0) {
GenomeLoc loc = locationList.getFirst();
locationList.removeFirst();
if (rod == null) throw new StingException("Unable to query(), target rod is null, next location = " + ((locationList != null) ? locationList.getFirst() : "null"));
try {
iterator = new SeekableRODIterator(rod.query(loc));
} catch (IOException e) {
throw new StingException("Unable to query iterator with location " + loc + " and rod name of " + ((RMDTrack)rod).getName());
/**
 * Build an iterator on top of the supplied feature reader.
 *
 * @param position the requested segment; a MappedStreamSegment is turned into a region query on its
 *                 locus, any other segment iterates over everything the reader returns.
 * @param resource the feature reader to pull records from.
 * @return a seekable ROD iterator over the selected records, tagged with this rod's name.
 * @throws StingException if the reader throws an IOException while building the iterator.
 */
protected LocationAwareSeekableRODIterator createIteratorFromResource(DataStreamSegment position, FeatureReader resource) {
    try {
        if (position instanceof MappedStreamSegment) {
            GenomeLoc pos = ((MappedStreamSegment) position).locus;
            return new SeekableRODIterator(new FeatureToGATKFeatureIterator(resource.query(pos.getContig(), (int) pos.getStart(), (int) pos.getStop()), rod.getName()));
        }
        return new SeekableRODIterator(new FeatureToGATKFeatureIterator(resource.iterator(), rod.getName()));
    } catch (IOException e) {
        // keep the IOException as the cause so the underlying failure is not lost
        throw new StingException("Unable to create iterator for rod named " + rod.getName(), e);
    }
}
/**
 * Close the given feature reader.
 *
 * @param resource the reader to close.
 * @throws StingException if the reader throws an IOException while closing.
 */
@Override
protected void closeResource(FeatureReader resource) {
    try {
        resource.close();
    } catch (IOException e) {
        // keep the IOException as the cause so the underlying failure is not lost
        throw new StingException("Unable to close reader for rod named " + rod.getName(), e);
    }
}
}

View File

@ -50,26 +50,27 @@ abstract class ResourcePool <T,I extends Iterator> {
public I iterator( DataStreamSegment segment ) {
// Grab the first iterator in the list whose position is before the requested position.
T selectedResource = null;
synchronized(this) {
selectedResource = selectBestExistingResource( segment, availableResources );
synchronized (this) {
selectedResource = selectBestExistingResource(segment, availableResources);
// No iterator found? Create another. It is expected that
// each iterator created will have its own file handle.
if( selectedResource == null ) {
if (selectedResource == null) {
selectedResource = createNewResource();
addNewResource( selectedResource );
addNewResource(selectedResource);
}
// Remove the iterator from the list of available iterators.
availableResources.remove(selectedResource);
I iterator = createIteratorFromResource(segment, selectedResource);
// Make a note of this assignment for proper releasing later.
resourceAssignments.put(iterator, selectedResource);
return iterator;
}
I iterator = createIteratorFromResource( segment, selectedResource );
// Make a note of this assignment for proper releasing later.
resourceAssignments.put( iterator, selectedResource );
return iterator;
}
/**

View File

@ -73,6 +73,7 @@ public abstract class GenotypeWriterStorage<T extends GenotypeWriter> implements
this.stream = null;
writer = GenotypeWriterFactory.create(stub.getFormat(), file);
Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader());
// TODO: this line is a problem, creating with an empty hashset eliminates any genotype FORMAT fields in the calls (besides GT)
GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet<VCFHeaderLine>());
}

View File

@ -64,11 +64,11 @@ public class VCFGenotypeWriterStorage extends GenotypeWriterStorage<VCFGenotypeW
* @param target Target stream for the temporary storage. May not be null.
*/
public void mergeInto(VCFGenotypeWriter target) {
VCFReader reader = new VCFReader(file);
// make sure we pass false to the reader, so that it doesn't create an index on disk
VCFReader reader = new VCFReader(file,false);
while ( reader.hasNext() )
target.addRecord(reader.next());
reader.close();
file.delete();
}
}

View File

@ -1,46 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.iterators;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Iterator;
import java.util.List;
/**
* @author aaron
* <p/>
* Class PeekableRODIterator
* <p/>
* the methods attached to a peekable ROD iterator
*/
public interface PeekableRODIterator extends Iterator<List<ReferenceOrderedDatum>> {
public GenomeLoc peekNextLocation();
public RODRecordList seekForward(GenomeLoc interval);
}

View File

@ -30,7 +30,6 @@ public class PushbackIterator<T> implements Iterator<T>, Iterable<T> {
/**
* Retrieves, but does not remove, the head of this iterator.
* @return T the next element in the iterator
* @throws NoSuchElementException - if the iterator doesn't have a next element
*/
public T element() {
T x = next();
@ -40,7 +39,6 @@ public class PushbackIterator<T> implements Iterator<T>, Iterable<T> {
/**
* @return the next element in the iteration.
* @throws NoSuchElementException - iteration has no more elements.
*/
public T next() {
if (pushedElement != null) {

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
@ -78,7 +79,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
// This implementation tracks the query history and makes next() illegal after a seekforward query of length > 1,
// but re-enables next() again after a length-1 query.
public SeekableRODIterator(Iterator<GATKFeature> it) {
public SeekableRODIterator(CloseableIterator<GATKFeature> it) {
this.it = new PushbackIterator<GATKFeature>(it);
records = new LinkedList<GATKFeature>();
// the following is a trick: we would like the iterator to know the actual name assigned to
@ -169,7 +170,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
if ( r.getLocation().getStart() < curr_position )
throw new StingException("LocationAwareSeekableRODIterator: track "+r.getName() +
" is out of coordinate order on contig "+r.getLocation().getContig());
" is out of coordinate order on contig "+r.getLocation() + " compared to " + curr_contig + ":" + curr_position);
if ( r.getLocation().getStart() > curr_position ) break; // next record starts after the current position; we do not need it yet
@ -334,4 +335,8 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
}
/**
 * Close the iterator wrapped by the pushback iterator, if there is one.
 * The underlying iterator is cast to CloseableIterator before being closed.
 */
@Override
public void close() {
    if (this.it != null) {
        // wildcard instead of a raw type: only close() is needed, not the element type
        ((CloseableIterator<?>) this.it.getUnderlyingIterator()).close();
    }
}
}

View File

@ -23,6 +23,7 @@
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.FeatureReader;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
@ -64,7 +65,7 @@ public class FeatureReaderTrack extends RMDTrack implements QueryableTrack {
* but other more advanced tracks support the query interface
*/
@Override
public Iterator<GATKFeature> getIterator() {
public CloseableIterator<GATKFeature> getIterator() {
try {
return new FeatureToGATKFeatureIterator(reader.iterator(),this.getName());
} catch (IOException e) {
@ -83,22 +84,22 @@ public class FeatureReaderTrack extends RMDTrack implements QueryableTrack {
}
@Override
public Iterator<GATKFeature> query(GenomeLoc interval) throws IOException {
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
}
@Override
public Iterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop(), contained),this.getName());
}
@Override
public Iterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName());
}
@Override
public Iterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
public CloseableIterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop, contained),this.getName());
}

View File

@ -23,6 +23,7 @@
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -37,9 +38,9 @@ import java.util.Iterator;
* a decorator interface for tracks that are queryable
*/
public interface QueryableTrack {
public Iterator<GATKFeature> query(final GenomeLoc interval) throws IOException;
public Iterator<GATKFeature> query(final GenomeLoc interval, final boolean contained) throws IOException;
public Iterator<GATKFeature> query(final String contig, final int start, final int stop) throws IOException;
public Iterator<GATKFeature> query(final String contig, final int start, final int stop, final boolean contained) throws IOException;
public CloseableIterator<GATKFeature> query(final GenomeLoc interval) throws IOException;
public CloseableIterator<GATKFeature> query(final GenomeLoc interval, final boolean contained) throws IOException;
public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop) throws IOException;
public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop, final boolean contained) throws IOException;
public void close();
}

View File

@ -23,6 +23,7 @@
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import java.io.File;
@ -73,7 +74,7 @@ public abstract class RMDTrack {
* @return how to get an iterator of the underlying data. This is all a track has to support,
* but other more advanced tracks support the query interface
*/
public abstract Iterator<GATKFeature> getIterator();
public abstract CloseableIterator<GATKFeature> getIterator();
/**
* helper function for determining if we are the same track

View File

@ -23,6 +23,7 @@
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
@ -64,7 +65,7 @@ public class RODRMDTrack extends RMDTrack {
* but other more advanced tracks support the query interface
*/
@Override
public Iterator<GATKFeature> getIterator() {
public CloseableIterator<GATKFeature> getIterator() {
return new GATKFeatureIterator(data.iterator());
}

View File

@ -26,11 +26,10 @@
package org.broadinstitute.sting.gatk.refdata.tracks.builders;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.*;
import org.broad.tribble.index.linear.LinearIndex;
import org.broad.tribble.index.linear.LinearIndexCreator;
import org.broad.tribble.readers.BasicFeatureReader;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureReaderTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
@ -53,16 +52,6 @@ import java.util.Map;
* This class keeps track of the available codecs, and knows how to put together a track of
* that gets iterators from the FeatureReader using Tribble.
*
* Here's an example run command to find SNPs 200 base pairs up and downstream of the target file.
*
* java -jar dist/GenomeAnalysisTK.jar \
* -R /broad/1KG/reference/human_b36_both.fasta \
* -L 1:1863 \
* -L MT:16520 \
* -db /humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_129_b36.rod \
* -dbw 200 \
* -l INFO \
* -T DbSNPWindowCounter
*/
public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implements RMDTrackBuilder {
/**
@ -120,11 +109,11 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
// check to see if the input file has an index
if (requireIndex(inputFile)) {
logger.warn("Creating Tribble Index for file " + inputFile);
LinearIndex index = createIndex(inputFile, this.createByType(targetClass));
reader = new FeatureReader(inputFile,index, this.createByType(targetClass));
LinearIndex index = createIndex(inputFile, this.createByType(targetClass), true);
reader = new BasicFeatureReader(inputFile,index, this.createByType(targetClass));
}
else {
reader = new FeatureReader(inputFile,this.createByType(targetClass));
reader = new BasicFeatureReader(inputFile,this.createByType(targetClass));
}
} catch (FileNotFoundException e) {
throw new StingException("Unable to create reader with file " + inputFile, e);
@ -138,18 +127,19 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
* create an index for the input file
* @param inputFile the input file
* @param codec the codec to use
* @param onDisk write the index to disk?
* @return a linear index for the specified type
* @throws IOException if we cannot write the index file
*/
public static LinearIndex createIndex(File inputFile, FeatureCodec codec) throws IOException {
public static LinearIndex createIndex(File inputFile, FeatureCodec codec, boolean onDisk) throws IOException {
LinearIndexCreator create = new LinearIndexCreator(inputFile, codec);
// if we can write the index, we should, but if not just create it in memory
File indexFile = new File(inputFile.getAbsoluteFile() + linearIndexExtension);
if (indexFile.getParentFile().canWrite() && (!indexFile.exists() || indexFile.canWrite()))
if (indexFile.getParentFile().canWrite() && (!indexFile.exists() || indexFile.canWrite()) && onDisk)
return create.createIndex();
else {
logger.info("Unable to write to location " + indexFile + " for index file, creating index in memory only");
if (onDisk) logger.info("Unable to write to location " + indexFile + " for index file, creating index in memory only");
return create.createIndex(null);
}

View File

@ -23,6 +23,7 @@
package org.broadinstitute.sting.gatk.refdata.utils;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.Feature;
import java.util.Iterator;
@ -36,11 +37,11 @@ import java.util.Iterator;
*
* a wrapper on Tribble feature iterators so that they produce GATKFeatures (which produce GenomeLocs)
*/
public class FeatureToGATKFeatureIterator implements Iterator<GATKFeature> {
private final Iterator<Feature> iterator;
public class FeatureToGATKFeatureIterator implements CloseableIterator<GATKFeature> {
private final CloseableIterator<Feature> iterator;
private final String name;
public FeatureToGATKFeatureIterator(Iterator<Feature> iter, String name) {
public FeatureToGATKFeatureIterator(CloseableIterator<Feature> iter, String name) {
this.name = name;
this.iterator = iter;
}
@ -59,4 +60,9 @@ public class FeatureToGATKFeatureIterator implements Iterator<GATKFeature> {
public void remove() {
throw new UnsupportedOperationException("Why does Iterator have this method? We always throw an exception here");
}
/**
 * Closes this wrapper by closing the feature iterator it delegates to.
 */
@Override
public void close() {
    iterator.close();
}
}

View File

@ -23,6 +23,7 @@
package org.broadinstitute.sting.gatk.refdata.utils;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import java.util.Iterator;
@ -36,7 +37,7 @@ import java.util.Iterator;
*
* Takes a RODatum iterator and makes it an iterator of GATKFeatures. Shazam!
*/
public class GATKFeatureIterator implements Iterator<GATKFeature> {
public class GATKFeatureIterator implements CloseableIterator<GATKFeature> {
private final Iterator<ReferenceOrderedDatum> iter;
public GATKFeatureIterator(Iterator<ReferenceOrderedDatum> iter) {
this.iter = iter;
@ -56,4 +57,9 @@ public class GATKFeatureIterator implements Iterator<GATKFeature> {
public void remove() {
throw new UnsupportedOperationException("Remove not supported");
}
/**
* Intentionally a no-op: the wrapped iterator is a plain Iterator with no close() to call.
*/
@Override
public void close() {
// do nothing, our underlying iterator doesn't support this
}
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.refdata.utils;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -13,11 +14,10 @@ import java.util.List;
* <p/>
* combine iteration with a position aware interface
*/
public interface LocationAwareSeekableRODIterator extends Iterator<RODRecordList> {
public interface LocationAwareSeekableRODIterator extends CloseableIterator<RODRecordList> {
public GenomeLoc peekNextLocation();
public GenomeLoc position();
public RODRecordList seekForward(GenomeLoc interval);
public RODRecordList seekForward(GenomeLoc interval);
}

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.sequenom;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -73,7 +74,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
ReferenceOrderedData snp_mask;
if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) {
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
Iterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPFeature.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator();
CloseableIterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPFeature.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator();
snpMaskIterator = new SeekableRODIterator(iter);
} else {

View File

@ -1,9 +1,11 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.FeatureIterator;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.dbsnp.DbSNPCodec;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broad.tribble.util.CloseableTribbleIterator;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -49,7 +51,7 @@ public class DbSNPWindowCounter extends LocusWalker<Integer, Long> {
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
FeatureIterator<DbSNPFeature> dbSNPs;
CloseableTribbleIterator<DbSNPFeature> dbSNPs;
// our upstream and downstream window locations
int windowStart = (int)Math.max(context.getLocation().getStart()-windowSize,0);

View File

@ -7,7 +7,7 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import java.util.Set;
/**
* An extension of eth GenotypeWriter interface with support
* An extension of the GenotypeWriter interface with support
* for adding header lines.
*
* @author mhanna

View File

@ -22,6 +22,7 @@ import java.util.zip.GZIPInputStream;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.index.linear.LinearIndex;
import org.broad.tribble.readers.BasicFeatureReader;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.utils.StingException;
@ -45,7 +46,16 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
* @param vcfFile the vcf file to read
*/
public VCFReader(File vcfFile) {
initialize(vcfFile, null);
initialize(vcfFile, null, true);
}
/**
* Create a VCF reader, given a VCF file, choosing whether the index may be created on disk
*
* @param vcfFile the vcf file to read
* @param createIndexOnDisk passed through to initialize(): do we create the index on disk?
*/
public VCFReader(File vcfFile, boolean createIndexOnDisk) {
initialize(vcfFile, null, createIndexOnDisk);
}
/**
@ -54,22 +64,21 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
* @param vcfFile the vcf file to read
*/
public VCFReader(File vcfFile, VCFCodec.LineTransform transform) {
initialize(vcfFile, transform);
initialize(vcfFile, transform, true);
}
private void initialize(File vcfFile, VCFCodec.LineTransform transform) {
/**
* initialize the VCF reader
* @param vcfFile the VCF file to open
* @param transform the line transformer to use, if any
* @param createIndexOnDisk do we need to create an index on disk?
*/
private void initialize(File vcfFile, VCFCodec.LineTransform transform, boolean createIndexOnDisk) {
VCFCodec codec = new VCFCodec();
LinearIndex index = null;
if (TribbleRMDTrackBuilder.requireIndex(vcfFile)) {
try {
index = TribbleRMDTrackBuilder.createIndex(vcfFile, new VCFCodec());
} catch (IOException e) {
throw new StingException("Unable to make required index for file " + vcfFile + " do you have write permissions to the directory?");
}
}
LinearIndex index = createIndex(vcfFile, createIndexOnDisk);
if (transform != null) codec.setTransformer(transform);
try {
vcfReader = new FeatureReader(vcfFile,index,codec);
vcfReader = new BasicFeatureReader(vcfFile,index,codec);
iterator= vcfReader.iterator();
} catch (FileNotFoundException e) {
throw new StingException("Unable to read VCF File from " + vcfFile, e);
@ -79,6 +88,24 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
mHeader = codec.getHeader();
}
/**
 * Create the linear index for a VCF file when TribbleRMDTrackBuilder.requireIndex() asks for one.
 *
 * @param vcfFile the vcf file
 * @param createIndexOnDisk do we create the index on disk (or only in memory?)
 * @return the newly created index, or null when requireIndex() returns false for the file
 * @throws StingException if index creation fails with an IOException
 */
private LinearIndex createIndex(File vcfFile, boolean createIndexOnDisk) {
    if (!TribbleRMDTrackBuilder.requireIndex(vcfFile)) {
        return null;
    }
    try {
        return TribbleRMDTrackBuilder.createIndex(vcfFile, new VCFCodec(), createIndexOnDisk);
    } catch (IOException e) {
        // keep the IOException as the cause so the underlying failure is not lost
        throw new StingException("Unable to make required index for file " + vcfFile + " do you have write permissions to the directory?", e);
    }
}
/** @return true if we have another VCF record to return */
public boolean hasNext() {
@ -110,6 +137,9 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
return this;
}
/**
* close the files
*/
public void close() {
if (vcfReader != null) try {
vcfReader.close();

View File

@ -142,6 +142,11 @@ class FakePeekingRODIterator implements LocationAwareSeekableRODIterator {
public void remove() {
throw new IllegalStateException("GRRR");
}
/**
* Intentionally a no-op.
*/
@Override
public void close() {
// nothing to do
}
}
class FakeRODRecordList extends AbstractList<GATKFeature> implements RODRecordList {

View File

@ -5,6 +5,7 @@ import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
@ -17,6 +18,7 @@ import org.junit.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
@ -47,7 +49,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
reads.setMaxPileupSize(MAX_READS);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(records.iterator(), reads);
li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads);
// inject the testing version of the locus iterator watcher
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
@ -73,7 +75,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
reads.setMaxPileupSize(MAX_READS);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(records.iterator(), reads);
li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads);
// inject the testing version of the locus iterator watcher
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
@ -102,4 +104,32 @@ class TestReads extends Reads {
public void setMaxPileupSize(int maxSize) {
this.maximumReadsAtLocus = maxSize;
}
}
/**
 * Test helper that presents a plain Iterator as a CloseableIterator.
 * Iteration is delegated to the wrapped iterator; close() does nothing and remove() is rejected.
 */
class FakeCloseableIterator<T> implements CloseableIterator<T> {
    // the iterator every hasNext()/next() call is forwarded to
    Iterator<T> iterator;

    /**
     * @param it the iterator to wrap
     */
    public FakeCloseableIterator(Iterator<T> it) {
        this.iterator = it;
    }

    /** No-op: there is nothing to release. */
    @Override
    public void close() {
    }

    /** @return whether the wrapped iterator has more elements */
    @Override
    public boolean hasNext() {
        return this.iterator.hasNext();
    }

    /** @return the next element of the wrapped iterator */
    @Override
    public T next() {
        return this.iterator.next();
    }

    /** Always throws: removal is not supported. */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("Don't remove!");
    }
}

View File

@ -61,7 +61,7 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest {
public void testBuilderIndexUnwriteable() {
File vcfFile = new File(validationDataLocation + "/ROD_validation/mixedup.vcf");
try {
builder.createIndex(vcfFile,new VCFCodec());
builder.createIndex(vcfFile,new VCFCodec(), true);
} catch (IOException e) {
Assert.fail("Unable to make index because of IO exception " + e.getMessage());
}

View File

@ -186,6 +186,11 @@ class FakeSeekableRODIterator implements LocationAwareSeekableRODIterator {
public void remove() {
throw new IllegalStateException("GRRR");
}
/**
* Intentionally a no-op.
*/
@Override
public void close() {
// nothing to do
}
}

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="79" status="integration" publication="20100507124200" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="80" status="integration" publication="20100512124200" />
</ivy-module>