Fixes for parallel processing problems with Tribble and for a small bug in the resource pool, plus some more documentation.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3349 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-05-12 06:13:26 +00:00
parent 6868ce988f
commit 2c55ac1374
26 changed files with 204 additions and 203 deletions

View File

@ -1,12 +1,13 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources; package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broad.tribble.FeatureReader;
import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.QueryableTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
@ -41,7 +42,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
/** /**
* A pool of iterators for navigating through the genome. * A pool of iterators for navigating through the genome.
*/ */
private final ReferenceOrderedDataPool iteratorPool; private final ResourcePool<?,LocationAwareSeekableRODIterator> iteratorPool;
/** /**
* Create a new reference-ordered data source. * Create a new reference-ordered data source.
@ -49,8 +50,10 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
*/ */
public ReferenceOrderedDataSource( Walker walker, RMDTrack rod) { public ReferenceOrderedDataSource( Walker walker, RMDTrack rod) {
this.rod = rod; this.rod = rod;
if (rod.supportsQuery()) iteratorPool = null; if (rod.supportsQuery())
else iteratorPool = new ReferenceOrderedDataPool( walker, rod ); iteratorPool = new ReferenceOrderedQueryDataPool(new TribbleRMDTrackBuilder(), rod);
else
iteratorPool = new ReferenceOrderedDataPool( walker, rod );
} }
/** /**
@ -75,11 +78,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* @return Iterator through the data. * @return Iterator through the data.
*/ */
public LocationAwareSeekableRODIterator seek( Shard shard ) { public LocationAwareSeekableRODIterator seek( Shard shard ) {
if (iteratorPool == null) // use query
return getQuery(shard.getGenomeLocs() == null || shard.getGenomeLocs().size() == 0 ? null : shard.getGenomeLocs());
DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream(); DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream();
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment); return iteratorPool.iterator(dataStreamSegment);
return RODIterator;
} }
/** /**
@ -90,30 +90,17 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* @return Iterator through the data. * @return Iterator through the data.
*/ */
public LocationAwareSeekableRODIterator seek(GenomeLoc loc) { public LocationAwareSeekableRODIterator seek(GenomeLoc loc) {
if (iteratorPool == null) // use query
return getQuery(loc == null ? null : Arrays.asList(loc));
DataStreamSegment dataStreamSegment = loc != null ? new MappedStreamSegment(loc) : new EntireStream(); DataStreamSegment dataStreamSegment = loc != null ? new MappedStreamSegment(loc) : new EntireStream();
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment); return iteratorPool.iterator(dataStreamSegment);
return RODIterator;
} }
/**
* assuming the ROD is a queryable ROD, use that interface to get an iterator to the selected region
* @param loc the region to query for
* @return a LocationAwareSeekableRODIterator over the selected region
*/
private LocationAwareSeekableRODIterator getQuery(List<GenomeLoc> loc) {
if (loc == null) // for the mono shard case
return new SeekableRODIterator(rod.getIterator());
return new StitchingLocationAwareSeekableRODIterator(loc,(QueryableTrack)rod);
}
/** /**
* Close the specified iterator, returning it to the pool. * Close the specified iterator, returning it to the pool.
* @param iterator Iterator to close. * @param iterator Iterator to close.
*/ */
public void close( LocationAwareSeekableRODIterator iterator ) { public void close( LocationAwareSeekableRODIterator iterator ) {
if (iteratorPool != null) iteratorPool.release(iterator); iteratorPool.release(iterator);
} }
} }
@ -189,79 +176,55 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
} }
/** /**
* stitch together the multiple calls to seek (since shards can have multiple intervals now) * a data pool for the new query based RODs
* on the underlying Tribble track into one seamless iteration
*/ */
class StitchingLocationAwareSeekableRODIterator implements LocationAwareSeekableRODIterator { class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureReader, LocationAwareSeekableRODIterator> {
// the list of intervals we're iterating over // the reference-ordered data itself.
private final LinkedList<GenomeLoc> locationList; private final RMDTrack rod;
// The reference-ordered data itself. // our tribble track builder
private final QueryableTrack rod; private final TribbleRMDTrackBuilder builder;
// the current iterator public ReferenceOrderedQueryDataPool( TribbleRMDTrackBuilder builder, RMDTrack rod ) {
private SeekableRODIterator iterator; this.rod = rod;
this.builder = builder;
StitchingLocationAwareSeekableRODIterator(List<GenomeLoc> list, QueryableTrack rmd) {
rod = rmd;
locationList = new LinkedList<GenomeLoc>();
locationList.addAll(list);
fetchNextInterval();
} }
@Override @Override
public GenomeLoc peekNextLocation() { protected FeatureReader createNewResource() {
if (iterator == null) return null; return builder.createFeatureReader(rod.getType(),rod.getFile());
return iterator.peekNextLocation();
} }
@Override @Override
public GenomeLoc position() { protected FeatureReader selectBestExistingResource(DataStreamSegment segment, List<FeatureReader> availableResources) {
if (iterator == null) return null; for (FeatureReader reader : availableResources)
return iterator.position(); if (reader != null) return reader;
return null;
} }
@Override @Override
public RODRecordList seekForward(GenomeLoc interval) { protected LocationAwareSeekableRODIterator createIteratorFromResource(DataStreamSegment position, FeatureReader resource) {
RODRecordList list = iterator.seekForward(interval);
if (list == null) { // we were unable to seek the current interval to the location
fetchNextInterval();
list = iterator.seekForward(interval);
}
return list;
}
@Override
public boolean hasNext() {
if (iterator == null) return false;
return iterator.hasNext();
}
@Override
public RODRecordList next() {
if (!hasNext()) throw new IllegalStateException("StitchingLocationAwareSeekableRODIterator: We do not have a next");
RODRecordList list = iterator.next();
if (!iterator.hasNext()) fetchNextInterval();
return list;
}
@Override
public void remove() {
throw new UnsupportedOperationException("\"Thou shall not remove()!\" - Software Engineering Team");
}
private void fetchNextInterval() {
if (locationList != null && locationList.size() > 0) {
GenomeLoc loc = locationList.getFirst();
locationList.removeFirst();
if (rod == null) throw new StingException("Unable to query(), target rod is null, next location = " + ((locationList != null) ? locationList.getFirst() : "null"));
try { try {
iterator = new SeekableRODIterator(rod.query(loc)); if (position instanceof MappedStreamSegment) {
GenomeLoc pos = ((MappedStreamSegment) position).locus;
//System.err.println("Querying position1 " + pos.getContig() + " start " + pos.getStart() + " stop " + pos.getStop());
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(resource.query(pos.getContig(), (int) pos.getStart(), (int) pos.getStop()),rod.getName()));
} else {
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(resource.iterator(),rod.getName()));
}
} catch (IOException e) { } catch (IOException e) {
throw new StingException("Unable to query iterator with location " + loc + " and rod name of " + ((RMDTrack)rod).getName()); throw new StingException("Unable to create iterator for rod named " + rod.getName());
} }
} }
@Override
protected void closeResource(FeatureReader resource) {
try {
resource.close();
} catch (IOException e) {
throw new StingException("Unable to close reader for rod named " + rod.getName());
}
} }
} }

View File

@ -62,7 +62,7 @@ abstract class ResourcePool <T,I extends Iterator> {
// Remove the iterator from the list of available iterators. // Remove the iterator from the list of available iterators.
availableResources.remove(selectedResource); availableResources.remove(selectedResource);
}
I iterator = createIteratorFromResource(segment, selectedResource); I iterator = createIteratorFromResource(segment, selectedResource);
@ -71,6 +71,7 @@ abstract class ResourcePool <T,I extends Iterator> {
return iterator; return iterator;
} }
}
/** /**
* Release the lock on the given iterator, returning it to the pool. * Release the lock on the given iterator, returning it to the pool.

View File

@ -73,6 +73,7 @@ public abstract class GenotypeWriterStorage<T extends GenotypeWriter> implements
this.stream = null; this.stream = null;
writer = GenotypeWriterFactory.create(stub.getFormat(), file); writer = GenotypeWriterFactory.create(stub.getFormat(), file);
Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader()); Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader());
// TODO: this line is a problem, creating with an empty hashset eliminates any genotype FORMAT fields in the calls (besides GT)
GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet<VCFHeaderLine>()); GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet<VCFHeaderLine>());
} }

View File

@ -64,11 +64,11 @@ public class VCFGenotypeWriterStorage extends GenotypeWriterStorage<VCFGenotypeW
* @param target Target stream for the temporary storage. May not be null. * @param target Target stream for the temporary storage. May not be null.
*/ */
public void mergeInto(VCFGenotypeWriter target) { public void mergeInto(VCFGenotypeWriter target) {
VCFReader reader = new VCFReader(file); // make sure we pass false to the reader, so that it doesn't create an index on disk
VCFReader reader = new VCFReader(file,false);
while ( reader.hasNext() ) while ( reader.hasNext() )
target.addRecord(reader.next()); target.addRecord(reader.next());
reader.close(); reader.close();
file.delete(); file.delete();
} }
} }

View File

@ -1,46 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.iterators;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Iterator;
import java.util.List;
/**
 * Interface PeekableRODIterator
 * <p/>
 * The operations offered by a peekable ROD iterator: plain iteration over lists of
 * {@link ReferenceOrderedDatum}, plus the two location-based methods declared below.
 *
 * @author aaron
 */
public interface PeekableRODIterator extends Iterator<List<ReferenceOrderedDatum>> {

    /**
     * @return the location of the next element; presumably the iterator is not
     *         advanced by this call -- confirm against the implementations
     */
    GenomeLoc peekNextLocation();

    /**
     * @param interval the interval to seek forward to
     * @return the records obtained for the given interval
     */
    RODRecordList seekForward(GenomeLoc interval);
}

View File

@ -30,7 +30,6 @@ public class PushbackIterator<T> implements Iterator<T>, Iterable<T> {
/** /**
* Retrieves, but does not remove, the head of this iterator. * Retrieves, but does not remove, the head of this iterator.
* @return T the next element in the iterator * @return T the next element in the iterator
* @throws NoSuchElementException - if the iterator doesn't have a next element
*/ */
public T element() { public T element() {
T x = next(); T x = next();
@ -40,7 +39,6 @@ public class PushbackIterator<T> implements Iterator<T>, Iterable<T> {
/** /**
* @return the next element in the iteration. * @return the next element in the iteration.
* @throws NoSuchElementException - iteration has no more elements.
*/ */
public T next() { public T next() {
if (pushedElement != null) { if (pushedElement != null) {

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.refdata; package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
@ -78,7 +79,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
// This implementation tracks the query history and makes next() illegal after a seekforward query of length > 1, // This implementation tracks the query history and makes next() illegal after a seekforward query of length > 1,
// but re-enables next() again after a length-1 query. // but re-enables next() again after a length-1 query.
public SeekableRODIterator(Iterator<GATKFeature> it) { public SeekableRODIterator(CloseableIterator<GATKFeature> it) {
this.it = new PushbackIterator<GATKFeature>(it); this.it = new PushbackIterator<GATKFeature>(it);
records = new LinkedList<GATKFeature>(); records = new LinkedList<GATKFeature>();
// the following is a trick: we would like the iterator to know the actual name assigned to // the following is a trick: we would like the iterator to know the actual name assigned to
@ -169,7 +170,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
if ( r.getLocation().getStart() < curr_position ) if ( r.getLocation().getStart() < curr_position )
throw new StingException("LocationAwareSeekableRODIterator: track "+r.getName() + throw new StingException("LocationAwareSeekableRODIterator: track "+r.getName() +
" is out of coordinate order on contig "+r.getLocation().getContig()); " is out of coordinate order on contig "+r.getLocation() + " compared to " + curr_contig + ":" + curr_position);
if ( r.getLocation().getStart() > curr_position ) break; // next record starts after the current position; we do not need it yet if ( r.getLocation().getStart() > curr_position ) break; // next record starts after the current position; we do not need it yet
@ -334,4 +335,8 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
} }
/**
 * Closes the iterator that backs this seekable ROD iterator.
 * Does nothing if no pushback iterator is held.
 */
@Override
public void close() {
    // 'it' wraps the iterator handed to the constructor, which is declared as a
    // CloseableIterator; unwrap it and close that. The wildcard avoids a raw-type cast.
    if (this.it != null) {
        ((CloseableIterator<?>) this.it.getUnderlyingIterator()).close();
    }
}
} }

View File

@ -23,6 +23,7 @@
package org.broadinstitute.sting.gatk.refdata.tracks; package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.FeatureReader; import org.broad.tribble.FeatureReader;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator; import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
@ -64,7 +65,7 @@ public class FeatureReaderTrack extends RMDTrack implements QueryableTrack {
* but other more advanced tracks support the query interface * but other more advanced tracks support the query interface
*/ */
@Override @Override
public Iterator<GATKFeature> getIterator() { public CloseableIterator<GATKFeature> getIterator() {
try { try {
return new FeatureToGATKFeatureIterator(reader.iterator(),this.getName()); return new FeatureToGATKFeatureIterator(reader.iterator(),this.getName());
} catch (IOException e) { } catch (IOException e) {
@ -83,22 +84,22 @@ public class FeatureReaderTrack extends RMDTrack implements QueryableTrack {
} }
@Override @Override
public Iterator<GATKFeature> query(GenomeLoc interval) throws IOException { public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
} }
@Override @Override
public Iterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException { public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop(), contained),this.getName()); return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop(), contained),this.getName());
} }
@Override @Override
public Iterator<GATKFeature> query(String contig, int start, int stop) throws IOException { public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName()); return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName());
} }
@Override @Override
public Iterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException { public CloseableIterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop, contained),this.getName()); return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop, contained),this.getName());
} }

View File

@ -23,6 +23,7 @@
package org.broadinstitute.sting.gatk.refdata.tracks; package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
@ -37,9 +38,9 @@ import java.util.Iterator;
* a decorator interface for tracks that are queryable * a decorator interface for tracks that are queryable
*/ */
public interface QueryableTrack { public interface QueryableTrack {
public Iterator<GATKFeature> query(final GenomeLoc interval) throws IOException; public CloseableIterator<GATKFeature> query(final GenomeLoc interval) throws IOException;
public Iterator<GATKFeature> query(final GenomeLoc interval, final boolean contained) throws IOException; public CloseableIterator<GATKFeature> query(final GenomeLoc interval, final boolean contained) throws IOException;
public Iterator<GATKFeature> query(final String contig, final int start, final int stop) throws IOException; public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop) throws IOException;
public Iterator<GATKFeature> query(final String contig, final int start, final int stop, final boolean contained) throws IOException; public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop, final boolean contained) throws IOException;
public void close(); public void close();
} }

View File

@ -23,6 +23,7 @@
package org.broadinstitute.sting.gatk.refdata.tracks; package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import java.io.File; import java.io.File;
@ -73,7 +74,7 @@ public abstract class RMDTrack {
* @return how to get an iterator of the underlying data. This is all a track has to support, * @return how to get an iterator of the underlying data. This is all a track has to support,
* but other more advanced tracks support the query interface * but other more advanced tracks support the query interface
*/ */
public abstract Iterator<GATKFeature> getIterator(); public abstract CloseableIterator<GATKFeature> getIterator();
/** /**
* helper function for determining if we are the same track * helper function for determining if we are the same track

View File

@ -23,6 +23,7 @@
package org.broadinstitute.sting.gatk.refdata.tracks; package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
@ -64,7 +65,7 @@ public class RODRMDTrack extends RMDTrack {
* but other more advanced tracks support the query interface * but other more advanced tracks support the query interface
*/ */
@Override @Override
public Iterator<GATKFeature> getIterator() { public CloseableIterator<GATKFeature> getIterator() {
return new GATKFeatureIterator(data.iterator()); return new GATKFeatureIterator(data.iterator());
} }

View File

@ -26,11 +26,10 @@
package org.broadinstitute.sting.gatk.refdata.tracks.builders; package org.broadinstitute.sting.gatk.refdata.tracks.builders;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broad.tribble.Feature; import org.broad.tribble.*;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.index.linear.LinearIndex; import org.broad.tribble.index.linear.LinearIndex;
import org.broad.tribble.index.linear.LinearIndexCreator; import org.broad.tribble.index.linear.LinearIndexCreator;
import org.broad.tribble.readers.BasicFeatureReader;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureReaderTrack; import org.broadinstitute.sting.gatk.refdata.tracks.FeatureReaderTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
@ -53,16 +52,6 @@ import java.util.Map;
* This class keeps track of the available codecs, and knows how to put together a track of * This class keeps track of the available codecs, and knows how to put together a track of
* that gets iterators from the FeatureReader using Tribble. * that gets iterators from the FeatureReader using Tribble.
* *
* Here's an example run command to find SNPs 200 base pairs up and downstream of the target file.
*
* java -jar dist/GenomeAnalysisTK.jar \
* -R /broad/1KG/reference/human_b36_both.fasta \
* -L 1:1863 \
* -L MT:16520 \
* -db /humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_129_b36.rod \
* -dbw 200 \
* -l INFO \
* -T DbSNPWindowCounter
*/ */
public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implements RMDTrackBuilder { public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implements RMDTrackBuilder {
/** /**
@ -120,11 +109,11 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
// check to see if the input file has an index // check to see if the input file has an index
if (requireIndex(inputFile)) { if (requireIndex(inputFile)) {
logger.warn("Creating Tribble Index for file " + inputFile); logger.warn("Creating Tribble Index for file " + inputFile);
LinearIndex index = createIndex(inputFile, this.createByType(targetClass)); LinearIndex index = createIndex(inputFile, this.createByType(targetClass), true);
reader = new FeatureReader(inputFile,index, this.createByType(targetClass)); reader = new BasicFeatureReader(inputFile,index, this.createByType(targetClass));
} }
else { else {
reader = new FeatureReader(inputFile,this.createByType(targetClass)); reader = new BasicFeatureReader(inputFile,this.createByType(targetClass));
} }
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
throw new StingException("Unable to create reader with file " + inputFile, e); throw new StingException("Unable to create reader with file " + inputFile, e);
@ -138,18 +127,19 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
* create an index for the input file * create an index for the input file
* @param inputFile the input file * @param inputFile the input file
* @param codec the codec to use * @param codec the codec to use
* @param onDisk write the index to disk?
* @return a linear index for the specified type * @return a linear index for the specified type
* @throws IOException if we cannot write the index file * @throws IOException if we cannot write the index file
*/ */
public static LinearIndex createIndex(File inputFile, FeatureCodec codec) throws IOException { public static LinearIndex createIndex(File inputFile, FeatureCodec codec, boolean onDisk) throws IOException {
LinearIndexCreator create = new LinearIndexCreator(inputFile, codec); LinearIndexCreator create = new LinearIndexCreator(inputFile, codec);
// if we can write the index, we should, but if not just create it in memory // if we can write the index, we should, but if not just create it in memory
File indexFile = new File(inputFile.getAbsoluteFile() + linearIndexExtension); File indexFile = new File(inputFile.getAbsoluteFile() + linearIndexExtension);
if (indexFile.getParentFile().canWrite() && (!indexFile.exists() || indexFile.canWrite())) if (indexFile.getParentFile().canWrite() && (!indexFile.exists() || indexFile.canWrite()) && onDisk)
return create.createIndex(); return create.createIndex();
else { else {
logger.info("Unable to write to location " + indexFile + " for index file, creating index in memory only"); if (onDisk) logger.info("Unable to write to location " + indexFile + " for index file, creating index in memory only");
return create.createIndex(null); return create.createIndex(null);
} }

View File

@ -23,6 +23,7 @@
package org.broadinstitute.sting.gatk.refdata.utils; package org.broadinstitute.sting.gatk.refdata.utils;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import java.util.Iterator; import java.util.Iterator;
@ -36,11 +37,11 @@ import java.util.Iterator;
* *
* a wrapper on Tribble feature iterators so that they produce GATKFeatures (which produce GenomeLocs) * a wrapper on Tribble feature iterators so that they produce GATKFeatures (which produce GenomeLocs)
*/ */
public class FeatureToGATKFeatureIterator implements Iterator<GATKFeature> { public class FeatureToGATKFeatureIterator implements CloseableIterator<GATKFeature> {
private final Iterator<Feature> iterator; private final CloseableIterator<Feature> iterator;
private final String name; private final String name;
public FeatureToGATKFeatureIterator(Iterator<Feature> iter, String name) { public FeatureToGATKFeatureIterator(CloseableIterator<Feature> iter, String name) {
this.name = name; this.name = name;
this.iterator = iter; this.iterator = iter;
} }
@ -59,4 +60,9 @@ public class FeatureToGATKFeatureIterator implements Iterator<GATKFeature> {
public void remove() { public void remove() {
throw new UnsupportedOperationException("Why does Iterator have this method? We always throw an exception here"); throw new UnsupportedOperationException("Why does Iterator have this method? We always throw an exception here");
} }
/**
 * Closes the wrapped Tribble feature iterator.
 */
@Override
public void close() {
    iterator.close();
}
} }

View File

@ -23,6 +23,7 @@
package org.broadinstitute.sting.gatk.refdata.utils; package org.broadinstitute.sting.gatk.refdata.utils;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import java.util.Iterator; import java.util.Iterator;
@ -36,7 +37,7 @@ import java.util.Iterator;
* *
* Takes a RODatum iterator and makes it an iterator of GATKFeatures. Shazam! * Takes a RODatum iterator and makes it an iterator of GATKFeatures. Shazam!
*/ */
public class GATKFeatureIterator implements Iterator<GATKFeature> { public class GATKFeatureIterator implements CloseableIterator<GATKFeature> {
private final Iterator<ReferenceOrderedDatum> iter; private final Iterator<ReferenceOrderedDatum> iter;
public GATKFeatureIterator(Iterator<ReferenceOrderedDatum> iter) { public GATKFeatureIterator(Iterator<ReferenceOrderedDatum> iter) {
this.iter = iter; this.iter = iter;
@ -56,4 +57,9 @@ public class GATKFeatureIterator implements Iterator<GATKFeature> {
public void remove() { public void remove() {
throw new UnsupportedOperationException("Remove not supported"); throw new UnsupportedOperationException("Remove not supported");
} }
/**
 * No-op close. The wrapped iterator is a plain {@link java.util.Iterator},
 * which offers no close operation to delegate to.
 */
@Override
public void close() {
// do nothing, our underlying iterator doesn't support this
}
} }

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.refdata.utils; package org.broadinstitute.sting.gatk.refdata.utils;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
@ -13,11 +14,10 @@ import java.util.List;
* <p/> * <p/>
* combine iteration with a position aware interface * combine iteration with a position aware interface
*/ */
public interface LocationAwareSeekableRODIterator extends Iterator<RODRecordList> { public interface LocationAwareSeekableRODIterator extends CloseableIterator<RODRecordList> {
public GenomeLoc peekNextLocation(); public GenomeLoc peekNextLocation();
public GenomeLoc position(); public GenomeLoc position();
public RODRecordList seekForward(GenomeLoc interval); public RODRecordList seekForward(GenomeLoc interval);
} }

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.sequenom; package org.broadinstitute.sting.gatk.walkers.sequenom;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -73,7 +74,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
ReferenceOrderedData snp_mask; ReferenceOrderedData snp_mask;
if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) { if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) {
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder(); TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
Iterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPFeature.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator(); CloseableIterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPFeature.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator();
snpMaskIterator = new SeekableRODIterator(iter); snpMaskIterator = new SeekableRODIterator(iter);
} else { } else {

View File

@ -1,9 +1,11 @@
package org.broadinstitute.sting.oneoffprojects.walkers; package org.broadinstitute.sting.oneoffprojects.walkers;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.FeatureIterator; import org.broad.tribble.FeatureIterator;
import org.broad.tribble.FeatureReader; import org.broad.tribble.FeatureReader;
import org.broad.tribble.dbsnp.DbSNPCodec; import org.broad.tribble.dbsnp.DbSNPCodec;
import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broad.tribble.util.CloseableTribbleIterator;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -49,7 +51,7 @@ public class DbSNPWindowCounter extends LocusWalker<Integer, Long> {
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
FeatureIterator<DbSNPFeature> dbSNPs; CloseableTribbleIterator<DbSNPFeature> dbSNPs;
// our upstream and downstream window locations // our upstream and downstream window locations
int windowStart = (int)Math.max(context.getLocation().getStart()-windowSize,0); int windowStart = (int)Math.max(context.getLocation().getStart()-windowSize,0);

View File

@ -7,7 +7,7 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import java.util.Set; import java.util.Set;
/** /**
* An extension of eth GenotypeWriter interface with support * An extension of the GenotypeWriter interface with support
* for adding header lines. * for adding header lines.
* *
* @author mhanna * @author mhanna

View File

@ -22,6 +22,7 @@ import java.util.zip.GZIPInputStream;
import org.broad.tribble.FeatureReader; import org.broad.tribble.FeatureReader;
import org.broad.tribble.index.linear.LinearIndex; import org.broad.tribble.index.linear.LinearIndex;
import org.broad.tribble.readers.BasicFeatureReader;
import org.broad.tribble.vcf.*; import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
@ -45,7 +46,16 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
* @param vcfFile the vcf file to write * @param vcfFile the vcf file to write
*/ */
public VCFReader(File vcfFile) { public VCFReader(File vcfFile) {
initialize(vcfFile, null); initialize(vcfFile, null, true);
}
/**
* Create a VCF reader, given a VCF file
*
* @param vcfFile the vcf file to read
* @param createIndexOnDisk do we create the index on disk (true) or only in memory (false)?
*/
public VCFReader(File vcfFile, boolean createIndexOnDisk) {
initialize(vcfFile, null, createIndexOnDisk);
} }
/** /**
@ -54,22 +64,21 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
* @param vcfFile the vcf file to write * @param vcfFile the vcf file to write
*/ */
public VCFReader(File vcfFile, VCFCodec.LineTransform transform) { public VCFReader(File vcfFile, VCFCodec.LineTransform transform) {
initialize(vcfFile, transform); initialize(vcfFile, transform, true);
} }
private void initialize(File vcfFile, VCFCodec.LineTransform transform) { /**
* initialize the VCF reader
* @param vcfFile the VCF file to open
* @param transform the line transformer to use, if any
* @param createIndexOnDisk do we need to create an index on disk?
*/
private void initialize(File vcfFile, VCFCodec.LineTransform transform, boolean createIndexOnDisk) {
VCFCodec codec = new VCFCodec(); VCFCodec codec = new VCFCodec();
LinearIndex index = null; LinearIndex index = createIndex(vcfFile, createIndexOnDisk);
if (TribbleRMDTrackBuilder.requireIndex(vcfFile)) {
try {
index = TribbleRMDTrackBuilder.createIndex(vcfFile, new VCFCodec());
} catch (IOException e) {
throw new StingException("Unable to make required index for file " + vcfFile + " do you have write permissions to the directory?");
}
}
if (transform != null) codec.setTransformer(transform); if (transform != null) codec.setTransformer(transform);
try { try {
vcfReader = new FeatureReader(vcfFile,index,codec); vcfReader = new BasicFeatureReader(vcfFile,index,codec);
iterator= vcfReader.iterator(); iterator= vcfReader.iterator();
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
throw new StingException("Unable to read VCF File from " + vcfFile, e); throw new StingException("Unable to read VCF File from " + vcfFile, e);
@ -79,6 +88,24 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
mHeader = codec.getHeader(); mHeader = codec.getHeader();
} }
/**
 * Build the index for a VCF file, if one is required.
 *
 * @param vcfFile the vcf file to index
 * @param createIndexOnDisk do we create the index on disk (or only in memory?)
 * @return an instance of an index, or null when TribbleRMDTrackBuilder.requireIndex
 *         reports that no index is required for this file
 * @throws StingException if building the index fails with an IOException; the
 *         IOException is attached as the cause
 */
private LinearIndex createIndex(File vcfFile, boolean createIndexOnDisk) {
    // no index needed for this file: callers receive null, as before
    if (!TribbleRMDTrackBuilder.requireIndex(vcfFile)) {
        return null;
    }
    try {
        return TribbleRMDTrackBuilder.createIndex(vcfFile, new VCFCodec(), createIndexOnDisk);
    } catch (IOException e) {
        // chain the IOException so the underlying failure is not lost
        throw new StingException("Unable to make required index for file " + vcfFile + " do you have write permissions to the directory?", e);
    }
}
/** @return true if we have another VCF record to return */ /** @return true if we have another VCF record to return */
public boolean hasNext() { public boolean hasNext() {
@ -110,6 +137,9 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
return this; return this;
} }
/**
* close the files
*/
public void close() { public void close() {
if (vcfReader != null) try { if (vcfReader != null) try {
vcfReader.close(); vcfReader.close();

View File

@ -142,6 +142,11 @@ class FakePeekingRODIterator implements LocationAwareSeekableRODIterator {
public void remove() { public void remove() {
throw new IllegalStateException("GRRR"); throw new IllegalStateException("GRRR");
} }
/**
 * Intentionally empty. Present only to satisfy the close() method of the
 * CloseableIterator contract that LocationAwareSeekableRODIterator extends.
 */
@Override
public void close() {
// nothing to do
}
} }
class FakeRODRecordList extends AbstractList<GATKFeature> implements RODRecordList { class FakeRODRecordList extends AbstractList<GATKFeature> implements RODRecordList {

View File

@ -5,6 +5,7 @@ import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
@ -17,6 +18,7 @@ import org.junit.Test;
import java.io.File; import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator;
import java.util.List; import java.util.List;
/** /**
@ -47,7 +49,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
reads.setMaxPileupSize(MAX_READS); reads.setMaxPileupSize(MAX_READS);
// create the iterator by state with the fake reads and fake records // create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(records.iterator(), reads); li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads);
// inject the testing version of the locus iterator watcher // inject the testing version of the locus iterator watcher
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS)); li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
@ -73,7 +75,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
reads.setMaxPileupSize(MAX_READS); reads.setMaxPileupSize(MAX_READS);
// create the iterator by state with the fake reads and fake records // create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(records.iterator(), reads); li = new LocusIteratorByState(new FakeCloseableIterator(records.iterator()), reads);
// inject the testing version of the locus iterator watcher // inject the testing version of the locus iterator watcher
li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS)); li.setLocusOverflowTracker(new LocusIteratorOverride(MAX_READS));
@ -103,3 +105,31 @@ class TestReads extends Reads {
this.maximumReadsAtLocus = maxSize; this.maximumReadsAtLocus = maxSize;
} }
} }
/**
 * Test helper that adapts a plain Iterator to the CloseableIterator interface.
 * hasNext() and next() are forwarded to the wrapped iterator, close() does
 * nothing, and remove() is not supported.
 *
 * @param <T> the element type produced by the iterator
 */
class FakeCloseableIterator<T> implements CloseableIterator<T> {
    // the wrapped iterator; assigned once in the constructor and never replaced
    private final Iterator<T> iterator;

    /**
     * @param it the iterator to wrap
     */
    public FakeCloseableIterator(Iterator<T> it) {
        iterator = it;
    }

    /** Nothing to release: the wrapped iterator has no close operation. */
    @Override
    public void close() {
    }

    /** @return true if the wrapped iterator has more elements */
    @Override
    public boolean hasNext() {
        return iterator.hasNext();
    }

    /** @return the next element of the wrapped iterator */
    @Override
    public T next() {
        return iterator.next();
    }

    /**
     * Removal is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("Don't remove!");
    }
}

View File

@ -61,7 +61,7 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest {
public void testBuilderIndexUnwriteable() { public void testBuilderIndexUnwriteable() {
File vcfFile = new File(validationDataLocation + "/ROD_validation/mixedup.vcf"); File vcfFile = new File(validationDataLocation + "/ROD_validation/mixedup.vcf");
try { try {
builder.createIndex(vcfFile,new VCFCodec()); builder.createIndex(vcfFile,new VCFCodec(), true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail("Unable to make index because of IO exception " + e.getMessage()); Assert.fail("Unable to make index because of IO exception " + e.getMessage());
} }

View File

@ -186,6 +186,11 @@ class FakeSeekableRODIterator implements LocationAwareSeekableRODIterator {
public void remove() { public void remove() {
throw new IllegalStateException("GRRR"); throw new IllegalStateException("GRRR");
} }
/**
 * Intentionally empty. Present only to satisfy the close() method of the
 * CloseableIterator contract that LocationAwareSeekableRODIterator extends.
 */
@Override
public void close() {
// nothing to do
}
} }

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="79" status="integration" publication="20100507124200" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="80" status="integration" publication="20100512124200" />
</ivy-module>