Lower memory support for merged sharding. Merged sharding is still not available.
WARNING: If you update frequently, you might have to rm -rf ~/.ant/cache -- this is an unfortunate side effect of the way we distribute picard-private.jar. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3050 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4d4db7fe63
commit
c0eb5c27ea
|
|
@ -74,7 +74,7 @@ public class BAMFileStat extends CommandLineProgram {
|
|||
inspector.inspect(System.out,null,null);
|
||||
}
|
||||
|
||||
private class BAMFileIndexContentInspector extends BAMFileIndex2 {
|
||||
private class BAMFileIndexContentInspector extends PreloadedBAMFileIndex {
|
||||
public BAMFileIndexContentInspector(File bamFileIndex) {
|
||||
super(bamFileIndex);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
|
|||
this.dataSource = (BlockDrivenSAMDataSource)dataSource;
|
||||
this.position = this.dataSource.getCurrentPosition();
|
||||
if(locations != null)
|
||||
filePointers.addAll(IntervalSharder.shardIntervals(this.dataSource,locations.toList(),this.dataSource.getNumIndexLevels()-1));
|
||||
filePointers.addAll(IntervalSharder.shardIntervals(this.dataSource,locations.toList()));
|
||||
|
||||
filePointerIterator = filePointers.iterator();
|
||||
if(filePointerIterator.hasNext())
|
||||
|
|
@ -91,15 +91,14 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
|
|||
}
|
||||
|
||||
public void advance() {
|
||||
Map<SAMReaderID,List<Chunk>> shardPosition = null;
|
||||
Map<SAMReaderID,List<Chunk>> shardPosition = new HashMap<SAMReaderID,List<Chunk>>();
|
||||
nextShard = null;
|
||||
SamRecordFilter filter = null;
|
||||
|
||||
if(!filePointers.isEmpty()) {
|
||||
Map<SAMReaderID,List<Chunk>> selectedReaders = new HashMap<SAMReaderID,List<Chunk>>();
|
||||
while(selectedReaders.size() == 0 && currentFilePointer != null) {
|
||||
shardPosition = dataSource.getFilePointersBounding(currentFilePointer.bin);
|
||||
|
||||
shardPosition = currentFilePointer.chunks;
|
||||
for(SAMReaderID id: shardPosition.keySet()) {
|
||||
List<Chunk> chunks = shardPosition.get(id);
|
||||
List<Chunk> selectedChunks = new ArrayList<Chunk>();
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
|
|||
|
||||
|
||||
this.reads = (BlockDrivenSAMDataSource)reads;
|
||||
filePointers.addAll(IntervalSharder.shardIntervals(this.reads,intervals,this.reads.getNumIndexLevels()-1));
|
||||
filePointers.addAll(IntervalSharder.shardIntervals(this.reads,intervals));
|
||||
}
|
||||
else {
|
||||
final int maxShardSize = 100000;
|
||||
|
|
@ -130,7 +130,7 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
|
|||
*/
|
||||
public IndexDelimitedLocusShard next() {
|
||||
FilePointer nextFilePointer = filePointerIterator.next();
|
||||
Map<SAMReaderID,List<Chunk>> chunksBounding = reads!=null ? reads.getFilePointersBounding(nextFilePointer.bin) : null;
|
||||
Map<SAMReaderID,List<Chunk>> chunksBounding = nextFilePointer.chunks != null ? nextFilePointer.chunks : null;
|
||||
return new IndexDelimitedLocusShard(nextFilePointer.locations,chunksBounding,Shard.ShardType.LOCUS_INTERVAL);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,14 +2,15 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.BlockDrivenSAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.Bin;
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
|
||||
/**
|
||||
* Shard intervals based on position within the BAM file.
|
||||
|
|
@ -18,22 +19,157 @@ import net.sf.samtools.Bin;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class IntervalSharder {
|
||||
protected static List<FilePointer> shardIntervals(final BlockDrivenSAMDataSource dataSource, final List<GenomeLoc> loci, final int binsDeeperThan) {
|
||||
protected static List<FilePointer> shardIntervals(final BlockDrivenSAMDataSource dataSource, final List<GenomeLoc> loci) {
|
||||
Map<SAMReaderID,List<FilePointer>> filePointersByReader = new HashMap<SAMReaderID,List<FilePointer>>();
|
||||
for(SAMReaderID id: dataSource.getReaderIDs()) {
|
||||
PreloadedBAMFileIndex index = dataSource.getIndex(id);
|
||||
// Gather bins for the given loci, splitting loci as necessary so that each falls into exactly one lowest-level bin.
|
||||
filePointersByReader.put(id,shardIntervalsOverIndex(dataSource,id,index,loci,index.getNumIndexLevels()-1));
|
||||
index.close();
|
||||
}
|
||||
return combineFilePointers(filePointersByReader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Combine adjacent file pointers into a structure that can be streamed in.
|
||||
* @param filePointersByReader File pointers broken down by reader.
|
||||
* @return The merged file pointers, one per overlapping segment, each carrying the chunks of every file pointer that overlaps that segment.
|
||||
*/
|
||||
private static List<FilePointer> combineFilePointers(Map<SAMReaderID,List<FilePointer>> filePointersByReader) {
|
||||
PeekableIterator<FilePointer> mergingIterator = new PeekableIterator<FilePointer>(new FilePointerMergingIterator(filePointersByReader));
|
||||
|
||||
List<FilePointer> overlappingFilePointers = new ArrayList<FilePointer>();
|
||||
List<FilePointer> mergedFilePointers = new ArrayList<FilePointer>();
|
||||
|
||||
while(mergingIterator.hasNext()) {
|
||||
GenomeLoc bounds = null;
|
||||
|
||||
// Load up a segment where file pointers overlap
|
||||
while(mergingIterator.hasNext() && (overlappingFilePointers.size() == 0 || mergingIterator.peek().getBounds().overlapsP(bounds))) {
|
||||
FilePointer filePointer = mergingIterator.next();
|
||||
if(bounds != null)
|
||||
bounds = GenomeLocParser.createGenomeLoc(bounds.getContig(),
|
||||
Math.min(bounds.getStart(),filePointer.getBounds().getStart()),
|
||||
Math.max(bounds.getStop(),filePointer.getBounds().getStop()));
|
||||
else
|
||||
bounds = filePointer.getBounds();
|
||||
overlappingFilePointers.add(filePointer);
|
||||
}
|
||||
|
||||
// determine the complete set of unique locations defining this set.
|
||||
List<GenomeLoc> overlappingLocations = new ArrayList<GenomeLoc>();
|
||||
for(FilePointer filePointer: overlappingFilePointers)
|
||||
overlappingLocations.addAll(filePointer.locations);
|
||||
Collections.sort(overlappingLocations);
|
||||
overlappingLocations = GenomeLocSortedSet.mergeOverlappingLocations(overlappingLocations);
|
||||
|
||||
while(!overlappingLocations.isEmpty()) {
|
||||
long overlapStart = overlappingLocations.get(0).getStart();
|
||||
long overlapStop = overlappingLocations.get(overlappingLocations.size()-1).getStop();
|
||||
|
||||
for(FilePointer overlappingFilePointer: overlappingFilePointers) {
|
||||
if(overlappingFilePointer.getBounds().getStop() < overlapStart)
|
||||
continue;
|
||||
if(overlappingFilePointer.getBounds().getStart() > overlapStart) overlapStop = Math.min(overlapStop,overlappingFilePointer.getBounds().getStart()-1);
|
||||
if(overlappingFilePointer.getBounds().getStop() < overlapStop) overlapStop = Math.min(overlapStop,overlappingFilePointer.getBounds().getStop());
|
||||
}
|
||||
|
||||
// Find the overlapping genome locs.
|
||||
List<GenomeLoc> segmentOverlap = new ArrayList<GenomeLoc>();
|
||||
for(GenomeLoc overlappingLocation: overlappingLocations) {
|
||||
if(overlappingLocation.getStop() <= overlapStop) {
|
||||
// segment is completely before end of overlap.
|
||||
segmentOverlap.add(overlappingLocation);
|
||||
}
|
||||
else if(overlappingLocation.getStart() <= overlapStop) {
|
||||
// segment is partially before end of overlap.
|
||||
segmentOverlap.add(GenomeLocParser.setStop(overlappingLocation,overlapStop));
|
||||
break;
|
||||
}
|
||||
else {
|
||||
// segment starts after overlap ends.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Trim the overlapping genome locs of the overlapping locations list.
|
||||
while(!overlappingLocations.isEmpty() && overlappingLocations.get(0).getStart() <= overlapStop) {
|
||||
GenomeLoc location = overlappingLocations.remove(0);
|
||||
if(location.getStop() > overlapStop)
|
||||
overlappingLocations.add(0,GenomeLocParser.setStart(location,overlapStop+1));
|
||||
}
|
||||
|
||||
// Merge together all file pointers that overlap with these bounds.
|
||||
GenomeLoc overlapBounds = GenomeLocParser.createGenomeLoc(segmentOverlap.get(0).getContigIndex(),overlapStart,overlapStop);
|
||||
FilePointer mergedFilePointer = null;
|
||||
for(FilePointer overlappingFilePointer: overlappingFilePointers) {
|
||||
if(overlappingFilePointer.getBounds().overlapsP(overlapBounds))
|
||||
mergedFilePointer = overlappingFilePointer.merge(mergedFilePointer,segmentOverlap);
|
||||
}
|
||||
|
||||
// Add the resulting file pointer and clear state.
|
||||
mergedFilePointers.add(mergedFilePointer);
|
||||
}
|
||||
|
||||
// reset
|
||||
overlappingFilePointers.clear();
|
||||
}
|
||||
|
||||
return mergedFilePointers;
|
||||
}
|
||||
|
||||
private static class FilePointerMergingIterator implements Iterator<FilePointer> {
|
||||
private PriorityQueue<PeekableIterator<FilePointer>> filePointerQueue;
|
||||
|
||||
public FilePointerMergingIterator(Map<SAMReaderID,List<FilePointer>> filePointers) {
|
||||
filePointerQueue = new PriorityQueue<PeekableIterator<FilePointer>>(filePointers.size(),new FilePointerMergingComparator());
|
||||
for(List<FilePointer> filePointersByReader: filePointers.values())
|
||||
filePointerQueue.add(new PeekableIterator<FilePointer>(filePointersByReader.iterator()));
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return !filePointerQueue.isEmpty();
|
||||
}
|
||||
|
||||
public FilePointer next() {
|
||||
if(!hasNext()) throw new NoSuchElementException("FilePointerMergingIterator is out of elements");
|
||||
PeekableIterator<FilePointer> nextIterator = filePointerQueue.remove();
|
||||
FilePointer nextFilePointer = nextIterator.next();
|
||||
if(nextIterator.hasNext())
|
||||
filePointerQueue.add(nextIterator);
|
||||
return nextFilePointer;
|
||||
}
|
||||
|
||||
public void remove() { throw new UnsupportedOperationException("Cannot remove from a merging iterator."); }
|
||||
|
||||
private class FilePointerMergingComparator implements Comparator<PeekableIterator<FilePointer>> {
|
||||
public int compare(PeekableIterator<FilePointer> lhs, PeekableIterator<FilePointer> rhs) {
|
||||
if(!lhs.hasNext() && !rhs.hasNext()) return 0;
|
||||
if(!rhs.hasNext()) return -1;
|
||||
if(!lhs.hasNext()) return 1;
|
||||
return lhs.peek().getBounds().compareTo(rhs.peek().getBounds());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static List<FilePointer> shardIntervalsOverIndex(final BlockDrivenSAMDataSource dataSource, final SAMReaderID id, final PreloadedBAMFileIndex index, final List<GenomeLoc> loci, final int binsDeeperThan) {
|
||||
// Gather bins for the given loci, splitting loci as necessary so that each falls into exactly one lowest-level bin.
|
||||
List<FilePointer> filePointers = new ArrayList<FilePointer>();
|
||||
FilePointer filePointer = null;
|
||||
FilePointer lastFilePointer = null;
|
||||
Bin lastBin = null;
|
||||
|
||||
for(GenomeLoc location: loci) {
|
||||
// If crossing contigs, be sure to reset the filepointer that's been accumulating shard data.
|
||||
if(filePointer != null && filePointer.bin.referenceSequence != location.getContigIndex()) {
|
||||
filePointers.add(filePointer);
|
||||
filePointer = null;
|
||||
if(lastFilePointer != null && lastFilePointer.referenceSequence != location.getContigIndex()) {
|
||||
filePointers.add(lastFilePointer);
|
||||
lastFilePointer = null;
|
||||
lastBin = null;
|
||||
}
|
||||
|
||||
int locationStart = (int)location.getStart();
|
||||
final int locationStop = (int)location.getStop();
|
||||
|
||||
List<Bin> bins = findBinsAtLeastAsDeepAs(dataSource,dataSource.getOverlappingBins(location),binsDeeperThan);
|
||||
List<Bin> bins = findBinsAtLeastAsDeepAs(index,getOverlappingBins(dataSource,id,index,location),binsDeeperThan);
|
||||
|
||||
// Recursive stopping condition -- algorithm is at the zero point and no bins have been found.
|
||||
if(binsDeeperThan == 0 && bins.size() == 0) {
|
||||
|
|
@ -43,12 +179,13 @@ public class IntervalSharder {
|
|||
|
||||
// No bins found; step up a level and search again.
|
||||
if(bins.size() == 0) {
|
||||
if(filePointer != null && filePointer.locations.size() > 0) {
|
||||
filePointers.add(filePointer);
|
||||
filePointer = null;
|
||||
if(lastFilePointer != null && lastFilePointer.locations.size() > 0) {
|
||||
filePointers.add(lastFilePointer);
|
||||
lastFilePointer = null;
|
||||
lastBin = null;
|
||||
}
|
||||
|
||||
filePointers.addAll(shardIntervals(dataSource,Collections.singletonList(location),binsDeeperThan-1));
|
||||
filePointers.addAll(shardIntervalsOverIndex(dataSource,id,index,Collections.singletonList(location),binsDeeperThan-1));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -57,48 +194,51 @@ public class IntervalSharder {
|
|||
Iterator<Bin> binIterator = bins.iterator();
|
||||
|
||||
while(locationStop >= locationStart) {
|
||||
int binStart = filePointer!=null ? dataSource.getFirstLocusInBin(filePointer.bin) : 0;
|
||||
int binStop = filePointer!=null ? dataSource.getLastLocusInBin(filePointer.bin) : 0;
|
||||
int binStart = lastFilePointer!=null ? index.getFirstLocusInBin(lastBin) : 0;
|
||||
int binStop = lastFilePointer!=null ? index.getLastLocusInBin(lastBin) : 0;
|
||||
|
||||
while(binStop < locationStart && binIterator.hasNext()) {
|
||||
if(filePointer != null && filePointer.locations.size() > 0)
|
||||
filePointers.add(filePointer);
|
||||
if(lastFilePointer != null && lastFilePointer.locations.size() > 0)
|
||||
filePointers.add(lastFilePointer);
|
||||
|
||||
filePointer = new FilePointer(binIterator.next());
|
||||
binStart = dataSource.getFirstLocusInBin(filePointer.bin);
|
||||
binStop = dataSource.getLastLocusInBin(filePointer.bin);
|
||||
lastBin = binIterator.next();
|
||||
lastFilePointer = new FilePointer(id,lastBin.referenceSequence,getFilePointersBounding(index,lastBin));
|
||||
binStart = index.getFirstLocusInBin(lastBin);
|
||||
binStop = index.getLastLocusInBin(lastBin);
|
||||
}
|
||||
|
||||
if(locationStart < binStart) {
|
||||
// The region starts before the first bin in the sequence. Add the region occurring before the sequence.
|
||||
if(filePointer != null && filePointer.locations.size() > 0) {
|
||||
filePointers.add(filePointer);
|
||||
filePointer = null;
|
||||
if(lastFilePointer != null && lastFilePointer.locations.size() > 0) {
|
||||
filePointers.add(lastFilePointer);
|
||||
lastFilePointer = null;
|
||||
lastBin = null;
|
||||
}
|
||||
|
||||
final int regionStop = Math.min(locationStop,binStart-1);
|
||||
|
||||
GenomeLoc subset = GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,regionStop);
|
||||
filePointers.addAll(shardIntervals(dataSource,Collections.singletonList(subset),binsDeeperThan-1));
|
||||
filePointers.addAll(shardIntervalsOverIndex(dataSource,id,index,Collections.singletonList(subset),binsDeeperThan-1));
|
||||
|
||||
locationStart = regionStop + 1;
|
||||
}
|
||||
else if(locationStart > binStop) {
|
||||
// The region starts after the last bin in the sequence. Add the region occurring after the sequence.
|
||||
if(filePointer != null && filePointer.locations.size() > 0) {
|
||||
filePointers.add(filePointer);
|
||||
filePointer = null;
|
||||
if(lastFilePointer != null && lastFilePointer.locations.size() > 0) {
|
||||
filePointers.add(lastFilePointer);
|
||||
lastFilePointer = null;
|
||||
lastBin = null;
|
||||
}
|
||||
|
||||
GenomeLoc subset = GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,locationStop);
|
||||
filePointers.addAll(shardIntervals(dataSource,Collections.singletonList(subset),binsDeeperThan-1));
|
||||
filePointers.addAll(shardIntervalsOverIndex(dataSource,id,index,Collections.singletonList(subset),binsDeeperThan-1));
|
||||
|
||||
locationStart = locationStop + 1;
|
||||
}
|
||||
else {
|
||||
// The start of the region overlaps the bin. Add the overlapping subset.
|
||||
final int regionStop = Math.min(locationStop,binStop);
|
||||
filePointer.addLocation(GenomeLocParser.createGenomeLoc(location.getContig(),
|
||||
lastFilePointer.addLocation(GenomeLocParser.createGenomeLoc(location.getContig(),
|
||||
locationStart,
|
||||
regionStop));
|
||||
locationStart = regionStop + 1;
|
||||
|
|
@ -106,43 +246,99 @@ public class IntervalSharder {
|
|||
}
|
||||
}
|
||||
|
||||
if(filePointer != null && filePointer.locations.size() > 0)
|
||||
filePointers.add(filePointer);
|
||||
if(lastFilePointer != null && lastFilePointer.locations.size() > 0)
|
||||
filePointers.add(lastFilePointer);
|
||||
|
||||
return filePointers;
|
||||
}
|
||||
|
||||
private static List<Bin> findBinsAtLeastAsDeepAs(final BlockDrivenSAMDataSource dataSource, final List<Bin> bins, final int deepestBinLevel) {
|
||||
private static List<Bin> findBinsAtLeastAsDeepAs(final PreloadedBAMFileIndex index, final List<Bin> bins, final int deepestBinLevel) {
|
||||
List<Bin> deepestBins = new ArrayList<Bin>();
|
||||
for(Bin bin: bins) {
|
||||
if(dataSource.getLevelForBin(bin) >= deepestBinLevel)
|
||||
if(index.getLevelForBin(bin) >= deepestBinLevel)
|
||||
deepestBins.add(bin);
|
||||
}
|
||||
return deepestBins;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a list of the bins in the given reader's index that overlap with the given location.
|
||||
* @param location Location for which to determine the bin.
|
||||
* @return The list of overlapping bins; empty if the location's contig is not present in the reader's header.
|
||||
*/
|
||||
private static List<Bin> getOverlappingBins(final BlockDrivenSAMDataSource dataSource, final SAMReaderID id, final PreloadedBAMFileIndex index, final GenomeLoc location) {
|
||||
// Look up the contig's reference index in this reader's header.
|
||||
final SAMFileHeader fileHeader = dataSource.getHeader(id);
|
||||
int referenceIndex = fileHeader.getSequenceIndex(location.getContig());
|
||||
if (referenceIndex != -1) {
|
||||
return index.getBinsContaining(referenceIndex,(int)location.getStart(),(int)location.getStop());
|
||||
}
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the chunks bounded by this bin in the given index.
|
||||
* @param bin The bin for which to load data.
|
||||
* @return A list of the chunks bounding the bin; empty if the bin is null or the index returns none for it.
|
||||
*/
|
||||
private static List<Chunk> getFilePointersBounding(final PreloadedBAMFileIndex index, final Bin bin) {
|
||||
if(bin != null) {
|
||||
List<Chunk> chunks = index.getSearchBins(bin);
|
||||
return chunks != null ? chunks : Collections.<Chunk>emptyList();
|
||||
}
|
||||
else
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a small section of a BAM file, and every associated interval.
|
||||
*/
|
||||
class FilePointer {
|
||||
protected final Bin bin;
|
||||
protected final Map<SAMReaderID,List<Chunk>> chunks = new HashMap<SAMReaderID,List<Chunk>>();
|
||||
protected final int referenceSequence;
|
||||
protected final List<GenomeLoc> locations;
|
||||
|
||||
public FilePointer(Bin bin) {
|
||||
this.bin = bin;
|
||||
public FilePointer(SAMReaderID id, int referenceSequence, List<Chunk> chunks) {
|
||||
this.referenceSequence = referenceSequence;
|
||||
this.chunks.put(id,chunks);
|
||||
this.locations = new ArrayList<GenomeLoc>();
|
||||
}
|
||||
|
||||
public FilePointer(GenomeLoc location) {
|
||||
bin = null;
|
||||
referenceSequence = location.getContigIndex();
|
||||
locations = Collections.singletonList(location);
|
||||
}
|
||||
|
||||
/**
|
||||
* Private constructor for merge operation.
|
||||
* @param referenceSequence Sequence to merge.
|
||||
* @param locations Merged locations.
|
||||
*/
|
||||
private FilePointer(final int referenceSequence, final List<GenomeLoc> locations) {
|
||||
this.referenceSequence = referenceSequence;
|
||||
this.locations = locations;
|
||||
}
|
||||
|
||||
public FilePointer merge(FilePointer other, List<GenomeLoc> locations) {
|
||||
FilePointer merged = new FilePointer(referenceSequence,locations);
|
||||
merged.chunks.putAll(this.chunks);
|
||||
if(other != null)
|
||||
merged.chunks.putAll(other.chunks);
|
||||
return merged;
|
||||
}
|
||||
|
||||
public void addLocation(GenomeLoc location) {
|
||||
locations.add(location);
|
||||
}
|
||||
|
||||
public GenomeLoc getBounds() {
|
||||
final long boundaryStart = locations.get(0).getStart();
|
||||
final long boundaryStop = locations.get(locations.size()-1).getStop();
|
||||
return GenomeLocParser.createGenomeLoc(locations.get(0).getContigIndex(),boundaryStart,boundaryStop);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -96,36 +96,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
}
|
||||
|
||||
/**
|
||||
* Gets a list of the bins in each BAM file that overlap with the given interval list.
|
||||
* @param location Location for which to determine the bin.
|
||||
* @return A map of reader back to bin.
|
||||
* Gets the index for a particular reader. Always preloaded.
|
||||
* @param id Id of the reader.
|
||||
* @return The index. Will preload the index if necessary.
|
||||
*/
|
||||
public List<Bin> getOverlappingBins(final GenomeLoc location) {
|
||||
public PreloadedBAMFileIndex getIndex(final SAMReaderID id) {
|
||||
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
||||
if(readers.isEmpty())
|
||||
return Collections.emptyList();
|
||||
|
||||
// All readers will have the same bin structure, so just use the first bin as an example.
|
||||
SAMFileReader2 reader = (SAMFileReader2)readers.iterator().next();
|
||||
return reader.getOverlappingBins(location.getContig(),(int)location.getStart(),(int)location.getStop());
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the file pointers bounded by this bin, grouped by the reader of origination.
|
||||
* @param bin The bin for which to load data.
|
||||
* @return A map of the file pointers bounding the bin.
|
||||
*/
|
||||
public Map<SAMReaderID,List<Chunk>> getFilePointersBounding(Bin bin) {
|
||||
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
||||
Map<SAMReaderID,List<Chunk>> filePointers = new HashMap<SAMReaderID,List<Chunk>>();
|
||||
for(SAMReaderID id: getReaderIDs()) {
|
||||
SAMFileReader2 reader2 = (SAMFileReader2)readers.getReader(id);
|
||||
if(bin != null)
|
||||
filePointers.put(id,reader2.getFilePointersBounding(bin));
|
||||
else
|
||||
filePointers.put(id,Collections.<Chunk>emptyList());
|
||||
}
|
||||
return filePointers;
|
||||
return ((SAMFileReader2)readers.getReader(id)).getIndex(PreloadedBAMFileIndex.class);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -136,65 +113,6 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
return readerPositions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of levels employed by this index.
|
||||
* @return Number of levels in this index.
|
||||
*/
|
||||
public int getNumIndexLevels() {
|
||||
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
||||
if(readers.isEmpty())
|
||||
throw new StingException("Unable to determine number of index levels; no BAMs are present.");
|
||||
if(!hasIndex())
|
||||
throw new SAMException("Unable to determine number of index levels; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getNumIndexLevels();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the level associated with the given bin number.
|
||||
* @param bin The bin for which to determine the level.
|
||||
* @return the level associated with the given bin number.
|
||||
*/
|
||||
public int getLevelForBin(final Bin bin) {
|
||||
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
||||
if(readers.isEmpty())
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex())
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getLevelForBin(bin);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the first locus that this bin can index into.
|
||||
* @param bin The bin to test.
|
||||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
public int getFirstLocusInBin(final Bin bin) {
|
||||
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
||||
if(readers.isEmpty())
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex())
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getFirstLocusInBin(bin);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the last locus that this bin can index into.
|
||||
* @param bin The bin to test.
|
||||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
public int getLastLocusInBin(final Bin bin) {
|
||||
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
||||
if(readers.isEmpty())
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex())
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getLastLocusInBin(bin);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fill the given buffering shard with reads.
|
||||
* @param shard Shard to fill.
|
||||
|
|
@ -264,6 +182,8 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
Map<SAMFileReader,CloseableIterator<SAMRecord>> readerToIteratorMap = new HashMap<SAMFileReader,CloseableIterator<SAMRecord>>();
|
||||
for(SAMReaderID id: getReaderIDs()) {
|
||||
SAMFileReader2 reader2 = (SAMFileReader2)readers.getReader(id);
|
||||
if(shard.getChunks().get(id) == null)
|
||||
continue;
|
||||
CloseableIterator<SAMRecord> iterator = reader2.iterator(shard.getChunks().get(id));
|
||||
if(shard.getFilter() != null)
|
||||
iterator = new FilteringIterator(iterator,shard.getFilter());
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="picard" revision="1.16.359-sharding" status="release" />
|
||||
<info organisation="net.sf" module="picard" revision="1.16.360-sharding" status="release" />
|
||||
</ivy-module>
|
||||
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="sam" revision="1.16.359-sharding" status="release" />
|
||||
<info organisation="net.sf" module="sam" revision="1.16.360-sharding" status="release" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue