diff --git a/java/src/net/sf/samtools/BAMFileIndex2.java b/java/src/net/sf/samtools/BAMFileIndex2.java index 6b93ace94..5b303da03 100644 --- a/java/src/net/sf/samtools/BAMFileIndex2.java +++ b/java/src/net/sf/samtools/BAMFileIndex2.java @@ -183,6 +183,101 @@ public class BAMFileIndex2 extends BAMFileIndex } } + /** + * Perform an overlapping query of all bins bounding the given location. + * @param bin The bin over which to perform an overlapping query. + * @return The file pointers + */ + long[] getFilePointersBounding(final Bin bin) { + if(bin == null) + return null; + + final int referenceSequence = bin.referenceSequence; + final Bin[] allBins = referenceToBins.get(referenceSequence); + + final int binLevel = getLevelForBinNumber(bin.binNumber); + final int firstLocusInBin = getFirstLocusInBin(bin); + + List binTree = new ArrayList(); + binTree.add(bin); + + int currentBinLevel = binLevel; + while(--currentBinLevel >= 0) { + final int binStart = LEVEL_STARTS[currentBinLevel]; + final int binWidth = BIN_SPAN/(LEVEL_STARTS[currentBinLevel+1]-LEVEL_STARTS[currentBinLevel]); + final int binNumber = firstLocusInBin/binWidth + binStart; + for(Bin referenceBin: allBins) { + if(binNumber == referenceBin.binNumber) + binTree.add(referenceBin); + } + } + + List chunkList = new ArrayList(); + for(Bin coveringBin: binTree) + chunkList.addAll(binToChunks.get(coveringBin)); + + // Find the nearest adjacent bin. This can act as a minimum offset + Bin closestAdjacentBin = null; + for(Bin adjacentBin: allBins) { + if(getLevelForBinNumber(adjacentBin.binNumber) != binLevel) + continue; + if(adjacentBin.binNumber> BAM_LIDX_SHIFT; + LinearIndex index = referenceToLinearIndices.get(referenceSequence); + long minimumOffset = 0; + if (regionLinearBin < index.indexEntries.length) + minimumOffset = index.indexEntries[regionLinearBin]; + + chunkList = optimizeChunkList(chunkList, minimumOffset); + long[] chunkArray = convertToArray(chunkList); + + // Trim off anything before the first desired bin. + int location = Arrays.binarySearch(chunkArray,adjacentBinOffset); + // location not found, but insertion point was determined. + long trimmedChunkArray[] = chunkArray; + + // If the location of the element is in an even bucket (a start position), trim everything before it. + if(location >= 0) { + if(location%2==0) { + trimmedChunkArray = new long[chunkArray.length-location]; + System.arraycopy(chunkArray,location,trimmedChunkArray,0,trimmedChunkArray.length); + } + else { + trimmedChunkArray = new long[chunkArray.length-location-1]; + System.arraycopy(chunkArray,location+1,trimmedChunkArray,0,trimmedChunkArray.length); + } + } + else { + location = -(location+1); + if(location < chunkArray.length) { + if(location%2==0) { + trimmedChunkArray = new long[chunkArray.length-location]; + System.arraycopy(chunkArray,location,trimmedChunkArray,0,trimmedChunkArray.length); + } + else { + trimmedChunkArray = new long[chunkArray.length-location+1]; + trimmedChunkArray[0] = adjacentBinOffset; + System.arraycopy(chunkArray,location,trimmedChunkArray,1,trimmedChunkArray.length-1); + } + } + } + + return trimmedChunkArray; + } + /** * Get list of regions of BAM file that may contain SAMRecords for the given range * @param referenceIndex sequence of desired SAMRecords diff --git a/java/src/net/sf/samtools/BAMFileReader2.java b/java/src/net/sf/samtools/BAMFileReader2.java index 993aa908c..5f21f6e13 100644 --- a/java/src/net/sf/samtools/BAMFileReader2.java +++ b/java/src/net/sf/samtools/BAMFileReader2.java @@ -194,14 +194,9 @@ class BAMFileReader2 return bins; } - public List getFilePointersBounding(final String sequence, final int start, final int end) { - final SAMFileHeader fileHeader = getFileHeader(); - long[] filePointers = null; - int referenceIndex = fileHeader.getSequenceIndex(sequence); - if (referenceIndex != -1) { - final BAMFileIndex2 fileIndex = getFileIndex(); - filePointers = fileIndex.getFilePointersContaining(referenceIndex,start,end); - } + public List getFilePointersBounding(final Bin bin) { + final BAMFileIndex2 fileIndex = getFileIndex(); + long[] filePointers = fileIndex.getFilePointersBounding(bin); return (filePointers != null) ? Chunk.toChunkList(filePointers) : Collections.emptyList(); } diff --git a/java/src/net/sf/samtools/SAMFileReader2.java b/java/src/net/sf/samtools/SAMFileReader2.java index 823737f64..e1120e1ef 100644 --- a/java/src/net/sf/samtools/SAMFileReader2.java +++ b/java/src/net/sf/samtools/SAMFileReader2.java @@ -147,10 +147,10 @@ public class SAMFileReader2 extends SAMFileReader { return reader.getOverlappingBins(sequence,start,end); } - public List getFilePointersBounding(final String sequence, final int start, final int end) { + public List getFilePointersBounding(final Bin bin) { // TODO: Add sanity checks so that we're not doing this against an unsupported BAM file. BAMFileReader2 reader = (BAMFileReader2)JVMUtils.getFieldValue(getField("mReader"),this); - return reader.getFilePointersBounding(sequence,start,end); + return reader.getFilePointersBounding(bin); } private Field getField(String fieldName) { diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IndexDelimitedLocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IndexDelimitedLocusShardStrategy.java index 695d86fdc..8a17cbc64 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IndexDelimitedLocusShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IndexDelimitedLocusShardStrategy.java @@ -127,7 +127,7 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy { start = Math.min(loc.getStart(),start); stop = Math.max(loc.getStop(),stop); } - Map> chunksBounding = blockDrivenDataSource.getFilePointersBounding(GenomeLocParser.createGenomeLoc(contig,start,stop)); + Map> chunksBounding = blockDrivenDataSource.getFilePointersBounding(nextFilePointer.bin); return new IndexDelimitedLocusShard(nextFilePointer.locations,chunksBounding,Shard.ShardType.LOCUS_INTERVAL); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java index a38080eb0..59ea2a2ab 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java @@ -7,7 +7,6 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.JVMUtils; import net.sf.samtools.*; import net.sf.samtools.util.CloseableIterator; import net.sf.picard.sam.SamFileHeaderMerger; @@ -70,14 +69,14 @@ public class BlockDrivenSAMDataSource extends SAMDataSource { /** * Gets the file pointers bounded by this bin, grouped by the reader of origination. - * @param locus The loci for which to load data. + * @param bin The bin for which to load data. * @return A map of the file pointers bounding the bin. */ - public Map> getFilePointersBounding(GenomeLoc locus) { + public Map> getFilePointersBounding(Bin bin) { Map> filePointers = new HashMap>(); for(SAMFileReader reader: headerMerger.getReaders()) { SAMFileReader2 reader2 = (SAMFileReader2)reader; - filePointers.put(reader2,reader2.getFilePointersBounding(locus.getContig(),(int)locus.getStart(),(int)locus.getStop())); + filePointers.put(reader2,reader2.getFilePointersBounding(bin)); } return filePointers; } @@ -154,8 +153,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource { Map> readerToIteratorMap = new HashMap>(); for(Map.Entry> chunksByReader: bamAwareShard.getChunks().entrySet()) { SAMFileReader2 reader = chunksByReader.getKey(); - GenomeLoc bounds = bamAwareShard.getBounds(); - readerToIteratorMap.put(reader,reader.queryOverlapping(bounds.getContig(),(int)bounds.getStart(),(int)bounds.getStop())); + readerToIteratorMap.put(reader,reader.iterator(bamAwareShard.getChunks().get(reader))); } // Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set.