Rewrite of GATKBAMIndex to avoid mmaps causing false reports of heavy memory
usage. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5620 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
36d8f55286
commit
22a11e41e1
|
|
@ -1,328 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.GATKBin;
|
||||
import net.sf.samtools.GATKChunk;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
/**
|
||||
* Creates an 'index of the index' for a particular reference sequence
|
||||
* within the BAM file, for easier whole-BAM-file traversal.
|
||||
* file.
|
||||
*/
|
||||
public class BAMIndexBinIterator {
|
||||
/**
|
||||
* The source of index data.
|
||||
*/
|
||||
private final GATKBAMIndex index;
|
||||
|
||||
/**
|
||||
* The file storing the index data.
|
||||
*/
|
||||
private final File indexFile;
|
||||
|
||||
/**
|
||||
* File storing index metadata.
|
||||
*/
|
||||
private final File metaIndexFile;
|
||||
|
||||
/**
|
||||
* Reference sequence that temporary file is based on.
|
||||
*/
|
||||
private final int referenceSequence;
|
||||
|
||||
/**
|
||||
* Position of the linear index in the BAM on disk for the given reference sequence.
|
||||
*/
|
||||
private final long linearIndexPosition;
|
||||
|
||||
/**
|
||||
* Size of the linear index for the given reference sequence.
|
||||
*/
|
||||
private final int linearIndexSize;
|
||||
|
||||
/**
|
||||
* Size of a long in bytes.
|
||||
*/
|
||||
private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
|
||||
|
||||
public BAMIndexBinIterator(final GATKBAMIndex index, final File indexFile, final int referenceSequence) {
|
||||
this.index = index;
|
||||
this.indexFile = indexFile;
|
||||
this.referenceSequence = referenceSequence;
|
||||
|
||||
index.seek(4);
|
||||
|
||||
final int sequenceCount = index.readInteger();
|
||||
|
||||
if (referenceSequence >= sequenceCount)
|
||||
throw new ReviewedStingException(String.format("Reference sequence past end of genome; reference sequence = %d, sequence count = %d",referenceSequence,sequenceCount));
|
||||
|
||||
index.skipToSequence(referenceSequence);
|
||||
|
||||
int binCount = index.readInteger();
|
||||
|
||||
try {
|
||||
metaIndexFile = File.createTempFile("bammetaindex."+referenceSequence,null);
|
||||
metaIndexFile.deleteOnExit();
|
||||
|
||||
FileOutputStream metaIndex = new FileOutputStream(metaIndexFile);
|
||||
FileChannel metaIndexChannel = metaIndex.getChannel();
|
||||
|
||||
// zero out the contents of the file. Arrays of primitives in java are always zeroed out by default.
|
||||
byte[] emptyContents = new byte[GATKBAMIndex.MAX_BINS*LONG_SIZE_IN_BYTES]; // byte array is zeroed out by default.
|
||||
metaIndexChannel.write(ByteBuffer.wrap(emptyContents));
|
||||
|
||||
ByteBuffer positionBuffer = ByteBuffer.allocate(emptyContents.length);
|
||||
positionBuffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
// Write the positions of the bins in the index
|
||||
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
||||
long position = index.position();
|
||||
|
||||
final int indexBin = index.readInteger();
|
||||
|
||||
metaIndexChannel.position(indexBin*LONG_SIZE_IN_BYTES);
|
||||
positionBuffer.putLong(position);
|
||||
positionBuffer.flip();
|
||||
|
||||
metaIndexChannel.write(positionBuffer);
|
||||
positionBuffer.flip();
|
||||
|
||||
final int nChunks = index.readInteger();
|
||||
index.skipBytes(16 * nChunks);
|
||||
}
|
||||
|
||||
// Read the size of the linear index and the first position.
|
||||
linearIndexSize = index.readInteger();
|
||||
linearIndexPosition = index.position();
|
||||
|
||||
metaIndexChannel.close();
|
||||
metaIndex.close();
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to write BAM metaindex",ex);
|
||||
}
|
||||
}
|
||||
|
||||
public void close() {
|
||||
metaIndexFile.delete();
|
||||
}
|
||||
|
||||
public CloseableIterator<GATKBin> getIteratorOverLevel(int level) {
|
||||
return new LevelIterator(level);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the linear index entry for the specified genomic start.
|
||||
* @param genomicStart Starting location for which to search.
|
||||
* @return Linear index entry corresponding to this genomic start.
|
||||
*/
|
||||
public long getLinearIndexEntry(final int genomicStart) {
|
||||
final int indexOffset = (genomicStart-1 >> 14);
|
||||
// If the number of bins is exceeded, don't perform any trimming.
|
||||
if(indexOffset >= linearIndexSize)
|
||||
return 0L;
|
||||
|
||||
FileInputStream indexInputStream;
|
||||
try {
|
||||
indexInputStream = new FileInputStream(indexFile);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to open index file for reading");
|
||||
}
|
||||
|
||||
try {
|
||||
indexInputStream.getChannel().position(linearIndexPosition+(indexOffset*LONG_SIZE_IN_BYTES));
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to position index file for reading");
|
||||
}
|
||||
|
||||
ByteBuffer indexReader = ByteBuffer.allocate(LONG_SIZE_IN_BYTES);
|
||||
indexReader.order(ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
try {
|
||||
indexInputStream.getChannel().read(indexReader);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to position index file for reading");
|
||||
}
|
||||
|
||||
indexReader.flip();
|
||||
final long linearIndexEntry = indexReader.getLong();
|
||||
|
||||
try {
|
||||
indexInputStream.close();
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to close index file.");
|
||||
}
|
||||
|
||||
return linearIndexEntry;
|
||||
}
|
||||
|
||||
private class LevelIterator implements CloseableIterator<GATKBin> {
|
||||
/**
|
||||
* The raw BAM index file with unordered bins.
|
||||
*/
|
||||
private final FileInputStream indexInputStream;
|
||||
|
||||
/**
|
||||
* The index metafile, with pointers to ordered bins.
|
||||
*/
|
||||
private final FileInputStream metaIndexInputStream;
|
||||
|
||||
/**
|
||||
* The first and last bins in the level.
|
||||
*/
|
||||
private final int firstBinNumber, lastBinNumber;
|
||||
|
||||
/**
|
||||
* The current bin in the index.
|
||||
*/
|
||||
private int currentBinNumber;
|
||||
|
||||
/**
|
||||
* Position of the most recent chunk data.
|
||||
*/
|
||||
private GATKBin nextBin = null;
|
||||
|
||||
public LevelIterator(final int level) {
|
||||
try {
|
||||
indexInputStream = new FileInputStream(indexFile);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to open index file for reading");
|
||||
}
|
||||
try {
|
||||
metaIndexInputStream = new FileInputStream(metaIndexFile);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to open index metafile for reading");
|
||||
}
|
||||
|
||||
firstBinNumber = GATKBAMIndex.getFirstBinInLevel(level);
|
||||
lastBinNumber = firstBinNumber + index.getLevelSize(level) - 1;
|
||||
|
||||
currentBinNumber = firstBinNumber - 1;
|
||||
advance();
|
||||
}
|
||||
|
||||
public void close() {
|
||||
try {
|
||||
indexInputStream.close();
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to close index file.");
|
||||
}
|
||||
try {
|
||||
metaIndexInputStream.close();
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to close index metafile");
|
||||
}
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return nextBin != null;
|
||||
}
|
||||
|
||||
public GATKBin next() {
|
||||
if(!hasNext())
|
||||
throw new NoSuchElementException("Out of elements in BAMIndexBinIterator");
|
||||
GATKBin currentPosition = nextBin;
|
||||
advance();
|
||||
return currentPosition;
|
||||
}
|
||||
|
||||
public void remove() { throw new UnsupportedOperationException("Cannot remove from a LevelIterator"); }
|
||||
|
||||
private void advance() {
|
||||
ByteBuffer indexFilePositionDecoder = ByteBuffer.allocate(LONG_SIZE_IN_BYTES*2);
|
||||
indexFilePositionDecoder.order(ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
nextBin = null;
|
||||
try {
|
||||
indexFilePositionDecoder.limit(LONG_SIZE_IN_BYTES);
|
||||
while(nextBin == null && currentBinNumber < lastBinNumber) {
|
||||
currentBinNumber++;
|
||||
metaIndexInputStream.getChannel().position(currentBinNumber*LONG_SIZE_IN_BYTES);
|
||||
metaIndexInputStream.getChannel().read(indexFilePositionDecoder);
|
||||
indexFilePositionDecoder.flip();
|
||||
long currentPosition = indexFilePositionDecoder.getLong();
|
||||
indexFilePositionDecoder.flip();
|
||||
|
||||
if(currentPosition != 0) {
|
||||
indexInputStream.getChannel().position(currentPosition);
|
||||
indexInputStream.getChannel().read(indexFilePositionDecoder);
|
||||
|
||||
indexFilePositionDecoder.flip();
|
||||
int binNumber = indexFilePositionDecoder.getInt();
|
||||
if(binNumber != currentBinNumber)
|
||||
throw new ReviewedStingException("Index file and metaindex file are out of sync.");
|
||||
|
||||
int nChunks = indexFilePositionDecoder.getInt();
|
||||
GATKChunk[] chunks = new GATKChunk[nChunks];
|
||||
|
||||
indexFilePositionDecoder.limit(LONG_SIZE_IN_BYTES*2);
|
||||
indexFilePositionDecoder.clear();
|
||||
|
||||
for (int ci = 0; ci < nChunks; ci++) {
|
||||
indexInputStream.getChannel().read(indexFilePositionDecoder);
|
||||
|
||||
indexFilePositionDecoder.flip();
|
||||
final long chunkBegin = indexFilePositionDecoder.getLong();
|
||||
final long chunkEnd = indexFilePositionDecoder.getLong();
|
||||
chunks[ci] = new GATKChunk(chunkBegin, chunkEnd);
|
||||
|
||||
indexFilePositionDecoder.flip();
|
||||
}
|
||||
|
||||
nextBin = new GATKBin(referenceSequence,binNumber);
|
||||
nextBin.setChunkList(chunks);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to close index metafile");
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -88,10 +88,10 @@ public class BAMSchedule implements CloseableIterator<BAMScheduleEntry> {
|
|||
|
||||
/**
|
||||
* Create a new BAM schedule based on the given index.
|
||||
* @param indices Mapping from readers to indices.
|
||||
* @param indexFiles Index files.
|
||||
* @param intervals List of
|
||||
*/
|
||||
public BAMSchedule(final Map<SAMReaderID,GATKBAMIndex> indices, final List<GenomeLoc> intervals) {
|
||||
public BAMSchedule(final Map<SAMReaderID,File> indexFiles, final List<GenomeLoc> intervals) {
|
||||
if(intervals.isEmpty())
|
||||
throw new ReviewedStingException("Tried to write schedule for empty interval list.");
|
||||
|
||||
|
|
@ -99,11 +99,10 @@ public class BAMSchedule implements CloseableIterator<BAMScheduleEntry> {
|
|||
|
||||
createScheduleFile();
|
||||
|
||||
readerIDs.addAll(indices.keySet());
|
||||
readerIDs.addAll(indexFiles.keySet());
|
||||
|
||||
for(final SAMReaderID reader: readerIDs) {
|
||||
GATKBAMIndex index = indices.get(reader);
|
||||
BAMIndexContent indexContent = index.getQueryResults(referenceSequence);
|
||||
GATKBAMIndex index = new GATKBAMIndex(indexFiles.get(reader),referenceSequence);
|
||||
|
||||
int currentBinInLowestLevel = GATKBAMIndex.getFirstBinInLevel(GATKBAMIndex.getNumIndexLevels()-1);
|
||||
Iterator<GenomeLoc> locusIterator = intervals.iterator();
|
||||
|
|
@ -133,7 +132,7 @@ public class BAMSchedule implements CloseableIterator<BAMScheduleEntry> {
|
|||
|
||||
// Code at this point knows that the current bin is neither before nor after the current locus,
|
||||
// so it must overlap. Add this region to the filesystem.
|
||||
final GATKBAMFileSpan fileSpan = index.getSpanOverlapping(indexContent,bin);
|
||||
final GATKBAMFileSpan fileSpan = index.getSpanOverlapping(bin);
|
||||
|
||||
if(!fileSpan.isEmpty()) {
|
||||
// File format is binary in little endian; start of region, end of region, num chunks, then the chunks themselves.
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
|
|
@ -46,7 +47,7 @@ import java.util.NoSuchElementException;
|
|||
public class BAMScheduler implements Iterator<FilePointer> {
|
||||
private final SAMDataSource dataSource;
|
||||
|
||||
private final Map<SAMReaderID,GATKBAMIndex> indices = new HashMap<SAMReaderID,GATKBAMIndex>();
|
||||
private final Map<SAMReaderID, File> indexFiles = new HashMap<SAMReaderID,File>();
|
||||
|
||||
private FilePointer nextFilePointer = null;
|
||||
|
||||
|
|
@ -59,7 +60,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
|||
public BAMScheduler(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
|
||||
this.dataSource = dataSource;
|
||||
for(SAMReaderID reader: dataSource.getReaderIDs())
|
||||
indices.put(reader,(GATKBAMIndex)dataSource.getIndex(reader));
|
||||
indexFiles.put(reader,(File)dataSource.getIndex(reader));
|
||||
this.loci = loci;
|
||||
locusIterator = new PeekableIterator<GenomeLoc>(loci.iterator());
|
||||
if(locusIterator.hasNext())
|
||||
|
|
@ -104,7 +105,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
|||
int coveredRegionStop = Integer.MAX_VALUE;
|
||||
GenomeLoc coveredRegion = null;
|
||||
|
||||
BAMScheduleEntry scheduleEntry = getNextOverlappingBAMScheduleEntry(indices,currentLocus);
|
||||
BAMScheduleEntry scheduleEntry = getNextOverlappingBAMScheduleEntry(indexFiles,currentLocus);
|
||||
|
||||
// No overlapping data at all.
|
||||
if(scheduleEntry != null) {
|
||||
|
|
@ -117,7 +118,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
|||
else {
|
||||
// Always create a file span, whether there was covered data or not. If there was no covered data, then the binTree is empty.
|
||||
//System.out.printf("Shard: index file = %s; reference sequence = %d; ",index.getIndexFile(),currentLocus.getContigIndex());
|
||||
for(SAMReaderID reader: indices.keySet())
|
||||
for(SAMReaderID reader: indexFiles.keySet())
|
||||
nextFilePointer.addFileSpans(reader,new GATKBAMFileSpan());
|
||||
}
|
||||
|
||||
|
|
@ -183,11 +184,11 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
|||
|
||||
/**
|
||||
* Get the next overlapping tree of bins associated with the given BAM file.
|
||||
* @param indices BAM index representation.
|
||||
* @param indexFiles BAM index file.
|
||||
* @param currentLocus The actual locus for which to check overlap.
|
||||
* @return The next schedule entry overlapping with the given list of loci.
|
||||
*/
|
||||
private BAMScheduleEntry getNextOverlappingBAMScheduleEntry(final Map<SAMReaderID,GATKBAMIndex> indices, final GenomeLoc currentLocus) {
|
||||
private BAMScheduleEntry getNextOverlappingBAMScheduleEntry(final Map<SAMReaderID,File> indexFiles, final GenomeLoc currentLocus) {
|
||||
// Stale reference sequence or first invocation. (Re)create the binTreeIterator.
|
||||
if(lastReferenceSequenceLoaded == null || lastReferenceSequenceLoaded != currentLocus.getContigIndex()) {
|
||||
if(bamScheduleIterator != null)
|
||||
|
|
@ -201,7 +202,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
|||
lociInContig.add(locus);
|
||||
}
|
||||
|
||||
bamScheduleIterator = new PeekableIterator<BAMScheduleEntry>(new BAMSchedule(indices,lociInContig));
|
||||
bamScheduleIterator = new PeekableIterator<BAMScheduleEntry>(new BAMSchedule(indexFiles,lociInContig));
|
||||
}
|
||||
|
||||
if(!bamScheduleIterator.hasNext())
|
||||
|
|
|
|||
|
|
@ -23,24 +23,20 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.BAMIndex;
|
||||
import net.sf.samtools.BAMIndexMetaData;
|
||||
import net.sf.samtools.Bin;
|
||||
import net.sf.samtools.BrowseableBAMIndex;
|
||||
import net.sf.samtools.GATKBAMFileSpan;
|
||||
import net.sf.samtools.GATKBin;
|
||||
import net.sf.samtools.GATKBinList;
|
||||
import net.sf.samtools.GATKChunk;
|
||||
import net.sf.samtools.LinearIndex;
|
||||
import net.sf.samtools.SAMException;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.util.RuntimeIOException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.MappedByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -50,7 +46,7 @@ import java.util.*;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
||||
public class GATKBAMIndex {
|
||||
/**
|
||||
* Reports the total amount of genomic data that any bin can index.
|
||||
*/
|
||||
|
|
@ -66,46 +62,75 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
|||
*/
|
||||
public static final int MAX_BINS = 37450; // =(8^6-1)/7+1
|
||||
|
||||
public static final int MAX_LINEAR_INDEX_SIZE = MAX_BINS+1-LEVEL_STARTS[LEVEL_STARTS.length-1];
|
||||
|
||||
private final File mFile;
|
||||
private final MappedByteBuffer mFileBuffer;
|
||||
|
||||
private SAMSequenceDictionary mBamDictionary = null;
|
||||
private final List<GATKBin> bins = new ArrayList<GATKBin>();
|
||||
private final LinearIndex linearIndex;
|
||||
|
||||
public GATKBAMIndex(final File file, final SAMSequenceDictionary dictionary) {
|
||||
public GATKBAMIndex(final File file, final int referenceSequence) {
|
||||
mFile = file;
|
||||
mBamDictionary = dictionary;
|
||||
// Open the file stream.
|
||||
try {
|
||||
FileInputStream fileStream = new FileInputStream(mFile);
|
||||
FileChannel fileChannel = fileStream.getChannel();
|
||||
mFileBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0L, fileChannel.size());
|
||||
mFileBuffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
fileChannel.close();
|
||||
fileStream.close();
|
||||
FileInputStream fileStream = null;
|
||||
FileChannel fileChannel = null;
|
||||
|
||||
try {
|
||||
fileStream = new FileInputStream(mFile);
|
||||
fileChannel = fileStream.getChannel();
|
||||
}
|
||||
catch (IOException exc) {
|
||||
throw new RuntimeIOException(exc.getMessage(), exc);
|
||||
}
|
||||
|
||||
// Verify the magic number.
|
||||
seek(0);
|
||||
seek(fileChannel,0);
|
||||
final byte[] buffer = new byte[4];
|
||||
readBytes(buffer);
|
||||
readBytes(fileChannel,buffer);
|
||||
if (!Arrays.equals(buffer, GATKBAMFileConstants.BAM_INDEX_MAGIC)) {
|
||||
throw new RuntimeException("Invalid file header in BAM index " + mFile +
|
||||
": " + new String(buffer));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the file backing this index.
|
||||
* @return The index file.
|
||||
*/
|
||||
public File getIndexFile() {
|
||||
return mFile;
|
||||
seek(fileChannel,4);
|
||||
|
||||
final int sequenceCount = readInteger(fileChannel);
|
||||
|
||||
if (referenceSequence >= sequenceCount)
|
||||
throw new ReviewedStingException("Invalid sequence number " + referenceSequence);
|
||||
|
||||
skipToSequence(fileChannel,referenceSequence);
|
||||
|
||||
int binCount = readInteger(fileChannel);
|
||||
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
||||
final int indexBin = readInteger(fileChannel);
|
||||
final int nChunks = readInteger(fileChannel);
|
||||
List<GATKChunk> chunks = new ArrayList<GATKChunk>(nChunks);
|
||||
for (int ci = 0; ci < nChunks; ci++) {
|
||||
final long chunkBegin = readLong(fileChannel);
|
||||
final long chunkEnd = readLong(fileChannel);
|
||||
chunks.add(new GATKChunk(chunkBegin, chunkEnd));
|
||||
}
|
||||
GATKBin bin = new GATKBin(referenceSequence, indexBin);
|
||||
bin.setChunkList(chunks.toArray(new GATKChunk[chunks.size()]));
|
||||
while(indexBin >= bins.size())
|
||||
bins.add(null);
|
||||
bins.set(indexBin,bin);
|
||||
}
|
||||
|
||||
final int nLinearBins = readInteger(fileChannel);
|
||||
long[] linearIndexEntries = new long[nLinearBins];
|
||||
for(int linearIndexOffset = 0; linearIndexOffset < nLinearBins; linearIndexOffset++)
|
||||
linearIndexEntries[linearIndexOffset] = readLong(fileChannel);
|
||||
|
||||
linearIndex = new LinearIndex(referenceSequence,0,linearIndexEntries);
|
||||
|
||||
try {
|
||||
fileChannel.close();
|
||||
fileStream.close();
|
||||
}
|
||||
catch (IOException exc) {
|
||||
throw new RuntimeIOException(exc.getMessage(), exc);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -142,7 +167,6 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
|||
* @param bin The bin for which to determine the level.
|
||||
* @return the level associated with the given bin number.
|
||||
*/
|
||||
@Override
|
||||
public int getLevelForBin(final Bin bin) {
|
||||
GATKBin gatkBin = new GATKBin(bin);
|
||||
if(gatkBin.getBinNumber() >= MAX_BINS)
|
||||
|
|
@ -171,7 +195,6 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
|||
* @param bin The bin to test.
|
||||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
@Override
|
||||
public int getLastLocusInBin(final Bin bin) {
|
||||
final int level = getLevelForBin(bin);
|
||||
final int levelStart = LEVEL_STARTS[level];
|
||||
|
|
@ -179,164 +202,32 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
|||
return (new GATKBin(bin).getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize);
|
||||
}
|
||||
|
||||
public int getNumberOfReferences() {
|
||||
seek(4);
|
||||
return readInteger();
|
||||
}
|
||||
|
||||
/**
|
||||
* Use to get close to the unmapped reads at the end of a BAM file.
|
||||
* @return The file offset of the first record in the last linear bin, or -1
|
||||
* if there are no elements in linear bins (i.e. no mapped reads).
|
||||
*/
|
||||
public long getStartOfLastLinearBin() {
|
||||
seek(4);
|
||||
|
||||
final int sequenceCount = readInteger();
|
||||
// Because no reads may align to the last sequence in the sequence dictionary,
|
||||
// grab the last element of the linear index for each sequence, and return
|
||||
// the last one from the last sequence that has one.
|
||||
long lastLinearIndexPointer = -1;
|
||||
for (int i = 0; i < sequenceCount; i++) {
|
||||
// System.out.println("# Sequence TID: " + i);
|
||||
final int nBins = readInteger();
|
||||
// System.out.println("# nBins: " + nBins);
|
||||
for (int j1 = 0; j1 < nBins; j1++) {
|
||||
// Skip bin #
|
||||
skipBytes(4);
|
||||
final int nChunks = readInteger();
|
||||
// Skip chunks
|
||||
skipBytes(16 * nChunks);
|
||||
}
|
||||
final int nLinearBins = readInteger();
|
||||
if (nLinearBins > 0) {
|
||||
// Skip to last element of list of linear bins
|
||||
skipBytes(8 * (nLinearBins - 1));
|
||||
lastLinearIndexPointer = readLong();
|
||||
}
|
||||
}
|
||||
|
||||
return lastLinearIndexPointer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets meta data for the given reference including information about number of aligned, unaligned, and noCoordinate records
|
||||
* @param reference the reference of interest
|
||||
* @return meta data for the reference
|
||||
*/
|
||||
public BAMIndexMetaData getMetaData(int reference) {
|
||||
throw new UnsupportedOperationException("Cannot retrieve metadata for GATKBAMIndex");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns count of records unassociated with any reference. Call before the index file is closed
|
||||
*
|
||||
* @return meta data at the end of the bam index that indicates count of records holding no coordinates
|
||||
* or null if no meta data (old index format)
|
||||
*/
|
||||
public Long getNoCoordinateCount() {
|
||||
|
||||
seek(4);
|
||||
final int sequenceCount = readInteger();
|
||||
|
||||
skipToSequence(sequenceCount);
|
||||
try { // in case of old index file without meta data
|
||||
return readLong();
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get list of regions of BAM file that may contain SAMRecords for the given range
|
||||
* @param referenceIndex sequence of desired SAMRecords
|
||||
* @param startPos 1-based start of the desired interval, inclusive
|
||||
* @param endPos 1-based end of the desired interval, inclusive
|
||||
* @return the virtual file position. Each pair is the first and last virtual file position
|
||||
* in a range that can be scanned to find SAMRecords that overlap the given positions.
|
||||
*/
|
||||
@Override
|
||||
public GATKBAMFileSpan getSpanOverlapping(final int referenceIndex, final int startPos, final int endPos) {
|
||||
BAMIndexContent queryResults = getQueryResults(referenceIndex);
|
||||
|
||||
if(queryResults == null)
|
||||
return null;
|
||||
|
||||
GATKBinList overlappingBins = getBinsOverlapping(referenceIndex,startPos,endPos);
|
||||
|
||||
// System.out.println("# Sequence target TID: " + referenceIndex);
|
||||
List<GATKBin> bins = new ArrayList<GATKBin>();
|
||||
for(GATKBin bin: queryResults.getBins()) {
|
||||
if (overlappingBins.getBins().get(bin.getBinNumber()))
|
||||
bins.add(bin);
|
||||
}
|
||||
|
||||
if (bins.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<GATKChunk> chunkList = new ArrayList<GATKChunk>();
|
||||
for(GATKBin bin: bins) {
|
||||
for(GATKChunk chunk: bin.getChunkList())
|
||||
chunkList.add(chunk.clone());
|
||||
}
|
||||
|
||||
if (chunkList.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
chunkList = optimizeChunkList(chunkList,queryResults.getLinearIndex().getMinimumOffset(startPos));
|
||||
return new GATKBAMFileSpan(chunkList.toArray(new GATKChunk[chunkList.size()]));
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an overlapping query of all bins bounding the given location.
|
||||
* @param bin The bin over which to perform an overlapping query.
|
||||
* @return The file pointers
|
||||
*/
|
||||
@Override
|
||||
public GATKBAMFileSpan getSpanOverlapping(final Bin bin) {
|
||||
if(bin == null)
|
||||
return null;
|
||||
|
||||
GATKBin gatkBin = new GATKBin(bin);
|
||||
|
||||
final int referenceSequence = gatkBin.getReferenceSequence();
|
||||
BAMIndexContent indexQuery = getQueryResults(referenceSequence);
|
||||
|
||||
return getSpanOverlapping(indexQuery,bin);
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an overlapping query of all bins bounding the given location.
|
||||
* @param bin The bin over which to perform an overlapping query.
|
||||
* @return The file pointers
|
||||
*/
|
||||
public GATKBAMFileSpan getSpanOverlapping(final BAMIndexContent indexContent, final Bin bin) {
|
||||
if(bin == null)
|
||||
return null;
|
||||
|
||||
GATKBin gatkBin = new GATKBin(bin);
|
||||
|
||||
if(indexContent == null)
|
||||
return null;
|
||||
|
||||
final int binLevel = getLevelForBin(bin);
|
||||
final int firstLocusInBin = getFirstLocusInBin(bin);
|
||||
|
||||
// Add the specified bin to the tree if it exists.
|
||||
List<GATKBin> binTree = new ArrayList<GATKBin>();
|
||||
if(indexContent.containsBin(gatkBin))
|
||||
binTree.add(indexContent.getBins().getBin(gatkBin.getBinNumber()));
|
||||
if(gatkBin.getBinNumber() < bins.size() && bins.get(gatkBin.getBinNumber()) != null)
|
||||
binTree.add(bins.get(gatkBin.getBinNumber()));
|
||||
|
||||
int currentBinLevel = binLevel;
|
||||
while(--currentBinLevel >= 0) {
|
||||
final int binStart = getFirstBinInLevel(currentBinLevel);
|
||||
final int binWidth = getMaxAddressibleGenomicLocation()/getLevelSize(currentBinLevel);
|
||||
final int binNumber = firstLocusInBin/binWidth + binStart;
|
||||
GATKBin parentBin = indexContent.getBins().getBin(binNumber);
|
||||
if(parentBin != null && indexContent.containsBin(parentBin))
|
||||
binTree.add(parentBin);
|
||||
if(binNumber < bins.size() && bins.get(binNumber) != null)
|
||||
binTree.add(bins.get(binNumber));
|
||||
}
|
||||
|
||||
List<GATKChunk> chunkList = new ArrayList<GATKChunk>();
|
||||
|
|
@ -346,173 +237,10 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
|||
}
|
||||
|
||||
final int start = getFirstLocusInBin(bin);
|
||||
chunkList = optimizeChunkList(chunkList,indexContent.getLinearIndex().getMinimumOffset(start));
|
||||
chunkList = optimizeChunkList(chunkList,linearIndex.getMinimumOffset(start));
|
||||
return new GATKBAMFileSpan(chunkList.toArray(new GATKChunk[chunkList.size()]));
|
||||
}
|
||||
|
||||
public GATKBAMFileSpan getContentsOfBin(final Bin bin) {
|
||||
if(bin == null)
|
||||
return null;
|
||||
|
||||
GATKBin gatkBin = new GATKBin(bin);
|
||||
|
||||
BAMIndexContent indexQuery = getQueryResults(gatkBin.getReferenceSequence());
|
||||
|
||||
if(indexQuery == null)
|
||||
return null;
|
||||
|
||||
GATKBin queriedBin = indexQuery.getBins().getBin(gatkBin.getBinNumber());
|
||||
|
||||
return queriedBin != null ? new GATKBAMFileSpan(queriedBin.getChunkList()) : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the linear index for the given reference sequence.
|
||||
* @param referenceSequence Reference sequence number for which to retrieve the reference.
|
||||
* @return The linear index for the given reference sequence.
|
||||
*/
|
||||
public LinearIndex getLinearIndex(int referenceSequence) {
|
||||
return getQueryResults(referenceSequence).getLinearIndex();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a list of bins in the BAM file that may contain SAMRecords for the given range.
|
||||
* @param referenceIndex sequence of desired SAMRecords
|
||||
* @param startPos 1-based start of the desired interval, inclusive
|
||||
* @param endPos 1-based end of the desired interval, inclusive
|
||||
* @return a list of bins that contain relevant data.
|
||||
*/
|
||||
public GATKBinList getBinsOverlapping(final int referenceIndex, final int startPos, final int endPos) {
|
||||
final BitSet regionBins = regionToBins(startPos,endPos);
|
||||
if (regionBins == null) {
|
||||
return null;
|
||||
}
|
||||
return new GATKBinList(referenceIndex,regionBins);
|
||||
}
|
||||
|
||||
protected BAMIndexContent query(final int referenceSequence, final int startPos, final int endPos) {
|
||||
seek(4);
|
||||
|
||||
List<GATKChunk> metaDataChunks = new ArrayList<GATKChunk>();
|
||||
|
||||
final int sequenceCount = readInteger();
|
||||
|
||||
if (referenceSequence >= sequenceCount) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final BitSet regionBins = regionToBins(startPos, endPos);
|
||||
if (regionBins == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
skipToSequence(referenceSequence);
|
||||
|
||||
int binCount = readInteger();
|
||||
boolean metaDataSeen = false;
|
||||
GATKBin[] bins = new GATKBin[getMaxBinNumberForReference(referenceSequence) +1];
|
||||
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
||||
final int indexBin = readInteger();
|
||||
final int nChunks = readInteger();
|
||||
List<GATKChunk> chunks = new ArrayList<GATKChunk>(nChunks);
|
||||
// System.out.println("# bin[" + i + "] = " + indexBin + ", nChunks = " + nChunks);
|
||||
GATKChunk lastChunk = null;
|
||||
if (regionBins.get(indexBin)) {
|
||||
for (int ci = 0; ci < nChunks; ci++) {
|
||||
final long chunkBegin = readLong();
|
||||
final long chunkEnd = readLong();
|
||||
lastChunk = new GATKChunk(chunkBegin, chunkEnd);
|
||||
chunks.add(lastChunk);
|
||||
}
|
||||
} else if (indexBin == MAX_BINS) {
|
||||
// meta data - build the bin so that the count of bins is correct;
|
||||
// but don't attach meta chunks to the bin, or normal queries will be off
|
||||
for (int ci = 0; ci < nChunks; ci++) {
|
||||
final long chunkBegin = readLong();
|
||||
final long chunkEnd = readLong();
|
||||
lastChunk = new GATKChunk(chunkBegin, chunkEnd);
|
||||
metaDataChunks.add(lastChunk);
|
||||
}
|
||||
metaDataSeen = true;
|
||||
continue; // don't create a Bin
|
||||
} else {
|
||||
skipBytes(16 * nChunks);
|
||||
}
|
||||
GATKBin bin = new GATKBin(referenceSequence, indexBin);
|
||||
bin.setChunkList(chunks.toArray(new GATKChunk[chunks.size()]));
|
||||
bins[indexBin] = bin;
|
||||
}
|
||||
|
||||
final int nLinearBins = readInteger();
|
||||
|
||||
final int regionLinearBinStart = LinearIndex.convertToLinearIndexOffset(startPos);
|
||||
final int regionLinearBinStop = endPos > 0 ? LinearIndex.convertToLinearIndexOffset(endPos) : nLinearBins-1;
|
||||
final int actualStop = Math.min(regionLinearBinStop, nLinearBins -1);
|
||||
|
||||
long[] linearIndexEntries = new long[0];
|
||||
if (regionLinearBinStart < nLinearBins) {
|
||||
linearIndexEntries = new long[actualStop-regionLinearBinStart+1];
|
||||
skipBytes(8 * regionLinearBinStart);
|
||||
for(int linearBin = regionLinearBinStart; linearBin <= actualStop; linearBin++)
|
||||
linearIndexEntries[linearBin-regionLinearBinStart] = readLong();
|
||||
}
|
||||
|
||||
final LinearIndex linearIndex = new LinearIndex(referenceSequence,regionLinearBinStart,linearIndexEntries);
|
||||
|
||||
return new BAMIndexContent(referenceSequence, bins, binCount - (metaDataSeen? 1 : 0), linearIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* The maxiumum bin number for a reference sequence of a given length
|
||||
*/
|
||||
static int getMaxBinNumberForSequenceLength(int sequenceLength) {
|
||||
return getFirstBinInLevel(getNumIndexLevels() - 1) + (sequenceLength >> 14);
|
||||
// return 4680 + (sequenceLength >> 14); // note 4680 = getFirstBinInLevel(getNumIndexLevels() - 1)
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks up the cached BAM query results if they're still in the cache and not expired. Otherwise,
|
||||
* retrieves the cache results from disk.
|
||||
* @param referenceIndex The reference to load. CachingBAMFileIndex only stores index data for entire references.
|
||||
* @return The index information for this reference.
|
||||
*/
|
||||
public BAMIndexContent getQueryResults(final int referenceIndex) {
|
||||
// If not in the cache, attempt to load it from disk.
|
||||
return query(referenceIndex,1,-1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the possible number of bins for a given reference sequence.
|
||||
* @return How many bins could possibly be used according to this indexing scheme to index a single contig.
|
||||
*/
|
||||
protected int getMaxAddressibleGenomicLocation() {
|
||||
return BIN_GENOMIC_SPAN;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get candidate bins for the specified region
|
||||
* @param startPos 1-based start of target region, inclusive.
|
||||
* @param endPos 1-based end of target region, inclusive.
|
||||
* @return bit set for each bin that may contain SAMRecords in the target region.
|
||||
*/
|
||||
protected BitSet regionToBins(final int startPos, final int endPos) {
|
||||
final int maxPos = 0x1FFFFFFF;
|
||||
final int start = (startPos <= 0) ? 0 : (startPos-1) & maxPos;
|
||||
final int end = (endPos <= 0) ? maxPos : (endPos-1) & maxPos;
|
||||
if (start > end) {
|
||||
return null;
|
||||
}
|
||||
int k;
|
||||
final BitSet bitSet = new BitSet(MAX_BINS);
|
||||
bitSet.set(0);
|
||||
for (k = 1 + (start>>26); k <= 1 + (end>>26); ++k) bitSet.set(k);
|
||||
for (k = 9 + (start>>23); k <= 9 + (end>>23); ++k) bitSet.set(k);
|
||||
for (k = 73 + (start>>20); k <= 73 + (end>>20); ++k) bitSet.set(k);
|
||||
for (k = 585 + (start>>17); k <= 585 + (end>>17); ++k) bitSet.set(k);
|
||||
for (k = 4681 + (start>>14); k <= 4681 + (end>>14); ++k) bitSet.set(k);
|
||||
return bitSet;
|
||||
}
|
||||
|
||||
protected List<GATKChunk> optimizeChunkList(final List<GATKChunk> chunks, final long minimumOffset) {
|
||||
GATKChunk lastChunk = null;
|
||||
Collections.sort(chunks);
|
||||
|
|
@ -541,61 +269,76 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
|||
}
|
||||
|
||||
/**
|
||||
* The maximum possible bin number for this reference sequence.
|
||||
* This is based on the maximum coordinate position of the reference
|
||||
* which is based on the size of the reference
|
||||
* Gets the possible number of bins for a given reference sequence.
|
||||
* @return How many bins could possibly be used according to this indexing scheme to index a single contig.
|
||||
*/
|
||||
private int getMaxBinNumberForReference(final int reference) {
|
||||
try {
|
||||
final int sequenceLength = mBamDictionary.getSequence(reference).getSequenceLength();
|
||||
return getMaxBinNumberForSequenceLength(sequenceLength);
|
||||
} catch (Exception e) {
|
||||
return MAX_BINS;
|
||||
}
|
||||
}
|
||||
protected int getMaxAddressibleGenomicLocation() {
|
||||
return BIN_GENOMIC_SPAN;
|
||||
}
|
||||
|
||||
protected void skipToSequence(final int sequenceIndex) {
|
||||
protected void skipToSequence(final FileChannel fileChannel, final int sequenceIndex) {
|
||||
for (int i = 0; i < sequenceIndex; i++) {
|
||||
// System.out.println("# Sequence TID: " + i);
|
||||
final int nBins = readInteger();
|
||||
final int nBins = readInteger(fileChannel);
|
||||
// System.out.println("# nBins: " + nBins);
|
||||
for (int j = 0; j < nBins; j++) {
|
||||
final int bin = readInteger();
|
||||
final int nChunks = readInteger();
|
||||
final int bin = readInteger(fileChannel);
|
||||
final int nChunks = readInteger(fileChannel);
|
||||
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
|
||||
skipBytes(16 * nChunks);
|
||||
skipBytes(fileChannel, 16 * nChunks);
|
||||
}
|
||||
final int nLinearBins = readInteger();
|
||||
final int nLinearBins = readInteger(fileChannel);
|
||||
// System.out.println("# nLinearBins: " + nLinearBins);
|
||||
skipBytes(8 * nLinearBins);
|
||||
skipBytes(fileChannel, 8 * nLinearBins);
|
||||
}
|
||||
}
|
||||
|
||||
private void readBytes(final byte[] bytes) {
|
||||
mFileBuffer.get(bytes);
|
||||
private void readBytes(final FileChannel fileChannel, final byte[] bytes) {
|
||||
ByteBuffer buffer = ByteBuffer.wrap(bytes);
|
||||
try {
|
||||
fileChannel.read(buffer);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Index: unable to read bytes from index file.");
|
||||
}
|
||||
}
|
||||
|
||||
protected int readInteger() {
|
||||
return mFileBuffer.getInt();
|
||||
private static final int INT_SIZE_IN_BYTES = Integer.SIZE / 8;
|
||||
private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
|
||||
|
||||
private ByteBuffer wrapBuffer(final byte[] bytes) {
|
||||
ByteBuffer buffer = ByteBuffer.wrap(bytes);
|
||||
buffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
protected long readLong() {
|
||||
return mFileBuffer.getLong();
|
||||
private int readInteger(final FileChannel fileChannel) {
|
||||
byte[] bytes = new byte[INT_SIZE_IN_BYTES];
|
||||
readBytes(fileChannel,bytes);
|
||||
return wrapBuffer(bytes).getInt();
|
||||
}
|
||||
|
||||
protected void skipBytes(final int count) {
|
||||
mFileBuffer.position(mFileBuffer.position() + count);
|
||||
private long readLong(final FileChannel fileChannel) {
|
||||
byte[] bytes = new byte[LONG_SIZE_IN_BYTES];
|
||||
readBytes(fileChannel,bytes);
|
||||
return wrapBuffer(bytes).getLong();
|
||||
}
|
||||
|
||||
protected void seek(final int position) {
|
||||
mFileBuffer.position(position);
|
||||
private void skipBytes(final FileChannel fileChannel, final int count) {
|
||||
try {
|
||||
fileChannel.position(fileChannel.position() + count);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Index: unable to reposition file channel.");
|
||||
}
|
||||
}
|
||||
|
||||
protected long position() {
|
||||
return mFileBuffer.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
private void seek(final FileChannel fileChannel, final int position) {
|
||||
try {
|
||||
fileChannel.position(position);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Index: unable to reposition of file channel.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -140,7 +140,7 @@ public class IntervalSharder {
|
|||
// If this contig can't be found in the reference, skip over it.
|
||||
if(referenceSequence == null && contig != null)
|
||||
continue;
|
||||
final BrowseableBAMIndex index = dataSource.getIndex(id);
|
||||
final BrowseableBAMIndex index = (BrowseableBAMIndex)dataSource.getIndex(id);
|
||||
binMerger.addReader(id,
|
||||
index,
|
||||
referenceSequence.getSequenceIndex(),
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
|
|||
private final GenomeLocParser parser;
|
||||
|
||||
public LowMemoryIntervalSharder(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
|
||||
System.out.println("Low memory interval sharding enabled");
|
||||
wrappedIterator = new PeekableIterator<FilePointer>(new BAMScheduler(dataSource,loci));
|
||||
parser = loci.getGenomeLocParser();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ public class SAMDataSource {
|
|||
/**
|
||||
* Store BAM indices for each reader present.
|
||||
*/
|
||||
private final Map<SAMReaderID,GATKBAMIndex> bamIndices = new HashMap<SAMReaderID,GATKBAMIndex>();
|
||||
private final Map<SAMReaderID,Object> bamIndices = new HashMap<SAMReaderID,Object>();
|
||||
|
||||
/**
|
||||
* How far along is each reader?
|
||||
|
|
@ -292,9 +292,7 @@ public class SAMDataSource {
|
|||
for(SAMReaderID id: readerIDs) {
|
||||
File indexFile = findIndexFile(id.samFile);
|
||||
if(indexFile != null) {
|
||||
SAMSequenceDictionary sequenceDictionary = readers.getReader(id).getFileHeader().getSequenceDictionary();
|
||||
GATKBAMIndex index = new GATKBAMIndex(indexFile,sequenceDictionary);
|
||||
bamIndices.put(id,index);
|
||||
bamIndices.put(id,indexFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -428,7 +426,7 @@ public class SAMDataSource {
|
|||
* @param id Id of the reader.
|
||||
* @return The index. Will preload the index if necessary.
|
||||
*/
|
||||
public BrowseableBAMIndex getIndex(final SAMReaderID id) {
|
||||
public Object getIndex(final SAMReaderID id) {
|
||||
if(enableLowMemorySharding)
|
||||
return bamIndices.get(id);
|
||||
else {
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ public class GATKBAMIndexUnitTest extends BaseTest {
|
|||
SAMSequenceDictionary sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
|
||||
reader.close();
|
||||
|
||||
bamIndex = new GATKBAMIndex(bamIndexFile,sequenceDictionary);
|
||||
bamIndex = new GATKBAMIndex(bamIndexFile,0);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
Loading…
Reference in New Issue