Efficiency improvement requested by the Picard team in IndexedFastaSequenceFile: improve the memory efficiency
(and loading time) of long reference sequences by better controlling the input buffer size.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3665 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ed71e53dd4
commit
2953c9f069
|
|
@ -24,6 +24,11 @@ import java.util.Iterator;
|
||||||
* the ReferenceSequenceFile for old-style, stateful lookups and a direct getter.
|
* the ReferenceSequenceFile for old-style, stateful lookups and a direct getter.
|
||||||
*/
|
*/
|
||||||
public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
|
public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
|
||||||
|
/**
|
||||||
|
* Size of the read buffer.
|
||||||
|
*/
|
||||||
|
private static final int BUFFER_SIZE = 128 * 1024;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stores the main fasta file.
|
* Stores the main fasta file.
|
||||||
*/
|
*/
|
||||||
|
|
@ -208,27 +213,45 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
|
||||||
|
|
||||||
final int basesPerLine = indexEntry.getBasesPerLine();
|
final int basesPerLine = indexEntry.getBasesPerLine();
|
||||||
final int bytesPerLine = indexEntry.getBytesPerLine();
|
final int bytesPerLine = indexEntry.getBytesPerLine();
|
||||||
|
final int terminatorLength = bytesPerLine - basesPerLine;
|
||||||
|
|
||||||
final long startOffset = ((start-1)/basesPerLine)*bytesPerLine + (start-1)%basesPerLine;
|
long startOffset = ((start-1)/basesPerLine)*bytesPerLine + (start-1)%basesPerLine;
|
||||||
final long stopOffset = ((stop-1)/basesPerLine)*bytesPerLine + (stop-1)%basesPerLine;
|
|
||||||
final int size = (int)(stopOffset-startOffset)+1;
|
|
||||||
|
|
||||||
ByteBuffer channelBuffer = ByteBuffer.allocate(size);
|
// Allocate a 128K buffer for reading in sequence data.
|
||||||
try {
|
ByteBuffer channelBuffer = ByteBuffer.allocate(BUFFER_SIZE);
|
||||||
channel.read(channelBuffer,indexEntry.getLocation()+startOffset);
|
|
||||||
}
|
|
||||||
catch(IOException ex) {
|
|
||||||
throw new PicardException("Unable to map FASTA file into memory.");
|
|
||||||
}
|
|
||||||
|
|
||||||
channelBuffer.position(0);
|
while(targetBuffer.position() < length) {
|
||||||
channelBuffer.limit(Math.min(basesPerLine-(int)startOffset%bytesPerLine,size));
|
// If startOffset currently falls within the end-of-line characters of a line, push startOffset forward to the next printable character.
|
||||||
|
startOffset += Math.max((int)(startOffset%bytesPerLine - basesPerLine + 1),0);
|
||||||
|
|
||||||
while( channelBuffer.hasRemaining() ) {
|
try {
|
||||||
targetBuffer.put(channelBuffer);
|
startOffset += channel.read(channelBuffer,indexEntry.getLocation()+startOffset);
|
||||||
|
}
|
||||||
|
catch(IOException ex) {
|
||||||
|
throw new PicardException("Unable to map FASTA file into memory.");
|
||||||
|
}
|
||||||
|
|
||||||
channelBuffer.limit(Math.min(channelBuffer.limit()+bytesPerLine,size));
|
// Reset the buffer for outbound transfers.
|
||||||
channelBuffer.position(Math.min(channelBuffer.position()+bytesPerLine-basesPerLine,size));
|
channelBuffer.flip();
|
||||||
|
|
||||||
|
// Calculate the size of the next run of bases based on the contents we've already retrieved.
|
||||||
|
final int positionInContig = (int)start-1+targetBuffer.position();
|
||||||
|
final int nextBaseSpan = Math.min(basesPerLine-positionInContig%basesPerLine,length-targetBuffer.position());
|
||||||
|
// Cap the bytes to transfer by limiting the nextBaseSpan to the size of the channel buffer.
|
||||||
|
int bytesToTransfer = Math.min(nextBaseSpan,channelBuffer.capacity());
|
||||||
|
|
||||||
|
channelBuffer.limit(channelBuffer.position()+bytesToTransfer);
|
||||||
|
|
||||||
|
while(channelBuffer.hasRemaining()) {
|
||||||
|
targetBuffer.put(channelBuffer);
|
||||||
|
|
||||||
|
bytesToTransfer = Math.min(basesPerLine,length-targetBuffer.position());
|
||||||
|
channelBuffer.limit(Math.min(channelBuffer.position()+bytesToTransfer+terminatorLength,channelBuffer.capacity()));
|
||||||
|
channelBuffer.position(Math.min(channelBuffer.position()+terminatorLength,channelBuffer.capacity()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset the buffer for inbound transfers.
|
||||||
|
channelBuffer.flip();
|
||||||
}
|
}
|
||||||
|
|
||||||
return new ReferenceSequence( contig, sequenceDictionary.getSequenceIndex(contig), target );
|
return new ReferenceSequence( contig, sequenceDictionary.getSequenceIndex(contig), target );
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue