Temporarily back out the Picard team's patches to GATKBAMIndex from December
These patches to GATKBAMIndex are causing massive BAM index reading errors in combination with the latest version of Picard. The bug is either in the patches themselves or in the underlying SeekableBufferedStream class they rely on. Until the cause can be identified, we are temporarily backing out these changes so that we can continue to run with the latest Picard/Tribble. This reverts commits: 81483ec21e528790dfa719d18cdee27d577ca98e 68cf0309db490b79eecdabb4034987ff825ffea8 54bb68f28ad5fe1b3df01702e9c5e108106a0176
This commit is contained in:
parent
a520058ef6
commit
c4b0ba4d45
|
|
@ -25,17 +25,17 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.seekablestream.SeekableBufferedStream;
|
||||
import net.sf.samtools.seekablestream.SeekableFileStream;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.*;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
|
@ -70,9 +70,6 @@ public class GATKBAMIndex {
|
|||
|
||||
private final File mFile;
|
||||
|
||||
//TODO: figure out a good value for this buffer size
|
||||
private final int BUFFERED_STREAM_BUFFER_SIZE=8192;
|
||||
|
||||
/**
|
||||
* Number of sequences stored in this index.
|
||||
*/
|
||||
|
|
@ -83,8 +80,8 @@ public class GATKBAMIndex {
|
|||
*/
|
||||
private final long[] sequenceStartCache;
|
||||
|
||||
private SeekableFileStream fileStream;
|
||||
private SeekableBufferedStream bufferedStream;
|
||||
private FileInputStream fileStream;
|
||||
private FileChannel fileChannel;
|
||||
|
||||
public GATKBAMIndex(final File file) {
|
||||
mFile = file;
|
||||
|
|
@ -282,6 +279,7 @@ public class GATKBAMIndex {
|
|||
|
||||
for (int i = sequenceIndex; i < referenceSequence; i++) {
|
||||
sequenceStartCache[i] = position();
|
||||
|
||||
// System.out.println("# Sequence TID: " + i);
|
||||
final int nBins = readInteger();
|
||||
// System.out.println("# nBins: " + nBins);
|
||||
|
|
@ -294,18 +292,15 @@ public class GATKBAMIndex {
|
|||
final int nLinearBins = readInteger();
|
||||
// System.out.println("# nLinearBins: " + nLinearBins);
|
||||
skipBytes(8 * nLinearBins);
|
||||
|
||||
}
|
||||
|
||||
sequenceStartCache[referenceSequence] = position();
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void openIndexFile() {
|
||||
try {
|
||||
fileStream = new SeekableFileStream(mFile);
|
||||
bufferedStream = new SeekableBufferedStream(fileStream,BUFFERED_STREAM_BUFFER_SIZE);
|
||||
fileStream = new FileInputStream(mFile);
|
||||
fileChannel = fileStream.getChannel();
|
||||
}
|
||||
catch (IOException exc) {
|
||||
throw new ReviewedStingException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc);
|
||||
|
|
@ -314,7 +309,7 @@ public class GATKBAMIndex {
|
|||
|
||||
private void closeIndexFile() {
|
||||
try {
|
||||
bufferedStream.close();
|
||||
fileChannel.close();
|
||||
fileStream.close();
|
||||
}
|
||||
catch (IOException exc) {
|
||||
|
|
@ -359,12 +354,7 @@ public class GATKBAMIndex {
|
|||
private void read(final ByteBuffer buffer) {
|
||||
try {
|
||||
int bytesExpected = buffer.limit();
|
||||
//BufferedInputStream cannot read directly into a byte buffer, so we read into an array
|
||||
//and put the result into the bytebuffer after the if statement.
|
||||
|
||||
//SeekableBufferedStream is evil, it will "read" beyond the end of the file if you let it!
|
||||
final int bytesToRead = (int) Math.min(bufferedStream.length() - bufferedStream.position(), bytesExpected); //min of int and long will definitely be castable to an int.
|
||||
int bytesRead = bufferedStream.read(byteArray,0,bytesToRead);
|
||||
int bytesRead = fileChannel.read(buffer);
|
||||
|
||||
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
||||
// our buffer to -- if we read in fewer bytes than this, or encounter EOF (-1), the index
|
||||
|
|
@ -375,7 +365,6 @@ public class GATKBAMIndex {
|
|||
"Please try re-indexing the corresponding BAM file.",
|
||||
mFile));
|
||||
}
|
||||
buffer.put(byteArray,0,bytesRead);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);
|
||||
|
|
@ -389,13 +378,10 @@ public class GATKBAMIndex {
|
|||
*/
|
||||
private ByteBuffer buffer = null;
|
||||
|
||||
//BufferedStream don't read into ByteBuffers, so we need this temporary array
|
||||
private byte[] byteArray=null;
|
||||
private ByteBuffer getBuffer(final int size) {
|
||||
if(buffer == null || buffer.capacity() < size) {
|
||||
// Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging.
|
||||
buffer = ByteBuffer.allocate(size);
|
||||
byteArray = new byte[size];
|
||||
buffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||
}
|
||||
buffer.clear();
|
||||
|
|
@ -405,13 +391,7 @@ public class GATKBAMIndex {
|
|||
|
||||
private void skipBytes(final int count) {
|
||||
try {
|
||||
|
||||
//try to skip forward the requested amount.
|
||||
long skipped = bufferedStream.skip(count);
|
||||
|
||||
if( skipped != count ) { //if not managed to skip the requested amount
|
||||
throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
|
||||
}
|
||||
fileChannel.position(fileChannel.position() + count);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
|
||||
|
|
@ -420,8 +400,7 @@ public class GATKBAMIndex {
|
|||
|
||||
private void seek(final long position) {
|
||||
try {
|
||||
//to seek a new position, move the fileChannel, and reposition the bufferedStream
|
||||
bufferedStream.seek(position);
|
||||
fileChannel.position(position);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile);
|
||||
|
|
@ -434,7 +413,7 @@ public class GATKBAMIndex {
|
|||
*/
|
||||
private long position() {
|
||||
try {
|
||||
return bufferedStream.position();
|
||||
return fileChannel.position();
|
||||
}
|
||||
catch (IOException exc) {
|
||||
throw new ReviewedStingException("Unable to read position from index file " + mFile, exc);
|
||||
|
|
|
|||
|
|
@ -31,9 +31,6 @@ import org.testng.annotations.Test;
|
|||
import java.util.Arrays;
|
||||
|
||||
public class PileupWalkerIntegrationTest extends WalkerTest {
|
||||
String gatkSpeedupArgs="-T Pileup -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam "
|
||||
+ "-R " + hg19Reference + " -o %s ";
|
||||
|
||||
@Test
|
||||
public void testGnarleyFHSPileup() {
|
||||
String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam "
|
||||
|
|
@ -67,31 +64,4 @@ public class PileupWalkerIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(SingleReadAligningOffChromosome1MD5));
|
||||
executeTest("Testing single read spanning off chromosome 1 unindexed", spec);
|
||||
}
|
||||
|
||||
/************************/
|
||||
|
||||
//testing speedup to GATKBAMIndex
|
||||
|
||||
|
||||
@Test
|
||||
public void testPileupOnLargeBamChr20(){
|
||||
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050", 1, Arrays.asList("8702701350de11a6d28204acefdc4775"));
|
||||
executeTest("Testing single on big BAM at start of chromosome 20", spec);
|
||||
}
|
||||
@Test
|
||||
public void testPileupOnLargeBamMid20(){
|
||||
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:10,000,000-10,001,100", 1, Arrays.asList("818cf5a8229efe6f89fc1cd8145ccbe3"));
|
||||
executeTest("Testing single on big BAM somewhere in chromosome 20", spec);
|
||||
}
|
||||
@Test
|
||||
public void testPileupOnLargeBamEnd20(){
|
||||
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:62,954,114-63,025,520", 1, Arrays.asList("22471ea4a12e5139aef62bf8ff2a5b63"));
|
||||
executeTest("Testing single at end of chromosome 20", spec);
|
||||
}
|
||||
@Test
|
||||
public void testPileupOnLargeBam20Many(){
|
||||
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050 -L 20:20,000,000-20,000,100 -L 20:40,000,000-40,000,100 -L 20:30,000,000-30,000,100 -L 20:50,000,000-50,000,100 -L 20:62,954,114-63,025,520 ",
|
||||
1, Arrays.asList("08d899ed7c5a76ef3947bf67338acda1"));
|
||||
executeTest("Testing single on big BAM many places", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue