Temporarily back out the Picard team's patches to GATKBAMIndex from December

These patches to GATKBAMIndex are causing massive BAM index reading errors in
combination with the latest version of Picard. The bug is either in the patches
themselves or in the underlying SeekableBufferedStream class they rely on. Until
the cause can be identified, we are temporarily backing out these changes so that
we can continue to run with the latest Picard/Tribble.

This reverts commits:
81483ec21e528790dfa719d18cdee27d577ca98e
68cf0309db490b79eecdabb4034987ff825ffea8
54bb68f28ad5fe1b3df01702e9c5e108106a0176
This commit is contained in:
David Roazen 2013-02-01 00:36:54 -05:00 committed by Ryan Poplin
parent a520058ef6
commit c4b0ba4d45
2 changed files with 15 additions and 66 deletions

View File

@ -25,17 +25,17 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.seekablestream.SeekableBufferedStream;
import net.sf.samtools.seekablestream.SeekableFileStream;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.*;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@ -70,9 +70,6 @@ public class GATKBAMIndex {
private final File mFile;
//TODO: figure out a good value for this buffer size
private final int BUFFERED_STREAM_BUFFER_SIZE=8192;
/**
* Number of sequences stored in this index.
*/
@ -83,8 +80,8 @@ public class GATKBAMIndex {
*/
private final long[] sequenceStartCache;
private SeekableFileStream fileStream;
private SeekableBufferedStream bufferedStream;
private FileInputStream fileStream;
private FileChannel fileChannel;
public GATKBAMIndex(final File file) {
mFile = file;
@ -282,6 +279,7 @@ public class GATKBAMIndex {
for (int i = sequenceIndex; i < referenceSequence; i++) {
sequenceStartCache[i] = position();
// System.out.println("# Sequence TID: " + i);
final int nBins = readInteger();
// System.out.println("# nBins: " + nBins);
@ -294,18 +292,15 @@ public class GATKBAMIndex {
final int nLinearBins = readInteger();
// System.out.println("# nLinearBins: " + nLinearBins);
skipBytes(8 * nLinearBins);
}
sequenceStartCache[referenceSequence] = position();
}
private void openIndexFile() {
try {
fileStream = new SeekableFileStream(mFile);
bufferedStream = new SeekableBufferedStream(fileStream,BUFFERED_STREAM_BUFFER_SIZE);
fileStream = new FileInputStream(mFile);
fileChannel = fileStream.getChannel();
}
catch (IOException exc) {
throw new ReviewedStingException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc);
@ -314,7 +309,7 @@ public class GATKBAMIndex {
private void closeIndexFile() {
try {
bufferedStream.close();
fileChannel.close();
fileStream.close();
}
catch (IOException exc) {
@ -359,12 +354,7 @@ public class GATKBAMIndex {
private void read(final ByteBuffer buffer) {
try {
int bytesExpected = buffer.limit();
//BufferedInputStream cannot read directly into a byte buffer, so we read into an array
//and put the result into the bytebuffer after the if statement.
//SeekableBufferedStream is evil, it will "read" beyond the end of the file if you let it!
final int bytesToRead = (int) Math.min(bufferedStream.length() - bufferedStream.position(), bytesExpected); //min of int and long will definitely be castable to an int.
int bytesRead = bufferedStream.read(byteArray,0,bytesToRead);
int bytesRead = fileChannel.read(buffer);
// We have a rigid expectation here to read in exactly the number of bytes we've limited
// our buffer to -- if we read in fewer bytes than this, or encounter EOF (-1), the index
@ -375,7 +365,6 @@ public class GATKBAMIndex {
"Please try re-indexing the corresponding BAM file.",
mFile));
}
buffer.put(byteArray,0,bytesRead);
}
catch(IOException ex) {
throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);
@ -389,13 +378,10 @@ public class GATKBAMIndex {
*/
private ByteBuffer buffer = null;
//BufferedStream don't read into ByteBuffers, so we need this temporary array
private byte[] byteArray=null;
private ByteBuffer getBuffer(final int size) {
if(buffer == null || buffer.capacity() < size) {
// Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging.
buffer = ByteBuffer.allocate(size);
byteArray = new byte[size];
buffer.order(ByteOrder.LITTLE_ENDIAN);
}
buffer.clear();
@ -405,13 +391,7 @@ public class GATKBAMIndex {
private void skipBytes(final int count) {
try {
//try to skip forward the requested amount.
long skipped = bufferedStream.skip(count);
if( skipped != count ) { //if not managed to skip the requested amount
throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
}
fileChannel.position(fileChannel.position() + count);
}
catch(IOException ex) {
throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
@ -420,8 +400,7 @@ public class GATKBAMIndex {
private void seek(final long position) {
try {
//to seek a new position, move the fileChannel, and reposition the bufferedStream
bufferedStream.seek(position);
fileChannel.position(position);
}
catch(IOException ex) {
throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile);
@ -434,7 +413,7 @@ public class GATKBAMIndex {
*/
private long position() {
try {
return bufferedStream.position();
return fileChannel.position();
}
catch (IOException exc) {
throw new ReviewedStingException("Unable to read position from index file " + mFile, exc);

View File

@ -31,9 +31,6 @@ import org.testng.annotations.Test;
import java.util.Arrays;
public class PileupWalkerIntegrationTest extends WalkerTest {
String gatkSpeedupArgs="-T Pileup -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam "
+ "-R " + hg19Reference + " -o %s ";
@Test
public void testGnarleyFHSPileup() {
String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam "
@ -67,31 +64,4 @@ public class PileupWalkerIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(SingleReadAligningOffChromosome1MD5));
executeTest("Testing single read spanning off chromosome 1 unindexed", spec);
}
/************************/
//testing speedup to GATKBAMIndex
@Test
public void testPileupOnLargeBamChr20(){
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050", 1, Arrays.asList("8702701350de11a6d28204acefdc4775"));
executeTest("Testing single on big BAM at start of chromosome 20", spec);
}
@Test
public void testPileupOnLargeBamMid20(){
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:10,000,000-10,001,100", 1, Arrays.asList("818cf5a8229efe6f89fc1cd8145ccbe3"));
executeTest("Testing single on big BAM somewhere in chromosome 20", spec);
}
@Test
public void testPileupOnLargeBamEnd20(){
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:62,954,114-63,025,520", 1, Arrays.asList("22471ea4a12e5139aef62bf8ff2a5b63"));
executeTest("Testing single at end of chromosome 20", spec);
}
@Test
public void testPileupOnLargeBam20Many(){
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050 -L 20:20,000,000-20,000,100 -L 20:40,000,000-40,000,100 -L 20:30,000,000-30,000,100 -L 20:50,000,000-50,000,100 -L 20:62,954,114-63,025,520 ",
1, Arrays.asList("08d899ed7c5a76ef3947bf67338acda1"));
executeTest("Testing single on big BAM many places", spec);
}
}