diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java index 1cfb527cd..aec41e340 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java @@ -25,17 +25,17 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.samtools.seekablestream.SeekableBufferedStream; -import net.sf.samtools.seekablestream.SeekableFileStream; - import net.sf.samtools.*; - +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.io.*; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -70,9 +70,6 @@ public class GATKBAMIndex { private final File mFile; - //TODO: figure out a good value for this buffer size - private final int BUFFERED_STREAM_BUFFER_SIZE=8192; - /** * Number of sequences stored in this index. */ @@ -83,8 +80,8 @@ public class GATKBAMIndex { */ private final long[] sequenceStartCache; - private SeekableFileStream fileStream; - private SeekableBufferedStream bufferedStream; + private FileInputStream fileStream; + private FileChannel fileChannel; public GATKBAMIndex(final File file) { mFile = file; @@ -282,6 +279,7 @@ public class GATKBAMIndex { for (int i = sequenceIndex; i < referenceSequence; i++) { sequenceStartCache[i] = position(); + // System.out.println("# Sequence TID: " + i); final int nBins = readInteger(); // System.out.println("# nBins: " + nBins); @@ -294,18 +292,15 @@ public class GATKBAMIndex { final int nLinearBins = readInteger(); // System.out.println("# nLinearBins: " + nLinearBins); skipBytes(8 * nLinearBins); - } sequenceStartCache[referenceSequence] = position(); } - - private void openIndexFile() { try { - fileStream = new SeekableFileStream(mFile); - bufferedStream = new SeekableBufferedStream(fileStream,BUFFERED_STREAM_BUFFER_SIZE); + fileStream = new FileInputStream(mFile); + fileChannel = fileStream.getChannel(); } catch (IOException exc) { throw new ReviewedStingException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc); @@ -314,7 +309,7 @@ public class GATKBAMIndex { private void closeIndexFile() { try { - bufferedStream.close(); + fileChannel.close(); fileStream.close(); } catch (IOException exc) { @@ -359,12 +354,7 @@ public class GATKBAMIndex { private void read(final ByteBuffer buffer) { try { int bytesExpected = buffer.limit(); - //BufferedInputStream cannot read directly into a byte buffer, so we read into an array - //and put the result into the bytebuffer after the if statement. - - //SeekableBufferedStream is evil, it will "read" beyond the end of the file if you let it! - final int bytesToRead = (int) Math.min(bufferedStream.length() - bufferedStream.position(), bytesExpected); //min of int and long will definitely be castable to an int. - int bytesRead = bufferedStream.read(byteArray,0,bytesToRead); + int bytesRead = fileChannel.read(buffer); // We have a rigid expectation here to read in exactly the number of bytes we've limited // our buffer to -- if we read in fewer bytes than this, or encounter EOF (-1), the index @@ -375,7 +365,6 @@ public class GATKBAMIndex { "Please try re-indexing the corresponding BAM file.", mFile)); } - buffer.put(byteArray,0,bytesRead); } catch(IOException ex) { throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile); @@ -389,13 +378,10 @@ public class GATKBAMIndex { */ private ByteBuffer buffer = null; - //BufferedStream don't read into ByteBuffers, so we need this temporary array - private byte[] byteArray=null; private ByteBuffer getBuffer(final int size) { if(buffer == null || buffer.capacity() < size) { // Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging. buffer = ByteBuffer.allocate(size); - byteArray = new byte[size]; buffer.order(ByteOrder.LITTLE_ENDIAN); } buffer.clear(); @@ -405,13 +391,7 @@ public class GATKBAMIndex { private void skipBytes(final int count) { try { - - //try to skip forward the requested amount. - long skipped = bufferedStream.skip(count); - - if( skipped != count ) { //if not managed to skip the requested amount - throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile); - } + fileChannel.position(fileChannel.position() + count); } catch(IOException ex) { throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile); @@ -420,8 +400,7 @@ public class GATKBAMIndex { private void seek(final long position) { try { - //to seek a new position, move the fileChannel, and reposition the bufferedStream - bufferedStream.seek(position); + fileChannel.position(position); } catch(IOException ex) { throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile); @@ -434,7 +413,7 @@ public class GATKBAMIndex { */ private long position() { try { - return bufferedStream.position(); + return fileChannel.position(); } catch (IOException exc) { throw new ReviewedStingException("Unable to read position from index file " + mFile, exc); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/PileupWalkerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/PileupWalkerIntegrationTest.java index 76654fb74..a6191802b 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/PileupWalkerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/PileupWalkerIntegrationTest.java @@ -31,9 +31,6 @@ import org.testng.annotations.Test; import java.util.Arrays; public class PileupWalkerIntegrationTest extends WalkerTest { - String gatkSpeedupArgs="-T Pileup -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam " - + "-R " + hg19Reference + " -o %s "; - @Test public void testGnarleyFHSPileup() { String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam " @@ -67,31 +64,4 @@ public class PileupWalkerIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(SingleReadAligningOffChromosome1MD5)); executeTest("Testing single read spanning off chromosome 1 unindexed", spec); } - - /************************/ - - //testing speedup to GATKBAMIndex - - - @Test - public void testPileupOnLargeBamChr20(){ - WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050", 1, Arrays.asList("8702701350de11a6d28204acefdc4775")); - executeTest("Testing single on big BAM at start of chromosome 20", spec); - } - @Test - public void testPileupOnLargeBamMid20(){ - WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:10,000,000-10,001,100", 1, Arrays.asList("818cf5a8229efe6f89fc1cd8145ccbe3")); - executeTest("Testing single on big BAM somewhere in chromosome 20", spec); - } - @Test - public void testPileupOnLargeBamEnd20(){ - WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:62,954,114-63,025,520", 1, Arrays.asList("22471ea4a12e5139aef62bf8ff2a5b63")); - executeTest("Testing single at end of chromosome 20", spec); - } - @Test - public void testPileupOnLargeBam20Many(){ - WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050 -L 20:20,000,000-20,000,100 -L 20:40,000,000-40,000,100 -L 20:30,000,000-30,000,100 -L 20:50,000,000-50,000,100 -L 20:62,954,114-63,025,520 ", - 1, Arrays.asList("08d899ed7c5a76ef3947bf67338acda1")); - executeTest("Testing single on big BAM many places", spec); - } }