parent
5f1afb4136
commit
19dd2d628a
|
|
@ -24,13 +24,10 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.*;
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.ByteOrder;
|
import java.nio.ByteOrder;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
|
|
@ -80,6 +77,7 @@ public class GATKBAMIndex {
|
||||||
|
|
||||||
private FileInputStream fileStream;
|
private FileInputStream fileStream;
|
||||||
private FileChannel fileChannel;
|
private FileChannel fileChannel;
|
||||||
|
private BufferedInputStream bufferedStream;
|
||||||
|
|
||||||
public GATKBAMIndex(final File file) {
|
public GATKBAMIndex(final File file) {
|
||||||
mFile = file;
|
mFile = file;
|
||||||
|
|
@ -277,12 +275,11 @@ public class GATKBAMIndex {
|
||||||
|
|
||||||
for (int i = sequenceIndex; i < referenceSequence; i++) {
|
for (int i = sequenceIndex; i < referenceSequence; i++) {
|
||||||
sequenceStartCache[i] = position();
|
sequenceStartCache[i] = position();
|
||||||
|
|
||||||
// System.out.println("# Sequence TID: " + i);
|
// System.out.println("# Sequence TID: " + i);
|
||||||
final int nBins = readInteger();
|
final int nBins = readInteger();
|
||||||
// System.out.println("# nBins: " + nBins);
|
// System.out.println("# nBins: " + nBins);
|
||||||
for (int j = 0; j < nBins; j++) {
|
for (int j = 0; j < nBins; j++) {
|
||||||
skipInteger();
|
final int bin = readInteger();
|
||||||
final int nChunks = readInteger();
|
final int nChunks = readInteger();
|
||||||
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
|
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
|
||||||
skipBytes(16 * nChunks);
|
skipBytes(16 * nChunks);
|
||||||
|
|
@ -290,15 +287,19 @@ public class GATKBAMIndex {
|
||||||
final int nLinearBins = readInteger();
|
final int nLinearBins = readInteger();
|
||||||
// System.out.println("# nLinearBins: " + nLinearBins);
|
// System.out.println("# nLinearBins: " + nLinearBins);
|
||||||
skipBytes(8 * nLinearBins);
|
skipBytes(8 * nLinearBins);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sequenceStartCache[referenceSequence] = position();
|
sequenceStartCache[referenceSequence] = position();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private void openIndexFile() {
|
private void openIndexFile() {
|
||||||
try {
|
try {
|
||||||
fileStream = new FileInputStream(mFile);
|
fileStream = new FileInputStream(mFile);
|
||||||
fileChannel = fileStream.getChannel();
|
fileChannel = fileStream.getChannel();
|
||||||
|
bufferedStream = new BufferedInputStream(fileStream);
|
||||||
}
|
}
|
||||||
catch (IOException exc) {
|
catch (IOException exc) {
|
||||||
throw new ReviewedStingException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc);
|
throw new ReviewedStingException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc);
|
||||||
|
|
@ -308,6 +309,7 @@ public class GATKBAMIndex {
|
||||||
private void closeIndexFile() {
|
private void closeIndexFile() {
|
||||||
try {
|
try {
|
||||||
fileChannel.close();
|
fileChannel.close();
|
||||||
|
bufferedStream.close();
|
||||||
fileStream.close();
|
fileStream.close();
|
||||||
}
|
}
|
||||||
catch (IOException exc) {
|
catch (IOException exc) {
|
||||||
|
|
@ -334,10 +336,6 @@ public class GATKBAMIndex {
|
||||||
return buffer.getInt();
|
return buffer.getInt();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void skipInteger() {
|
|
||||||
skipBytes(INT_SIZE_IN_BYTES);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads an array of <count> longs from the file channel, returning the results as an array.
|
* Reads an array of <count> longs from the file channel, returning the results as an array.
|
||||||
* @param count Number of longs to read.
|
* @param count Number of longs to read.
|
||||||
|
|
@ -356,7 +354,9 @@ public class GATKBAMIndex {
|
||||||
private void read(final ByteBuffer buffer) {
|
private void read(final ByteBuffer buffer) {
|
||||||
try {
|
try {
|
||||||
int bytesExpected = buffer.limit();
|
int bytesExpected = buffer.limit();
|
||||||
int bytesRead = fileChannel.read(buffer);
|
//BufferedInputStream cannot read directly into a byte buffer, so we read into an array
|
||||||
|
//and put the result into the bytebuffer after the if statement.
|
||||||
|
int bytesRead = bufferedStream.read(byteArray,0,bytesExpected);
|
||||||
|
|
||||||
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
||||||
// our buffer to -- if we read in fewer bytes than this, or encounter EOF (-1), the index
|
// our buffer to -- if we read in fewer bytes than this, or encounter EOF (-1), the index
|
||||||
|
|
@ -367,6 +367,7 @@ public class GATKBAMIndex {
|
||||||
"Please try re-indexing the corresponding BAM file.",
|
"Please try re-indexing the corresponding BAM file.",
|
||||||
mFile));
|
mFile));
|
||||||
}
|
}
|
||||||
|
buffer.put(byteArray,0,bytesRead);
|
||||||
}
|
}
|
||||||
catch(IOException ex) {
|
catch(IOException ex) {
|
||||||
throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);
|
throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);
|
||||||
|
|
@ -380,10 +381,13 @@ public class GATKBAMIndex {
|
||||||
*/
|
*/
|
||||||
private ByteBuffer buffer = null;
|
private ByteBuffer buffer = null;
|
||||||
|
|
||||||
|
//BufferedInputStream cannot read into a ByteBuffer, so we need this temporary array
|
||||||
|
private byte[] byteArray=null;
|
||||||
private ByteBuffer getBuffer(final int size) {
|
private ByteBuffer getBuffer(final int size) {
|
||||||
if(buffer == null || buffer.capacity() < size) {
|
if(buffer == null || buffer.capacity() < size) {
|
||||||
// Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging.
|
// Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging.
|
||||||
buffer = ByteBuffer.allocate(size);
|
buffer = ByteBuffer.allocate(size);
|
||||||
|
byteArray = new byte[size];
|
||||||
buffer.order(ByteOrder.LITTLE_ENDIAN);
|
buffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||||
}
|
}
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
|
|
@ -393,7 +397,13 @@ public class GATKBAMIndex {
|
||||||
|
|
||||||
private void skipBytes(final int count) {
|
private void skipBytes(final int count) {
|
||||||
try {
|
try {
|
||||||
fileChannel.position(fileChannel.position() + count);
|
|
||||||
|
//try to skip forward the requested amount.
|
||||||
|
long remainingCount = count - bufferedStream.skip(count);
|
||||||
|
|
||||||
|
if( remainingCount > 0 ) { //if not enough data in buffer, reset buffer
|
||||||
|
seek(position() + remainingCount);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
catch(IOException ex) {
|
catch(IOException ex) {
|
||||||
throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
|
throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
|
||||||
|
|
@ -402,7 +412,9 @@ public class GATKBAMIndex {
|
||||||
|
|
||||||
private void seek(final long position) {
|
private void seek(final long position) {
|
||||||
try {
|
try {
|
||||||
|
//to seek to a new position, move the fileChannel and re-create the bufferedStream so stale buffered bytes are discarded
|
||||||
fileChannel.position(position);
|
fileChannel.position(position);
|
||||||
|
bufferedStream = new BufferedInputStream(fileStream);
|
||||||
}
|
}
|
||||||
catch(IOException ex) {
|
catch(IOException ex) {
|
||||||
throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile);
|
throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile);
|
||||||
|
|
@ -415,7 +427,10 @@ public class GATKBAMIndex {
|
||||||
*/
|
*/
|
||||||
private long position() {
|
private long position() {
|
||||||
try {
|
try {
|
||||||
return fileChannel.position();
|
// It's a little complicated to figure out the position from a bufferedStream because it could be
|
||||||
|
// connected to a stream for which it makes no sense. Since we are on a file, this is OK.
|
||||||
|
final int bufferRemaining = bufferedStream.available() - fileStream.available();
|
||||||
|
return fileChannel.position() - bufferRemaining;
|
||||||
}
|
}
|
||||||
catch (IOException exc) {
|
catch (IOException exc) {
|
||||||
throw new ReviewedStingException("Unable to read position from index file " + mFile, exc);
|
throw new ReviewedStingException("Unable to read position from index file " + mFile, exc);
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,9 @@ import org.testng.annotations.Test;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
public class PileupWalkerIntegrationTest extends WalkerTest {
|
public class PileupWalkerIntegrationTest extends WalkerTest {
|
||||||
|
String gatkSpeedupArgs="-T Pileup -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam "
|
||||||
|
+ "-R " + hg19Reference + " -o %s ";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGnarleyFHSPileup() {
|
public void testGnarleyFHSPileup() {
|
||||||
String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam "
|
String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam "
|
||||||
|
|
@ -39,4 +42,31 @@ public class PileupWalkerIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(SingleReadAligningOffChromosome1MD5));
|
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(SingleReadAligningOffChromosome1MD5));
|
||||||
executeTest("Testing single read spanning off chromosome 1 unindexed", spec);
|
executeTest("Testing single read spanning off chromosome 1 unindexed", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/************************/
|
||||||
|
|
||||||
|
//Tests for the speedup to GATKBAMIndex
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPileupOnLargeBamChr20(){
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050", 1, Arrays.asList("8702701350de11a6d28204acefdc4775"));
|
||||||
|
executeTest("Testing single on big BAM at start of chromosome 20", spec);
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
public void testPileupOnLargeBamMid20(){
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:10,000,000-10,001,100", 1, Arrays.asList("818cf5a8229efe6f89fc1cd8145ccbe3"));
|
||||||
|
executeTest("Testing single on big BAM somewhere in chromosome 20", spec);
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
public void testPileupOnLargeBamEnd20(){
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:62,954,114-63,025,520", 1, Arrays.asList("22471ea4a12e5139aef62bf8ff2a5b63"));
|
||||||
|
executeTest("Testing single at end of chromosome 20", spec);
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
public void testPileupOnLargeBam20Many(){
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050 -L 20:20,000,000-20,000,100 -L 20:40,000,000-40,000,100 -L 20:30,000,000-30,000,100 -L 20:50,000,000-50,000,100 -L 20:62,954,114-63,025,520 ",
|
||||||
|
1, Arrays.asList("08d899ed7c5a76ef3947bf67338acda1"));
|
||||||
|
executeTest("Testing single on big BAM many places", spec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue