Merge pull request #1148 from broadinstitute/ks_cram_crai
Using `SamIndexes.asBaiSeekableStreamOrNull()` to support `.cram.crai`.
This commit is contained in:
commit
7e894f3b4c
1
pom.xml
1
pom.xml
|
|
@ -161,6 +161,7 @@
|
||||||
<configuration>
|
<configuration>
|
||||||
<outputDirectory>${gatk.executable.directory}/lib</outputDirectory>
|
<outputDirectory>${gatk.executable.directory}/lib</outputDirectory>
|
||||||
<includeScope>runtime</includeScope>
|
<includeScope>runtime</includeScope>
|
||||||
|
<useBaseVersion>false</useBaseVersion>
|
||||||
</configuration>
|
</configuration>
|
||||||
</execution>
|
</execution>
|
||||||
</executions>
|
</executions>
|
||||||
|
|
|
||||||
|
|
@ -25,12 +25,10 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.datasources.reads;
|
package org.broadinstitute.gatk.engine.datasources.reads;
|
||||||
|
|
||||||
import htsjdk.samtools.Bin;
|
import htsjdk.samtools.*;
|
||||||
import htsjdk.samtools.GATKBin;
|
|
||||||
import htsjdk.samtools.GATKChunk;
|
|
||||||
import htsjdk.samtools.LinearIndex;
|
|
||||||
import htsjdk.samtools.seekablestream.SeekableBufferedStream;
|
import htsjdk.samtools.seekablestream.SeekableBufferedStream;
|
||||||
import htsjdk.samtools.seekablestream.SeekableFileStream;
|
import htsjdk.samtools.seekablestream.SeekableFileStream;
|
||||||
|
import htsjdk.samtools.seekablestream.SeekableStream;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
|
|
||||||
|
|
@ -70,10 +68,11 @@ public class GATKBAMIndex {
|
||||||
*/
|
*/
|
||||||
public static final int MAX_BINS = 37450; // =(8^6-1)/7+1
|
public static final int MAX_BINS = 37450; // =(8^6-1)/7+1
|
||||||
|
|
||||||
|
private final SAMSequenceDictionary sequenceDictionary;
|
||||||
private final File mFile;
|
private final File mFile;
|
||||||
|
|
||||||
//TODO: figure out a good value for this buffer size
|
//TODO: figure out a good value for this buffer size
|
||||||
private final int BUFFERED_STREAM_BUFFER_SIZE = 8192;
|
private static final int BUFFERED_STREAM_BUFFER_SIZE = 8192;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Number of sequences stored in this index.
|
* Number of sequences stored in this index.
|
||||||
|
|
@ -86,11 +85,14 @@ public class GATKBAMIndex {
|
||||||
private final long[] sequenceStartCache;
|
private final long[] sequenceStartCache;
|
||||||
|
|
||||||
private SeekableFileStream fileStream;
|
private SeekableFileStream fileStream;
|
||||||
|
private SeekableStream baiStream;
|
||||||
private SeekableBufferedStream bufferedStream;
|
private SeekableBufferedStream bufferedStream;
|
||||||
private long fileLength;
|
private long fileLength;
|
||||||
|
|
||||||
public GATKBAMIndex(final File file) {
|
public GATKBAMIndex(final File file, final SAMSequenceDictionary sequenceDictionary) {
|
||||||
mFile = file;
|
mFile = file;
|
||||||
|
this.sequenceDictionary = sequenceDictionary;
|
||||||
|
|
||||||
// Open the file stream.
|
// Open the file stream.
|
||||||
openIndexFile();
|
openIndexFile();
|
||||||
|
|
||||||
|
|
@ -127,12 +129,12 @@ public class GATKBAMIndex {
|
||||||
skipToSequence(referenceSequence);
|
skipToSequence(referenceSequence);
|
||||||
|
|
||||||
int binCount = readInteger();
|
int binCount = readInteger();
|
||||||
List<GATKBin> bins = new ArrayList<GATKBin>();
|
List<GATKBin> bins = new ArrayList<>();
|
||||||
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
||||||
final int indexBin = readInteger();
|
final int indexBin = readInteger();
|
||||||
final int nChunks = readInteger();
|
final int nChunks = readInteger();
|
||||||
|
|
||||||
List<GATKChunk> chunks = new ArrayList<GATKChunk>(nChunks);
|
List<GATKChunk> chunks = new ArrayList<>(nChunks);
|
||||||
long[] rawChunkData = readLongs(nChunks*2);
|
long[] rawChunkData = readLongs(nChunks*2);
|
||||||
for (int ci = 0; ci < nChunks; ci++) {
|
for (int ci = 0; ci < nChunks; ci++) {
|
||||||
final long chunkBegin = rawChunkData[ci*2];
|
final long chunkBegin = rawChunkData[ci*2];
|
||||||
|
|
@ -289,7 +291,8 @@ public class GATKBAMIndex {
|
||||||
final int nBins = readInteger();
|
final int nBins = readInteger();
|
||||||
// System.out.println("# nBins: " + nBins);
|
// System.out.println("# nBins: " + nBins);
|
||||||
for (int j = 0; j < nBins; j++) {
|
for (int j = 0; j < nBins; j++) {
|
||||||
final int bin = readInteger();
|
/* final int bin = */
|
||||||
|
readInteger();
|
||||||
final int nChunks = readInteger();
|
final int nChunks = readInteger();
|
||||||
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
|
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
|
||||||
skipBytes(16 * nChunks);
|
skipBytes(16 * nChunks);
|
||||||
|
|
@ -308,7 +311,8 @@ public class GATKBAMIndex {
|
||||||
private void openIndexFile() {
|
private void openIndexFile() {
|
||||||
try {
|
try {
|
||||||
fileStream = new SeekableFileStream(mFile);
|
fileStream = new SeekableFileStream(mFile);
|
||||||
bufferedStream = new SeekableBufferedStream(fileStream,BUFFERED_STREAM_BUFFER_SIZE);
|
baiStream = SamIndexes.asBaiSeekableStreamOrNull(fileStream, sequenceDictionary);
|
||||||
|
bufferedStream = new SeekableBufferedStream(baiStream, BUFFERED_STREAM_BUFFER_SIZE);
|
||||||
fileLength=bufferedStream.length();
|
fileLength=bufferedStream.length();
|
||||||
}
|
}
|
||||||
catch (IOException exc) {
|
catch (IOException exc) {
|
||||||
|
|
@ -319,6 +323,7 @@ public class GATKBAMIndex {
|
||||||
private void closeIndexFile() {
|
private void closeIndexFile() {
|
||||||
try {
|
try {
|
||||||
bufferedStream.close();
|
bufferedStream.close();
|
||||||
|
baiStream.close();
|
||||||
fileStream.close();
|
fileStream.close();
|
||||||
fileLength = -1;
|
fileLength = -1;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.gatk.engine.datasources.reads;
|
||||||
import htsjdk.samtools.MergingSamRecordIterator;
|
import htsjdk.samtools.MergingSamRecordIterator;
|
||||||
import htsjdk.samtools.SamFileHeaderMerger;
|
import htsjdk.samtools.SamFileHeaderMerger;
|
||||||
import htsjdk.samtools.*;
|
import htsjdk.samtools.*;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
|
||||||
import htsjdk.samtools.util.CloseableIterator;
|
import htsjdk.samtools.util.CloseableIterator;
|
||||||
import htsjdk.samtools.util.CloserUtil;
|
import htsjdk.samtools.util.CloserUtil;
|
||||||
import htsjdk.samtools.util.RuntimeIOException;
|
import htsjdk.samtools.util.RuntimeIOException;
|
||||||
|
|
@ -372,10 +373,19 @@ public class SAMDataSource {
|
||||||
originalToMergedReadGroupMappings.put(id,mappingToMerged);
|
originalToMergedReadGroupMappings.put(id,mappingToMerged);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final SAMSequenceDictionary samSequenceDictionary;
|
||||||
|
if (referenceFile == null) {
|
||||||
|
samSequenceDictionary = mergedHeader.getSequenceDictionary();
|
||||||
|
} else {
|
||||||
|
samSequenceDictionary = ReferenceSequenceFileFactory.
|
||||||
|
getReferenceSequenceFile(referenceFile).
|
||||||
|
getSequenceDictionary();
|
||||||
|
}
|
||||||
|
|
||||||
for(SAMReaderID id: readerIDs) {
|
for(SAMReaderID id: readerIDs) {
|
||||||
File indexFile = findIndexFile(id.getSamFile());
|
File indexFile = findIndexFile(id.getSamFile());
|
||||||
if(indexFile != null)
|
if(indexFile != null)
|
||||||
bamIndices.put(id,new GATKBAMIndex(indexFile));
|
bamIndices.put(id,new GATKBAMIndex(indexFile, samSequenceDictionary));
|
||||||
}
|
}
|
||||||
|
|
||||||
resourcePool.releaseReaders(readers);
|
resourcePool.releaseReaders(readers);
|
||||||
|
|
|
||||||
|
|
@ -50,11 +50,14 @@ public class CramIntegrationTest extends WalkerTest {
|
||||||
{"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"},
|
{"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"},
|
||||||
{"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
|
{"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
|
||||||
{"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
|
{"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
|
||||||
|
{"PrintReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"},
|
||||||
|
{"CountLoci", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
|
||||||
|
{"CountReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "cramData")
|
@Test(dataProvider = "cramData")
|
||||||
public void testCRAM(String walker, String input, String args, String ext, String md5) {
|
public void testCram(String walker, String input, String args, String ext, String md5) {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
" -T Test" + walker + "Walker" +
|
" -T Test" + walker + "Walker" +
|
||||||
" -I " + publicTestDir + input +
|
" -I " + publicTestDir + input +
|
||||||
|
|
@ -64,25 +67,24 @@ public class CramIntegrationTest extends WalkerTest {
|
||||||
1, // just one output file
|
1, // just one output file
|
||||||
Collections.singletonList(ext),
|
Collections.singletonList(ext),
|
||||||
Collections.singletonList(md5));
|
Collections.singletonList(md5));
|
||||||
executeTest(String.format("testCRAM %s %s -> %s: %s", walker, input, ext, args), spec);
|
executeTest(String.format("testCram %s %s -> %s: %s", walker, input, ext, args), spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@DataProvider(name = "cramNoBaiData")
|
@DataProvider(name = "cramNoIndexData")
|
||||||
public Object[][] getCRAMNoBaiData() {
|
public Object[][] getCramNoIndexData() {
|
||||||
return new Object[][]{
|
return new Object[][]{
|
||||||
{"exampleCRAM-nobai-nocrai.cram"},
|
{"exampleCRAM-nobai-nocrai.cram"},
|
||||||
{"exampleCRAM-nobai-withcrai.cram"},
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "cramNoBaiData")
|
@Test(dataProvider = "cramNoIndexData")
|
||||||
public void testCRAMNoBai(String input) {
|
public void testCramNoIndex(String input) {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
" -T TestPrintReadsWalker" +
|
" -T TestPrintReadsWalker" +
|
||||||
" -I " + publicTestDir + input +
|
" -I " + publicTestDir + input +
|
||||||
" -R " + exampleFASTA,
|
" -R " + exampleFASTA,
|
||||||
0,
|
0,
|
||||||
UserException.class);
|
UserException.class);
|
||||||
executeTest(String.format("testCRAMNoBai %s", input), spec);
|
executeTest(String.format("testCramNoIndex %s", input), spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -52,14 +52,19 @@ public class GATKBAMIndexUnitTest extends BaseTest {
|
||||||
*/
|
*/
|
||||||
private GATKBAMIndex bamIndex;
|
private GATKBAMIndex bamIndex;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sequences.
|
||||||
|
*/
|
||||||
|
private SAMSequenceDictionary sequenceDictionary;
|
||||||
|
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public void init() throws FileNotFoundException {
|
public void init() throws FileNotFoundException {
|
||||||
SAMFileReader reader = new SAMFileReader(bamFile);
|
SAMFileReader reader = new SAMFileReader(bamFile);
|
||||||
SAMSequenceDictionary sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
|
this.sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
|
||||||
reader.close();
|
reader.close();
|
||||||
|
|
||||||
bamIndex = new GATKBAMIndex(bamIndexFile);
|
bamIndex = new GATKBAMIndex(bamIndexFile, sequenceDictionary);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -95,13 +100,13 @@ public class GATKBAMIndexUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test( expectedExceptions = UserException.MalformedFile.class )
|
@Test( expectedExceptions = UserException.MalformedFile.class )
|
||||||
public void testDetectTruncatedBamIndexWordBoundary() {
|
public void testDetectTruncatedBamIndexWordBoundary() {
|
||||||
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_word_boundary.bai"));
|
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_word_boundary.bai"), sequenceDictionary);
|
||||||
index.readReferenceSequence(0);
|
index.readReferenceSequence(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test( expectedExceptions = UserException.MalformedFile.class )
|
@Test( expectedExceptions = UserException.MalformedFile.class )
|
||||||
public void testDetectTruncatedBamIndexNonWordBoundary() {
|
public void testDetectTruncatedBamIndexNonWordBoundary() {
|
||||||
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_non_word_boundary.bai"));
|
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_non_word_boundary.bai"), sequenceDictionary);
|
||||||
index.readReferenceSequence(0);
|
index.readReferenceSequence(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -44,8 +44,8 @@
|
||||||
<test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners>
|
<test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners>
|
||||||
|
|
||||||
<!-- Version numbers for picard and htsjdk -->
|
<!-- Version numbers for picard and htsjdk -->
|
||||||
<htsjdk.version>1.138</htsjdk.version>
|
<htsjdk.version>1.139</htsjdk.version>
|
||||||
<picard.version>1.138</picard.version>
|
<picard.version>1.139</picard.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<!-- Dependency configuration (versions, etc.) -->
|
<!-- Dependency configuration (versions, etc.) -->
|
||||||
|
|
@ -718,6 +718,17 @@
|
||||||
<name>GATK Public Local Repository</name>
|
<name>GATK Public Local Repository</name>
|
||||||
<url>file:${gatk.basedir}/public/repo</url>
|
<url>file:${gatk.basedir}/public/repo</url>
|
||||||
</repository>
|
</repository>
|
||||||
|
<repository>
|
||||||
|
<releases>
|
||||||
|
<enabled>false</enabled>
|
||||||
|
</releases>
|
||||||
|
<snapshots>
|
||||||
|
<enabled>true</enabled>
|
||||||
|
</snapshots>
|
||||||
|
<id>broad.artifactory.snapshots</id>
|
||||||
|
<name>Broad Institute Artifactory SNAPSHOTs</name>
|
||||||
|
<url>https://artifactory.broadinstitute.org/artifactory/libs-snapshot</url>
|
||||||
|
</repository>
|
||||||
</repositories>
|
</repositories>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue