Use `SamIndexes.asBaiSeekableStreamOrNull()` to support `.cram.crai` index files.

Addressed other IntelliJ IDEA warnings in GATKBAMIndex.
Updated example .cram files to match versions generated by current GATK/HTSJDK.
Bumped HTSJDK and Picard to 1.139 releases.
Added support for using `-SNAPSHOT` versions of HTSJDK in the future.
This commit is contained in:
Khalid Shakir 2015-08-12 01:23:54 -03:00
parent b5cda3e7dc
commit 24e24b9468
9 changed files with 60 additions and 26 deletions

View File

@ -161,6 +161,7 @@
<configuration> <configuration>
<outputDirectory>${gatk.executable.directory}/lib</outputDirectory> <outputDirectory>${gatk.executable.directory}/lib</outputDirectory>
<includeScope>runtime</includeScope> <includeScope>runtime</includeScope>
<useBaseVersion>false</useBaseVersion>
</configuration> </configuration>
</execution> </execution>
</executions> </executions>

View File

@ -25,12 +25,10 @@
package org.broadinstitute.gatk.engine.datasources.reads; package org.broadinstitute.gatk.engine.datasources.reads;
import htsjdk.samtools.Bin; import htsjdk.samtools.*;
import htsjdk.samtools.GATKBin;
import htsjdk.samtools.GATKChunk;
import htsjdk.samtools.LinearIndex;
import htsjdk.samtools.seekablestream.SeekableBufferedStream; import htsjdk.samtools.seekablestream.SeekableBufferedStream;
import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.exceptions.UserException;
@ -70,10 +68,11 @@ public class GATKBAMIndex {
*/ */
public static final int MAX_BINS = 37450; // =(8^6-1)/7+1 public static final int MAX_BINS = 37450; // =(8^6-1)/7+1
private final SAMSequenceDictionary sequenceDictionary;
private final File mFile; private final File mFile;
//TODO: figure out a good value for this buffer size //TODO: figure out a good value for this buffer size
private final int BUFFERED_STREAM_BUFFER_SIZE = 8192; private static final int BUFFERED_STREAM_BUFFER_SIZE = 8192;
/** /**
* Number of sequences stored in this index. * Number of sequences stored in this index.
@ -86,11 +85,14 @@ public class GATKBAMIndex {
private final long[] sequenceStartCache; private final long[] sequenceStartCache;
private SeekableFileStream fileStream; private SeekableFileStream fileStream;
private SeekableStream baiStream;
private SeekableBufferedStream bufferedStream; private SeekableBufferedStream bufferedStream;
private long fileLength; private long fileLength;
public GATKBAMIndex(final File file) { public GATKBAMIndex(final File file, final SAMSequenceDictionary sequenceDictionary) {
mFile = file; mFile = file;
this.sequenceDictionary = sequenceDictionary;
// Open the file stream. // Open the file stream.
openIndexFile(); openIndexFile();
@ -127,12 +129,12 @@ public class GATKBAMIndex {
skipToSequence(referenceSequence); skipToSequence(referenceSequence);
int binCount = readInteger(); int binCount = readInteger();
List<GATKBin> bins = new ArrayList<GATKBin>(); List<GATKBin> bins = new ArrayList<>();
for (int binNumber = 0; binNumber < binCount; binNumber++) { for (int binNumber = 0; binNumber < binCount; binNumber++) {
final int indexBin = readInteger(); final int indexBin = readInteger();
final int nChunks = readInteger(); final int nChunks = readInteger();
List<GATKChunk> chunks = new ArrayList<GATKChunk>(nChunks); List<GATKChunk> chunks = new ArrayList<>(nChunks);
long[] rawChunkData = readLongs(nChunks*2); long[] rawChunkData = readLongs(nChunks*2);
for (int ci = 0; ci < nChunks; ci++) { for (int ci = 0; ci < nChunks; ci++) {
final long chunkBegin = rawChunkData[ci*2]; final long chunkBegin = rawChunkData[ci*2];
@ -289,7 +291,8 @@ public class GATKBAMIndex {
final int nBins = readInteger(); final int nBins = readInteger();
// System.out.println("# nBins: " + nBins); // System.out.println("# nBins: " + nBins);
for (int j = 0; j < nBins; j++) { for (int j = 0; j < nBins; j++) {
final int bin = readInteger(); /* final int bin = */
readInteger();
final int nChunks = readInteger(); final int nChunks = readInteger();
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks); // System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
skipBytes(16 * nChunks); skipBytes(16 * nChunks);
@ -308,7 +311,8 @@ public class GATKBAMIndex {
private void openIndexFile() { private void openIndexFile() {
try { try {
fileStream = new SeekableFileStream(mFile); fileStream = new SeekableFileStream(mFile);
bufferedStream = new SeekableBufferedStream(fileStream,BUFFERED_STREAM_BUFFER_SIZE); baiStream = SamIndexes.asBaiSeekableStreamOrNull(fileStream, sequenceDictionary);
bufferedStream = new SeekableBufferedStream(baiStream, BUFFERED_STREAM_BUFFER_SIZE);
fileLength=bufferedStream.length(); fileLength=bufferedStream.length();
} }
catch (IOException exc) { catch (IOException exc) {
@ -319,6 +323,7 @@ public class GATKBAMIndex {
private void closeIndexFile() { private void closeIndexFile() {
try { try {
bufferedStream.close(); bufferedStream.close();
baiStream.close();
fileStream.close(); fileStream.close();
fileLength = -1; fileLength = -1;
} }

View File

@ -28,6 +28,7 @@ package org.broadinstitute.gatk.engine.datasources.reads;
import htsjdk.samtools.MergingSamRecordIterator; import htsjdk.samtools.MergingSamRecordIterator;
import htsjdk.samtools.SamFileHeaderMerger; import htsjdk.samtools.SamFileHeaderMerger;
import htsjdk.samtools.*; import htsjdk.samtools.*;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.RuntimeIOException; import htsjdk.samtools.util.RuntimeIOException;
@ -372,10 +373,19 @@ public class SAMDataSource {
originalToMergedReadGroupMappings.put(id,mappingToMerged); originalToMergedReadGroupMappings.put(id,mappingToMerged);
} }
final SAMSequenceDictionary samSequenceDictionary;
if (referenceFile == null) {
samSequenceDictionary = mergedHeader.getSequenceDictionary();
} else {
samSequenceDictionary = ReferenceSequenceFileFactory.
getReferenceSequenceFile(referenceFile).
getSequenceDictionary();
}
for(SAMReaderID id: readerIDs) { for(SAMReaderID id: readerIDs) {
File indexFile = findIndexFile(id.getSamFile()); File indexFile = findIndexFile(id.getSamFile());
if(indexFile != null) if(indexFile != null)
bamIndices.put(id,new GATKBAMIndex(indexFile)); bamIndices.put(id,new GATKBAMIndex(indexFile, samSequenceDictionary));
} }
resourcePool.releaseReaders(readers); resourcePool.releaseReaders(readers);

View File

@ -50,11 +50,14 @@ public class CramIntegrationTest extends WalkerTest {
{"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"}, {"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"},
{"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"}, {"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
{"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"}, {"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
{"PrintReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"},
{"CountLoci", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
{"CountReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
}; };
} }
@Test(dataProvider = "cramData") @Test(dataProvider = "cramData")
public void testCRAM(String walker, String input, String args, String ext, String md5) { public void testCram(String walker, String input, String args, String ext, String md5) {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
" -T Test" + walker + "Walker" + " -T Test" + walker + "Walker" +
" -I " + publicTestDir + input + " -I " + publicTestDir + input +
@ -64,25 +67,24 @@ public class CramIntegrationTest extends WalkerTest {
1, // just one output file 1, // just one output file
Collections.singletonList(ext), Collections.singletonList(ext),
Collections.singletonList(md5)); Collections.singletonList(md5));
executeTest(String.format("testCRAM %s %s -> %s: %s", walker, input, ext, args), spec); executeTest(String.format("testCram %s %s -> %s: %s", walker, input, ext, args), spec);
} }
@DataProvider(name = "cramNoBaiData") @DataProvider(name = "cramNoIndexData")
public Object[][] getCRAMNoBaiData() { public Object[][] getCramNoIndexData() {
return new Object[][]{ return new Object[][]{
{"exampleCRAM-nobai-nocrai.cram"}, {"exampleCRAM-nobai-nocrai.cram"},
{"exampleCRAM-nobai-withcrai.cram"},
}; };
} }
@Test(dataProvider = "cramNoBaiData") @Test(dataProvider = "cramNoIndexData")
public void testCRAMNoBai(String input) { public void testCramNoIndex(String input) {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
" -T TestPrintReadsWalker" + " -T TestPrintReadsWalker" +
" -I " + publicTestDir + input + " -I " + publicTestDir + input +
" -R " + exampleFASTA, " -R " + exampleFASTA,
0, 0,
UserException.class); UserException.class);
executeTest(String.format("testCRAMNoBai %s", input), spec); executeTest(String.format("testCramNoIndex %s", input), spec);
} }
} }

View File

@ -52,14 +52,19 @@ public class GATKBAMIndexUnitTest extends BaseTest {
*/ */
private GATKBAMIndex bamIndex; private GATKBAMIndex bamIndex;
/**
* Sequences.
*/
private SAMSequenceDictionary sequenceDictionary;
@BeforeClass @BeforeClass
public void init() throws FileNotFoundException { public void init() throws FileNotFoundException {
SAMFileReader reader = new SAMFileReader(bamFile); SAMFileReader reader = new SAMFileReader(bamFile);
SAMSequenceDictionary sequenceDictionary = reader.getFileHeader().getSequenceDictionary(); this.sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
reader.close(); reader.close();
bamIndex = new GATKBAMIndex(bamIndexFile); bamIndex = new GATKBAMIndex(bamIndexFile, sequenceDictionary);
} }
@Test @Test
@ -95,13 +100,13 @@ public class GATKBAMIndexUnitTest extends BaseTest {
@Test( expectedExceptions = UserException.MalformedFile.class ) @Test( expectedExceptions = UserException.MalformedFile.class )
public void testDetectTruncatedBamIndexWordBoundary() { public void testDetectTruncatedBamIndexWordBoundary() {
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_word_boundary.bai")); GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_word_boundary.bai"), sequenceDictionary);
index.readReferenceSequence(0); index.readReferenceSequence(0);
} }
@Test( expectedExceptions = UserException.MalformedFile.class ) @Test( expectedExceptions = UserException.MalformedFile.class )
public void testDetectTruncatedBamIndexNonWordBoundary() { public void testDetectTruncatedBamIndexNonWordBoundary() {
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_non_word_boundary.bai")); GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_non_word_boundary.bai"), sequenceDictionary);
index.readReferenceSequence(0); index.readReferenceSequence(0);
} }

View File

@ -44,8 +44,8 @@
<test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners> <test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners>
<!-- Version numbers for picard and htsjdk --> <!-- Version numbers for picard and htsjdk -->
<htsjdk.version>1.138</htsjdk.version> <htsjdk.version>1.139</htsjdk.version>
<picard.version>1.138</picard.version> <picard.version>1.139</picard.version>
</properties> </properties>
<!-- Dependency configuration (versions, etc.) --> <!-- Dependency configuration (versions, etc.) -->
@ -718,6 +718,17 @@
<name>GATK Public Local Repository</name> <name>GATK Public Local Repository</name>
<url>file:${gatk.basedir}/public/repo</url> <url>file:${gatk.basedir}/public/repo</url>
</repository> </repository>
<repository>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
<id>broad.artifactory.snapshots</id>
<name>Broad Institute Artifactory SNAPSHOTs</name>
<url>https://artifactory.broadinstitute.org/artifactory/libs-snapshot</url>
</repository>
</repositories> </repositories>
</project> </project>