Use `SamIndexes.asBaiSeekableStreamOrNull()` in GATKBAMIndex to support `.cram.crai` index files.

Fixed other IntelliJ IDEA warnings in GATKBAMIndex.
Updated example .cram files to match versions generated by current GATK/HTSJDK.
Bumped HTSJDK and Picard to 1.139 releases.
Added support for using `-SNAPSHOT` versions of HTSJDK in the future.
This commit is contained in:
Khalid Shakir 2015-08-12 01:23:54 -03:00
parent b5cda3e7dc
commit 24e24b9468
9 changed files with 60 additions and 26 deletions

View File

@ -161,6 +161,7 @@
<configuration>
<outputDirectory>${gatk.executable.directory}/lib</outputDirectory>
<includeScope>runtime</includeScope>
<useBaseVersion>false</useBaseVersion>
</configuration>
</execution>
</executions>

View File

@ -25,12 +25,10 @@
package org.broadinstitute.gatk.engine.datasources.reads;
import htsjdk.samtools.Bin;
import htsjdk.samtools.GATKBin;
import htsjdk.samtools.GATKChunk;
import htsjdk.samtools.LinearIndex;
import htsjdk.samtools.*;
import htsjdk.samtools.seekablestream.SeekableBufferedStream;
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
@ -70,10 +68,11 @@ public class GATKBAMIndex {
*/
public static final int MAX_BINS = 37450; // =(8^6-1)/7+1
private final SAMSequenceDictionary sequenceDictionary;
private final File mFile;
//TODO: figure out a good value for this buffer size
private final int BUFFERED_STREAM_BUFFER_SIZE = 8192;
private static final int BUFFERED_STREAM_BUFFER_SIZE = 8192;
/**
* Number of sequences stored in this index.
@ -86,11 +85,14 @@ public class GATKBAMIndex {
private final long[] sequenceStartCache;
private SeekableFileStream fileStream;
private SeekableStream baiStream;
private SeekableBufferedStream bufferedStream;
private long fileLength;
public GATKBAMIndex(final File file) {
public GATKBAMIndex(final File file, final SAMSequenceDictionary sequenceDictionary) {
mFile = file;
this.sequenceDictionary = sequenceDictionary;
// Open the file stream.
openIndexFile();
@ -127,12 +129,12 @@ public class GATKBAMIndex {
skipToSequence(referenceSequence);
int binCount = readInteger();
List<GATKBin> bins = new ArrayList<GATKBin>();
List<GATKBin> bins = new ArrayList<>();
for (int binNumber = 0; binNumber < binCount; binNumber++) {
final int indexBin = readInteger();
final int nChunks = readInteger();
List<GATKChunk> chunks = new ArrayList<GATKChunk>(nChunks);
List<GATKChunk> chunks = new ArrayList<>(nChunks);
long[] rawChunkData = readLongs(nChunks*2);
for (int ci = 0; ci < nChunks; ci++) {
final long chunkBegin = rawChunkData[ci*2];
@ -289,7 +291,8 @@ public class GATKBAMIndex {
final int nBins = readInteger();
// System.out.println("# nBins: " + nBins);
for (int j = 0; j < nBins; j++) {
final int bin = readInteger();
/* final int bin = */
readInteger();
final int nChunks = readInteger();
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
skipBytes(16 * nChunks);
@ -308,7 +311,8 @@ public class GATKBAMIndex {
private void openIndexFile() {
try {
fileStream = new SeekableFileStream(mFile);
bufferedStream = new SeekableBufferedStream(fileStream,BUFFERED_STREAM_BUFFER_SIZE);
baiStream = SamIndexes.asBaiSeekableStreamOrNull(fileStream, sequenceDictionary);
bufferedStream = new SeekableBufferedStream(baiStream, BUFFERED_STREAM_BUFFER_SIZE);
fileLength=bufferedStream.length();
}
catch (IOException exc) {
@ -319,6 +323,7 @@ public class GATKBAMIndex {
private void closeIndexFile() {
try {
bufferedStream.close();
baiStream.close();
fileStream.close();
fileLength = -1;
}

View File

@ -28,6 +28,7 @@ package org.broadinstitute.gatk.engine.datasources.reads;
import htsjdk.samtools.MergingSamRecordIterator;
import htsjdk.samtools.SamFileHeaderMerger;
import htsjdk.samtools.*;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.RuntimeIOException;
@ -372,10 +373,19 @@ public class SAMDataSource {
originalToMergedReadGroupMappings.put(id,mappingToMerged);
}
final SAMSequenceDictionary samSequenceDictionary;
if (referenceFile == null) {
samSequenceDictionary = mergedHeader.getSequenceDictionary();
} else {
samSequenceDictionary = ReferenceSequenceFileFactory.
getReferenceSequenceFile(referenceFile).
getSequenceDictionary();
}
for(SAMReaderID id: readerIDs) {
File indexFile = findIndexFile(id.getSamFile());
if(indexFile != null)
bamIndices.put(id,new GATKBAMIndex(indexFile));
bamIndices.put(id,new GATKBAMIndex(indexFile, samSequenceDictionary));
}
resourcePool.releaseReaders(readers);

View File

@ -50,11 +50,14 @@ public class CramIntegrationTest extends WalkerTest {
{"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"},
{"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
{"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
{"PrintReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"},
{"CountLoci", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
{"CountReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
};
}
@Test(dataProvider = "cramData")
public void testCRAM(String walker, String input, String args, String ext, String md5) {
public void testCram(String walker, String input, String args, String ext, String md5) {
WalkerTestSpec spec = new WalkerTestSpec(
" -T Test" + walker + "Walker" +
" -I " + publicTestDir + input +
@ -64,25 +67,24 @@ public class CramIntegrationTest extends WalkerTest {
1, // just one output file
Collections.singletonList(ext),
Collections.singletonList(md5));
executeTest(String.format("testCRAM %s %s -> %s: %s", walker, input, ext, args), spec);
executeTest(String.format("testCram %s %s -> %s: %s", walker, input, ext, args), spec);
}
@DataProvider(name = "cramNoBaiData")
public Object[][] getCRAMNoBaiData() {
@DataProvider(name = "cramNoIndexData")
public Object[][] getCramNoIndexData() {
return new Object[][]{
{"exampleCRAM-nobai-nocrai.cram"},
{"exampleCRAM-nobai-withcrai.cram"},
};
}
@Test(dataProvider = "cramNoBaiData")
public void testCRAMNoBai(String input) {
@Test(dataProvider = "cramNoIndexData")
public void testCramNoIndex(String input) {
WalkerTestSpec spec = new WalkerTestSpec(
" -T TestPrintReadsWalker" +
" -I " + publicTestDir + input +
" -R " + exampleFASTA,
0,
UserException.class);
executeTest(String.format("testCRAMNoBai %s", input), spec);
executeTest(String.format("testCramNoIndex %s", input), spec);
}
}

View File

@ -52,14 +52,19 @@ public class GATKBAMIndexUnitTest extends BaseTest {
*/
private GATKBAMIndex bamIndex;
/**
* Sequences.
*/
private SAMSequenceDictionary sequenceDictionary;
@BeforeClass
public void init() throws FileNotFoundException {
SAMFileReader reader = new SAMFileReader(bamFile);
SAMSequenceDictionary sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
this.sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
reader.close();
bamIndex = new GATKBAMIndex(bamIndexFile);
bamIndex = new GATKBAMIndex(bamIndexFile, sequenceDictionary);
}
@Test
@ -95,13 +100,13 @@ public class GATKBAMIndexUnitTest extends BaseTest {
@Test( expectedExceptions = UserException.MalformedFile.class )
public void testDetectTruncatedBamIndexWordBoundary() {
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_word_boundary.bai"));
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_word_boundary.bai"), sequenceDictionary);
index.readReferenceSequence(0);
}
@Test( expectedExceptions = UserException.MalformedFile.class )
public void testDetectTruncatedBamIndexNonWordBoundary() {
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_non_word_boundary.bai"));
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_non_word_boundary.bai"), sequenceDictionary);
index.readReferenceSequence(0);
}

View File

@ -44,8 +44,8 @@
<test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners>
<!-- Version numbers for picard and htsjdk -->
<htsjdk.version>1.138</htsjdk.version>
<picard.version>1.138</picard.version>
<htsjdk.version>1.139</htsjdk.version>
<picard.version>1.139</picard.version>
</properties>
<!-- Dependency configuration (versions, etc.) -->
@ -718,6 +718,17 @@
<name>GATK Public Local Repository</name>
<url>file:${gatk.basedir}/public/repo</url>
</repository>
<repository>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
<id>broad.artifactory.snapshots</id>
<name>Broad Institute Artifactory SNAPSHOTs</name>
<url>https://artifactory.broadinstitute.org/artifactory/libs-snapshot</url>
</repository>
</repositories>
</project>