diff --git a/pom.xml b/pom.xml
index da256634c..6490280d1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -161,6 +161,7 @@
${gatk.executable.directory}/lib
runtime
+ false
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndex.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndex.java
index eba6c017f..b1d54d2b1 100644
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndex.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndex.java
@@ -25,12 +25,10 @@
package org.broadinstitute.gatk.engine.datasources.reads;
-import htsjdk.samtools.Bin;
-import htsjdk.samtools.GATKBin;
-import htsjdk.samtools.GATKChunk;
-import htsjdk.samtools.LinearIndex;
+import htsjdk.samtools.*;
import htsjdk.samtools.seekablestream.SeekableBufferedStream;
import htsjdk.samtools.seekablestream.SeekableFileStream;
+import htsjdk.samtools.seekablestream.SeekableStream;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
@@ -70,10 +68,11 @@ public class GATKBAMIndex {
*/
public static final int MAX_BINS = 37450; // =(8^6-1)/7+1
+ private final SAMSequenceDictionary sequenceDictionary;
private final File mFile;
//TODO: figure out a good value for this buffer size
- private final int BUFFERED_STREAM_BUFFER_SIZE = 8192;
+ private static final int BUFFERED_STREAM_BUFFER_SIZE = 8192;
/**
* Number of sequences stored in this index.
@@ -86,11 +85,14 @@ public class GATKBAMIndex {
private final long[] sequenceStartCache;
private SeekableFileStream fileStream;
+ private SeekableStream baiStream;
private SeekableBufferedStream bufferedStream;
private long fileLength;
- public GATKBAMIndex(final File file) {
+ public GATKBAMIndex(final File file, final SAMSequenceDictionary sequenceDictionary) {
mFile = file;
+ this.sequenceDictionary = sequenceDictionary;
+
// Open the file stream.
openIndexFile();
@@ -127,12 +129,12 @@ public class GATKBAMIndex {
skipToSequence(referenceSequence);
int binCount = readInteger();
- List bins = new ArrayList();
+ List bins = new ArrayList<>();
for (int binNumber = 0; binNumber < binCount; binNumber++) {
final int indexBin = readInteger();
final int nChunks = readInteger();
- List chunks = new ArrayList(nChunks);
+ List chunks = new ArrayList<>(nChunks);
long[] rawChunkData = readLongs(nChunks*2);
for (int ci = 0; ci < nChunks; ci++) {
final long chunkBegin = rawChunkData[ci*2];
@@ -289,7 +291,8 @@ public class GATKBAMIndex {
final int nBins = readInteger();
// System.out.println("# nBins: " + nBins);
for (int j = 0; j < nBins; j++) {
- final int bin = readInteger();
+ /* final int bin = */ // the bin number is read and discarded; it is no longer stored in a local
+ readInteger();
final int nChunks = readInteger();
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
skipBytes(16 * nChunks);
@@ -308,7 +311,8 @@ public class GATKBAMIndex {
private void openIndexFile() {
try {
fileStream = new SeekableFileStream(mFile);
- bufferedStream = new SeekableBufferedStream(fileStream,BUFFERED_STREAM_BUFFER_SIZE);
+ baiStream = SamIndexes.asBaiSeekableStreamOrNull(fileStream, sequenceDictionary);
+ bufferedStream = new SeekableBufferedStream(baiStream, BUFFERED_STREAM_BUFFER_SIZE);
fileLength=bufferedStream.length();
}
catch (IOException exc) {
@@ -319,6 +323,7 @@ public class GATKBAMIndex {
private void closeIndexFile() {
try {
bufferedStream.close();
+ baiStream.close();
fileStream.close();
fileLength = -1;
}
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/SAMDataSource.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/SAMDataSource.java
index b735ff833..c97201b09 100644
--- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/SAMDataSource.java
+++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/SAMDataSource.java
@@ -28,6 +28,7 @@ package org.broadinstitute.gatk.engine.datasources.reads;
import htsjdk.samtools.MergingSamRecordIterator;
import htsjdk.samtools.SamFileHeaderMerger;
import htsjdk.samtools.*;
+import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.RuntimeIOException;
@@ -372,10 +373,19 @@ public class SAMDataSource {
originalToMergedReadGroupMappings.put(id,mappingToMerged);
}
+ final SAMSequenceDictionary samSequenceDictionary;
+ if (referenceFile == null) {
+ samSequenceDictionary = mergedHeader.getSequenceDictionary();
+ } else {
+ samSequenceDictionary = ReferenceSequenceFileFactory.
+ getReferenceSequenceFile(referenceFile).
+ getSequenceDictionary();
+ }
+
for(SAMReaderID id: readerIDs) {
File indexFile = findIndexFile(id.getSamFile());
if(indexFile != null)
- bamIndices.put(id,new GATKBAMIndex(indexFile));
+ bamIndices.put(id,new GATKBAMIndex(indexFile, samSequenceDictionary));
}
resourcePool.releaseReaders(readers);
diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java
index 2748b1a81..f1e832d2c 100644
--- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java
+++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java
@@ -50,11 +50,14 @@ public class CramIntegrationTest extends WalkerTest {
{"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"},
{"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
{"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
+ {"PrintReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"},
+ {"CountLoci", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
+ {"CountReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
};
}
@Test(dataProvider = "cramData")
- public void testCRAM(String walker, String input, String args, String ext, String md5) {
+ public void testCram(String walker, String input, String args, String ext, String md5) {
WalkerTestSpec spec = new WalkerTestSpec(
" -T Test" + walker + "Walker" +
" -I " + publicTestDir + input +
@@ -64,25 +67,24 @@ public class CramIntegrationTest extends WalkerTest {
1, // just one output file
Collections.singletonList(ext),
Collections.singletonList(md5));
- executeTest(String.format("testCRAM %s %s -> %s: %s", walker, input, ext, args), spec);
+ executeTest(String.format("testCram %s %s -> %s: %s", walker, input, ext, args), spec);
}
- @DataProvider(name = "cramNoBaiData")
- public Object[][] getCRAMNoBaiData() {
+ @DataProvider(name = "cramNoIndexData")
+ public Object[][] getCramNoIndexData() {
return new Object[][]{
{"exampleCRAM-nobai-nocrai.cram"},
- {"exampleCRAM-nobai-withcrai.cram"},
};
}
- @Test(dataProvider = "cramNoBaiData")
- public void testCRAMNoBai(String input) {
+ @Test(dataProvider = "cramNoIndexData")
+ public void testCramNoIndex(String input) {
WalkerTestSpec spec = new WalkerTestSpec(
" -T TestPrintReadsWalker" +
" -I " + publicTestDir + input +
" -R " + exampleFASTA,
0,
UserException.class);
- executeTest(String.format("testCRAMNoBai %s", input), spec);
+ executeTest(String.format("testCramNoIndex %s", input), spec);
}
}
diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndexUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndexUnitTest.java
index 8e7f86de1..13f356959 100644
--- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndexUnitTest.java
+++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndexUnitTest.java
@@ -52,14 +52,19 @@ public class GATKBAMIndexUnitTest extends BaseTest {
*/
private GATKBAMIndex bamIndex;
-
+ /**
+ * Sequence dictionary taken from the test BAM's header; passed to each GATKBAMIndex constructed in this test.
+ */
+ private SAMSequenceDictionary sequenceDictionary;
+
+
@BeforeClass
public void init() throws FileNotFoundException {
SAMFileReader reader = new SAMFileReader(bamFile);
- SAMSequenceDictionary sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
+ this.sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
reader.close();
- bamIndex = new GATKBAMIndex(bamIndexFile);
+ bamIndex = new GATKBAMIndex(bamIndexFile, sequenceDictionary);
}
@Test
@@ -95,13 +100,13 @@ public class GATKBAMIndexUnitTest extends BaseTest {
@Test( expectedExceptions = UserException.MalformedFile.class )
public void testDetectTruncatedBamIndexWordBoundary() {
- GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_word_boundary.bai"));
+ GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_word_boundary.bai"), sequenceDictionary);
index.readReferenceSequence(0);
}
@Test( expectedExceptions = UserException.MalformedFile.class )
public void testDetectTruncatedBamIndexNonWordBoundary() {
- GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_non_word_boundary.bai"));
+ GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_non_word_boundary.bai"), sequenceDictionary);
index.readReferenceSequence(0);
}
diff --git a/public/gatk-root/pom.xml b/public/gatk-root/pom.xml
index a67020fdd..11fcd7c98 100644
--- a/public/gatk-root/pom.xml
+++ b/public/gatk-root/pom.xml
@@ -44,8 +44,8 @@
org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter
- 1.138
- 1.138
+ 1.139
+ 1.139
@@ -718,6 +718,17 @@
GATK Public Local Repository
file:${gatk.basedir}/public/repo
+
+
+ false
+
+
+ true
+
+ broad.artifactory.snapshots
+ Broad Institute Artifactory SNAPSHOTs
+ https://artifactory.broadinstitute.org/artifactory/libs-snapshot
+
diff --git a/public/gatk-utils/src/test/resources/exampleCRAM-nobai-nocrai.cram b/public/gatk-utils/src/test/resources/exampleCRAM-nobai-nocrai.cram
index b688b9c28..334090031 100644
Binary files a/public/gatk-utils/src/test/resources/exampleCRAM-nobai-nocrai.cram and b/public/gatk-utils/src/test/resources/exampleCRAM-nobai-nocrai.cram differ
diff --git a/public/gatk-utils/src/test/resources/exampleCRAM-nobai-withcrai.cram b/public/gatk-utils/src/test/resources/exampleCRAM-nobai-withcrai.cram
index b688b9c28..334090031 100644
Binary files a/public/gatk-utils/src/test/resources/exampleCRAM-nobai-withcrai.cram and b/public/gatk-utils/src/test/resources/exampleCRAM-nobai-withcrai.cram differ
diff --git a/public/gatk-utils/src/test/resources/exampleCRAM.cram b/public/gatk-utils/src/test/resources/exampleCRAM.cram
index b688b9c28..334090031 100644
Binary files a/public/gatk-utils/src/test/resources/exampleCRAM.cram and b/public/gatk-utils/src/test/resources/exampleCRAM.cram differ