Merge pull request #1148 from broadinstitute/ks_cram_crai
Using `SamIndexes.asBaiSeekableStreamOrNull()` to support `.cram.crai`.
This commit is contained in:
commit
7e894f3b4c
1
pom.xml
1
pom.xml
|
|
@ -161,6 +161,7 @@
|
|||
<configuration>
|
||||
<outputDirectory>${gatk.executable.directory}/lib</outputDirectory>
|
||||
<includeScope>runtime</includeScope>
|
||||
<useBaseVersion>false</useBaseVersion>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
|
|
|
|||
|
|
@ -25,12 +25,10 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.datasources.reads;
|
||||
|
||||
import htsjdk.samtools.Bin;
|
||||
import htsjdk.samtools.GATKBin;
|
||||
import htsjdk.samtools.GATKChunk;
|
||||
import htsjdk.samtools.LinearIndex;
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.seekablestream.SeekableBufferedStream;
|
||||
import htsjdk.samtools.seekablestream.SeekableFileStream;
|
||||
import htsjdk.samtools.seekablestream.SeekableStream;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
|
||||
|
|
@ -70,10 +68,11 @@ public class GATKBAMIndex {
|
|||
*/
|
||||
public static final int MAX_BINS = 37450; // =(8^6-1)/7+1
|
||||
|
||||
private final SAMSequenceDictionary sequenceDictionary;
|
||||
private final File mFile;
|
||||
|
||||
//TODO: figure out a good value for this buffer size
|
||||
private final int BUFFERED_STREAM_BUFFER_SIZE = 8192;
|
||||
private static final int BUFFERED_STREAM_BUFFER_SIZE = 8192;
|
||||
|
||||
/**
|
||||
* Number of sequences stored in this index.
|
||||
|
|
@ -86,11 +85,14 @@ public class GATKBAMIndex {
|
|||
private final long[] sequenceStartCache;
|
||||
|
||||
private SeekableFileStream fileStream;
|
||||
private SeekableStream baiStream;
|
||||
private SeekableBufferedStream bufferedStream;
|
||||
private long fileLength;
|
||||
|
||||
public GATKBAMIndex(final File file) {
|
||||
public GATKBAMIndex(final File file, final SAMSequenceDictionary sequenceDictionary) {
|
||||
mFile = file;
|
||||
this.sequenceDictionary = sequenceDictionary;
|
||||
|
||||
// Open the file stream.
|
||||
openIndexFile();
|
||||
|
||||
|
|
@ -127,12 +129,12 @@ public class GATKBAMIndex {
|
|||
skipToSequence(referenceSequence);
|
||||
|
||||
int binCount = readInteger();
|
||||
List<GATKBin> bins = new ArrayList<GATKBin>();
|
||||
List<GATKBin> bins = new ArrayList<>();
|
||||
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
||||
final int indexBin = readInteger();
|
||||
final int nChunks = readInteger();
|
||||
|
||||
List<GATKChunk> chunks = new ArrayList<GATKChunk>(nChunks);
|
||||
List<GATKChunk> chunks = new ArrayList<>(nChunks);
|
||||
long[] rawChunkData = readLongs(nChunks*2);
|
||||
for (int ci = 0; ci < nChunks; ci++) {
|
||||
final long chunkBegin = rawChunkData[ci*2];
|
||||
|
|
@ -289,7 +291,8 @@ public class GATKBAMIndex {
|
|||
final int nBins = readInteger();
|
||||
// System.out.println("# nBins: " + nBins);
|
||||
for (int j = 0; j < nBins; j++) {
|
||||
final int bin = readInteger();
|
||||
/* final int bin = */
|
||||
readInteger();
|
||||
final int nChunks = readInteger();
|
||||
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
|
||||
skipBytes(16 * nChunks);
|
||||
|
|
@ -308,7 +311,8 @@ public class GATKBAMIndex {
|
|||
private void openIndexFile() {
|
||||
try {
|
||||
fileStream = new SeekableFileStream(mFile);
|
||||
bufferedStream = new SeekableBufferedStream(fileStream,BUFFERED_STREAM_BUFFER_SIZE);
|
||||
baiStream = SamIndexes.asBaiSeekableStreamOrNull(fileStream, sequenceDictionary);
|
||||
bufferedStream = new SeekableBufferedStream(baiStream, BUFFERED_STREAM_BUFFER_SIZE);
|
||||
fileLength=bufferedStream.length();
|
||||
}
|
||||
catch (IOException exc) {
|
||||
|
|
@ -319,6 +323,7 @@ public class GATKBAMIndex {
|
|||
private void closeIndexFile() {
|
||||
try {
|
||||
bufferedStream.close();
|
||||
baiStream.close();
|
||||
fileStream.close();
|
||||
fileLength = -1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.gatk.engine.datasources.reads;
|
|||
import htsjdk.samtools.MergingSamRecordIterator;
|
||||
import htsjdk.samtools.SamFileHeaderMerger;
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
|
||||
import htsjdk.samtools.util.CloseableIterator;
|
||||
import htsjdk.samtools.util.CloserUtil;
|
||||
import htsjdk.samtools.util.RuntimeIOException;
|
||||
|
|
@ -372,10 +373,19 @@ public class SAMDataSource {
|
|||
originalToMergedReadGroupMappings.put(id,mappingToMerged);
|
||||
}
|
||||
|
||||
final SAMSequenceDictionary samSequenceDictionary;
|
||||
if (referenceFile == null) {
|
||||
samSequenceDictionary = mergedHeader.getSequenceDictionary();
|
||||
} else {
|
||||
samSequenceDictionary = ReferenceSequenceFileFactory.
|
||||
getReferenceSequenceFile(referenceFile).
|
||||
getSequenceDictionary();
|
||||
}
|
||||
|
||||
for(SAMReaderID id: readerIDs) {
|
||||
File indexFile = findIndexFile(id.getSamFile());
|
||||
if(indexFile != null)
|
||||
bamIndices.put(id,new GATKBAMIndex(indexFile));
|
||||
bamIndices.put(id,new GATKBAMIndex(indexFile, samSequenceDictionary));
|
||||
}
|
||||
|
||||
resourcePool.releaseReaders(readers);
|
||||
|
|
|
|||
|
|
@ -50,11 +50,14 @@ public class CramIntegrationTest extends WalkerTest {
|
|||
{"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"},
|
||||
{"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
|
||||
{"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
|
||||
{"PrintReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "bam", "2e1b175c9b36154e2bbd1a23ebaf4c22"},
|
||||
{"CountLoci", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
|
||||
{"CountReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "cramData")
|
||||
public void testCRAM(String walker, String input, String args, String ext, String md5) {
|
||||
public void testCram(String walker, String input, String args, String ext, String md5) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
" -T Test" + walker + "Walker" +
|
||||
" -I " + publicTestDir + input +
|
||||
|
|
@ -64,25 +67,24 @@ public class CramIntegrationTest extends WalkerTest {
|
|||
1, // just one output file
|
||||
Collections.singletonList(ext),
|
||||
Collections.singletonList(md5));
|
||||
executeTest(String.format("testCRAM %s %s -> %s: %s", walker, input, ext, args), spec);
|
||||
executeTest(String.format("testCram %s %s -> %s: %s", walker, input, ext, args), spec);
|
||||
}
|
||||
|
||||
@DataProvider(name = "cramNoBaiData")
|
||||
public Object[][] getCRAMNoBaiData() {
|
||||
@DataProvider(name = "cramNoIndexData")
|
||||
public Object[][] getCramNoIndexData() {
|
||||
return new Object[][]{
|
||||
{"exampleCRAM-nobai-nocrai.cram"},
|
||||
{"exampleCRAM-nobai-withcrai.cram"},
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "cramNoBaiData")
|
||||
public void testCRAMNoBai(String input) {
|
||||
@Test(dataProvider = "cramNoIndexData")
|
||||
public void testCramNoIndex(String input) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
" -T TestPrintReadsWalker" +
|
||||
" -I " + publicTestDir + input +
|
||||
" -R " + exampleFASTA,
|
||||
0,
|
||||
UserException.class);
|
||||
executeTest(String.format("testCRAMNoBai %s", input), spec);
|
||||
executeTest(String.format("testCramNoIndex %s", input), spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,14 +52,19 @@ public class GATKBAMIndexUnitTest extends BaseTest {
|
|||
*/
|
||||
private GATKBAMIndex bamIndex;
|
||||
|
||||
/**
|
||||
* Sequences.
|
||||
*/
|
||||
private SAMSequenceDictionary sequenceDictionary;
|
||||
|
||||
|
||||
@BeforeClass
|
||||
public void init() throws FileNotFoundException {
|
||||
SAMFileReader reader = new SAMFileReader(bamFile);
|
||||
SAMSequenceDictionary sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
|
||||
this.sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
|
||||
reader.close();
|
||||
|
||||
bamIndex = new GATKBAMIndex(bamIndexFile);
|
||||
bamIndex = new GATKBAMIndex(bamIndexFile, sequenceDictionary);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -95,13 +100,13 @@ public class GATKBAMIndexUnitTest extends BaseTest {
|
|||
|
||||
@Test( expectedExceptions = UserException.MalformedFile.class )
|
||||
public void testDetectTruncatedBamIndexWordBoundary() {
|
||||
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_word_boundary.bai"));
|
||||
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_word_boundary.bai"), sequenceDictionary);
|
||||
index.readReferenceSequence(0);
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = UserException.MalformedFile.class )
|
||||
public void testDetectTruncatedBamIndexNonWordBoundary() {
|
||||
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_non_word_boundary.bai"));
|
||||
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_non_word_boundary.bai"), sequenceDictionary);
|
||||
index.readReferenceSequence(0);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -44,8 +44,8 @@
|
|||
<test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners>
|
||||
|
||||
<!-- Version numbers for picard and htsjdk -->
|
||||
<htsjdk.version>1.138</htsjdk.version>
|
||||
<picard.version>1.138</picard.version>
|
||||
<htsjdk.version>1.139</htsjdk.version>
|
||||
<picard.version>1.139</picard.version>
|
||||
</properties>
|
||||
|
||||
<!-- Dependency configuration (versions, etc.) -->
|
||||
|
|
@ -718,6 +718,17 @@
|
|||
<name>GATK Public Local Repository</name>
|
||||
<url>file:${gatk.basedir}/public/repo</url>
|
||||
</repository>
|
||||
<repository>
|
||||
<releases>
|
||||
<enabled>false</enabled>
|
||||
</releases>
|
||||
<snapshots>
|
||||
<enabled>true</enabled>
|
||||
</snapshots>
|
||||
<id>broad.artifactory.snapshots</id>
|
||||
<name>Broad Institute Artifactory SNAPSHOTs</name>
|
||||
<url>https://artifactory.broadinstitute.org/artifactory/libs-snapshot</url>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
</project>
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue