From cfd3ffa2c00c3edd177895a302fd2d2b38d7350c Mon Sep 17 00:00:00 2001 From: Ron Levine Date: Tue, 20 Sep 2016 12:49:06 -0400 Subject: [PATCH] Replace SAMFileReader with calls to SamReaderFactory --- .../bwa/java/AlignerTestHarness.java | 10 +- .../datasources/reads/BlockInputStream.java | 2 +- .../reads/utilities/BAMFileStat.java | 18 +- .../reads/utilities/BAMTagRenamer.java | 15 +- .../reads/utilities/PrintBAMRegion.java | 12 +- .../reads/utilities/PrintBGZFBounds.java | 2 - .../gatk/engine/io/OutputTracker.java | 2 +- .../io/storage/SAMFileWriterStorage.java | 9 +- .../SAMReaderArgumentTypeDescriptor.java | 8 +- .../gatk/engine/CommandLineGATKUnitTest.java | 15 +- .../engine/EngineFeaturesIntegrationTest.java | 43 ++-- .../reads/DownsamplerBenchmark.java | 4 +- .../GATKBAMIndexFromDataSourceUnitTest.java | 4 +- .../reads/GATKBAMIndexFromFileUnitTest.java | 9 +- .../reads/PicardBaselineBenchmark.java | 18 +- .../reads/ReadProcessingBenchmark.java | 15 +- .../reads/TheoreticalMinimaBenchmark.java | 12 +- .../queue/extensions/gatk/ArgumentField.java | 4 +- .../gatk/queue/util/QScriptUtils.scala | 11 +- .../gatk/queue/util/VCF_BAM_utilities.scala | 4 +- .../utils/diffengine/BAMDiffableReader.java | 13 +- .../utils/locusiterator/LIBSPerformance.java | 7 +- .../locusiterator/LocusIteratorByState.java | 6 +- .../utils/sam/ArtificialSAMFileReader.java | 198 ++++++++++++++++-- .../gatk/utils/ExampleToCopyUnitTest.java | 12 +- .../sam/ArtificialBAMBuilderUnitTest.java | 12 +- 26 files changed, 339 insertions(+), 126 deletions(-) diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/alignment/bwa/java/AlignerTestHarness.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/alignment/bwa/java/AlignerTestHarness.java index db098e0e3..ff9f2dcc4 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/alignment/bwa/java/AlignerTestHarness.java +++ 
b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/alignment/bwa/java/AlignerTestHarness.java @@ -35,6 +35,7 @@ import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import java.io.File; import java.io.FileNotFoundException; +import java.io.IOException; /** * A test harness to ensure that the perfect aligner works. @@ -63,8 +64,7 @@ public class AlignerTestHarness { Aligner aligner = new BWAJavaAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile); int count = 0; - SAMFileReader reader = new SAMFileReader(bamFile); - reader.setValidationStringency(ValidationStringency.SILENT); + final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(bamFile); int mismatches = 0; int failures = 0; @@ -160,6 +160,12 @@ public class AlignerTestHarness { System.out.printf("%d reads examined.%n",count); } + try { + reader.close(); + } catch ( IOException ex ) { + throw new ReviewedGATKException("Unable to close " + bamFile , ex); + } + System.out.printf("%d reads examined; %d mismatches; %d failures.%n",count,mismatches,failures); } diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/BlockInputStream.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/BlockInputStream.java index aa9462914..1ea1f4276 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/BlockInputStream.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/BlockInputStream.java @@ -40,7 +40,7 @@ import java.util.LinkedList; import java.util.List; /** - * Presents decompressed blocks to the SAMFileReader. + * Presents decompressed blocks to the SamReader. 
*/ public class BlockInputStream extends InputStream { /** diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/BAMFileStat.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/BAMFileStat.java index 32bb8368a..da31c49b3 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/BAMFileStat.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/BAMFileStat.java @@ -25,15 +25,14 @@ package org.broadinstitute.gatk.engine.datasources.reads.utilities; -import htsjdk.samtools.BAMIndex; -import htsjdk.samtools.SAMFileReader; -import htsjdk.samtools.ValidationStringency; +import htsjdk.samtools.*; import org.broadinstitute.gatk.utils.commandline.Argument; import org.broadinstitute.gatk.utils.commandline.CommandLineProgram; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.instrumentation.Sizeof; import java.io.File; +import java.io.IOException; import java.lang.reflect.Field; import java.util.List; import java.util.Map; @@ -56,7 +55,7 @@ public class BAMFileStat extends CommandLineProgram { @Argument(doc="The range to inspect.",required=false) private String range; - public int execute() { + public int execute() throws IOException { switch(command) { case ShowBlocks: throw new ReviewedGATKException("The BAM block inspector has been disabled."); @@ -81,14 +80,11 @@ public class BAMFileStat extends CommandLineProgram { } } - private void showIndexBins(File bamFile,String contigName) { - SAMFileReader reader; - BAMIndex index; + private void showIndexBins(File bamFile,String contigName) throws IOException { - reader = new SAMFileReader(bamFile); - reader.setValidationStringency(ValidationStringency.SILENT); - reader.enableIndexCaching(true); - index = reader.getIndex(); + final SamReader reader = 
SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES). + validationStringency(ValidationStringency.SILENT).open(bamFile); + final SamReader.Indexing index = reader.indexing(); reader.queryOverlapping(contigName,1,reader.getFileHeader().getSequence(contigName).getSequenceLength()).close(); diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/BAMTagRenamer.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/BAMTagRenamer.java index 385292d8b..17408c017 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/BAMTagRenamer.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/BAMTagRenamer.java @@ -25,14 +25,17 @@ package org.broadinstitute.gatk.engine.datasources.reads.utilities; -import htsjdk.samtools.SAMFileReader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMRecord; import org.broadinstitute.gatk.utils.commandline.Argument; import org.broadinstitute.gatk.utils.commandline.CommandLineProgram; +import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import java.io.File; +import java.io.IOException; /** * A simple utility written directly in Picard that will rename tags @@ -62,7 +65,7 @@ public class BAMTagRenamer extends CommandLineProgram { long readsWritten = 0; long readsAltered = 0; - SAMFileReader reader = new SAMFileReader(input); + final SamReader reader = SamReaderFactory.makeDefault().open(input); SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(reader.getFileHeader(),true,output,compressionLevel); for(SAMRecord read: reader) { @@ -79,7 +82,13 @@ public class BAMTagRenamer extends CommandLineProgram { } writer.close(); - System.out.printf("%d reads written. 
%d tag names updated from %s to %s.%n",readsWritten,readsAltered,sourceTagName,targetTagName); + System.out.printf("%d reads written. %d tag names updated from %s to %s.%n",readsWritten,readsAltered,sourceTagName,targetTagName); + + try { + reader.close(); + } catch ( IOException ex ) { + throw new ReviewedGATKException("Unable to close " + input , ex); + } return 0; } diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/PrintBAMRegion.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/PrintBAMRegion.java index 26f0e4d29..d2cbc588b 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/PrintBAMRegion.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/PrintBAMRegion.java @@ -31,6 +31,7 @@ import org.broadinstitute.gatk.utils.commandline.CommandLineProgram; import org.broadinstitute.gatk.utils.exceptions.UserException; import java.io.File; +import java.io.IOException; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -53,9 +54,8 @@ public class PrintBAMRegion extends CommandLineProgram { private static final int MIN_OFFSET_SIZE = 0; private static final int MAX_OFFSET_SIZE = (int)Math.pow(2,16)-1; - public int execute() { - SAMFileReader reader = new SAMFileReader(input); - reader.setValidationStringency(ValidationStringency.SILENT); + public int execute() throws IOException { + final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(input); Pattern regionPattern = Pattern.compile("(\\d+):(\\d+)-(\\d+):(\\d+)"); Matcher matcher = regionPattern.matcher(region); @@ -76,10 +76,10 @@ public class PrintBAMRegion extends CommandLineProgram { if(lastOffset < MIN_OFFSET_SIZE || lastOffset > MAX_OFFSET_SIZE) throw new UserException(String.format("Last offset is invalid; must be between %d and %d; actually 
is %d",MIN_OFFSET_SIZE,MAX_OFFSET_SIZE,lastOffset)); - GATKChunk chunk = new GATKChunk(firstBlock<<16 | firstOffset,lastBlock<<16 | lastOffset); - GATKBAMFileSpan fileSpan = new GATKBAMFileSpan(chunk); + final GATKChunk chunk = new GATKChunk(firstBlock<<16 | firstOffset,lastBlock<<16 | lastOffset); + final GATKBAMFileSpan fileSpan = new GATKBAMFileSpan(chunk); - SAMRecordIterator iterator = reader.iterator(fileSpan); + final SAMRecordIterator iterator = ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(fileSpan); long readCount = 0; while(iterator.hasNext()) { System.out.printf("%s%n",iterator.next().format()); diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/PrintBGZFBounds.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/PrintBGZFBounds.java index 5ec07390f..8c7b62fca 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/PrintBGZFBounds.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/PrintBGZFBounds.java @@ -65,8 +65,6 @@ public class PrintBGZFBounds extends CommandLineProgram { float uncompressedSize = 0; long totalBlocks = 0; - //SAMFileReader reader = new SAMFileReader(input); - while(true) { final long blockStart = fis.getChannel().position(); diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/OutputTracker.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/OutputTracker.java index 87f601923..1db822192 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/OutputTracker.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/OutputTracker.java @@ -98,7 +98,7 @@ public abstract class OutputTracker implements ReferenceBacked { ArgumentSource targetField = io.getKey(); Object targetValue = io.getValue(); - // Ghastly hack: reaches in and 
finishes building out the SAMFileReader. + // Ghastly hack: reaches in and finishes building out the SamReader. // TODO: Generalize this, and move it to its own initialization step. if( targetValue instanceof SAMReaderBuilder) { SAMReaderBuilder builder = (SAMReaderBuilder)targetValue; diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/storage/SAMFileWriterStorage.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/storage/SAMFileWriterStorage.java index b1aecb22c..acce6addb 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/storage/SAMFileWriterStorage.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/storage/SAMFileWriterStorage.java @@ -31,6 +31,7 @@ import htsjdk.samtools.util.ProgressLoggerInterface; import htsjdk.samtools.util.RuntimeIOException; import org.apache.log4j.Logger; import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub; +import org.broadinstitute.gatk.utils.exceptions.GATKException; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.sam.SimplifyingSAMFileWriter; @@ -112,7 +113,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage iterator = reader.iterator(); while( iterator.hasNext() ) @@ -120,7 +121,11 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage readGroupToNewSampleMap = new HashMap<>(); for ( String inputBamID : Arrays.asList("12878", "12891", "12892") ) { final File inputBam = new File(privateTestDir + String.format("CEUTrio.HiSeq.WGS.b37.NA%s.HEADERONLY.bam", inputBamID)); - final SAMFileReader inputBamReader = new SAMFileReader(inputBam); + final SamReader reader = SamReaderFactory.makeDefault().open(inputBam); final String newSampleName = String.format("newSampleFor%s", inputBamID); - for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) { + for ( final SAMReadGroupRecord readGroup : 
reader.getFileHeader().getReadGroups() ) { readGroupToNewSampleMap.put(readGroup.getId(), newSampleName); } - inputBamReader.close(); + reader.close(); } final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" + @@ -420,10 +419,10 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { 1, Arrays.asList("")); // No MD5s; we only want to check the read groups final File outputBam = executeTest("testOnTheFlySampleRenamingWithMultipleBamFiles", spec).first.get(0); - final SAMFileReader outputBamReader = new SAMFileReader(outputBam); + final SamReader reader = SamReaderFactory.makeDefault().open(outputBam); int totalReadGroupsSeen = 0; - for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) { + for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) { Assert.assertEquals(readGroup.getSample(), readGroupToNewSampleMap.get(readGroup.getId()), String.format("Wrong sample for read group %s after on-the-fly renaming", readGroup.getId())); totalReadGroupsSeen++; @@ -431,7 +430,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { Assert.assertEquals(totalReadGroupsSeen, readGroupToNewSampleMap.size(), "Wrong number of read groups encountered in output bam file"); - outputBamReader.close(); + reader.close(); } // On-the-fly sample renaming test case: three single-sample bams with multiple read groups per bam, @@ -446,15 +445,15 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { final Map readGroupToNewSampleMap = new HashMap<>(); for ( String inputBamID : Arrays.asList("12878", "12891", "12892") ) { final File inputBam = new File(privateTestDir + String.format("CEUTrio.HiSeq.WGS.b37.NA%s.HEADERONLY.bam", inputBamID)); - final SAMFileReader inputBamReader = new SAMFileReader(inputBam); + final SamReader reader = SamReaderFactory.makeDefault().open(inputBam); // Special-case NA12891, which we're not renaming: final String newSampleName = 
inputBamID.equals("12891") ? "NA12891" : String.format("newSampleFor%s", inputBamID); - for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) { + for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) { readGroupToNewSampleMap.put(readGroup.getId(), newSampleName); } - inputBamReader.close(); + reader.close(); } final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" + @@ -467,10 +466,10 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { 1, Arrays.asList("")); // No MD5s; we only want to check the read groups final File outputBam = executeTest("testOnTheFlySampleRenamingWithMultipleBamFilesPartialRename", spec).first.get(0); - final SAMFileReader outputBamReader = new SAMFileReader(outputBam); + final SamReader reader = SamReaderFactory.makeDefault().open(outputBam); int totalReadGroupsSeen = 0; - for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) { + for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) { Assert.assertEquals(readGroup.getSample(), readGroupToNewSampleMap.get(readGroup.getId()), String.format("Wrong sample for read group %s after on-the-fly renaming", readGroup.getId())); totalReadGroupsSeen++; @@ -478,7 +477,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { Assert.assertEquals(totalReadGroupsSeen, readGroupToNewSampleMap.size(), "Wrong number of read groups encountered in output bam file"); - outputBamReader.close(); + reader.close(); } // On-the-fly sample renaming test case: two single-sample bams with read group collisions @@ -489,11 +488,11 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { privateTestDir + "CEUTrio.HiSeq.WGS.b37.READ_GROUP_COLLISIONS_WITH_NA12878.HEADERONLY.bam newSampleForNot12878")); final Set na12878ReadGroups = new HashSet<>(); - final SAMFileReader inputBamReader = new SAMFileReader(new File(privateTestDir + 
"CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam")); - for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) { + final SamReader inpuBAMreader = SamReaderFactory.makeDefault().open(new File(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam")); + for ( final SAMReadGroupRecord readGroup : inpuBAMreader.getFileHeader().getReadGroups() ) { na12878ReadGroups.add(readGroup.getId()); } - inputBamReader.close(); + inpuBAMreader.close(); final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" + " -R " + b37KGReference + @@ -504,10 +503,10 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { 1, Arrays.asList("")); // No MD5s; we only want to check the read groups final File outputBam = executeTest("testOnTheFlySampleRenamingWithReadGroupCollisions", spec).first.get(0); - final SAMFileReader outputBamReader = new SAMFileReader(outputBam); + final SamReader outputBAMreader = SamReaderFactory.makeDefault().open(outputBam); int totalReadGroupsSeen = 0; - for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) { + for ( final SAMReadGroupRecord readGroup : outputBAMreader.getFileHeader().getReadGroups() ) { String expectedSampleName = ""; if ( na12878ReadGroups.contains(readGroup.getId()) ) { expectedSampleName = "newSampleFor12878"; @@ -523,7 +522,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { Assert.assertEquals(totalReadGroupsSeen, na12878ReadGroups.size() * 2, "Wrong number of read groups encountered in output bam file"); - outputBamReader.close(); + outputBAMreader.close(); } // On-the-fly sample renaming test case: a multi-sample bam (this should generate a UserException) diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/DownsamplerBenchmark.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/DownsamplerBenchmark.java index b45db15b0..47d10d234 100644 --- 
a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/DownsamplerBenchmark.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/DownsamplerBenchmark.java @@ -55,12 +55,12 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark { // public void timeDownsampling(int reps) { // for(int i = 0; i < reps; i++) { -// SAMFileReader reader = new SAMFileReader(inputFile); +// SamReader reader = SamReaderFactory.makeDefault().open(inputFile); // ReadProperties readProperties = new ReadProperties(Collections.singletonList(new SAMReaderID(inputFile,new Tags())), // reader.getFileHeader(), // SAMFileHeader.SortOrder.coordinate, // false, -// SAMFileReader.ValidationStringency.SILENT, +// ValidationStringency.SILENT, // downsampling.create(), // new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)), // Collections.emptyList(), diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndexFromDataSourceUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndexFromDataSourceUnitTest.java index 3cd4ac6e4..5d051e37f 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndexFromDataSourceUnitTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndexFromDataSourceUnitTest.java @@ -25,7 +25,6 @@ package org.broadinstitute.gatk.engine.datasources.reads; -import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; import org.broadinstitute.gatk.utils.BaseTest; @@ -50,8 +49,7 @@ public class GATKBAMIndexFromDataSourceUnitTest extends BaseTest { @BeforeClass public void init() throws IOException { - final SAMFileReader reader = new SAMFileReader(bamFile); - reader.enableIndexCaching(true); // needed ot get BrowseableBAMIndex + final 
SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES).open(bamFile); Assert.assertTrue(reader.hasIndex()); Assert.assertTrue(reader.indexing().hasBrowseableIndex()); diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndexFromFileUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndexFromFileUnitTest.java index c13a0006d..bfa53ff74 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndexFromFileUnitTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/GATKBAMIndexFromFileUnitTest.java @@ -25,8 +25,9 @@ package org.broadinstitute.gatk.engine.datasources.reads; -import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.testng.Assert; @@ -34,7 +35,7 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import java.io.File; -import java.io.FileNotFoundException; +import java.io.IOException; /** * Test basic functionality in the GATK's implementation of the BAM index classes. 
@@ -59,8 +60,8 @@ public class GATKBAMIndexFromFileUnitTest extends BaseTest { @BeforeClass - public void init() throws FileNotFoundException { - final SAMFileReader reader = new SAMFileReader(bamFile); + public void init() throws IOException { + final SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES).open(bamFile); sequenceDictionary = reader.getFileHeader().getSequenceDictionary(); reader.close(); diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/PicardBaselineBenchmark.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/PicardBaselineBenchmark.java index a05a852d2..0a118408c 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/PicardBaselineBenchmark.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/PicardBaselineBenchmark.java @@ -26,13 +26,13 @@ package org.broadinstitute.gatk.engine.datasources.reads; import com.google.caliper.Param; -import com.google.caliper.SimpleBenchmark; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.util.SamLocusIterator; -import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.CloseableIterator; -import java.io.File; +import java.io.IOException; import java.util.Iterator; /** @@ -55,9 +55,9 @@ public class PicardBaselineBenchmark extends ReadProcessingBenchmark { @Override public Integer getMaxReads() { return maxReads; } - public void timeDecompressBamFile(int reps) { + public void timeDecompressBamFile(int reps) throws IOException { for(int i = 0; i < reps; i++) { - SAMFileReader reader = new SAMFileReader(inputFile); + final SamReader reader = SamReaderFactory.makeDefault().open(inputFile); CloseableIterator iterator = reader.iterator(); while(iterator.hasNext()) iterator.next(); @@ -66,9 +66,9 @@ public class 
PicardBaselineBenchmark extends ReadProcessingBenchmark { } } - public void timeExtractTag(int reps) { + public void timeExtractTag(int reps) throws IOException { for(int i = 0; i < reps; i++) { - SAMFileReader reader = new SAMFileReader(inputFile); + final SamReader reader = SamReaderFactory.makeDefault().open(inputFile); CloseableIterator iterator = reader.iterator(); while(iterator.hasNext()) { SAMRecord read = iterator.next(); @@ -79,9 +79,9 @@ public class PicardBaselineBenchmark extends ReadProcessingBenchmark { } } - public void timeSamLocusIterator(int reps) { + public void timeSamLocusIterator(int reps) throws IOException { for(int i = 0; i < reps; i++) { - SAMFileReader reader = new SAMFileReader(inputFile); + final SamReader reader = SamReaderFactory.makeDefault().open(inputFile); long loci = 0; SamLocusIterator samLocusIterator = new SamLocusIterator(reader); diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/ReadProcessingBenchmark.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/ReadProcessingBenchmark.java index d176249d5..a7bf2ebc1 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/ReadProcessingBenchmark.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/ReadProcessingBenchmark.java @@ -25,11 +25,11 @@ package org.broadinstitute.gatk.engine.datasources.reads; -import com.google.caliper.Param; import com.google.caliper.SimpleBenchmark; -import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriterFactory; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.SAMRecord; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; @@ -51,7 +51,7 @@ public abstract class ReadProcessingBenchmark extends SimpleBenchmark { @Override public void setUp() { - SAMFileReader fullInputFile 
= new SAMFileReader(new File(getBAMFile())); + SamReader reader = SamReaderFactory.makeDefault().open(new File(getBAMFile())); File tempFile = null; try { @@ -62,15 +62,20 @@ public abstract class ReadProcessingBenchmark extends SimpleBenchmark { } SAMFileWriterFactory factory = new SAMFileWriterFactory(); factory.setCreateIndex(true); - SAMFileWriter writer = factory.makeBAMWriter(fullInputFile.getFileHeader(),true,tempFile); + SAMFileWriter writer = factory.makeBAMWriter(reader.getFileHeader(),true,tempFile); long numReads = 0; - for(SAMRecord read: fullInputFile) { + for(SAMRecord read: reader) { if(numReads++ >= getMaxReads()) break; writer.addAlignment(read); } + try { + reader.close(); + } catch ( IOException ex ) { + throw new ReviewedGATKException("Unable to close " + getBAMFile() , ex); + } writer.close(); inputFile = tempFile; diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/TheoreticalMinimaBenchmark.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/TheoreticalMinimaBenchmark.java index 01ec4238f..754b288a6 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/TheoreticalMinimaBenchmark.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/TheoreticalMinimaBenchmark.java @@ -28,11 +28,13 @@ package org.broadinstitute.gatk.engine.datasources.reads; import com.google.caliper.Param; import htsjdk.samtools.Cigar; import htsjdk.samtools.CigarElement; -import htsjdk.samtools.SAMFileReader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.CloseableIterator; import java.io.File; +import java.io.IOException; /** * Created by IntelliJ IDEA. 
@@ -54,10 +56,10 @@ public class TheoreticalMinimaBenchmark extends ReadProcessingBenchmark { @Override public Integer getMaxReads() { return maxReads; } - public void timeIterateOverEachBase(int reps) { + public void timeIterateOverEachBase(int reps) throws IOException { System.out.printf("Processing " + inputFile); for(int i = 0; i < reps; i++) { - SAMFileReader reader = new SAMFileReader(inputFile); + final SamReader reader = SamReaderFactory.makeDefault().open((inputFile)); CloseableIterator iterator = reader.iterator(); long As=0,Cs=0,Gs=0,Ts=0; @@ -78,14 +80,14 @@ public class TheoreticalMinimaBenchmark extends ReadProcessingBenchmark { } } - public void timeIterateOverCigarString(int reps) { + public void timeIterateOverCigarString(int reps) throws IOException { for(int i = 0; i < reps; i++) { long matchMismatches = 0; long insertions = 0; long deletions = 0; long others = 0; - SAMFileReader reader = new SAMFileReader(inputFile); + final SamReader reader = SamReaderFactory.makeDefault().open(inputFile); CloseableIterator iterator = reader.iterator(); while(iterator.hasNext()) { SAMRecord read = iterator.next(); diff --git a/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentField.java b/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentField.java index 9012e1d56..71aa55bdb 100644 --- a/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentField.java +++ b/public/gatk-queue-extensions-generator/src/main/java/org/broadinstitute/gatk/queue/extensions/gatk/ArgumentField.java @@ -25,7 +25,7 @@ package org.broadinstitute.gatk.queue.extensions.gatk; -import htsjdk.samtools.SAMFileReader; +import htsjdk.samtools.SamReader; import htsjdk.samtools.SAMFileWriter; import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringUtils; @@ -245,7 +245,7 @@ public abstract class 
ArgumentField { protected static Class mapType(Class clazz) { if (InputStream.class.isAssignableFrom(clazz)) return File.class; - if (SAMFileReader.class.isAssignableFrom(clazz)) return File.class; + if (SamReader.class.isAssignableFrom(clazz)) return File.class; if (OutputStream.class.isAssignableFrom(clazz)) return File.class; if (VariantContextWriter.class.isAssignableFrom(clazz)) return File.class; if (SAMFileWriter.class.isAssignableFrom(clazz)) return File.class; diff --git a/public/gatk-queue/src/main/scala/org/broadinstitute/gatk/queue/util/QScriptUtils.scala b/public/gatk-queue/src/main/scala/org/broadinstitute/gatk/queue/util/QScriptUtils.scala index b8923f212..b995c40f0 100644 --- a/public/gatk-queue/src/main/scala/org/broadinstitute/gatk/queue/util/QScriptUtils.scala +++ b/public/gatk-queue/src/main/scala/org/broadinstitute/gatk/queue/util/QScriptUtils.scala @@ -27,7 +27,7 @@ package org.broadinstitute.gatk.queue.util import java.io.File import io.Source._ -import htsjdk.samtools.{SAMReadGroupRecord, SAMFileReader} +import htsjdk.samtools.{SamReaderFactory, SAMReadGroupRecord} import collection.JavaConversions._ @@ -87,8 +87,10 @@ object QScriptUtils { * Returns the number of contigs in the BAM file header. 
*/ def getNumberOfContigs(bamFile: File): Int = { - val samReader = new SAMFileReader(bamFile) - samReader.getFileHeader.getSequenceDictionary.getSequences.size() + val samReader = SamReaderFactory.makeDefault().open(bamFile) + val size = samReader.getFileHeader.getSequenceDictionary.getSequences.size() + samReader.close + return size } /** @@ -112,11 +114,12 @@ object QScriptUtils { * @return a set with all distinct samples (in no particular order) */ def getSamplesFromBAM(bam: File) : Set[String] = { - val reader = new SAMFileReader(bam) + val reader = SamReaderFactory.makeDefault().open(bam); var samples: Set[String] = Set() for (rg <- reader.getFileHeader.getReadGroups) { samples += rg.getSample } + reader.close samples } } diff --git a/public/gatk-queue/src/main/scala/org/broadinstitute/gatk/queue/util/VCF_BAM_utilities.scala b/public/gatk-queue/src/main/scala/org/broadinstitute/gatk/queue/util/VCF_BAM_utilities.scala index 099ab79e8..55d932056 100644 --- a/public/gatk-queue/src/main/scala/org/broadinstitute/gatk/queue/util/VCF_BAM_utilities.scala +++ b/public/gatk-queue/src/main/scala/org/broadinstitute/gatk/queue/util/VCF_BAM_utilities.scala @@ -28,7 +28,7 @@ package org.broadinstitute.gatk.queue.util import java.io.File import org.apache.commons.io.FilenameUtils import scala.io.Source._ -import htsjdk.samtools.SAMFileReader +import htsjdk.samtools.{SamReaderFactory} import htsjdk.variant.vcf.{VCFHeader, VCFCodec} import scala.collection.JavaConversions._ import htsjdk.tribble.AbstractFeatureReader @@ -40,7 +40,7 @@ object VCF_BAM_utilities { } def getSamplesInBAM(bam: File): List[String] = { - return new SAMFileReader(bam).getFileHeader().getReadGroups().toList.map(srgr => srgr.getSample()).toSet.toList + return SamReaderFactory.makeDefault().open(bam).getFileHeader().getReadGroups().toList.map(srgr => srgr.getSample()).toSet.toList } def parseBAMsInput(bamsIn: File): List[File] = FilenameUtils.getExtension(bamsIn.getPath) match { diff --git 
a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/diffengine/BAMDiffableReader.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/diffengine/BAMDiffableReader.java index c423d78fe..d8250b51f 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/diffengine/BAMDiffableReader.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/diffengine/BAMDiffableReader.java @@ -25,11 +25,13 @@ package org.broadinstitute.gatk.utils.diffengine; -import htsjdk.samtools.SAMFileReader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordIterator; import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.util.BlockCompressedInputStream; +import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import java.io.*; import java.util.Arrays; @@ -49,8 +51,7 @@ public class BAMDiffableReader implements DiffableReader { @Override public DiffElement readFromFile(File file, int maxElementsToRead) { - final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index - reader.setValidationStringency(ValidationStringency.SILENT); + final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(file); DiffNode root = DiffNode.rooted(file.getName()); SAMRecordIterator iterator = reader.iterator(); @@ -93,7 +94,11 @@ public class BAMDiffableReader implements DiffableReader { break; } - reader.close(); + try { + reader.close(); + } catch (final IOException ex ) { + throw new ReviewedGATKException("Unable to close " + file , ex); + } return root.getBinding(); } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/locusiterator/LIBSPerformance.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/locusiterator/LIBSPerformance.java index 5bb518a50..c165b3120 100644 --- 
a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/locusiterator/LIBSPerformance.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/locusiterator/LIBSPerformance.java @@ -25,7 +25,8 @@ package org.broadinstitute.gatk.utils.locusiterator; -import htsjdk.samtools.SAMFileReader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.SAMRecordIterator; import htsjdk.samtools.reference.ReferenceSequenceFile; @@ -65,7 +66,7 @@ public class LIBSPerformance extends CommandLineProgram { final ReferenceSequenceFile reference = new CachingIndexedFastaSequenceFile(referenceFile); final GenomeLocParser genomeLocParser = new GenomeLocParser(reference); - final SAMFileReader reader = new SAMFileReader(samFile); + final SamReader reader = SamReaderFactory.makeDefault().open(samFile); SAMRecordIterator rawIterator; if ( location == null ) @@ -81,6 +82,8 @@ public class LIBSPerformance extends CommandLineProgram { for ( final SAMReadGroupRecord rg : reader.getFileHeader().getReadGroups() ) samples.add(rg.getSample()); + reader.close(); + final LIBSDownsamplingInfo ds = new LIBSDownsamplingInfo(downsample, 250); final LocusIteratorByState libs = diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/locusiterator/LocusIteratorByState.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/locusiterator/LocusIteratorByState.java index 3ce8783b5..fe509948f 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/locusiterator/LocusIteratorByState.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/locusiterator/LocusIteratorByState.java @@ -28,7 +28,7 @@ package org.broadinstitute.gatk.utils.locusiterator; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SAMFileReader; +import 
htsjdk.samtools.SamReader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.CloseableIterator; import org.apache.log4j.Logger; @@ -149,7 +149,7 @@ public final class LocusIteratorByState extends LocusIterator { } /** - * Create a new LocusIteratorByState based on a SAMFileReader using reads in an iterator it + * Create a new LocusIteratorByState based on a SamReader using reads in an iterator it * * Simple constructor that uses the samples in the reader, doesn't do any downsampling, * and makes a new GenomeLocParser using the reader. This constructor will be slow(ish) @@ -158,7 +158,7 @@ public final class LocusIteratorByState extends LocusIterator { * @param reader a non-null reader * @param it an iterator from reader that has the reads we want to use to create ReadBackPileups */ - public LocusIteratorByState(final SAMFileReader reader, final CloseableIterator it) { + public LocusIteratorByState(final SamReader reader, final CloseableIterator it) { this(new GATKSAMRecordIterator(it), new LIBSDownsamplingInfo(false, 0), true, diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/ArtificialSAMFileReader.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/ArtificialSAMFileReader.java index 9491ed254..846cc7f3d 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/ArtificialSAMFileReader.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/ArtificialSAMFileReader.java @@ -26,16 +26,19 @@ package org.broadinstitute.gatk.utils.sam; import htsjdk.samtools.*; +import htsjdk.samtools.SamReader.Indexing; import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import java.io.ByteArrayInputStream; +import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; 
+ /** * User: hanna * Date: Jun 11, 2009 @@ -53,7 +56,13 @@ import java.util.List; * Pass specified reads into the given walker. */ -public class ArtificialSAMFileReader extends SAMFileReader { +public class ArtificialSAMFileReader implements SamReader, Indexing { + + /** + * The reader of SamRecords + */ + private SamReader reader; + /** * The parser, for GenomeLocs. */ @@ -64,15 +73,20 @@ public class ArtificialSAMFileReader extends SAMFileReader { */ private final List reads; + /** + * Input/custom SAM file header + */ private SAMFileHeader customHeader = null; /** * Construct an artificial SAM file reader. + * * @param sequenceDictionary sequence dictionary used to initialize our GenomeLocParser * @param reads Reads to use as backing data source. */ public ArtificialSAMFileReader(SAMSequenceDictionary sequenceDictionary,SAMRecord... reads) { - super( createEmptyInputStream(),true ); + final SamInputResource samInputResource = SamInputResource.of(createEmptyInputStream()); + reader = SamReaderFactory.makeDefault().open(samInputResource); this.genomeLocParser = new GenomeLocParser(sequenceDictionary); this.reads = Arrays.asList(reads); } @@ -84,30 +98,75 @@ public class ArtificialSAMFileReader extends SAMFileReader { * @param reads Reads to use as backing data source. */ public ArtificialSAMFileReader( SAMFileHeader customHeader, SAMRecord... 
reads ) { - super(createEmptyInputStream(),true); + final SamInputResource samInputResource = SamInputResource.of(createEmptyInputStream()); + reader = SamReaderFactory.makeDefault().open(samInputResource); this.customHeader = customHeader; this.genomeLocParser = new GenomeLocParser(customHeader.getSequenceDictionary()); this.reads = Arrays.asList(reads); } + @Override + public String getResourceDescription() { + return this.toString(); + } @Override - public SAMFileHeader getFileHeader() { - if ( customHeader != null ) { - return customHeader; - } + public boolean hasIndex() { + return this.reader.hasIndex(); + } - return super.getFileHeader(); + @Override + public Indexing indexing() { + return this; + } + + @Override + public BrowseableBAMIndex getBrowseableIndex() { + BAMIndex index = this.getIndex(); + if(!(index instanceof BrowseableBAMIndex)) { + throw new SAMException("Cannot return index: index created by BAM is not browseable."); + } else { + return BrowseableBAMIndex.class.cast(index); + } + } + + @Override + public boolean hasBrowseableIndex() { + return this.hasIndex() && this.getIndex() instanceof BrowseableBAMIndex; + } + + @Override + public BAMIndex getIndex() { + throw new UnsupportedOperationException(); + } + + @Override + public SAMRecordIterator iterator() { + return new SAMRecordIterator() { + private final Iterator iterator = reads.iterator(); + public boolean hasNext() { return iterator.hasNext(); } + public SAMRecord next() { return iterator.next(); } + public void close() {} + public void remove() { iterator.remove(); } + public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) { return this; } + }; } /** - * @{inheritDoc} + * Iterate through the file. + * + * @param chunks List of chunks for which to retrieve data. + * @return An iterator.
*/ @Override + public SAMRecordIterator iterator(SAMFileSpan chunks) { + return new SamReader.AssertingIterator(this.reader.iterator()); + } + public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained) { GenomeLoc region = genomeLocParser.createGenomeLoc(sequence, start, end); - List coveredSubset = new ArrayList(); + List coveredSubset = new ArrayList<>(); for( SAMRecord read: reads ) { GenomeLoc readPosition = genomeLocParser.createGenomeLoc(read); @@ -126,15 +185,116 @@ public class ArtificialSAMFileReader extends SAMFileReader { } @Override - public SAMRecordIterator iterator() { - return new SAMRecordIterator() { - private final Iterator iterator = reads.iterator(); - public boolean hasNext() { return iterator.hasNext(); } - public SAMRecord next() { return iterator.next(); } - public void close() {} - public void remove() { iterator.remove(); } - public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) { return this; } - }; + public SAMRecordIterator queryOverlapping(final String sequence, final int start, final int end) { + return this.query(sequence, start, end, false); + } + + @Override + public SAMRecordIterator queryContained(final String sequence, final int start, final int end) { + return this.query(sequence, start, end, true); + } + + @Override + public SAMRecordIterator query(final QueryInterval[] intervals, final boolean contained) { + return new AssertingIterator(this.reader.query(intervals, contained)); + } + + @Override + public SAMRecordIterator queryOverlapping(final QueryInterval[] intervals) { + return this.query(intervals, false); + } + + @Override + public SAMRecordIterator queryContained(final QueryInterval[] intervals) { + return this.query(intervals, true); + } + + @Override + public SAMRecordIterator queryUnmapped() { + return new AssertingIterator(this.reader.queryUnmapped()); + } + + @Override + public SAMRecordIterator queryAlignmentStart(final String sequence, final 
int start) { + return new AssertingIterator(this.reader.queryAlignmentStart(sequence, start)); + } + + @Override + public SAMRecord queryMate(final SAMRecord rec) { + if(!rec.getReadPairedFlag()) { + throw new IllegalArgumentException("queryMate called for unpaired read."); + } else if(rec.getFirstOfPairFlag() == rec.getSecondOfPairFlag()) { + throw new IllegalArgumentException("SAMRecord must be either first and second of pair, but not both."); + } else { + boolean firstOfPair = rec.getFirstOfPairFlag(); + SAMRecordIterator it; + if(rec.getMateReferenceIndex() == -1) { + it = this.queryUnmapped(); + } else { + it = this.queryAlignmentStart(rec.getMateReferenceName(), rec.getMateAlignmentStart()); + } + + try { + SAMRecord mateRec = null; + + while(true) { + SAMRecord next; + while(it.hasNext()) { + next = it.next(); + if(!next.getReadPairedFlag()) { + if(rec.getReadName().equals(next.getReadName())) { + throw new SAMFormatException("Paired and unpaired reads with same name: " + rec.getReadName()); + } + } else { + if(firstOfPair) { + if(next.getFirstOfPairFlag()) { + continue; + } + } else if(next.getSecondOfPairFlag()) { + continue; + } + + if(rec.getReadName().equals(next.getReadName())) { + if(mateRec != null) { + throw new SAMFormatException("Multiple SAMRecord with read name " + rec.getReadName() + " for " + (firstOfPair?"second":"first") + " end."); + } + + mateRec = next; + } + } + } + + next = mateRec; + return next; + } + } finally { + it.close(); + } + } + } + + @Override + public SAMFileSpan getFilePointerSpanningReads() { + return this.reader.indexing().getFilePointerSpanningReads(); + } + + @Override + public void close() throws IOException{ + if(this.reader != null) { + this.reader.close(); + } + + this.reader = null; + } + + @Override + public Type type() { + return this.reader.type(); + } + + @Override + public SAMFileHeader getFileHeader() { + return customHeader != null ? 
customHeader : this.reader.getFileHeader(); } /** diff --git a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/ExampleToCopyUnitTest.java b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/ExampleToCopyUnitTest.java index 637539395..e9775d1fe 100644 --- a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/ExampleToCopyUnitTest.java +++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/ExampleToCopyUnitTest.java @@ -30,9 +30,11 @@ package org.broadinstitute.gatk.utils; import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMFileReader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.reference.ReferenceSequenceFile; +import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.gatk.utils.pileup.PileupElement; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; @@ -50,6 +52,7 @@ import org.testng.annotations.Test; import java.io.File; import java.io.FileNotFoundException; +import java.io.IOException; import java.util.*; public class ExampleToCopyUnitTest extends BaseTest { @@ -217,13 +220,18 @@ public class ExampleToCopyUnitTest extends BaseTest { // create a fake BAM file, and iterate through it final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(seq, 20, 10); final File bam = bamBuilder.makeTemporarilyBAMFile(); - final SAMFileReader reader = new SAMFileReader(bam); + final SamReader reader = SamReaderFactory.makeDefault().open(bam); final Iterator bamIt = reader.iterator(); while ( bamIt.hasNext() ) { final SAMRecord read = bamIt.next(); // all reads are actually GATKSAMRecords // TODO -- add some tests that use reads from a BAM } + try { + reader.close(); + } catch ( IOException ex ) { + throw new ReviewedGATKException("Unable to close " + bam , ex); + } } /** diff --git 
a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/sam/ArtificialBAMBuilderUnitTest.java b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/sam/ArtificialBAMBuilderUnitTest.java index c4dfdbe65..048292e30 100644 --- a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/sam/ArtificialBAMBuilderUnitTest.java +++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/sam/ArtificialBAMBuilderUnitTest.java @@ -25,14 +25,17 @@ package org.broadinstitute.gatk.utils.sam; -import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; import org.broadinstitute.gatk.utils.BaseTest; +import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; +import java.io.IOException; import java.util.Arrays; import java.util.Iterator; import java.util.LinkedList; @@ -94,7 +97,7 @@ public class ArtificialBAMBuilderUnitTest extends BaseTest { } final File bam = bamBuilder.makeTemporarilyBAMFile(); - final SAMFileReader reader = new SAMFileReader(bam); + final SamReader reader = SamReaderFactory.makeDefault().open(bam); Assert.assertTrue(reader.hasIndex()); final Iterator bamIt = reader.iterator(); int nReadsFromBam = 0; @@ -106,6 +109,11 @@ public class ArtificialBAMBuilderUnitTest extends BaseTest { Assert.assertTrue(read.getAlignmentStart() >= lastStart); lastStart = read.getAlignmentStart(); } + try { + reader.close(); + } catch ( IOException ex ) { + throw new ReviewedGATKException("Unable to close " + bam , ex); + } Assert.assertEquals(nReadsFromBam, bamBuilder.expectedNumberOfReads()); }