Merge pull request #1485 from broadinstitute/rhl_mv_samfilereader_samreaderfactory

Replace SAMFileReader with calls to SamReaderFactory
This commit is contained in:
Ron Levine 2016-10-14 12:52:12 -04:00 committed by GitHub
commit bd6d050187
26 changed files with 339 additions and 126 deletions

View File

@ -35,6 +35,7 @@ import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException;
/** /**
* A test harness to ensure that the perfect aligner works. * A test harness to ensure that the perfect aligner works.
@ -63,8 +64,7 @@ public class AlignerTestHarness {
Aligner aligner = new BWAJavaAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile); Aligner aligner = new BWAJavaAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile);
int count = 0; int count = 0;
SAMFileReader reader = new SAMFileReader(bamFile); final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(bamFile);
reader.setValidationStringency(ValidationStringency.SILENT);
int mismatches = 0; int mismatches = 0;
int failures = 0; int failures = 0;
@ -160,6 +160,12 @@ public class AlignerTestHarness {
System.out.printf("%d reads examined.%n",count); System.out.printf("%d reads examined.%n",count);
} }
try {
reader.close();
} catch ( IOException ex ) {
throw new ReviewedGATKException("Unable to close " + bamFile , ex);
}
System.out.printf("%d reads examined; %d mismatches; %d failures.%n",count,mismatches,failures); System.out.printf("%d reads examined; %d mismatches; %d failures.%n",count,mismatches,failures);
} }

View File

@ -40,7 +40,7 @@ import java.util.LinkedList;
import java.util.List; import java.util.List;
/** /**
* Presents decompressed blocks to the SAMFileReader. * Presents decompressed blocks to the SamReader.
*/ */
public class BlockInputStream extends InputStream { public class BlockInputStream extends InputStream {
/** /**

View File

@ -25,15 +25,14 @@
package org.broadinstitute.gatk.engine.datasources.reads.utilities; package org.broadinstitute.gatk.engine.datasources.reads.utilities;
import htsjdk.samtools.BAMIndex; import htsjdk.samtools.*;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.ValidationStringency;
import org.broadinstitute.gatk.utils.commandline.Argument; import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.commandline.CommandLineProgram; import org.broadinstitute.gatk.utils.commandline.CommandLineProgram;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.instrumentation.Sizeof; import org.broadinstitute.gatk.utils.instrumentation.Sizeof;
import java.io.File; import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field; import java.lang.reflect.Field;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -56,7 +55,7 @@ public class BAMFileStat extends CommandLineProgram {
@Argument(doc="The range to inspect.",required=false) @Argument(doc="The range to inspect.",required=false)
private String range; private String range;
public int execute() { public int execute() throws IOException {
switch(command) { switch(command) {
case ShowBlocks: case ShowBlocks:
throw new ReviewedGATKException("The BAM block inspector has been disabled."); throw new ReviewedGATKException("The BAM block inspector has been disabled.");
@ -81,14 +80,11 @@ public class BAMFileStat extends CommandLineProgram {
} }
} }
private void showIndexBins(File bamFile,String contigName) { private void showIndexBins(File bamFile,String contigName) throws IOException {
SAMFileReader reader;
BAMIndex index;
reader = new SAMFileReader(bamFile); final SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES).
reader.setValidationStringency(ValidationStringency.SILENT); validationStringency(ValidationStringency.SILENT).open(bamFile);
reader.enableIndexCaching(true); final SamReader.Indexing index = reader.indexing();
index = reader.getIndex();
reader.queryOverlapping(contigName,1,reader.getFileHeader().getSequence(contigName).getSequenceLength()).close(); reader.queryOverlapping(contigName,1,reader.getFileHeader().getSequence(contigName).getSequenceLength()).close();

View File

@ -25,14 +25,17 @@
package org.broadinstitute.gatk.engine.datasources.reads.utilities; package org.broadinstitute.gatk.engine.datasources.reads.utilities;
import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecord;
import org.broadinstitute.gatk.utils.commandline.Argument; import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.commandline.CommandLineProgram; import org.broadinstitute.gatk.utils.commandline.CommandLineProgram;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import java.io.File; import java.io.File;
import java.io.IOException;
/** /**
* A simple utility written directly in Picard that will rename tags * A simple utility written directly in Picard that will rename tags
@ -62,7 +65,7 @@ public class BAMTagRenamer extends CommandLineProgram {
long readsWritten = 0; long readsWritten = 0;
long readsAltered = 0; long readsAltered = 0;
SAMFileReader reader = new SAMFileReader(input); final SamReader reader = SamReaderFactory.makeDefault().open(input);
SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(reader.getFileHeader(),true,output,compressionLevel); SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(reader.getFileHeader(),true,output,compressionLevel);
for(SAMRecord read: reader) { for(SAMRecord read: reader) {
@ -79,7 +82,13 @@ public class BAMTagRenamer extends CommandLineProgram {
} }
writer.close(); writer.close();
System.out.printf("%d reads written. %d tag names updated from %s to %s.%n",readsWritten,readsAltered,sourceTagName,targetTagName); System.out.printf("%d reads written. %d tag names updated from %s to %s.%n",readsWritten,readsAltered,sourceTagName,targetTagName);
try {
reader.close();
} catch ( IOException ex ) {
throw new ReviewedGATKException("Unable to close " + input , ex);
}
return 0; return 0;
} }

View File

@ -31,6 +31,7 @@ import org.broadinstitute.gatk.utils.commandline.CommandLineProgram;
import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.exceptions.UserException;
import java.io.File; import java.io.File;
import java.io.IOException;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -53,9 +54,8 @@ public class PrintBAMRegion extends CommandLineProgram {
private static final int MIN_OFFSET_SIZE = 0; private static final int MIN_OFFSET_SIZE = 0;
private static final int MAX_OFFSET_SIZE = (int)Math.pow(2,16)-1; private static final int MAX_OFFSET_SIZE = (int)Math.pow(2,16)-1;
public int execute() { public int execute() throws IOException {
SAMFileReader reader = new SAMFileReader(input); final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(input);
reader.setValidationStringency(ValidationStringency.SILENT);
Pattern regionPattern = Pattern.compile("(\\d+):(\\d+)-(\\d+):(\\d+)"); Pattern regionPattern = Pattern.compile("(\\d+):(\\d+)-(\\d+):(\\d+)");
Matcher matcher = regionPattern.matcher(region); Matcher matcher = regionPattern.matcher(region);
@ -76,10 +76,10 @@ public class PrintBAMRegion extends CommandLineProgram {
if(lastOffset < MIN_OFFSET_SIZE || lastOffset > MAX_OFFSET_SIZE) if(lastOffset < MIN_OFFSET_SIZE || lastOffset > MAX_OFFSET_SIZE)
throw new UserException(String.format("Last offset is invalid; must be between %d and %d; actually is %d",MIN_OFFSET_SIZE,MAX_OFFSET_SIZE,lastOffset)); throw new UserException(String.format("Last offset is invalid; must be between %d and %d; actually is %d",MIN_OFFSET_SIZE,MAX_OFFSET_SIZE,lastOffset));
GATKChunk chunk = new GATKChunk(firstBlock<<16 | firstOffset,lastBlock<<16 | lastOffset); final GATKChunk chunk = new GATKChunk(firstBlock<<16 | firstOffset,lastBlock<<16 | lastOffset);
GATKBAMFileSpan fileSpan = new GATKBAMFileSpan(chunk); final GATKBAMFileSpan fileSpan = new GATKBAMFileSpan(chunk);
SAMRecordIterator iterator = reader.iterator(fileSpan); final SAMRecordIterator iterator = ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(fileSpan);
long readCount = 0; long readCount = 0;
while(iterator.hasNext()) { while(iterator.hasNext()) {
System.out.printf("%s%n",iterator.next().format()); System.out.printf("%s%n",iterator.next().format());

View File

@ -65,8 +65,6 @@ public class PrintBGZFBounds extends CommandLineProgram {
float uncompressedSize = 0; float uncompressedSize = 0;
long totalBlocks = 0; long totalBlocks = 0;
//SAMFileReader reader = new SAMFileReader(input);
while(true) { while(true) {
final long blockStart = fis.getChannel().position(); final long blockStart = fis.getChannel().position();

View File

@ -98,7 +98,7 @@ public abstract class OutputTracker implements ReferenceBacked {
ArgumentSource targetField = io.getKey(); ArgumentSource targetField = io.getKey();
Object targetValue = io.getValue(); Object targetValue = io.getValue();
// Ghastly hack: reaches in and finishes building out the SAMFileReader. // Ghastly hack: reaches in and finishes building out the SamReader.
// TODO: Generalize this, and move it to its own initialization step. // TODO: Generalize this, and move it to its own initialization step.
if( targetValue instanceof SAMReaderBuilder) { if( targetValue instanceof SAMReaderBuilder) {
SAMReaderBuilder builder = (SAMReaderBuilder)targetValue; SAMReaderBuilder builder = (SAMReaderBuilder)targetValue;

View File

@ -31,6 +31,7 @@ import htsjdk.samtools.util.ProgressLoggerInterface;
import htsjdk.samtools.util.RuntimeIOException; import htsjdk.samtools.util.RuntimeIOException;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub; import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub;
import org.broadinstitute.gatk.utils.exceptions.GATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.sam.SimplifyingSAMFileWriter; import org.broadinstitute.gatk.utils.sam.SimplifyingSAMFileWriter;
@ -112,7 +113,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
} }
public void mergeInto( SAMFileWriter targetStream ) { public void mergeInto( SAMFileWriter targetStream ) {
SAMFileReader reader = new SAMFileReader( file ); final SamReader reader = SamReaderFactory.makeDefault().open(file);
try { try {
CloseableIterator<SAMRecord> iterator = reader.iterator(); CloseableIterator<SAMRecord> iterator = reader.iterator();
while( iterator.hasNext() ) while( iterator.hasNext() )
@ -120,7 +121,11 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
iterator.close(); iterator.close();
} }
finally { finally {
reader.close(); try {
reader.close();
} catch (IOException e ) {
throw new GATKException(e.getMessage());
}
file.delete(); file.delete();
} }
} }

View File

@ -25,7 +25,7 @@
package org.broadinstitute.gatk.engine.io.stubs; package org.broadinstitute.gatk.engine.io.stubs;
import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SamReader;
import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.exceptions.UserException;
@ -43,7 +43,7 @@ public class SAMReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor {
private GenomeAnalysisEngine engine; private GenomeAnalysisEngine engine;
/** /**
* Create a new SAMFileReader argument, notifying the given engine when that argument has been created. * Create a new SamReader argument, notifying the given engine when that argument has been created.
* @param engine engine * @param engine engine
*/ */
public SAMReaderArgumentTypeDescriptor(GenomeAnalysisEngine engine) { public SAMReaderArgumentTypeDescriptor(GenomeAnalysisEngine engine) {
@ -52,7 +52,7 @@ public class SAMReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override @Override
public boolean supports( Class type ) { public boolean supports( Class type ) {
return SAMFileReader.class.isAssignableFrom(type); return SamReader.class.isAssignableFrom(type);
} }
@Override @Override
@ -69,7 +69,7 @@ public class SAMReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor {
// WARNING: Skipping required side-effect because stub is impossible to generate. // WARNING: Skipping required side-effect because stub is impossible to generate.
engine.addInput(source, builder); engine.addInput(source, builder);
// MASSIVE KLUDGE! SAMFileReader is tricky to implement and we don't yet have a stub. Return null, then // MASSIVE KLUDGE! SamReader is tricky to implement and we don't yet have a stub. Return null, then
// let the output tracker load it in. // let the output tracker load it in.
// TODO: Add a stub for SAMReader. // TODO: Add a stub for SAMReader.
return null; return null;

View File

@ -25,7 +25,10 @@
package org.broadinstitute.gatk.engine; package org.broadinstitute.gatk.engine;
import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SamInputResource;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.utils.BaseTest;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.Test; import org.testng.annotations.Test;
@ -42,8 +45,11 @@ public class CommandLineGATKUnitTest extends BaseTest {
public void testSamTextFileError1() { public void testSamTextFileError1() {
final File samFile = new File(publicTestDir + "testfile.sam"); final File samFile = new File(publicTestDir + "testfile.sam");
final File indexFile = new File(publicTestDir + "HiSeq.1mb.1RG.bai"); final File indexFile = new File(publicTestDir + "HiSeq.1mb.1RG.bai");
final SamInputResource samInputResource = SamInputResource.of(samFile);
samInputResource.index(indexFile);
try { try {
final SAMFileReader reader = new SAMFileReader(samFile, indexFile, false); final SamReader reader =
SamReaderFactory.makeDefault().open(samInputResource);
// we shouldn't get here // we shouldn't get here
Assert.fail("We should have exceptioned out when trying to create a reader with an index for a textual SAM file"); Assert.fail("We should have exceptioned out when trying to create a reader with an index for a textual SAM file");
@ -56,8 +62,9 @@ public class CommandLineGATKUnitTest extends BaseTest {
public void testSamTextFileError2() { public void testSamTextFileError2() {
File samFile = new File(publicTestDir + "testfile.sam"); File samFile = new File(publicTestDir + "testfile.sam");
try { try {
final SAMFileReader reader = new SAMFileReader(samFile); final SamInputResource samInputResource = SamInputResource.of(samFile);
reader.getFilePointerSpanningReads(); final SamReader reader = SamReaderFactory.makeDefault().open(samInputResource);
reader.indexing().getFilePointerSpanningReads();
// we shouldn't get here // we shouldn't get here
Assert.fail("We should have exceptioned out when trying to call getFilePointerSpanningReads() for a textual SAM file"); Assert.fail("We should have exceptioned out when trying to call getFilePointerSpanningReads() for a textual SAM file");

View File

@ -344,7 +344,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
// -------------------------------------------------------------------------------- // --------------------------------------------------------------------------------
@Test @Test
public void testGATKEngineConsolidatesCigars() { public void testGATKEngineConsolidatesCigars() throws IOException {
final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" + final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" +
" -R " + b37KGReference + " -R " + b37KGReference +
" -I " + privateTestDir + "zero_length_cigar_elements.bam" + " -I " + privateTestDir + "zero_length_cigar_elements.bam" +
@ -352,8 +352,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
1, Arrays.asList("")); // No MD5s; we only want to check the cigar 1, Arrays.asList("")); // No MD5s; we only want to check the cigar
final File outputBam = executeTest("testGATKEngineConsolidatesCigars", spec).first.get(0); final File outputBam = executeTest("testGATKEngineConsolidatesCigars", spec).first.get(0);
final SAMFileReader reader = new SAMFileReader(outputBam); final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(outputBam);
reader.setValidationStringency(ValidationStringency.SILENT);
final SAMRecord read = reader.iterator().next(); final SAMRecord read = reader.iterator().next();
reader.close(); reader.close();
@ -382,7 +381,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
1, Arrays.asList("")); // No MD5s; we only want to check the read groups 1, Arrays.asList("")); // No MD5s; we only want to check the read groups
final File outputBam = executeTest("testOnTheFlySampleRenamingWithSingleBamFile", spec).first.get(0); final File outputBam = executeTest("testOnTheFlySampleRenamingWithSingleBamFile", spec).first.get(0);
final SAMFileReader reader = new SAMFileReader(outputBam); final SamReader reader = SamReaderFactory.makeDefault().open(outputBam);
for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) { for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) {
Assert.assertEquals(readGroup.getSample(), "myNewSampleName", String.format("Sample for read group %s not renamed correctly", readGroup.getId())); Assert.assertEquals(readGroup.getSample(), "myNewSampleName", String.format("Sample for read group %s not renamed correctly", readGroup.getId()));
@ -402,12 +401,12 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
final Map<String, String> readGroupToNewSampleMap = new HashMap<>(); final Map<String, String> readGroupToNewSampleMap = new HashMap<>();
for ( String inputBamID : Arrays.asList("12878", "12891", "12892") ) { for ( String inputBamID : Arrays.asList("12878", "12891", "12892") ) {
final File inputBam = new File(privateTestDir + String.format("CEUTrio.HiSeq.WGS.b37.NA%s.HEADERONLY.bam", inputBamID)); final File inputBam = new File(privateTestDir + String.format("CEUTrio.HiSeq.WGS.b37.NA%s.HEADERONLY.bam", inputBamID));
final SAMFileReader inputBamReader = new SAMFileReader(inputBam); final SamReader reader = SamReaderFactory.makeDefault().open(inputBam);
final String newSampleName = String.format("newSampleFor%s", inputBamID); final String newSampleName = String.format("newSampleFor%s", inputBamID);
for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) { for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) {
readGroupToNewSampleMap.put(readGroup.getId(), newSampleName); readGroupToNewSampleMap.put(readGroup.getId(), newSampleName);
} }
inputBamReader.close(); reader.close();
} }
final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" + final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" +
@ -420,10 +419,10 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
1, Arrays.asList("")); // No MD5s; we only want to check the read groups 1, Arrays.asList("")); // No MD5s; we only want to check the read groups
final File outputBam = executeTest("testOnTheFlySampleRenamingWithMultipleBamFiles", spec).first.get(0); final File outputBam = executeTest("testOnTheFlySampleRenamingWithMultipleBamFiles", spec).first.get(0);
final SAMFileReader outputBamReader = new SAMFileReader(outputBam); final SamReader reader = SamReaderFactory.makeDefault().open(outputBam);
int totalReadGroupsSeen = 0; int totalReadGroupsSeen = 0;
for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) { for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) {
Assert.assertEquals(readGroup.getSample(), readGroupToNewSampleMap.get(readGroup.getId()), Assert.assertEquals(readGroup.getSample(), readGroupToNewSampleMap.get(readGroup.getId()),
String.format("Wrong sample for read group %s after on-the-fly renaming", readGroup.getId())); String.format("Wrong sample for read group %s after on-the-fly renaming", readGroup.getId()));
totalReadGroupsSeen++; totalReadGroupsSeen++;
@ -431,7 +430,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
Assert.assertEquals(totalReadGroupsSeen, readGroupToNewSampleMap.size(), "Wrong number of read groups encountered in output bam file"); Assert.assertEquals(totalReadGroupsSeen, readGroupToNewSampleMap.size(), "Wrong number of read groups encountered in output bam file");
outputBamReader.close(); reader.close();
} }
// On-the-fly sample renaming test case: three single-sample bams with multiple read groups per bam, // On-the-fly sample renaming test case: three single-sample bams with multiple read groups per bam,
@ -446,15 +445,15 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
final Map<String, String> readGroupToNewSampleMap = new HashMap<>(); final Map<String, String> readGroupToNewSampleMap = new HashMap<>();
for ( String inputBamID : Arrays.asList("12878", "12891", "12892") ) { for ( String inputBamID : Arrays.asList("12878", "12891", "12892") ) {
final File inputBam = new File(privateTestDir + String.format("CEUTrio.HiSeq.WGS.b37.NA%s.HEADERONLY.bam", inputBamID)); final File inputBam = new File(privateTestDir + String.format("CEUTrio.HiSeq.WGS.b37.NA%s.HEADERONLY.bam", inputBamID));
final SAMFileReader inputBamReader = new SAMFileReader(inputBam); final SamReader reader = SamReaderFactory.makeDefault().open(inputBam);
// Special-case NA12891, which we're not renaming: // Special-case NA12891, which we're not renaming:
final String newSampleName = inputBamID.equals("12891") ? "NA12891" : String.format("newSampleFor%s", inputBamID); final String newSampleName = inputBamID.equals("12891") ? "NA12891" : String.format("newSampleFor%s", inputBamID);
for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) { for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) {
readGroupToNewSampleMap.put(readGroup.getId(), newSampleName); readGroupToNewSampleMap.put(readGroup.getId(), newSampleName);
} }
inputBamReader.close(); reader.close();
} }
final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" + final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" +
@ -467,10 +466,10 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
1, Arrays.asList("")); // No MD5s; we only want to check the read groups 1, Arrays.asList("")); // No MD5s; we only want to check the read groups
final File outputBam = executeTest("testOnTheFlySampleRenamingWithMultipleBamFilesPartialRename", spec).first.get(0); final File outputBam = executeTest("testOnTheFlySampleRenamingWithMultipleBamFilesPartialRename", spec).first.get(0);
final SAMFileReader outputBamReader = new SAMFileReader(outputBam); final SamReader reader = SamReaderFactory.makeDefault().open(outputBam);
int totalReadGroupsSeen = 0; int totalReadGroupsSeen = 0;
for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) { for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) {
Assert.assertEquals(readGroup.getSample(), readGroupToNewSampleMap.get(readGroup.getId()), Assert.assertEquals(readGroup.getSample(), readGroupToNewSampleMap.get(readGroup.getId()),
String.format("Wrong sample for read group %s after on-the-fly renaming", readGroup.getId())); String.format("Wrong sample for read group %s after on-the-fly renaming", readGroup.getId()));
totalReadGroupsSeen++; totalReadGroupsSeen++;
@ -478,7 +477,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
Assert.assertEquals(totalReadGroupsSeen, readGroupToNewSampleMap.size(), "Wrong number of read groups encountered in output bam file"); Assert.assertEquals(totalReadGroupsSeen, readGroupToNewSampleMap.size(), "Wrong number of read groups encountered in output bam file");
outputBamReader.close(); reader.close();
} }
// On-the-fly sample renaming test case: two single-sample bams with read group collisions // On-the-fly sample renaming test case: two single-sample bams with read group collisions
@ -489,11 +488,11 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
privateTestDir + "CEUTrio.HiSeq.WGS.b37.READ_GROUP_COLLISIONS_WITH_NA12878.HEADERONLY.bam newSampleForNot12878")); privateTestDir + "CEUTrio.HiSeq.WGS.b37.READ_GROUP_COLLISIONS_WITH_NA12878.HEADERONLY.bam newSampleForNot12878"));
final Set<String> na12878ReadGroups = new HashSet<>(); final Set<String> na12878ReadGroups = new HashSet<>();
final SAMFileReader inputBamReader = new SAMFileReader(new File(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam")); final SamReader inpuBAMreader = SamReaderFactory.makeDefault().open(new File(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam"));
for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) { for ( final SAMReadGroupRecord readGroup : inpuBAMreader.getFileHeader().getReadGroups() ) {
na12878ReadGroups.add(readGroup.getId()); na12878ReadGroups.add(readGroup.getId());
} }
inputBamReader.close(); inpuBAMreader.close();
final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" + final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" +
" -R " + b37KGReference + " -R " + b37KGReference +
@ -504,10 +503,10 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
1, Arrays.asList("")); // No MD5s; we only want to check the read groups 1, Arrays.asList("")); // No MD5s; we only want to check the read groups
final File outputBam = executeTest("testOnTheFlySampleRenamingWithReadGroupCollisions", spec).first.get(0); final File outputBam = executeTest("testOnTheFlySampleRenamingWithReadGroupCollisions", spec).first.get(0);
final SAMFileReader outputBamReader = new SAMFileReader(outputBam); final SamReader outputBAMreader = SamReaderFactory.makeDefault().open(outputBam);
int totalReadGroupsSeen = 0; int totalReadGroupsSeen = 0;
for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) { for ( final SAMReadGroupRecord readGroup : outputBAMreader.getFileHeader().getReadGroups() ) {
String expectedSampleName = ""; String expectedSampleName = "";
if ( na12878ReadGroups.contains(readGroup.getId()) ) { if ( na12878ReadGroups.contains(readGroup.getId()) ) {
expectedSampleName = "newSampleFor12878"; expectedSampleName = "newSampleFor12878";
@ -523,7 +522,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
Assert.assertEquals(totalReadGroupsSeen, na12878ReadGroups.size() * 2, "Wrong number of read groups encountered in output bam file"); Assert.assertEquals(totalReadGroupsSeen, na12878ReadGroups.size() * 2, "Wrong number of read groups encountered in output bam file");
outputBamReader.close(); outputBAMreader.close();
} }
// On-the-fly sample renaming test case: a multi-sample bam (this should generate a UserException) // On-the-fly sample renaming test case: a multi-sample bam (this should generate a UserException)

View File

@ -55,12 +55,12 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
// public void timeDownsampling(int reps) { // public void timeDownsampling(int reps) {
// for(int i = 0; i < reps; i++) { // for(int i = 0; i < reps; i++) {
// SAMFileReader reader = new SAMFileReader(inputFile); // SamReader reader = SamReaderFactory.makeDefault().open(inputFile);
// ReadProperties readProperties = new ReadProperties(Collections.<SAMReaderID>singletonList(new SAMReaderID(inputFile,new Tags())), // ReadProperties readProperties = new ReadProperties(Collections.<SAMReaderID>singletonList(new SAMReaderID(inputFile,new Tags())),
// reader.getFileHeader(), // reader.getFileHeader(),
// SAMFileHeader.SortOrder.coordinate, // SAMFileHeader.SortOrder.coordinate,
// false, // false,
// SAMFileReader.ValidationStringency.SILENT, // ValidationStringency.SILENT,
// downsampling.create(), // downsampling.create(),
// new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)), // new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
// Collections.<ReadFilter>emptyList(), // Collections.<ReadFilter>emptyList(),

View File

@ -25,7 +25,6 @@
package org.broadinstitute.gatk.engine.datasources.reads; package org.broadinstitute.gatk.engine.datasources.reads;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.SamReaderFactory;
import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.utils.BaseTest;
@ -50,8 +49,7 @@ public class GATKBAMIndexFromDataSourceUnitTest extends BaseTest {
@BeforeClass @BeforeClass
public void init() throws IOException { public void init() throws IOException {
final SAMFileReader reader = new SAMFileReader(bamFile); final SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES).open(bamFile);
reader.enableIndexCaching(true); // needed ot get BrowseableBAMIndex
Assert.assertTrue(reader.hasIndex()); Assert.assertTrue(reader.hasIndex());
Assert.assertTrue(reader.indexing().hasBrowseableIndex()); Assert.assertTrue(reader.indexing().hasBrowseableIndex());

View File

@ -25,8 +25,9 @@
package org.broadinstitute.gatk.engine.datasources.reads; package org.broadinstitute.gatk.engine.datasources.reads;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.testng.Assert; import org.testng.Assert;
@ -34,7 +35,7 @@ import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.IOException;
/** /**
* Test basic functionality in the GATK's implementation of the BAM index classes. * Test basic functionality in the GATK's implementation of the BAM index classes.
@ -59,8 +60,8 @@ public class GATKBAMIndexFromFileUnitTest extends BaseTest {
@BeforeClass @BeforeClass
public void init() throws FileNotFoundException { public void init() throws IOException {
final SAMFileReader reader = new SAMFileReader(bamFile); final SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES).open(bamFile);
sequenceDictionary = reader.getFileHeader().getSequenceDictionary(); sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
reader.close(); reader.close();

View File

@ -26,13 +26,13 @@
package org.broadinstitute.gatk.engine.datasources.reads; package org.broadinstitute.gatk.engine.datasources.reads;
import com.google.caliper.Param; import com.google.caliper.Param;
import com.google.caliper.SimpleBenchmark; import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.util.SamLocusIterator; import htsjdk.samtools.util.SamLocusIterator;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloseableIterator;
import java.io.File; import java.io.IOException;
import java.util.Iterator; import java.util.Iterator;
/** /**
@ -55,9 +55,9 @@ public class PicardBaselineBenchmark extends ReadProcessingBenchmark {
@Override @Override
public Integer getMaxReads() { return maxReads; } public Integer getMaxReads() { return maxReads; }
public void timeDecompressBamFile(int reps) { public void timeDecompressBamFile(int reps) throws IOException {
for(int i = 0; i < reps; i++) { for(int i = 0; i < reps; i++) {
SAMFileReader reader = new SAMFileReader(inputFile); final SamReader reader = SamReaderFactory.makeDefault().open(inputFile);
CloseableIterator<SAMRecord> iterator = reader.iterator(); CloseableIterator<SAMRecord> iterator = reader.iterator();
while(iterator.hasNext()) while(iterator.hasNext())
iterator.next(); iterator.next();
@ -66,9 +66,9 @@ public class PicardBaselineBenchmark extends ReadProcessingBenchmark {
} }
} }
public void timeExtractTag(int reps) { public void timeExtractTag(int reps) throws IOException {
for(int i = 0; i < reps; i++) { for(int i = 0; i < reps; i++) {
SAMFileReader reader = new SAMFileReader(inputFile); final SamReader reader = SamReaderFactory.makeDefault().open(inputFile);
CloseableIterator<SAMRecord> iterator = reader.iterator(); CloseableIterator<SAMRecord> iterator = reader.iterator();
while(iterator.hasNext()) { while(iterator.hasNext()) {
SAMRecord read = iterator.next(); SAMRecord read = iterator.next();
@ -79,9 +79,9 @@ public class PicardBaselineBenchmark extends ReadProcessingBenchmark {
} }
} }
public void timeSamLocusIterator(int reps) { public void timeSamLocusIterator(int reps) throws IOException {
for(int i = 0; i < reps; i++) { for(int i = 0; i < reps; i++) {
SAMFileReader reader = new SAMFileReader(inputFile); final SamReader reader = SamReaderFactory.makeDefault().open(inputFile);
long loci = 0; long loci = 0;
SamLocusIterator samLocusIterator = new SamLocusIterator(reader); SamLocusIterator samLocusIterator = new SamLocusIterator(reader);

View File

@ -25,11 +25,11 @@
package org.broadinstitute.gatk.engine.datasources.reads; package org.broadinstitute.gatk.engine.datasources.reads;
import com.google.caliper.Param;
import com.google.caliper.SimpleBenchmark; import com.google.caliper.SimpleBenchmark;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecord;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
@ -51,7 +51,7 @@ public abstract class ReadProcessingBenchmark extends SimpleBenchmark {
@Override @Override
public void setUp() { public void setUp() {
SAMFileReader fullInputFile = new SAMFileReader(new File(getBAMFile())); SamReader reader = SamReaderFactory.makeDefault().open(new File(getBAMFile()));
File tempFile = null; File tempFile = null;
try { try {
@ -62,15 +62,20 @@ public abstract class ReadProcessingBenchmark extends SimpleBenchmark {
} }
SAMFileWriterFactory factory = new SAMFileWriterFactory(); SAMFileWriterFactory factory = new SAMFileWriterFactory();
factory.setCreateIndex(true); factory.setCreateIndex(true);
SAMFileWriter writer = factory.makeBAMWriter(fullInputFile.getFileHeader(),true,tempFile); SAMFileWriter writer = factory.makeBAMWriter(reader.getFileHeader(),true,tempFile);
long numReads = 0; long numReads = 0;
for(SAMRecord read: fullInputFile) { for(SAMRecord read: reader) {
if(numReads++ >= getMaxReads()) if(numReads++ >= getMaxReads())
break; break;
writer.addAlignment(read); writer.addAlignment(read);
} }
try {
reader.close();
} catch ( IOException ex ) {
throw new ReviewedGATKException("Unable to close " + getBAMFile() , ex);
}
writer.close(); writer.close();
inputFile = tempFile; inputFile = tempFile;

View File

@ -28,11 +28,13 @@ package org.broadinstitute.gatk.engine.datasources.reads;
import com.google.caliper.Param; import com.google.caliper.Param;
import htsjdk.samtools.Cigar; import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement; import htsjdk.samtools.CigarElement;
import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloseableIterator;
import java.io.File; import java.io.File;
import java.io.IOException;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -54,10 +56,10 @@ public class TheoreticalMinimaBenchmark extends ReadProcessingBenchmark {
@Override @Override
public Integer getMaxReads() { return maxReads; } public Integer getMaxReads() { return maxReads; }
public void timeIterateOverEachBase(int reps) { public void timeIterateOverEachBase(int reps) throws IOException {
System.out.printf("Processing " + inputFile); System.out.printf("Processing " + inputFile);
for(int i = 0; i < reps; i++) { for(int i = 0; i < reps; i++) {
SAMFileReader reader = new SAMFileReader(inputFile); final SamReader reader = SamReaderFactory.makeDefault().open((inputFile));
CloseableIterator<SAMRecord> iterator = reader.iterator(); CloseableIterator<SAMRecord> iterator = reader.iterator();
long As=0,Cs=0,Gs=0,Ts=0; long As=0,Cs=0,Gs=0,Ts=0;
@ -78,14 +80,14 @@ public class TheoreticalMinimaBenchmark extends ReadProcessingBenchmark {
} }
} }
public void timeIterateOverCigarString(int reps) { public void timeIterateOverCigarString(int reps) throws IOException {
for(int i = 0; i < reps; i++) { for(int i = 0; i < reps; i++) {
long matchMismatches = 0; long matchMismatches = 0;
long insertions = 0; long insertions = 0;
long deletions = 0; long deletions = 0;
long others = 0; long others = 0;
SAMFileReader reader = new SAMFileReader(inputFile); final SamReader reader = SamReaderFactory.makeDefault().open(inputFile);
CloseableIterator<SAMRecord> iterator = reader.iterator(); CloseableIterator<SAMRecord> iterator = reader.iterator();
while(iterator.hasNext()) { while(iterator.hasNext()) {
SAMRecord read = iterator.next(); SAMRecord read = iterator.next();

View File

@ -25,7 +25,7 @@
package org.broadinstitute.gatk.queue.extensions.gatk; package org.broadinstitute.gatk.queue.extensions.gatk;
import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SamReader;
import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriter;
import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
@ -245,7 +245,7 @@ public abstract class ArgumentField {
protected static Class<?> mapType(Class<?> clazz) { protected static Class<?> mapType(Class<?> clazz) {
if (InputStream.class.isAssignableFrom(clazz)) return File.class; if (InputStream.class.isAssignableFrom(clazz)) return File.class;
if (SAMFileReader.class.isAssignableFrom(clazz)) return File.class; if (SamReader.class.isAssignableFrom(clazz)) return File.class;
if (OutputStream.class.isAssignableFrom(clazz)) return File.class; if (OutputStream.class.isAssignableFrom(clazz)) return File.class;
if (VariantContextWriter.class.isAssignableFrom(clazz)) return File.class; if (VariantContextWriter.class.isAssignableFrom(clazz)) return File.class;
if (SAMFileWriter.class.isAssignableFrom(clazz)) return File.class; if (SAMFileWriter.class.isAssignableFrom(clazz)) return File.class;

View File

@ -27,7 +27,7 @@ package org.broadinstitute.gatk.queue.util
import java.io.File import java.io.File
import io.Source._ import io.Source._
import htsjdk.samtools.{SAMReadGroupRecord, SAMFileReader} import htsjdk.samtools.{SamReaderFactory, SAMReadGroupRecord}
import collection.JavaConversions._ import collection.JavaConversions._
@ -87,8 +87,10 @@ object QScriptUtils {
* Returns the number of contigs in the BAM file header. * Returns the number of contigs in the BAM file header.
*/ */
def getNumberOfContigs(bamFile: File): Int = { def getNumberOfContigs(bamFile: File): Int = {
val samReader = new SAMFileReader(bamFile) val samReader = SamReaderFactory.makeDefault().open(bamFile)
samReader.getFileHeader.getSequenceDictionary.getSequences.size() val size = samReader.getFileHeader.getSequenceDictionary.getSequences.size()
samReader.close
return size
} }
/** /**
@ -112,11 +114,12 @@ object QScriptUtils {
* @return a set with all distinct samples (in no particular order) * @return a set with all distinct samples (in no particular order)
*/ */
def getSamplesFromBAM(bam: File) : Set[String] = { def getSamplesFromBAM(bam: File) : Set[String] = {
val reader = new SAMFileReader(bam) val reader = SamReaderFactory.makeDefault().open(bam);
var samples: Set[String] = Set() var samples: Set[String] = Set()
for (rg <- reader.getFileHeader.getReadGroups) { for (rg <- reader.getFileHeader.getReadGroups) {
samples += rg.getSample samples += rg.getSample
} }
reader.close
samples samples
} }
} }

View File

@ -28,7 +28,7 @@ package org.broadinstitute.gatk.queue.util
import java.io.File import java.io.File
import org.apache.commons.io.FilenameUtils import org.apache.commons.io.FilenameUtils
import scala.io.Source._ import scala.io.Source._
import htsjdk.samtools.SAMFileReader import htsjdk.samtools.{SamReaderFactory}
import htsjdk.variant.vcf.{VCFHeader, VCFCodec} import htsjdk.variant.vcf.{VCFHeader, VCFCodec}
import scala.collection.JavaConversions._ import scala.collection.JavaConversions._
import htsjdk.tribble.AbstractFeatureReader import htsjdk.tribble.AbstractFeatureReader
@ -40,7 +40,7 @@ object VCF_BAM_utilities {
} }
def getSamplesInBAM(bam: File): List[String] = { def getSamplesInBAM(bam: File): List[String] = {
return new SAMFileReader(bam).getFileHeader().getReadGroups().toList.map(srgr => srgr.getSample()).toSet.toList return SamReaderFactory.makeDefault().open(bam).getFileHeader().getReadGroups().toList.map(srgr => srgr.getSample()).toSet.toList
} }
def parseBAMsInput(bamsIn: File): List[File] = FilenameUtils.getExtension(bamsIn.getPath) match { def parseBAMsInput(bamsIn: File): List[File] = FilenameUtils.getExtension(bamsIn.getPath) match {

View File

@ -25,11 +25,13 @@
package org.broadinstitute.gatk.utils.diffengine; package org.broadinstitute.gatk.utils.diffengine;
import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator; import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.BlockCompressedInputStream;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import java.io.*; import java.io.*;
import java.util.Arrays; import java.util.Arrays;
@ -49,8 +51,7 @@ public class BAMDiffableReader implements DiffableReader {
@Override @Override
public DiffElement readFromFile(File file, int maxElementsToRead) { public DiffElement readFromFile(File file, int maxElementsToRead) {
final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(file);
reader.setValidationStringency(ValidationStringency.SILENT);
DiffNode root = DiffNode.rooted(file.getName()); DiffNode root = DiffNode.rooted(file.getName());
SAMRecordIterator iterator = reader.iterator(); SAMRecordIterator iterator = reader.iterator();
@ -93,7 +94,11 @@ public class BAMDiffableReader implements DiffableReader {
break; break;
} }
reader.close(); try {
reader.close();
} catch (final IOException ex ) {
throw new ReviewedGATKException("Unable to close " + file , ex);
}
return root.getBinding(); return root.getBinding();
} }

View File

@ -25,7 +25,8 @@
package org.broadinstitute.gatk.utils.locusiterator; package org.broadinstitute.gatk.utils.locusiterator;
import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecordIterator; import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.samtools.reference.ReferenceSequenceFile;
@ -65,7 +66,7 @@ public class LIBSPerformance extends CommandLineProgram {
final ReferenceSequenceFile reference = new CachingIndexedFastaSequenceFile(referenceFile); final ReferenceSequenceFile reference = new CachingIndexedFastaSequenceFile(referenceFile);
final GenomeLocParser genomeLocParser = new GenomeLocParser(reference); final GenomeLocParser genomeLocParser = new GenomeLocParser(reference);
final SAMFileReader reader = new SAMFileReader(samFile); final SamReader reader = SamReaderFactory.makeDefault().open(samFile);
SAMRecordIterator rawIterator; SAMRecordIterator rawIterator;
if ( location == null ) if ( location == null )
@ -81,6 +82,8 @@ public class LIBSPerformance extends CommandLineProgram {
for ( final SAMReadGroupRecord rg : reader.getFileHeader().getReadGroups() ) for ( final SAMReadGroupRecord rg : reader.getFileHeader().getReadGroups() )
samples.add(rg.getSample()); samples.add(rg.getSample());
reader.close();
final LIBSDownsamplingInfo ds = new LIBSDownsamplingInfo(downsample, 250); final LIBSDownsamplingInfo ds = new LIBSDownsamplingInfo(downsample, 250);
final LocusIteratorByState libs = final LocusIteratorByState libs =

View File

@ -28,7 +28,7 @@ package org.broadinstitute.gatk.utils.locusiterator;
import com.google.java.contract.Ensures; import com.google.java.contract.Ensures;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import htsjdk.samtools.CigarOperator; import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SamReader;
import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloseableIterator;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
@ -149,7 +149,7 @@ public final class LocusIteratorByState extends LocusIterator {
} }
/** /**
* Create a new LocusIteratorByState based on a SAMFileReader using reads in an iterator it * Create a new LocusIteratorByState based on a SamReader using reads in an iterator it
* *
* Simple constructor that uses the samples in the reader, doesn't do any downsampling, * Simple constructor that uses the samples in the reader, doesn't do any downsampling,
* and makes a new GenomeLocParser using the reader. This constructor will be slow(ish) * and makes a new GenomeLocParser using the reader. This constructor will be slow(ish)
@ -158,7 +158,7 @@ public final class LocusIteratorByState extends LocusIterator {
* @param reader a non-null reader * @param reader a non-null reader
* @param it an iterator from reader that has the reads we want to use to create ReadBackPileups * @param it an iterator from reader that has the reads we want to use to create ReadBackPileups
*/ */
public LocusIteratorByState(final SAMFileReader reader, final CloseableIterator<SAMRecord> it) { public LocusIteratorByState(final SamReader reader, final CloseableIterator<SAMRecord> it) {
this(new GATKSAMRecordIterator(it), this(new GATKSAMRecordIterator(it),
new LIBSDownsamplingInfo(false, 0), new LIBSDownsamplingInfo(false, 0),
true, true,

View File

@ -26,16 +26,19 @@
package org.broadinstitute.gatk.utils.sam; package org.broadinstitute.gatk.utils.sam;
import htsjdk.samtools.*; import htsjdk.samtools.*;
import htsjdk.samtools.SamReader.Indexing;
import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
/** /**
* User: hanna * User: hanna
* Date: Jun 11, 2009 * Date: Jun 11, 2009
@ -53,7 +56,13 @@ import java.util.List;
* Pass specified reads into the given walker. * Pass specified reads into the given walker.
*/ */
public class ArtificialSAMFileReader extends SAMFileReader { public class ArtificialSAMFileReader implements SamReader, Indexing {
/**
* The reader of SamRecords
*/
private SamReader reader;
/** /**
* The parser, for GenomeLocs. * The parser, for GenomeLocs.
*/ */
@ -64,15 +73,20 @@ public class ArtificialSAMFileReader extends SAMFileReader {
*/ */
private final List<SAMRecord> reads; private final List<SAMRecord> reads;
/**
* Input/custom SAM file header
*/
private SAMFileHeader customHeader = null; private SAMFileHeader customHeader = null;
/** /**
* Construct an artificial SAM file reader. * Construct an artificial SAM file reader.
*
* @param sequenceDictionary sequence dictionary used to initialize our GenomeLocParser * @param sequenceDictionary sequence dictionary used to initialize our GenomeLocParser
* @param reads Reads to use as backing data source. * @param reads Reads to use as backing data source.
*/ */
public ArtificialSAMFileReader(SAMSequenceDictionary sequenceDictionary,SAMRecord... reads) { public ArtificialSAMFileReader(SAMSequenceDictionary sequenceDictionary,SAMRecord... reads) {
super( createEmptyInputStream(),true ); final SamInputResource samInputResource = SamInputResource.of(createEmptyInputStream());
reader = SamReaderFactory.makeDefault().open(samInputResource);
this.genomeLocParser = new GenomeLocParser(sequenceDictionary); this.genomeLocParser = new GenomeLocParser(sequenceDictionary);
this.reads = Arrays.asList(reads); this.reads = Arrays.asList(reads);
} }
@ -84,30 +98,75 @@ public class ArtificialSAMFileReader extends SAMFileReader {
* @param reads Reads to use as backing data source. * @param reads Reads to use as backing data source.
*/ */
public ArtificialSAMFileReader( SAMFileHeader customHeader, SAMRecord... reads ) { public ArtificialSAMFileReader( SAMFileHeader customHeader, SAMRecord... reads ) {
super(createEmptyInputStream(),true); final SamInputResource samInputResource = SamInputResource.of(createEmptyInputStream());
reader = SamReaderFactory.makeDefault().open(samInputResource);
this.customHeader = customHeader; this.customHeader = customHeader;
this.genomeLocParser = new GenomeLocParser(customHeader.getSequenceDictionary()); this.genomeLocParser = new GenomeLocParser(customHeader.getSequenceDictionary());
this.reads = Arrays.asList(reads); this.reads = Arrays.asList(reads);
} }
/**
 * Describes the resource behind this reader.
 *
 * @return the string produced by this object's {@code toString()}
 */
@Override
public String getResourceDescription() {
    return toString();
}
@Override @Override
public SAMFileHeader getFileHeader() { public boolean hasIndex() {
if ( customHeader != null ) { return this.reader.hasIndex();
return customHeader; }
}
return super.getFileHeader(); @Override
public Indexing indexing() {
return this;
}
/**
 * Returns the index obtained from {@link #getIndex()} as a {@link BrowseableBAMIndex}.
 *
 * @return the index, cast to {@code BrowseableBAMIndex}
 * @throws SAMException if the index returned by {@code getIndex()} is not browseable
 */
@Override
public BrowseableBAMIndex getBrowseableIndex() {
    final BAMIndex candidate = this.getIndex();
    if (candidate instanceof BrowseableBAMIndex) {
        return BrowseableBAMIndex.class.cast(candidate);
    }
    throw new SAMException("Cannot return index: index created by BAM is not browseable.");
}
/**
 * Reports whether this reader has an index that is a {@link BrowseableBAMIndex}.
 *
 * @return {@code false} when {@link #hasIndex()} is false; otherwise whether
 *         {@link #getIndex()} returns a {@code BrowseableBAMIndex}
 */
@Override
public boolean hasBrowseableIndex() {
    // getIndex() is only consulted when an index is reported, so anything it
    // throws can only surface on that path.
    if (!this.hasIndex()) {
        return false;
    }
    return this.getIndex() instanceof BrowseableBAMIndex;
}
/**
 * Not supported by this reader.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public BAMIndex getIndex() {
    // Carry a message so a failure names the unsupported operation.
    throw new UnsupportedOperationException("getIndex() is not supported by ArtificialSAMFileReader");
}
@Override
public SAMRecordIterator iterator() {
return new SAMRecordIterator() {
private final Iterator<SAMRecord> iterator = reads.iterator();
public boolean hasNext() { return iterator.hasNext(); }
public SAMRecord next() { return iterator.next(); }
public void close() {}
public void remove() { iterator.remove(); }
public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) { return this; }
};
} }
/** /**
* @{inheritDoc} * Iterate through the file.
*
* @param chunks List of chunks for which to retrieve data.
* @return An iterator.
*/ */
@Override @Override
public SAMRecordIterator iterator(SAMFileSpan chunks) {
return new SamReader.AssertingIterator(this.reader.iterator());
}
public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained) { public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained) {
GenomeLoc region = genomeLocParser.createGenomeLoc(sequence, start, end); GenomeLoc region = genomeLocParser.createGenomeLoc(sequence, start, end);
List<SAMRecord> coveredSubset = new ArrayList<SAMRecord>(); List<SAMRecord> coveredSubset = new ArrayList<>();
for( SAMRecord read: reads ) { for( SAMRecord read: reads ) {
GenomeLoc readPosition = genomeLocParser.createGenomeLoc(read); GenomeLoc readPosition = genomeLocParser.createGenomeLoc(read);
@ -126,15 +185,116 @@ public class ArtificialSAMFileReader extends SAMFileReader {
} }
@Override @Override
public SAMRecordIterator iterator() { public SAMRecordIterator queryOverlapping(final String sequence, final int start, final int end) {
return new SAMRecordIterator() { return this.query(sequence, start, end, false);
private final Iterator<SAMRecord> iterator = reads.iterator(); }
public boolean hasNext() { return iterator.hasNext(); }
public SAMRecord next() { return iterator.next(); } @Override
public void close() {} public SAMRecordIterator queryContained(final String sequence, final int start, final int end) {
public void remove() { iterator.remove(); } return this.query(sequence, start, end, true);
public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) { return this; } }
};
/**
 * Runs an interval query against the wrapped reader.
 *
 * @param intervals the intervals to query
 * @param contained passed through unchanged to the wrapped reader's {@code query}
 * @return the wrapped reader's result, wrapped in an {@code AssertingIterator}
 */
@Override
public SAMRecordIterator query(final QueryInterval[] intervals, final boolean contained) {
    final SAMRecordIterator delegateResults = this.reader.query(intervals, contained);
    return new AssertingIterator(delegateResults);
}
/**
 * Interval query with the {@code contained} flag set to {@code false}.
 *
 * @param intervals the intervals to query
 * @return the result of {@code query(intervals, false)}
 */
@Override
public SAMRecordIterator queryOverlapping(final QueryInterval[] intervals) {
    final boolean contained = false;
    return query(intervals, contained);
}
/**
 * Interval query with the {@code contained} flag set to {@code true}.
 *
 * @param intervals the intervals to query
 * @return the result of {@code query(intervals, true)}
 */
@Override
public SAMRecordIterator queryContained(final QueryInterval[] intervals) {
    final boolean contained = true;
    return query(intervals, contained);
}
/**
 * Queries the wrapped reader for unmapped reads.
 *
 * @return the wrapped reader's {@code queryUnmapped()} result, wrapped in an
 *         {@code AssertingIterator}
 */
@Override
public SAMRecordIterator queryUnmapped() {
    final SAMRecordIterator delegateResults = this.reader.queryUnmapped();
    return new AssertingIterator(delegateResults);
}
/**
 * Queries the wrapped reader by alignment start.
 *
 * @param sequence sequence name, passed through to the wrapped reader
 * @param start    alignment start, passed through to the wrapped reader
 * @return the wrapped reader's {@code queryAlignmentStart} result, wrapped in an
 *         {@code AssertingIterator}
 */
@Override
public SAMRecordIterator queryAlignmentStart(final String sequence, final int start) {
    final SAMRecordIterator delegateResults = this.reader.queryAlignmentStart(sequence, start);
    return new AssertingIterator(delegateResults);
}
/**
 * Looks up the mate of a paired read.
 *
 * Candidates come from {@link #queryUnmapped()} when the mate reference index of
 * {@code rec} is -1, and otherwise from {@link #queryAlignmentStart(String, int)}
 * at the mate's reference name and alignment start. Every candidate is examined,
 * so duplicates are detected even after a mate has been found.
 *
 * @param rec a paired read that is exactly one of first-of-pair / second-of-pair
 * @return the record with the same read name from the opposite end of the pair,
 *         or {@code null} if no candidate matches
 * @throws IllegalArgumentException if {@code rec} is unpaired, or its first-of-pair
 *                                  and second-of-pair flags are equal
 * @throws SAMFormatException       if an unpaired candidate shares the read name, or
 *                                  more than one candidate qualifies as the mate
 */
@Override
public SAMRecord queryMate(final SAMRecord rec) {
    if (!rec.getReadPairedFlag()) {
        throw new IllegalArgumentException("queryMate called for unpaired read.");
    }
    if (rec.getFirstOfPairFlag() == rec.getSecondOfPairFlag()) {
        throw new IllegalArgumentException("SAMRecord must be either first and second of pair, but not both.");
    }

    final boolean firstOfPair = rec.getFirstOfPairFlag();
    final SAMRecordIterator candidates;
    if (rec.getMateReferenceIndex() == -1) {
        candidates = this.queryUnmapped();
    } else {
        candidates = this.queryAlignmentStart(rec.getMateReferenceName(), rec.getMateAlignmentStart());
    }

    try {
        SAMRecord mate = null;
        while (candidates.hasNext()) {
            final SAMRecord candidate = candidates.next();

            if (!candidate.getReadPairedFlag()) {
                // An unpaired record must never share a name with a paired one.
                if (rec.getReadName().equals(candidate.getReadName())) {
                    throw new SAMFormatException("Paired and unpaired reads with same name: " + rec.getReadName());
                }
                continue;
            }

            // Skip candidates from the same end of the pair as rec.
            final boolean sameEnd = firstOfPair ? candidate.getFirstOfPairFlag() : candidate.getSecondOfPairFlag();
            if (sameEnd) {
                continue;
            }

            if (!rec.getReadName().equals(candidate.getReadName())) {
                continue;
            }
            if (mate != null) {
                throw new SAMFormatException("Multiple SAMRecord with read name " + rec.getReadName() + " for " + (firstOfPair?"second":"first") + " end.");
            }
            mate = candidate;
        }
        return mate;
    } finally {
        candidates.close();
    }
}
/**
 * Returns the file span reported by the wrapped reader's indexing view.
 *
 * @return the result of {@code reader.indexing().getFilePointerSpanningReads()}
 */
@Override
public SAMFileSpan getFilePointerSpanningReads() {
    final Indexing delegateIndexing = this.reader.indexing();
    return delegateIndexing.getFilePointerSpanningReads();
}
/**
 * Closes the wrapped reader, if there is one, and then drops the reference to it.
 * Calling this again once the reference has been dropped does nothing.
 *
 * @throws IOException if closing the wrapped reader fails; in that case the
 *                     reference is not dropped
 */
@Override
public void close() throws IOException {
    if (this.reader == null) {
        return;
    }
    this.reader.close();
    this.reader = null;
}
/**
 * Reports the type of the wrapped reader.
 *
 * @return the wrapped reader's {@code type()}
 */
@Override
public Type type() {
    final Type delegateType = this.reader.type();
    return delegateType;
}
@Override
public SAMFileHeader getFileHeader() {
return customHeader != null ? customHeader : this.reader.getFileHeader();
} }
/** /**

View File

@ -30,9 +30,11 @@ package org.broadinstitute.gatk.utils;
import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.samtools.reference.ReferenceSequenceFile;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.gatk.utils.pileup.PileupElement; import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
@ -50,6 +52,7 @@ import org.testng.annotations.Test;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*; import java.util.*;
public class ExampleToCopyUnitTest extends BaseTest { public class ExampleToCopyUnitTest extends BaseTest {
@ -217,13 +220,18 @@ public class ExampleToCopyUnitTest extends BaseTest {
// create a fake BAM file, and iterate through it // create a fake BAM file, and iterate through it
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(seq, 20, 10); final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(seq, 20, 10);
final File bam = bamBuilder.makeTemporarilyBAMFile(); final File bam = bamBuilder.makeTemporarilyBAMFile();
final SAMFileReader reader = new SAMFileReader(bam); final SamReader reader = SamReaderFactory.makeDefault().open(bam);
final Iterator<SAMRecord> bamIt = reader.iterator(); final Iterator<SAMRecord> bamIt = reader.iterator();
while ( bamIt.hasNext() ) { while ( bamIt.hasNext() ) {
final SAMRecord read = bamIt.next(); // all reads are actually GATKSAMRecords final SAMRecord read = bamIt.next(); // all reads are actually GATKSAMRecords
// TODO -- add some tests that use reads from a BAM // TODO -- add some tests that use reads from a BAM
} }
try {
reader.close();
} catch ( IOException ex ) {
throw new ReviewedGATKException("Unable to close " + bam , ex);
}
} }
/** /**

View File

@ -25,14 +25,17 @@
package org.broadinstitute.gatk.utils.sam; package org.broadinstitute.gatk.utils.sam;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.File; import java.io.File;
import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.Iterator; import java.util.Iterator;
import java.util.LinkedList; import java.util.LinkedList;
@ -94,7 +97,7 @@ public class ArtificialBAMBuilderUnitTest extends BaseTest {
} }
final File bam = bamBuilder.makeTemporarilyBAMFile(); final File bam = bamBuilder.makeTemporarilyBAMFile();
final SAMFileReader reader = new SAMFileReader(bam); final SamReader reader = SamReaderFactory.makeDefault().open(bam);
Assert.assertTrue(reader.hasIndex()); Assert.assertTrue(reader.hasIndex());
final Iterator<SAMRecord> bamIt = reader.iterator(); final Iterator<SAMRecord> bamIt = reader.iterator();
int nReadsFromBam = 0; int nReadsFromBam = 0;
@ -106,6 +109,11 @@ public class ArtificialBAMBuilderUnitTest extends BaseTest {
Assert.assertTrue(read.getAlignmentStart() >= lastStart); Assert.assertTrue(read.getAlignmentStart() >= lastStart);
lastStart = read.getAlignmentStart(); lastStart = read.getAlignmentStart();
} }
try {
reader.close();
} catch ( IOException ex ) {
throw new ReviewedGATKException("Unable to close " + bam , ex);
}
Assert.assertEquals(nReadsFromBam, bamBuilder.expectedNumberOfReads()); Assert.assertEquals(nReadsFromBam, bamBuilder.expectedNumberOfReads());
} }