Replace SAMFileReader with calls to SamReaderFactory

This commit is contained in:
Ron Levine 2016-09-20 12:49:06 -04:00
parent 2ee0755c35
commit cfd3ffa2c0
26 changed files with 339 additions and 126 deletions

View File

@ -35,6 +35,7 @@ import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
* A test harness to ensure that the perfect aligner works.
@ -63,8 +64,7 @@ public class AlignerTestHarness {
Aligner aligner = new BWAJavaAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile);
int count = 0;
SAMFileReader reader = new SAMFileReader(bamFile);
reader.setValidationStringency(ValidationStringency.SILENT);
final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(bamFile);
int mismatches = 0;
int failures = 0;
@ -160,6 +160,12 @@ public class AlignerTestHarness {
System.out.printf("%d reads examined.%n",count);
}
try {
reader.close();
} catch ( IOException ex ) {
throw new ReviewedGATKException("Unable to close " + bamFile , ex);
}
System.out.printf("%d reads examined; %d mismatches; %d failures.%n",count,mismatches,failures);
}

View File

@ -40,7 +40,7 @@ import java.util.LinkedList;
import java.util.List;
/**
* Presents decompressed blocks to the SAMFileReader.
* Presents decompressed blocks to the SamReader.
*/
public class BlockInputStream extends InputStream {
/**

View File

@ -25,15 +25,14 @@
package org.broadinstitute.gatk.engine.datasources.reads.utilities;
import htsjdk.samtools.BAMIndex;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.*;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.commandline.CommandLineProgram;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.instrumentation.Sizeof;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.List;
import java.util.Map;
@ -56,7 +55,7 @@ public class BAMFileStat extends CommandLineProgram {
@Argument(doc="The range to inspect.",required=false)
private String range;
public int execute() {
public int execute() throws IOException {
switch(command) {
case ShowBlocks:
throw new ReviewedGATKException("The BAM block inspector has been disabled.");
@ -81,14 +80,11 @@ public class BAMFileStat extends CommandLineProgram {
}
}
private void showIndexBins(File bamFile,String contigName) {
SAMFileReader reader;
BAMIndex index;
private void showIndexBins(File bamFile,String contigName) throws IOException {
reader = new SAMFileReader(bamFile);
reader.setValidationStringency(ValidationStringency.SILENT);
reader.enableIndexCaching(true);
index = reader.getIndex();
final SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES).
validationStringency(ValidationStringency.SILENT).open(bamFile);
final SamReader.Indexing index = reader.indexing();
reader.queryOverlapping(contigName,1,reader.getFileHeader().getSequence(contigName).getSequenceLength()).close();

View File

@ -25,14 +25,17 @@
package org.broadinstitute.gatk.engine.datasources.reads.utilities;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.commandline.CommandLineProgram;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import java.io.File;
import java.io.IOException;
/**
* A simple utility written directly in Picard that will rename tags
@ -62,7 +65,7 @@ public class BAMTagRenamer extends CommandLineProgram {
long readsWritten = 0;
long readsAltered = 0;
SAMFileReader reader = new SAMFileReader(input);
final SamReader reader = SamReaderFactory.makeDefault().open(input);
SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(reader.getFileHeader(),true,output,compressionLevel);
for(SAMRecord read: reader) {
@ -79,7 +82,13 @@ public class BAMTagRenamer extends CommandLineProgram {
}
writer.close();
System.out.printf("%d reads written. %d tag names updated from %s to %s.%n",readsWritten,readsAltered,sourceTagName,targetTagName);
System.out.printf("%d reads written. %d tag names updated from %s to %s.%n",readsWritten,readsAltered,sourceTagName,targetTagName);
try {
reader.close();
} catch ( IOException ex ) {
throw new ReviewedGATKException("Unable to close " + input , ex);
}
return 0;
}

View File

@ -31,6 +31,7 @@ import org.broadinstitute.gatk.utils.commandline.CommandLineProgram;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import java.io.File;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -53,9 +54,8 @@ public class PrintBAMRegion extends CommandLineProgram {
private static final int MIN_OFFSET_SIZE = 0;
private static final int MAX_OFFSET_SIZE = (int)Math.pow(2,16)-1;
public int execute() {
SAMFileReader reader = new SAMFileReader(input);
reader.setValidationStringency(ValidationStringency.SILENT);
public int execute() throws IOException {
final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(input);
Pattern regionPattern = Pattern.compile("(\\d+):(\\d+)-(\\d+):(\\d+)");
Matcher matcher = regionPattern.matcher(region);
@ -76,10 +76,10 @@ public class PrintBAMRegion extends CommandLineProgram {
if(lastOffset < MIN_OFFSET_SIZE || lastOffset > MAX_OFFSET_SIZE)
throw new UserException(String.format("Last offset is invalid; must be between %d and %d; actually is %d",MIN_OFFSET_SIZE,MAX_OFFSET_SIZE,lastOffset));
GATKChunk chunk = new GATKChunk(firstBlock<<16 | firstOffset,lastBlock<<16 | lastOffset);
GATKBAMFileSpan fileSpan = new GATKBAMFileSpan(chunk);
final GATKChunk chunk = new GATKChunk(firstBlock<<16 | firstOffset,lastBlock<<16 | lastOffset);
final GATKBAMFileSpan fileSpan = new GATKBAMFileSpan(chunk);
SAMRecordIterator iterator = reader.iterator(fileSpan);
final SAMRecordIterator iterator = ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(fileSpan);
long readCount = 0;
while(iterator.hasNext()) {
System.out.printf("%s%n",iterator.next().format());

View File

@ -65,8 +65,6 @@ public class PrintBGZFBounds extends CommandLineProgram {
float uncompressedSize = 0;
long totalBlocks = 0;
//SAMFileReader reader = new SAMFileReader(input);
while(true) {
final long blockStart = fis.getChannel().position();

View File

@ -98,7 +98,7 @@ public abstract class OutputTracker implements ReferenceBacked {
ArgumentSource targetField = io.getKey();
Object targetValue = io.getValue();
// Ghastly hack: reaches in and finishes building out the SAMFileReader.
// Ghastly hack: reaches in and finishes building out the SamReader.
// TODO: Generalize this, and move it to its own initialization step.
if( targetValue instanceof SAMReaderBuilder) {
SAMReaderBuilder builder = (SAMReaderBuilder)targetValue;

View File

@ -31,6 +31,7 @@ import htsjdk.samtools.util.ProgressLoggerInterface;
import htsjdk.samtools.util.RuntimeIOException;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub;
import org.broadinstitute.gatk.utils.exceptions.GATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.sam.SimplifyingSAMFileWriter;
@ -112,7 +113,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
}
public void mergeInto( SAMFileWriter targetStream ) {
SAMFileReader reader = new SAMFileReader( file );
final SamReader reader = SamReaderFactory.makeDefault().open(file);
try {
CloseableIterator<SAMRecord> iterator = reader.iterator();
while( iterator.hasNext() )
@ -120,7 +121,11 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
iterator.close();
}
finally {
reader.close();
try {
reader.close();
} catch (IOException e ) {
throw new GATKException(e.getMessage());
}
file.delete();
}
}

View File

@ -25,7 +25,7 @@
package org.broadinstitute.gatk.engine.io.stubs;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SamReader;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.utils.exceptions.UserException;
@ -43,7 +43,7 @@ public class SAMReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor {
private GenomeAnalysisEngine engine;
/**
* Create a new SAMFileReader argument, notifying the given engine when that argument has been created.
* Create a new SamReader argument, notifying the given engine when that argument has been created.
* @param engine engine
*/
public SAMReaderArgumentTypeDescriptor(GenomeAnalysisEngine engine) {
@ -52,7 +52,7 @@ public class SAMReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public boolean supports( Class type ) {
return SAMFileReader.class.isAssignableFrom(type);
return SamReader.class.isAssignableFrom(type);
}
@Override
@ -69,7 +69,7 @@ public class SAMReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor {
// WARNING: Skipping required side-effect because stub is impossible to generate.
engine.addInput(source, builder);
// MASSIVE KLUDGE! SAMFileReader is tricky to implement and we don't yet have a stub. Return null, then
// MASSIVE KLUDGE! SamReader is tricky to implement and we don't yet have a stub. Return null, then
// let the output tracker load it in.
// TODO: Add a stub for SAMReader.
return null;

View File

@ -25,7 +25,10 @@
package org.broadinstitute.gatk.engine;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SamInputResource;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import org.broadinstitute.gatk.utils.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
@ -42,8 +45,11 @@ public class CommandLineGATKUnitTest extends BaseTest {
public void testSamTextFileError1() {
final File samFile = new File(publicTestDir + "testfile.sam");
final File indexFile = new File(publicTestDir + "HiSeq.1mb.1RG.bai");
final SamInputResource samInputResource = SamInputResource.of(samFile);
samInputResource.index(indexFile);
try {
final SAMFileReader reader = new SAMFileReader(samFile, indexFile, false);
final SamReader reader =
SamReaderFactory.makeDefault().open(samInputResource);
// we shouldn't get here
Assert.fail("We should have exceptioned out when trying to create a reader with an index for a textual SAM file");
@ -56,8 +62,9 @@ public class CommandLineGATKUnitTest extends BaseTest {
public void testSamTextFileError2() {
File samFile = new File(publicTestDir + "testfile.sam");
try {
final SAMFileReader reader = new SAMFileReader(samFile);
reader.getFilePointerSpanningReads();
final SamInputResource samInputResource = SamInputResource.of(samFile);
final SamReader reader = SamReaderFactory.makeDefault().open(samInputResource);
reader.indexing().getFilePointerSpanningReads();
// we shouldn't get here
Assert.fail("We should have exceptioned out when trying to call getFilePointerSpanningReads() for a textual SAM file");

View File

@ -344,7 +344,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------
@Test
public void testGATKEngineConsolidatesCigars() {
public void testGATKEngineConsolidatesCigars() throws IOException {
final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" +
" -R " + b37KGReference +
" -I " + privateTestDir + "zero_length_cigar_elements.bam" +
@ -352,8 +352,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
1, Arrays.asList("")); // No MD5s; we only want to check the cigar
final File outputBam = executeTest("testGATKEngineConsolidatesCigars", spec).first.get(0);
final SAMFileReader reader = new SAMFileReader(outputBam);
reader.setValidationStringency(ValidationStringency.SILENT);
final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(outputBam);
final SAMRecord read = reader.iterator().next();
reader.close();
@ -382,7 +381,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
1, Arrays.asList("")); // No MD5s; we only want to check the read groups
final File outputBam = executeTest("testOnTheFlySampleRenamingWithSingleBamFile", spec).first.get(0);
final SAMFileReader reader = new SAMFileReader(outputBam);
final SamReader reader = SamReaderFactory.makeDefault().open(outputBam);
for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) {
Assert.assertEquals(readGroup.getSample(), "myNewSampleName", String.format("Sample for read group %s not renamed correctly", readGroup.getId()));
@ -402,12 +401,12 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
final Map<String, String> readGroupToNewSampleMap = new HashMap<>();
for ( String inputBamID : Arrays.asList("12878", "12891", "12892") ) {
final File inputBam = new File(privateTestDir + String.format("CEUTrio.HiSeq.WGS.b37.NA%s.HEADERONLY.bam", inputBamID));
final SAMFileReader inputBamReader = new SAMFileReader(inputBam);
final SamReader reader = SamReaderFactory.makeDefault().open(inputBam);
final String newSampleName = String.format("newSampleFor%s", inputBamID);
for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) {
for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) {
readGroupToNewSampleMap.put(readGroup.getId(), newSampleName);
}
inputBamReader.close();
reader.close();
}
final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" +
@ -420,10 +419,10 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
1, Arrays.asList("")); // No MD5s; we only want to check the read groups
final File outputBam = executeTest("testOnTheFlySampleRenamingWithMultipleBamFiles", spec).first.get(0);
final SAMFileReader outputBamReader = new SAMFileReader(outputBam);
final SamReader reader = SamReaderFactory.makeDefault().open(outputBam);
int totalReadGroupsSeen = 0;
for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) {
for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) {
Assert.assertEquals(readGroup.getSample(), readGroupToNewSampleMap.get(readGroup.getId()),
String.format("Wrong sample for read group %s after on-the-fly renaming", readGroup.getId()));
totalReadGroupsSeen++;
@ -431,7 +430,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
Assert.assertEquals(totalReadGroupsSeen, readGroupToNewSampleMap.size(), "Wrong number of read groups encountered in output bam file");
outputBamReader.close();
reader.close();
}
// On-the-fly sample renaming test case: three single-sample bams with multiple read groups per bam,
@ -446,15 +445,15 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
final Map<String, String> readGroupToNewSampleMap = new HashMap<>();
for ( String inputBamID : Arrays.asList("12878", "12891", "12892") ) {
final File inputBam = new File(privateTestDir + String.format("CEUTrio.HiSeq.WGS.b37.NA%s.HEADERONLY.bam", inputBamID));
final SAMFileReader inputBamReader = new SAMFileReader(inputBam);
final SamReader reader = SamReaderFactory.makeDefault().open(inputBam);
// Special-case NA12891, which we're not renaming:
final String newSampleName = inputBamID.equals("12891") ? "NA12891" : String.format("newSampleFor%s", inputBamID);
for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) {
for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) {
readGroupToNewSampleMap.put(readGroup.getId(), newSampleName);
}
inputBamReader.close();
reader.close();
}
final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" +
@ -467,10 +466,10 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
1, Arrays.asList("")); // No MD5s; we only want to check the read groups
final File outputBam = executeTest("testOnTheFlySampleRenamingWithMultipleBamFilesPartialRename", spec).first.get(0);
final SAMFileReader outputBamReader = new SAMFileReader(outputBam);
final SamReader reader = SamReaderFactory.makeDefault().open(outputBam);
int totalReadGroupsSeen = 0;
for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) {
for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) {
Assert.assertEquals(readGroup.getSample(), readGroupToNewSampleMap.get(readGroup.getId()),
String.format("Wrong sample for read group %s after on-the-fly renaming", readGroup.getId()));
totalReadGroupsSeen++;
@ -478,7 +477,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
Assert.assertEquals(totalReadGroupsSeen, readGroupToNewSampleMap.size(), "Wrong number of read groups encountered in output bam file");
outputBamReader.close();
reader.close();
}
// On-the-fly sample renaming test case: two single-sample bams with read group collisions
@ -489,11 +488,11 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
privateTestDir + "CEUTrio.HiSeq.WGS.b37.READ_GROUP_COLLISIONS_WITH_NA12878.HEADERONLY.bam newSampleForNot12878"));
final Set<String> na12878ReadGroups = new HashSet<>();
final SAMFileReader inputBamReader = new SAMFileReader(new File(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam"));
for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) {
final SamReader inpuBAMreader = SamReaderFactory.makeDefault().open(new File(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam"));
for ( final SAMReadGroupRecord readGroup : inpuBAMreader.getFileHeader().getReadGroups() ) {
na12878ReadGroups.add(readGroup.getId());
}
inputBamReader.close();
inpuBAMreader.close();
final WalkerTestSpec spec = new WalkerTestSpec(" -T TestPrintReadsWalker" +
" -R " + b37KGReference +
@ -504,10 +503,10 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
1, Arrays.asList("")); // No MD5s; we only want to check the read groups
final File outputBam = executeTest("testOnTheFlySampleRenamingWithReadGroupCollisions", spec).first.get(0);
final SAMFileReader outputBamReader = new SAMFileReader(outputBam);
final SamReader outputBAMreader = SamReaderFactory.makeDefault().open(outputBam);
int totalReadGroupsSeen = 0;
for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) {
for ( final SAMReadGroupRecord readGroup : outputBAMreader.getFileHeader().getReadGroups() ) {
String expectedSampleName = "";
if ( na12878ReadGroups.contains(readGroup.getId()) ) {
expectedSampleName = "newSampleFor12878";
@ -523,7 +522,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
Assert.assertEquals(totalReadGroupsSeen, na12878ReadGroups.size() * 2, "Wrong number of read groups encountered in output bam file");
outputBamReader.close();
outputBAMreader.close();
}
// On-the-fly sample renaming test case: a multi-sample bam (this should generate a UserException)

View File

@ -55,12 +55,12 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
// public void timeDownsampling(int reps) {
// for(int i = 0; i < reps; i++) {
// SAMFileReader reader = new SAMFileReader(inputFile);
// SamReader reader = SamReaderFactory.makeDefault().open(inputFile);
// ReadProperties readProperties = new ReadProperties(Collections.<SAMReaderID>singletonList(new SAMReaderID(inputFile,new Tags())),
// reader.getFileHeader(),
// SAMFileHeader.SortOrder.coordinate,
// false,
// SAMFileReader.ValidationStringency.SILENT,
// ValidationStringency.SILENT,
// downsampling.create(),
// new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
// Collections.<ReadFilter>emptyList(),

View File

@ -25,7 +25,6 @@
package org.broadinstitute.gatk.engine.datasources.reads;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import org.broadinstitute.gatk.utils.BaseTest;
@ -50,8 +49,7 @@ public class GATKBAMIndexFromDataSourceUnitTest extends BaseTest {
@BeforeClass
public void init() throws IOException {
final SAMFileReader reader = new SAMFileReader(bamFile);
reader.enableIndexCaching(true); // needed ot get BrowseableBAMIndex
final SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES).open(bamFile);
Assert.assertTrue(reader.hasIndex());
Assert.assertTrue(reader.indexing().hasBrowseableIndex());

View File

@ -25,8 +25,9 @@
package org.broadinstitute.gatk.engine.datasources.reads;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.testng.Assert;
@ -34,7 +35,7 @@ import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
* Test basic functionality in the GATK's implementation of the BAM index classes.
@ -59,8 +60,8 @@ public class GATKBAMIndexFromFileUnitTest extends BaseTest {
@BeforeClass
public void init() throws FileNotFoundException {
final SAMFileReader reader = new SAMFileReader(bamFile);
public void init() throws IOException {
final SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES).open(bamFile);
sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
reader.close();

View File

@ -26,13 +26,13 @@
package org.broadinstitute.gatk.engine.datasources.reads;
import com.google.caliper.Param;
import com.google.caliper.SimpleBenchmark;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.util.SamLocusIterator;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.CloseableIterator;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
/**
@ -55,9 +55,9 @@ public class PicardBaselineBenchmark extends ReadProcessingBenchmark {
@Override
public Integer getMaxReads() { return maxReads; }
public void timeDecompressBamFile(int reps) {
public void timeDecompressBamFile(int reps) throws IOException {
for(int i = 0; i < reps; i++) {
SAMFileReader reader = new SAMFileReader(inputFile);
final SamReader reader = SamReaderFactory.makeDefault().open(inputFile);
CloseableIterator<SAMRecord> iterator = reader.iterator();
while(iterator.hasNext())
iterator.next();
@ -66,9 +66,9 @@ public class PicardBaselineBenchmark extends ReadProcessingBenchmark {
}
}
public void timeExtractTag(int reps) {
public void timeExtractTag(int reps) throws IOException {
for(int i = 0; i < reps; i++) {
SAMFileReader reader = new SAMFileReader(inputFile);
final SamReader reader = SamReaderFactory.makeDefault().open(inputFile);
CloseableIterator<SAMRecord> iterator = reader.iterator();
while(iterator.hasNext()) {
SAMRecord read = iterator.next();
@ -79,9 +79,9 @@ public class PicardBaselineBenchmark extends ReadProcessingBenchmark {
}
}
public void timeSamLocusIterator(int reps) {
public void timeSamLocusIterator(int reps) throws IOException {
for(int i = 0; i < reps; i++) {
SAMFileReader reader = new SAMFileReader(inputFile);
final SamReader reader = SamReaderFactory.makeDefault().open(inputFile);
long loci = 0;
SamLocusIterator samLocusIterator = new SamLocusIterator(reader);

View File

@ -25,11 +25,11 @@
package org.broadinstitute.gatk.engine.datasources.reads;
import com.google.caliper.Param;
import com.google.caliper.SimpleBenchmark;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.SAMRecord;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
@ -51,7 +51,7 @@ public abstract class ReadProcessingBenchmark extends SimpleBenchmark {
@Override
public void setUp() {
SAMFileReader fullInputFile = new SAMFileReader(new File(getBAMFile()));
SamReader reader = SamReaderFactory.makeDefault().open(new File(getBAMFile()));
File tempFile = null;
try {
@ -62,15 +62,20 @@ public abstract class ReadProcessingBenchmark extends SimpleBenchmark {
}
SAMFileWriterFactory factory = new SAMFileWriterFactory();
factory.setCreateIndex(true);
SAMFileWriter writer = factory.makeBAMWriter(fullInputFile.getFileHeader(),true,tempFile);
SAMFileWriter writer = factory.makeBAMWriter(reader.getFileHeader(),true,tempFile);
long numReads = 0;
for(SAMRecord read: fullInputFile) {
for(SAMRecord read: reader) {
if(numReads++ >= getMaxReads())
break;
writer.addAlignment(read);
}
try {
reader.close();
} catch ( IOException ex ) {
throw new ReviewedGATKException("Unable to close " + getBAMFile() , ex);
}
writer.close();
inputFile = tempFile;

View File

@ -28,11 +28,13 @@ package org.broadinstitute.gatk.engine.datasources.reads;
import com.google.caliper.Param;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.CloseableIterator;
import java.io.File;
import java.io.IOException;
/**
* Created by IntelliJ IDEA.
@ -54,10 +56,10 @@ public class TheoreticalMinimaBenchmark extends ReadProcessingBenchmark {
@Override
public Integer getMaxReads() { return maxReads; }
public void timeIterateOverEachBase(int reps) {
public void timeIterateOverEachBase(int reps) throws IOException {
System.out.printf("Processing " + inputFile);
for(int i = 0; i < reps; i++) {
SAMFileReader reader = new SAMFileReader(inputFile);
final SamReader reader = SamReaderFactory.makeDefault().open((inputFile));
CloseableIterator<SAMRecord> iterator = reader.iterator();
long As=0,Cs=0,Gs=0,Ts=0;
@ -78,14 +80,14 @@ public class TheoreticalMinimaBenchmark extends ReadProcessingBenchmark {
}
}
public void timeIterateOverCigarString(int reps) {
public void timeIterateOverCigarString(int reps) throws IOException {
for(int i = 0; i < reps; i++) {
long matchMismatches = 0;
long insertions = 0;
long deletions = 0;
long others = 0;
SAMFileReader reader = new SAMFileReader(inputFile);
final SamReader reader = SamReaderFactory.makeDefault().open(inputFile);
CloseableIterator<SAMRecord> iterator = reader.iterator();
while(iterator.hasNext()) {
SAMRecord read = iterator.next();

View File

@ -25,7 +25,7 @@
package org.broadinstitute.gatk.queue.extensions.gatk;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SAMFileWriter;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
@ -245,7 +245,7 @@ public abstract class ArgumentField {
protected static Class<?> mapType(Class<?> clazz) {
if (InputStream.class.isAssignableFrom(clazz)) return File.class;
if (SAMFileReader.class.isAssignableFrom(clazz)) return File.class;
if (SamReader.class.isAssignableFrom(clazz)) return File.class;
if (OutputStream.class.isAssignableFrom(clazz)) return File.class;
if (VariantContextWriter.class.isAssignableFrom(clazz)) return File.class;
if (SAMFileWriter.class.isAssignableFrom(clazz)) return File.class;

View File

@ -27,7 +27,7 @@ package org.broadinstitute.gatk.queue.util
import java.io.File
import io.Source._
import htsjdk.samtools.{SAMReadGroupRecord, SAMFileReader}
import htsjdk.samtools.{SamReaderFactory, SAMReadGroupRecord}
import collection.JavaConversions._
@ -87,8 +87,10 @@ object QScriptUtils {
* Returns the number of contigs in the BAM file header.
*/
def getNumberOfContigs(bamFile: File): Int = {
val samReader = new SAMFileReader(bamFile)
samReader.getFileHeader.getSequenceDictionary.getSequences.size()
val samReader = SamReaderFactory.makeDefault().open(bamFile)
val size = samReader.getFileHeader.getSequenceDictionary.getSequences.size()
samReader.close
return size
}
/**
@ -112,11 +114,12 @@ object QScriptUtils {
* @return a set with all distinct samples (in no particular order)
*/
def getSamplesFromBAM(bam: File) : Set[String] = {
val reader = new SAMFileReader(bam)
val reader = SamReaderFactory.makeDefault().open(bam);
var samples: Set[String] = Set()
for (rg <- reader.getFileHeader.getReadGroups) {
samples += rg.getSample
}
reader.close
samples
}
}

View File

@ -28,7 +28,7 @@ package org.broadinstitute.gatk.queue.util
import java.io.File
import org.apache.commons.io.FilenameUtils
import scala.io.Source._
import htsjdk.samtools.SAMFileReader
import htsjdk.samtools.{SamReaderFactory}
import htsjdk.variant.vcf.{VCFHeader, VCFCodec}
import scala.collection.JavaConversions._
import htsjdk.tribble.AbstractFeatureReader
@ -40,7 +40,7 @@ object VCF_BAM_utilities {
}
def getSamplesInBAM(bam: File): List[String] = {
return new SAMFileReader(bam).getFileHeader().getReadGroups().toList.map(srgr => srgr.getSample()).toSet.toList
return SamReaderFactory.makeDefault().open(bam).getFileHeader().getReadGroups().toList.map(srgr => srgr.getSample()).toSet.toList
}
def parseBAMsInput(bamsIn: File): List[File] = FilenameUtils.getExtension(bamsIn.getPath) match {

View File

@ -25,11 +25,13 @@
package org.broadinstitute.gatk.utils.diffengine;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.util.BlockCompressedInputStream;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import java.io.*;
import java.util.Arrays;
@ -49,8 +51,7 @@ public class BAMDiffableReader implements DiffableReader {
@Override
public DiffElement readFromFile(File file, int maxElementsToRead) {
final SAMFileReader reader = new SAMFileReader(file, null); // null because we don't want it to look for the index
reader.setValidationStringency(ValidationStringency.SILENT);
final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(file);
DiffNode root = DiffNode.rooted(file.getName());
SAMRecordIterator iterator = reader.iterator();
@ -93,7 +94,11 @@ public class BAMDiffableReader implements DiffableReader {
break;
}
reader.close();
try {
reader.close();
} catch (final IOException ex ) {
throw new ReviewedGATKException("Unable to close " + file , ex);
}
return root.getBinding();
}

View File

@ -25,7 +25,8 @@
package org.broadinstitute.gatk.utils.locusiterator;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.reference.ReferenceSequenceFile;
@ -65,7 +66,7 @@ public class LIBSPerformance extends CommandLineProgram {
final ReferenceSequenceFile reference = new CachingIndexedFastaSequenceFile(referenceFile);
final GenomeLocParser genomeLocParser = new GenomeLocParser(reference);
final SAMFileReader reader = new SAMFileReader(samFile);
final SamReader reader = SamReaderFactory.makeDefault().open(samFile);
SAMRecordIterator rawIterator;
if ( location == null )
@ -81,6 +82,8 @@ public class LIBSPerformance extends CommandLineProgram {
for ( final SAMReadGroupRecord rg : reader.getFileHeader().getReadGroups() )
samples.add(rg.getSample());
reader.close();
final LIBSDownsamplingInfo ds = new LIBSDownsamplingInfo(downsample, 250);
final LocusIteratorByState libs =

View File

@ -28,7 +28,7 @@ package org.broadinstitute.gatk.utils.locusiterator;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.CloseableIterator;
import org.apache.log4j.Logger;
@ -149,7 +149,7 @@ public final class LocusIteratorByState extends LocusIterator {
}
/**
* Create a new LocusIteratorByState based on a SAMFileReader using reads in an iterator it
* Create a new LocusIteratorByState based on a SamReader using reads in an iterator it
*
* Simple constructor that uses the samples in the reader, doesn't do any downsampling,
* and makes a new GenomeLocParser using the reader. This constructor will be slow(ish)
@ -158,7 +158,7 @@ public final class LocusIteratorByState extends LocusIterator {
* @param reader a non-null reader
* @param it an iterator from reader that has the reads we want to use to create ReadBackPileups
*/
public LocusIteratorByState(final SAMFileReader reader, final CloseableIterator<SAMRecord> it) {
public LocusIteratorByState(final SamReader reader, final CloseableIterator<SAMRecord> it) {
this(new GATKSAMRecordIterator(it),
new LIBSDownsamplingInfo(false, 0),
true,

View File

@ -26,16 +26,19 @@
package org.broadinstitute.gatk.utils.sam;
import htsjdk.samtools.*;
import htsjdk.samtools.SamReader.Indexing;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
/**
* User: hanna
* Date: Jun 11, 2009
@ -53,7 +56,13 @@ import java.util.List;
* Pass specified reads into the given walker.
*/
public class ArtificialSAMFileReader extends SAMFileReader {
public class ArtificialSAMFileReader implements SamReader, Indexing {
/**
* The reader of SamRecords
*/
private SamReader reader;
/**
* The parser, for GenomeLocs.
*/
@ -64,15 +73,20 @@ public class ArtificialSAMFileReader extends SAMFileReader {
*/
private final List<SAMRecord> reads;
/**
* Input/custom SAM file header
*/
private SAMFileHeader customHeader = null;
/**
* Construct an artificial SAM file reader.
*
* @param sequenceDictionary sequence dictionary used to initialize our GenomeLocParser
* @param reads Reads to use as backing data source.
*/
public ArtificialSAMFileReader(SAMSequenceDictionary sequenceDictionary,SAMRecord... reads) {
super( createEmptyInputStream(),true );
final SamInputResource samInputResource = SamInputResource.of(createEmptyInputStream());
reader = SamReaderFactory.makeDefault().open(samInputResource);
this.genomeLocParser = new GenomeLocParser(sequenceDictionary);
this.reads = Arrays.asList(reads);
}
@ -84,30 +98,75 @@ public class ArtificialSAMFileReader extends SAMFileReader {
* @param reads Reads to use as backing data source.
*/
public ArtificialSAMFileReader( SAMFileHeader customHeader, SAMRecord... reads ) {
super(createEmptyInputStream(),true);
final SamInputResource samInputResource = SamInputResource.of(createEmptyInputStream());
reader = SamReaderFactory.makeDefault().open(samInputResource);
this.customHeader = customHeader;
this.genomeLocParser = new GenomeLocParser(customHeader.getSequenceDictionary());
this.reads = Arrays.asList(reads);
}
/**
 * Describes this reader by way of its own {@code toString()} value.
 *
 * @return the string form of this reader
 */
@Override
public String getResourceDescription() {
    return toString();
}
@Override
public SAMFileHeader getFileHeader() {
if ( customHeader != null ) {
return customHeader;
}
public boolean hasIndex() {
    // Index availability is whatever the wrapped reader reports.
    return reader.hasIndex();
}
return super.getFileHeader();
/**
 * Exposes the indexing view of this reader. This class implements
 * {@code Indexing} itself, so it hands back its own instance.
 *
 * @return this reader
 */
@Override
public Indexing indexing() {
return this;
}
/**
 * Returns the index of this reader viewed as a {@link BrowseableBAMIndex}.
 *
 * @return the index obtained from {@link #getIndex()}, cast to its browseable form
 * @throws SAMException if the index obtained is not a {@code BrowseableBAMIndex}
 */
@Override
public BrowseableBAMIndex getBrowseableIndex() {
    final BAMIndex candidate = getIndex();
    if (candidate instanceof BrowseableBAMIndex) {
        return (BrowseableBAMIndex) candidate;
    }
    throw new SAMException("Cannot return index: index created by BAM is not browseable.");
}
/**
 * Reports whether a browseable index is available.
 *
 * The index itself is only consulted when {@link #hasIndex()} is true.
 *
 * @return true when an index exists and it is a {@code BrowseableBAMIndex}
 */
@Override
public boolean hasBrowseableIndex() {
    if (!hasIndex()) {
        return false;
    }
    return getIndex() instanceof BrowseableBAMIndex;
}
/**
 * Index retrieval is not supported by this reader.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public BAMIndex getIndex() {
throw new UnsupportedOperationException();
}
/**
 * Iterates over the reads held in this reader's backing list.
 *
 * @return an iterator over {@code reads}; its {@code close()} does nothing and
 *         {@code assertSorted} hands back the iterator itself without checking anything
 */
@Override
public SAMRecordIterator iterator() {
    final Iterator<SAMRecord> backing = reads.iterator();
    return new SAMRecordIterator() {
        @Override
        public boolean hasNext() {
            return backing.hasNext();
        }

        @Override
        public SAMRecord next() {
            return backing.next();
        }

        @Override
        public void close() {
            // nothing to release
        }

        @Override
        public void remove() {
            backing.remove();
        }

        @Override
        public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) {
            return this;
        }
    };
}
/**
 * {@inheritDoc}
 * Iterate through the file.
 *
 * NOTE(review): {@code chunks} is not consulted here; the records come from a
 * plain {@code iterator()} call on the wrapped reader.
 *
 * @param chunks List of chunks for which to retrieve data.
 * @return An iterator over the wrapped reader, wrapped in an {@code AssertingIterator}.
 */
@Override
public SAMRecordIterator iterator(SAMFileSpan chunks) {
    final SAMRecordIterator fromReader = reader.iterator();
    return new SamReader.AssertingIterator(fromReader);
}
public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained) {
GenomeLoc region = genomeLocParser.createGenomeLoc(sequence, start, end);
List<SAMRecord> coveredSubset = new ArrayList<SAMRecord>();
List<SAMRecord> coveredSubset = new ArrayList<>();
for( SAMRecord read: reads ) {
GenomeLoc readPosition = genomeLocParser.createGenomeLoc(read);
@ -126,15 +185,116 @@ public class ArtificialSAMFileReader extends SAMFileReader {
}
@Override
public SAMRecordIterator iterator() {
return new SAMRecordIterator() {
private final Iterator<SAMRecord> iterator = reads.iterator();
public boolean hasNext() { return iterator.hasNext(); }
public SAMRecord next() { return iterator.next(); }
public void close() {}
public void remove() { iterator.remove(); }
public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) { return this; }
};
public SAMRecordIterator queryOverlapping(final String sequence, final int start, final int end) {
    // Overlap query: delegate to the general query with contained == false.
    final boolean contained = false;
    return query(sequence, start, end, contained);
}
/**
 * Containment query: delegates to the general sequence/start/end query with
 * {@code contained} set to true.
 *
 * @param sequence name of the sequence to query
 * @param start    start of the queried region
 * @param end      end of the queried region
 * @return the iterator produced by {@code query(sequence, start, end, true)}
 */
@Override
public SAMRecordIterator queryContained(final String sequence, final int start, final int end) {
    final boolean contained = true;
    return query(sequence, start, end, contained);
}
/**
 * Runs an interval query against the wrapped reader.
 *
 * @param intervals intervals to query, passed through unchanged
 * @param contained passed through unchanged to the wrapped reader
 * @return the wrapped reader's result, wrapped in an {@code AssertingIterator}
 */
@Override
public SAMRecordIterator query(final QueryInterval[] intervals, final boolean contained) {
    final SAMRecordIterator fromReader = reader.query(intervals, contained);
    return new AssertingIterator(fromReader);
}
/**
 * Overlap query over intervals: delegates to
 * {@code query(intervals, false)}.
 *
 * @param intervals intervals to query
 * @return the iterator produced by the delegated query
 */
@Override
public SAMRecordIterator queryOverlapping(final QueryInterval[] intervals) {
    final boolean contained = false;
    return query(intervals, contained);
}
/**
 * Containment query over intervals: delegates to
 * {@code query(intervals, true)}.
 *
 * @param intervals intervals to query
 * @return the iterator produced by the delegated query
 */
@Override
public SAMRecordIterator queryContained(final QueryInterval[] intervals) {
    final boolean contained = true;
    return query(intervals, contained);
}
/**
 * Asks the wrapped reader for its unmapped reads.
 *
 * @return the wrapped reader's result, wrapped in an {@code AssertingIterator}
 */
@Override
public SAMRecordIterator queryUnmapped() {
    final SAMRecordIterator fromReader = reader.queryUnmapped();
    return new AssertingIterator(fromReader);
}
/**
 * Asks the wrapped reader for reads by alignment start.
 *
 * @param sequence sequence name, passed through unchanged
 * @param start    alignment start, passed through unchanged
 * @return the wrapped reader's result, wrapped in an {@code AssertingIterator}
 */
@Override
public SAMRecordIterator queryAlignmentStart(final String sequence, final int start) {
    final SAMRecordIterator fromReader = reader.queryAlignmentStart(sequence, start);
    return new AssertingIterator(fromReader);
}
/**
 * Looks up the mate of a paired read.
 *
 * Candidates come from {@link #queryUnmapped()} when the mate reference index
 * of {@code rec} is -1, otherwise from {@link #queryAlignmentStart(String, int)}
 * at the mate's reference name and alignment start. Every candidate is examined;
 * the iterator is closed before returning, whether normally or by exception.
 *
 * @param rec the paired read whose mate is wanted
 * @return the single candidate that is the opposite end of the pair and has the
 *         same read name as {@code rec}, or null when no candidate matches
 * @throws IllegalArgumentException if {@code rec} is unpaired, or if its
 *         first-of-pair and second-of-pair flags are equal
 * @throws SAMFormatException if an unpaired candidate shares the read name of
 *         {@code rec}, or if more than one candidate matches
 */
@Override
public SAMRecord queryMate(final SAMRecord rec) {
    if (!rec.getReadPairedFlag()) {
        throw new IllegalArgumentException("queryMate called for unpaired read.");
    }
    if (rec.getFirstOfPairFlag() == rec.getSecondOfPairFlag()) {
        throw new IllegalArgumentException("SAMRecord must be either first and second of pair, but not both.");
    }

    final boolean firstOfPair = rec.getFirstOfPairFlag();
    final SAMRecordIterator it = rec.getMateReferenceIndex() == -1
            ? queryUnmapped()
            : queryAlignmentStart(rec.getMateReferenceName(), rec.getMateAlignmentStart());

    try {
        SAMRecord mate = null;
        while (it.hasNext()) {
            final SAMRecord candidate = it.next();

            if (!candidate.getReadPairedFlag()) {
                // An unpaired read may not share a name with a paired one.
                if (rec.getReadName().equals(candidate.getReadName())) {
                    throw new SAMFormatException("Paired and unpaired reads with same name: " + rec.getReadName());
                }
                continue;
            }

            // Skip candidates that are the same end of the pair as rec.
            final boolean sameEnd = firstOfPair
                    ? candidate.getFirstOfPairFlag()
                    : candidate.getSecondOfPairFlag();
            if (sameEnd) {
                continue;
            }

            if (rec.getReadName().equals(candidate.getReadName())) {
                if (mate != null) {
                    throw new SAMFormatException("Multiple SAMRecord with read name " + rec.getReadName() + " for " + (firstOfPair?"second":"first") + " end.");
                }
                mate = candidate;
            }
        }
        return mate;
    } finally {
        it.close();
    }
}
/**
 * Forwards to the indexing view of the wrapped reader.
 *
 * @return the span reported by the wrapped reader's {@code indexing()}
 */
@Override
public SAMFileSpan getFilePointerSpanningReads() {
    final Indexing readerIndexing = reader.indexing();
    return readerIndexing.getFilePointerSpanningReads();
}
/**
 * Closes the wrapped reader, if there still is one, and then drops the
 * reference to it. Calling this again afterwards does nothing.
 *
 * @throws IOException if closing the wrapped reader fails; in that case the
 *         reference is left in place
 */
@Override
public void close() throws IOException {
    if (reader == null) {
        return;
    }
    reader.close();
    reader = null;
}
/**
 * Reports the type of the wrapped reader.
 *
 * @return whatever {@code type()} returns on the wrapped reader
 */
@Override
public Type type() {
    return reader.type();
}
/**
 * Returns the header for this reader.
 *
 * @return the custom header when one was supplied, otherwise the header of the
 *         wrapped reader
 */
@Override
public SAMFileHeader getFileHeader() {
    if (customHeader != null) {
        return customHeader;
    }
    return reader.getFileHeader();
}
/**

View File

@ -30,9 +30,11 @@ package org.broadinstitute.gatk.utils;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
@ -50,6 +52,7 @@ import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
public class ExampleToCopyUnitTest extends BaseTest {
@ -217,13 +220,18 @@ public class ExampleToCopyUnitTest extends BaseTest {
// create a fake BAM file, and iterate through it
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(seq, 20, 10);
final File bam = bamBuilder.makeTemporarilyBAMFile();
final SAMFileReader reader = new SAMFileReader(bam);
final SamReader reader = SamReaderFactory.makeDefault().open(bam);
final Iterator<SAMRecord> bamIt = reader.iterator();
while ( bamIt.hasNext() ) {
final SAMRecord read = bamIt.next(); // all reads are actually GATKSAMRecords
// TODO -- add some tests that use reads from a BAM
}
try {
reader.close();
} catch ( IOException ex ) {
throw new ReviewedGATKException("Unable to close " + bam , ex);
}
}
/**

View File

@ -25,14 +25,17 @@
package org.broadinstitute.gatk.utils.sam;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
@ -94,7 +97,7 @@ public class ArtificialBAMBuilderUnitTest extends BaseTest {
}
final File bam = bamBuilder.makeTemporarilyBAMFile();
final SAMFileReader reader = new SAMFileReader(bam);
final SamReader reader = SamReaderFactory.makeDefault().open(bam);
Assert.assertTrue(reader.hasIndex());
final Iterator<SAMRecord> bamIt = reader.iterator();
int nReadsFromBam = 0;
@ -106,6 +109,11 @@ public class ArtificialBAMBuilderUnitTest extends BaseTest {
Assert.assertTrue(read.getAlignmentStart() >= lastStart);
lastStart = read.getAlignmentStart();
}
try {
reader.close();
} catch ( IOException ex ) {
throw new ReviewedGATKException("Unable to close " + bam , ex);
}
Assert.assertEquals(nReadsFromBam, bamBuilder.expectedNumberOfReads());
}