diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/sam/ClippedGATKSAMRecord.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/sam/ClippedGATKSAMRecord.java index 3baa98018..5f6ee422d 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/sam/ClippedGATKSAMRecord.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/utils/sam/ClippedGATKSAMRecord.java @@ -51,6 +51,8 @@ package org.broadinstitute.gatk.utils.sam; +import htsjdk.samtools.GATKBin; + import java.util.Arrays; /** @@ -69,15 +71,30 @@ public class ClippedGATKSAMRecord extends GATKSAMRecord { * @param end inclusive last position in {@code read} included in the clipped view. */ public ClippedGATKSAMRecord(final GATKSAMRecord read, int start, int end) { - super(read.getHeader(), read.getReferenceIndex(), read.getAlignmentStart() + start, (short) read.getReadNameLength(), - (short) 100, -1, read.getCigarLength(), read.getFlags(), end - start, - read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getInferredInsertSize(), - new byte[0]); + super(read.getHeader()); + this.setReferenceIndex(read.getReferenceIndex()); + this.setAlignmentStart(read.getAlignmentStart() + start); + this.setMappingQuality(100); + // setting read indexing bin below + this.setFlags(read.getFlags()); + this.setMateReferenceIndex(read.getMateReferenceIndex()); + this.setMateAlignmentStart(read.getMateAlignmentStart()); + this.setInferredInsertSize(read.getInferredInsertSize()); this.setReadBases(Arrays.copyOfRange(read.getReadBases(), start, end)); this.setBaseQualities(Arrays.copyOfRange(read.getBaseQualities(),start,end)); this.setReadName(read.getReadName()); insertionQuals = Arrays.copyOfRange(read.getBaseInsertionQualities(),start,end); deletionQuals = Arrays.copyOfRange(read.getBaseDeletionQualities(),start,end); + + // Set these to null in order to mark them as being candidates for lazy 
initialization. + // If this is not done, they will have non-null defaults. + super.setReadName(null); + super.setCigarString(null); + super.setReadBases(null); + super.setBaseQualities(null); + + // Do this after the above because setCigarString will clear it. + GATKBin.setReadIndexingBin(this, -1); } @Override diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTestDataSet.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTestDataSet.java index 81afc1816..3a12ee99c 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTestDataSet.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTestDataSet.java @@ -51,6 +51,7 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller; +import htsjdk.samtools.GATKBin; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; @@ -317,22 +318,18 @@ public class ActiveRegionTestDataSet { private class MyGATKSAMRecord extends GATKSAMRecord { protected MyGATKSAMRecord(final GATKSAMRecord r) { - super(r.getHeader(), r.getReferenceIndex(), r.getAlignmentStart(), (short) r.getReadNameLength(), - (short) 100, -1, r.getCigarLength(), r.getFlags(), r.getReadLength(), - r.getMateReferenceIndex(), r.getMateAlignmentStart(), r.getInferredInsertSize(), - new byte[0]); - this.setReadBases(r.getReadBases()); - this.setBaseQualities(r.getBaseQualities()); - this.setReadName(r.getReadName()); + super(r); + this.setMappingQuality(100); + GATKBin.setReadIndexingBin(this, -1); } ExponentialDistribution indelLengthDist = MathUtils.exponentialDistribution(1.0 / 0.9); public MyGATKSAMRecord(final GATKSAMRecord r, final Random rnd) { - super(r.getHeader(), r.getReferenceIndex(), r.getAlignmentStart(), 
(short) r.getReadNameLength(), - (short) 100, -1, r.getCigarLength(), r.getFlags(), r.getReadLength(), - r.getMateReferenceIndex(), r.getMateAlignmentStart(), r.getInferredInsertSize(), - new byte[0]); + super(r); + this.setMappingQuality(100); + // setting read indexing bin last + final byte[] bases = new byte[r.getReadBases().length]; final byte[] readBases = r.getReadBases(); @@ -384,7 +381,7 @@ public class ActiveRegionTestDataSet { this.setBaseQualities(r.getBaseQualities()); this.setReadName(r.getReadName()); - + GATKBin.setReadIndexingBin(this, -1); } private int generateIndelLength(final Random rnd) { diff --git a/public/external-example/src/test/java/org/mycompany/app/MyExampleWalkerIntegrationTest.java b/public/external-example/src/test/java/org/mycompany/app/MyExampleWalkerIntegrationTest.java index ee625a46d..c3461f23e 100644 --- a/public/external-example/src/test/java/org/mycompany/app/MyExampleWalkerIntegrationTest.java +++ b/public/external-example/src/test/java/org/mycompany/app/MyExampleWalkerIntegrationTest.java @@ -46,9 +46,13 @@ public class MyExampleWalkerIntegrationTest extends WalkerTest { } private File getResource(String path) throws URISyntaxException { + return new File(publicTestDir, path); + /* + TODO: Enable proper resource extraction from the test jars. For now just use the publicTestDir path. 
URL resourceUrl = getClass().getResource(path); if (resourceUrl == null) throw new MissingResourceException("Resource not found: " + path, getClass().getSimpleName(), path); return new File(resourceUrl.toURI()); + */ } } diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java index c67ca5d35..447ceeecb 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GenomeAnalysisEngine.java @@ -50,6 +50,7 @@ import org.broadinstitute.gatk.engine.io.stubs.Stub; import org.broadinstitute.gatk.engine.iterators.ReadTransformer; import org.broadinstitute.gatk.engine.iterators.ReadTransformersMode; import org.broadinstitute.gatk.engine.phonehome.GATKRunReport; +import org.broadinstitute.gatk.utils.io.ReferenceBacked; import org.broadinstitute.gatk.utils.refdata.tracks.IndexDictionaryUtils; import org.broadinstitute.gatk.utils.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.gatk.utils.refdata.utils.RMDTriplet; @@ -697,9 +698,12 @@ public class GenomeAnalysisEngine { * @param outputTracker the tracker supplying the initialization data. 
*/ private void initializeOutputStreams(final OutputTracker outputTracker) { - for (final Map.Entry input : getInputs().entrySet()) + for (final Map.Entry input : getInputs().entrySet()) { + setReferenceFile(input.getValue()); outputTracker.addInput(input.getKey(), input.getValue()); + } for (final Stub stub : getOutputs()) { + setReferenceFile(stub); stub.processArguments(argCollection); outputTracker.addOutput(stub); } @@ -707,6 +711,12 @@ public class GenomeAnalysisEngine { outputTracker.prepareWalker(walker, getArguments().strictnessLevel); } + private void setReferenceFile(final Object object) { + if (object instanceof ReferenceBacked) { + ((ReferenceBacked)object).setReferenceFile(argCollection.referenceFile); + } + } + public ReferenceDataSource getReferenceDataSource() { return referenceDataSource; } @@ -907,6 +917,7 @@ public class GenomeAnalysisEngine { final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker; return new SAMDataSource( + argCollection.referenceFile, samReaderIDs, threadAllocation, argCollection.numberOfBAMFileHandles, diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/SAMDataSource.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/SAMDataSource.java index 9933a6152..79b853e6b 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/SAMDataSource.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/SAMDataSource.java @@ -29,6 +29,7 @@ import htsjdk.samtools.MergingSamRecordIterator; import htsjdk.samtools.SamFileHeaderMerger; import htsjdk.samtools.*; import htsjdk.samtools.util.CloseableIterator; +import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.RuntimeIOException; import org.apache.log4j.Logger; import org.broadinstitute.gatk.engine.ReadMetrics; @@ -49,7 +50,8 @@ import org.broadinstitute.gatk.utils.interval.IntervalMergingRule; import 
org.broadinstitute.gatk.utils.iterators.GATKSAMIterator; import org.broadinstitute.gatk.utils.iterators.GATKSAMIteratorAdapter; import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord; -import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecordIterator; import org.broadinstitute.gatk.utils.sam.SAMReaderID; import java.io.File; @@ -64,7 +66,8 @@ import java.util.concurrent.Callable; * Converts shards to SAM iterators over the specified region */ public class SAMDataSource { - final private static GATKSamRecordFactory factory = new GATKSamRecordFactory(); + /** Reference file */ + private final File referenceFile; /** Backing support for reads. */ protected final ReadProperties readProperties; @@ -177,8 +180,11 @@ public class SAMDataSource { * * @param samFiles list of reads files. */ - public SAMDataSource(Collection samFiles, ThreadAllocation threadAllocation, Integer numFileHandles, GenomeLocParser genomeLocParser) { + public SAMDataSource(final File referenceFile, final Collection samFiles, + final ThreadAllocation threadAllocation, final Integer numFileHandles, + final GenomeLocParser genomeLocParser) { this( + referenceFile, samFiles, threadAllocation, numFileHandles, @@ -198,6 +204,7 @@ public class SAMDataSource { * For testing purposes */ public SAMDataSource( + final File referenceFile, Collection samFiles, ThreadAllocation threadAllocation, Integer numFileHandles, @@ -209,7 +216,8 @@ public class SAMDataSource { ValidationExclusion exclusionList, Collection supplementalFilters, boolean includeReadsWithDeletionAtLoci) { - this( samFiles, + this( referenceFile, + samFiles, threadAllocation, numFileHandles, genomeLocParser, @@ -230,6 +238,7 @@ public class SAMDataSource { /** * Create a new SAM data source given the supplied read metadata. + * @param referenceFile reference file. * @param samFiles list of reads files. 
* @param useOriginalBaseQualities True if original base qualities should be used. * @param strictness Stringency of reads file parsing. @@ -247,6 +256,7 @@ public class SAMDataSource { * @param intervalMergingRule how are adjacent intervals merged by the sharder */ public SAMDataSource( + final File referenceFile, Collection samFiles, ThreadAllocation threadAllocation, Integer numFileHandles, @@ -265,6 +275,7 @@ public class SAMDataSource { final Map sampleRenameMap, final IntervalMergingRule intervalMergingRule) { + this.referenceFile = referenceFile; this.readMetrics = new ReadMetrics(); this.genomeLocParser = genomeLocParser; this.intervalMergingRule = intervalMergingRule; @@ -303,7 +314,7 @@ public class SAMDataSource { "Please check that the file is present and readable and try again."); // Get the sort order, forcing it to coordinate if unsorted. - SAMFileReader reader = readers.getReader(readerID); + SamReader reader = readers.getReader(readerID); SAMFileHeader header = reader.getFileHeader(); headers.put(readerID,header); @@ -343,7 +354,7 @@ public class SAMDataSource { // cache the read group id (original) -> read group id (merged) // and read group id (merged) -> read group id (original) mappings. for(SAMReaderID id: readerIDs) { - SAMFileReader reader = readers.getReader(id); + SamReader reader = readers.getReader(id); ReadGroupMapping mappingToMerged = new ReadGroupMapping(); @@ -385,8 +396,8 @@ public class SAMDataSource { public void close() { SAMReaders readers = resourcePool.getAvailableReaders(); for(SAMReaderID readerID: readerIDs) { - SAMFileReader reader = readers.getReader(readerID); - reader.close(); + SamReader reader = readers.getReader(readerID); + CloserUtil.close(reader); } } @@ -463,14 +474,6 @@ public class SAMDataSource { return mergedToOriginalReadGroupMappings.get(mergedReadGroupId); } - /** - * True if all readers have an index. - * @return True if all readers have an index. 
- */ - public boolean hasIndex() { - return readerIDs.size() == bamIndices.size(); - } - /** * Gets the index for a particular reader. Always preloaded. * @param id Id of the reader. @@ -480,6 +483,44 @@ public class SAMDataSource { return bamIndices.get(id); } + /** + * Return true if the index for a particular reader exists. + * @param id Id of the reader. + * @return True if the index exists. + */ + public boolean hasIndex(final SAMReaderID id) { + return bamIndices.containsKey(id); + } + + /** + * True if all readers that require an index for SAMFileSpan creation have an index. + * @return True if all readers that require an index for SAMFileSpan creation have an index. + */ + public boolean hasIndex() { + for (final SAMReaderID readerID: readerIDs) + if (isSAMFileSpanSupported(readerID)) + if (!hasIndex(readerID)) + return false; + return true; + } + /** + * Returns true if the reader can use file spans. + * @return true if file spans are supported. + */ + private boolean isSAMFileSpanSupported(final SAMReaderID readerID) { + // example: https://github.com/samtools/htsjdk/blob/ee4308ede60962f3ab4275473ac384724b471149/src/java/htsjdk/samtools/BAMFileReader.java#L341 + return readerID.getSamFile().getName().toLowerCase().endsWith(SamReader.Type.BAM_TYPE.fileExtension()); + } + + /** + * Returns true if the reader caches its SAMFileHeader for each iterator. + * @return true if this reader caches its SAMFileHeader for each iterator. + */ + private boolean isIteratorSAMFileHeaderCached(final SAMReaderID readerID) { + // example: https://github.com/samtools/htsjdk/blob/ee4308ede60962f3ab4275473ac384724b471149/src/java/htsjdk/samtools/CRAMFileReader.java#L183 + return !readerID.getSamFile().getName().toLowerCase().endsWith(SamReader.Type.CRAM_TYPE.fileExtension()); + } + /** * Retrieves the sort order of the readers. * @return Sort order. Can be unsorted, coordinate order, or query name order. 
@@ -538,7 +579,17 @@ public class SAMDataSource { SAMReaders readers = resourcePool.getAvailableReaders(); for ( SAMReaderID id: getReaderIDs() ) { - initialPositions.put(id, new GATKBAMFileSpan(readers.getReader(id).getFilePointerSpanningReads())); + GATKBAMFileSpan span; + try { + span = new GATKBAMFileSpan(readers.getReader(id).indexing().getFilePointerSpanningReads()); + } catch (RuntimeException e) { + if ("Not implemented.".equals(e.getMessage())) { // https://github.com/samtools/htsjdk/blob/035d4319643657d715e93c53c13fe4a1f64e0188/src/java/htsjdk/samtools/CRAMFileReader.java#L197 + span = new GATKBAMFileSpan(new GATKChunk(0, Long.MAX_VALUE)); + } else { + throw e; + } + } + initialPositions.put(id, span); } resourcePool.releaseReaders(readers); @@ -567,7 +618,7 @@ public class SAMDataSource { Map> iteratorMap = new HashMap<>(); for(SAMReaderID id: getReaderIDs()) { - CloseableIterator iterator = null; + CloseableIterator iterator; // TODO: null used to be the signal for unmapped, but we've replaced that with a simple index query for the last bin. // TODO: Kill this check once we've proven that the design elements are gone. @@ -576,19 +627,33 @@ public class SAMDataSource { try { if(threadAllocation.getNumIOThreads() > 0) { + // TODO: add a friendly error if -nit is used with a non-BAM input. Later, possibly add this capability for CRAM once htsjdk supports CRAM file spans.
BlockInputStream inputStream = readers.getInputStream(id); inputStream.submitAccessPlan(new BAMAccessPlan(id, inputStream, (GATKBAMFileSpan) shard.getFileSpans().get(id))); - BAMRecordCodec codec = new BAMRecordCodec(getHeader(id),factory); + BAMRecordCodec codec = new BAMRecordCodec(getHeader(id)); codec.setInputStream(inputStream); iterator = new BAMCodecIterator(inputStream,readers.getReader(id),codec); } else { - iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id)); + final SamReader reader = readers.getReader(id); + try { + iterator = ((SamReader.Indexing)reader).iterator(shard.getFileSpans().get(id)); + } catch (RuntimeException re) { + if ("Not implemented.".equals(re.getMessage())) { // https://github.com/samtools/htsjdk/blob/429f2a8585d9c98a3efd4cedc5188b60b1e66ac5/src/java/htsjdk/samtools/CRAMFileReader.java#L192 + // No way to jump into the file span. Query the whole file. + iterator = readers.getReader(id).iterator(); + } else { + throw re; + } + } } } catch ( RuntimeException e ) { // we need to catch RuntimeExceptions here because the Picard code is throwing them (among SAMFormatExceptions) sometimes throw new UserException.MalformedBAM(id.getSamFile(), e.getMessage()); } + // At the moment, too many other classes to change for GATKSAMRecordIterator converter. + // Force the compiler to just let the conversion happen, since generics are erased anyway. 
+ iterator = (CloseableIterator)(Object)new GATKSAMRecordIterator(iterator); iterator = new MalformedBAMErrorReformatingIterator(id.getSamFile(), iterator); if(shard.getGenomeLocs().size() > 0) iterator = new IntervalOverlapFilteringIterator(iterator,shard.getGenomeLocs()); @@ -614,11 +679,11 @@ public class SAMDataSource { private class BAMCodecIterator implements CloseableIterator { private final BlockInputStream inputStream; - private final SAMFileReader reader; + private final SamReader reader; private final BAMRecordCodec codec; private SAMRecord nextRead; - private BAMCodecIterator(final BlockInputStream inputStream, final SAMFileReader reader, final BAMRecordCodec codec) { + private BAMCodecIterator(final BlockInputStream inputStream, final SamReader reader, final BAMRecordCodec codec) { this.inputStream = inputStream; this.reader = reader; this.codec = codec; @@ -823,7 +888,7 @@ public class SAMDataSource { /** * A collection of readers derived from a reads metadata structure. */ - private class SAMReaders implements Iterable { + private class SAMReaders implements Iterable { /** * Cached representation of the merged header used to generate a merging iterator. */ @@ -832,7 +897,7 @@ public class SAMDataSource { /** * Internal storage for a map of id -> reader. */ - private final Map readers = new LinkedHashMap(); + private final Map readers = new LinkedHashMap<>(); /** * The inptu streams backing @@ -860,7 +925,11 @@ public class SAMDataSource { checkForUnsupportedBamFile(init.reader.getFileHeader()); - if (removeProgramRecords) { + if (removeProgramRecords && isIteratorSAMFileHeaderCached(readerID)) { + // Only works when the SamReader implementation caches its header. + // Some implementations (ex: CRAM) rewrite the new underlying file header in reader.getIterator(). + // Later, when MergingSamRecordIterator goes to check the headers with .contains()/.equals(), + // it will error out complaining it can't find the unmodified version of the header. 
init.reader.getFileHeader().setProgramRecords(new ArrayList()); } @@ -883,9 +952,9 @@ public class SAMDataSource { // Examine the bam headers, perform any requested sample renaming on them, and add // them to the list of headers to pass to the Picard SamFileHeaderMerger: - for ( final Map.Entry readerEntry : readers.entrySet() ) { + for ( final Map.Entry readerEntry : readers.entrySet() ) { final SAMReaderID readerID = readerEntry.getKey(); - final SAMFileReader reader = readerEntry.getValue(); + final SamReader reader = readerEntry.getValue(); final SAMFileHeader header = reader.getFileHeader(); // The remappedSampleName will be null if either no on-the-fly sample renaming was requested, @@ -1009,7 +1078,7 @@ public class SAMDataSource { * @param id The ID of the reader to retrieve. * @return the reader associated with the given id. */ - public SAMFileReader getReader(SAMReaderID id) { + public SamReader getReader(SAMReaderID id) { if(!readers.containsKey(id)) throw new NoSuchElementException("No reader is associated with id " + id); return readers.get(id); @@ -1030,7 +1099,7 @@ public class SAMDataSource { * @return The id associated the given reader, or null if the reader is not present in this collection. */ protected SAMReaderID getReaderID(SamReader reader) { - for(Map.Entry entry: readers.entrySet()) { + for(Map.Entry entry: readers.entrySet()) { if(reader == entry.getValue()) return entry.getKey(); } @@ -1042,7 +1111,7 @@ public class SAMDataSource { * Returns an iterator over all readers in this structure. * @return An iterator over readers. 
*/ - public Iterator iterator() { + public Iterator iterator() { return readers.values().iterator(); } @@ -1058,18 +1127,23 @@ public class SAMDataSource { class ReaderInitializer implements Callable { final SAMReaderID readerID; BlockInputStream blockInputStream = null; - SAMFileReader reader; + SamReader reader; public ReaderInitializer(final SAMReaderID readerID) { this.readerID = readerID; } public ReaderInitializer call() { - final File indexFile = findIndexFile(readerID.getSamFile()); try { if (threadAllocation.getNumIOThreads() > 0) blockInputStream = new BlockInputStream(dispatcher,readerID,false); - reader = new SAMFileReader(readerID.getSamFile(),indexFile,false); + reader = SamReaderFactory.makeDefault() + .referenceSequence(referenceFile) + .validationStringency(validationStringency) + .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false) + .setOption(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, true) + .open(readerID.getSamFile()); + } catch ( RuntimeIOException e ) { throw new UserException.CouldNotReadInputFile(readerID.getSamFile(), e); } catch ( SAMFormatException e ) { @@ -1081,9 +1155,6 @@ public class SAMDataSource { catch ( RuntimeException e ) { throw new UserException.MalformedBAM(readerID.getSamFile(), e.getMessage()); } - reader.setSAMRecordFactory(factory); - reader.enableFileSource(true); - reader.setValidationStringency(validationStringency); return this; } } diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/FindLargeShards.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/FindLargeShards.java index 231bbc4ef..b6869f0b9 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/FindLargeShards.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/datasources/reads/utilities/FindLargeShards.java @@ -93,7 +93,7 @@ public class FindLargeShards extends 
CommandLineProgram { // initialize reads List bamReaders = ListFileUtils.unpackBAMFileList(samFiles,parser); - SAMDataSource dataSource = new SAMDataSource(bamReaders,new ThreadAllocation(),null,genomeLocParser); + SAMDataSource dataSource = new SAMDataSource(referenceFile, bamReaders, new ThreadAllocation(), null, genomeLocParser); // intervals final GenomeLocSortedSet intervalSortedSet; diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/OutputTracker.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/OutputTracker.java index 8f2fbe340..d5925900c 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/OutputTracker.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/OutputTracker.java @@ -25,8 +25,8 @@ package org.broadinstitute.gatk.engine.io; -import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.ValidationStringency; +import org.broadinstitute.gatk.utils.io.ReferenceBacked; import org.broadinstitute.gatk.utils.commandline.ArgumentSource; import org.broadinstitute.gatk.engine.io.storage.Storage; import org.broadinstitute.gatk.engine.io.storage.StorageFactory; @@ -37,7 +37,7 @@ import org.broadinstitute.gatk.utils.classloader.JVMUtils; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.io.IOUtils; -import org.broadinstitute.gatk.utils.sam.SAMFileReaderBuilder; +import org.broadinstitute.gatk.utils.sam.SAMReaderBuilder; import java.io.File; import java.io.OutputStream; @@ -49,7 +49,12 @@ import java.util.Map; * Manages the output and err streams that are created specifically for walker * output. */ -public abstract class OutputTracker { +public abstract class OutputTracker implements ReferenceBacked { + /** + * The reference file. + */ + private File referenceFile; + /** * The streams to which walker users should be reading directly. 
*/ @@ -78,6 +83,16 @@ public abstract class OutputTracker { */ public abstract T getStorage( Stub stub ); + @Override + public File getReferenceFile() { + return referenceFile; + } + + @Override + public void setReferenceFile(final File referenceFile) { + this.referenceFile = referenceFile; + } + public void prepareWalker( Walker walker, ValidationStringency strictnessLevel ) { for( Map.Entry io: inputs.entrySet() ) { ArgumentSource targetField = io.getKey(); @@ -85,8 +100,8 @@ public abstract class OutputTracker { // Ghastly hack: reaches in and finishes building out the SAMFileReader. // TODO: Generalize this, and move it to its own initialization step. - if( targetValue instanceof SAMFileReaderBuilder) { - SAMFileReaderBuilder builder = (SAMFileReaderBuilder)targetValue; + if( targetValue instanceof SAMReaderBuilder) { + SAMReaderBuilder builder = (SAMReaderBuilder)targetValue; builder.setValidationStringency(strictnessLevel); targetValue = builder.build(); } diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/storage/SAMFileWriterStorage.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/storage/SAMFileWriterStorage.java index 3956e6e0b..68943f887 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/storage/SAMFileWriterStorage.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/storage/SAMFileWriterStorage.java @@ -35,6 +35,9 @@ import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.sam.SimplifyingSAMFileWriter; import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; @@ -46,6 +49,7 @@ import java.lang.reflect.Method; */ public class SAMFileWriterStorage implements SAMFileWriter, Storage { private final File file; + private File referenceFasta; private SAMFileWriter writer; 
private static Logger logger = Logger.getLogger(SAMFileWriterStorage.class); @@ -55,6 +59,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage, GATKSAMFileWriter { +public class SAMFileWriterStub implements Stub, GATKSAMFileWriter, ReferenceBacked { /** * Engine to use for collecting attributes for the output SAM file. */ @@ -67,6 +68,11 @@ public class SAMFileWriterStub implements Stub, GATKSAMFileWriter */ private final File samFile; + /** + * The reference file for stub. + */ + private File referenceFile; + /** * The target output stream, to be used in place of the SAM file. */ @@ -189,6 +195,16 @@ public class SAMFileWriterStub implements Stub, GATKSAMFileWriter return samOutputStream; } + @Override + public File getReferenceFile() { + return referenceFile; + } + + @Override + public void setReferenceFile(final File referenceFile) { + this.referenceFile = referenceFile; + } + /** * Retrieves the header to use when creating the new SAM file. * @return header to use when creating the new SAM file. 
diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/stubs/SAMFileReaderArgumentTypeDescriptor.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/stubs/SAMReaderArgumentTypeDescriptor.java similarity index 87% rename from public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/stubs/SAMFileReaderArgumentTypeDescriptor.java rename to public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/stubs/SAMReaderArgumentTypeDescriptor.java index 42397cb9a..3a440488d 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/stubs/SAMFileReaderArgumentTypeDescriptor.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/stubs/SAMReaderArgumentTypeDescriptor.java @@ -29,14 +29,14 @@ import htsjdk.samtools.SAMFileReader; import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.utils.exceptions.UserException; -import org.broadinstitute.gatk.utils.sam.SAMFileReaderBuilder; +import org.broadinstitute.gatk.utils.sam.SAMReaderBuilder; import java.lang.reflect.Type; /** - * Describe how to parse SAMFileReaders. + * Describe how to parse SAMReaders. */ -public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor { +public class SAMReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor { /** * The engine into which output stubs should be fed. */ @@ -46,7 +46,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor * Create a new SAMFileReader argument, notifying the given engine when that argument has been created. 
* @param engine engine */ - public SAMFileReaderArgumentTypeDescriptor( GenomeAnalysisEngine engine ) { + public SAMReaderArgumentTypeDescriptor(GenomeAnalysisEngine engine) { this.engine = engine; } @@ -57,7 +57,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor @Override public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { - SAMFileReaderBuilder builder = new SAMFileReaderBuilder(); + SAMReaderBuilder builder = new SAMReaderBuilder(); ArgumentMatchValue readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches ); @@ -71,7 +71,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor // MASSIVE KLUDGE! SAMFileReader is tricky to implement and we don't yet have a stub. Return null, then // let the output tracker load it in. - // TODO: Add a stub for SAMFileReader. + // TODO: Add a stub for SAMReader. return null; } } diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java index bfa3bfbc3..bae83b072 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java @@ -38,7 +38,6 @@ import org.broadinstitute.gatk.utils.collections.Pair; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; -import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFCodec; import htsjdk.variant.vcf.VCFHeader; @@ -312,7 +311,6 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { final File outputBam = 
executeTest("testGATKEngineConsolidatesCigars", spec).first.get(0); final SAMFileReader reader = new SAMFileReader(outputBam); reader.setValidationStringency(ValidationStringency.SILENT); - reader.setSAMRecordFactory(new GATKSamRecordFactory()); final SAMRecord read = reader.iterator().next(); reader.close(); diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/ReadMetricsUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/ReadMetricsUnitTest.java index dcc58ea36..50c7f8222 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/ReadMetricsUnitTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/ReadMetricsUnitTest.java @@ -81,6 +81,7 @@ public class ReadMetricsUnitTest extends BaseTest { // Test the accuracy of the read metrics + private File referenceFile; private IndexedFastaSequenceFile reference; private SAMSequenceDictionary dictionary; private SAMFileHeader header; @@ -93,7 +94,8 @@ public class ReadMetricsUnitTest extends BaseTest { @BeforeClass private void init() throws IOException { - reference = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); + referenceFile = new File(b37KGReference); + reference = new CachingIndexedFastaSequenceFile(referenceFile); dictionary = reference.getSequenceDictionary(); genomeLocParser = new GenomeLocParser(dictionary); header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test"); @@ -149,7 +151,7 @@ public class ReadMetricsUnitTest extends BaseTest { final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags()); samFiles.add(readerID); - final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser, false, ValidationStringency.STRICT, null, @@ -184,7 +186,7 @@ public class ReadMetricsUnitTest extends BaseTest { 
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags()); samFiles.add(readerID); - final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser, false, ValidationStringency.STRICT, null, @@ -225,7 +227,7 @@ public class ReadMetricsUnitTest extends BaseTest { final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags()); samFiles.add(readerID); - final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser, false, ValidationStringency.STRICT, null, @@ -272,7 +274,7 @@ public class ReadMetricsUnitTest extends BaseTest { final List filters = new ArrayList<>(); filters.add(new EveryTenthReadFilter()); - final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser, false, ValidationStringency.STRICT, null, diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java new file mode 100644 index 000000000..770d951b0 --- /dev/null +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/arguments/CramIntegrationTest.java @@ -0,0 +1,74 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or 
sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.engine.arguments; + +import org.broadinstitute.gatk.engine.walkers.WalkerTest; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; + +/** + * Test the GATK core CRAM parsing mechanism. 
+ */ +public class CramIntegrationTest extends WalkerTest { + @DataProvider(name="cramData") + public Object[][] getCRAMData() { + return new Object[][] { + {"PrintReads", "exampleBAM.bam", "", "cram", "026ebc00c2a8f9832e37f1a6a0f53521"}, + //{"PrintReads", "exampleCRAM.cram", "", "cram", "026ebc00c2a8f9832e37f1a6a0f53521"}, https://github.com/samtools/htsjdk/issues/148 + {"PrintReads", "exampleCRAM.cram", "", "bam", "99e5f740b43594a5b8e5bc1a007719e0"}, + {"PrintReads", "exampleCRAM-noindex.cram", "", "bam", "99e5f740b43594a5b8e5bc1a007719e0"}, + {"PrintReads", "exampleCRAM.cram", " -L chr1:200", "bam", "072435e8272411c31b2234f851706384"}, + {"PrintReads", "exampleCRAM-noindex.cram", " -L chr1:200", "bam", "072435e8272411c31b2234f851706384"}, + {"CountLoci", "exampleCRAM.cram", "", "txt", "ade93df31a6150321c1067e749cae9be"}, + {"CountLoci", "exampleCRAM-noindex.cram", "", "txt", "ade93df31a6150321c1067e749cae9be"}, + {"CountLoci", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"}, + {"CountLoci", "exampleCRAM-noindex.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"}, + {"CountReads", "exampleCRAM.cram", "", "txt", "4fbafd6948b6529caa2b78e476359875"}, + {"CountReads", "exampleCRAM-noindex.cram", "", "txt", "4fbafd6948b6529caa2b78e476359875"}, + {"CountReads", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"}, + {"CountReads", "exampleCRAM-noindex.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"}, + {"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "9598062587ad8d2ec596a8ecb19be979"}, + {"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"}, + {"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"}, + }; + } + + @Test(dataProvider = "cramData") + public void testCRAM(String walker, String input, String args, String ext, String md5) { + WalkerTestSpec spec 
= new WalkerTestSpec( + " -T Test" + walker + "Walker" + + " -I " + publicTestDir + input + + " -R " + exampleFASTA + + args + + " -o %s", + 1, // just one output file + Arrays.asList(ext), + Arrays.asList(md5)); + executeTest(String.format("testCRAM %s %s -> %s: %s", walker, input, ext, args), spec); + } +} diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/providers/LocusViewTemplate.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/providers/LocusViewTemplate.java index 8b476e6ef..72f2bb1ee 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/providers/LocusViewTemplate.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/providers/LocusViewTemplate.java @@ -76,7 +76,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(); GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5); - Shard shard = new LocusShard(genomeLocParser, new SAMDataSource(Collections.emptyList(),new ThreadAllocation(),null,genomeLocParser),Collections.singletonList(shardBounds),Collections.emptyMap()); + Shard shard = new LocusShard(genomeLocParser, new SAMDataSource(null,Collections.emptyList(),new ThreadAllocation(),null,genomeLocParser),Collections.singletonList(shardBounds),Collections.emptyMap()); WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs()); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, null, null); diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/MockLocusShard.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/MockLocusShard.java index 896549adf..bed203b3d 100644 --- 
a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/MockLocusShard.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/MockLocusShard.java @@ -42,7 +42,7 @@ import java.util.Collections; public class MockLocusShard extends LocusShard { public MockLocusShard(final GenomeLocParser genomeLocParser,final List intervals) { super( genomeLocParser, - new SAMDataSource(Collections.emptyList(),new ThreadAllocation(),null,genomeLocParser), + new SAMDataSource(null, Collections.emptyList(),new ThreadAllocation(),null,genomeLocParser), intervals, null); } diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/ReadShardBalancerUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/ReadShardBalancerUnitTest.java index 3836409b9..c4f6159a1 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/ReadShardBalancerUnitTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/ReadShardBalancerUnitTest.java @@ -84,7 +84,8 @@ public class ReadShardBalancerUnitTest extends BaseTest { public void run() { createTestBAM(); - SAMDataSource dataSource = new SAMDataSource(Arrays.asList(testBAM), + SAMDataSource dataSource = new SAMDataSource(null, // Reference not used in this test. 
+ Arrays.asList(testBAM), new ThreadAllocation(), null, new GenomeLocParser(header.getSequenceDictionary()), diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/SAMDataSourceUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/SAMDataSourceUnitTest.java index eb1915f0c..8be72a22c 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/SAMDataSourceUnitTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/datasources/reads/SAMDataSourceUnitTest.java @@ -63,6 +63,7 @@ public class SAMDataSourceUnitTest extends BaseTest { // TODO: These legacy tests should really be replaced with a more comprehensive suite of tests for SAMDataSource private List readers; + private File referenceFile; private IndexedFastaSequenceFile seq; private GenomeLocParser genomeLocParser; @@ -76,7 +77,8 @@ public class SAMDataSourceUnitTest extends BaseTest { readers = new ArrayList(); // sequence - seq = new CachingIndexedFastaSequenceFile(new File(b36KGReference)); + referenceFile = new File(b36KGReference); + seq = new CachingIndexedFastaSequenceFile(referenceFile); genomeLocParser = new GenomeLocParser(seq.getSequenceDictionary()); } @@ -101,7 +103,9 @@ public class SAMDataSourceUnitTest extends BaseTest { readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),new Tags())); // the sharding strat. 
- SAMDataSource data = new SAMDataSource(readers, + SAMDataSource data = new SAMDataSource( + referenceFile, + readers, new ThreadAllocation(), null, genomeLocParser, @@ -155,7 +159,9 @@ public class SAMDataSourceUnitTest extends BaseTest { readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags())); // use defaults - SAMDataSource data = new SAMDataSource(readers, + SAMDataSource data = new SAMDataSource( + referenceFile, + readers, new ThreadAllocation(), null, genomeLocParser, @@ -171,7 +177,9 @@ public class SAMDataSourceUnitTest extends BaseTest { assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor"); boolean removeProgramRecords = false; - data = new SAMDataSource(readers, + data = new SAMDataSource( + referenceFile, + readers, new ThreadAllocation(), null, genomeLocParser, @@ -192,7 +200,9 @@ public class SAMDataSourceUnitTest extends BaseTest { assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false"); removeProgramRecords = true; - data = new SAMDataSource(readers, + data = new SAMDataSource( + referenceFile, + readers, new ThreadAllocation(), null, genomeLocParser, @@ -217,7 +227,9 @@ public class SAMDataSourceUnitTest extends BaseTest { public void testFailOnReducedReads() { readers.add(new SAMReaderID(new File(privateTestDir + "old.reduced.bam"), new Tags())); - SAMDataSource data = new SAMDataSource(readers, + SAMDataSource data = new SAMDataSource( + referenceFile, + readers, new ThreadAllocation(), null, genomeLocParser, @@ -234,7 +246,9 @@ public class SAMDataSourceUnitTest extends BaseTest { public void testFailOnReducedReadsRemovingProgramRecords() { readers.add(new SAMReaderID(new File(privateTestDir + "old.reduced.bam"), new Tags())); - SAMDataSource data = new SAMDataSource(readers, + SAMDataSource data = new SAMDataSource( + referenceFile, + readers, new 
ThreadAllocation(), null, genomeLocParser, diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/filters/ReadFilterTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/filters/ReadFilterTest.java index 4e801815f..d997f3758 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/filters/ReadFilterTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/filters/ReadFilterTest.java @@ -37,6 +37,7 @@ import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; +import java.io.File; import java.util.*; /** @@ -313,6 +314,7 @@ public class ReadFilterTest extends BaseTest { protected SAMDataSource composeDataSource() { checkHeaderExists(); + final File referenceFile = null; // Not used in this test. final Set readerIDs = new HashSet<>(1); final ThreadAllocation ta = new ThreadAllocation(); final Integer numFileHandles = 1; // I believe that any value would do but need to confirm. 
@@ -326,6 +328,7 @@ public class ReadFilterTest extends BaseTest { final GenomeLocParser glp = new GenomeLocParser(header.getSequenceDictionary()); final SAMDataSource res = new SAMDataSource( + referenceFile, readerIDs, ta, numFileHandles, diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/traversals/TraverseActiveRegionsUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/traversals/TraverseActiveRegionsUnitTest.java index b8b9c75f1..5b710a10d 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/traversals/TraverseActiveRegionsUnitTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/traversals/TraverseActiveRegionsUnitTest.java @@ -79,6 +79,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { return traversals.toArray(new Object[][]{}); } + private File referenceFile; private IndexedFastaSequenceFile reference; private SAMSequenceDictionary dictionary; private GenomeLocParser genomeLocParser; @@ -90,7 +91,8 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { @BeforeClass private void init() throws IOException { //reference = new CachingIndexedFastaSequenceFile(new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta")); // hg19Reference)); - reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference)); + referenceFile = new File(hg19Reference); + reference = new CachingIndexedFastaSequenceFile(referenceFile); dictionary = reference.getSequenceDictionary(); genomeLocParser = new GenomeLocParser(dictionary); @@ -470,7 +472,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { SAMReaderID readerID = new SAMReaderID(bamFile, new Tags()); samFiles.add(readerID); - SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser, + SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser, false, 
ValidationStringency.STRICT, null, diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/traversals/TraverseReadsUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/traversals/TraverseReadsUnitTest.java index 3e6b3f2f4..a03802635 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/traversals/TraverseReadsUnitTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/traversals/TraverseReadsUnitTest.java @@ -138,7 +138,7 @@ public class TraverseReadsUnitTest extends BaseTest { /** Test out that we can shard the file and iterate over every read */ @Test public void testUnmappedReadCount() { - SAMDataSource dataSource = new SAMDataSource(bamList,new ThreadAllocation(),null,genomeLocParser); + SAMDataSource dataSource = new SAMDataSource(refFile, bamList,new ThreadAllocation(),null,genomeLocParser); Iterable shardStrategy = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer()); countReadWalker.initialize(); diff --git a/public/gatk-engine/src/main/java/htsjdk/samtools/GATKBAMFileSpan.java b/public/gatk-utils/src/main/java/htsjdk/samtools/GATKBAMFileSpan.java similarity index 100% rename from public/gatk-engine/src/main/java/htsjdk/samtools/GATKBAMFileSpan.java rename to public/gatk-utils/src/main/java/htsjdk/samtools/GATKBAMFileSpan.java diff --git a/public/gatk-engine/src/main/java/htsjdk/samtools/GATKBin.java b/public/gatk-utils/src/main/java/htsjdk/samtools/GATKBin.java similarity index 89% rename from public/gatk-engine/src/main/java/htsjdk/samtools/GATKBin.java rename to public/gatk-utils/src/main/java/htsjdk/samtools/GATKBin.java index d1e689d63..a4a7c7246 100644 --- a/public/gatk-engine/src/main/java/htsjdk/samtools/GATKBin.java +++ b/public/gatk-utils/src/main/java/htsjdk/samtools/GATKBin.java @@ -132,4 +132,15 @@ public class GATKBin implements Comparable { return new GATKChunk[0]; return chunkList; } + + // HACK: Using this classes package 
permissions to further hack the CRAM created SAMRecord's indexing bin and binary attributes. + public static Integer getReadIndexingBin(final SAMRecord read) { + return read.getIndexingBin(); + } + public static void setReadIndexingBin(final SAMRecord read, final Integer indexingBin) { + read.setIndexingBin(indexingBin); + } + public static SAMBinaryTagAndValue getReadBinaryAttributes(final SAMRecord read) { + return read.getBinaryAttributes(); + } } diff --git a/public/gatk-engine/src/main/java/htsjdk/samtools/GATKChunk.java b/public/gatk-utils/src/main/java/htsjdk/samtools/GATKChunk.java similarity index 100% rename from public/gatk-engine/src/main/java/htsjdk/samtools/GATKChunk.java rename to public/gatk-utils/src/main/java/htsjdk/samtools/GATKChunk.java diff --git a/public/gatk-engine/src/main/java/htsjdk/samtools/PicardNamespaceUtils.java b/public/gatk-utils/src/main/java/htsjdk/samtools/PicardNamespaceUtils.java similarity index 100% rename from public/gatk-engine/src/main/java/htsjdk/samtools/PicardNamespaceUtils.java rename to public/gatk-utils/src/main/java/htsjdk/samtools/PicardNamespaceUtils.java diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/exceptions/UserException.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/exceptions/UserException.java index 33d09c3ad..bfb0ca039 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/exceptions/UserException.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/exceptions/UserException.java @@ -225,20 +225,20 @@ public class UserException extends ReviewedGATKException { public static class MissortedBAM extends UserException { public MissortedBAM(SAMFileHeader.SortOrder order, File file, SAMFileHeader header) { - super(String.format("Missorted Input SAM/BAM files: %s is must be sorted in %s order but order was: %s", file, order, header.getSortOrder())); + super(String.format("Missorted Input SAM/BAM/CRAM files: %s is must be 
sorted in %s order but order was: %s", file, order, header.getSortOrder())); } public MissortedBAM(SAMFileHeader.SortOrder order, String message) { - super(String.format("Missorted Input SAM/BAM files: files are not sorted in %s order; %s", order, message)); + super(String.format("Missorted Input SAM/BAM/CRAM files: files are not sorted in %s order; %s", order, message)); } public MissortedBAM(SAMFileHeader.SortOrder order, SAMRecord read, String message) { - super(String.format("Missorted Input SAM/BAM file %s: file sorted in %s order but %s is required; %s", + super(String.format("Missorted Input SAM/BAM/CRAM file %s: file sorted in %s order but %s is required; %s", read.getFileSource().getReader(), read.getHeader().getSortOrder(), order, message)); } public MissortedBAM(String message) { - super(String.format("Missorted Input SAM/BAM files: %s", message)); + super(String.format("Missorted Input SAM/BAM/CRAM files: %s", message)); } } @@ -252,7 +252,7 @@ public class UserException extends ReviewedGATKException { } public MalformedBAM(String source, String message) { - super(String.format("SAM/BAM file %s is malformed: %s", source, message)); + super(String.format("SAM/BAM/CRAM file %s is malformed: %s", source, message)); } } @@ -262,7 +262,7 @@ public class UserException extends ReviewedGATKException { } public MisencodedBAM(String source, String message) { - super(String.format("SAM/BAM file %s appears to be using the wrong encoding for quality scores: %s; please see the GATK --help documentation for options related to this error", source, message)); + super(String.format("SAM/BAM/CRAM file %s appears to be using the wrong encoding for quality scores: %s; please see the GATK --help documentation for options related to this error", source, message)); } } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/io/ReferenceBacked.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/io/ReferenceBacked.java new file mode 100644 
index 000000000..1cf0aea2b --- /dev/null +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/io/ReferenceBacked.java @@ -0,0 +1,33 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.utils.io; + +import java.io.File; + +public interface ReferenceBacked { + public File getReferenceFile(); + public void setReferenceFile(final File reference); +} diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/locusiterator/LIBSPerformance.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/locusiterator/LIBSPerformance.java index d4c22a6ad..206249559 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/locusiterator/LIBSPerformance.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/locusiterator/LIBSPerformance.java @@ -37,7 +37,6 @@ import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.broadinstitute.gatk.utils.sam.GATKSAMRecordIterator; import org.broadinstitute.gatk.utils.*; import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory; import java.io.File; import java.io.IOException; @@ -67,7 +66,6 @@ public class LIBSPerformance extends CommandLineProgram { final GenomeLocParser genomeLocParser = new GenomeLocParser(reference); final SAMFileReader reader = new SAMFileReader(samFile); - reader.setSAMRecordFactory(new GATKSamRecordFactory()); SAMRecordIterator rawIterator; if ( location == null ) diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/ArtificialSAMFileReader.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/ArtificialSAMFileReader.java index 427b12e00..c0390a3a9 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/ArtificialSAMFileReader.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/ArtificialSAMFileReader.java @@ -26,6 +26,7 @@ package org.broadinstitute.gatk.utils.sam; import htsjdk.samtools.*; +import htsjdk.samtools.cram.build.CramIO; import org.broadinstitute.gatk.utils.GenomeLoc; import 
org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; @@ -149,7 +150,7 @@ public class ArtificialSAMFileReader extends SAMFileReader { byte[] byteArray = "".getBytes("ISO-8859-1"); return new ByteArrayInputStream(byteArray); } - catch( UnsupportedEncodingException ex ) { + catch( Exception ex ) { throw new ReviewedGATKException("Unable to build empty input stream",ex); } } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/GATKSAMRecord.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/GATKSAMRecord.java index 0080f01df..968ae0b89 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/GATKSAMRecord.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/GATKSAMRecord.java @@ -49,7 +49,7 @@ import java.util.*; * Changing these values in any way will invalidate the cached value. However, we do not monitor those setter * functions, so modifying a GATKSAMRecord in any way may result in stale cached values. */ -public class GATKSAMRecord extends BAMRecord implements Cloneable { +public class GATKSAMRecord extends SAMRecord implements Cloneable { // Base Quality Score Recalibrator specific attribute tags public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; // base qualities for insertions public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; // base qualities for deletions @@ -92,42 +92,36 @@ public class GATKSAMRecord extends BAMRecord implements Cloneable { * @param read */ public GATKSAMRecord(final SAMRecord read) { - super(read.getHeader(), - read.getReferenceIndex(), - read.getAlignmentStart(), - read.getReadName() != null ? 
(short)read.getReadNameLength() : 0, - (short)read.getMappingQuality(), - 0, - read.getCigarLength(), - read.getFlags(), - read.getReadLength(), - read.getMateReferenceIndex(), - read.getMateAlignmentStart(), - read.getInferredInsertSize(), - null); + super(read.getHeader()); + super.setReferenceIndex(read.getReferenceIndex()); + super.setAlignmentStart(read.getAlignmentStart()); + super.setReadName(read.getReadName()); + super.setMappingQuality(read.getMappingQuality()); + // indexing bin done below + super.setCigar(read.getCigar()); + super.setFlags(read.getFlags()); + super.setMateReferenceIndex(read.getMateReferenceIndex()); + super.setMateAlignmentStart(read.getMateAlignmentStart()); + super.setInferredInsertSize(read.getInferredInsertSize()); SAMReadGroupRecord samRG = read.getReadGroup(); - clearAttributes(); + SAMBinaryTagAndValue samAttr = GATKBin.getReadBinaryAttributes(read); + if (samAttr == null) { + clearAttributes(); + } else { + setAttributes(samAttr); + } if (samRG != null) { GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG); setReadGroup(rg); } - } - public GATKSAMRecord(final SAMFileHeader header, - final int referenceSequenceIndex, - final int alignmentStart, - final short readNameLength, - final short mappingQuality, - final int indexingBin, - final int cigarLen, - final int flags, - final int readLen, - final int mateReferenceSequenceIndex, - final int mateAlignmentStart, - final int insertSize, - final byte[] variableLengthBlock) { - super(header, referenceSequenceIndex, alignmentStart, readNameLength, mappingQuality, indexingBin, cigarLen, - flags, readLen, mateReferenceSequenceIndex, mateAlignmentStart, insertSize, variableLengthBlock); + super.setFileSource(read.getFileSource()); + super.setReadName(read.getReadName()); + super.setCigarString(read.getCigarString()); + super.setReadBases(read.getReadBases()); + super.setBaseQualities(read.getBaseQualities()); + // From SAMRecord constructor: Do this after the above because 
setCigarString will clear it. + GATKBin.setReadIndexingBin(this, GATKBin.getReadIndexingBin(read)); } public static GATKSAMRecord createRandomRead(int length) { @@ -520,19 +514,15 @@ public class GATKSAMRecord extends BAMRecord implements Cloneable { * @return a read with no bases but safe for the GATK */ public static GATKSAMRecord emptyRead(GATKSAMRecord read) { - GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(), - read.getReferenceIndex(), - 0, - (short) 0, - (short) 0, - 0, - 0, - read.getFlags(), - 0, - read.getMateReferenceIndex(), - read.getMateAlignmentStart(), - read.getInferredInsertSize(), - null); + final GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader()); + emptyRead.setReferenceIndex(read.getReferenceIndex()); + emptyRead.setAlignmentStart(0); + emptyRead.setMappingQuality(0); + // setting read indexing bin last + emptyRead.setFlags(read.getFlags()); + emptyRead.setMateReferenceIndex(read.getMateReferenceIndex()); + emptyRead.setMateAlignmentStart(read.getMateAlignmentStart()); + emptyRead.setInferredInsertSize(read.getInferredInsertSize()); emptyRead.setCigarString(""); emptyRead.setReadBases(new byte[0]); @@ -545,6 +535,8 @@ public class GATKSAMRecord extends BAMRecord implements Cloneable { emptyRead.setReadGroup(rg); } + GATKBin.setReadIndexingBin(emptyRead, 0); + return emptyRead; } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/GATKSAMRecordIterator.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/GATKSAMRecordIterator.java index cc2b77895..314facdd5 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/GATKSAMRecordIterator.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/GATKSAMRecordIterator.java @@ -28,7 +28,6 @@ package org.broadinstitute.gatk.utils.sam; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.CloseableIterator; import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator; -import 
org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import java.util.Iterator; @@ -40,9 +39,9 @@ import java.util.Iterator; * Time: 1:19 PM */ public class GATKSAMRecordIterator implements CloseableIterator, Iterable { - final CloseableIterator it; + final CloseableIterator it; - public GATKSAMRecordIterator(final CloseableIterator it) { + public GATKSAMRecordIterator(final CloseableIterator it) { this.it = it; } @@ -51,7 +50,14 @@ public class GATKSAMRecordIterator implements CloseableIterator, } @Override public boolean hasNext() { return it.hasNext(); } - @Override public GATKSAMRecord next() { return (GATKSAMRecord)it.next(); } + @Override public GATKSAMRecord next() { + SAMRecord next = it.next(); + if (next instanceof GATKSAMRecord) { + return (GATKSAMRecord)next; + } else { + return new GATKSAMRecord(next); + } + } @Override public void remove() { it.remove(); } @Override public void close() { it.close(); } @Override public Iterator iterator() { return this; } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/GATKSamRecordFactory.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/GATKSamRecordFactory.java deleted file mode 100644 index 1e5ad1e78..000000000 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/GATKSamRecordFactory.java +++ /dev/null @@ -1,75 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the 
Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.gatk.utils.sam; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordFactory; -import htsjdk.samtools.BAMRecord; -import org.broadinstitute.gatk.utils.exceptions.UserException; - -/** - * Factory interface implementation used to create GATKSamRecords - * from SAMFileReaders with SAM-JDK - * - * @author Mark DePristo - */ -public class GATKSamRecordFactory implements SAMRecordFactory { - - /** Create a new SAMRecord to be filled in */ - public SAMRecord createSAMRecord(SAMFileHeader header) { - throw new UserException.BadInput("The GATK now longer supports input SAM files"); - } - - /** Create a new BAM Record. 
*/ - public BAMRecord createBAMRecord(final SAMFileHeader header, - final int referenceSequenceIndex, - final int alignmentStart, - final short readNameLength, - final short mappingQuality, - final int indexingBin, - final int cigarLen, - final int flags, - final int readLen, - final int mateReferenceSequenceIndex, - final int mateAlignmentStart, - final int insertSize, - final byte[] variableLengthBlock) { - return new GATKSAMRecord(header, - referenceSequenceIndex, - alignmentStart, - readNameLength, - mappingQuality, - indexingBin, - cigarLen, - flags, - readLen, - mateReferenceSequenceIndex, - mateAlignmentStart, - insertSize, - variableLengthBlock); - } -} diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/SAMFileReaderBuilder.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/SAMReaderBuilder.java similarity index 78% rename from public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/SAMFileReaderBuilder.java rename to public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/SAMReaderBuilder.java index 2c5ea5f0e..19d2315f4 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/SAMFileReaderBuilder.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/SAMReaderBuilder.java @@ -25,8 +25,10 @@ package org.broadinstitute.gatk.utils.sam; -import htsjdk.samtools.SAMFileReader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.ValidationStringency; +import org.broadinstitute.gatk.utils.io.ReferenceBacked; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import java.io.File; @@ -39,12 +41,17 @@ import java.io.File; * @author mhanna * @version 0.1 */ -public class SAMFileReaderBuilder { +public class SAMReaderBuilder implements ReferenceBacked { /** * To which file should output be written? */ private File samFile = null; + /** + * The reference file for the samFile. 
+ */ + private File referenceFile = null; + /** * What compression level should be used when building this file? */ @@ -58,6 +65,16 @@ public class SAMFileReaderBuilder { this.samFile = samFile; } + @Override + public File getReferenceFile() { + return referenceFile; + } + + @Override + public void setReferenceFile(final File referenceFile) { + this.referenceFile = referenceFile; + } + /** * Sets the validation stringency to apply when reading this sam file. * @param validationStringency Stringency to apply. Must not be null. @@ -70,15 +87,16 @@ public class SAMFileReaderBuilder { * Create the SAM writer, given the constituent parts accrued. * @return Newly minted SAM file writer. */ - public SAMFileReader build() { + public SamReader build() { if( samFile == null ) throw new ReviewedGATKException( "Filename for output sam file must be supplied."); if( validationStringency == null ) throw new ReviewedGATKException( "Header for output sam file must be supplied."); - SAMFileReader reader = new SAMFileReader( samFile ); - reader.setValidationStringency( validationStringency ); - - return reader; + return SamReaderFactory + .makeDefault() + .referenceSequence(this.getReferenceFile()) + .validationStringency(validationStringency) + .open(samFile); } } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/text/ListFileUtils.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/text/ListFileUtils.java index 33a4b7d63..a2d796f52 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/text/ListFileUtils.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/text/ListFileUtils.java @@ -73,16 +73,16 @@ public class ListFileUtils { throw new UserException.CouldNotReadInputFile(new File(inputFileName), "Unable to find file while unpacking reads", ex); } } - else if(inputFileName.toLowerCase().endsWith(".bam")) { + else if(inputFileName.toLowerCase().endsWith(".bam") || 
inputFileName.toLowerCase().endsWith(".cram")) { unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags)); } else if(inputFileName.endsWith("stdin")) { unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags)); } else { - throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM files with the .bam extension and lists of BAM files " + - "with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " + - "of BAM files is in the correct format, update the extension, and try again.",inputFileName)); + throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM/CRAM files with the .bam/.cram extension and lists of BAM/CRAM files " + + "with the .list extension, but the file %s has neither extension. Please ensure that your BAM/CRAM file or list " + + "of BAM/CRAM files is in the correct format, update the extension, and try again.",inputFileName)); } } return unpackedReads; diff --git a/public/gatk-engine/src/test/java/htsjdk/samtools/GATKBAMFileSpanUnitTest.java b/public/gatk-utils/src/test/java/htsjdk/samtools/GATKBAMFileSpanUnitTest.java similarity index 100% rename from public/gatk-engine/src/test/java/htsjdk/samtools/GATKBAMFileSpanUnitTest.java rename to public/gatk-utils/src/test/java/htsjdk/samtools/GATKBAMFileSpanUnitTest.java diff --git a/public/gatk-engine/src/test/java/htsjdk/samtools/GATKChunkUnitTest.java b/public/gatk-utils/src/test/java/htsjdk/samtools/GATKChunkUnitTest.java similarity index 100% rename from public/gatk-engine/src/test/java/htsjdk/samtools/GATKChunkUnitTest.java rename to public/gatk-utils/src/test/java/htsjdk/samtools/GATKChunkUnitTest.java diff --git a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/ExampleToCopyUnitTest.java b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/ExampleToCopyUnitTest.java index 
8c6e24c36..2ec1eb8d2 100644 --- a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/ExampleToCopyUnitTest.java +++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/ExampleToCopyUnitTest.java @@ -40,7 +40,6 @@ import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.gatk.utils.sam.ArtificialBAMBuilder; import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; -import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; @@ -219,11 +218,10 @@ public class ExampleToCopyUnitTest extends BaseTest { final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(seq, 20, 10); final File bam = bamBuilder.makeTemporarilyBAMFile(); final SAMFileReader reader = new SAMFileReader(bam); - reader.setSAMRecordFactory(new GATKSamRecordFactory()); final Iterator bamIt = reader.iterator(); while ( bamIt.hasNext() ) { - final GATKSAMRecord read = (GATKSAMRecord)bamIt.next(); // all reads are actually GATKSAMRecords + final SAMRecord read = bamIt.next(); // all reads are actually GATKSAMRecords // TODO -- add some tests that use reads from a BAM } } diff --git a/public/gatk-utils/src/test/resources/exampleCRAM-noindex.cram b/public/gatk-utils/src/test/resources/exampleCRAM-noindex.cram new file mode 100644 index 000000000..7041737ee Binary files /dev/null and b/public/gatk-utils/src/test/resources/exampleCRAM-noindex.cram differ diff --git a/public/gatk-utils/src/test/resources/exampleCRAM.cram b/public/gatk-utils/src/test/resources/exampleCRAM.cram new file mode 100644 index 000000000..7041737ee Binary files /dev/null and b/public/gatk-utils/src/test/resources/exampleCRAM.cram differ diff --git a/public/gatk-utils/src/test/resources/exampleCRAM.cram.bai 
b/public/gatk-utils/src/test/resources/exampleCRAM.cram.bai new file mode 100644 index 000000000..a491abb97 Binary files /dev/null and b/public/gatk-utils/src/test/resources/exampleCRAM.cram.bai differ diff --git a/public/gatk-utils/src/test/resources/exampleCRAM.cram.crai b/public/gatk-utils/src/test/resources/exampleCRAM.cram.crai new file mode 100644 index 000000000..d10c49f45 Binary files /dev/null and b/public/gatk-utils/src/test/resources/exampleCRAM.cram.crai differ