Merge pull request #805 from broadinstitute/ks_gatk_cram
Introductory GATK CRAM support
This commit is contained in:
commit
d561fc5edc
|
|
@ -51,6 +51,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.utils.sam;
|
||||
|
||||
import htsjdk.samtools.GATKBin;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
|
|
@ -69,15 +71,30 @@ public class ClippedGATKSAMRecord extends GATKSAMRecord {
|
|||
* @param end inclusive last position in {@code read} included in the clipped view.
|
||||
*/
|
||||
public ClippedGATKSAMRecord(final GATKSAMRecord read, int start, int end) {
|
||||
super(read.getHeader(), read.getReferenceIndex(), read.getAlignmentStart() + start, (short) read.getReadNameLength(),
|
||||
(short) 100, -1, read.getCigarLength(), read.getFlags(), end - start,
|
||||
read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getInferredInsertSize(),
|
||||
new byte[0]);
|
||||
super(read.getHeader());
|
||||
this.setReferenceIndex(read.getReferenceIndex());
|
||||
this.setAlignmentStart(read.getAlignmentStart() + start);
|
||||
this.setMappingQuality(100);
|
||||
// setting read indexing bin below
|
||||
this.setFlags(read.getFlags());
|
||||
this.setMateReferenceIndex(read.getMateReferenceIndex());
|
||||
this.setMateAlignmentStart(read.getMateAlignmentStart());
|
||||
this.setInferredInsertSize(read.getInferredInsertSize());
|
||||
this.setReadBases(Arrays.copyOfRange(read.getReadBases(), start, end));
|
||||
this.setBaseQualities(Arrays.copyOfRange(read.getBaseQualities(),start,end));
|
||||
this.setReadName(read.getReadName());
|
||||
insertionQuals = Arrays.copyOfRange(read.getBaseInsertionQualities(),start,end);
|
||||
deletionQuals = Arrays.copyOfRange(read.getBaseDeletionQualities(),start,end);
|
||||
|
||||
// Set these to null in order to mark them as being candidates for lazy initialization.
|
||||
// If this is not done, they will have non-null defaults.
|
||||
super.setReadName(null);
|
||||
super.setCigarString(null);
|
||||
super.setReadBases(null);
|
||||
super.setBaseQualities(null);
|
||||
|
||||
// Do this after the above because setCigarString will clear it.
|
||||
GATKBin.setReadIndexingBin(this, -1);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||
|
||||
import htsjdk.samtools.GATKBin;
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.samtools.SAMSequenceDictionary;
|
||||
import htsjdk.samtools.SAMSequenceRecord;
|
||||
|
|
@ -317,22 +318,18 @@ public class ActiveRegionTestDataSet {
|
|||
|
||||
private class MyGATKSAMRecord extends GATKSAMRecord {
|
||||
protected MyGATKSAMRecord(final GATKSAMRecord r) {
|
||||
super(r.getHeader(), r.getReferenceIndex(), r.getAlignmentStart(), (short) r.getReadNameLength(),
|
||||
(short) 100, -1, r.getCigarLength(), r.getFlags(), r.getReadLength(),
|
||||
r.getMateReferenceIndex(), r.getMateAlignmentStart(), r.getInferredInsertSize(),
|
||||
new byte[0]);
|
||||
this.setReadBases(r.getReadBases());
|
||||
this.setBaseQualities(r.getBaseQualities());
|
||||
this.setReadName(r.getReadName());
|
||||
super(r);
|
||||
this.setMappingQuality(100);
|
||||
GATKBin.setReadIndexingBin(this, -1);
|
||||
}
|
||||
|
||||
ExponentialDistribution indelLengthDist = MathUtils.exponentialDistribution(1.0 / 0.9);
|
||||
|
||||
public MyGATKSAMRecord(final GATKSAMRecord r, final Random rnd) {
|
||||
super(r.getHeader(), r.getReferenceIndex(), r.getAlignmentStart(), (short) r.getReadNameLength(),
|
||||
(short) 100, -1, r.getCigarLength(), r.getFlags(), r.getReadLength(),
|
||||
r.getMateReferenceIndex(), r.getMateAlignmentStart(), r.getInferredInsertSize(),
|
||||
new byte[0]);
|
||||
super(r);
|
||||
this.setMappingQuality(100);
|
||||
// setting read indexing bin last
|
||||
|
||||
final byte[] bases = new byte[r.getReadBases().length];
|
||||
|
||||
final byte[] readBases = r.getReadBases();
|
||||
|
|
@ -384,7 +381,7 @@ public class ActiveRegionTestDataSet {
|
|||
this.setBaseQualities(r.getBaseQualities());
|
||||
this.setReadName(r.getReadName());
|
||||
|
||||
|
||||
GATKBin.setReadIndexingBin(this, -1);
|
||||
}
|
||||
|
||||
private int generateIndelLength(final Random rnd) {
|
||||
|
|
|
|||
|
|
@ -46,9 +46,13 @@ public class MyExampleWalkerIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
/**
* Resolves a test resource to a file located under {@code publicTestDir}.
* The active code only builds the {@link File}; it does not check that the file exists.
* The declared {@code URISyntaxException} is only relevant to the disabled
* classpath-based lookup kept in the block comment below (it calls {@code toURI()}).
*
* @param path path of the resource, resolved against {@code publicTestDir}.
* @return a file built from {@code publicTestDir} and {@code path}.
*/
private File getResource(String path) throws URISyntaxException {
|
||||
return new File(publicTestDir, path);
|
||||
/*
|
||||
TODO: Enable proper resource extraction from the test jars. For now just use the publicTestDir path.
|
||||
URL resourceUrl = getClass().getResource(path);
|
||||
if (resourceUrl == null)
|
||||
throw new MissingResourceException("Resource not found: " + path, getClass().getSimpleName(), path);
|
||||
return new File(resourceUrl.toURI());
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ import org.broadinstitute.gatk.engine.io.stubs.Stub;
|
|||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformersMode;
|
||||
import org.broadinstitute.gatk.engine.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.gatk.utils.io.ReferenceBacked;
|
||||
import org.broadinstitute.gatk.utils.refdata.tracks.IndexDictionaryUtils;
|
||||
import org.broadinstitute.gatk.utils.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.gatk.utils.refdata.utils.RMDTriplet;
|
||||
|
|
@ -697,9 +698,12 @@ public class GenomeAnalysisEngine {
|
|||
* @param outputTracker the tracker supplying the initialization data.
|
||||
*/
|
||||
private void initializeOutputStreams(final OutputTracker outputTracker) {
|
||||
for (final Map.Entry<ArgumentSource, Object> input : getInputs().entrySet())
|
||||
for (final Map.Entry<ArgumentSource, Object> input : getInputs().entrySet()) {
|
||||
setReferenceFile(input.getValue());
|
||||
outputTracker.addInput(input.getKey(), input.getValue());
|
||||
}
|
||||
for (final Stub<?> stub : getOutputs()) {
|
||||
setReferenceFile(stub);
|
||||
stub.processArguments(argCollection);
|
||||
outputTracker.addOutput(stub);
|
||||
}
|
||||
|
|
@ -707,6 +711,12 @@ public class GenomeAnalysisEngine {
|
|||
outputTracker.prepareWalker(walker, getArguments().strictnessLevel);
|
||||
}
|
||||
|
||||
/**
* Passes the engine's reference file ({@code argCollection.referenceFile}) to the given
* object when it implements {@link ReferenceBacked}; any other object is left untouched.
*
* @param object candidate receiver of the reference file (an input value or an output stub).
*/
private void setReferenceFile(final Object object) {
|
||||
if (object instanceof ReferenceBacked) {
|
||||
((ReferenceBacked)object).setReferenceFile(argCollection.referenceFile);
|
||||
}
|
||||
}
|
||||
|
||||
public ReferenceDataSource getReferenceDataSource() {
|
||||
return referenceDataSource;
|
||||
}
|
||||
|
|
@ -907,6 +917,7 @@ public class GenomeAnalysisEngine {
|
|||
final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker;
|
||||
|
||||
return new SAMDataSource(
|
||||
argCollection.referenceFile,
|
||||
samReaderIDs,
|
||||
threadAllocation,
|
||||
argCollection.numberOfBAMFileHandles,
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import htsjdk.samtools.MergingSamRecordIterator;
|
|||
import htsjdk.samtools.SamFileHeaderMerger;
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.util.CloseableIterator;
|
||||
import htsjdk.samtools.util.CloserUtil;
|
||||
import htsjdk.samtools.util.RuntimeIOException;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.ReadMetrics;
|
||||
|
|
@ -49,7 +50,8 @@ import org.broadinstitute.gatk.utils.interval.IntervalMergingRule;
|
|||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator;
|
||||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIteratorAdapter;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecordIterator;
|
||||
import org.broadinstitute.gatk.utils.sam.SAMReaderID;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -64,7 +66,8 @@ import java.util.concurrent.Callable;
|
|||
* Converts shards to SAM iterators over the specified region
|
||||
*/
|
||||
public class SAMDataSource {
|
||||
final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
|
||||
/** Reference file */
|
||||
private final File referenceFile;
|
||||
|
||||
/** Backing support for reads. */
|
||||
protected final ReadProperties readProperties;
|
||||
|
|
@ -177,8 +180,11 @@ public class SAMDataSource {
|
|||
*
|
||||
* @param samFiles list of reads files.
|
||||
*/
|
||||
public SAMDataSource(Collection<SAMReaderID> samFiles, ThreadAllocation threadAllocation, Integer numFileHandles, GenomeLocParser genomeLocParser) {
|
||||
public SAMDataSource(final File referenceFile, final Collection<SAMReaderID> samFiles,
|
||||
final ThreadAllocation threadAllocation, final Integer numFileHandles,
|
||||
final GenomeLocParser genomeLocParser) {
|
||||
this(
|
||||
referenceFile,
|
||||
samFiles,
|
||||
threadAllocation,
|
||||
numFileHandles,
|
||||
|
|
@ -198,6 +204,7 @@ public class SAMDataSource {
|
|||
* For testing purposes
|
||||
*/
|
||||
public SAMDataSource(
|
||||
final File referenceFile,
|
||||
Collection<SAMReaderID> samFiles,
|
||||
ThreadAllocation threadAllocation,
|
||||
Integer numFileHandles,
|
||||
|
|
@ -209,7 +216,8 @@ public class SAMDataSource {
|
|||
ValidationExclusion exclusionList,
|
||||
Collection<ReadFilter> supplementalFilters,
|
||||
boolean includeReadsWithDeletionAtLoci) {
|
||||
this( samFiles,
|
||||
this( referenceFile,
|
||||
samFiles,
|
||||
threadAllocation,
|
||||
numFileHandles,
|
||||
genomeLocParser,
|
||||
|
|
@ -230,6 +238,7 @@ public class SAMDataSource {
|
|||
|
||||
/**
|
||||
* Create a new SAM data source given the supplied read metadata.
|
||||
* @param referenceFile reference file.
|
||||
* @param samFiles list of reads files.
|
||||
* @param useOriginalBaseQualities True if original base qualities should be used.
|
||||
* @param strictness Stringency of reads file parsing.
|
||||
|
|
@ -247,6 +256,7 @@ public class SAMDataSource {
|
|||
* @param intervalMergingRule how are adjacent intervals merged by the sharder
|
||||
*/
|
||||
public SAMDataSource(
|
||||
final File referenceFile,
|
||||
Collection<SAMReaderID> samFiles,
|
||||
ThreadAllocation threadAllocation,
|
||||
Integer numFileHandles,
|
||||
|
|
@ -265,6 +275,7 @@ public class SAMDataSource {
|
|||
final Map<String, String> sampleRenameMap,
|
||||
final IntervalMergingRule intervalMergingRule) {
|
||||
|
||||
this.referenceFile = referenceFile;
|
||||
this.readMetrics = new ReadMetrics();
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.intervalMergingRule = intervalMergingRule;
|
||||
|
|
@ -303,7 +314,7 @@ public class SAMDataSource {
|
|||
"Please check that the file is present and readable and try again.");
|
||||
|
||||
// Get the sort order, forcing it to coordinate if unsorted.
|
||||
SAMFileReader reader = readers.getReader(readerID);
|
||||
SamReader reader = readers.getReader(readerID);
|
||||
SAMFileHeader header = reader.getFileHeader();
|
||||
|
||||
headers.put(readerID,header);
|
||||
|
|
@ -343,7 +354,7 @@ public class SAMDataSource {
|
|||
// cache the read group id (original) -> read group id (merged)
|
||||
// and read group id (merged) -> read group id (original) mappings.
|
||||
for(SAMReaderID id: readerIDs) {
|
||||
SAMFileReader reader = readers.getReader(id);
|
||||
SamReader reader = readers.getReader(id);
|
||||
|
||||
ReadGroupMapping mappingToMerged = new ReadGroupMapping();
|
||||
|
||||
|
|
@ -385,8 +396,8 @@ public class SAMDataSource {
|
|||
public void close() {
|
||||
SAMReaders readers = resourcePool.getAvailableReaders();
|
||||
for(SAMReaderID readerID: readerIDs) {
|
||||
SAMFileReader reader = readers.getReader(readerID);
|
||||
reader.close();
|
||||
SamReader reader = readers.getReader(readerID);
|
||||
CloserUtil.close(reader);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -463,14 +474,6 @@ public class SAMDataSource {
|
|||
return mergedToOriginalReadGroupMappings.get(mergedReadGroupId);
|
||||
}
|
||||
|
||||
/**
|
||||
* True if all readers have an index.
|
||||
* @return True if all readers have an index.
|
||||
*/
|
||||
public boolean hasIndex() {
|
||||
return readerIDs.size() == bamIndices.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the index for a particular reader. Always preloaded.
|
||||
* @param id Id of the reader.
|
||||
|
|
@ -480,6 +483,44 @@ public class SAMDataSource {
|
|||
return bamIndices.get(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the index for a particular reader exists,
* i.e. if {@code bamIndices} holds an entry keyed by the given id.
|
||||
* @param id Id of the reader.
|
||||
* @return True if the index exists.
|
||||
*/
|
||||
public boolean hasIndex(final SAMReaderID id) {
|
||||
return bamIndices.containsKey(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* True if all readers that require an index for SAMFileSpan creation have an index.
* Readers for which {@code isSAMFileSpanSupported} returns false are skipped, so a
* missing index on such a reader never makes this method return false.
|
||||
* @return True if all readers that require an index for SAMFileSpan creation have an index.
|
||||
*/
|
||||
public boolean hasIndex() {
|
||||
// Only readers that support file spans are required to have an index.
for (final SAMReaderID readerID: readerIDs)
|
||||
if (isSAMFileSpanSupported(readerID))
|
||||
if (!hasIndex(readerID))
|
||||
// First span-capable reader without an index decides the answer.
return false;
|
||||
return true;
|
||||
}
|
||||
/**
|
||||
* Returns true if the reader can use file spans.
* The decision is made from the file name alone: the lower-cased name of the reader's
* file must end with the file extension of {@code SamReader.Type.BAM_TYPE}.
* @param readerID id of the reader whose backing file name is inspected.
|
||||
* @return true if file spans are supported.
|
||||
*/
|
||||
private boolean isSAMFileSpanSupported(final SAMReaderID readerID) {
|
||||
// example: https://github.com/samtools/htsjdk/blob/ee4308ede60962f3ab4275473ac384724b471149/src/java/htsjdk/samtools/BAMFileReader.java#L341
|
||||
return readerID.getSamFile().getName().toLowerCase().endsWith(SamReader.Type.BAM_TYPE.fileExtension());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the reader caches its SAMFileHeader for each iterator.
* The decision is made from the file name alone: every reader is treated as caching
* unless the lower-cased name of its file ends with the file extension of
* {@code SamReader.Type.CRAM_TYPE}.
* @param readerID id of the reader whose backing file name is inspected.
|
||||
* @return true if this reader caches its SAMFileHeader for each iterator.
|
||||
*/
|
||||
private boolean isIteratorSAMFileHeaderCached(final SAMReaderID readerID) {
|
||||
// example: https://github.com/samtools/htsjdk/blob/ee4308ede60962f3ab4275473ac384724b471149/src/java/htsjdk/samtools/CRAMFileReader.java#L183
|
||||
return !readerID.getSamFile().getName().toLowerCase().endsWith(SamReader.Type.CRAM_TYPE.fileExtension());
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the sort order of the readers.
|
||||
* @return Sort order. Can be unsorted, coordinate order, or query name order.
|
||||
|
|
@ -538,7 +579,17 @@ public class SAMDataSource {
|
|||
SAMReaders readers = resourcePool.getAvailableReaders();
|
||||
|
||||
for ( SAMReaderID id: getReaderIDs() ) {
|
||||
initialPositions.put(id, new GATKBAMFileSpan(readers.getReader(id).getFilePointerSpanningReads()));
|
||||
GATKBAMFileSpan span;
|
||||
try {
|
||||
span = new GATKBAMFileSpan(readers.getReader(id).indexing().getFilePointerSpanningReads());
|
||||
} catch (RuntimeException e) {
|
||||
if ("Not implemented.".equals(e.getMessage())) { https://github.com/samtools/htsjdk/blob/035d4319643657d715e93c53c13fe4a1f64e0188/src/java/htsjdk/samtools/CRAMFileReader.java#L197
|
||||
span = new GATKBAMFileSpan(new GATKChunk(0, Long.MAX_VALUE));
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
initialPositions.put(id, span);
|
||||
}
|
||||
|
||||
resourcePool.releaseReaders(readers);
|
||||
|
|
@ -567,7 +618,7 @@ public class SAMDataSource {
|
|||
Map<SamReader,CloseableIterator<SAMRecord>> iteratorMap = new HashMap<>();
|
||||
|
||||
for(SAMReaderID id: getReaderIDs()) {
|
||||
CloseableIterator<SAMRecord> iterator = null;
|
||||
CloseableIterator<SAMRecord> iterator;
|
||||
|
||||
// TODO: null used to be the signal for unmapped, but we've replaced that with a simple index query for the last bin.
|
||||
// TODO: Kill this check once we've proven that the design elements are gone.
|
||||
|
|
@ -576,19 +627,33 @@ public class SAMDataSource {
|
|||
|
||||
try {
|
||||
if(threadAllocation.getNumIOThreads() > 0) {
|
||||
// TODO: need to add a friendly error if -nit is used with a non-BAM input. Later, possibly add this capability for CRAM once htsjdk supports CRAM file spans.
|
||||
BlockInputStream inputStream = readers.getInputStream(id);
|
||||
inputStream.submitAccessPlan(new BAMAccessPlan(id, inputStream, (GATKBAMFileSpan) shard.getFileSpans().get(id)));
|
||||
BAMRecordCodec codec = new BAMRecordCodec(getHeader(id),factory);
|
||||
BAMRecordCodec codec = new BAMRecordCodec(getHeader(id));
|
||||
codec.setInputStream(inputStream);
|
||||
iterator = new BAMCodecIterator(inputStream,readers.getReader(id),codec);
|
||||
}
|
||||
else {
|
||||
iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id));
|
||||
final SamReader reader = readers.getReader(id);
|
||||
try {
|
||||
iterator = ((SamReader.Indexing)reader).iterator(shard.getFileSpans().get(id));
|
||||
} catch (RuntimeException re) {
|
||||
if ("Not implemented.".equals(re.getMessage())) { // https://github.com/samtools/htsjdk/blob/429f2a8585d9c98a3efd4cedc5188b60b1e66ac5/src/java/htsjdk/samtools/CRAMFileReader.java#L192
|
||||
// No way to jump into the file span. Query the whole file.
|
||||
iterator = readers.getReader(id).iterator();
|
||||
} else {
|
||||
throw re;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch ( RuntimeException e ) { // we need to catch RuntimeExceptions here because the Picard code is throwing them (among SAMFormatExceptions) sometimes
|
||||
throw new UserException.MalformedBAM(id.getSamFile(), e.getMessage());
|
||||
}
|
||||
|
||||
// At the moment, too many other classes to change for GATKSAMRecordIterator converter.
|
||||
// Force the compiler to just let the conversion happen, since generics are erased anyway.
|
||||
iterator = (CloseableIterator<SAMRecord>)(Object)new GATKSAMRecordIterator(iterator);
|
||||
iterator = new MalformedBAMErrorReformatingIterator(id.getSamFile(), iterator);
|
||||
if(shard.getGenomeLocs().size() > 0)
|
||||
iterator = new IntervalOverlapFilteringIterator(iterator,shard.getGenomeLocs());
|
||||
|
|
@ -614,11 +679,11 @@ public class SAMDataSource {
|
|||
|
||||
private class BAMCodecIterator implements CloseableIterator<SAMRecord> {
|
||||
private final BlockInputStream inputStream;
|
||||
private final SAMFileReader reader;
|
||||
private final SamReader reader;
|
||||
private final BAMRecordCodec codec;
|
||||
private SAMRecord nextRead;
|
||||
|
||||
private BAMCodecIterator(final BlockInputStream inputStream, final SAMFileReader reader, final BAMRecordCodec codec) {
|
||||
private BAMCodecIterator(final BlockInputStream inputStream, final SamReader reader, final BAMRecordCodec codec) {
|
||||
this.inputStream = inputStream;
|
||||
this.reader = reader;
|
||||
this.codec = codec;
|
||||
|
|
@ -823,7 +888,7 @@ public class SAMDataSource {
|
|||
/**
|
||||
* A collection of readers derived from a reads metadata structure.
|
||||
*/
|
||||
private class SAMReaders implements Iterable<SAMFileReader> {
|
||||
private class SAMReaders implements Iterable<SamReader> {
|
||||
/**
|
||||
* Cached representation of the merged header used to generate a merging iterator.
|
||||
*/
|
||||
|
|
@ -832,7 +897,7 @@ public class SAMDataSource {
|
|||
/**
|
||||
* Internal storage for a map of id -> reader.
|
||||
*/
|
||||
private final Map<SAMReaderID,SAMFileReader> readers = new LinkedHashMap<SAMReaderID,SAMFileReader>();
|
||||
private final Map<SAMReaderID,SamReader> readers = new LinkedHashMap<>();
|
||||
|
||||
/**
|
||||
* The input streams backing
|
||||
|
|
@ -860,7 +925,11 @@ public class SAMDataSource {
|
|||
|
||||
checkForUnsupportedBamFile(init.reader.getFileHeader());
|
||||
|
||||
if (removeProgramRecords) {
|
||||
if (removeProgramRecords && isIteratorSAMFileHeaderCached(readerID)) {
|
||||
// Only works when the SamReader implementation caches its header.
|
||||
// Some implementations (ex: CRAM) rewrite the new underlying file header in reader.getIterator().
|
||||
// Later, when MergingSamRecordIterator goes to check the headers with .contains()/.equals(),
|
||||
// it will error out complaining it can't find the unmodified version of the header.
|
||||
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
|
||||
}
|
||||
|
||||
|
|
@ -883,9 +952,9 @@ public class SAMDataSource {
|
|||
|
||||
// Examine the bam headers, perform any requested sample renaming on them, and add
|
||||
// them to the list of headers to pass to the Picard SamFileHeaderMerger:
|
||||
for ( final Map.Entry<SAMReaderID, SAMFileReader> readerEntry : readers.entrySet() ) {
|
||||
for ( final Map.Entry<SAMReaderID, SamReader> readerEntry : readers.entrySet() ) {
|
||||
final SAMReaderID readerID = readerEntry.getKey();
|
||||
final SAMFileReader reader = readerEntry.getValue();
|
||||
final SamReader reader = readerEntry.getValue();
|
||||
final SAMFileHeader header = reader.getFileHeader();
|
||||
|
||||
// The remappedSampleName will be null if either no on-the-fly sample renaming was requested,
|
||||
|
|
@ -1009,7 +1078,7 @@ public class SAMDataSource {
|
|||
* @param id The ID of the reader to retrieve.
|
||||
* @return the reader associated with the given id.
|
||||
*/
|
||||
public SAMFileReader getReader(SAMReaderID id) {
|
||||
public SamReader getReader(SAMReaderID id) {
|
||||
if(!readers.containsKey(id))
|
||||
throw new NoSuchElementException("No reader is associated with id " + id);
|
||||
return readers.get(id);
|
||||
|
|
@ -1030,7 +1099,7 @@ public class SAMDataSource {
|
|||
* @return The id associated with the given reader, or null if the reader is not present in this collection.
|
||||
*/
|
||||
protected SAMReaderID getReaderID(SamReader reader) {
|
||||
for(Map.Entry<SAMReaderID,SAMFileReader> entry: readers.entrySet()) {
|
||||
for(Map.Entry<SAMReaderID,SamReader> entry: readers.entrySet()) {
|
||||
if(reader == entry.getValue())
|
||||
return entry.getKey();
|
||||
}
|
||||
|
|
@ -1042,7 +1111,7 @@ public class SAMDataSource {
|
|||
* Returns an iterator over all readers in this structure.
|
||||
* @return An iterator over readers.
|
||||
*/
|
||||
public Iterator<SAMFileReader> iterator() {
|
||||
public Iterator<SamReader> iterator() {
|
||||
return readers.values().iterator();
|
||||
}
|
||||
|
||||
|
|
@ -1058,18 +1127,23 @@ public class SAMDataSource {
|
|||
class ReaderInitializer implements Callable<ReaderInitializer> {
|
||||
final SAMReaderID readerID;
|
||||
BlockInputStream blockInputStream = null;
|
||||
SAMFileReader reader;
|
||||
SamReader reader;
|
||||
|
||||
public ReaderInitializer(final SAMReaderID readerID) {
|
||||
this.readerID = readerID;
|
||||
}
|
||||
|
||||
public ReaderInitializer call() {
|
||||
final File indexFile = findIndexFile(readerID.getSamFile());
|
||||
try {
|
||||
if (threadAllocation.getNumIOThreads() > 0)
|
||||
blockInputStream = new BlockInputStream(dispatcher,readerID,false);
|
||||
reader = new SAMFileReader(readerID.getSamFile(),indexFile,false);
|
||||
reader = SamReaderFactory.makeDefault()
|
||||
.referenceSequence(referenceFile)
|
||||
.validationStringency(validationStringency)
|
||||
.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
|
||||
.setOption(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, true)
|
||||
.open(readerID.getSamFile());
|
||||
|
||||
} catch ( RuntimeIOException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(readerID.getSamFile(), e);
|
||||
} catch ( SAMFormatException e ) {
|
||||
|
|
@ -1081,9 +1155,6 @@ public class SAMDataSource {
|
|||
catch ( RuntimeException e ) {
|
||||
throw new UserException.MalformedBAM(readerID.getSamFile(), e.getMessage());
|
||||
}
|
||||
reader.setSAMRecordFactory(factory);
|
||||
reader.enableFileSource(true);
|
||||
reader.setValidationStringency(validationStringency);
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ public class FindLargeShards extends CommandLineProgram {
|
|||
|
||||
// initialize reads
|
||||
List<SAMReaderID> bamReaders = ListFileUtils.unpackBAMFileList(samFiles,parser);
|
||||
SAMDataSource dataSource = new SAMDataSource(bamReaders,new ThreadAllocation(),null,genomeLocParser);
|
||||
SAMDataSource dataSource = new SAMDataSource(referenceFile, bamReaders, new ThreadAllocation(), null, genomeLocParser);
|
||||
|
||||
// intervals
|
||||
final GenomeLocSortedSet intervalSortedSet;
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.io;
|
||||
|
||||
import htsjdk.samtools.SAMFileReader;
|
||||
import htsjdk.samtools.ValidationStringency;
|
||||
import org.broadinstitute.gatk.utils.io.ReferenceBacked;
|
||||
import org.broadinstitute.gatk.utils.commandline.ArgumentSource;
|
||||
import org.broadinstitute.gatk.engine.io.storage.Storage;
|
||||
import org.broadinstitute.gatk.engine.io.storage.StorageFactory;
|
||||
|
|
@ -37,7 +37,7 @@ import org.broadinstitute.gatk.utils.classloader.JVMUtils;
|
|||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.io.IOUtils;
|
||||
import org.broadinstitute.gatk.utils.sam.SAMFileReaderBuilder;
|
||||
import org.broadinstitute.gatk.utils.sam.SAMReaderBuilder;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
|
|
@ -49,7 +49,12 @@ import java.util.Map;
|
|||
* Manages the output and err streams that are created specifically for walker
|
||||
* output.
|
||||
*/
|
||||
public abstract class OutputTracker {
|
||||
public abstract class OutputTracker implements ReferenceBacked {
|
||||
/**
|
||||
* The reference file.
|
||||
*/
|
||||
private File referenceFile;
|
||||
|
||||
/**
|
||||
* The streams to which walker users should be reading directly.
|
||||
*/
|
||||
|
|
@ -78,6 +83,16 @@ public abstract class OutputTracker {
|
|||
*/
|
||||
public abstract <T> T getStorage( Stub<T> stub );
|
||||
|
||||
/**
* Returns the reference file currently stored on this tracker.
*
* @return the value last passed to {@code setReferenceFile}, or null if none has been set.
*/
@Override
|
||||
public File getReferenceFile() {
|
||||
return referenceFile;
|
||||
}
|
||||
|
||||
/**
* Stores the reference file on this tracker; no validation is performed here.
*
* @param referenceFile the reference file to remember.
*/
@Override
|
||||
public void setReferenceFile(final File referenceFile) {
|
||||
this.referenceFile = referenceFile;
|
||||
}
|
||||
|
||||
public void prepareWalker( Walker walker, ValidationStringency strictnessLevel ) {
|
||||
for( Map.Entry<ArgumentSource,Object> io: inputs.entrySet() ) {
|
||||
ArgumentSource targetField = io.getKey();
|
||||
|
|
@ -85,8 +100,8 @@ public abstract class OutputTracker {
|
|||
|
||||
// Ghastly hack: reaches in and finishes building out the SAMFileReader.
|
||||
// TODO: Generalize this, and move it to its own initialization step.
|
||||
if( targetValue instanceof SAMFileReaderBuilder) {
|
||||
SAMFileReaderBuilder builder = (SAMFileReaderBuilder)targetValue;
|
||||
if( targetValue instanceof SAMReaderBuilder) {
|
||||
SAMReaderBuilder builder = (SAMReaderBuilder)targetValue;
|
||||
builder.setValidationStringency(strictnessLevel);
|
||||
targetValue = builder.build();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,9 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|||
import org.broadinstitute.gatk.utils.sam.SimplifyingSAMFileWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
|
||||
|
|
@ -46,6 +49,7 @@ import java.lang.reflect.Method;
|
|||
*/
|
||||
public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWriter> {
|
||||
private final File file;
|
||||
private File referenceFasta;
|
||||
private SAMFileWriter writer;
|
||||
|
||||
private static Logger logger = Logger.getLogger(SAMFileWriterStorage.class);
|
||||
|
|
@ -55,6 +59,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
|||
}
|
||||
|
||||
public SAMFileWriterStorage( SAMFileWriterStub stub, File file ) {
|
||||
this.referenceFasta = stub.getReferenceFile();
|
||||
this.file = file;
|
||||
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||
// Enable automatic index creation for pre-sorted BAMs.
|
||||
|
|
@ -69,9 +74,14 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
|||
|
||||
if(stub.getOutputFile() != null) {
|
||||
try {
|
||||
this.writer = createBAMWriter(factory,stub.getFileHeader(),stub.isPresorted(),file,stub.getCompressionLevel());
|
||||
}
|
||||
catch(RuntimeIOException ex) {
|
||||
if (stub.getOutputFile().getName().toLowerCase().endsWith(".cram")) {
|
||||
this.writer = createCRAMWriter(factory, stub.getFileHeader(), new FileOutputStream(file), this.referenceFasta);
|
||||
} else {
|
||||
this.writer = createBAMWriter(factory,stub.getFileHeader(),stub.isPresorted(),file,stub.getCompressionLevel());
|
||||
}
|
||||
} catch(IOException ex) {
|
||||
throw new UserException.CouldNotCreateOutputFile(file, "file could not be created", ex);
|
||||
} catch(RuntimeIOException ex) {
|
||||
throw new UserException.CouldNotCreateOutputFile(file,"file could not be created",ex);
|
||||
}
|
||||
}
|
||||
|
|
@ -117,6 +127,13 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
|||
}
|
||||
}
|
||||
|
||||
/**
* Creates a CRAM writer by delegating to {@code factory.makeCRAMWriter}.
*
* @param factory factory used to build the writer.
* @param header header handed to the factory for the new writer.
* @param outputStream stream handed to the factory as the CRAM destination.
* @param referenceFasta reference file handed to the factory.
* @return the writer returned by the factory.
*/
private SAMFileWriter createCRAMWriter(final SAMFileWriterFactory factory,
|
||||
final SAMFileHeader header,
|
||||
final OutputStream outputStream,
|
||||
final File referenceFasta) {
|
||||
return factory.makeCRAMWriter(header, outputStream, referenceFasta);
|
||||
}
|
||||
|
||||
private SAMFileWriter createBAMWriter(final SAMFileWriterFactory factory,
|
||||
final SAMFileHeader header,
|
||||
final boolean presorted,
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import htsjdk.samtools.util.ProgressLoggerInterface;
|
|||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.gatk.engine.io.OutputTracker;
|
||||
import org.broadinstitute.gatk.utils.io.ReferenceBacked;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMFileWriter;
|
||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||
import org.broadinstitute.gatk.utils.baq.BAQ;
|
||||
|
|
@ -50,7 +51,7 @@ import java.util.List;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter {
|
||||
public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter, ReferenceBacked {
|
||||
/**
|
||||
* Engine to use for collecting attributes for the output SAM file.
|
||||
*/
|
||||
|
|
@ -67,6 +68,11 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter
|
|||
*/
|
||||
private final File samFile;
|
||||
|
||||
/**
|
||||
* The reference file for stub.
|
||||
*/
|
||||
private File referenceFile;
|
||||
|
||||
/**
|
||||
* The target output stream, to be used in place of the SAM file.
|
||||
*/
|
||||
|
|
@ -189,6 +195,16 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter
|
|||
return samOutputStream;
|
||||
}
|
||||
|
||||
/**
* Returns the reference file currently stored on this stub.
*
* @return the value last passed to {@code setReferenceFile}, or null if none has been set.
*/
@Override
|
||||
public File getReferenceFile() {
|
||||
return referenceFile;
|
||||
}
|
||||
|
||||
/**
* Stores the reference file on this stub; no validation is performed here.
*
* @param referenceFile the reference file to remember.
*/
@Override
|
||||
public void setReferenceFile(final File referenceFile) {
|
||||
this.referenceFile = referenceFile;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the header to use when creating the new SAM file.
|
||||
* @return header to use when creating the new SAM file.
|
||||
|
|
|
|||
|
|
@ -29,14 +29,14 @@ import htsjdk.samtools.SAMFileReader;
|
|||
import org.broadinstitute.gatk.utils.commandline.*;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.sam.SAMFileReaderBuilder;
|
||||
import org.broadinstitute.gatk.utils.sam.SAMReaderBuilder;
|
||||
|
||||
import java.lang.reflect.Type;
|
||||
|
||||
/**
|
||||
* Describe how to parse SAMFileReaders.
|
||||
* Describe how to parse SAMReaders.
|
||||
*/
|
||||
public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
public class SAMReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
/**
|
||||
* The engine into which output stubs should be fed.
|
||||
*/
|
||||
|
|
@ -46,7 +46,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
* Create a new SAMFileReader argument, notifying the given engine when that argument has been created.
|
||||
* @param engine engine
|
||||
*/
|
||||
public SAMFileReaderArgumentTypeDescriptor( GenomeAnalysisEngine engine ) {
|
||||
public SAMReaderArgumentTypeDescriptor(GenomeAnalysisEngine engine) {
|
||||
this.engine = engine;
|
||||
}
|
||||
|
||||
|
|
@ -57,7 +57,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
|
||||
@Override
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||
SAMFileReaderBuilder builder = new SAMFileReaderBuilder();
|
||||
SAMReaderBuilder builder = new SAMReaderBuilder();
|
||||
|
||||
ArgumentMatchValue readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches );
|
||||
|
||||
|
|
@ -71,7 +71,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
|
||||
// MASSIVE KLUDGE! SAMFileReader is tricky to implement and we don't yet have a stub. Return null, then
|
||||
// let the output tracker load it in.
|
||||
// TODO: Add a stub for SAMFileReader.
|
||||
// TODO: Add a stub for SAMReader.
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -38,7 +38,6 @@ import org.broadinstitute.gatk.utils.collections.Pair;
|
|||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.vcf.VCFCodec;
|
||||
import htsjdk.variant.vcf.VCFHeader;
|
||||
|
|
@ -312,7 +311,6 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
|
|||
final File outputBam = executeTest("testGATKEngineConsolidatesCigars", spec).first.get(0);
|
||||
final SAMFileReader reader = new SAMFileReader(outputBam);
|
||||
reader.setValidationStringency(ValidationStringency.SILENT);
|
||||
reader.setSAMRecordFactory(new GATKSamRecordFactory());
|
||||
|
||||
final SAMRecord read = reader.iterator().next();
|
||||
reader.close();
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
|
||||
// Test the accuracy of the read metrics
|
||||
|
||||
private File referenceFile;
|
||||
private IndexedFastaSequenceFile reference;
|
||||
private SAMSequenceDictionary dictionary;
|
||||
private SAMFileHeader header;
|
||||
|
|
@ -93,7 +94,8 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
|
||||
@BeforeClass
|
||||
private void init() throws IOException {
|
||||
reference = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
referenceFile = new File(b37KGReference);
|
||||
reference = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
dictionary = reference.getSequenceDictionary();
|
||||
genomeLocParser = new GenomeLocParser(dictionary);
|
||||
header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
|
||||
|
|
@ -149,7 +151,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
||||
samFiles.add(readerID);
|
||||
|
||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
false,
|
||||
ValidationStringency.STRICT,
|
||||
null,
|
||||
|
|
@ -184,7 +186,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
||||
samFiles.add(readerID);
|
||||
|
||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
false,
|
||||
ValidationStringency.STRICT,
|
||||
null,
|
||||
|
|
@ -225,7 +227,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
||||
samFiles.add(readerID);
|
||||
|
||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
false,
|
||||
ValidationStringency.STRICT,
|
||||
null,
|
||||
|
|
@ -272,7 +274,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
final List<ReadFilter> filters = new ArrayList<>();
|
||||
filters.add(new EveryTenthReadFilter());
|
||||
|
||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
false,
|
||||
ValidationStringency.STRICT,
|
||||
null,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.arguments;
|
||||
|
||||
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Test the GATK core CRAM parsing mechanism.
|
||||
*/
|
||||
public class CramIntegrationTest extends WalkerTest {
|
||||
/**
 * Supplies the parameter rows for {@link #testCRAM}.
 * Each row is, in order: walker name, input file name, extra command-line arguments,
 * output file extension, and the MD5 string handed to the test spec for the single output.
 * One PrintReads CRAM-to-CRAM row is commented out with a pointer to an htsjdk issue.
 *
 * @return the table of test parameters.
 */
@DataProvider(name="cramData")
|
||||
public Object[][] getCRAMData() {
|
||||
return new Object[][] {
|
||||
{"PrintReads", "exampleBAM.bam", "", "cram", "026ebc00c2a8f9832e37f1a6a0f53521"},
|
||||
//{"PrintReads", "exampleCRAM.cram", "", "cram", "026ebc00c2a8f9832e37f1a6a0f53521"}, https://github.com/samtools/htsjdk/issues/148
|
||||
{"PrintReads", "exampleCRAM.cram", "", "bam", "99e5f740b43594a5b8e5bc1a007719e0"},
|
||||
{"PrintReads", "exampleCRAM-noindex.cram", "", "bam", "99e5f740b43594a5b8e5bc1a007719e0"},
|
||||
{"PrintReads", "exampleCRAM.cram", " -L chr1:200", "bam", "072435e8272411c31b2234f851706384"},
|
||||
{"PrintReads", "exampleCRAM-noindex.cram", " -L chr1:200", "bam", "072435e8272411c31b2234f851706384"},
|
||||
{"CountLoci", "exampleCRAM.cram", "", "txt", "ade93df31a6150321c1067e749cae9be"},
|
||||
{"CountLoci", "exampleCRAM-noindex.cram", "", "txt", "ade93df31a6150321c1067e749cae9be"},
|
||||
{"CountLoci", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
|
||||
{"CountLoci", "exampleCRAM-noindex.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
|
||||
{"CountReads", "exampleCRAM.cram", "", "txt", "4fbafd6948b6529caa2b78e476359875"},
|
||||
{"CountReads", "exampleCRAM-noindex.cram", "", "txt", "4fbafd6948b6529caa2b78e476359875"},
|
||||
{"CountReads", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
|
||||
{"CountReads", "exampleCRAM-noindex.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
|
||||
{"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "9598062587ad8d2ec596a8ecb19be979"},
|
||||
{"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
|
||||
{"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
|
||||
};
|
||||
}
|
||||

||||
/**
 * Runs one walker over one input file and hands the resulting spec to {@code executeTest}.
 * The command line is assembled as {@code -T Test<walker>Walker -I <publicTestDir><input>
 * -R <exampleFASTA><args> -o %s}; {@code args} is appended verbatim, so each non-empty
 * value in the data table starts with its own leading space.
 * NOTE(review): the md5 is presumably compared against the produced output by the
 * WalkerTest machinery - confirm in WalkerTestSpec / executeTest.
 *
 * @param walker walker name, wrapped as {@code Test<walker>Walker} for the -T argument.
 * @param input  input file name, resolved relative to {@code publicTestDir}.
 * @param args   extra command-line arguments (may be empty).
 * @param ext    extension for the single output file.
 * @param md5    MD5 string passed to the spec for the single output file.
 */
@Test(dataProvider = "cramData")
|
||||
public void testCRAM(String walker, String input, String args, String ext, String md5) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
" -T Test" + walker + "Walker" +
|
||||
" -I " + publicTestDir + input +
|
||||
" -R " + exampleFASTA +
|
||||
args +
|
||||
" -o %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(ext),
|
||||
Arrays.asList(md5));
|
||||
executeTest(String.format("testCRAM %s %s -> %s: %s", walker, input, ext, args), spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -76,7 +76,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator();
|
||||
|
||||
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
|
||||
Shard shard = new LocusShard(genomeLocParser, new SAMDataSource(Collections.<SAMReaderID>emptyList(),new ThreadAllocation(),null,genomeLocParser),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
|
||||
Shard shard = new LocusShard(genomeLocParser, new SAMDataSource(null,Collections.<SAMReaderID>emptyList(),new ThreadAllocation(),null,genomeLocParser),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, null, null);
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ import java.util.Collections;
|
|||
public class MockLocusShard extends LocusShard {
|
||||
public MockLocusShard(final GenomeLocParser genomeLocParser,final List<GenomeLoc> intervals) {
|
||||
super( genomeLocParser,
|
||||
new SAMDataSource(Collections.<SAMReaderID>emptyList(),new ThreadAllocation(),null,genomeLocParser),
|
||||
new SAMDataSource(null, Collections.<SAMReaderID>emptyList(),new ThreadAllocation(),null,genomeLocParser),
|
||||
intervals,
|
||||
null);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -84,7 +84,8 @@ public class ReadShardBalancerUnitTest extends BaseTest {
|
|||
public void run() {
|
||||
createTestBAM();
|
||||
|
||||
SAMDataSource dataSource = new SAMDataSource(Arrays.asList(testBAM),
|
||||
SAMDataSource dataSource = new SAMDataSource(null, // Reference not used in this test.
|
||||
Arrays.asList(testBAM),
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
new GenomeLocParser(header.getSequenceDictionary()),
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
// TODO: These legacy tests should really be replaced with a more comprehensive suite of tests for SAMDataSource
|
||||
|
||||
private List<SAMReaderID> readers;
|
||||
private File referenceFile;
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
|
|
@ -76,7 +77,8 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
readers = new ArrayList<SAMReaderID>();
|
||||
|
||||
// sequence
|
||||
seq = new CachingIndexedFastaSequenceFile(new File(b36KGReference));
|
||||
referenceFile = new File(b36KGReference);
|
||||
seq = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
genomeLocParser = new GenomeLocParser(seq.getSequenceDictionary());
|
||||
}
|
||||
|
||||
|
|
@ -101,7 +103,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),new Tags()));
|
||||
|
||||
// the sharding strat.
|
||||
SAMDataSource data = new SAMDataSource(readers,
|
||||
SAMDataSource data = new SAMDataSource(
|
||||
referenceFile,
|
||||
readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
|
|
@ -155,7 +159,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags()));
|
||||
|
||||
// use defaults
|
||||
SAMDataSource data = new SAMDataSource(readers,
|
||||
SAMDataSource data = new SAMDataSource(
|
||||
referenceFile,
|
||||
readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
|
|
@ -171,7 +177,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor");
|
||||
|
||||
boolean removeProgramRecords = false;
|
||||
data = new SAMDataSource(readers,
|
||||
data = new SAMDataSource(
|
||||
referenceFile,
|
||||
readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
|
|
@ -192,7 +200,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false");
|
||||
|
||||
removeProgramRecords = true;
|
||||
data = new SAMDataSource(readers,
|
||||
data = new SAMDataSource(
|
||||
referenceFile,
|
||||
readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
|
|
@ -217,7 +227,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
public void testFailOnReducedReads() {
|
||||
readers.add(new SAMReaderID(new File(privateTestDir + "old.reduced.bam"), new Tags()));
|
||||
|
||||
SAMDataSource data = new SAMDataSource(readers,
|
||||
SAMDataSource data = new SAMDataSource(
|
||||
referenceFile,
|
||||
readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
|
|
@ -234,7 +246,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
public void testFailOnReducedReadsRemovingProgramRecords() {
|
||||
readers.add(new SAMReaderID(new File(privateTestDir + "old.reduced.bam"), new Tags()));
|
||||
|
||||
SAMDataSource data = new SAMDataSource(readers,
|
||||
SAMDataSource data = new SAMDataSource(
|
||||
referenceFile,
|
||||
readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
|||
import org.testng.annotations.AfterClass;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -313,6 +314,7 @@ public class ReadFilterTest extends BaseTest {
|
|||
|
||||
protected SAMDataSource composeDataSource() {
|
||||
checkHeaderExists();
|
||||
final File referenceFile = null; // Not used in this test.
|
||||
final Set<SAMReaderID> readerIDs = new HashSet<>(1);
|
||||
final ThreadAllocation ta = new ThreadAllocation();
|
||||
final Integer numFileHandles = 1; // I believe that any value would do but need to confirm.
|
||||
|
|
@ -326,6 +328,7 @@ public class ReadFilterTest extends BaseTest {
|
|||
|
||||
final GenomeLocParser glp = new GenomeLocParser(header.getSequenceDictionary());
|
||||
final SAMDataSource res = new SAMDataSource(
|
||||
referenceFile,
|
||||
readerIDs,
|
||||
ta,
|
||||
numFileHandles,
|
||||
|
|
|
|||
|
|
@ -79,6 +79,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
return traversals.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
private File referenceFile;
|
||||
private IndexedFastaSequenceFile reference;
|
||||
private SAMSequenceDictionary dictionary;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
|
@ -90,7 +91,8 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
@BeforeClass
|
||||
private void init() throws IOException {
|
||||
//reference = new CachingIndexedFastaSequenceFile(new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta")); // hg19Reference));
|
||||
reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
|
||||
referenceFile = new File(hg19Reference);
|
||||
reference = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
dictionary = reference.getSequenceDictionary();
|
||||
genomeLocParser = new GenomeLocParser(dictionary);
|
||||
|
||||
|
|
@ -470,7 +472,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
SAMReaderID readerID = new SAMReaderID(bamFile, new Tags());
|
||||
samFiles.add(readerID);
|
||||
|
||||
SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
false,
|
||||
ValidationStringency.STRICT,
|
||||
null,
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
|||
/** Test out that we can shard the file and iterate over every read */
|
||||
@Test
|
||||
public void testUnmappedReadCount() {
|
||||
SAMDataSource dataSource = new SAMDataSource(bamList,new ThreadAllocation(),null,genomeLocParser);
|
||||
SAMDataSource dataSource = new SAMDataSource(refFile, bamList,new ThreadAllocation(),null,genomeLocParser);
|
||||
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
|
||||
|
||||
countReadWalker.initialize();
|
||||
|
|
|
|||
|
|
@ -44,8 +44,8 @@
|
|||
<test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners>
|
||||
|
||||
<!-- Version numbers for picard and htsjdk -->
|
||||
<htsjdk.version>1.127.1690</htsjdk.version>
|
||||
<picard.version>1.127.1667</picard.version>
|
||||
<htsjdk.version>1.128.1696</htsjdk.version>
|
||||
<picard.version>1.128.1678</picard.version>
|
||||
</properties>
|
||||
|
||||
<!-- Dependency configuration (versions, etc.) -->
|
||||
|
|
|
|||
|
|
@ -132,4 +132,15 @@ public class GATKBin implements Comparable<GATKBin> {
|
|||
return new GATKChunk[0];
|
||||
return chunkList;
|
||||
}
|
||||
|
||||
// HACK: Using this class's package permissions to further hack the CRAM created SAMRecord's indexing bin and binary attributes.
|
||||
/**
 * Returns the indexing bin of the given read by delegating to {@code read.getIndexingBin()}.
 * NOTE(review): presumably exists because that accessor is not public on SAMRecord and
 * this class shares its package - confirm against the htsjdk version in use.
 *
 * @param read the read to query.
 * @return whatever {@code read.getIndexingBin()} returns.
 */
public static Integer getReadIndexingBin(final SAMRecord read) {
|
||||
return read.getIndexingBin();
|
||||
}
|
||||
/**
 * Sets the indexing bin on the given read by delegating to {@code read.setIndexingBin(indexingBin)}.
 * NOTE(review): presumably exists because that mutator is not public on SAMRecord and
 * this class shares its package - confirm against the htsjdk version in use.
 *
 * @param read        the read to modify.
 * @param indexingBin the indexing bin value to store on the read.
 */
public static void setReadIndexingBin(final SAMRecord read, final Integer indexingBin) {
|
||||
read.setIndexingBin(indexingBin);
|
||||
}
|
||||
/**
 * Returns the binary attributes of the given read by delegating to {@code read.getBinaryAttributes()}.
 * NOTE(review): presumably exists because that accessor is not public on SAMRecord and
 * this class shares its package - confirm against the htsjdk version in use.
 *
 * @param read the read to query.
 * @return whatever {@code read.getBinaryAttributes()} returns.
 */
public static SAMBinaryTagAndValue getReadBinaryAttributes(final SAMRecord read) {
|
||||
return read.getBinaryAttributes();
|
||||
}
|
||||
}
|
||||
|
|
@ -225,20 +225,20 @@ public class UserException extends ReviewedGATKException {
|
|||
|
||||
public static class MissortedBAM extends UserException {
|
||||
public MissortedBAM(SAMFileHeader.SortOrder order, File file, SAMFileHeader header) {
|
||||
super(String.format("Missorted Input SAM/BAM files: %s is must be sorted in %s order but order was: %s", file, order, header.getSortOrder()));
|
||||
super(String.format("Missorted Input SAM/BAM/CRAM files: %s is must be sorted in %s order but order was: %s", file, order, header.getSortOrder()));
|
||||
}
|
||||
|
||||
public MissortedBAM(SAMFileHeader.SortOrder order, String message) {
|
||||
super(String.format("Missorted Input SAM/BAM files: files are not sorted in %s order; %s", order, message));
|
||||
super(String.format("Missorted Input SAM/BAM/CRAM files: files are not sorted in %s order; %s", order, message));
|
||||
}
|
||||
|
||||
public MissortedBAM(SAMFileHeader.SortOrder order, SAMRecord read, String message) {
|
||||
super(String.format("Missorted Input SAM/BAM file %s: file sorted in %s order but %s is required; %s",
|
||||
super(String.format("Missorted Input SAM/BAM/CRAM file %s: file sorted in %s order but %s is required; %s",
|
||||
read.getFileSource().getReader(), read.getHeader().getSortOrder(), order, message));
|
||||
}
|
||||
|
||||
public MissortedBAM(String message) {
|
||||
super(String.format("Missorted Input SAM/BAM files: %s", message));
|
||||
super(String.format("Missorted Input SAM/BAM/CRAM files: %s", message));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -252,7 +252,7 @@ public class UserException extends ReviewedGATKException {
|
|||
}
|
||||
|
||||
public MalformedBAM(String source, String message) {
|
||||
super(String.format("SAM/BAM file %s is malformed: %s", source, message));
|
||||
super(String.format("SAM/BAM/CRAM file %s is malformed: %s", source, message));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -262,7 +262,7 @@ public class UserException extends ReviewedGATKException {
|
|||
}
|
||||
|
||||
public MisencodedBAM(String source, String message) {
|
||||
super(String.format("SAM/BAM file %s appears to be using the wrong encoding for quality scores: %s; please see the GATK --help documentation for options related to this error", source, message));
|
||||
super(String.format("SAM/BAM/CRAM file %s appears to be using the wrong encoding for quality scores: %s; please see the GATK --help documentation for options related to this error", source, message));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.utils.io;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
 * Implemented by objects that carry an associated reference file,
 * exposed through a getter/setter pair.
 */
public interface ReferenceBacked {
|
||||
/**
 * @return the reference file associated with this object.
 */
public File getReferenceFile();
|
||||
/**
 * @param reference the reference file to associate with this object.
 */
public void setReferenceFile(final File reference);
|
||||
}
|
||||
|
|
@ -37,7 +37,6 @@ import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
|||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecordIterator;
|
||||
import org.broadinstitute.gatk.utils.*;
|
||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
|
@ -67,7 +66,6 @@ public class LIBSPerformance extends CommandLineProgram {
|
|||
final GenomeLocParser genomeLocParser = new GenomeLocParser(reference);
|
||||
|
||||
final SAMFileReader reader = new SAMFileReader(samFile);
|
||||
reader.setSAMRecordFactory(new GATKSamRecordFactory());
|
||||
|
||||
SAMRecordIterator rawIterator;
|
||||
if ( location == null )
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.gatk.utils.sam;
|
||||
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.cram.build.CramIO;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
|
|
@ -149,7 +150,7 @@ public class ArtificialSAMFileReader extends SAMFileReader {
|
|||
byte[] byteArray = "".getBytes("ISO-8859-1");
|
||||
return new ByteArrayInputStream(byteArray);
|
||||
}
|
||||
catch( UnsupportedEncodingException ex ) {
|
||||
catch( Exception ex ) {
|
||||
throw new ReviewedGATKException("Unable to build empty input stream",ex);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ import java.util.*;
|
|||
* Changing these values in any way will invalidate the cached value. However, we do not monitor those setter
|
||||
* functions, so modifying a GATKSAMRecord in any way may result in stale cached values.
|
||||
*/
|
||||
public class GATKSAMRecord extends BAMRecord implements Cloneable {
|
||||
public class GATKSAMRecord extends SAMRecord implements Cloneable {
|
||||
// Base Quality Score Recalibrator specific attribute tags
|
||||
public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; // base qualities for insertions
|
||||
public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; // base qualities for deletions
|
||||
|
|
@ -92,42 +92,36 @@ public class GATKSAMRecord extends BAMRecord implements Cloneable {
|
|||
* @param read
|
||||
*/
|
||||
public GATKSAMRecord(final SAMRecord read) {
|
||||
super(read.getHeader(),
|
||||
read.getReferenceIndex(),
|
||||
read.getAlignmentStart(),
|
||||
read.getReadName() != null ? (short)read.getReadNameLength() : 0,
|
||||
(short)read.getMappingQuality(),
|
||||
0,
|
||||
read.getCigarLength(),
|
||||
read.getFlags(),
|
||||
read.getReadLength(),
|
||||
read.getMateReferenceIndex(),
|
||||
read.getMateAlignmentStart(),
|
||||
read.getInferredInsertSize(),
|
||||
null);
|
||||
super(read.getHeader());
|
||||
super.setReferenceIndex(read.getReferenceIndex());
|
||||
super.setAlignmentStart(read.getAlignmentStart());
|
||||
super.setReadName(read.getReadName());
|
||||
super.setMappingQuality(read.getMappingQuality());
|
||||
// indexing bin done below
|
||||
super.setCigar(read.getCigar());
|
||||
super.setFlags(read.getFlags());
|
||||
super.setMateReferenceIndex(read.getMateReferenceIndex());
|
||||
super.setMateAlignmentStart(read.getMateAlignmentStart());
|
||||
super.setInferredInsertSize(read.getInferredInsertSize());
|
||||
SAMReadGroupRecord samRG = read.getReadGroup();
|
||||
clearAttributes();
|
||||
SAMBinaryTagAndValue samAttr = GATKBin.getReadBinaryAttributes(read);
|
||||
if (samAttr == null) {
|
||||
clearAttributes();
|
||||
} else {
|
||||
setAttributes(samAttr);
|
||||
}
|
||||
if (samRG != null) {
|
||||
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG);
|
||||
setReadGroup(rg);
|
||||
}
|
||||
}
|
||||
|
||||
public GATKSAMRecord(final SAMFileHeader header,
|
||||
final int referenceSequenceIndex,
|
||||
final int alignmentStart,
|
||||
final short readNameLength,
|
||||
final short mappingQuality,
|
||||
final int indexingBin,
|
||||
final int cigarLen,
|
||||
final int flags,
|
||||
final int readLen,
|
||||
final int mateReferenceSequenceIndex,
|
||||
final int mateAlignmentStart,
|
||||
final int insertSize,
|
||||
final byte[] variableLengthBlock) {
|
||||
super(header, referenceSequenceIndex, alignmentStart, readNameLength, mappingQuality, indexingBin, cigarLen,
|
||||
flags, readLen, mateReferenceSequenceIndex, mateAlignmentStart, insertSize, variableLengthBlock);
|
||||
super.setFileSource(read.getFileSource());
|
||||
super.setReadName(read.getReadName());
|
||||
super.setCigarString(read.getCigarString());
|
||||
super.setReadBases(read.getReadBases());
|
||||
super.setBaseQualities(read.getBaseQualities());
|
||||
// From SAMRecord constructor: Do this after the above because setCigarString will clear it.
|
||||
GATKBin.setReadIndexingBin(this, GATKBin.getReadIndexingBin(read));
|
||||
}
|
||||
|
||||
public static GATKSAMRecord createRandomRead(int length) {
|
||||
|
|
@ -520,19 +514,15 @@ public class GATKSAMRecord extends BAMRecord implements Cloneable {
|
|||
* @return a read with no bases but safe for the GATK
|
||||
*/
|
||||
public static GATKSAMRecord emptyRead(GATKSAMRecord read) {
|
||||
GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(),
|
||||
read.getReferenceIndex(),
|
||||
0,
|
||||
(short) 0,
|
||||
(short) 0,
|
||||
0,
|
||||
0,
|
||||
read.getFlags(),
|
||||
0,
|
||||
read.getMateReferenceIndex(),
|
||||
read.getMateAlignmentStart(),
|
||||
read.getInferredInsertSize(),
|
||||
null);
|
||||
final GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader());
|
||||
emptyRead.setReferenceIndex(read.getReferenceIndex());
|
||||
emptyRead.setAlignmentStart(0);
|
||||
emptyRead.setMappingQuality(0);
|
||||
// setting read indexing bin last
|
||||
emptyRead.setFlags(read.getFlags());
|
||||
emptyRead.setMateReferenceIndex(read.getMateReferenceIndex());
|
||||
emptyRead.setMateAlignmentStart(read.getMateAlignmentStart());
|
||||
emptyRead.setInferredInsertSize(read.getInferredInsertSize());
|
||||
|
||||
emptyRead.setCigarString("");
|
||||
emptyRead.setReadBases(new byte[0]);
|
||||
|
|
@ -545,6 +535,8 @@ public class GATKSAMRecord extends BAMRecord implements Cloneable {
|
|||
emptyRead.setReadGroup(rg);
|
||||
}
|
||||
|
||||
GATKBin.setReadIndexingBin(emptyRead, 0);
|
||||
|
||||
return emptyRead;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ package org.broadinstitute.gatk.utils.sam;
|
|||
import htsjdk.samtools.SAMRecord;
|
||||
import htsjdk.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
|
|
@ -40,9 +39,9 @@ import java.util.Iterator;
|
|||
* Time: 1:19 PM
|
||||
*/
|
||||
public class GATKSAMRecordIterator implements CloseableIterator<GATKSAMRecord>, Iterable<GATKSAMRecord> {
|
||||
final CloseableIterator<SAMRecord> it;
|
||||
final CloseableIterator<? extends SAMRecord> it;
|
||||
|
||||
public GATKSAMRecordIterator(final CloseableIterator<SAMRecord> it) {
|
||||
public GATKSAMRecordIterator(final CloseableIterator<? extends SAMRecord> it) {
|
||||
this.it = it;
|
||||
}
|
||||
|
||||
|
|
@ -51,7 +50,14 @@ public class GATKSAMRecordIterator implements CloseableIterator<GATKSAMRecord>,
|
|||
}
|
||||
|
||||
@Override public boolean hasNext() { return it.hasNext(); }
|
||||
@Override public GATKSAMRecord next() { return (GATKSAMRecord)it.next(); }
|
||||
/**
 * Returns the next record from the wrapped iterator as a GATKSAMRecord.
 * A record that already is a GATKSAMRecord is returned as-is (cast only);
 * any other SAMRecord is wrapped in a newly constructed GATKSAMRecord.
 *
 * @return the next record, as a GATKSAMRecord.
 */
@Override public GATKSAMRecord next() {
|
||||
SAMRecord next = it.next();
|
||||
if (next instanceof GATKSAMRecord) {
|
||||
return (GATKSAMRecord)next;
|
||||
} else {
|
||||
// Not already a GATK record: build one from the plain SAMRecord.
return new GATKSAMRecord(next);
|
||||
}
|
||||
}
|
||||
@Override public void remove() { it.remove(); }
|
||||
@Override public void close() { it.close(); }
|
||||
@Override public Iterator<GATKSAMRecord> iterator() { return this; }
|
||||
|
|
|
|||
|
|
@ -1,75 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.utils.sam;
|
||||
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.samtools.SAMRecord;
|
||||
import htsjdk.samtools.SAMRecordFactory;
|
||||
import htsjdk.samtools.BAMRecord;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
|
||||
/**
|
||||
* Factory interface implementation used to create GATKSamRecords
|
||||
* from SAMFileReaders with SAM-JDK
|
||||
*
|
||||
* @author Mark DePristo
|
||||
*/
|
||||
public class GATKSamRecordFactory implements SAMRecordFactory {
|
||||
|
||||
/** Create a new SAMRecord to be filled in */
|
||||
public SAMRecord createSAMRecord(SAMFileHeader header) {
|
||||
throw new UserException.BadInput("The GATK now longer supports input SAM files");
|
||||
}
|
||||
|
||||
/** Create a new BAM Record. */
|
||||
public BAMRecord createBAMRecord(final SAMFileHeader header,
|
||||
final int referenceSequenceIndex,
|
||||
final int alignmentStart,
|
||||
final short readNameLength,
|
||||
final short mappingQuality,
|
||||
final int indexingBin,
|
||||
final int cigarLen,
|
||||
final int flags,
|
||||
final int readLen,
|
||||
final int mateReferenceSequenceIndex,
|
||||
final int mateAlignmentStart,
|
||||
final int insertSize,
|
||||
final byte[] variableLengthBlock) {
|
||||
return new GATKSAMRecord(header,
|
||||
referenceSequenceIndex,
|
||||
alignmentStart,
|
||||
readNameLength,
|
||||
mappingQuality,
|
||||
indexingBin,
|
||||
cigarLen,
|
||||
flags,
|
||||
readLen,
|
||||
mateReferenceSequenceIndex,
|
||||
mateAlignmentStart,
|
||||
insertSize,
|
||||
variableLengthBlock);
|
||||
}
|
||||
}
|
||||
|
|
@ -25,8 +25,10 @@
|
|||
|
||||
package org.broadinstitute.gatk.utils.sam;
|
||||
|
||||
import htsjdk.samtools.SAMFileReader;
|
||||
import htsjdk.samtools.SamReader;
|
||||
import htsjdk.samtools.SamReaderFactory;
|
||||
import htsjdk.samtools.ValidationStringency;
|
||||
import org.broadinstitute.gatk.utils.io.ReferenceBacked;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -39,12 +41,17 @@ import java.io.File;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class SAMFileReaderBuilder {
|
||||
public class SAMReaderBuilder implements ReferenceBacked {
|
||||
/**
|
||||
* Which file should be opened for reading?
|
||||
*/
|
||||
private File samFile = null;
|
||||
|
||||
/**
|
||||
* The reference file for the samFile.
|
||||
*/
|
||||
private File referenceFile = null;
|
||||
|
||||
/**
|
||||
* What compression level should be used when building this file?
|
||||
*/
|
||||
|
|
@ -58,6 +65,16 @@ public class SAMFileReaderBuilder {
|
|||
this.samFile = samFile;
|
||||
}
|
||||
|
||||
@Override
|
||||
public File getReferenceFile() {
|
||||
return referenceFile;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setReferenceFile(final File referenceFile) {
|
||||
this.referenceFile = referenceFile;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the validation stringency to apply when reading this sam file.
|
||||
* @param validationStringency Stringency to apply. Must not be null.
|
||||
|
|
@ -70,15 +87,16 @@ public class SAMFileReaderBuilder {
|
|||
* Create the SAM reader, given the constituent parts accrued.
|
||||
* @return Newly minted SAM reader.
|
||||
*/
|
||||
public SAMFileReader build() {
|
||||
public SamReader build() {
|
||||
if( samFile == null )
|
||||
throw new ReviewedGATKException( "Filename for output sam file must be supplied.");
|
||||
if( validationStringency == null )
|
||||
throw new ReviewedGATKException( "Header for output sam file must be supplied.");
|
||||
|
||||
SAMFileReader reader = new SAMFileReader( samFile );
|
||||
reader.setValidationStringency( validationStringency );
|
||||
|
||||
return reader;
|
||||
return SamReaderFactory
|
||||
.makeDefault()
|
||||
.referenceSequence(this.getReferenceFile())
|
||||
.validationStringency(validationStringency)
|
||||
.open(samFile);
|
||||
}
|
||||
}
|
||||
|
|
@ -73,16 +73,16 @@ public class ListFileUtils {
|
|||
throw new UserException.CouldNotReadInputFile(new File(inputFileName), "Unable to find file while unpacking reads", ex);
|
||||
}
|
||||
}
|
||||
else if(inputFileName.toLowerCase().endsWith(".bam")) {
|
||||
else if(inputFileName.toLowerCase().endsWith(".bam") || inputFileName.toLowerCase().endsWith(".cram")) {
|
||||
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
||||
}
|
||||
else if(inputFileName.endsWith("stdin")) {
|
||||
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
||||
}
|
||||
else {
|
||||
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM files with the .bam extension and lists of BAM files " +
|
||||
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
|
||||
"of BAM files is in the correct format, update the extension, and try again.",inputFileName));
|
||||
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM/CRAM files with the .bam/.cram extension and lists of BAM/CRAM files " +
|
||||
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM/CRAM file or list " +
|
||||
"of BAM/CRAM files is in the correct format, update the extension, and try again.",inputFileName));
|
||||
}
|
||||
}
|
||||
return unpackedReads;
|
||||
|
|
|
|||
|
|
@ -40,7 +40,6 @@ import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl;
|
|||
import org.broadinstitute.gatk.utils.sam.ArtificialBAMBuilder;
|
||||
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory;
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.variantcontext.VariantContextBuilder;
|
||||
|
|
@ -219,11 +218,10 @@ public class ExampleToCopyUnitTest extends BaseTest {
|
|||
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(seq, 20, 10);
|
||||
final File bam = bamBuilder.makeTemporarilyBAMFile();
|
||||
final SAMFileReader reader = new SAMFileReader(bam);
|
||||
reader.setSAMRecordFactory(new GATKSamRecordFactory());
|
||||
|
||||
final Iterator<SAMRecord> bamIt = reader.iterator();
|
||||
while ( bamIt.hasNext() ) {
|
||||
final GATKSAMRecord read = (GATKSAMRecord)bamIt.next(); // all reads are actually GATKSAMRecords
|
||||
final SAMRecord read = bamIt.next(); // reads come back as htsjdk SAMRecords, not GATKSAMRecords
|
||||
// TODO -- add some tests that use reads from a BAM
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -3,13 +3,13 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>picard</groupId>
|
||||
<artifactId>picard</artifactId>
|
||||
<version>1.127.1667</version>
|
||||
<version>1.128.1678</version>
|
||||
<name>picard</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>samtools</groupId>
|
||||
<artifactId>htsjdk</artifactId>
|
||||
<version>1.127.1690</version>
|
||||
<version>1.128.1696</version>
|
||||
</dependency>
|
||||
<!-- TODO: Picard is using a custom zip with just ant's BZip2 classes. See also: http://www.kohsuke.org/bzip2 -->
|
||||
<dependency>
|
||||
Binary file not shown.
|
|
@ -3,7 +3,7 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>samtools</groupId>
|
||||
<artifactId>htsjdk</artifactId>
|
||||
<version>1.127.1690</version>
|
||||
<version>1.128.1696</version>
|
||||
<name>htsjdk</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
Loading…
Reference in New Issue