Merge pull request #805 from broadinstitute/ks_gatk_cram
Introductory GATK CRAM support
This commit is contained in:
commit
d561fc5edc
|
|
@ -51,6 +51,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.utils.sam;
|
package org.broadinstitute.gatk.utils.sam;
|
||||||
|
|
||||||
|
import htsjdk.samtools.GATKBin;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -69,15 +71,30 @@ public class ClippedGATKSAMRecord extends GATKSAMRecord {
|
||||||
* @param end inclusive last position in {@code read} included in the clipped view.
|
* @param end inclusive last position in {@code read} included in the clipped view.
|
||||||
*/
|
*/
|
||||||
public ClippedGATKSAMRecord(final GATKSAMRecord read, int start, int end) {
|
public ClippedGATKSAMRecord(final GATKSAMRecord read, int start, int end) {
|
||||||
super(read.getHeader(), read.getReferenceIndex(), read.getAlignmentStart() + start, (short) read.getReadNameLength(),
|
super(read.getHeader());
|
||||||
(short) 100, -1, read.getCigarLength(), read.getFlags(), end - start,
|
this.setReferenceIndex(read.getReferenceIndex());
|
||||||
read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getInferredInsertSize(),
|
this.setAlignmentStart(read.getAlignmentStart() + start);
|
||||||
new byte[0]);
|
this.setMappingQuality(100);
|
||||||
|
// setting read indexing bin below
|
||||||
|
this.setFlags(read.getFlags());
|
||||||
|
this.setMateReferenceIndex(read.getMateReferenceIndex());
|
||||||
|
this.setMateAlignmentStart(read.getMateAlignmentStart());
|
||||||
|
this.setInferredInsertSize(read.getInferredInsertSize());
|
||||||
this.setReadBases(Arrays.copyOfRange(read.getReadBases(), start, end));
|
this.setReadBases(Arrays.copyOfRange(read.getReadBases(), start, end));
|
||||||
this.setBaseQualities(Arrays.copyOfRange(read.getBaseQualities(),start,end));
|
this.setBaseQualities(Arrays.copyOfRange(read.getBaseQualities(),start,end));
|
||||||
this.setReadName(read.getReadName());
|
this.setReadName(read.getReadName());
|
||||||
insertionQuals = Arrays.copyOfRange(read.getBaseInsertionQualities(),start,end);
|
insertionQuals = Arrays.copyOfRange(read.getBaseInsertionQualities(),start,end);
|
||||||
deletionQuals = Arrays.copyOfRange(read.getBaseDeletionQualities(),start,end);
|
deletionQuals = Arrays.copyOfRange(read.getBaseDeletionQualities(),start,end);
|
||||||
|
|
||||||
|
// Set these to null in order to mark them as being candidates for lazy initialization.
|
||||||
|
// If this is not done, they will have non-null defaults.
|
||||||
|
super.setReadName(null);
|
||||||
|
super.setCigarString(null);
|
||||||
|
super.setReadBases(null);
|
||||||
|
super.setBaseQualities(null);
|
||||||
|
|
||||||
|
// Do this after the above because setCigarString will clear it.
|
||||||
|
GATKBin.setReadIndexingBin(this, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||||
|
|
||||||
|
import htsjdk.samtools.GATKBin;
|
||||||
import htsjdk.samtools.SAMFileHeader;
|
import htsjdk.samtools.SAMFileHeader;
|
||||||
import htsjdk.samtools.SAMSequenceDictionary;
|
import htsjdk.samtools.SAMSequenceDictionary;
|
||||||
import htsjdk.samtools.SAMSequenceRecord;
|
import htsjdk.samtools.SAMSequenceRecord;
|
||||||
|
|
@ -317,22 +318,18 @@ public class ActiveRegionTestDataSet {
|
||||||
|
|
||||||
private class MyGATKSAMRecord extends GATKSAMRecord {
|
private class MyGATKSAMRecord extends GATKSAMRecord {
|
||||||
protected MyGATKSAMRecord(final GATKSAMRecord r) {
|
protected MyGATKSAMRecord(final GATKSAMRecord r) {
|
||||||
super(r.getHeader(), r.getReferenceIndex(), r.getAlignmentStart(), (short) r.getReadNameLength(),
|
super(r);
|
||||||
(short) 100, -1, r.getCigarLength(), r.getFlags(), r.getReadLength(),
|
this.setMappingQuality(100);
|
||||||
r.getMateReferenceIndex(), r.getMateAlignmentStart(), r.getInferredInsertSize(),
|
GATKBin.setReadIndexingBin(this, -1);
|
||||||
new byte[0]);
|
|
||||||
this.setReadBases(r.getReadBases());
|
|
||||||
this.setBaseQualities(r.getBaseQualities());
|
|
||||||
this.setReadName(r.getReadName());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ExponentialDistribution indelLengthDist = MathUtils.exponentialDistribution(1.0 / 0.9);
|
ExponentialDistribution indelLengthDist = MathUtils.exponentialDistribution(1.0 / 0.9);
|
||||||
|
|
||||||
public MyGATKSAMRecord(final GATKSAMRecord r, final Random rnd) {
|
public MyGATKSAMRecord(final GATKSAMRecord r, final Random rnd) {
|
||||||
super(r.getHeader(), r.getReferenceIndex(), r.getAlignmentStart(), (short) r.getReadNameLength(),
|
super(r);
|
||||||
(short) 100, -1, r.getCigarLength(), r.getFlags(), r.getReadLength(),
|
this.setMappingQuality(100);
|
||||||
r.getMateReferenceIndex(), r.getMateAlignmentStart(), r.getInferredInsertSize(),
|
// setting read indexing bin last
|
||||||
new byte[0]);
|
|
||||||
final byte[] bases = new byte[r.getReadBases().length];
|
final byte[] bases = new byte[r.getReadBases().length];
|
||||||
|
|
||||||
final byte[] readBases = r.getReadBases();
|
final byte[] readBases = r.getReadBases();
|
||||||
|
|
@ -384,7 +381,7 @@ public class ActiveRegionTestDataSet {
|
||||||
this.setBaseQualities(r.getBaseQualities());
|
this.setBaseQualities(r.getBaseQualities());
|
||||||
this.setReadName(r.getReadName());
|
this.setReadName(r.getReadName());
|
||||||
|
|
||||||
|
GATKBin.setReadIndexingBin(this, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
private int generateIndelLength(final Random rnd) {
|
private int generateIndelLength(final Random rnd) {
|
||||||
|
|
|
||||||
|
|
@ -46,9 +46,13 @@ public class MyExampleWalkerIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
private File getResource(String path) throws URISyntaxException {
|
private File getResource(String path) throws URISyntaxException {
|
||||||
|
return new File(publicTestDir, path);
|
||||||
|
/*
|
||||||
|
TODO: Enable proper resource extraction from the test jars. For now just use the publicTestDir path.
|
||||||
URL resourceUrl = getClass().getResource(path);
|
URL resourceUrl = getClass().getResource(path);
|
||||||
if (resourceUrl == null)
|
if (resourceUrl == null)
|
||||||
throw new MissingResourceException("Resource not found: " + path, getClass().getSimpleName(), path);
|
throw new MissingResourceException("Resource not found: " + path, getClass().getSimpleName(), path);
|
||||||
return new File(resourceUrl.toURI());
|
return new File(resourceUrl.toURI());
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,7 @@ import org.broadinstitute.gatk.engine.io.stubs.Stub;
|
||||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformersMode;
|
import org.broadinstitute.gatk.engine.iterators.ReadTransformersMode;
|
||||||
import org.broadinstitute.gatk.engine.phonehome.GATKRunReport;
|
import org.broadinstitute.gatk.engine.phonehome.GATKRunReport;
|
||||||
|
import org.broadinstitute.gatk.utils.io.ReferenceBacked;
|
||||||
import org.broadinstitute.gatk.utils.refdata.tracks.IndexDictionaryUtils;
|
import org.broadinstitute.gatk.utils.refdata.tracks.IndexDictionaryUtils;
|
||||||
import org.broadinstitute.gatk.utils.refdata.tracks.RMDTrackBuilder;
|
import org.broadinstitute.gatk.utils.refdata.tracks.RMDTrackBuilder;
|
||||||
import org.broadinstitute.gatk.utils.refdata.utils.RMDTriplet;
|
import org.broadinstitute.gatk.utils.refdata.utils.RMDTriplet;
|
||||||
|
|
@ -697,9 +698,12 @@ public class GenomeAnalysisEngine {
|
||||||
* @param outputTracker the tracker supplying the initialization data.
|
* @param outputTracker the tracker supplying the initialization data.
|
||||||
*/
|
*/
|
||||||
private void initializeOutputStreams(final OutputTracker outputTracker) {
|
private void initializeOutputStreams(final OutputTracker outputTracker) {
|
||||||
for (final Map.Entry<ArgumentSource, Object> input : getInputs().entrySet())
|
for (final Map.Entry<ArgumentSource, Object> input : getInputs().entrySet()) {
|
||||||
|
setReferenceFile(input.getValue());
|
||||||
outputTracker.addInput(input.getKey(), input.getValue());
|
outputTracker.addInput(input.getKey(), input.getValue());
|
||||||
|
}
|
||||||
for (final Stub<?> stub : getOutputs()) {
|
for (final Stub<?> stub : getOutputs()) {
|
||||||
|
setReferenceFile(stub);
|
||||||
stub.processArguments(argCollection);
|
stub.processArguments(argCollection);
|
||||||
outputTracker.addOutput(stub);
|
outputTracker.addOutput(stub);
|
||||||
}
|
}
|
||||||
|
|
@ -707,6 +711,12 @@ public class GenomeAnalysisEngine {
|
||||||
outputTracker.prepareWalker(walker, getArguments().strictnessLevel);
|
outputTracker.prepareWalker(walker, getArguments().strictnessLevel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void setReferenceFile(final Object object) {
|
||||||
|
if (object instanceof ReferenceBacked) {
|
||||||
|
((ReferenceBacked)object).setReferenceFile(argCollection.referenceFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public ReferenceDataSource getReferenceDataSource() {
|
public ReferenceDataSource getReferenceDataSource() {
|
||||||
return referenceDataSource;
|
return referenceDataSource;
|
||||||
}
|
}
|
||||||
|
|
@ -907,6 +917,7 @@ public class GenomeAnalysisEngine {
|
||||||
final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker;
|
final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker;
|
||||||
|
|
||||||
return new SAMDataSource(
|
return new SAMDataSource(
|
||||||
|
argCollection.referenceFile,
|
||||||
samReaderIDs,
|
samReaderIDs,
|
||||||
threadAllocation,
|
threadAllocation,
|
||||||
argCollection.numberOfBAMFileHandles,
|
argCollection.numberOfBAMFileHandles,
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ import htsjdk.samtools.MergingSamRecordIterator;
|
||||||
import htsjdk.samtools.SamFileHeaderMerger;
|
import htsjdk.samtools.SamFileHeaderMerger;
|
||||||
import htsjdk.samtools.*;
|
import htsjdk.samtools.*;
|
||||||
import htsjdk.samtools.util.CloseableIterator;
|
import htsjdk.samtools.util.CloseableIterator;
|
||||||
|
import htsjdk.samtools.util.CloserUtil;
|
||||||
import htsjdk.samtools.util.RuntimeIOException;
|
import htsjdk.samtools.util.RuntimeIOException;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.engine.ReadMetrics;
|
import org.broadinstitute.gatk.engine.ReadMetrics;
|
||||||
|
|
@ -49,7 +50,8 @@ import org.broadinstitute.gatk.utils.interval.IntervalMergingRule;
|
||||||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator;
|
import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator;
|
||||||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIteratorAdapter;
|
import org.broadinstitute.gatk.utils.iterators.GATKSAMIteratorAdapter;
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord;
|
import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord;
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory;
|
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||||
|
import org.broadinstitute.gatk.utils.sam.GATKSAMRecordIterator;
|
||||||
import org.broadinstitute.gatk.utils.sam.SAMReaderID;
|
import org.broadinstitute.gatk.utils.sam.SAMReaderID;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -64,7 +66,8 @@ import java.util.concurrent.Callable;
|
||||||
* Converts shards to SAM iterators over the specified region
|
* Converts shards to SAM iterators over the specified region
|
||||||
*/
|
*/
|
||||||
public class SAMDataSource {
|
public class SAMDataSource {
|
||||||
final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
|
/** Reference file */
|
||||||
|
private final File referenceFile;
|
||||||
|
|
||||||
/** Backing support for reads. */
|
/** Backing support for reads. */
|
||||||
protected final ReadProperties readProperties;
|
protected final ReadProperties readProperties;
|
||||||
|
|
@ -177,8 +180,11 @@ public class SAMDataSource {
|
||||||
*
|
*
|
||||||
* @param samFiles list of reads files.
|
* @param samFiles list of reads files.
|
||||||
*/
|
*/
|
||||||
public SAMDataSource(Collection<SAMReaderID> samFiles, ThreadAllocation threadAllocation, Integer numFileHandles, GenomeLocParser genomeLocParser) {
|
public SAMDataSource(final File referenceFile, final Collection<SAMReaderID> samFiles,
|
||||||
|
final ThreadAllocation threadAllocation, final Integer numFileHandles,
|
||||||
|
final GenomeLocParser genomeLocParser) {
|
||||||
this(
|
this(
|
||||||
|
referenceFile,
|
||||||
samFiles,
|
samFiles,
|
||||||
threadAllocation,
|
threadAllocation,
|
||||||
numFileHandles,
|
numFileHandles,
|
||||||
|
|
@ -198,6 +204,7 @@ public class SAMDataSource {
|
||||||
* For testing purposes
|
* For testing purposes
|
||||||
*/
|
*/
|
||||||
public SAMDataSource(
|
public SAMDataSource(
|
||||||
|
final File referenceFile,
|
||||||
Collection<SAMReaderID> samFiles,
|
Collection<SAMReaderID> samFiles,
|
||||||
ThreadAllocation threadAllocation,
|
ThreadAllocation threadAllocation,
|
||||||
Integer numFileHandles,
|
Integer numFileHandles,
|
||||||
|
|
@ -209,7 +216,8 @@ public class SAMDataSource {
|
||||||
ValidationExclusion exclusionList,
|
ValidationExclusion exclusionList,
|
||||||
Collection<ReadFilter> supplementalFilters,
|
Collection<ReadFilter> supplementalFilters,
|
||||||
boolean includeReadsWithDeletionAtLoci) {
|
boolean includeReadsWithDeletionAtLoci) {
|
||||||
this( samFiles,
|
this( referenceFile,
|
||||||
|
samFiles,
|
||||||
threadAllocation,
|
threadAllocation,
|
||||||
numFileHandles,
|
numFileHandles,
|
||||||
genomeLocParser,
|
genomeLocParser,
|
||||||
|
|
@ -230,6 +238,7 @@ public class SAMDataSource {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new SAM data source given the supplied read metadata.
|
* Create a new SAM data source given the supplied read metadata.
|
||||||
|
* @param referenceFile reference file.
|
||||||
* @param samFiles list of reads files.
|
* @param samFiles list of reads files.
|
||||||
* @param useOriginalBaseQualities True if original base qualities should be used.
|
* @param useOriginalBaseQualities True if original base qualities should be used.
|
||||||
* @param strictness Stringency of reads file parsing.
|
* @param strictness Stringency of reads file parsing.
|
||||||
|
|
@ -247,6 +256,7 @@ public class SAMDataSource {
|
||||||
* @param intervalMergingRule how are adjacent intervals merged by the sharder
|
* @param intervalMergingRule how are adjacent intervals merged by the sharder
|
||||||
*/
|
*/
|
||||||
public SAMDataSource(
|
public SAMDataSource(
|
||||||
|
final File referenceFile,
|
||||||
Collection<SAMReaderID> samFiles,
|
Collection<SAMReaderID> samFiles,
|
||||||
ThreadAllocation threadAllocation,
|
ThreadAllocation threadAllocation,
|
||||||
Integer numFileHandles,
|
Integer numFileHandles,
|
||||||
|
|
@ -265,6 +275,7 @@ public class SAMDataSource {
|
||||||
final Map<String, String> sampleRenameMap,
|
final Map<String, String> sampleRenameMap,
|
||||||
final IntervalMergingRule intervalMergingRule) {
|
final IntervalMergingRule intervalMergingRule) {
|
||||||
|
|
||||||
|
this.referenceFile = referenceFile;
|
||||||
this.readMetrics = new ReadMetrics();
|
this.readMetrics = new ReadMetrics();
|
||||||
this.genomeLocParser = genomeLocParser;
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.intervalMergingRule = intervalMergingRule;
|
this.intervalMergingRule = intervalMergingRule;
|
||||||
|
|
@ -303,7 +314,7 @@ public class SAMDataSource {
|
||||||
"Please check that the file is present and readable and try again.");
|
"Please check that the file is present and readable and try again.");
|
||||||
|
|
||||||
// Get the sort order, forcing it to coordinate if unsorted.
|
// Get the sort order, forcing it to coordinate if unsorted.
|
||||||
SAMFileReader reader = readers.getReader(readerID);
|
SamReader reader = readers.getReader(readerID);
|
||||||
SAMFileHeader header = reader.getFileHeader();
|
SAMFileHeader header = reader.getFileHeader();
|
||||||
|
|
||||||
headers.put(readerID,header);
|
headers.put(readerID,header);
|
||||||
|
|
@ -343,7 +354,7 @@ public class SAMDataSource {
|
||||||
// cache the read group id (original) -> read group id (merged)
|
// cache the read group id (original) -> read group id (merged)
|
||||||
// and read group id (merged) -> read group id (original) mappings.
|
// and read group id (merged) -> read group id (original) mappings.
|
||||||
for(SAMReaderID id: readerIDs) {
|
for(SAMReaderID id: readerIDs) {
|
||||||
SAMFileReader reader = readers.getReader(id);
|
SamReader reader = readers.getReader(id);
|
||||||
|
|
||||||
ReadGroupMapping mappingToMerged = new ReadGroupMapping();
|
ReadGroupMapping mappingToMerged = new ReadGroupMapping();
|
||||||
|
|
||||||
|
|
@ -385,8 +396,8 @@ public class SAMDataSource {
|
||||||
public void close() {
|
public void close() {
|
||||||
SAMReaders readers = resourcePool.getAvailableReaders();
|
SAMReaders readers = resourcePool.getAvailableReaders();
|
||||||
for(SAMReaderID readerID: readerIDs) {
|
for(SAMReaderID readerID: readerIDs) {
|
||||||
SAMFileReader reader = readers.getReader(readerID);
|
SamReader reader = readers.getReader(readerID);
|
||||||
reader.close();
|
CloserUtil.close(reader);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -463,14 +474,6 @@ public class SAMDataSource {
|
||||||
return mergedToOriginalReadGroupMappings.get(mergedReadGroupId);
|
return mergedToOriginalReadGroupMappings.get(mergedReadGroupId);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* True if all readers have an index.
|
|
||||||
* @return True if all readers have an index.
|
|
||||||
*/
|
|
||||||
public boolean hasIndex() {
|
|
||||||
return readerIDs.size() == bamIndices.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the index for a particular reader. Always preloaded.
|
* Gets the index for a particular reader. Always preloaded.
|
||||||
* @param id Id of the reader.
|
* @param id Id of the reader.
|
||||||
|
|
@ -480,6 +483,44 @@ public class SAMDataSource {
|
||||||
return bamIndices.get(id);
|
return bamIndices.get(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return true if the index for a particular reader exists.
|
||||||
|
* @param id Id of the reader.
|
||||||
|
* @return True if the index exists.
|
||||||
|
*/
|
||||||
|
public boolean hasIndex(final SAMReaderID id) {
|
||||||
|
return bamIndices.containsKey(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* True if all readers that require an index for SAMFileSpan creation have an index.
|
||||||
|
* @return True if all readers that require an index for SAMFileSpan creation have an index.
|
||||||
|
*/
|
||||||
|
public boolean hasIndex() {
|
||||||
|
for (final SAMReaderID readerID: readerIDs)
|
||||||
|
if (isSAMFileSpanSupported(readerID))
|
||||||
|
if (!hasIndex(readerID))
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Returns true if the reader can use file spans.
|
||||||
|
* @return true if file spans are supported.
|
||||||
|
*/
|
||||||
|
private boolean isSAMFileSpanSupported(final SAMReaderID readerID) {
|
||||||
|
// example: https://github.com/samtools/htsjdk/blob/ee4308ede60962f3ab4275473ac384724b471149/src/java/htsjdk/samtools/BAMFileReader.java#L341
|
||||||
|
return readerID.getSamFile().getName().toLowerCase().endsWith(SamReader.Type.BAM_TYPE.fileExtension());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the reader caches its SAMFileHeader for each iterator.
|
||||||
|
* @return true if this reader caches its SAMFileHeader for each iterator.
|
||||||
|
*/
|
||||||
|
private boolean isIteratorSAMFileHeaderCached(final SAMReaderID readerID) {
|
||||||
|
// example: https://github.com/samtools/htsjdk/blob/ee4308ede60962f3ab4275473ac384724b471149/src/java/htsjdk/samtools/CRAMFileReader.java#L183
|
||||||
|
return !readerID.getSamFile().getName().toLowerCase().endsWith(SamReader.Type.CRAM_TYPE.fileExtension());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the sort order of the readers.
|
* Retrieves the sort order of the readers.
|
||||||
* @return Sort order. Can be unsorted, coordinate order, or query name order.
|
* @return Sort order. Can be unsorted, coordinate order, or query name order.
|
||||||
|
|
@ -538,7 +579,17 @@ public class SAMDataSource {
|
||||||
SAMReaders readers = resourcePool.getAvailableReaders();
|
SAMReaders readers = resourcePool.getAvailableReaders();
|
||||||
|
|
||||||
for ( SAMReaderID id: getReaderIDs() ) {
|
for ( SAMReaderID id: getReaderIDs() ) {
|
||||||
initialPositions.put(id, new GATKBAMFileSpan(readers.getReader(id).getFilePointerSpanningReads()));
|
GATKBAMFileSpan span;
|
||||||
|
try {
|
||||||
|
span = new GATKBAMFileSpan(readers.getReader(id).indexing().getFilePointerSpanningReads());
|
||||||
|
} catch (RuntimeException e) {
|
||||||
|
if ("Not implemented.".equals(e.getMessage())) { https://github.com/samtools/htsjdk/blob/035d4319643657d715e93c53c13fe4a1f64e0188/src/java/htsjdk/samtools/CRAMFileReader.java#L197
|
||||||
|
span = new GATKBAMFileSpan(new GATKChunk(0, Long.MAX_VALUE));
|
||||||
|
} else {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
initialPositions.put(id, span);
|
||||||
}
|
}
|
||||||
|
|
||||||
resourcePool.releaseReaders(readers);
|
resourcePool.releaseReaders(readers);
|
||||||
|
|
@ -567,7 +618,7 @@ public class SAMDataSource {
|
||||||
Map<SamReader,CloseableIterator<SAMRecord>> iteratorMap = new HashMap<>();
|
Map<SamReader,CloseableIterator<SAMRecord>> iteratorMap = new HashMap<>();
|
||||||
|
|
||||||
for(SAMReaderID id: getReaderIDs()) {
|
for(SAMReaderID id: getReaderIDs()) {
|
||||||
CloseableIterator<SAMRecord> iterator = null;
|
CloseableIterator<SAMRecord> iterator;
|
||||||
|
|
||||||
// TODO: null used to be the signal for unmapped, but we've replaced that with a simple index query for the last bin.
|
// TODO: null used to be the signal for unmapped, but we've replaced that with a simple index query for the last bin.
|
||||||
// TODO: Kill this check once we've proven that the design elements are gone.
|
// TODO: Kill this check once we've proven that the design elements are gone.
|
||||||
|
|
@ -576,19 +627,33 @@ public class SAMDataSource {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if(threadAllocation.getNumIOThreads() > 0) {
|
if(threadAllocation.getNumIOThreads() > 0) {
|
||||||
|
// TODO: need to add a friendly error if -nit is used with non-BAM input. Later, possibly add this capability for CRAM once htsjdk supports CRAM file spans.
|
||||||
BlockInputStream inputStream = readers.getInputStream(id);
|
BlockInputStream inputStream = readers.getInputStream(id);
|
||||||
inputStream.submitAccessPlan(new BAMAccessPlan(id, inputStream, (GATKBAMFileSpan) shard.getFileSpans().get(id)));
|
inputStream.submitAccessPlan(new BAMAccessPlan(id, inputStream, (GATKBAMFileSpan) shard.getFileSpans().get(id)));
|
||||||
BAMRecordCodec codec = new BAMRecordCodec(getHeader(id),factory);
|
BAMRecordCodec codec = new BAMRecordCodec(getHeader(id));
|
||||||
codec.setInputStream(inputStream);
|
codec.setInputStream(inputStream);
|
||||||
iterator = new BAMCodecIterator(inputStream,readers.getReader(id),codec);
|
iterator = new BAMCodecIterator(inputStream,readers.getReader(id),codec);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id));
|
final SamReader reader = readers.getReader(id);
|
||||||
|
try {
|
||||||
|
iterator = ((SamReader.Indexing)reader).iterator(shard.getFileSpans().get(id));
|
||||||
|
} catch (RuntimeException re) {
|
||||||
|
if ("Not implemented.".equals(re.getMessage())) { // https://github.com/samtools/htsjdk/blob/429f2a8585d9c98a3efd4cedc5188b60b1e66ac5/src/java/htsjdk/samtools/CRAMFileReader.java#L192
|
||||||
|
// No way to jump into the file span. Query the whole file.
|
||||||
|
iterator = readers.getReader(id).iterator();
|
||||||
|
} else {
|
||||||
|
throw re;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch ( RuntimeException e ) { // we need to catch RuntimeExceptions here because the Picard code is throwing them (among SAMFormatExceptions) sometimes
|
} catch ( RuntimeException e ) { // we need to catch RuntimeExceptions here because the Picard code is throwing them (among SAMFormatExceptions) sometimes
|
||||||
throw new UserException.MalformedBAM(id.getSamFile(), e.getMessage());
|
throw new UserException.MalformedBAM(id.getSamFile(), e.getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// At the moment, too many other classes to change for GATKSAMRecordIterator converter.
|
||||||
|
// Force the compiler to just let the conversion happen, since generics are erased anyway.
|
||||||
|
iterator = (CloseableIterator<SAMRecord>)(Object)new GATKSAMRecordIterator(iterator);
|
||||||
iterator = new MalformedBAMErrorReformatingIterator(id.getSamFile(), iterator);
|
iterator = new MalformedBAMErrorReformatingIterator(id.getSamFile(), iterator);
|
||||||
if(shard.getGenomeLocs().size() > 0)
|
if(shard.getGenomeLocs().size() > 0)
|
||||||
iterator = new IntervalOverlapFilteringIterator(iterator,shard.getGenomeLocs());
|
iterator = new IntervalOverlapFilteringIterator(iterator,shard.getGenomeLocs());
|
||||||
|
|
@ -614,11 +679,11 @@ public class SAMDataSource {
|
||||||
|
|
||||||
private class BAMCodecIterator implements CloseableIterator<SAMRecord> {
|
private class BAMCodecIterator implements CloseableIterator<SAMRecord> {
|
||||||
private final BlockInputStream inputStream;
|
private final BlockInputStream inputStream;
|
||||||
private final SAMFileReader reader;
|
private final SamReader reader;
|
||||||
private final BAMRecordCodec codec;
|
private final BAMRecordCodec codec;
|
||||||
private SAMRecord nextRead;
|
private SAMRecord nextRead;
|
||||||
|
|
||||||
private BAMCodecIterator(final BlockInputStream inputStream, final SAMFileReader reader, final BAMRecordCodec codec) {
|
private BAMCodecIterator(final BlockInputStream inputStream, final SamReader reader, final BAMRecordCodec codec) {
|
||||||
this.inputStream = inputStream;
|
this.inputStream = inputStream;
|
||||||
this.reader = reader;
|
this.reader = reader;
|
||||||
this.codec = codec;
|
this.codec = codec;
|
||||||
|
|
@ -823,7 +888,7 @@ public class SAMDataSource {
|
||||||
/**
|
/**
|
||||||
* A collection of readers derived from a reads metadata structure.
|
* A collection of readers derived from a reads metadata structure.
|
||||||
*/
|
*/
|
||||||
private class SAMReaders implements Iterable<SAMFileReader> {
|
private class SAMReaders implements Iterable<SamReader> {
|
||||||
/**
|
/**
|
||||||
* Cached representation of the merged header used to generate a merging iterator.
|
* Cached representation of the merged header used to generate a merging iterator.
|
||||||
*/
|
*/
|
||||||
|
|
@ -832,7 +897,7 @@ public class SAMDataSource {
|
||||||
/**
|
/**
|
||||||
* Internal storage for a map of id -> reader.
|
* Internal storage for a map of id -> reader.
|
||||||
*/
|
*/
|
||||||
private final Map<SAMReaderID,SAMFileReader> readers = new LinkedHashMap<SAMReaderID,SAMFileReader>();
|
private final Map<SAMReaderID,SamReader> readers = new LinkedHashMap<>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The input streams backing
|
* The input streams backing
|
||||||
|
|
@ -860,7 +925,11 @@ public class SAMDataSource {
|
||||||
|
|
||||||
checkForUnsupportedBamFile(init.reader.getFileHeader());
|
checkForUnsupportedBamFile(init.reader.getFileHeader());
|
||||||
|
|
||||||
if (removeProgramRecords) {
|
if (removeProgramRecords && isIteratorSAMFileHeaderCached(readerID)) {
|
||||||
|
// Only works when the SamReader implementation caches its header.
|
||||||
|
// Some implementations (ex: CRAM) rewrite the new underlying file header in reader.getIterator().
|
||||||
|
// Later, when MergingSamRecordIterator goes to check the headers with .contains()/.equals(),
|
||||||
|
// it will error out complaining it can't find the unmodified version of the header.
|
||||||
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
|
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -883,9 +952,9 @@ public class SAMDataSource {
|
||||||
|
|
||||||
// Examine the bam headers, perform any requested sample renaming on them, and add
|
// Examine the bam headers, perform any requested sample renaming on them, and add
|
||||||
// them to the list of headers to pass to the Picard SamFileHeaderMerger:
|
// them to the list of headers to pass to the Picard SamFileHeaderMerger:
|
||||||
for ( final Map.Entry<SAMReaderID, SAMFileReader> readerEntry : readers.entrySet() ) {
|
for ( final Map.Entry<SAMReaderID, SamReader> readerEntry : readers.entrySet() ) {
|
||||||
final SAMReaderID readerID = readerEntry.getKey();
|
final SAMReaderID readerID = readerEntry.getKey();
|
||||||
final SAMFileReader reader = readerEntry.getValue();
|
final SamReader reader = readerEntry.getValue();
|
||||||
final SAMFileHeader header = reader.getFileHeader();
|
final SAMFileHeader header = reader.getFileHeader();
|
||||||
|
|
||||||
// The remappedSampleName will be null if either no on-the-fly sample renaming was requested,
|
// The remappedSampleName will be null if either no on-the-fly sample renaming was requested,
|
||||||
|
|
@ -1009,7 +1078,7 @@ public class SAMDataSource {
|
||||||
* @param id The ID of the reader to retrieve.
|
* @param id The ID of the reader to retrieve.
|
||||||
* @return the reader associated with the given id.
|
* @return the reader associated with the given id.
|
||||||
*/
|
*/
|
||||||
public SAMFileReader getReader(SAMReaderID id) {
|
public SamReader getReader(SAMReaderID id) {
|
||||||
if(!readers.containsKey(id))
|
if(!readers.containsKey(id))
|
||||||
throw new NoSuchElementException("No reader is associated with id " + id);
|
throw new NoSuchElementException("No reader is associated with id " + id);
|
||||||
return readers.get(id);
|
return readers.get(id);
|
||||||
|
|
@ -1030,7 +1099,7 @@ public class SAMDataSource {
|
||||||
* @return The id associated with the given reader, or null if the reader is not present in this collection.
|
* @return The id associated with the given reader, or null if the reader is not present in this collection.
|
||||||
*/
|
*/
|
||||||
protected SAMReaderID getReaderID(SamReader reader) {
|
protected SAMReaderID getReaderID(SamReader reader) {
|
||||||
for(Map.Entry<SAMReaderID,SAMFileReader> entry: readers.entrySet()) {
|
for(Map.Entry<SAMReaderID,SamReader> entry: readers.entrySet()) {
|
||||||
if(reader == entry.getValue())
|
if(reader == entry.getValue())
|
||||||
return entry.getKey();
|
return entry.getKey();
|
||||||
}
|
}
|
||||||
|
|
@ -1042,7 +1111,7 @@ public class SAMDataSource {
|
||||||
* Returns an iterator over all readers in this structure.
|
* Returns an iterator over all readers in this structure.
|
||||||
* @return An iterator over readers.
|
* @return An iterator over readers.
|
||||||
*/
|
*/
|
||||||
public Iterator<SAMFileReader> iterator() {
|
public Iterator<SamReader> iterator() {
|
||||||
return readers.values().iterator();
|
return readers.values().iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1058,18 +1127,23 @@ public class SAMDataSource {
|
||||||
class ReaderInitializer implements Callable<ReaderInitializer> {
|
class ReaderInitializer implements Callable<ReaderInitializer> {
|
||||||
final SAMReaderID readerID;
|
final SAMReaderID readerID;
|
||||||
BlockInputStream blockInputStream = null;
|
BlockInputStream blockInputStream = null;
|
||||||
SAMFileReader reader;
|
SamReader reader;
|
||||||
|
|
||||||
public ReaderInitializer(final SAMReaderID readerID) {
|
public ReaderInitializer(final SAMReaderID readerID) {
|
||||||
this.readerID = readerID;
|
this.readerID = readerID;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ReaderInitializer call() {
|
public ReaderInitializer call() {
|
||||||
final File indexFile = findIndexFile(readerID.getSamFile());
|
|
||||||
try {
|
try {
|
||||||
if (threadAllocation.getNumIOThreads() > 0)
|
if (threadAllocation.getNumIOThreads() > 0)
|
||||||
blockInputStream = new BlockInputStream(dispatcher,readerID,false);
|
blockInputStream = new BlockInputStream(dispatcher,readerID,false);
|
||||||
reader = new SAMFileReader(readerID.getSamFile(),indexFile,false);
|
reader = SamReaderFactory.makeDefault()
|
||||||
|
.referenceSequence(referenceFile)
|
||||||
|
.validationStringency(validationStringency)
|
||||||
|
.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
|
||||||
|
.setOption(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, true)
|
||||||
|
.open(readerID.getSamFile());
|
||||||
|
|
||||||
} catch ( RuntimeIOException e ) {
|
} catch ( RuntimeIOException e ) {
|
||||||
throw new UserException.CouldNotReadInputFile(readerID.getSamFile(), e);
|
throw new UserException.CouldNotReadInputFile(readerID.getSamFile(), e);
|
||||||
} catch ( SAMFormatException e ) {
|
} catch ( SAMFormatException e ) {
|
||||||
|
|
@ -1081,9 +1155,6 @@ public class SAMDataSource {
|
||||||
catch ( RuntimeException e ) {
|
catch ( RuntimeException e ) {
|
||||||
throw new UserException.MalformedBAM(readerID.getSamFile(), e.getMessage());
|
throw new UserException.MalformedBAM(readerID.getSamFile(), e.getMessage());
|
||||||
}
|
}
|
||||||
reader.setSAMRecordFactory(factory);
|
|
||||||
reader.enableFileSource(true);
|
|
||||||
reader.setValidationStringency(validationStringency);
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -93,7 +93,7 @@ public class FindLargeShards extends CommandLineProgram {
|
||||||
|
|
||||||
// initialize reads
|
// initialize reads
|
||||||
List<SAMReaderID> bamReaders = ListFileUtils.unpackBAMFileList(samFiles,parser);
|
List<SAMReaderID> bamReaders = ListFileUtils.unpackBAMFileList(samFiles,parser);
|
||||||
SAMDataSource dataSource = new SAMDataSource(bamReaders,new ThreadAllocation(),null,genomeLocParser);
|
SAMDataSource dataSource = new SAMDataSource(referenceFile, bamReaders, new ThreadAllocation(), null, genomeLocParser);
|
||||||
|
|
||||||
// intervals
|
// intervals
|
||||||
final GenomeLocSortedSet intervalSortedSet;
|
final GenomeLocSortedSet intervalSortedSet;
|
||||||
|
|
|
||||||
|
|
@ -25,8 +25,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.io;
|
package org.broadinstitute.gatk.engine.io;
|
||||||
|
|
||||||
import htsjdk.samtools.SAMFileReader;
|
|
||||||
import htsjdk.samtools.ValidationStringency;
|
import htsjdk.samtools.ValidationStringency;
|
||||||
|
import org.broadinstitute.gatk.utils.io.ReferenceBacked;
|
||||||
import org.broadinstitute.gatk.utils.commandline.ArgumentSource;
|
import org.broadinstitute.gatk.utils.commandline.ArgumentSource;
|
||||||
import org.broadinstitute.gatk.engine.io.storage.Storage;
|
import org.broadinstitute.gatk.engine.io.storage.Storage;
|
||||||
import org.broadinstitute.gatk.engine.io.storage.StorageFactory;
|
import org.broadinstitute.gatk.engine.io.storage.StorageFactory;
|
||||||
|
|
@ -37,7 +37,7 @@ import org.broadinstitute.gatk.utils.classloader.JVMUtils;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.gatk.utils.io.IOUtils;
|
import org.broadinstitute.gatk.utils.io.IOUtils;
|
||||||
import org.broadinstitute.gatk.utils.sam.SAMFileReaderBuilder;
|
import org.broadinstitute.gatk.utils.sam.SAMReaderBuilder;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
|
|
@ -49,7 +49,12 @@ import java.util.Map;
|
||||||
* Manages the output and err streams that are created specifically for walker
|
* Manages the output and err streams that are created specifically for walker
|
||||||
* output.
|
* output.
|
||||||
*/
|
*/
|
||||||
public abstract class OutputTracker {
|
public abstract class OutputTracker implements ReferenceBacked {
|
||||||
|
/**
|
||||||
|
* The reference file.
|
||||||
|
*/
|
||||||
|
private File referenceFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The streams from which walker users should be reading directly.
|
* The streams from which walker users should be reading directly.
|
||||||
*/
|
*/
|
||||||
|
|
@ -78,6 +83,16 @@ public abstract class OutputTracker {
|
||||||
*/
|
*/
|
||||||
public abstract <T> T getStorage( Stub<T> stub );
|
public abstract <T> T getStorage( Stub<T> stub );
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public File getReferenceFile() {
|
||||||
|
return referenceFile;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setReferenceFile(final File referenceFile) {
|
||||||
|
this.referenceFile = referenceFile;
|
||||||
|
}
|
||||||
|
|
||||||
public void prepareWalker( Walker walker, ValidationStringency strictnessLevel ) {
|
public void prepareWalker( Walker walker, ValidationStringency strictnessLevel ) {
|
||||||
for( Map.Entry<ArgumentSource,Object> io: inputs.entrySet() ) {
|
for( Map.Entry<ArgumentSource,Object> io: inputs.entrySet() ) {
|
||||||
ArgumentSource targetField = io.getKey();
|
ArgumentSource targetField = io.getKey();
|
||||||
|
|
@ -85,8 +100,8 @@ public abstract class OutputTracker {
|
||||||
|
|
||||||
// Ghastly hack: reaches in and finishes building out the SAMFileReader.
|
// Ghastly hack: reaches in and finishes building out the SAMFileReader.
|
||||||
// TODO: Generalize this, and move it to its own initialization step.
|
// TODO: Generalize this, and move it to its own initialization step.
|
||||||
if( targetValue instanceof SAMFileReaderBuilder) {
|
if( targetValue instanceof SAMReaderBuilder) {
|
||||||
SAMFileReaderBuilder builder = (SAMFileReaderBuilder)targetValue;
|
SAMReaderBuilder builder = (SAMReaderBuilder)targetValue;
|
||||||
builder.setValidationStringency(strictnessLevel);
|
builder.setValidationStringency(strictnessLevel);
|
||||||
targetValue = builder.build();
|
targetValue = builder.build();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,9 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.gatk.utils.sam.SimplifyingSAMFileWriter;
|
import org.broadinstitute.gatk.utils.sam.SimplifyingSAMFileWriter;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
import java.lang.reflect.Method;
|
import java.lang.reflect.Method;
|
||||||
|
|
||||||
|
|
@ -46,6 +49,7 @@ import java.lang.reflect.Method;
|
||||||
*/
|
*/
|
||||||
public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWriter> {
|
public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWriter> {
|
||||||
private final File file;
|
private final File file;
|
||||||
|
private File referenceFasta;
|
||||||
private SAMFileWriter writer;
|
private SAMFileWriter writer;
|
||||||
|
|
||||||
private static Logger logger = Logger.getLogger(SAMFileWriterStorage.class);
|
private static Logger logger = Logger.getLogger(SAMFileWriterStorage.class);
|
||||||
|
|
@ -55,6 +59,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
||||||
}
|
}
|
||||||
|
|
||||||
public SAMFileWriterStorage( SAMFileWriterStub stub, File file ) {
|
public SAMFileWriterStorage( SAMFileWriterStub stub, File file ) {
|
||||||
|
this.referenceFasta = stub.getReferenceFile();
|
||||||
this.file = file;
|
this.file = file;
|
||||||
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||||
// Enable automatic index creation for pre-sorted BAMs.
|
// Enable automatic index creation for pre-sorted BAMs.
|
||||||
|
|
@ -69,9 +74,14 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
||||||
|
|
||||||
if(stub.getOutputFile() != null) {
|
if(stub.getOutputFile() != null) {
|
||||||
try {
|
try {
|
||||||
this.writer = createBAMWriter(factory,stub.getFileHeader(),stub.isPresorted(),file,stub.getCompressionLevel());
|
if (stub.getOutputFile().getName().toLowerCase().endsWith(".cram")) {
|
||||||
}
|
this.writer = createCRAMWriter(factory, stub.getFileHeader(), new FileOutputStream(file), this.referenceFasta);
|
||||||
catch(RuntimeIOException ex) {
|
} else {
|
||||||
|
this.writer = createBAMWriter(factory,stub.getFileHeader(),stub.isPresorted(),file,stub.getCompressionLevel());
|
||||||
|
}
|
||||||
|
} catch(IOException ex) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile(file, "file could not be created", ex);
|
||||||
|
} catch(RuntimeIOException ex) {
|
||||||
throw new UserException.CouldNotCreateOutputFile(file,"file could not be created",ex);
|
throw new UserException.CouldNotCreateOutputFile(file,"file could not be created",ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -117,6 +127,13 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private SAMFileWriter createCRAMWriter(final SAMFileWriterFactory factory,
|
||||||
|
final SAMFileHeader header,
|
||||||
|
final OutputStream outputStream,
|
||||||
|
final File referenceFasta) {
|
||||||
|
return factory.makeCRAMWriter(header, outputStream, referenceFasta);
|
||||||
|
}
|
||||||
|
|
||||||
private SAMFileWriter createBAMWriter(final SAMFileWriterFactory factory,
|
private SAMFileWriter createBAMWriter(final SAMFileWriterFactory factory,
|
||||||
final SAMFileHeader header,
|
final SAMFileHeader header,
|
||||||
final boolean presorted,
|
final boolean presorted,
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ import htsjdk.samtools.util.ProgressLoggerInterface;
|
||||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
|
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
|
||||||
import org.broadinstitute.gatk.engine.io.OutputTracker;
|
import org.broadinstitute.gatk.engine.io.OutputTracker;
|
||||||
|
import org.broadinstitute.gatk.utils.io.ReferenceBacked;
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSAMFileWriter;
|
import org.broadinstitute.gatk.utils.sam.GATKSAMFileWriter;
|
||||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||||
import org.broadinstitute.gatk.utils.baq.BAQ;
|
import org.broadinstitute.gatk.utils.baq.BAQ;
|
||||||
|
|
@ -50,7 +51,7 @@ import java.util.List;
|
||||||
* @author mhanna
|
* @author mhanna
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter {
|
public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter, ReferenceBacked {
|
||||||
/**
|
/**
|
||||||
* Engine to use for collecting attributes for the output SAM file.
|
* Engine to use for collecting attributes for the output SAM file.
|
||||||
*/
|
*/
|
||||||
|
|
@ -67,6 +68,11 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter
|
||||||
*/
|
*/
|
||||||
private final File samFile;
|
private final File samFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The reference file for stub.
|
||||||
|
*/
|
||||||
|
private File referenceFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The target output stream, to be used in place of the SAM file.
|
* The target output stream, to be used in place of the SAM file.
|
||||||
*/
|
*/
|
||||||
|
|
@ -189,6 +195,16 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter
|
||||||
return samOutputStream;
|
return samOutputStream;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public File getReferenceFile() {
|
||||||
|
return referenceFile;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setReferenceFile(final File referenceFile) {
|
||||||
|
this.referenceFile = referenceFile;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the header to use when creating the new SAM file.
|
* Retrieves the header to use when creating the new SAM file.
|
||||||
* @return header to use when creating the new SAM file.
|
* @return header to use when creating the new SAM file.
|
||||||
|
|
|
||||||
|
|
@ -29,14 +29,14 @@ import htsjdk.samtools.SAMFileReader;
|
||||||
import org.broadinstitute.gatk.utils.commandline.*;
|
import org.broadinstitute.gatk.utils.commandline.*;
|
||||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.gatk.utils.sam.SAMFileReaderBuilder;
|
import org.broadinstitute.gatk.utils.sam.SAMReaderBuilder;
|
||||||
|
|
||||||
import java.lang.reflect.Type;
|
import java.lang.reflect.Type;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Describe how to parse SAMFileReaders.
|
* Describe how to parse SAMReaders.
|
||||||
*/
|
*/
|
||||||
public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
public class SAMReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
/**
|
/**
|
||||||
* The engine into which output stubs should be fed.
|
* The engine into which output stubs should be fed.
|
||||||
*/
|
*/
|
||||||
|
|
@ -46,7 +46,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
||||||
* Create a new SAMFileReader argument, notifying the given engine when that argument has been created.
|
* Create a new SAMFileReader argument, notifying the given engine when that argument has been created.
|
||||||
* @param engine engine
|
* @param engine engine
|
||||||
*/
|
*/
|
||||||
public SAMFileReaderArgumentTypeDescriptor( GenomeAnalysisEngine engine ) {
|
public SAMReaderArgumentTypeDescriptor(GenomeAnalysisEngine engine) {
|
||||||
this.engine = engine;
|
this.engine = engine;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -57,7 +57,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||||
SAMFileReaderBuilder builder = new SAMFileReaderBuilder();
|
SAMReaderBuilder builder = new SAMReaderBuilder();
|
||||||
|
|
||||||
ArgumentMatchValue readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches );
|
ArgumentMatchValue readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches );
|
||||||
|
|
||||||
|
|
@ -71,7 +71,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
||||||
|
|
||||||
// MASSIVE KLUDGE! SAMFileReader is tricky to implement and we don't yet have a stub. Return null, then
|
// MASSIVE KLUDGE! SAMFileReader is tricky to implement and we don't yet have a stub. Return null, then
|
||||||
// let the output tracker load it in.
|
// let the output tracker load it in.
|
||||||
// TODO: Add a stub for SAMFileReader.
|
// TODO: Add a stub for SAMReader.
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -38,7 +38,6 @@ import org.broadinstitute.gatk.utils.collections.Pair;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory;
|
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
import htsjdk.variant.vcf.VCFCodec;
|
import htsjdk.variant.vcf.VCFCodec;
|
||||||
import htsjdk.variant.vcf.VCFHeader;
|
import htsjdk.variant.vcf.VCFHeader;
|
||||||
|
|
@ -312,7 +311,6 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
|
||||||
final File outputBam = executeTest("testGATKEngineConsolidatesCigars", spec).first.get(0);
|
final File outputBam = executeTest("testGATKEngineConsolidatesCigars", spec).first.get(0);
|
||||||
final SAMFileReader reader = new SAMFileReader(outputBam);
|
final SAMFileReader reader = new SAMFileReader(outputBam);
|
||||||
reader.setValidationStringency(ValidationStringency.SILENT);
|
reader.setValidationStringency(ValidationStringency.SILENT);
|
||||||
reader.setSAMRecordFactory(new GATKSamRecordFactory());
|
|
||||||
|
|
||||||
final SAMRecord read = reader.iterator().next();
|
final SAMRecord read = reader.iterator().next();
|
||||||
reader.close();
|
reader.close();
|
||||||
|
|
|
||||||
|
|
@ -81,6 +81,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
||||||
|
|
||||||
// Test the accuracy of the read metrics
|
// Test the accuracy of the read metrics
|
||||||
|
|
||||||
|
private File referenceFile;
|
||||||
private IndexedFastaSequenceFile reference;
|
private IndexedFastaSequenceFile reference;
|
||||||
private SAMSequenceDictionary dictionary;
|
private SAMSequenceDictionary dictionary;
|
||||||
private SAMFileHeader header;
|
private SAMFileHeader header;
|
||||||
|
|
@ -93,7 +94,8 @@ public class ReadMetricsUnitTest extends BaseTest {
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
private void init() throws IOException {
|
private void init() throws IOException {
|
||||||
reference = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
referenceFile = new File(b37KGReference);
|
||||||
|
reference = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||||
dictionary = reference.getSequenceDictionary();
|
dictionary = reference.getSequenceDictionary();
|
||||||
genomeLocParser = new GenomeLocParser(dictionary);
|
genomeLocParser = new GenomeLocParser(dictionary);
|
||||||
header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
|
header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
|
||||||
|
|
@ -149,7 +151,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
||||||
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
||||||
samFiles.add(readerID);
|
samFiles.add(readerID);
|
||||||
|
|
||||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||||
false,
|
false,
|
||||||
ValidationStringency.STRICT,
|
ValidationStringency.STRICT,
|
||||||
null,
|
null,
|
||||||
|
|
@ -184,7 +186,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
||||||
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
||||||
samFiles.add(readerID);
|
samFiles.add(readerID);
|
||||||
|
|
||||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||||
false,
|
false,
|
||||||
ValidationStringency.STRICT,
|
ValidationStringency.STRICT,
|
||||||
null,
|
null,
|
||||||
|
|
@ -225,7 +227,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
||||||
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
||||||
samFiles.add(readerID);
|
samFiles.add(readerID);
|
||||||
|
|
||||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||||
false,
|
false,
|
||||||
ValidationStringency.STRICT,
|
ValidationStringency.STRICT,
|
||||||
null,
|
null,
|
||||||
|
|
@ -272,7 +274,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
||||||
final List<ReadFilter> filters = new ArrayList<>();
|
final List<ReadFilter> filters = new ArrayList<>();
|
||||||
filters.add(new EveryTenthReadFilter());
|
filters.add(new EveryTenthReadFilter());
|
||||||
|
|
||||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||||
false,
|
false,
|
||||||
ValidationStringency.STRICT,
|
ValidationStringency.STRICT,
|
||||||
null,
|
null,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,74 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.gatk.engine.arguments;
|
||||||
|
|
||||||
|
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the GATK core CRAM parsing mechanism.
|
||||||
|
*/
|
||||||
|
public class CramIntegrationTest extends WalkerTest {
|
||||||
|
@DataProvider(name="cramData")
|
||||||
|
public Object[][] getCRAMData() {
|
||||||
|
return new Object[][] {
|
||||||
|
{"PrintReads", "exampleBAM.bam", "", "cram", "026ebc00c2a8f9832e37f1a6a0f53521"},
|
||||||
|
//{"PrintReads", "exampleCRAM.cram", "", "cram", "026ebc00c2a8f9832e37f1a6a0f53521"}, https://github.com/samtools/htsjdk/issues/148
|
||||||
|
{"PrintReads", "exampleCRAM.cram", "", "bam", "99e5f740b43594a5b8e5bc1a007719e0"},
|
||||||
|
{"PrintReads", "exampleCRAM-noindex.cram", "", "bam", "99e5f740b43594a5b8e5bc1a007719e0"},
|
||||||
|
{"PrintReads", "exampleCRAM.cram", " -L chr1:200", "bam", "072435e8272411c31b2234f851706384"},
|
||||||
|
{"PrintReads", "exampleCRAM-noindex.cram", " -L chr1:200", "bam", "072435e8272411c31b2234f851706384"},
|
||||||
|
{"CountLoci", "exampleCRAM.cram", "", "txt", "ade93df31a6150321c1067e749cae9be"},
|
||||||
|
{"CountLoci", "exampleCRAM-noindex.cram", "", "txt", "ade93df31a6150321c1067e749cae9be"},
|
||||||
|
{"CountLoci", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
|
||||||
|
{"CountLoci", "exampleCRAM-noindex.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
|
||||||
|
{"CountReads", "exampleCRAM.cram", "", "txt", "4fbafd6948b6529caa2b78e476359875"},
|
||||||
|
{"CountReads", "exampleCRAM-noindex.cram", "", "txt", "4fbafd6948b6529caa2b78e476359875"},
|
||||||
|
{"CountReads", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
|
||||||
|
{"CountReads", "exampleCRAM-noindex.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
|
||||||
|
{"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "9598062587ad8d2ec596a8ecb19be979"},
|
||||||
|
{"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
|
||||||
|
{"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "cramData")
|
||||||
|
public void testCRAM(String walker, String input, String args, String ext, String md5) {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
" -T Test" + walker + "Walker" +
|
||||||
|
" -I " + publicTestDir + input +
|
||||||
|
" -R " + exampleFASTA +
|
||||||
|
args +
|
||||||
|
" -o %s",
|
||||||
|
1, // just one output file
|
||||||
|
Arrays.asList(ext),
|
||||||
|
Arrays.asList(md5));
|
||||||
|
executeTest(String.format("testCRAM %s %s -> %s: %s", walker, input, ext, args), spec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -76,7 +76,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
||||||
SAMRecordIterator iterator = new SAMRecordIterator();
|
SAMRecordIterator iterator = new SAMRecordIterator();
|
||||||
|
|
||||||
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
|
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
|
||||||
Shard shard = new LocusShard(genomeLocParser, new SAMDataSource(Collections.<SAMReaderID>emptyList(),new ThreadAllocation(),null,genomeLocParser),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
|
Shard shard = new LocusShard(genomeLocParser, new SAMDataSource(null,Collections.<SAMReaderID>emptyList(),new ThreadAllocation(),null,genomeLocParser),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
|
||||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, null, null);
|
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, null, null);
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,7 @@ import java.util.Collections;
|
||||||
public class MockLocusShard extends LocusShard {
|
public class MockLocusShard extends LocusShard {
|
||||||
public MockLocusShard(final GenomeLocParser genomeLocParser,final List<GenomeLoc> intervals) {
|
public MockLocusShard(final GenomeLocParser genomeLocParser,final List<GenomeLoc> intervals) {
|
||||||
super( genomeLocParser,
|
super( genomeLocParser,
|
||||||
new SAMDataSource(Collections.<SAMReaderID>emptyList(),new ThreadAllocation(),null,genomeLocParser),
|
new SAMDataSource(null, Collections.<SAMReaderID>emptyList(),new ThreadAllocation(),null,genomeLocParser),
|
||||||
intervals,
|
intervals,
|
||||||
null);
|
null);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -84,7 +84,8 @@ public class ReadShardBalancerUnitTest extends BaseTest {
|
||||||
public void run() {
|
public void run() {
|
||||||
createTestBAM();
|
createTestBAM();
|
||||||
|
|
||||||
SAMDataSource dataSource = new SAMDataSource(Arrays.asList(testBAM),
|
SAMDataSource dataSource = new SAMDataSource(null, // Reference not used in this test.
|
||||||
|
Arrays.asList(testBAM),
|
||||||
new ThreadAllocation(),
|
new ThreadAllocation(),
|
||||||
null,
|
null,
|
||||||
new GenomeLocParser(header.getSequenceDictionary()),
|
new GenomeLocParser(header.getSequenceDictionary()),
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,7 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
||||||
// TODO: These legacy tests should really be replaced with a more comprehensive suite of tests for SAMDataSource
|
// TODO: These legacy tests should really be replaced with a more comprehensive suite of tests for SAMDataSource
|
||||||
|
|
||||||
private List<SAMReaderID> readers;
|
private List<SAMReaderID> readers;
|
||||||
|
private File referenceFile;
|
||||||
private IndexedFastaSequenceFile seq;
|
private IndexedFastaSequenceFile seq;
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
|
|
@ -76,7 +77,8 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
||||||
readers = new ArrayList<SAMReaderID>();
|
readers = new ArrayList<SAMReaderID>();
|
||||||
|
|
||||||
// sequence
|
// sequence
|
||||||
seq = new CachingIndexedFastaSequenceFile(new File(b36KGReference));
|
referenceFile = new File(b36KGReference);
|
||||||
|
seq = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||||
genomeLocParser = new GenomeLocParser(seq.getSequenceDictionary());
|
genomeLocParser = new GenomeLocParser(seq.getSequenceDictionary());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -101,7 +103,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
||||||
readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),new Tags()));
|
readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),new Tags()));
|
||||||
|
|
||||||
// the sharding strat.
|
// the sharding strat.
|
||||||
SAMDataSource data = new SAMDataSource(readers,
|
SAMDataSource data = new SAMDataSource(
|
||||||
|
referenceFile,
|
||||||
|
readers,
|
||||||
new ThreadAllocation(),
|
new ThreadAllocation(),
|
||||||
null,
|
null,
|
||||||
genomeLocParser,
|
genomeLocParser,
|
||||||
|
|
@ -155,7 +159,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
||||||
readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags()));
|
readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags()));
|
||||||
|
|
||||||
// use defaults
|
// use defaults
|
||||||
SAMDataSource data = new SAMDataSource(readers,
|
SAMDataSource data = new SAMDataSource(
|
||||||
|
referenceFile,
|
||||||
|
readers,
|
||||||
new ThreadAllocation(),
|
new ThreadAllocation(),
|
||||||
null,
|
null,
|
||||||
genomeLocParser,
|
genomeLocParser,
|
||||||
|
|
@ -171,7 +177,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
||||||
assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor");
|
assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor");
|
||||||
|
|
||||||
boolean removeProgramRecords = false;
|
boolean removeProgramRecords = false;
|
||||||
data = new SAMDataSource(readers,
|
data = new SAMDataSource(
|
||||||
|
referenceFile,
|
||||||
|
readers,
|
||||||
new ThreadAllocation(),
|
new ThreadAllocation(),
|
||||||
null,
|
null,
|
||||||
genomeLocParser,
|
genomeLocParser,
|
||||||
|
|
@ -192,7 +200,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
||||||
assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false");
|
assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false");
|
||||||
|
|
||||||
removeProgramRecords = true;
|
removeProgramRecords = true;
|
||||||
data = new SAMDataSource(readers,
|
data = new SAMDataSource(
|
||||||
|
referenceFile,
|
||||||
|
readers,
|
||||||
new ThreadAllocation(),
|
new ThreadAllocation(),
|
||||||
null,
|
null,
|
||||||
genomeLocParser,
|
genomeLocParser,
|
||||||
|
|
@ -217,7 +227,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
||||||
public void testFailOnReducedReads() {
|
public void testFailOnReducedReads() {
|
||||||
readers.add(new SAMReaderID(new File(privateTestDir + "old.reduced.bam"), new Tags()));
|
readers.add(new SAMReaderID(new File(privateTestDir + "old.reduced.bam"), new Tags()));
|
||||||
|
|
||||||
SAMDataSource data = new SAMDataSource(readers,
|
SAMDataSource data = new SAMDataSource(
|
||||||
|
referenceFile,
|
||||||
|
readers,
|
||||||
new ThreadAllocation(),
|
new ThreadAllocation(),
|
||||||
null,
|
null,
|
||||||
genomeLocParser,
|
genomeLocParser,
|
||||||
|
|
@ -234,7 +246,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
||||||
public void testFailOnReducedReadsRemovingProgramRecords() {
|
public void testFailOnReducedReadsRemovingProgramRecords() {
|
||||||
readers.add(new SAMReaderID(new File(privateTestDir + "old.reduced.bam"), new Tags()));
|
readers.add(new SAMReaderID(new File(privateTestDir + "old.reduced.bam"), new Tags()));
|
||||||
|
|
||||||
SAMDataSource data = new SAMDataSource(readers,
|
SAMDataSource data = new SAMDataSource(
|
||||||
|
referenceFile,
|
||||||
|
readers,
|
||||||
new ThreadAllocation(),
|
new ThreadAllocation(),
|
||||||
null,
|
null,
|
||||||
genomeLocParser,
|
genomeLocParser,
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
||||||
import org.testng.annotations.AfterClass;
|
import org.testng.annotations.AfterClass;
|
||||||
import org.testng.annotations.BeforeClass;
|
import org.testng.annotations.BeforeClass;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -313,6 +314,7 @@ public class ReadFilterTest extends BaseTest {
|
||||||
|
|
||||||
protected SAMDataSource composeDataSource() {
|
protected SAMDataSource composeDataSource() {
|
||||||
checkHeaderExists();
|
checkHeaderExists();
|
||||||
|
final File referenceFile = null; // Not used in this test.
|
||||||
final Set<SAMReaderID> readerIDs = new HashSet<>(1);
|
final Set<SAMReaderID> readerIDs = new HashSet<>(1);
|
||||||
final ThreadAllocation ta = new ThreadAllocation();
|
final ThreadAllocation ta = new ThreadAllocation();
|
||||||
final Integer numFileHandles = 1; // I believe that any value would do but need to confirm.
|
final Integer numFileHandles = 1; // I believe that any value would do but need to confirm.
|
||||||
|
|
@ -326,6 +328,7 @@ public class ReadFilterTest extends BaseTest {
|
||||||
|
|
||||||
final GenomeLocParser glp = new GenomeLocParser(header.getSequenceDictionary());
|
final GenomeLocParser glp = new GenomeLocParser(header.getSequenceDictionary());
|
||||||
final SAMDataSource res = new SAMDataSource(
|
final SAMDataSource res = new SAMDataSource(
|
||||||
|
referenceFile,
|
||||||
readerIDs,
|
readerIDs,
|
||||||
ta,
|
ta,
|
||||||
numFileHandles,
|
numFileHandles,
|
||||||
|
|
|
||||||
|
|
@ -79,6 +79,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
||||||
return traversals.toArray(new Object[][]{});
|
return traversals.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private File referenceFile;
|
||||||
private IndexedFastaSequenceFile reference;
|
private IndexedFastaSequenceFile reference;
|
||||||
private SAMSequenceDictionary dictionary;
|
private SAMSequenceDictionary dictionary;
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
@ -90,7 +91,8 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
private void init() throws IOException {
|
private void init() throws IOException {
|
||||||
//reference = new CachingIndexedFastaSequenceFile(new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta")); // hg19Reference));
|
//reference = new CachingIndexedFastaSequenceFile(new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta")); // hg19Reference));
|
||||||
reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
|
referenceFile = new File(hg19Reference);
|
||||||
|
reference = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||||
dictionary = reference.getSequenceDictionary();
|
dictionary = reference.getSequenceDictionary();
|
||||||
genomeLocParser = new GenomeLocParser(dictionary);
|
genomeLocParser = new GenomeLocParser(dictionary);
|
||||||
|
|
||||||
|
|
@ -470,7 +472,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
||||||
SAMReaderID readerID = new SAMReaderID(bamFile, new Tags());
|
SAMReaderID readerID = new SAMReaderID(bamFile, new Tags());
|
||||||
samFiles.add(readerID);
|
samFiles.add(readerID);
|
||||||
|
|
||||||
SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||||
false,
|
false,
|
||||||
ValidationStringency.STRICT,
|
ValidationStringency.STRICT,
|
||||||
null,
|
null,
|
||||||
|
|
|
||||||
|
|
@ -138,7 +138,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
||||||
/** Test out that we can shard the file and iterate over every read */
|
/** Test out that we can shard the file and iterate over every read */
|
||||||
@Test
|
@Test
|
||||||
public void testUnmappedReadCount() {
|
public void testUnmappedReadCount() {
|
||||||
SAMDataSource dataSource = new SAMDataSource(bamList,new ThreadAllocation(),null,genomeLocParser);
|
SAMDataSource dataSource = new SAMDataSource(refFile, bamList,new ThreadAllocation(),null,genomeLocParser);
|
||||||
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
|
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
|
||||||
|
|
||||||
countReadWalker.initialize();
|
countReadWalker.initialize();
|
||||||
|
|
|
||||||
|
|
@ -44,8 +44,8 @@
|
||||||
<test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners>
|
<test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners>
|
||||||
|
|
||||||
<!-- Version numbers for picard and htsjdk -->
|
<!-- Version numbers for picard and htsjdk -->
|
||||||
<htsjdk.version>1.127.1690</htsjdk.version>
|
<htsjdk.version>1.128.1696</htsjdk.version>
|
||||||
<picard.version>1.127.1667</picard.version>
|
<picard.version>1.128.1678</picard.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<!-- Dependency configuration (versions, etc.) -->
|
<!-- Dependency configuration (versions, etc.) -->
|
||||||
|
|
|
||||||
|
|
@ -132,4 +132,15 @@ public class GATKBin implements Comparable<GATKBin> {
|
||||||
return new GATKChunk[0];
|
return new GATKChunk[0];
|
||||||
return chunkList;
|
return chunkList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HACK: Using this class's package permissions to further hack the CRAM-created SAMRecord's indexing bin and binary attributes.
|
||||||
|
public static Integer getReadIndexingBin(final SAMRecord read) {
|
||||||
|
return read.getIndexingBin();
|
||||||
|
}
|
||||||
|
public static void setReadIndexingBin(final SAMRecord read, final Integer indexingBin) {
|
||||||
|
read.setIndexingBin(indexingBin);
|
||||||
|
}
|
||||||
|
public static SAMBinaryTagAndValue getReadBinaryAttributes(final SAMRecord read) {
|
||||||
|
return read.getBinaryAttributes();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -225,20 +225,20 @@ public class UserException extends ReviewedGATKException {
|
||||||
|
|
||||||
public static class MissortedBAM extends UserException {
|
public static class MissortedBAM extends UserException {
|
||||||
public MissortedBAM(SAMFileHeader.SortOrder order, File file, SAMFileHeader header) {
|
public MissortedBAM(SAMFileHeader.SortOrder order, File file, SAMFileHeader header) {
|
||||||
super(String.format("Missorted Input SAM/BAM files: %s is must be sorted in %s order but order was: %s", file, order, header.getSortOrder()));
|
super(String.format("Missorted Input SAM/BAM/CRAM files: %s is must be sorted in %s order but order was: %s", file, order, header.getSortOrder()));
|
||||||
}
|
}
|
||||||
|
|
||||||
public MissortedBAM(SAMFileHeader.SortOrder order, String message) {
|
public MissortedBAM(SAMFileHeader.SortOrder order, String message) {
|
||||||
super(String.format("Missorted Input SAM/BAM files: files are not sorted in %s order; %s", order, message));
|
super(String.format("Missorted Input SAM/BAM/CRAM files: files are not sorted in %s order; %s", order, message));
|
||||||
}
|
}
|
||||||
|
|
||||||
public MissortedBAM(SAMFileHeader.SortOrder order, SAMRecord read, String message) {
|
public MissortedBAM(SAMFileHeader.SortOrder order, SAMRecord read, String message) {
|
||||||
super(String.format("Missorted Input SAM/BAM file %s: file sorted in %s order but %s is required; %s",
|
super(String.format("Missorted Input SAM/BAM/CRAM file %s: file sorted in %s order but %s is required; %s",
|
||||||
read.getFileSource().getReader(), read.getHeader().getSortOrder(), order, message));
|
read.getFileSource().getReader(), read.getHeader().getSortOrder(), order, message));
|
||||||
}
|
}
|
||||||
|
|
||||||
public MissortedBAM(String message) {
|
public MissortedBAM(String message) {
|
||||||
super(String.format("Missorted Input SAM/BAM files: %s", message));
|
super(String.format("Missorted Input SAM/BAM/CRAM files: %s", message));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -252,7 +252,7 @@ public class UserException extends ReviewedGATKException {
|
||||||
}
|
}
|
||||||
|
|
||||||
public MalformedBAM(String source, String message) {
|
public MalformedBAM(String source, String message) {
|
||||||
super(String.format("SAM/BAM file %s is malformed: %s", source, message));
|
super(String.format("SAM/BAM/CRAM file %s is malformed: %s", source, message));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -262,7 +262,7 @@ public class UserException extends ReviewedGATKException {
|
||||||
}
|
}
|
||||||
|
|
||||||
public MisencodedBAM(String source, String message) {
|
public MisencodedBAM(String source, String message) {
|
||||||
super(String.format("SAM/BAM file %s appears to be using the wrong encoding for quality scores: %s; please see the GATK --help documentation for options related to this error", source, message));
|
super(String.format("SAM/BAM/CRAM file %s appears to be using the wrong encoding for quality scores: %s; please see the GATK --help documentation for options related to this error", source, message));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.gatk.utils.io;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
public interface ReferenceBacked {
|
||||||
|
public File getReferenceFile();
|
||||||
|
public void setReferenceFile(final File reference);
|
||||||
|
}
|
||||||
|
|
@ -37,7 +37,6 @@ import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecordIterator;
|
import org.broadinstitute.gatk.utils.sam.GATKSAMRecordIterator;
|
||||||
import org.broadinstitute.gatk.utils.*;
|
import org.broadinstitute.gatk.utils.*;
|
||||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
@ -67,7 +66,6 @@ public class LIBSPerformance extends CommandLineProgram {
|
||||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(reference);
|
final GenomeLocParser genomeLocParser = new GenomeLocParser(reference);
|
||||||
|
|
||||||
final SAMFileReader reader = new SAMFileReader(samFile);
|
final SAMFileReader reader = new SAMFileReader(samFile);
|
||||||
reader.setSAMRecordFactory(new GATKSamRecordFactory());
|
|
||||||
|
|
||||||
SAMRecordIterator rawIterator;
|
SAMRecordIterator rawIterator;
|
||||||
if ( location == null )
|
if ( location == null )
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.gatk.utils.sam;
|
package org.broadinstitute.gatk.utils.sam;
|
||||||
|
|
||||||
import htsjdk.samtools.*;
|
import htsjdk.samtools.*;
|
||||||
|
import htsjdk.samtools.cram.build.CramIO;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||||
|
|
@ -149,7 +150,7 @@ public class ArtificialSAMFileReader extends SAMFileReader {
|
||||||
byte[] byteArray = "".getBytes("ISO-8859-1");
|
byte[] byteArray = "".getBytes("ISO-8859-1");
|
||||||
return new ByteArrayInputStream(byteArray);
|
return new ByteArrayInputStream(byteArray);
|
||||||
}
|
}
|
||||||
catch( UnsupportedEncodingException ex ) {
|
catch( Exception ex ) {
|
||||||
throw new ReviewedGATKException("Unable to build empty input stream",ex);
|
throw new ReviewedGATKException("Unable to build empty input stream",ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,7 @@ import java.util.*;
|
||||||
* Changing these values in any way will invalidate the cached value. However, we do not monitor those setter
|
* Changing these values in any way will invalidate the cached value. However, we do not monitor those setter
|
||||||
* functions, so modifying a GATKSAMRecord in any way may result in stale cached values.
|
* functions, so modifying a GATKSAMRecord in any way may result in stale cached values.
|
||||||
*/
|
*/
|
||||||
public class GATKSAMRecord extends BAMRecord implements Cloneable {
|
public class GATKSAMRecord extends SAMRecord implements Cloneable {
|
||||||
// Base Quality Score Recalibrator specific attribute tags
|
// Base Quality Score Recalibrator specific attribute tags
|
||||||
public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; // base qualities for insertions
|
public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; // base qualities for insertions
|
||||||
public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; // base qualities for deletions
|
public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; // base qualities for deletions
|
||||||
|
|
@ -92,42 +92,36 @@ public class GATKSAMRecord extends BAMRecord implements Cloneable {
|
||||||
* @param read
|
* @param read
|
||||||
*/
|
*/
|
||||||
public GATKSAMRecord(final SAMRecord read) {
|
public GATKSAMRecord(final SAMRecord read) {
|
||||||
super(read.getHeader(),
|
super(read.getHeader());
|
||||||
read.getReferenceIndex(),
|
super.setReferenceIndex(read.getReferenceIndex());
|
||||||
read.getAlignmentStart(),
|
super.setAlignmentStart(read.getAlignmentStart());
|
||||||
read.getReadName() != null ? (short)read.getReadNameLength() : 0,
|
super.setReadName(read.getReadName());
|
||||||
(short)read.getMappingQuality(),
|
super.setMappingQuality(read.getMappingQuality());
|
||||||
0,
|
// indexing bin done below
|
||||||
read.getCigarLength(),
|
super.setCigar(read.getCigar());
|
||||||
read.getFlags(),
|
super.setFlags(read.getFlags());
|
||||||
read.getReadLength(),
|
super.setMateReferenceIndex(read.getMateReferenceIndex());
|
||||||
read.getMateReferenceIndex(),
|
super.setMateAlignmentStart(read.getMateAlignmentStart());
|
||||||
read.getMateAlignmentStart(),
|
super.setInferredInsertSize(read.getInferredInsertSize());
|
||||||
read.getInferredInsertSize(),
|
|
||||||
null);
|
|
||||||
SAMReadGroupRecord samRG = read.getReadGroup();
|
SAMReadGroupRecord samRG = read.getReadGroup();
|
||||||
clearAttributes();
|
SAMBinaryTagAndValue samAttr = GATKBin.getReadBinaryAttributes(read);
|
||||||
|
if (samAttr == null) {
|
||||||
|
clearAttributes();
|
||||||
|
} else {
|
||||||
|
setAttributes(samAttr);
|
||||||
|
}
|
||||||
if (samRG != null) {
|
if (samRG != null) {
|
||||||
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG);
|
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG);
|
||||||
setReadGroup(rg);
|
setReadGroup(rg);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
public GATKSAMRecord(final SAMFileHeader header,
|
super.setFileSource(read.getFileSource());
|
||||||
final int referenceSequenceIndex,
|
super.setReadName(read.getReadName());
|
||||||
final int alignmentStart,
|
super.setCigarString(read.getCigarString());
|
||||||
final short readNameLength,
|
super.setReadBases(read.getReadBases());
|
||||||
final short mappingQuality,
|
super.setBaseQualities(read.getBaseQualities());
|
||||||
final int indexingBin,
|
// From SAMRecord constructor: Do this after the above because setCigarString will clear it.
|
||||||
final int cigarLen,
|
GATKBin.setReadIndexingBin(this, GATKBin.getReadIndexingBin(read));
|
||||||
final int flags,
|
|
||||||
final int readLen,
|
|
||||||
final int mateReferenceSequenceIndex,
|
|
||||||
final int mateAlignmentStart,
|
|
||||||
final int insertSize,
|
|
||||||
final byte[] variableLengthBlock) {
|
|
||||||
super(header, referenceSequenceIndex, alignmentStart, readNameLength, mappingQuality, indexingBin, cigarLen,
|
|
||||||
flags, readLen, mateReferenceSequenceIndex, mateAlignmentStart, insertSize, variableLengthBlock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static GATKSAMRecord createRandomRead(int length) {
|
public static GATKSAMRecord createRandomRead(int length) {
|
||||||
|
|
@ -520,19 +514,15 @@ public class GATKSAMRecord extends BAMRecord implements Cloneable {
|
||||||
* @return a read with no bases but safe for the GATK
|
* @return a read with no bases but safe for the GATK
|
||||||
*/
|
*/
|
||||||
public static GATKSAMRecord emptyRead(GATKSAMRecord read) {
|
public static GATKSAMRecord emptyRead(GATKSAMRecord read) {
|
||||||
GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(),
|
final GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader());
|
||||||
read.getReferenceIndex(),
|
emptyRead.setReferenceIndex(read.getReferenceIndex());
|
||||||
0,
|
emptyRead.setAlignmentStart(0);
|
||||||
(short) 0,
|
emptyRead.setMappingQuality(0);
|
||||||
(short) 0,
|
// setting read indexing bin last
|
||||||
0,
|
emptyRead.setFlags(read.getFlags());
|
||||||
0,
|
emptyRead.setMateReferenceIndex(read.getMateReferenceIndex());
|
||||||
read.getFlags(),
|
emptyRead.setMateAlignmentStart(read.getMateAlignmentStart());
|
||||||
0,
|
emptyRead.setInferredInsertSize(read.getInferredInsertSize());
|
||||||
read.getMateReferenceIndex(),
|
|
||||||
read.getMateAlignmentStart(),
|
|
||||||
read.getInferredInsertSize(),
|
|
||||||
null);
|
|
||||||
|
|
||||||
emptyRead.setCigarString("");
|
emptyRead.setCigarString("");
|
||||||
emptyRead.setReadBases(new byte[0]);
|
emptyRead.setReadBases(new byte[0]);
|
||||||
|
|
@ -545,6 +535,8 @@ public class GATKSAMRecord extends BAMRecord implements Cloneable {
|
||||||
emptyRead.setReadGroup(rg);
|
emptyRead.setReadGroup(rg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GATKBin.setReadIndexingBin(emptyRead, 0);
|
||||||
|
|
||||||
return emptyRead;
|
return emptyRead;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,6 @@ package org.broadinstitute.gatk.utils.sam;
|
||||||
import htsjdk.samtools.SAMRecord;
|
import htsjdk.samtools.SAMRecord;
|
||||||
import htsjdk.samtools.util.CloseableIterator;
|
import htsjdk.samtools.util.CloseableIterator;
|
||||||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator;
|
import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator;
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
|
@ -40,9 +39,9 @@ import java.util.Iterator;
|
||||||
* Time: 1:19 PM
|
* Time: 1:19 PM
|
||||||
*/
|
*/
|
||||||
public class GATKSAMRecordIterator implements CloseableIterator<GATKSAMRecord>, Iterable<GATKSAMRecord> {
|
public class GATKSAMRecordIterator implements CloseableIterator<GATKSAMRecord>, Iterable<GATKSAMRecord> {
|
||||||
final CloseableIterator<SAMRecord> it;
|
final CloseableIterator<? extends SAMRecord> it;
|
||||||
|
|
||||||
public GATKSAMRecordIterator(final CloseableIterator<SAMRecord> it) {
|
public GATKSAMRecordIterator(final CloseableIterator<? extends SAMRecord> it) {
|
||||||
this.it = it;
|
this.it = it;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -51,7 +50,14 @@ public class GATKSAMRecordIterator implements CloseableIterator<GATKSAMRecord>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override public boolean hasNext() { return it.hasNext(); }
|
@Override public boolean hasNext() { return it.hasNext(); }
|
||||||
@Override public GATKSAMRecord next() { return (GATKSAMRecord)it.next(); }
|
@Override public GATKSAMRecord next() {
|
||||||
|
SAMRecord next = it.next();
|
||||||
|
if (next instanceof GATKSAMRecord) {
|
||||||
|
return (GATKSAMRecord)next;
|
||||||
|
} else {
|
||||||
|
return new GATKSAMRecord(next);
|
||||||
|
}
|
||||||
|
}
|
||||||
@Override public void remove() { it.remove(); }
|
@Override public void remove() { it.remove(); }
|
||||||
@Override public void close() { it.close(); }
|
@Override public void close() { it.close(); }
|
||||||
@Override public Iterator<GATKSAMRecord> iterator() { return this; }
|
@Override public Iterator<GATKSAMRecord> iterator() { return this; }
|
||||||
|
|
|
||||||
|
|
@ -1,75 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2012 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.gatk.utils.sam;
|
|
||||||
|
|
||||||
import htsjdk.samtools.SAMFileHeader;
|
|
||||||
import htsjdk.samtools.SAMRecord;
|
|
||||||
import htsjdk.samtools.SAMRecordFactory;
|
|
||||||
import htsjdk.samtools.BAMRecord;
|
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Factory interface implementation used to create GATKSamRecords
|
|
||||||
* from SAMFileReaders with SAM-JDK
|
|
||||||
*
|
|
||||||
* @author Mark DePristo
|
|
||||||
*/
|
|
||||||
public class GATKSamRecordFactory implements SAMRecordFactory {
|
|
||||||
|
|
||||||
/** Create a new SAMRecord to be filled in */
|
|
||||||
public SAMRecord createSAMRecord(SAMFileHeader header) {
|
|
||||||
throw new UserException.BadInput("The GATK now longer supports input SAM files");
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Create a new BAM Record. */
|
|
||||||
public BAMRecord createBAMRecord(final SAMFileHeader header,
|
|
||||||
final int referenceSequenceIndex,
|
|
||||||
final int alignmentStart,
|
|
||||||
final short readNameLength,
|
|
||||||
final short mappingQuality,
|
|
||||||
final int indexingBin,
|
|
||||||
final int cigarLen,
|
|
||||||
final int flags,
|
|
||||||
final int readLen,
|
|
||||||
final int mateReferenceSequenceIndex,
|
|
||||||
final int mateAlignmentStart,
|
|
||||||
final int insertSize,
|
|
||||||
final byte[] variableLengthBlock) {
|
|
||||||
return new GATKSAMRecord(header,
|
|
||||||
referenceSequenceIndex,
|
|
||||||
alignmentStart,
|
|
||||||
readNameLength,
|
|
||||||
mappingQuality,
|
|
||||||
indexingBin,
|
|
||||||
cigarLen,
|
|
||||||
flags,
|
|
||||||
readLen,
|
|
||||||
mateReferenceSequenceIndex,
|
|
||||||
mateAlignmentStart,
|
|
||||||
insertSize,
|
|
||||||
variableLengthBlock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -25,8 +25,10 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.utils.sam;
|
package org.broadinstitute.gatk.utils.sam;
|
||||||
|
|
||||||
import htsjdk.samtools.SAMFileReader;
|
import htsjdk.samtools.SamReader;
|
||||||
|
import htsjdk.samtools.SamReaderFactory;
|
||||||
import htsjdk.samtools.ValidationStringency;
|
import htsjdk.samtools.ValidationStringency;
|
||||||
|
import org.broadinstitute.gatk.utils.io.ReferenceBacked;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -39,12 +41,17 @@ import java.io.File;
|
||||||
* @author mhanna
|
* @author mhanna
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public class SAMFileReaderBuilder {
|
public class SAMReaderBuilder implements ReferenceBacked {
|
||||||
/**
|
/**
|
||||||
* From which file should input be read?
|
* From which file should input be read?
|
||||||
*/
|
*/
|
||||||
private File samFile = null;
|
private File samFile = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The reference file for the samFile.
|
||||||
|
*/
|
||||||
|
private File referenceFile = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* What compression level should be used when building this file?
|
* What compression level should be used when building this file?
|
||||||
*/
|
*/
|
||||||
|
|
@ -58,6 +65,16 @@ public class SAMFileReaderBuilder {
|
||||||
this.samFile = samFile;
|
this.samFile = samFile;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public File getReferenceFile() {
|
||||||
|
return referenceFile;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setReferenceFile(final File referenceFile) {
|
||||||
|
this.referenceFile = referenceFile;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the validation stringency to apply when reading this sam file.
|
* Sets the validation stringency to apply when reading this sam file.
|
||||||
* @param validationStringency Stringency to apply. Must not be null.
|
* @param validationStringency Stringency to apply. Must not be null.
|
||||||
|
|
@ -70,15 +87,16 @@ public class SAMFileReaderBuilder {
|
||||||
* Create the SAM reader, given the constituent parts accrued.
|
* Create the SAM reader, given the constituent parts accrued.
|
||||||
* @return Newly minted SAM file reader.
|
* @return Newly minted SAM file reader.
|
||||||
*/
|
*/
|
||||||
public SAMFileReader build() {
|
public SamReader build() {
|
||||||
if( samFile == null )
|
if( samFile == null )
|
||||||
throw new ReviewedGATKException( "Filename for output sam file must be supplied.");
|
throw new ReviewedGATKException( "Filename for output sam file must be supplied.");
|
||||||
if( validationStringency == null )
|
if( validationStringency == null )
|
||||||
throw new ReviewedGATKException( "Header for output sam file must be supplied.");
|
throw new ReviewedGATKException( "Header for output sam file must be supplied.");
|
||||||
|
|
||||||
SAMFileReader reader = new SAMFileReader( samFile );
|
return SamReaderFactory
|
||||||
reader.setValidationStringency( validationStringency );
|
.makeDefault()
|
||||||
|
.referenceSequence(this.getReferenceFile())
|
||||||
return reader;
|
.validationStringency(validationStringency)
|
||||||
|
.open(samFile);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -73,16 +73,16 @@ public class ListFileUtils {
|
||||||
throw new UserException.CouldNotReadInputFile(new File(inputFileName), "Unable to find file while unpacking reads", ex);
|
throw new UserException.CouldNotReadInputFile(new File(inputFileName), "Unable to find file while unpacking reads", ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if(inputFileName.toLowerCase().endsWith(".bam")) {
|
else if(inputFileName.toLowerCase().endsWith(".bam") || inputFileName.toLowerCase().endsWith(".cram")) {
|
||||||
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
||||||
}
|
}
|
||||||
else if(inputFileName.endsWith("stdin")) {
|
else if(inputFileName.endsWith("stdin")) {
|
||||||
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM files with the .bam extension and lists of BAM files " +
|
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM/CRAM files with the .bam/.cram extension and lists of BAM/CRAM files " +
|
||||||
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
|
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM/CRAM file or list " +
|
||||||
"of BAM files is in the correct format, update the extension, and try again.",inputFileName));
|
"of BAM/CRAM files is in the correct format, update the extension, and try again.",inputFileName));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return unpackedReads;
|
return unpackedReads;
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,6 @@ import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl;
|
||||||
import org.broadinstitute.gatk.utils.sam.ArtificialBAMBuilder;
|
import org.broadinstitute.gatk.utils.sam.ArtificialBAMBuilder;
|
||||||
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory;
|
|
||||||
import htsjdk.variant.variantcontext.Allele;
|
import htsjdk.variant.variantcontext.Allele;
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
import htsjdk.variant.variantcontext.VariantContextBuilder;
|
import htsjdk.variant.variantcontext.VariantContextBuilder;
|
||||||
|
|
@ -219,11 +218,10 @@ public class ExampleToCopyUnitTest extends BaseTest {
|
||||||
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(seq, 20, 10);
|
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(seq, 20, 10);
|
||||||
final File bam = bamBuilder.makeTemporarilyBAMFile();
|
final File bam = bamBuilder.makeTemporarilyBAMFile();
|
||||||
final SAMFileReader reader = new SAMFileReader(bam);
|
final SAMFileReader reader = new SAMFileReader(bam);
|
||||||
reader.setSAMRecordFactory(new GATKSamRecordFactory());
|
|
||||||
|
|
||||||
final Iterator<SAMRecord> bamIt = reader.iterator();
|
final Iterator<SAMRecord> bamIt = reader.iterator();
|
||||||
while ( bamIt.hasNext() ) {
|
while ( bamIt.hasNext() ) {
|
||||||
final GATKSAMRecord read = (GATKSAMRecord)bamIt.next(); // all reads are actually GATKSAMRecords
|
final SAMRecord read = bamIt.next(); // all reads are actually GATKSAMRecords
|
||||||
// TODO -- add some tests that use reads from a BAM
|
// TODO -- add some tests that use reads from a BAM
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -3,13 +3,13 @@
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<groupId>picard</groupId>
|
<groupId>picard</groupId>
|
||||||
<artifactId>picard</artifactId>
|
<artifactId>picard</artifactId>
|
||||||
<version>1.127.1667</version>
|
<version>1.128.1678</version>
|
||||||
<name>picard</name>
|
<name>picard</name>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>samtools</groupId>
|
<groupId>samtools</groupId>
|
||||||
<artifactId>htsjdk</artifactId>
|
<artifactId>htsjdk</artifactId>
|
||||||
<version>1.127.1690</version>
|
<version>1.128.1696</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<!-- TODO: Picard is using a custom zip with just ant's BZip2 classes. See also: http://www.kohsuke.org/bzip2 -->
|
<!-- TODO: Picard is using a custom zip with just ant's BZip2 classes. See also: http://www.kohsuke.org/bzip2 -->
|
||||||
<dependency>
|
<dependency>
|
||||||
Binary file not shown.
|
|
@ -3,7 +3,7 @@
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<groupId>samtools</groupId>
|
<groupId>samtools</groupId>
|
||||||
<artifactId>htsjdk</artifactId>
|
<artifactId>htsjdk</artifactId>
|
||||||
<version>1.127.1690</version>
|
<version>1.128.1696</version>
|
||||||
<name>htsjdk</name>
|
<name>htsjdk</name>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
Loading…
Reference in New Issue