Added introductory CRAM support.
Replaced usage of GATKSamRecordFactory with calls to wrapper GATKSAMRecord extending SAMRecord. Minor other updates for test changes. Added exampleCRAM.cram generated by GATK, with .bai and .crai indexes generated by CRAMTools. CRAM-to-CRAM test disabled due to https://github.com/samtools/htsjdk/issues/148 Using exampleBAM.bam input, outputs of GATK's generated CRAM match CRAMTools generated CRAM, but not samtools/PrintReads SAM output, as things like insert sizes are different. If required for other tools, CRAM indexes must be generated via CRAMTools until we can generate them via CRAMFileWriter. Generation of exampleCRAM.cram: * java -jar target/executable/GenomeAnalysisTK.jar -T PrintReads -R public/gatk-utils/src/test/resources/exampleFASTA.fasta -I public/gatk-utils/src/test/resources/exampleBAM.bam -o public/gatk-utils/src/test/resources/exampleCRAM.cram * java -jar cramtools-2.1.jar index -I public/gatk-utils/src/test/resources/exampleCRAM.cram * java -jar cramtools-2.1.jar index -I public/gatk-utils/src/test/resources/exampleCRAM.cram --bam-style-index CRAM generation by existing tools: * samtools view -C -T public/gatk-utils/src/test/resources/exampleFASTA.fasta -o testSamtools.cram public/gatk-utils/src/test/resources/exampleBAM.bam * java -jar cramtools-2.1.jar cram --ignore-md5-mismatch --capture-all-tags -Q -n -R public/gatk-utils/src/test/resources/exampleFASTA.fasta -I public/gatk-utils/src/test/resources/exampleBAM.bam -O testCRAMTools.cram * java -jar target/executable/GenomeAnalysisTK.jar -T PrintReads -R public/gatk-utils/src/test/resources/exampleFASTA.fasta -I public/gatk-utils/src/test/resources/exampleBAM.bam -o testGATK.cram CRAMTools view of the above: * java -jar cramtools-2.1.jar bam --skip-md5-check -R public/gatk-utils/src/test/resources/exampleFASTA.fasta -I public/gatk-utils/src/test/resources/exampleCRAM.cram | tail -n 1 * java -jar cramtools-2.1.jar bam --skip-md5-check -R public/gatk-utils/src/test/resources/exampleFASTA.fasta -I 
testSamtools.cram | tail -n 1 * java -jar cramtools-2.1.jar bam --skip-md5-check -R public/gatk-utils/src/test/resources/exampleFASTA.fasta -I testCRAMTools.cram | tail -n 1 * java -jar cramtools-2.1.jar bam --skip-md5-check -R public/gatk-utils/src/test/resources/exampleFASTA.fasta -I testGATK.cram | tail -n 1
This commit is contained in:
parent
de3ca65232
commit
1808c90d2a
|
|
@ -51,6 +51,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.utils.sam;
|
||||
|
||||
import htsjdk.samtools.GATKBin;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
|
|
@ -69,15 +71,30 @@ public class ClippedGATKSAMRecord extends GATKSAMRecord {
|
|||
* @param end inclusive last position in {@code read} included in the clipped view.
|
||||
*/
|
||||
public ClippedGATKSAMRecord(final GATKSAMRecord read, int start, int end) {
|
||||
super(read.getHeader(), read.getReferenceIndex(), read.getAlignmentStart() + start, (short) read.getReadNameLength(),
|
||||
(short) 100, -1, read.getCigarLength(), read.getFlags(), end - start,
|
||||
read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getInferredInsertSize(),
|
||||
new byte[0]);
|
||||
super(read.getHeader());
|
||||
this.setReferenceIndex(read.getReferenceIndex());
|
||||
this.setAlignmentStart(read.getAlignmentStart() + start);
|
||||
this.setMappingQuality(100);
|
||||
// setting read indexing bin below
|
||||
this.setFlags(read.getFlags());
|
||||
this.setMateReferenceIndex(read.getMateReferenceIndex());
|
||||
this.setMateAlignmentStart(read.getMateAlignmentStart());
|
||||
this.setInferredInsertSize(read.getInferredInsertSize());
|
||||
this.setReadBases(Arrays.copyOfRange(read.getReadBases(), start, end));
|
||||
this.setBaseQualities(Arrays.copyOfRange(read.getBaseQualities(),start,end));
|
||||
this.setReadName(read.getReadName());
|
||||
insertionQuals = Arrays.copyOfRange(read.getBaseInsertionQualities(),start,end);
|
||||
deletionQuals = Arrays.copyOfRange(read.getBaseDeletionQualities(),start,end);
|
||||
|
||||
// Set these to null in order to mark them as being candidates for lazy initialization.
|
||||
// If this is not done, they will have non-null defaults.
|
||||
super.setReadName(null);
|
||||
super.setCigarString(null);
|
||||
super.setReadBases(null);
|
||||
super.setBaseQualities(null);
|
||||
|
||||
// Do this after the above because setCigarString will clear it.
|
||||
GATKBin.setReadIndexingBin(this, -1);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||
|
||||
import htsjdk.samtools.GATKBin;
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.samtools.SAMSequenceDictionary;
|
||||
import htsjdk.samtools.SAMSequenceRecord;
|
||||
|
|
@ -317,22 +318,18 @@ public class ActiveRegionTestDataSet {
|
|||
|
||||
private class MyGATKSAMRecord extends GATKSAMRecord {
|
||||
protected MyGATKSAMRecord(final GATKSAMRecord r) {
|
||||
super(r.getHeader(), r.getReferenceIndex(), r.getAlignmentStart(), (short) r.getReadNameLength(),
|
||||
(short) 100, -1, r.getCigarLength(), r.getFlags(), r.getReadLength(),
|
||||
r.getMateReferenceIndex(), r.getMateAlignmentStart(), r.getInferredInsertSize(),
|
||||
new byte[0]);
|
||||
this.setReadBases(r.getReadBases());
|
||||
this.setBaseQualities(r.getBaseQualities());
|
||||
this.setReadName(r.getReadName());
|
||||
super(r);
|
||||
this.setMappingQuality(100);
|
||||
GATKBin.setReadIndexingBin(this, -1);
|
||||
}
|
||||
|
||||
ExponentialDistribution indelLengthDist = MathUtils.exponentialDistribution(1.0 / 0.9);
|
||||
|
||||
public MyGATKSAMRecord(final GATKSAMRecord r, final Random rnd) {
|
||||
super(r.getHeader(), r.getReferenceIndex(), r.getAlignmentStart(), (short) r.getReadNameLength(),
|
||||
(short) 100, -1, r.getCigarLength(), r.getFlags(), r.getReadLength(),
|
||||
r.getMateReferenceIndex(), r.getMateAlignmentStart(), r.getInferredInsertSize(),
|
||||
new byte[0]);
|
||||
super(r);
|
||||
this.setMappingQuality(100);
|
||||
// setting read indexing bin last
|
||||
|
||||
final byte[] bases = new byte[r.getReadBases().length];
|
||||
|
||||
final byte[] readBases = r.getReadBases();
|
||||
|
|
@ -384,7 +381,7 @@ public class ActiveRegionTestDataSet {
|
|||
this.setBaseQualities(r.getBaseQualities());
|
||||
this.setReadName(r.getReadName());
|
||||
|
||||
|
||||
GATKBin.setReadIndexingBin(this, -1);
|
||||
}
|
||||
|
||||
private int generateIndelLength(final Random rnd) {
|
||||
|
|
|
|||
|
|
@ -46,9 +46,13 @@ public class MyExampleWalkerIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
private File getResource(String path) throws URISyntaxException {
|
||||
return new File(publicTestDir, path);
|
||||
/*
|
||||
TODO: Enable proper resource extraction from the test jars. For now just use the publicTestDir path.
|
||||
URL resourceUrl = getClass().getResource(path);
|
||||
if (resourceUrl == null)
|
||||
throw new MissingResourceException("Resource not found: " + path, getClass().getSimpleName(), path);
|
||||
return new File(resourceUrl.toURI());
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ import org.broadinstitute.gatk.engine.io.stubs.Stub;
|
|||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformersMode;
|
||||
import org.broadinstitute.gatk.engine.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.gatk.utils.io.ReferenceBacked;
|
||||
import org.broadinstitute.gatk.utils.refdata.tracks.IndexDictionaryUtils;
|
||||
import org.broadinstitute.gatk.utils.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.gatk.utils.refdata.utils.RMDTriplet;
|
||||
|
|
@ -697,9 +698,12 @@ public class GenomeAnalysisEngine {
|
|||
* @param outputTracker the tracker supplying the initialization data.
|
||||
*/
|
||||
private void initializeOutputStreams(final OutputTracker outputTracker) {
|
||||
for (final Map.Entry<ArgumentSource, Object> input : getInputs().entrySet())
|
||||
for (final Map.Entry<ArgumentSource, Object> input : getInputs().entrySet()) {
|
||||
setReferenceFile(input.getValue());
|
||||
outputTracker.addInput(input.getKey(), input.getValue());
|
||||
}
|
||||
for (final Stub<?> stub : getOutputs()) {
|
||||
setReferenceFile(stub);
|
||||
stub.processArguments(argCollection);
|
||||
outputTracker.addOutput(stub);
|
||||
}
|
||||
|
|
@ -707,6 +711,12 @@ public class GenomeAnalysisEngine {
|
|||
outputTracker.prepareWalker(walker, getArguments().strictnessLevel);
|
||||
}
|
||||
|
||||
private void setReferenceFile(final Object object) {
|
||||
if (object instanceof ReferenceBacked) {
|
||||
((ReferenceBacked)object).setReferenceFile(argCollection.referenceFile);
|
||||
}
|
||||
}
|
||||
|
||||
public ReferenceDataSource getReferenceDataSource() {
|
||||
return referenceDataSource;
|
||||
}
|
||||
|
|
@ -907,6 +917,7 @@ public class GenomeAnalysisEngine {
|
|||
final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker;
|
||||
|
||||
return new SAMDataSource(
|
||||
argCollection.referenceFile,
|
||||
samReaderIDs,
|
||||
threadAllocation,
|
||||
argCollection.numberOfBAMFileHandles,
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import htsjdk.samtools.MergingSamRecordIterator;
|
|||
import htsjdk.samtools.SamFileHeaderMerger;
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.util.CloseableIterator;
|
||||
import htsjdk.samtools.util.CloserUtil;
|
||||
import htsjdk.samtools.util.RuntimeIOException;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.ReadMetrics;
|
||||
|
|
@ -49,7 +50,8 @@ import org.broadinstitute.gatk.utils.interval.IntervalMergingRule;
|
|||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator;
|
||||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIteratorAdapter;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecordIterator;
|
||||
import org.broadinstitute.gatk.utils.sam.SAMReaderID;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -64,7 +66,8 @@ import java.util.concurrent.Callable;
|
|||
* Converts shards to SAM iterators over the specified region
|
||||
*/
|
||||
public class SAMDataSource {
|
||||
final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
|
||||
/** Reference file */
|
||||
private final File referenceFile;
|
||||
|
||||
/** Backing support for reads. */
|
||||
protected final ReadProperties readProperties;
|
||||
|
|
@ -177,8 +180,11 @@ public class SAMDataSource {
|
|||
*
|
||||
* @param samFiles list of reads files.
|
||||
*/
|
||||
public SAMDataSource(Collection<SAMReaderID> samFiles, ThreadAllocation threadAllocation, Integer numFileHandles, GenomeLocParser genomeLocParser) {
|
||||
public SAMDataSource(final File referenceFile, final Collection<SAMReaderID> samFiles,
|
||||
final ThreadAllocation threadAllocation, final Integer numFileHandles,
|
||||
final GenomeLocParser genomeLocParser) {
|
||||
this(
|
||||
referenceFile,
|
||||
samFiles,
|
||||
threadAllocation,
|
||||
numFileHandles,
|
||||
|
|
@ -198,6 +204,7 @@ public class SAMDataSource {
|
|||
* For testing purposes
|
||||
*/
|
||||
public SAMDataSource(
|
||||
final File referenceFile,
|
||||
Collection<SAMReaderID> samFiles,
|
||||
ThreadAllocation threadAllocation,
|
||||
Integer numFileHandles,
|
||||
|
|
@ -209,7 +216,8 @@ public class SAMDataSource {
|
|||
ValidationExclusion exclusionList,
|
||||
Collection<ReadFilter> supplementalFilters,
|
||||
boolean includeReadsWithDeletionAtLoci) {
|
||||
this( samFiles,
|
||||
this( referenceFile,
|
||||
samFiles,
|
||||
threadAllocation,
|
||||
numFileHandles,
|
||||
genomeLocParser,
|
||||
|
|
@ -230,6 +238,7 @@ public class SAMDataSource {
|
|||
|
||||
/**
|
||||
* Create a new SAM data source given the supplied read metadata.
|
||||
* @param referenceFile reference file.
|
||||
* @param samFiles list of reads files.
|
||||
* @param useOriginalBaseQualities True if original base qualities should be used.
|
||||
* @param strictness Stringency of reads file parsing.
|
||||
|
|
@ -247,6 +256,7 @@ public class SAMDataSource {
|
|||
* @param intervalMergingRule how are adjacent intervals merged by the sharder
|
||||
*/
|
||||
public SAMDataSource(
|
||||
final File referenceFile,
|
||||
Collection<SAMReaderID> samFiles,
|
||||
ThreadAllocation threadAllocation,
|
||||
Integer numFileHandles,
|
||||
|
|
@ -265,6 +275,7 @@ public class SAMDataSource {
|
|||
final Map<String, String> sampleRenameMap,
|
||||
final IntervalMergingRule intervalMergingRule) {
|
||||
|
||||
this.referenceFile = referenceFile;
|
||||
this.readMetrics = new ReadMetrics();
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.intervalMergingRule = intervalMergingRule;
|
||||
|
|
@ -303,7 +314,7 @@ public class SAMDataSource {
|
|||
"Please check that the file is present and readable and try again.");
|
||||
|
||||
// Get the sort order, forcing it to coordinate if unsorted.
|
||||
SAMFileReader reader = readers.getReader(readerID);
|
||||
SamReader reader = readers.getReader(readerID);
|
||||
SAMFileHeader header = reader.getFileHeader();
|
||||
|
||||
headers.put(readerID,header);
|
||||
|
|
@ -343,7 +354,7 @@ public class SAMDataSource {
|
|||
// cache the read group id (original) -> read group id (merged)
|
||||
// and read group id (merged) -> read group id (original) mappings.
|
||||
for(SAMReaderID id: readerIDs) {
|
||||
SAMFileReader reader = readers.getReader(id);
|
||||
SamReader reader = readers.getReader(id);
|
||||
|
||||
ReadGroupMapping mappingToMerged = new ReadGroupMapping();
|
||||
|
||||
|
|
@ -385,8 +396,8 @@ public class SAMDataSource {
|
|||
public void close() {
|
||||
SAMReaders readers = resourcePool.getAvailableReaders();
|
||||
for(SAMReaderID readerID: readerIDs) {
|
||||
SAMFileReader reader = readers.getReader(readerID);
|
||||
reader.close();
|
||||
SamReader reader = readers.getReader(readerID);
|
||||
CloserUtil.close(reader);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -463,14 +474,6 @@ public class SAMDataSource {
|
|||
return mergedToOriginalReadGroupMappings.get(mergedReadGroupId);
|
||||
}
|
||||
|
||||
/**
|
||||
* True if all readers have an index.
|
||||
* @return True if all readers have an index.
|
||||
*/
|
||||
public boolean hasIndex() {
|
||||
return readerIDs.size() == bamIndices.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the index for a particular reader. Always preloaded.
|
||||
* @param id Id of the reader.
|
||||
|
|
@ -480,6 +483,44 @@ public class SAMDataSource {
|
|||
return bamIndices.get(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the index for a particular reader exists.
|
||||
* @param id Id of the reader.
|
||||
* @return True if the index exists.
|
||||
*/
|
||||
public boolean hasIndex(final SAMReaderID id) {
|
||||
return bamIndices.containsKey(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* True if all readers that require an index for SAMFileSpan creation have an index.
|
||||
* @return True if all readers that require an index for SAMFileSpan creation have an index.
|
||||
*/
|
||||
public boolean hasIndex() {
|
||||
for (final SAMReaderID readerID: readerIDs)
|
||||
if (isSAMFileSpanSupported(readerID))
|
||||
if (!hasIndex(readerID))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
/**
|
||||
* Returns true if the reader can use file spans.
|
||||
* @return true if file spans are supported.
|
||||
*/
|
||||
private boolean isSAMFileSpanSupported(final SAMReaderID readerID) {
|
||||
// example: https://github.com/samtools/htsjdk/blob/ee4308ede60962f3ab4275473ac384724b471149/src/java/htsjdk/samtools/BAMFileReader.java#L341
|
||||
return readerID.getSamFile().getName().toLowerCase().endsWith(SamReader.Type.BAM_TYPE.fileExtension());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the reader caches its SAMFileHeader for each iterator.
|
||||
* @return true if this reader caches its SAMFileHeader for each iterator.
|
||||
*/
|
||||
private boolean isIteratorSAMFileHeaderCached(final SAMReaderID readerID) {
|
||||
// example: https://github.com/samtools/htsjdk/blob/ee4308ede60962f3ab4275473ac384724b471149/src/java/htsjdk/samtools/CRAMFileReader.java#L183
|
||||
return !readerID.getSamFile().getName().toLowerCase().endsWith(SamReader.Type.CRAM_TYPE.fileExtension());
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the sort order of the readers.
|
||||
* @return Sort order. Can be unsorted, coordinate order, or query name order.
|
||||
|
|
@ -538,7 +579,17 @@ public class SAMDataSource {
|
|||
SAMReaders readers = resourcePool.getAvailableReaders();
|
||||
|
||||
for ( SAMReaderID id: getReaderIDs() ) {
|
||||
initialPositions.put(id, new GATKBAMFileSpan(readers.getReader(id).getFilePointerSpanningReads()));
|
||||
GATKBAMFileSpan span;
|
||||
try {
|
||||
span = new GATKBAMFileSpan(readers.getReader(id).indexing().getFilePointerSpanningReads());
|
||||
} catch (RuntimeException e) {
|
||||
if ("Not implemented.".equals(e.getMessage())) { https://github.com/samtools/htsjdk/blob/035d4319643657d715e93c53c13fe4a1f64e0188/src/java/htsjdk/samtools/CRAMFileReader.java#L197
|
||||
span = new GATKBAMFileSpan(new GATKChunk(0, Long.MAX_VALUE));
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
initialPositions.put(id, span);
|
||||
}
|
||||
|
||||
resourcePool.releaseReaders(readers);
|
||||
|
|
@ -567,7 +618,7 @@ public class SAMDataSource {
|
|||
Map<SamReader,CloseableIterator<SAMRecord>> iteratorMap = new HashMap<>();
|
||||
|
||||
for(SAMReaderID id: getReaderIDs()) {
|
||||
CloseableIterator<SAMRecord> iterator = null;
|
||||
CloseableIterator<SAMRecord> iterator;
|
||||
|
||||
// TODO: null used to be the signal for unmapped, but we've replaced that with a simple index query for the last bin.
|
||||
// TODO: Kill this check once we've proven that the design elements are gone.
|
||||
|
|
@ -576,19 +627,33 @@ public class SAMDataSource {
|
|||
|
||||
try {
|
||||
if(threadAllocation.getNumIOThreads() > 0) {
|
||||
// TODO: need to add a friendly error if -nit is used with a non-BAM input. Later, possibly add this capability for CRAM once htsjdk supports CRAM file spans.
|
||||
BlockInputStream inputStream = readers.getInputStream(id);
|
||||
inputStream.submitAccessPlan(new BAMAccessPlan(id, inputStream, (GATKBAMFileSpan) shard.getFileSpans().get(id)));
|
||||
BAMRecordCodec codec = new BAMRecordCodec(getHeader(id),factory);
|
||||
BAMRecordCodec codec = new BAMRecordCodec(getHeader(id));
|
||||
codec.setInputStream(inputStream);
|
||||
iterator = new BAMCodecIterator(inputStream,readers.getReader(id),codec);
|
||||
}
|
||||
else {
|
||||
iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id));
|
||||
final SamReader reader = readers.getReader(id);
|
||||
try {
|
||||
iterator = ((SamReader.Indexing)reader).iterator(shard.getFileSpans().get(id));
|
||||
} catch (RuntimeException re) {
|
||||
if ("Not implemented.".equals(re.getMessage())) { // https://github.com/samtools/htsjdk/blob/429f2a8585d9c98a3efd4cedc5188b60b1e66ac5/src/java/htsjdk/samtools/CRAMFileReader.java#L192
|
||||
// No way to jump into the file span. Query the whole file.
|
||||
iterator = readers.getReader(id).iterator();
|
||||
} else {
|
||||
throw re;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch ( RuntimeException e ) { // we need to catch RuntimeExceptions here because the Picard code is throwing them (among SAMFormatExceptions) sometimes
|
||||
throw new UserException.MalformedBAM(id.getSamFile(), e.getMessage());
|
||||
}
|
||||
|
||||
// At the moment, too many other classes to change for GATKSAMRecordIterator converter.
|
||||
// Force the compiler to just let the conversion happen, since generics are erased anyway.
|
||||
iterator = (CloseableIterator<SAMRecord>)(Object)new GATKSAMRecordIterator(iterator);
|
||||
iterator = new MalformedBAMErrorReformatingIterator(id.getSamFile(), iterator);
|
||||
if(shard.getGenomeLocs().size() > 0)
|
||||
iterator = new IntervalOverlapFilteringIterator(iterator,shard.getGenomeLocs());
|
||||
|
|
@ -614,11 +679,11 @@ public class SAMDataSource {
|
|||
|
||||
private class BAMCodecIterator implements CloseableIterator<SAMRecord> {
|
||||
private final BlockInputStream inputStream;
|
||||
private final SAMFileReader reader;
|
||||
private final SamReader reader;
|
||||
private final BAMRecordCodec codec;
|
||||
private SAMRecord nextRead;
|
||||
|
||||
private BAMCodecIterator(final BlockInputStream inputStream, final SAMFileReader reader, final BAMRecordCodec codec) {
|
||||
private BAMCodecIterator(final BlockInputStream inputStream, final SamReader reader, final BAMRecordCodec codec) {
|
||||
this.inputStream = inputStream;
|
||||
this.reader = reader;
|
||||
this.codec = codec;
|
||||
|
|
@ -823,7 +888,7 @@ public class SAMDataSource {
|
|||
/**
|
||||
* A collection of readers derived from a reads metadata structure.
|
||||
*/
|
||||
private class SAMReaders implements Iterable<SAMFileReader> {
|
||||
private class SAMReaders implements Iterable<SamReader> {
|
||||
/**
|
||||
* Cached representation of the merged header used to generate a merging iterator.
|
||||
*/
|
||||
|
|
@ -832,7 +897,7 @@ public class SAMDataSource {
|
|||
/**
|
||||
* Internal storage for a map of id -> reader.
|
||||
*/
|
||||
private final Map<SAMReaderID,SAMFileReader> readers = new LinkedHashMap<SAMReaderID,SAMFileReader>();
|
||||
private final Map<SAMReaderID,SamReader> readers = new LinkedHashMap<>();
|
||||
|
||||
/**
|
||||
* The input streams backing
|
||||
|
|
@ -860,7 +925,11 @@ public class SAMDataSource {
|
|||
|
||||
checkForUnsupportedBamFile(init.reader.getFileHeader());
|
||||
|
||||
if (removeProgramRecords) {
|
||||
if (removeProgramRecords && isIteratorSAMFileHeaderCached(readerID)) {
|
||||
// Only works when the SamReader implementation caches its header.
|
||||
// Some implementations (ex: CRAM) rewrite the new underlying file header in reader.getIterator().
|
||||
// Later, when MergingSamRecordIterator goes to check the headers with .contains()/.equals(),
|
||||
// it will error out complaining it can't find the unmodified version of the header.
|
||||
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
|
||||
}
|
||||
|
||||
|
|
@ -883,9 +952,9 @@ public class SAMDataSource {
|
|||
|
||||
// Examine the bam headers, perform any requested sample renaming on them, and add
|
||||
// them to the list of headers to pass to the Picard SamFileHeaderMerger:
|
||||
for ( final Map.Entry<SAMReaderID, SAMFileReader> readerEntry : readers.entrySet() ) {
|
||||
for ( final Map.Entry<SAMReaderID, SamReader> readerEntry : readers.entrySet() ) {
|
||||
final SAMReaderID readerID = readerEntry.getKey();
|
||||
final SAMFileReader reader = readerEntry.getValue();
|
||||
final SamReader reader = readerEntry.getValue();
|
||||
final SAMFileHeader header = reader.getFileHeader();
|
||||
|
||||
// The remappedSampleName will be null if either no on-the-fly sample renaming was requested,
|
||||
|
|
@ -1009,7 +1078,7 @@ public class SAMDataSource {
|
|||
* @param id The ID of the reader to retrieve.
|
||||
* @return the reader associated with the given id.
|
||||
*/
|
||||
public SAMFileReader getReader(SAMReaderID id) {
|
||||
public SamReader getReader(SAMReaderID id) {
|
||||
if(!readers.containsKey(id))
|
||||
throw new NoSuchElementException("No reader is associated with id " + id);
|
||||
return readers.get(id);
|
||||
|
|
@ -1030,7 +1099,7 @@ public class SAMDataSource {
|
|||
* @return The id associated the given reader, or null if the reader is not present in this collection.
|
||||
*/
|
||||
protected SAMReaderID getReaderID(SamReader reader) {
|
||||
for(Map.Entry<SAMReaderID,SAMFileReader> entry: readers.entrySet()) {
|
||||
for(Map.Entry<SAMReaderID,SamReader> entry: readers.entrySet()) {
|
||||
if(reader == entry.getValue())
|
||||
return entry.getKey();
|
||||
}
|
||||
|
|
@ -1042,7 +1111,7 @@ public class SAMDataSource {
|
|||
* Returns an iterator over all readers in this structure.
|
||||
* @return An iterator over readers.
|
||||
*/
|
||||
public Iterator<SAMFileReader> iterator() {
|
||||
public Iterator<SamReader> iterator() {
|
||||
return readers.values().iterator();
|
||||
}
|
||||
|
||||
|
|
@ -1058,18 +1127,23 @@ public class SAMDataSource {
|
|||
class ReaderInitializer implements Callable<ReaderInitializer> {
|
||||
final SAMReaderID readerID;
|
||||
BlockInputStream blockInputStream = null;
|
||||
SAMFileReader reader;
|
||||
SamReader reader;
|
||||
|
||||
public ReaderInitializer(final SAMReaderID readerID) {
|
||||
this.readerID = readerID;
|
||||
}
|
||||
|
||||
public ReaderInitializer call() {
|
||||
final File indexFile = findIndexFile(readerID.getSamFile());
|
||||
try {
|
||||
if (threadAllocation.getNumIOThreads() > 0)
|
||||
blockInputStream = new BlockInputStream(dispatcher,readerID,false);
|
||||
reader = new SAMFileReader(readerID.getSamFile(),indexFile,false);
|
||||
reader = SamReaderFactory.makeDefault()
|
||||
.referenceSequence(referenceFile)
|
||||
.validationStringency(validationStringency)
|
||||
.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
|
||||
.setOption(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, true)
|
||||
.open(readerID.getSamFile());
|
||||
|
||||
} catch ( RuntimeIOException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(readerID.getSamFile(), e);
|
||||
} catch ( SAMFormatException e ) {
|
||||
|
|
@ -1081,9 +1155,6 @@ public class SAMDataSource {
|
|||
catch ( RuntimeException e ) {
|
||||
throw new UserException.MalformedBAM(readerID.getSamFile(), e.getMessage());
|
||||
}
|
||||
reader.setSAMRecordFactory(factory);
|
||||
reader.enableFileSource(true);
|
||||
reader.setValidationStringency(validationStringency);
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ public class FindLargeShards extends CommandLineProgram {
|
|||
|
||||
// initialize reads
|
||||
List<SAMReaderID> bamReaders = ListFileUtils.unpackBAMFileList(samFiles,parser);
|
||||
SAMDataSource dataSource = new SAMDataSource(bamReaders,new ThreadAllocation(),null,genomeLocParser);
|
||||
SAMDataSource dataSource = new SAMDataSource(referenceFile, bamReaders, new ThreadAllocation(), null, genomeLocParser);
|
||||
|
||||
// intervals
|
||||
final GenomeLocSortedSet intervalSortedSet;
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.io;
|
||||
|
||||
import htsjdk.samtools.SAMFileReader;
|
||||
import htsjdk.samtools.ValidationStringency;
|
||||
import org.broadinstitute.gatk.utils.io.ReferenceBacked;
|
||||
import org.broadinstitute.gatk.utils.commandline.ArgumentSource;
|
||||
import org.broadinstitute.gatk.engine.io.storage.Storage;
|
||||
import org.broadinstitute.gatk.engine.io.storage.StorageFactory;
|
||||
|
|
@ -37,7 +37,7 @@ import org.broadinstitute.gatk.utils.classloader.JVMUtils;
|
|||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.io.IOUtils;
|
||||
import org.broadinstitute.gatk.utils.sam.SAMFileReaderBuilder;
|
||||
import org.broadinstitute.gatk.utils.sam.SAMReaderBuilder;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
|
|
@ -49,7 +49,12 @@ import java.util.Map;
|
|||
* Manages the output and err streams that are created specifically for walker
|
||||
* output.
|
||||
*/
|
||||
public abstract class OutputTracker {
|
||||
public abstract class OutputTracker implements ReferenceBacked {
|
||||
/**
|
||||
* The reference file.
|
||||
*/
|
||||
private File referenceFile;
|
||||
|
||||
/**
|
||||
* The streams to which walker users should be reading directly.
|
||||
*/
|
||||
|
|
@ -78,6 +83,16 @@ public abstract class OutputTracker {
|
|||
*/
|
||||
public abstract <T> T getStorage( Stub<T> stub );
|
||||
|
||||
@Override
|
||||
public File getReferenceFile() {
|
||||
return referenceFile;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setReferenceFile(final File referenceFile) {
|
||||
this.referenceFile = referenceFile;
|
||||
}
|
||||
|
||||
public void prepareWalker( Walker walker, ValidationStringency strictnessLevel ) {
|
||||
for( Map.Entry<ArgumentSource,Object> io: inputs.entrySet() ) {
|
||||
ArgumentSource targetField = io.getKey();
|
||||
|
|
@ -85,8 +100,8 @@ public abstract class OutputTracker {
|
|||
|
||||
// Ghastly hack: reaches in and finishes building out the SAMFileReader.
|
||||
// TODO: Generalize this, and move it to its own initialization step.
|
||||
if( targetValue instanceof SAMFileReaderBuilder) {
|
||||
SAMFileReaderBuilder builder = (SAMFileReaderBuilder)targetValue;
|
||||
if( targetValue instanceof SAMReaderBuilder) {
|
||||
SAMReaderBuilder builder = (SAMReaderBuilder)targetValue;
|
||||
builder.setValidationStringency(strictnessLevel);
|
||||
targetValue = builder.build();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,9 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|||
import org.broadinstitute.gatk.utils.sam.SimplifyingSAMFileWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
|
||||
|
|
@ -46,6 +49,7 @@ import java.lang.reflect.Method;
|
|||
*/
|
||||
public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWriter> {
|
||||
private final File file;
|
||||
private File referenceFasta;
|
||||
private SAMFileWriter writer;
|
||||
|
||||
private static Logger logger = Logger.getLogger(SAMFileWriterStorage.class);
|
||||
|
|
@ -55,6 +59,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
|||
}
|
||||
|
||||
public SAMFileWriterStorage( SAMFileWriterStub stub, File file ) {
|
||||
this.referenceFasta = stub.getReferenceFile();
|
||||
this.file = file;
|
||||
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||
// Enable automatic index creation for pre-sorted BAMs.
|
||||
|
|
@ -69,9 +74,14 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
|||
|
||||
if(stub.getOutputFile() != null) {
|
||||
try {
|
||||
this.writer = createBAMWriter(factory,stub.getFileHeader(),stub.isPresorted(),file,stub.getCompressionLevel());
|
||||
}
|
||||
catch(RuntimeIOException ex) {
|
||||
if (stub.getOutputFile().getName().toLowerCase().endsWith(".cram")) {
|
||||
this.writer = createCRAMWriter(factory, stub.getFileHeader(), new FileOutputStream(file), this.referenceFasta);
|
||||
} else {
|
||||
this.writer = createBAMWriter(factory,stub.getFileHeader(),stub.isPresorted(),file,stub.getCompressionLevel());
|
||||
}
|
||||
} catch(IOException ex) {
|
||||
throw new UserException.CouldNotCreateOutputFile(file, "file could not be created", ex);
|
||||
} catch(RuntimeIOException ex) {
|
||||
throw new UserException.CouldNotCreateOutputFile(file,"file could not be created",ex);
|
||||
}
|
||||
}
|
||||
|
|
@ -117,6 +127,13 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
|||
}
|
||||
}
|
||||
|
||||
private SAMFileWriter createCRAMWriter(final SAMFileWriterFactory factory,
|
||||
final SAMFileHeader header,
|
||||
final OutputStream outputStream,
|
||||
final File referenceFasta) {
|
||||
return factory.makeCRAMWriter(header, outputStream, referenceFasta);
|
||||
}
|
||||
|
||||
private SAMFileWriter createBAMWriter(final SAMFileWriterFactory factory,
|
||||
final SAMFileHeader header,
|
||||
final boolean presorted,
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import htsjdk.samtools.util.ProgressLoggerInterface;
|
|||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.gatk.engine.io.OutputTracker;
|
||||
import org.broadinstitute.gatk.utils.io.ReferenceBacked;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMFileWriter;
|
||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||
import org.broadinstitute.gatk.utils.baq.BAQ;
|
||||
|
|
@ -50,7 +51,7 @@ import java.util.List;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter {
|
||||
public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter, ReferenceBacked {
|
||||
/**
|
||||
* Engine to use for collecting attributes for the output SAM file.
|
||||
*/
|
||||
|
|
@ -67,6 +68,11 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter
|
|||
*/
|
||||
private final File samFile;
|
||||
|
||||
/**
|
||||
* The reference file for this stub.
|
||||
*/
|
||||
private File referenceFile;
|
||||
|
||||
/**
|
||||
* The target output stream, to be used in place of the SAM file.
|
||||
*/
|
||||
|
|
@ -189,6 +195,16 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, GATKSAMFileWriter
|
|||
return samOutputStream;
|
||||
}
|
||||
|
||||
/**
 * Returns the reference file currently held by this stub.
 *
 * @return the value of the {@code referenceFile} field; NOTE(review): presumably null until
 *         {@code setReferenceFile} has been called -- confirm against the constructors.
 */
@Override
|
||||
public File getReferenceFile() {
|
||||
return referenceFile;
|
||||
}
|
||||
|
||||
/**
 * Replaces the reference file held by this stub.
 *
 * @param referenceFile the reference file to store; no validation is performed here
 */
@Override
|
||||
public void setReferenceFile(final File referenceFile) {
|
||||
this.referenceFile = referenceFile;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the header to use when creating the new SAM file.
|
||||
* @return header to use when creating the new SAM file.
|
||||
|
|
|
|||
|
|
@ -29,14 +29,14 @@ import htsjdk.samtools.SAMFileReader;
|
|||
import org.broadinstitute.gatk.utils.commandline.*;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.sam.SAMFileReaderBuilder;
|
||||
import org.broadinstitute.gatk.utils.sam.SAMReaderBuilder;
|
||||
|
||||
import java.lang.reflect.Type;
|
||||
|
||||
/**
|
||||
* Describe how to parse SAMFileReaders.
|
||||
* Describe how to parse SAMReaders.
|
||||
*/
|
||||
public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
public class SAMReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
/**
|
||||
* The engine into which output stubs should be fed.
|
||||
*/
|
||||
|
|
@ -46,7 +46,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
* Create a new SAMFileReader argument, notifying the given engine when that argument has been created.
|
||||
* @param engine engine
|
||||
*/
|
||||
public SAMFileReaderArgumentTypeDescriptor( GenomeAnalysisEngine engine ) {
|
||||
public SAMReaderArgumentTypeDescriptor(GenomeAnalysisEngine engine) {
|
||||
this.engine = engine;
|
||||
}
|
||||
|
||||
|
|
@ -57,7 +57,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
|
||||
@Override
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||
SAMFileReaderBuilder builder = new SAMFileReaderBuilder();
|
||||
SAMReaderBuilder builder = new SAMReaderBuilder();
|
||||
|
||||
ArgumentMatchValue readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches );
|
||||
|
||||
|
|
@ -71,7 +71,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
|
||||
// MASSIVE KLUDGE! SAMFileReader is tricky to implement and we don't yet have a stub. Return null, then
|
||||
// let the output tracker load it in.
|
||||
// TODO: Add a stub for SAMFileReader.
|
||||
// TODO: Add a stub for SAMReader.
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -38,7 +38,6 @@ import org.broadinstitute.gatk.utils.collections.Pair;
|
|||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.vcf.VCFCodec;
|
||||
import htsjdk.variant.vcf.VCFHeader;
|
||||
|
|
@ -312,7 +311,6 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
|
|||
final File outputBam = executeTest("testGATKEngineConsolidatesCigars", spec).first.get(0);
|
||||
final SAMFileReader reader = new SAMFileReader(outputBam);
|
||||
reader.setValidationStringency(ValidationStringency.SILENT);
|
||||
reader.setSAMRecordFactory(new GATKSamRecordFactory());
|
||||
|
||||
final SAMRecord read = reader.iterator().next();
|
||||
reader.close();
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
|
||||
// Test the accuracy of the read metrics
|
||||
|
||||
private File referenceFile;
|
||||
private IndexedFastaSequenceFile reference;
|
||||
private SAMSequenceDictionary dictionary;
|
||||
private SAMFileHeader header;
|
||||
|
|
@ -93,7 +94,8 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
|
||||
@BeforeClass
|
||||
private void init() throws IOException {
|
||||
reference = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
referenceFile = new File(b37KGReference);
|
||||
reference = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
dictionary = reference.getSequenceDictionary();
|
||||
genomeLocParser = new GenomeLocParser(dictionary);
|
||||
header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
|
||||
|
|
@ -149,7 +151,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
||||
samFiles.add(readerID);
|
||||
|
||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
false,
|
||||
ValidationStringency.STRICT,
|
||||
null,
|
||||
|
|
@ -184,7 +186,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
||||
samFiles.add(readerID);
|
||||
|
||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
false,
|
||||
ValidationStringency.STRICT,
|
||||
null,
|
||||
|
|
@ -225,7 +227,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
|
||||
samFiles.add(readerID);
|
||||
|
||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
false,
|
||||
ValidationStringency.STRICT,
|
||||
null,
|
||||
|
|
@ -272,7 +274,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
final List<ReadFilter> filters = new ArrayList<>();
|
||||
filters.add(new EveryTenthReadFilter());
|
||||
|
||||
final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
final SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
false,
|
||||
ValidationStringency.STRICT,
|
||||
null,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.arguments;
|
||||
|
||||
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Test the GATK core CRAM parsing mechanism.
|
||||
*/
|
||||
public class CramIntegrationTest extends WalkerTest {
|
||||
/**
 * Supplies the rows consumed by {@code testCRAM}. Each row holds, in order: the walker name,
 * the input file name (appended to {@code publicTestDir}), extra command-line arguments,
 * the output file extension, and the MD5 string handed to {@code WalkerTestSpec}.
 * Rows come in indexed / "-noindex" pairs that share the same MD5 string.
 */
@DataProvider(name="cramData")
|
||||
public Object[][] getCRAMData() {
|
||||
return new Object[][] {
|
||||
{"PrintReads", "exampleBAM.bam", "", "cram", "026ebc00c2a8f9832e37f1a6a0f53521"},
|
||||
//{"PrintReads", "exampleCRAM.cram", "", "cram", "026ebc00c2a8f9832e37f1a6a0f53521"}, https://github.com/samtools/htsjdk/issues/148
|
||||
{"PrintReads", "exampleCRAM.cram", "", "bam", "99e5f740b43594a5b8e5bc1a007719e0"},
|
||||
{"PrintReads", "exampleCRAM-noindex.cram", "", "bam", "99e5f740b43594a5b8e5bc1a007719e0"},
|
||||
{"PrintReads", "exampleCRAM.cram", " -L chr1:200", "bam", "072435e8272411c31b2234f851706384"},
|
||||
{"PrintReads", "exampleCRAM-noindex.cram", " -L chr1:200", "bam", "072435e8272411c31b2234f851706384"},
|
||||
{"CountLoci", "exampleCRAM.cram", "", "txt", "ade93df31a6150321c1067e749cae9be"},
|
||||
{"CountLoci", "exampleCRAM-noindex.cram", "", "txt", "ade93df31a6150321c1067e749cae9be"},
|
||||
{"CountLoci", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
|
||||
{"CountLoci", "exampleCRAM-noindex.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
|
||||
{"CountReads", "exampleCRAM.cram", "", "txt", "4fbafd6948b6529caa2b78e476359875"},
|
||||
{"CountReads", "exampleCRAM-noindex.cram", "", "txt", "4fbafd6948b6529caa2b78e476359875"},
|
||||
{"CountReads", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
|
||||
{"CountReads", "exampleCRAM-noindex.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
|
||||
{"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "9598062587ad8d2ec596a8ecb19be979"},
|
||||
{"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
|
||||
{"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Runs one data-provider row. The command line is assembled as
 * {@code -T Test<walker>Walker -I <publicTestDir><input> -R <exampleFASTA><args> -o %s}
 * and passed to {@code executeTest} with one output file of extension {@code ext}.
 *
 * @param walker walker name, wrapped as {@code Test<walker>Walker} for the -T argument
 * @param input  input file name, appended to {@code publicTestDir}
 * @param args   extra arguments appended verbatim (rows supply a leading space when non-empty)
 * @param ext    extension of the single output file
 * @param md5    MD5 string handed to {@code WalkerTestSpec} for that output
 */
@Test(dataProvider = "cramData")
|
||||
public void testCRAM(String walker, String input, String args, String ext, String md5) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
" -T Test" + walker + "Walker" +
|
||||
" -I " + publicTestDir + input +
|
||||
" -R " + exampleFASTA +
|
||||
args +
|
||||
" -o %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(ext),
|
||||
Arrays.asList(md5));
|
||||
executeTest(String.format("testCRAM %s %s -> %s: %s", walker, input, ext, args), spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -76,7 +76,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator();
|
||||
|
||||
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
|
||||
Shard shard = new LocusShard(genomeLocParser, new SAMDataSource(Collections.<SAMReaderID>emptyList(),new ThreadAllocation(),null,genomeLocParser),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
|
||||
Shard shard = new LocusShard(genomeLocParser, new SAMDataSource(null,Collections.<SAMReaderID>emptyList(),new ThreadAllocation(),null,genomeLocParser),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, null, null);
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ import java.util.Collections;
|
|||
public class MockLocusShard extends LocusShard {
|
||||
public MockLocusShard(final GenomeLocParser genomeLocParser,final List<GenomeLoc> intervals) {
|
||||
super( genomeLocParser,
|
||||
new SAMDataSource(Collections.<SAMReaderID>emptyList(),new ThreadAllocation(),null,genomeLocParser),
|
||||
new SAMDataSource(null, Collections.<SAMReaderID>emptyList(),new ThreadAllocation(),null,genomeLocParser),
|
||||
intervals,
|
||||
null);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -84,7 +84,8 @@ public class ReadShardBalancerUnitTest extends BaseTest {
|
|||
public void run() {
|
||||
createTestBAM();
|
||||
|
||||
SAMDataSource dataSource = new SAMDataSource(Arrays.asList(testBAM),
|
||||
SAMDataSource dataSource = new SAMDataSource(null, // Reference not used in this test.
|
||||
Arrays.asList(testBAM),
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
new GenomeLocParser(header.getSequenceDictionary()),
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
// TODO: These legacy tests should really be replaced with a more comprehensive suite of tests for SAMDataSource
|
||||
|
||||
private List<SAMReaderID> readers;
|
||||
private File referenceFile;
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
|
|
@ -76,7 +77,8 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
readers = new ArrayList<SAMReaderID>();
|
||||
|
||||
// sequence
|
||||
seq = new CachingIndexedFastaSequenceFile(new File(b36KGReference));
|
||||
referenceFile = new File(b36KGReference);
|
||||
seq = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
genomeLocParser = new GenomeLocParser(seq.getSequenceDictionary());
|
||||
}
|
||||
|
||||
|
|
@ -101,7 +103,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),new Tags()));
|
||||
|
||||
// the sharding strat.
|
||||
SAMDataSource data = new SAMDataSource(readers,
|
||||
SAMDataSource data = new SAMDataSource(
|
||||
referenceFile,
|
||||
readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
|
|
@ -155,7 +159,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags()));
|
||||
|
||||
// use defaults
|
||||
SAMDataSource data = new SAMDataSource(readers,
|
||||
SAMDataSource data = new SAMDataSource(
|
||||
referenceFile,
|
||||
readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
|
|
@ -171,7 +177,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor");
|
||||
|
||||
boolean removeProgramRecords = false;
|
||||
data = new SAMDataSource(readers,
|
||||
data = new SAMDataSource(
|
||||
referenceFile,
|
||||
readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
|
|
@ -192,7 +200,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false");
|
||||
|
||||
removeProgramRecords = true;
|
||||
data = new SAMDataSource(readers,
|
||||
data = new SAMDataSource(
|
||||
referenceFile,
|
||||
readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
|
|
@ -217,7 +227,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
public void testFailOnReducedReads() {
|
||||
readers.add(new SAMReaderID(new File(privateTestDir + "old.reduced.bam"), new Tags()));
|
||||
|
||||
SAMDataSource data = new SAMDataSource(readers,
|
||||
SAMDataSource data = new SAMDataSource(
|
||||
referenceFile,
|
||||
readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
|
|
@ -234,7 +246,9 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
public void testFailOnReducedReadsRemovingProgramRecords() {
|
||||
readers.add(new SAMReaderID(new File(privateTestDir + "old.reduced.bam"), new Tags()));
|
||||
|
||||
SAMDataSource data = new SAMDataSource(readers,
|
||||
SAMDataSource data = new SAMDataSource(
|
||||
referenceFile,
|
||||
readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
|||
import org.testng.annotations.AfterClass;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -313,6 +314,7 @@ public class ReadFilterTest extends BaseTest {
|
|||
|
||||
protected SAMDataSource composeDataSource() {
|
||||
checkHeaderExists();
|
||||
final File referenceFile = null; // Not used in this test.
|
||||
final Set<SAMReaderID> readerIDs = new HashSet<>(1);
|
||||
final ThreadAllocation ta = new ThreadAllocation();
|
||||
final Integer numFileHandles = 1; // I believe that any value would do but need to confirm.
|
||||
|
|
@ -326,6 +328,7 @@ public class ReadFilterTest extends BaseTest {
|
|||
|
||||
final GenomeLocParser glp = new GenomeLocParser(header.getSequenceDictionary());
|
||||
final SAMDataSource res = new SAMDataSource(
|
||||
referenceFile,
|
||||
readerIDs,
|
||||
ta,
|
||||
numFileHandles,
|
||||
|
|
|
|||
|
|
@ -79,6 +79,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
return traversals.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
private File referenceFile;
|
||||
private IndexedFastaSequenceFile reference;
|
||||
private SAMSequenceDictionary dictionary;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
|
@ -90,7 +91,8 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
@BeforeClass
|
||||
private void init() throws IOException {
|
||||
//reference = new CachingIndexedFastaSequenceFile(new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta")); // hg19Reference));
|
||||
reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
|
||||
referenceFile = new File(hg19Reference);
|
||||
reference = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
dictionary = reference.getSequenceDictionary();
|
||||
genomeLocParser = new GenomeLocParser(dictionary);
|
||||
|
||||
|
|
@ -470,7 +472,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
SAMReaderID readerID = new SAMReaderID(bamFile, new Tags());
|
||||
samFiles.add(readerID);
|
||||
|
||||
SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
SAMDataSource dataSource = new SAMDataSource(referenceFile, samFiles, new ThreadAllocation(), null, genomeLocParser,
|
||||
false,
|
||||
ValidationStringency.STRICT,
|
||||
null,
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
|||
/** Test out that we can shard the file and iterate over every read */
|
||||
@Test
|
||||
public void testUnmappedReadCount() {
|
||||
SAMDataSource dataSource = new SAMDataSource(bamList,new ThreadAllocation(),null,genomeLocParser);
|
||||
SAMDataSource dataSource = new SAMDataSource(refFile, bamList,new ThreadAllocation(),null,genomeLocParser);
|
||||
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
|
||||
|
||||
countReadWalker.initialize();
|
||||
|
|
|
|||
|
|
@ -132,4 +132,15 @@ public class GATKBin implements Comparable<GATKBin> {
|
|||
return new GATKChunk[0];
|
||||
return chunkList;
|
||||
}
|
||||
|
||||
// HACK: Using this class's package permissions to further hack the CRAM-created SAMRecord's indexing bin and binary attributes.
|
||||
/**
 * Public passthrough to {@code read.getIndexingBin()}.
 * NOTE(review): per the HACK comment preceding these helpers, this presumably exists because
 * the underlying accessor is only reachable from this class's package -- confirm.
 *
 * @param read the record to query
 * @return whatever {@code read.getIndexingBin()} returns
 */
public static Integer getReadIndexingBin(final SAMRecord read) {
|
||||
return read.getIndexingBin();
|
||||
}
|
||||
/**
 * Public passthrough to {@code read.setIndexingBin(indexingBin)}.
 *
 * @param read        the record to modify
 * @param indexingBin the value forwarded unchanged to {@code setIndexingBin}
 */
public static void setReadIndexingBin(final SAMRecord read, final Integer indexingBin) {
|
||||
read.setIndexingBin(indexingBin);
|
||||
}
|
||||
/**
 * Public passthrough to {@code read.getBinaryAttributes()}.
 *
 * @param read the record to query
 * @return whatever {@code read.getBinaryAttributes()} returns
 */
public static SAMBinaryTagAndValue getReadBinaryAttributes(final SAMRecord read) {
|
||||
return read.getBinaryAttributes();
|
||||
}
|
||||
}
|
||||
|
|
@ -225,20 +225,20 @@ public class UserException extends ReviewedGATKException {
|
|||
|
||||
public static class MissortedBAM extends UserException {
|
||||
public MissortedBAM(SAMFileHeader.SortOrder order, File file, SAMFileHeader header) {
|
||||
super(String.format("Missorted Input SAM/BAM files: %s is must be sorted in %s order but order was: %s", file, order, header.getSortOrder()));
|
||||
super(String.format("Missorted Input SAM/BAM/CRAM files: %s is must be sorted in %s order but order was: %s", file, order, header.getSortOrder()));
|
||||
}
|
||||
|
||||
public MissortedBAM(SAMFileHeader.SortOrder order, String message) {
|
||||
super(String.format("Missorted Input SAM/BAM files: files are not sorted in %s order; %s", order, message));
|
||||
super(String.format("Missorted Input SAM/BAM/CRAM files: files are not sorted in %s order; %s", order, message));
|
||||
}
|
||||
|
||||
public MissortedBAM(SAMFileHeader.SortOrder order, SAMRecord read, String message) {
|
||||
super(String.format("Missorted Input SAM/BAM file %s: file sorted in %s order but %s is required; %s",
|
||||
super(String.format("Missorted Input SAM/BAM/CRAM file %s: file sorted in %s order but %s is required; %s",
|
||||
read.getFileSource().getReader(), read.getHeader().getSortOrder(), order, message));
|
||||
}
|
||||
|
||||
public MissortedBAM(String message) {
|
||||
super(String.format("Missorted Input SAM/BAM files: %s", message));
|
||||
super(String.format("Missorted Input SAM/BAM/CRAM files: %s", message));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -252,7 +252,7 @@ public class UserException extends ReviewedGATKException {
|
|||
}
|
||||
|
||||
public MalformedBAM(String source, String message) {
|
||||
super(String.format("SAM/BAM file %s is malformed: %s", source, message));
|
||||
super(String.format("SAM/BAM/CRAM file %s is malformed: %s", source, message));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -262,7 +262,7 @@ public class UserException extends ReviewedGATKException {
|
|||
}
|
||||
|
||||
public MisencodedBAM(String source, String message) {
|
||||
super(String.format("SAM/BAM file %s appears to be using the wrong encoding for quality scores: %s; please see the GATK --help documentation for options related to this error", source, message));
|
||||
super(String.format("SAM/BAM/CRAM file %s appears to be using the wrong encoding for quality scores: %s; please see the GATK --help documentation for options related to this error", source, message));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.utils.io;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
 * Contract for objects that carry a reference file which callers can read and replace.
 */
public interface ReferenceBacked {
|
||||
/** @return the reference file currently associated with this object */
public File getReferenceFile();
|
||||
/** @param reference the reference file to associate with this object */
public void setReferenceFile(final File reference);
|
||||
}
|
||||
|
|
@ -37,7 +37,6 @@ import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
|||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecordIterator;
|
||||
import org.broadinstitute.gatk.utils.*;
|
||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
|
@ -67,7 +66,6 @@ public class LIBSPerformance extends CommandLineProgram {
|
|||
final GenomeLocParser genomeLocParser = new GenomeLocParser(reference);
|
||||
|
||||
final SAMFileReader reader = new SAMFileReader(samFile);
|
||||
reader.setSAMRecordFactory(new GATKSamRecordFactory());
|
||||
|
||||
SAMRecordIterator rawIterator;
|
||||
if ( location == null )
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.gatk.utils.sam;
|
||||
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.cram.build.CramIO;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
|
|
@ -149,7 +150,7 @@ public class ArtificialSAMFileReader extends SAMFileReader {
|
|||
byte[] byteArray = "".getBytes("ISO-8859-1");
|
||||
return new ByteArrayInputStream(byteArray);
|
||||
}
|
||||
catch( UnsupportedEncodingException ex ) {
|
||||
catch( Exception ex ) {
|
||||
throw new ReviewedGATKException("Unable to build empty input stream",ex);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ import java.util.*;
|
|||
* Changing these values in any way will invalidate the cached value. However, we do not monitor those setter
|
||||
* functions, so modifying a GATKSAMRecord in any way may result in stale cached values.
|
||||
*/
|
||||
public class GATKSAMRecord extends BAMRecord implements Cloneable {
|
||||
public class GATKSAMRecord extends SAMRecord implements Cloneable {
|
||||
// Base Quality Score Recalibrator specific attribute tags
|
||||
public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; // base qualities for insertions
|
||||
public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; // base qualities for deletions
|
||||
|
|
@ -92,42 +92,36 @@ public class GATKSAMRecord extends BAMRecord implements Cloneable {
|
|||
* @param read
|
||||
*/
|
||||
public GATKSAMRecord(final SAMRecord read) {
|
||||
super(read.getHeader(),
|
||||
read.getReferenceIndex(),
|
||||
read.getAlignmentStart(),
|
||||
read.getReadName() != null ? (short)read.getReadNameLength() : 0,
|
||||
(short)read.getMappingQuality(),
|
||||
0,
|
||||
read.getCigarLength(),
|
||||
read.getFlags(),
|
||||
read.getReadLength(),
|
||||
read.getMateReferenceIndex(),
|
||||
read.getMateAlignmentStart(),
|
||||
read.getInferredInsertSize(),
|
||||
null);
|
||||
super(read.getHeader());
|
||||
super.setReferenceIndex(read.getReferenceIndex());
|
||||
super.setAlignmentStart(read.getAlignmentStart());
|
||||
super.setReadName(read.getReadName());
|
||||
super.setMappingQuality(read.getMappingQuality());
|
||||
// indexing bin done below
|
||||
super.setCigar(read.getCigar());
|
||||
super.setFlags(read.getFlags());
|
||||
super.setMateReferenceIndex(read.getMateReferenceIndex());
|
||||
super.setMateAlignmentStart(read.getMateAlignmentStart());
|
||||
super.setInferredInsertSize(read.getInferredInsertSize());
|
||||
SAMReadGroupRecord samRG = read.getReadGroup();
|
||||
clearAttributes();
|
||||
SAMBinaryTagAndValue samAttr = GATKBin.getReadBinaryAttributes(read);
|
||||
if (samAttr == null) {
|
||||
clearAttributes();
|
||||
} else {
|
||||
setAttributes(samAttr);
|
||||
}
|
||||
if (samRG != null) {
|
||||
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG);
|
||||
setReadGroup(rg);
|
||||
}
|
||||
}
|
||||
|
||||
public GATKSAMRecord(final SAMFileHeader header,
|
||||
final int referenceSequenceIndex,
|
||||
final int alignmentStart,
|
||||
final short readNameLength,
|
||||
final short mappingQuality,
|
||||
final int indexingBin,
|
||||
final int cigarLen,
|
||||
final int flags,
|
||||
final int readLen,
|
||||
final int mateReferenceSequenceIndex,
|
||||
final int mateAlignmentStart,
|
||||
final int insertSize,
|
||||
final byte[] variableLengthBlock) {
|
||||
super(header, referenceSequenceIndex, alignmentStart, readNameLength, mappingQuality, indexingBin, cigarLen,
|
||||
flags, readLen, mateReferenceSequenceIndex, mateAlignmentStart, insertSize, variableLengthBlock);
|
||||
super.setFileSource(read.getFileSource());
|
||||
super.setReadName(read.getReadName());
|
||||
super.setCigarString(read.getCigarString());
|
||||
super.setReadBases(read.getReadBases());
|
||||
super.setBaseQualities(read.getBaseQualities());
|
||||
// From SAMRecord constructor: Do this after the above because setCigarString will clear it.
|
||||
GATKBin.setReadIndexingBin(this, GATKBin.getReadIndexingBin(read));
|
||||
}
|
||||
|
||||
public static GATKSAMRecord createRandomRead(int length) {
|
||||
|
|
@ -520,19 +514,15 @@ public class GATKSAMRecord extends BAMRecord implements Cloneable {
|
|||
* @return a read with no bases but safe for the GATK
|
||||
*/
|
||||
public static GATKSAMRecord emptyRead(GATKSAMRecord read) {
|
||||
GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(),
|
||||
read.getReferenceIndex(),
|
||||
0,
|
||||
(short) 0,
|
||||
(short) 0,
|
||||
0,
|
||||
0,
|
||||
read.getFlags(),
|
||||
0,
|
||||
read.getMateReferenceIndex(),
|
||||
read.getMateAlignmentStart(),
|
||||
read.getInferredInsertSize(),
|
||||
null);
|
||||
final GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader());
|
||||
emptyRead.setReferenceIndex(read.getReferenceIndex());
|
||||
emptyRead.setAlignmentStart(0);
|
||||
emptyRead.setMappingQuality(0);
|
||||
// setting read indexing bin last
|
||||
emptyRead.setFlags(read.getFlags());
|
||||
emptyRead.setMateReferenceIndex(read.getMateReferenceIndex());
|
||||
emptyRead.setMateAlignmentStart(read.getMateAlignmentStart());
|
||||
emptyRead.setInferredInsertSize(read.getInferredInsertSize());
|
||||
|
||||
emptyRead.setCigarString("");
|
||||
emptyRead.setReadBases(new byte[0]);
|
||||
|
|
@ -545,6 +535,8 @@ public class GATKSAMRecord extends BAMRecord implements Cloneable {
|
|||
emptyRead.setReadGroup(rg);
|
||||
}
|
||||
|
||||
GATKBin.setReadIndexingBin(emptyRead, 0);
|
||||
|
||||
return emptyRead;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ package org.broadinstitute.gatk.utils.sam;
|
|||
import htsjdk.samtools.SAMRecord;
|
||||
import htsjdk.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
|
|
@ -40,9 +39,9 @@ import java.util.Iterator;
|
|||
* Time: 1:19 PM
|
||||
*/
|
||||
public class GATKSAMRecordIterator implements CloseableIterator<GATKSAMRecord>, Iterable<GATKSAMRecord> {
|
||||
final CloseableIterator<SAMRecord> it;
|
||||
final CloseableIterator<? extends SAMRecord> it;
|
||||
|
||||
public GATKSAMRecordIterator(final CloseableIterator<SAMRecord> it) {
|
||||
public GATKSAMRecordIterator(final CloseableIterator<? extends SAMRecord> it) {
|
||||
this.it = it;
|
||||
}
|
||||
|
||||
|
|
@ -51,7 +50,14 @@ public class GATKSAMRecordIterator implements CloseableIterator<GATKSAMRecord>,
|
|||
}
|
||||
|
||||
@Override public boolean hasNext() { return it.hasNext(); }
|
||||
@Override public GATKSAMRecord next() { return (GATKSAMRecord)it.next(); }
|
||||
@Override public GATKSAMRecord next() {
|
||||
SAMRecord next = it.next();
|
||||
if (next instanceof GATKSAMRecord) {
|
||||
return (GATKSAMRecord)next;
|
||||
} else {
|
||||
return new GATKSAMRecord(next);
|
||||
}
|
||||
}
|
||||
@Override public void remove() { it.remove(); }
|
||||
@Override public void close() { it.close(); }
|
||||
@Override public Iterator<GATKSAMRecord> iterator() { return this; }
|
||||
|
|
|
|||
|
|
@ -1,75 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.utils.sam;
|
||||
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.samtools.SAMRecord;
|
||||
import htsjdk.samtools.SAMRecordFactory;
|
||||
import htsjdk.samtools.BAMRecord;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
|
||||
/**
|
||||
* Factory interface implementation used to create GATKSamRecords
|
||||
* from SAMFileReaders with SAM-JDK
|
||||
*
|
||||
* @author Mark DePristo
|
||||
*/
|
||||
public class GATKSamRecordFactory implements SAMRecordFactory {
|
||||
|
||||
/** Create a new SAMRecord to be filled in */
|
||||
public SAMRecord createSAMRecord(SAMFileHeader header) {
|
||||
throw new UserException.BadInput("The GATK now longer supports input SAM files");
|
||||
}
|
||||
|
||||
/** Create a new BAM Record. */
|
||||
public BAMRecord createBAMRecord(final SAMFileHeader header,
|
||||
final int referenceSequenceIndex,
|
||||
final int alignmentStart,
|
||||
final short readNameLength,
|
||||
final short mappingQuality,
|
||||
final int indexingBin,
|
||||
final int cigarLen,
|
||||
final int flags,
|
||||
final int readLen,
|
||||
final int mateReferenceSequenceIndex,
|
||||
final int mateAlignmentStart,
|
||||
final int insertSize,
|
||||
final byte[] variableLengthBlock) {
|
||||
return new GATKSAMRecord(header,
|
||||
referenceSequenceIndex,
|
||||
alignmentStart,
|
||||
readNameLength,
|
||||
mappingQuality,
|
||||
indexingBin,
|
||||
cigarLen,
|
||||
flags,
|
||||
readLen,
|
||||
mateReferenceSequenceIndex,
|
||||
mateAlignmentStart,
|
||||
insertSize,
|
||||
variableLengthBlock);
|
||||
}
|
||||
}
|
||||
|
|
@ -25,8 +25,10 @@
|
|||
|
||||
package org.broadinstitute.gatk.utils.sam;
|
||||
|
||||
import htsjdk.samtools.SAMFileReader;
|
||||
import htsjdk.samtools.SamReader;
|
||||
import htsjdk.samtools.SamReaderFactory;
|
||||
import htsjdk.samtools.ValidationStringency;
|
||||
import org.broadinstitute.gatk.utils.io.ReferenceBacked;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -39,12 +41,17 @@ import java.io.File;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class SAMFileReaderBuilder {
|
||||
public class SAMReaderBuilder implements ReferenceBacked {
|
||||
/**
|
||||
* To which file should output be written?
|
||||
*/
|
||||
private File samFile = null;
|
||||
|
||||
/**
|
||||
* The reference file for the samFile.
|
||||
*/
|
||||
private File referenceFile = null;
|
||||
|
||||
/**
|
||||
* What compression level should be used when building this file?
|
||||
*/
|
||||
|
|
@ -58,6 +65,16 @@ public class SAMFileReaderBuilder {
|
|||
this.samFile = samFile;
|
||||
}
|
||||
|
||||
@Override
|
||||
public File getReferenceFile() {
|
||||
return referenceFile;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setReferenceFile(final File referenceFile) {
|
||||
this.referenceFile = referenceFile;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the validation stringency to apply when reading this sam file.
|
||||
* @param validationStringency Stringency to apply. Must not be null.
|
||||
|
|
@ -70,15 +87,16 @@ public class SAMFileReaderBuilder {
|
|||
* Create the SAM writer, given the constituent parts accrued.
|
||||
* @return Newly minted SAM file writer.
|
||||
*/
|
||||
public SAMFileReader build() {
|
||||
public SamReader build() {
|
||||
if( samFile == null )
|
||||
throw new ReviewedGATKException( "Filename for output sam file must be supplied.");
|
||||
if( validationStringency == null )
|
||||
throw new ReviewedGATKException( "Header for output sam file must be supplied.");
|
||||
|
||||
SAMFileReader reader = new SAMFileReader( samFile );
|
||||
reader.setValidationStringency( validationStringency );
|
||||
|
||||
return reader;
|
||||
return SamReaderFactory
|
||||
.makeDefault()
|
||||
.referenceSequence(this.getReferenceFile())
|
||||
.validationStringency(validationStringency)
|
||||
.open(samFile);
|
||||
}
|
||||
}
|
||||
|
|
@ -73,16 +73,16 @@ public class ListFileUtils {
|
|||
throw new UserException.CouldNotReadInputFile(new File(inputFileName), "Unable to find file while unpacking reads", ex);
|
||||
}
|
||||
}
|
||||
else if(inputFileName.toLowerCase().endsWith(".bam")) {
|
||||
else if(inputFileName.toLowerCase().endsWith(".bam") || inputFileName.toLowerCase().endsWith(".cram")) {
|
||||
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
||||
}
|
||||
else if(inputFileName.endsWith("stdin")) {
|
||||
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
||||
}
|
||||
else {
|
||||
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM files with the .bam extension and lists of BAM files " +
|
||||
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
|
||||
"of BAM files is in the correct format, update the extension, and try again.",inputFileName));
|
||||
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM/CRAM files with the .bam/.cram extension and lists of BAM/CRAM files " +
|
||||
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM/CRAM file or list " +
|
||||
"of BAM/CRAM files is in the correct format, update the extension, and try again.",inputFileName));
|
||||
}
|
||||
}
|
||||
return unpackedReads;
|
||||
|
|
|
|||
|
|
@ -40,7 +40,6 @@ import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl;
|
|||
import org.broadinstitute.gatk.utils.sam.ArtificialBAMBuilder;
|
||||
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSamRecordFactory;
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.variantcontext.VariantContextBuilder;
|
||||
|
|
@ -219,11 +218,10 @@ public class ExampleToCopyUnitTest extends BaseTest {
|
|||
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(seq, 20, 10);
|
||||
final File bam = bamBuilder.makeTemporarilyBAMFile();
|
||||
final SAMFileReader reader = new SAMFileReader(bam);
|
||||
reader.setSAMRecordFactory(new GATKSamRecordFactory());
|
||||
|
||||
final Iterator<SAMRecord> bamIt = reader.iterator();
|
||||
while ( bamIt.hasNext() ) {
|
||||
final GATKSAMRecord read = (GATKSAMRecord)bamIt.next(); // all reads are actually GATKSAMRecords
|
||||
final SAMRecord read = bamIt.next(); // all reads are actually GATKSAMRecords
|
||||
// TODO -- add some tests that use reads from a BAM
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue