Better file count / buffering variation in test suite. Parameterized read shard buffering. Misc cleanup.
This commit is contained in:
parent
b5b5ffe71d
commit
4001c22a11
|
|
@ -30,7 +30,6 @@ public class ReadProperties {
|
||||||
private Collection<SAMReaderID> readers = null;
|
private Collection<SAMReaderID> readers = null;
|
||||||
private SAMFileHeader header = null;
|
private SAMFileHeader header = null;
|
||||||
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
|
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
|
||||||
private Integer readBufferSize = null;
|
|
||||||
private DownsamplingMethod downsamplingMethod = null;
|
private DownsamplingMethod downsamplingMethod = null;
|
||||||
private ValidationExclusion exclusionList = null;
|
private ValidationExclusion exclusionList = null;
|
||||||
private Collection<ReadFilter> supplementalFilters = null;
|
private Collection<ReadFilter> supplementalFilters = null;
|
||||||
|
|
@ -91,14 +90,6 @@ public class ReadProperties {
|
||||||
return validationStringency;
|
return validationStringency;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the total number of reads that the sharding system should buffer per BAM file.
|
|
||||||
* @return Number of reads to buffer per BAM file, or null if no buffer size was specified.
|
|
||||||
*/
|
|
||||||
public Integer getReadBufferSize() {
|
|
||||||
return readBufferSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the method and parameters used when downsampling reads.
|
* Gets the method and parameters used when downsampling reads.
|
||||||
* @return Downsample fraction.
|
* @return Downsample fraction.
|
||||||
|
|
@ -150,7 +141,6 @@ public class ReadProperties {
|
||||||
* @param header sam file header.
|
* @param header sam file header.
|
||||||
* @param useOriginalBaseQualities True if original base qualities should be used.
|
* @param useOriginalBaseQualities True if original base qualities should be used.
|
||||||
* @param strictness Stringency of reads file parsing.
|
* @param strictness Stringency of reads file parsing.
|
||||||
* @param readBufferSize Number of reads to hold in memory per BAM.
|
|
||||||
* @param downsamplingMethod Method for downsampling reads at a given locus.
|
* @param downsamplingMethod Method for downsampling reads at a given locus.
|
||||||
* @param exclusionList what safety checks we're willing to let slide
|
* @param exclusionList what safety checks we're willing to let slide
|
||||||
* @param supplementalFilters additional filters to dynamically apply.
|
* @param supplementalFilters additional filters to dynamically apply.
|
||||||
|
|
@ -169,7 +159,6 @@ public class ReadProperties {
|
||||||
SAMFileHeader header,
|
SAMFileHeader header,
|
||||||
boolean useOriginalBaseQualities,
|
boolean useOriginalBaseQualities,
|
||||||
SAMFileReader.ValidationStringency strictness,
|
SAMFileReader.ValidationStringency strictness,
|
||||||
Integer readBufferSize,
|
|
||||||
DownsamplingMethod downsamplingMethod,
|
DownsamplingMethod downsamplingMethod,
|
||||||
ValidationExclusion exclusionList,
|
ValidationExclusion exclusionList,
|
||||||
Collection<ReadFilter> supplementalFilters,
|
Collection<ReadFilter> supplementalFilters,
|
||||||
|
|
@ -181,7 +170,6 @@ public class ReadProperties {
|
||||||
byte defaultBaseQualities) {
|
byte defaultBaseQualities) {
|
||||||
this.readers = samFiles;
|
this.readers = samFiles;
|
||||||
this.header = header;
|
this.header = header;
|
||||||
this.readBufferSize = readBufferSize;
|
|
||||||
this.validationStringency = strictness;
|
this.validationStringency = strictness;
|
||||||
this.downsamplingMethod = downsamplingMethod == null ? DownsamplingMethod.NONE : downsamplingMethod;
|
this.downsamplingMethod = downsamplingMethod == null ? DownsamplingMethod.NONE : downsamplingMethod;
|
||||||
this.exclusionList = exclusionList == null ? new ValidationExclusion() : exclusionList;
|
this.exclusionList = exclusionList == null ? new ValidationExclusion() : exclusionList;
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,7 @@ public class ReadShard extends Shard {
|
||||||
/**
|
/**
|
||||||
* What is the maximum number of reads which should go into a read shard.
|
* What is the maximum number of reads which should go into a read shard.
|
||||||
*/
|
*/
|
||||||
public static final int MAX_READS = 10000;
|
public static int MAX_READS = 10000;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The reads making up this shard.
|
* The reads making up this shard.
|
||||||
|
|
@ -49,6 +49,15 @@ public class ReadShard extends Shard {
|
||||||
super(parser, ShardType.READ, loci, readsDataSource, fileSpans, isUnmapped);
|
super(parser, ShardType.READ, loci, readsDataSource, fileSpans, isUnmapped);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the maximum number of reads buffered in a read shard. Implemented as a weirdly static interface
|
||||||
|
* until we know what effect tuning this parameter has.
|
||||||
|
* @param bufferSize New maximum number of reads which should go into a read shard.
|
||||||
|
*/
|
||||||
|
static void setReadBufferSize(final int bufferSize) {
|
||||||
|
MAX_READS = bufferSize;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if this shard is meant to buffer reads, rather
|
* Returns true if this shard is meant to buffer reads, rather
|
||||||
* than just holding pointers to their locations.
|
* than just holding pointers to their locations.
|
||||||
|
|
|
||||||
|
|
@ -250,6 +250,9 @@ public class SAMDataSource {
|
||||||
dispatcher = null;
|
dispatcher = null;
|
||||||
|
|
||||||
validationStringency = strictness;
|
validationStringency = strictness;
|
||||||
|
if(readBufferSize != null)
|
||||||
|
ReadShard.setReadBufferSize(readBufferSize);
|
||||||
|
|
||||||
for (SAMReaderID readerID : samFiles) {
|
for (SAMReaderID readerID : samFiles) {
|
||||||
if (!readerID.samFile.canRead())
|
if (!readerID.samFile.canRead())
|
||||||
throw new UserException.CouldNotReadInputFile(readerID.samFile,"file is not present or user does not have appropriate permissions. " +
|
throw new UserException.CouldNotReadInputFile(readerID.samFile,"file is not present or user does not have appropriate permissions. " +
|
||||||
|
|
@ -293,7 +296,6 @@ public class SAMDataSource {
|
||||||
mergedHeader,
|
mergedHeader,
|
||||||
useOriginalBaseQualities,
|
useOriginalBaseQualities,
|
||||||
strictness,
|
strictness,
|
||||||
readBufferSize,
|
|
||||||
downsamplingMethod,
|
downsamplingMethod,
|
||||||
exclusionList,
|
exclusionList,
|
||||||
supplementalFilters,
|
supplementalFilters,
|
||||||
|
|
@ -551,8 +553,6 @@ public class SAMDataSource {
|
||||||
inputStream.submitAccessPlan(new SAMReaderPosition(id,inputStream,(GATKBAMFileSpan)shard.getFileSpans().get(id)));
|
inputStream.submitAccessPlan(new SAMReaderPosition(id,inputStream,(GATKBAMFileSpan)shard.getFileSpans().get(id)));
|
||||||
}
|
}
|
||||||
iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id));
|
iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id));
|
||||||
if(readProperties.getReadBufferSize() != null)
|
|
||||||
iterator = new BufferingReadIterator(iterator,readProperties.getReadBufferSize());
|
|
||||||
if(shard.getGenomeLocs().size() > 0)
|
if(shard.getGenomeLocs().size() > 0)
|
||||||
iterator = new IntervalOverlapFilteringIterator(iterator,shard.getGenomeLocs());
|
iterator = new IntervalOverlapFilteringIterator(iterator,shard.getGenomeLocs());
|
||||||
mergingIterator.addIterator(readers.getReader(id),iterator);
|
mergingIterator.addIterator(readers.getReader(id),iterator);
|
||||||
|
|
|
||||||
|
|
@ -67,6 +67,7 @@ public class SAMReaderID implements Comparable {
|
||||||
* @param other The other identifier.
|
* @param other The other identifier.
|
||||||
* @return True iff the two readers point to the same file.
|
* @return True iff the two readers point to the same file.
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public boolean equals(Object other) {
|
public boolean equals(Object other) {
|
||||||
if(other == null) return false;
|
if(other == null) return false;
|
||||||
if(!(other instanceof SAMReaderID)) return false;
|
if(!(other instanceof SAMReaderID)) return false;
|
||||||
|
|
@ -79,10 +80,20 @@ public class SAMReaderID implements Comparable {
|
||||||
* Generate a hash code for this object.
|
* Generate a hash code for this object.
|
||||||
* @return A hash code, based solely on the file name at this point.
|
* @return A hash code, based solely on the file name at this point.
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return samFile.hashCode();
|
return samFile.hashCode();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Best string representation for a SAM file reader is the path of the source file.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return getSamFilePath();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public int compareTo(Object other) {
|
public int compareTo(Object other) {
|
||||||
return this.samFile.getAbsolutePath().compareTo(((SAMReaderID)other).samFile.getAbsolutePath());
|
return this.samFile.getAbsolutePath().compareTo(((SAMReaderID)other).samFile.getAbsolutePath());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,80 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010, The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.iterators;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.NoSuchElementException;
|
|
||||||
import java.util.Queue;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Buffers access to a large stream of reads, replenishing the buffer only when the reads it holds have all been consumed.
|
|
||||||
*
|
|
||||||
* @author mhanna
|
|
||||||
* @version 0.1
|
|
||||||
*/
|
|
||||||
public class BufferingReadIterator implements CloseableIterator<SAMRecord> {
|
|
||||||
private final CloseableIterator<SAMRecord> wrappedIterator;
|
|
||||||
private final Queue<SAMRecord> buffer;
|
|
||||||
private final int bufferSize;
|
|
||||||
|
|
||||||
public BufferingReadIterator(final CloseableIterator<SAMRecord> readIterator, final int bufferSize) {
|
|
||||||
this.wrappedIterator = readIterator;
|
|
||||||
this.buffer = new LinkedList<SAMRecord>();
|
|
||||||
this.bufferSize = bufferSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean hasNext() {
|
|
||||||
assureBufferFull();
|
|
||||||
return !buffer.isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
public SAMRecord next() {
|
|
||||||
assureBufferFull();
|
|
||||||
if(!hasNext()) throw new NoSuchElementException("No next element available");
|
|
||||||
return buffer.remove();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() {
|
|
||||||
wrappedIterator.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void remove() {
|
|
||||||
throw new ReviewedStingException("Unable to remove from a BufferingReadIterator");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* If the buffer is empty but there are more elements in the iterator, refills the buffer with up to bufferSize reads.
|
|
||||||
*/
|
|
||||||
private void assureBufferFull() {
|
|
||||||
if(!buffer.isEmpty())
|
|
||||||
return;
|
|
||||||
while(buffer.size() < bufferSize && wrappedIterator.hasNext())
|
|
||||||
buffer.add(wrappedIterator.next());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -72,7 +72,6 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
|
||||||
reader.getFileHeader(),
|
reader.getFileHeader(),
|
||||||
false,
|
false,
|
||||||
SAMFileReader.ValidationStringency.SILENT,
|
SAMFileReader.ValidationStringency.SILENT,
|
||||||
0,
|
|
||||||
downsampling.create(),
|
downsampling.create(),
|
||||||
new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
|
new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
|
||||||
Collections.<ReadFilter>emptyList(),
|
Collections.<ReadFilter>emptyList(),
|
||||||
|
|
|
||||||
|
|
@ -301,7 +301,6 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
||||||
false,
|
false,
|
||||||
SAMFileReader.ValidationStringency.STRICT,
|
SAMFileReader.ValidationStringency.STRICT,
|
||||||
null,
|
null,
|
||||||
null,
|
|
||||||
new ValidationExclusion(),
|
new ValidationExclusion(),
|
||||||
Collections.<ReadFilter>emptyList(),
|
Collections.<ReadFilter>emptyList(),
|
||||||
false,
|
false,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue