Better file count / buffering variation in test suite. Parameterized read shard buffering. Misc cleanup.

This commit is contained in:
Matt Hanna 2011-12-06 10:10:38 -05:00
parent b5b5ffe71d
commit 4001c22a11
7 changed files with 24 additions and 98 deletions

View File

@ -30,7 +30,6 @@ public class ReadProperties {
private Collection<SAMReaderID> readers = null;
private SAMFileHeader header = null;
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
private Integer readBufferSize = null;
private DownsamplingMethod downsamplingMethod = null;
private ValidationExclusion exclusionList = null;
private Collection<ReadFilter> supplementalFilters = null;
@ -91,14 +90,6 @@ public class ReadProperties {
return validationStringency;
}
/**
* Gets the number of reads that the sharding system should buffer per BAM file.
* @return The configured per-BAM read buffer size, or null if no buffer size was supplied.
*/
public Integer getReadBufferSize() {
return this.readBufferSize;
}
/**
* Gets the method and parameters used when downsampling reads.
* @return Downsample fraction.
@ -150,7 +141,6 @@ public class ReadProperties {
* @param header sam file header.
* @param useOriginalBaseQualities True if original base qualities should be used.
* @param strictness Stringency of reads file parsing.
* @param readBufferSize Number of reads to hold in memory per BAM.
* @param downsamplingMethod Method for downsampling reads at a given locus.
* @param exclusionList what safety checks we're willing to let slide
* @param supplementalFilters additional filters to dynamically apply.
@ -169,7 +159,6 @@ public class ReadProperties {
SAMFileHeader header,
boolean useOriginalBaseQualities,
SAMFileReader.ValidationStringency strictness,
Integer readBufferSize,
DownsamplingMethod downsamplingMethod,
ValidationExclusion exclusionList,
Collection<ReadFilter> supplementalFilters,
@ -181,7 +170,6 @@ public class ReadProperties {
byte defaultBaseQualities) {
this.readers = samFiles;
this.header = header;
this.readBufferSize = readBufferSize;
this.validationStringency = strictness;
this.downsamplingMethod = downsamplingMethod == null ? DownsamplingMethod.NONE : downsamplingMethod;
this.exclusionList = exclusionList == null ? new ValidationExclusion() : exclusionList;

View File

@ -38,7 +38,7 @@ public class ReadShard extends Shard {
/**
* What is the maximum number of reads which should go into a read shard.
*/
public static final int MAX_READS = 10000;
public static int MAX_READS = 10000;
/**
* The reads making up this shard.
@ -49,6 +49,15 @@ public class ReadShard extends Shard {
super(parser, ShardType.READ, loci, readsDataSource, fileSpans, isUnmapped);
}
/**
* Overrides the maximum number of reads buffered in a read shard. The limit is held in a
* static field, so the new value is shared by every read shard; this deliberately odd static
* interface stays until the effect of tuning the parameter is understood.
* @param bufferSize New maximum number of reads per shard.
*/
static void setReadBufferSize(final int bufferSize) {
ReadShard.MAX_READS = bufferSize;
}
/**
* Returns true if this shard is meant to buffer reads, rather
* than just holding pointers to their locations.

View File

@ -250,6 +250,9 @@ public class SAMDataSource {
dispatcher = null;
validationStringency = strictness;
if(readBufferSize != null)
ReadShard.setReadBufferSize(readBufferSize);
for (SAMReaderID readerID : samFiles) {
if (!readerID.samFile.canRead())
throw new UserException.CouldNotReadInputFile(readerID.samFile,"file is not present or user does not have appropriate permissions. " +
@ -293,7 +296,6 @@ public class SAMDataSource {
mergedHeader,
useOriginalBaseQualities,
strictness,
readBufferSize,
downsamplingMethod,
exclusionList,
supplementalFilters,
@ -551,8 +553,6 @@ public class SAMDataSource {
inputStream.submitAccessPlan(new SAMReaderPosition(id,inputStream,(GATKBAMFileSpan)shard.getFileSpans().get(id)));
}
iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id));
if(readProperties.getReadBufferSize() != null)
iterator = new BufferingReadIterator(iterator,readProperties.getReadBufferSize());
if(shard.getGenomeLocs().size() > 0)
iterator = new IntervalOverlapFilteringIterator(iterator,shard.getGenomeLocs());
mergingIterator.addIterator(readers.getReader(id),iterator);

View File

@ -67,6 +67,7 @@ public class SAMReaderID implements Comparable {
* @param other The other identifier.
* @return True iff the two readers point to the same file.
*/
@Override
public boolean equals(Object other) {
if(other == null) return false;
if(!(other instanceof SAMReaderID)) return false;
@ -79,10 +80,20 @@ public class SAMReaderID implements Comparable {
* Generate a hash code for this object.
* @return A hash code, based solely on the file name at this point.
*/
@Override
public int hashCode() {
// The hash is delegated entirely to the underlying file.
final int fileHash = samFile.hashCode();
return fileHash;
}
/**
* Renders this reader ID as the path of its source SAM file.
* @return The SAM file path, as reported by getSamFilePath().
*/
@Override
public String toString() {
return this.getSamFilePath();
}
@Override
public int compareTo(Object other) {
// Readers are ordered by the absolute path of their underlying files.
final String thisPath = this.samFile.getAbsolutePath();
final SAMReaderID that = (SAMReaderID)other;
final String thatPath = that.samFile.getAbsolutePath();
return thisPath.compareTo(thatPath);
}

View File

@ -1,80 +0,0 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.LinkedList;
import java.util.NoSuchElementException;
import java.util.Queue;
/**
* Buffers access to a large stream of reads, replenishing the buffer only when the reads
*
* @author mhanna
* @version 0.1
*/
public class BufferingReadIterator implements CloseableIterator<SAMRecord> {
private final CloseableIterator<SAMRecord> wrappedIterator;
private final Queue<SAMRecord> buffer;
private final int bufferSize;
public BufferingReadIterator(final CloseableIterator<SAMRecord> readIterator, final int bufferSize) {
this.wrappedIterator = readIterator;
this.buffer = new LinkedList<SAMRecord>();
this.bufferSize = bufferSize;
}
public boolean hasNext() {
assureBufferFull();
return !buffer.isEmpty();
}
public SAMRecord next() {
assureBufferFull();
if(!hasNext()) throw new NoSuchElementException("No next element available");
return buffer.remove();
}
public void close() {
wrappedIterator.close();
}
public void remove() {
throw new ReviewedStingException("Unable to remove from a BufferingReadIterator");
}
/**
* If the buffer is empty but there are more elements in the iterator,
*/
private void assureBufferFull() {
if(!buffer.isEmpty())
return;
while(buffer.size() < bufferSize && wrappedIterator.hasNext())
buffer.add(wrappedIterator.next());
}
}

View File

@ -72,7 +72,6 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
reader.getFileHeader(),
false,
SAMFileReader.ValidationStringency.SILENT,
0,
downsampling.create(),
new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
Collections.<ReadFilter>emptyList(),

View File

@ -301,7 +301,6 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
false,
SAMFileReader.ValidationStringency.STRICT,
null,
null,
new ValidationExclusion(),
Collections.<ReadFilter>emptyList(),
false,