Major refactoring of shards. Shards no longer use interfaces but are now an actual object hierarchy, with most of the important and common functionality pushed up to base classes. This eliminated a lot of duplicated code, and the shards are much more understandable now. Shards also now require a GenomeLocParser to work with their own GenomeLocs.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5030 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-01-20 12:36:56 +00:00
parent 4d611e53e7
commit cacdac3914
9 changed files with 177 additions and 315 deletions

View File

@ -3,11 +3,16 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import net.sf.samtools.*; import net.sf.samtools.*;
import net.sf.picard.filter.SamRecordFilter; import net.sf.picard.filter.SamRecordFilter;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
/** /**
* A common interface for shards that natively understand the BAM format. * A common interface for shards that natively understand the BAM format.
@ -15,44 +20,90 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
* @author mhanna * @author mhanna
* @version 0.1 * @version 0.1
*/ */
public interface BAMFormatAwareShard extends Shard { public abstract class BAMFormatAwareShard extends Shard {
/**
* Whether the current location is unmapped.
*/
private final boolean isUnmapped;
/**
* Reads data, if applicable.
*/
private final SAMDataSource readsDataSource;
/**
* The data backing the next chunks to deliver to the traversal engine.
*/
private final Map<SAMReaderID,SAMFileSpan> fileSpans;
public BAMFormatAwareShard(GenomeLocParser parser,
ShardType shardType,
List<GenomeLoc> locs,
SAMDataSource readsDataSource,
Map<SAMReaderID,SAMFileSpan> fileSpans,
boolean isUnmapped) {
super(parser, shardType, locs);
this.readsDataSource = readsDataSource;
this.fileSpans = fileSpans;
this.isUnmapped = isUnmapped;
}
/**
* Closes the shard, tallying and incorporating read data.
*/
@Override
public void close() {
readsDataSource.incorporateReadMetrics(readMetrics);
}
/** /**
* Get the list of chunks delimiting this shard. * Get the list of chunks delimiting this shard.
* @return a list of chunks that contain data for this shard. * @return a list of chunks that contain data for this shard.
*/ */
public Map<SAMReaderID,SAMFileSpan> getFileSpans(); public Map<SAMReaderID,SAMFileSpan> getFileSpans() {
return Collections.unmodifiableMap(fileSpans);
}
/**
* Gets key read validation and filtering properties.
* @return set of read properties associated with this shard.
*/
@Override
public ReadProperties getReadProperties() {
return readsDataSource.getReadsInfo();
}
/** /**
* Returns true if this shard is meant to buffer reads, rather * Returns true if this shard is meant to buffer reads, rather
* than just holding pointers to their locations. * than just holding pointers to their locations.
* @return True if this shard can buffer reads. False otherwise. * @return True if this shard can buffer reads. False otherwise.
*/ */
public boolean buffersReads(); public boolean buffersReads() { return false; }
/**
* Checks to see whether the buffer is empty.
* @return True if the buffer is empty.
*/
public boolean isBufferEmpty();
/** /**
* Returns true if the read buffer is currently full. * Returns true if the read buffer is currently full.
* @return True if this shard's buffer is full (and the shard can buffer reads). * @return True if this shard's buffer is full (and the shard can buffer reads).
*/ */
public boolean isBufferFull(); public boolean isBufferEmpty() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
/**
* Returns true if the read buffer is currently full.
* @return True if this shard's buffer is full (and the shard can buffer reads).
*/
public boolean isBufferFull() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
/** /**
* Adds a read to the read buffer. * Adds a read to the read buffer.
* @param read Add a read to the internal shard buffer. * @param read Add a read to the internal shard buffer.
*/ */
public void addRead(SAMRecord read); public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); }
/** /**
* Assuming this iterator buffers reads, an iterator to the reads * Gets the iterator over the elements cached in the shard.
* stored in the shard. * @return
* @return An iterator over the reads stored in the shard.
*/ */
public StingSAMIterator iterator(); public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
/** /**
* Whether this shard points to an unmapped region. * Whether this shard points to an unmapped region.
@ -60,5 +111,7 @@ public interface BAMFormatAwareShard extends Shard {
* this case, isUnmapped should always return false. * this case, isUnmapped should always return false.
* @return True if this shard is unmapped. False otherwise. * @return True if this shard is unmapped. False otherwise.
*/ */
public boolean isUnmapped(); public boolean isUnmapped() {
return isUnmapped;
}
} }

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.datasources.shards; package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
@ -21,127 +22,14 @@ import net.sf.picard.filter.SamRecordFilter;
* @version 1.0 * @version 1.0
* @date Apr 7, 2009 * @date Apr 7, 2009
*/ */
public class LocusShard implements BAMFormatAwareShard { public class LocusShard extends BAMFormatAwareShard {
/**
* Source for read data.
*/
private SAMDataSource dataSource;
/**
* A list of the chunks associated with this shard.
*/
private final Map<SAMReaderID,SAMFileSpan> fileSpans;
// currently our location
private final List<GenomeLoc> loci;
/**
* Statistics about which reads in this shards were used and which were filtered away.
*/
private final ReadMetrics readMetrics = new ReadMetrics();
/** /**
* Create a new locus shard, divided by index. * Create a new locus shard, divided by index.
* @param intervals List of intervals to process. * @param intervals List of intervals to process.
* @param fileSpans File spans associated with that interval. * @param fileSpans File spans associated with that interval.
*/ */
public LocusShard(SAMDataSource dataSource, List<GenomeLoc> intervals, Map<SAMReaderID,SAMFileSpan> fileSpans) { public LocusShard(GenomeLocParser parser, SAMDataSource dataSource, List<GenomeLoc> intervals, Map<SAMReaderID,SAMFileSpan> fileSpans) {
this.dataSource = dataSource; super(parser, ShardType.LOCUS, intervals, dataSource, fileSpans, false);
this.loci = intervals;
this.fileSpans = fileSpans;
}
/**
* Closes the shard, tallying and incorporating read data.
*/
@Override
public void close() {
dataSource.incorporateReadMetrics(readMetrics);
}
/**
* Gets the file spans associated with this locus shard.
* @return A list of the file spans to use when retrieving locus data.
*/
@Override
public Map<SAMReaderID,SAMFileSpan> getFileSpans() {
return fileSpans;
}
/** @return the genome location represented by this shard */
public List<GenomeLoc> getGenomeLocs() {
return loci;
}
/**
* Returns true if this shard is meant to buffer reads, rather
* than just holding pointers to their locations.
* @return True if this shard can buffer reads. False otherwise.
*/
@Override
public boolean buffersReads() { return false; }
/**
* Returns true if the read buffer is currently full.
* @return True if this shard's buffer is full (and the shard can buffer reads).
*/
@Override
public boolean isBufferEmpty() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
/**
* Returns true if the read buffer is currently full.
* @return True if this shard's buffer is full (and the shard can buffer reads).
*/
@Override
public boolean isBufferFull() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
/**
* Adds a read to the read buffer.
* @param read Add a read to the internal shard buffer.
*/
@Override
public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); }
/**
* Gets the iterator over the elements cached in the shard.
* @return
*/
@Override
public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
/**
* returns the type of shard.
*/
@Override
public ShardType getShardType() {
return ShardType.LOCUS;
}
/**
* Locus shards don't make sense as unmapped regions. Always return false.
* @return False always.
*/
@Override
public boolean isUnmapped() {
return false;
}
/**
* Gets key read validation and filtering properties.
* @return set of read properties associated with this shard.
*/
@Override
public ReadProperties getReadProperties() {
return dataSource.getReadsInfo();
}
/**
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
* @return Storage space for metrics.
*/
@Override
public ReadMetrics getReadMetrics() {
return readMetrics;
} }
/** /**
@ -150,6 +38,6 @@ public class LocusShard implements BAMFormatAwareShard {
*/ */
@Override @Override
public String toString() { public String toString() {
return Utils.join(";",loci); return Utils.join(";",getGenomeLocs());
} }
} }

View File

@ -48,6 +48,11 @@ public class LocusShardStrategy implements ShardStrategy {
*/ */
private final SAMDataSource reads; private final SAMDataSource reads;
/**
* the parser for creating shards
*/
private GenomeLocParser genomeLocParser;
/** /**
* An iterator through the available file pointers. * An iterator through the available file pointers.
*/ */
@ -60,6 +65,8 @@ public class LocusShardStrategy implements ShardStrategy {
*/ */
LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocParser genomeLocParser, GenomeLocSortedSet locations) { LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocParser genomeLocParser, GenomeLocSortedSet locations) {
this.reads = reads; this.reads = reads;
this.genomeLocParser = genomeLocParser;
if(!reads.isEmpty()) { if(!reads.isEmpty()) {
GenomeLocSortedSet intervals; GenomeLocSortedSet intervals;
if(locations == null) { if(locations == null) {
@ -124,7 +131,7 @@ public class LocusShardStrategy implements ShardStrategy {
public LocusShard next() { public LocusShard next() {
FilePointer nextFilePointer = filePointerIterator.next(); FilePointer nextFilePointer = filePointerIterator.next();
Map<SAMReaderID,SAMFileSpan> fileSpansBounding = nextFilePointer.fileSpans != null ? nextFilePointer.fileSpans : null; Map<SAMReaderID,SAMFileSpan> fileSpansBounding = nextFilePointer.fileSpans != null ? nextFilePointer.fileSpans : null;
return new LocusShard(reads,nextFilePointer.locations,fileSpansBounding); return new LocusShard(genomeLocParser, reads,nextFilePointer.locations,fileSpansBounding);
} }
/** we don't support the remove command */ /** we don't support the remove command */

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.datasources.shards; package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.ReadMetrics;
@ -13,81 +14,16 @@ import java.util.List;
* @author mhanna * @author mhanna
* @version 0.1 * @version 0.1
*/ */
public class MonolithicShard implements Shard { public class MonolithicShard extends BAMFormatAwareShard {
/**
* Reads data, if applicable.
*/
private final SAMDataSource readsDataSource;
/**
* What type of MonolithicShard is this? Read or locus?
*/
private final ShardType shardType;
/**
* Locations. For the monolithic shard, should be a list of all available contigs in the reference.
*/
private final List<GenomeLoc> locs;
/**
* Statistics about which reads in this shards were used and which were filtered away.
*/
private final ReadMetrics readMetrics = new ReadMetrics();
/** /**
* Creates a new monolithic shard of the given type. * Creates a new monolithic shard of the given type.
* @param shardType Type of the shard. Must be either read or locus; cannot be intervalic. * @param shardType Type of the shard. Must be either read or locus; cannot be intervalic.
* @param locs Intervals that this monolithic shard should process. * @param locs Intervals that this monolithic shard should process.
*/ */
public MonolithicShard(SAMDataSource readsDataSource, ShardType shardType, List<GenomeLoc> locs) { public MonolithicShard(GenomeLocParser parser, SAMDataSource readsDataSource, ShardType shardType, List<GenomeLoc> locs) {
this.readsDataSource = readsDataSource; super(parser, shardType, locs, readsDataSource, null, false);
if(shardType != ShardType.LOCUS && shardType != ShardType.READ) if(shardType != ShardType.LOCUS && shardType != ShardType.READ)
throw new ReviewedStingException("Invalid shard type for monolithic shard: " + shardType); throw new ReviewedStingException("Invalid shard type for monolithic shard: " + shardType);
this.shardType = shardType;
this.locs = locs;
}
/**
* Closes the shard, tallying and incorporating read data.
*/
@Override
public void close() {
readsDataSource.incorporateReadMetrics(readMetrics);
}
/**
* Returns null, indicating that (in this case) the entire genome is covered.
* @return null.
*/
public List<GenomeLoc> getGenomeLocs() {
return locs;
}
/**
* Reports the type of monolithic shard.
* @return Type of monolithic shard.
*/
@Override
public ShardType getShardType() {
return shardType;
}
/**
* Gets key read validation and filtering properties.
* @return set of read properties associated with this shard.
*/
@Override
public ReadProperties getReadProperties() {
return readsDataSource.getReadsInfo();
}
/**
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
* @return Storage space for metrics.
*/
@Override
public ReadMetrics getReadMetrics() {
return readMetrics;
} }
/** /**

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Iterator; import java.util.Iterator;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
@ -23,8 +24,8 @@ public class MonolithicShardStrategy implements ShardStrategy {
* Create a new shard strategy for shards of the given type. * Create a new shard strategy for shards of the given type.
* @param shardType The shard type. * @param shardType The shard type.
*/ */
public MonolithicShardStrategy(final SAMDataSource readsDataSource, final Shard.ShardType shardType, final List<GenomeLoc> region) { public MonolithicShardStrategy(final GenomeLocParser parser, final SAMDataSource readsDataSource, final Shard.ShardType shardType, final List<GenomeLoc> region) {
shard = new MonolithicShard(readsDataSource,shardType,region); shard = new MonolithicShard(parser,readsDataSource,shardType,region);
} }
/** /**

View File

@ -13,6 +13,7 @@ import java.util.*;
import net.sf.samtools.SAMFileSpan; import net.sf.samtools.SAMFileSpan;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.picard.filter.SamRecordFilter; import net.sf.picard.filter.SamRecordFilter;
import org.broadinstitute.sting.utils.GenomeLocParser;
/** /**
* *
@ -36,80 +37,21 @@ import net.sf.picard.filter.SamRecordFilter;
* @author mhanna * @author mhanna
* @version 0.1 * @version 0.1
*/ */
public class ReadShard implements BAMFormatAwareShard { public class ReadShard extends BAMFormatAwareShard {
private final SAMDataSource readsDataSource;
/**
* The data backing the next chunks to deliver to the traversal engine.
*/
private final Map<SAMReaderID,SAMFileSpan> fileSpans;
/** /**
* The reads making up this shard. * The reads making up this shard.
*/ */
private final Collection<SAMRecord> reads = new ArrayList<SAMRecord>(ReadShardStrategy.MAX_READS); private final Collection<SAMRecord> reads = new ArrayList<SAMRecord>(ReadShardStrategy.MAX_READS);
/** public ReadShard(GenomeLocParser parser, SAMDataSource readsDataSource, Map<SAMReaderID,SAMFileSpan> fileSpans, List<GenomeLoc> loci, boolean isUnmapped) {
* currently our location super(parser, ShardType.READ, loci, readsDataSource, fileSpans, isUnmapped);
*/
private final List<GenomeLoc> loci;
/**
* Whether the current location is unmapped.
*/
private final boolean isUnmapped;
/**
* Statistics about which reads in this shards were used and which were filtered away.
*/
private final ReadMetrics readMetrics = new ReadMetrics();
public ReadShard(SAMDataSource readsDataSource, Map<SAMReaderID,SAMFileSpan> fileSpans, List<GenomeLoc> loci, boolean isUnmapped) {
this.readsDataSource = readsDataSource;
this.fileSpans = fileSpans;
this.loci = loci;
this.isUnmapped = isUnmapped;
} }
/**
* Closes the shard, tallying and incorporating read data.
*/
@Override
public void close() {
readsDataSource.incorporateReadMetrics(readMetrics);
}
/**
* Get the list of chunks delimiting this shard.
* @return a list of chunks that contain data for this shard.
*/
@Override
public Map<SAMReaderID,SAMFileSpan> getFileSpans() {
return Collections.unmodifiableMap(fileSpans);
}
/** @return the genome location represented by this shard */
@Override
public List<GenomeLoc> getGenomeLocs() {
return loci;
}
/**
* Whether this shard points to an unmapped region.
* @return True if this shard is unmapped. False otherwise.
*/
@Override
public boolean isUnmapped() {
return isUnmapped;
}
/** /**
* Returns true if this shard is meant to buffer reads, rather * Returns true if this shard is meant to buffer reads, rather
* than just holding pointers to their locations. * than just holding pointers to their locations.
* @return True if this shard can buffer reads. False otherwise. * @return True if this shard can buffer reads. False otherwise.
*/ */
@Override
public boolean buffersReads() { public boolean buffersReads() {
return true; return true;
} }
@ -118,7 +60,6 @@ public class ReadShard implements BAMFormatAwareShard {
* Returns true if the read buffer is currently full. * Returns true if the read buffer is currently full.
* @return True if this shard's buffer is full (and the shard can buffer reads). * @return True if this shard's buffer is full (and the shard can buffer reads).
*/ */
@Override
public boolean isBufferEmpty() { public boolean isBufferEmpty() {
return reads.size() == 0; return reads.size() == 0;
} }
@ -127,7 +68,6 @@ public class ReadShard implements BAMFormatAwareShard {
* Returns true if the read buffer is currently full. * Returns true if the read buffer is currently full.
* @return True if this shard's buffer is full (and the shard can buffer reads). * @return True if this shard's buffer is full (and the shard can buffer reads).
*/ */
@Override
public boolean isBufferFull() { public boolean isBufferFull() {
return reads.size() > ReadShardStrategy.MAX_READS; return reads.size() > ReadShardStrategy.MAX_READS;
} }
@ -136,7 +76,6 @@ public class ReadShard implements BAMFormatAwareShard {
* Adds a read to the read buffer. * Adds a read to the read buffer.
* @param read Add a read to the internal shard buffer. * @param read Add a read to the internal shard buffer.
*/ */
@Override
public void addRead(SAMRecord read) { public void addRead(SAMRecord read) {
// DO NOT validate that the buffer is full. Paired read sharding will occasionally have to stuff another // DO NOT validate that the buffer is full. Paired read sharding will occasionally have to stuff another
// read or two into the buffer. // read or two into the buffer.
@ -147,39 +86,10 @@ public class ReadShard implements BAMFormatAwareShard {
* Creates an iterator over reads stored in this shard's read cache. * Creates an iterator over reads stored in this shard's read cache.
* @return * @return
*/ */
@Override
public StingSAMIterator iterator() { public StingSAMIterator iterator() {
return StingSAMIteratorAdapter.adapt(reads.iterator()); return StingSAMIteratorAdapter.adapt(reads.iterator());
} }
/**
* what kind of shard do we return
*
* @return ShardType, indicating the type
*/
@Override
public ShardType getShardType() {
return ShardType.READ;
}
/**
* Gets key read validation and filtering properties.
* @return set of read properties associated with this shard.
*/
@Override
public ReadProperties getReadProperties() {
return readsDataSource.getReadsInfo();
}
/**
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
* @return Storage space for metrics.
*/
@Override
public ReadMetrics getReadMetrics() {
return readMetrics;
}
/** /**
* String representation of this shard. * String representation of this shard.
* @return A string representation of the boundaries of this shard. * @return A string representation of the boundaries of this shard.
@ -187,7 +97,7 @@ public class ReadShard implements BAMFormatAwareShard {
@Override @Override
public String toString() { public String toString() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
for(Map.Entry<SAMReaderID,SAMFileSpan> entry: fileSpans.entrySet()) { for(Map.Entry<SAMReaderID,SAMFileSpan> entry: getFileSpans().entrySet()) {
sb.append(entry.getKey()); sb.append(entry.getKey());
sb.append(": "); sb.append(": ");
sb.append(entry.getValue()); sb.append(entry.getValue());
@ -195,6 +105,4 @@ public class ReadShard implements BAMFormatAwareShard {
} }
return sb.toString(); return sb.toString();
} }
} }

View File

@ -31,6 +31,7 @@ import java.util.*;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLocSortedSet;
/** /**
@ -84,13 +85,16 @@ public class ReadShardStrategy implements ShardStrategy {
*/ */
private boolean isIntoUnmappedRegion = false; private boolean isIntoUnmappedRegion = false;
private final GenomeLocParser parser;
/** /**
* Create a new read shard strategy, loading read shards from the given BAM file. * Create a new read shard strategy, loading read shards from the given BAM file.
* @param dataSource Data source from which to load shards. * @param dataSource Data source from which to load shards.
* @param locations intervals to use for sharding. * @param locations intervals to use for sharding.
*/ */
public ReadShardStrategy(SAMDataSource dataSource, GenomeLocSortedSet locations) { public ReadShardStrategy(GenomeLocParser parser, SAMDataSource dataSource, GenomeLocSortedSet locations) {
this.dataSource = dataSource; this.dataSource = dataSource;
this.parser = parser;
this.position = this.dataSource.getCurrentPosition(); this.position = this.dataSource.getCurrentPosition();
this.locations = locations; this.locations = locations;
@ -155,7 +159,7 @@ public class ReadShardStrategy implements ShardStrategy {
} }
if(selectedReaders.size() > 0) { if(selectedReaders.size() > 0) {
BAMFormatAwareShard shard = new ReadShard(dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped); BAMFormatAwareShard shard = new ReadShard(parser, dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
dataSource.fillShard(shard); dataSource.fillShard(shard);
if(!shard.isBufferEmpty()) { if(!shard.isBufferEmpty()) {
@ -169,7 +173,9 @@ public class ReadShardStrategy implements ShardStrategy {
} }
} }
else { else {
BAMFormatAwareShard shard = new ReadShard(dataSource,position,null,false); // todo -- this nulling of intervals is a bit annoying since readwalkers without
// todo -- any -L values need to be special cased throughout the code.
BAMFormatAwareShard shard = new ReadShard(parser,dataSource,position,null,false);
dataSource.fillShard(shard); dataSource.fillShard(shard);
nextShard = !shard.isBufferEmpty() ? shard : null; nextShard = !shard.isBufferEmpty() ? shard : null;
} }

View File

@ -3,6 +3,9 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.HasGenomeLocation;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.Serializable; import java.io.Serializable;
import java.util.List; import java.util.List;
@ -29,41 +32,101 @@ import java.util.List;
* <p/> * <p/>
* Interface Shard * Interface Shard
* <p/> * <p/>
* The base interface for shards. * The base abstract class for shards.
*/ */
public interface Shard extends Serializable { public abstract class Shard implements HasGenomeLocation {
enum ShardType { public enum ShardType {
READ, LOCUS READ, LOCUS
} }
protected final GenomeLocParser parser; // incredibly annoying!
/**
* What type of MonolithicShard is this? Read or locus?
*/
protected final ShardType shardType;
/**
* Locations. For the monolithic shard, should be a list of all available contigs in the reference.
*/
protected final List<GenomeLoc> locs;
/**
* Statistics about which reads in this shards were used and which were filtered away.
*/
protected final ReadMetrics readMetrics = new ReadMetrics();
public Shard(GenomeLocParser parser, ShardType shardType, List<GenomeLoc> locs) {
this.locs = locs;
this.parser = parser;
this.shardType = shardType;
}
/** /**
* If isUnmapped is true, than getGenomeLocs by * If isUnmapped is true, than getGenomeLocs by
* definition will return a singleton list with a GenomeLoc.UNMAPPED * definition will return a singleton list with a GenomeLoc.UNMAPPED
* *
* Can return null, indicating that the entire genome is covered.
*
* @return the genome location represented by this shard * @return the genome location represented by this shard
*/ */
public List<GenomeLoc> getGenomeLocs(); public List<GenomeLoc> getGenomeLocs() {
return locs;
}
/**
* Returns the span of the genomeLocs comprising this shard
* @param
* @return
*/
public GenomeLoc getLocation() {
if ( getGenomeLocs() == null )
return GenomeLoc.WHOLE_GENOME;
int start = Integer.MAX_VALUE;
int stop = Integer.MIN_VALUE;
String contig = null;
for ( GenomeLoc loc : getGenomeLocs() ) {
if ( GenomeLoc.isUnmapped(loc) )
// special case the unmapped region marker, just abort out
return loc;
contig = loc.getContig();
if ( loc.getStart() < start ) start = loc.getStart();
if ( loc.getStop() > stop ) stop = loc.getStop();
}
return parser.createGenomeLoc(contig, start, stop);
}
/** /**
* what kind of shard do we return * what kind of shard do we return
* @return ShardType, indicating the type * @return ShardType, indicating the type
*/ */
public ShardType getShardType(); public ShardType getShardType() {
return shardType;
}
/** /**
* Does any releasing / aggregation required when the shard is through being processed. * Does any releasing / aggregation required when the shard is through being processed.
*/ */
public void close(); public void close() {
; // by default don't do anything
}
/** /**
* Gets required configuration for validating and filtering reads. * Gets required configuration for validating and filtering reads.
* @return read configuration properties. * @return read configuration properties.
*/ */
public ReadProperties getReadProperties(); public abstract ReadProperties getReadProperties();
/** /**
* Gets the runtime metrics associated with this shard. * Gets the runtime metrics associated with this shard.
* @return metrics and read counts. * Retrieves a storage space of metrics about number of reads included, filtered, etc.
* @return Storage space for metrics.
*/ */
public ReadMetrics getReadMetrics(); public ReadMetrics getReadMetrics() {
return readMetrics;
}
} }

View File

@ -70,7 +70,7 @@ public class ShardStrategyFactory {
case LOCUS_EXPERIMENTAL: case LOCUS_EXPERIMENTAL:
return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,null); return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,null);
case READS_EXPERIMENTAL: case READS_EXPERIMENTAL:
return new ReadShardStrategy(readsDataSource,null); return new ReadShardStrategy(genomeLocParser,readsDataSource,null);
default: default:
throw new ReviewedStingException("Strategy: " + strat + " isn't implemented for this type of shatter request"); throw new ReviewedStingException("Strategy: " + strat + " isn't implemented for this type of shatter request");
} }
@ -108,7 +108,7 @@ public class ShardStrategyFactory {
case LOCUS_EXPERIMENTAL: case LOCUS_EXPERIMENTAL:
return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,lst); return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,lst);
case READS_EXPERIMENTAL: case READS_EXPERIMENTAL:
return new ReadShardStrategy(readsDataSource,lst); return new ReadShardStrategy(genomeLocParser, readsDataSource,lst);
default: default:
throw new ReviewedStingException("Strategy: " + strat + " isn't implemented"); throw new ReviewedStingException("Strategy: " + strat + " isn't implemented");
} }