From cacdac391486f7ee4a3b9f5bcc4730ac310721b8 Mon Sep 17 00:00:00 2001 From: depristo Date: Thu, 20 Jan 2011 12:36:56 +0000 Subject: [PATCH] Major refactoring of shards. No longer uses interfaces but is now an actual object hierarchy with most of the important and common functionality pushed up to base classes. Eliminated a lot of duplicated code, and the shards are much more understandable now. Also now require a GenomeLocParser to work with their own GenomeLocs. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5030 348d0f76-0448-11de-a6fe-93d51630548a --- .../shards/BAMFormatAwareShard.java | 85 +++++++++--- .../gatk/datasources/shards/LocusShard.java | 122 +----------------- .../shards/LocusShardStrategy.java | 9 +- .../datasources/shards/MonolithicShard.java | 72 +---------- .../shards/MonolithicShardStrategy.java | 5 +- .../gatk/datasources/shards/ReadShard.java | 102 +-------------- .../datasources/shards/ReadShardStrategy.java | 12 +- .../sting/gatk/datasources/shards/Shard.java | 81 ++++++++++-- .../shards/ShardStrategyFactory.java | 4 +- 9 files changed, 177 insertions(+), 315 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/BAMFormatAwareShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/BAMFormatAwareShard.java index be46d7094..624f7e949 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/BAMFormatAwareShard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/BAMFormatAwareShard.java @@ -3,11 +3,16 @@ package org.broadinstitute.sting.gatk.datasources.shards; import net.sf.samtools.*; import net.sf.picard.filter.SamRecordFilter; +import java.util.Collections; import java.util.List; import java.util.Map; +import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import 
org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; /** * A common interface for shards that natively understand the BAM format. @@ -15,44 +20,90 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; * @author mhanna * @version 0.1 */ -public interface BAMFormatAwareShard extends Shard { +public abstract class BAMFormatAwareShard extends Shard { + /** + * Whether the current location is unmapped. + */ + private final boolean isUnmapped; + + /** + * Reads data, if applicable. + */ + private final SAMDataSource readsDataSource; + + /** + * The data backing the next chunks to deliver to the traversal engine. + */ + private final Map fileSpans; + + public BAMFormatAwareShard(GenomeLocParser parser, + ShardType shardType, + List locs, + SAMDataSource readsDataSource, + Map fileSpans, + boolean isUnmapped) { + super(parser, shardType, locs); + this.readsDataSource = readsDataSource; + this.fileSpans = fileSpans; + this.isUnmapped = isUnmapped; + } + + /** + * Closes the shard, tallying and incorporating read data. + */ + @Override + public void close() { + readsDataSource.incorporateReadMetrics(readMetrics); + } + /** * Get the list of chunks delimiting this shard. * @return a list of chunks that contain data for this shard. */ - public Map getFileSpans(); + public Map getFileSpans() { + return Collections.unmodifiableMap(fileSpans); + } + + /** + * Gets key read validation and filtering properties. + * @return set of read properties associated with this shard. + */ + @Override + public ReadProperties getReadProperties() { + return readsDataSource.getReadsInfo(); + } /** * Returns true if this shard is meant to buffer reads, rather * than just holding pointers to their locations. * @return True if this shard can buffer reads. False otherwise. 
*/ - public boolean buffersReads(); - - /** - * Checks to see whether the buffer is empty. - * @return True if the buffer is empty. - */ - public boolean isBufferEmpty(); + public boolean buffersReads() { return false; } /** * Returns true if the read buffer is currently full. * @return True if this shard's buffer is full (and the shard can buffer reads). */ - public boolean isBufferFull(); + public boolean isBufferEmpty() { throw new UnsupportedOperationException("This shard does not buffer reads."); } + + /** + * Returns true if the read buffer is currently full. + * @return True if this shard's buffer is full (and the shard can buffer reads). + */ + public boolean isBufferFull() { throw new UnsupportedOperationException("This shard does not buffer reads."); } /** * Adds a read to the read buffer. * @param read Add a read to the internal shard buffer. */ - public void addRead(SAMRecord read); + public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); } /** - * Assuming this iterator buffers reads, an iterator to the reads - * stored in the shard. - * @return An iterator over the reads stored in the shard. + * Gets the iterator over the elements cached in the shard. + * @return */ - public StingSAMIterator iterator(); + public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); } + /** * Whether this shard points to an unmapped region. @@ -60,5 +111,7 @@ public interface BAMFormatAwareShard extends Shard { * this case, isUnmapped should always return false. * @return True if this shard is unmapped. False otherwise. 
*/ - public boolean isUnmapped(); + public boolean isUnmapped() { + return isUnmapped; + } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java index 55bd5be70..dc38e252f 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; @@ -21,127 +22,14 @@ import net.sf.picard.filter.SamRecordFilter; * @version 1.0 * @date Apr 7, 2009 */ -public class LocusShard implements BAMFormatAwareShard { - /** - * Source for read data. - */ - private SAMDataSource dataSource; - - /** - * A list of the chunks associated with this shard. - */ - private final Map fileSpans; - - // currently our location - private final List loci; - - /** - * Statistics about which reads in this shards were used and which were filtered away. - */ - private final ReadMetrics readMetrics = new ReadMetrics(); - +public class LocusShard extends BAMFormatAwareShard { /** * Create a new locus shard, divided by index. * @param intervals List of intervals to process. * @param fileSpans File spans associated with that interval. */ - public LocusShard(SAMDataSource dataSource, List intervals, Map fileSpans) { - this.dataSource = dataSource; - this.loci = intervals; - this.fileSpans = fileSpans; - } - - /** - * Closes the shard, tallying and incorporating read data. 
- */ - @Override - public void close() { - dataSource.incorporateReadMetrics(readMetrics); - } - - /** - * Gets the file spans associated with this locus shard. - * @return A list of the file spans to use when retrieving locus data. - */ - @Override - public Map getFileSpans() { - return fileSpans; - } - - /** @return the genome location represented by this shard */ - public List getGenomeLocs() { - return loci; - } - - /** - * Returns true if this shard is meant to buffer reads, rather - * than just holding pointers to their locations. - * @return True if this shard can buffer reads. False otherwise. - */ - @Override - public boolean buffersReads() { return false; } - - /** - * Returns true if the read buffer is currently full. - * @return True if this shard's buffer is full (and the shard can buffer reads). - */ - @Override - public boolean isBufferEmpty() { throw new UnsupportedOperationException("This shard does not buffer reads."); } - - /** - * Returns true if the read buffer is currently full. - * @return True if this shard's buffer is full (and the shard can buffer reads). - */ - @Override - public boolean isBufferFull() { throw new UnsupportedOperationException("This shard does not buffer reads."); } - - /** - * Adds a read to the read buffer. - * @param read Add a read to the internal shard buffer. - */ - @Override - public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); } - - /** - * Gets the iterator over the elements cached in the shard. - * @return - */ - @Override - public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); } - - /** - * returns the type of shard. - */ - @Override - public ShardType getShardType() { - return ShardType.LOCUS; - } - - /** - * Locus shards don't make sense as unmapped regions. Always return false. - * @return False always. 
- */ - @Override - public boolean isUnmapped() { - return false; - } - - /** - * Gets key read validation and filtering properties. - * @return set of read properties associated with this shard. - */ - @Override - public ReadProperties getReadProperties() { - return dataSource.getReadsInfo(); - } - - /** - * Retrieves a storage space of metrics about number of reads included, filtered, etc. - * @return Storage space for metrics. - */ - @Override - public ReadMetrics getReadMetrics() { - return readMetrics; + public LocusShard(GenomeLocParser parser, SAMDataSource dataSource, List intervals, Map fileSpans) { + super(parser, ShardType.LOCUS, intervals, dataSource, fileSpans, false); } /** @@ -150,6 +38,6 @@ public class LocusShard implements BAMFormatAwareShard { */ @Override public String toString() { - return Utils.join(";",loci); + return Utils.join(";",getGenomeLocs()); } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java index 3313b992e..b87b351b0 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java @@ -48,6 +48,11 @@ public class LocusShardStrategy implements ShardStrategy { */ private final SAMDataSource reads; + /** + * the parser for creating shards + */ + private GenomeLocParser genomeLocParser; + /** * An iterator through the available file pointers. 
*/ @@ -60,6 +65,8 @@ public class LocusShardStrategy implements ShardStrategy { */ LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocParser genomeLocParser, GenomeLocSortedSet locations) { this.reads = reads; + this.genomeLocParser = genomeLocParser; + if(!reads.isEmpty()) { GenomeLocSortedSet intervals; if(locations == null) { @@ -124,7 +131,7 @@ public class LocusShardStrategy implements ShardStrategy { public LocusShard next() { FilePointer nextFilePointer = filePointerIterator.next(); Map fileSpansBounding = nextFilePointer.fileSpans != null ? nextFilePointer.fileSpans : null; - return new LocusShard(reads,nextFilePointer.locations,fileSpansBounding); + return new LocusShard(genomeLocParser, reads,nextFilePointer.locations,fileSpansBounding); } /** we don't support the remove command */ diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java index 98df7b7b1..6b24290c4 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.datasources.shards; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.ReadMetrics; @@ -13,81 +14,16 @@ import java.util.List; * @author mhanna * @version 0.1 */ -public class MonolithicShard implements Shard { - /** - * Reads data, if applicable. - */ - private final SAMDataSource readsDataSource; - - /** - * What type of MonolithicShard is this? Read or locus? - */ - private final ShardType shardType; - - /** - * Locations. For the monolithic shard, should be a list of all available contigs in the reference. 
- */ - private final List locs; - - /** - * Statistics about which reads in this shards were used and which were filtered away. - */ - private final ReadMetrics readMetrics = new ReadMetrics(); - +public class MonolithicShard extends BAMFormatAwareShard { /** * Creates a new monolithic shard of the given type. * @param shardType Type of the shard. Must be either read or locus; cannot be intervalic. * @param locs Intervals that this monolithic shard should process. */ - public MonolithicShard(SAMDataSource readsDataSource, ShardType shardType, List locs) { - this.readsDataSource = readsDataSource; + public MonolithicShard(GenomeLocParser parser, SAMDataSource readsDataSource, ShardType shardType, List locs) { + super(parser, shardType, locs, readsDataSource, null, false); if(shardType != ShardType.LOCUS && shardType != ShardType.READ) throw new ReviewedStingException("Invalid shard type for monolithic shard: " + shardType); - this.shardType = shardType; - this.locs = locs; - } - - /** - * Closes the shard, tallying and incorporating read data. - */ - @Override - public void close() { - readsDataSource.incorporateReadMetrics(readMetrics); - } - - /** - * Returns null, indicating that (in this case) the entire genome is covered. - * @return null. - */ - public List getGenomeLocs() { - return locs; - } - - /** - * Reports the type of monolithic shard. - * @return Type of monolithic shard. - */ - @Override - public ShardType getShardType() { - return shardType; - } - - /** - * Gets key read validation and filtering properties. - * @return set of read properties associated with this shard. - */ - @Override - public ReadProperties getReadProperties() { - return readsDataSource.getReadsInfo(); - } - - /** - * Retrieves a storage space of metrics about number of reads included, filtered, etc. - * @return Storage space for metrics. 
- */ - @Override - public ReadMetrics getReadMetrics() { - return readMetrics; } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java index 010a6d815..d9df841e4 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Iterator; import java.util.NoSuchElementException; @@ -23,8 +24,8 @@ public class MonolithicShardStrategy implements ShardStrategy { * Create a new shard strategy for shards of the given type. * @param shardType The shard type. */ - public MonolithicShardStrategy(final SAMDataSource readsDataSource, final Shard.ShardType shardType, final List region) { - shard = new MonolithicShard(readsDataSource,shardType,region); + public MonolithicShardStrategy(final GenomeLocParser parser, final SAMDataSource readsDataSource, final Shard.ShardType shardType, final List region) { + shard = new MonolithicShard(parser,readsDataSource,shardType,region); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShard.java index a6dacfa48..8c43f38da 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShard.java @@ -13,6 +13,7 @@ import java.util.*; import net.sf.samtools.SAMFileSpan; import net.sf.samtools.SAMRecord; import net.sf.picard.filter.SamRecordFilter; +import org.broadinstitute.sting.utils.GenomeLocParser; /** * @@ 
-36,80 +37,21 @@ import net.sf.picard.filter.SamRecordFilter; * @author mhanna * @version 0.1 */ -public class ReadShard implements BAMFormatAwareShard { - private final SAMDataSource readsDataSource; - - /** - * The data backing the next chunks to deliver to the traversal engine. - */ - private final Map fileSpans; - +public class ReadShard extends BAMFormatAwareShard { /** * The reads making up this shard. */ private final Collection reads = new ArrayList(ReadShardStrategy.MAX_READS); - /** - * currently our location - */ - private final List loci; - - /** - * Whether the current location is unmapped. - */ - private final boolean isUnmapped; - - /** - * Statistics about which reads in this shards were used and which were filtered away. - */ - private final ReadMetrics readMetrics = new ReadMetrics(); - - public ReadShard(SAMDataSource readsDataSource, Map fileSpans, List loci, boolean isUnmapped) { - this.readsDataSource = readsDataSource; - this.fileSpans = fileSpans; - this.loci = loci; - this.isUnmapped = isUnmapped; + public ReadShard(GenomeLocParser parser, SAMDataSource readsDataSource, Map fileSpans, List loci, boolean isUnmapped) { + super(parser, ShardType.READ, loci, readsDataSource, fileSpans, isUnmapped); } - /** - * Closes the shard, tallying and incorporating read data. - */ - @Override - public void close() { - readsDataSource.incorporateReadMetrics(readMetrics); - } - - /** - * Get the list of chunks delimiting this shard. - * @return a list of chunks that contain data for this shard. - */ - @Override - public Map getFileSpans() { - return Collections.unmodifiableMap(fileSpans); - } - - /** @return the genome location represented by this shard */ - @Override - public List getGenomeLocs() { - return loci; - } - - /** - * Whether this shard points to an unmapped region. - * @return True if this shard is unmapped. False otherwise. 
- */ - @Override - public boolean isUnmapped() { - return isUnmapped; - } - - /** * Returns true if this shard is meant to buffer reads, rather * than just holding pointers to their locations. * @return True if this shard can buffer reads. False otherwise. */ - @Override public boolean buffersReads() { return true; } @@ -118,7 +60,6 @@ public class ReadShard implements BAMFormatAwareShard { * Returns true if the read buffer is currently full. * @return True if this shard's buffer is full (and the shard can buffer reads). */ - @Override public boolean isBufferEmpty() { return reads.size() == 0; } @@ -127,7 +68,6 @@ public class ReadShard implements BAMFormatAwareShard { * Returns true if the read buffer is currently full. * @return True if this shard's buffer is full (and the shard can buffer reads). */ - @Override public boolean isBufferFull() { return reads.size() > ReadShardStrategy.MAX_READS; } @@ -136,7 +76,6 @@ public class ReadShard implements BAMFormatAwareShard { * Adds a read to the read buffer. * @param read Add a read to the internal shard buffer. */ - @Override public void addRead(SAMRecord read) { // DO NOT validate that the buffer is full. Paired read sharding will occasionally have to stuff another // read or two into the buffer. @@ -147,39 +86,10 @@ public class ReadShard implements BAMFormatAwareShard { * Creates an iterator over reads stored in this shard's read cache. * @return */ - @Override public StingSAMIterator iterator() { return StingSAMIteratorAdapter.adapt(reads.iterator()); } - /** - * what kind of shard do we return - * - * @return ShardType, indicating the type - */ - @Override - public ShardType getShardType() { - return ShardType.READ; - } - - /** - * Gets key read validation and filtering properties. - * @return set of read properties associated with this shard. 
- */ - @Override - public ReadProperties getReadProperties() { - return readsDataSource.getReadsInfo(); - } - - /** - * Retrieves a storage space of metrics about number of reads included, filtered, etc. - * @return Storage space for metrics. - */ - @Override - public ReadMetrics getReadMetrics() { - return readMetrics; - } - /** * String representation of this shard. * @return A string representation of the boundaries of this shard. @@ -187,7 +97,7 @@ public class ReadShard implements BAMFormatAwareShard { @Override public String toString() { StringBuilder sb = new StringBuilder(); - for(Map.Entry entry: fileSpans.entrySet()) { + for(Map.Entry entry: getFileSpans().entrySet()) { sb.append(entry.getKey()); sb.append(": "); sb.append(entry.getValue()); @@ -195,6 +105,4 @@ public class ReadShard implements BAMFormatAwareShard { } return sb.toString(); } - - } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java index 804439e23..6c229901e 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java @@ -31,6 +31,7 @@ import java.util.*; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; /** @@ -84,13 +85,16 @@ public class ReadShardStrategy implements ShardStrategy { */ private boolean isIntoUnmappedRegion = false; + private final GenomeLocParser parser; + /** * Create a new read shard strategy, loading read shards from the given BAM file. * @param dataSource Data source from which to load shards. * @param locations intervals to use for sharding. 
*/ - public ReadShardStrategy(SAMDataSource dataSource, GenomeLocSortedSet locations) { + public ReadShardStrategy(GenomeLocParser parser, SAMDataSource dataSource, GenomeLocSortedSet locations) { this.dataSource = dataSource; + this.parser = parser; this.position = this.dataSource.getCurrentPosition(); this.locations = locations; @@ -155,7 +159,7 @@ public class ReadShardStrategy implements ShardStrategy { } if(selectedReaders.size() > 0) { - BAMFormatAwareShard shard = new ReadShard(dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped); + BAMFormatAwareShard shard = new ReadShard(parser, dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped); dataSource.fillShard(shard); if(!shard.isBufferEmpty()) { @@ -169,7 +173,9 @@ public class ReadShardStrategy implements ShardStrategy { } } else { - BAMFormatAwareShard shard = new ReadShard(dataSource,position,null,false); + // todo -- this nulling of intervals is a bit annoying since readwalkers without + // todo -- any -L values need to be special cased throughout the code. + BAMFormatAwareShard shard = new ReadShard(parser,dataSource,position,null,false); dataSource.fillShard(shard); nextShard = !shard.isBufferEmpty() ? 
shard : null; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java index 061bece45..c4928103a 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java @@ -3,6 +3,9 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.HasGenomeLocation; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.Serializable; import java.util.List; @@ -29,41 +32,101 @@ import java.util.List; *

* Interface Shard *

- * The base interface for shards. + * The base abstract class for shards. */ -public interface Shard extends Serializable { - enum ShardType { +public abstract class Shard implements HasGenomeLocation { + public enum ShardType { READ, LOCUS } + protected final GenomeLocParser parser; // incredibly annoying! + + /** + * What type of shard is this? Read or locus? + */ + protected final ShardType shardType; + + /** + * Locations. For the monolithic shard, should be a list of all available contigs in the reference. + */ + protected final List locs; + + /** + * Statistics about which reads in this shard were used and which were filtered away. + */ + protected final ReadMetrics readMetrics = new ReadMetrics(); + + public Shard(GenomeLocParser parser, ShardType shardType, List locs) { + this.locs = locs; + this.parser = parser; + this.shardType = shardType; + } + /** * If isUnmapped is true, than getGenomeLocs by * definition will return a singleton list with a GenomeLoc.UNMAPPED * + * Can return null, indicating that the entire genome is covered.
+ * + * @return the genome location represented by this shard */ - public List getGenomeLocs(); + public List getGenomeLocs() { + return locs; + } + + /** + * Returns the span of the genomeLocs comprising this shard. + * A null location list yields GenomeLoc.WHOLE_GENOME; an unmapped marker in the list is returned as-is. + * @return a GenomeLoc from the smallest start to the largest stop, on the contig of the last location + */ + public GenomeLoc getLocation() { + if ( getGenomeLocs() == null ) + return GenomeLoc.WHOLE_GENOME; + + int start = Integer.MAX_VALUE; + int stop = Integer.MIN_VALUE; + String contig = null; + + for ( GenomeLoc loc : getGenomeLocs() ) { + if ( GenomeLoc.isUnmapped(loc) ) + // special case the unmapped region marker, just abort out + return loc; + contig = loc.getContig(); + if ( loc.getStart() < start ) start = loc.getStart(); + if ( loc.getStop() > stop ) stop = loc.getStop(); + } + + return parser.createGenomeLoc(contig, start, stop); + } + /** * what kind of shard do we return * @return ShardType, indicating the type */ - public ShardType getShardType(); + public ShardType getShardType() { + return shardType; + } /** * Does any releasing / aggregation required when the shard is through being processed. */ - public void close(); + public void close() { + ; // by default don't do anything + } /** * Gets required configuration for validating and filtering reads. * @return read configuration properties. */ - public ReadProperties getReadProperties(); + public abstract ReadProperties getReadProperties(); /** * Gets the runtime metrics associated with this shard. - * @return metrics and read counts. + * Retrieves a storage space of metrics about number of reads included, filtered, etc. + * @return Storage space for metrics.
*/ - public ReadMetrics getReadMetrics(); + public ReadMetrics getReadMetrics() { + return readMetrics; + } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java index 9af08c22e..6777e69ee 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java @@ -70,7 +70,7 @@ public class ShardStrategyFactory { case LOCUS_EXPERIMENTAL: return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,null); case READS_EXPERIMENTAL: - return new ReadShardStrategy(readsDataSource,null); + return new ReadShardStrategy(genomeLocParser,readsDataSource,null); default: throw new ReviewedStingException("Strategy: " + strat + " isn't implemented for this type of shatter request"); } @@ -108,7 +108,7 @@ public class ShardStrategyFactory { case LOCUS_EXPERIMENTAL: return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,lst); case READS_EXPERIMENTAL: - return new ReadShardStrategy(readsDataSource,lst); + return new ReadShardStrategy(genomeLocParser, readsDataSource,lst); default: throw new ReviewedStingException("Strategy: " + strat + " isn't implemented"); }