Major refactoring of shards. Shards no longer use interfaces; they now form an actual object hierarchy, with most of the important and common functionality pushed up to base classes. This eliminates a lot of duplicated code and makes the shards much more understandable. Shards also now require a GenomeLocParser to work with their own GenomeLocs.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5030 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4d611e53e7
commit
cacdac3914
|
|
@ -3,11 +3,16 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
|||
import net.sf.samtools.*;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
/**
|
||||
* A common base class for shards that natively understand the BAM format.
|
||||
|
|
@ -15,44 +20,90 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public interface BAMFormatAwareShard extends Shard {
|
||||
public abstract class BAMFormatAwareShard extends Shard {
|
||||
/**
|
||||
* Whether the current location is unmapped.
|
||||
*/
|
||||
private final boolean isUnmapped;
|
||||
|
||||
/**
|
||||
* Reads data, if applicable.
|
||||
*/
|
||||
private final SAMDataSource readsDataSource;
|
||||
|
||||
/**
|
||||
* The data backing the next chunks to deliver to the traversal engine.
|
||||
*/
|
||||
private final Map<SAMReaderID,SAMFileSpan> fileSpans;
|
||||
|
||||
public BAMFormatAwareShard(GenomeLocParser parser,
|
||||
ShardType shardType,
|
||||
List<GenomeLoc> locs,
|
||||
SAMDataSource readsDataSource,
|
||||
Map<SAMReaderID,SAMFileSpan> fileSpans,
|
||||
boolean isUnmapped) {
|
||||
super(parser, shardType, locs);
|
||||
this.readsDataSource = readsDataSource;
|
||||
this.fileSpans = fileSpans;
|
||||
this.isUnmapped = isUnmapped;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the shard, tallying and incorporating read data.
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
readsDataSource.incorporateReadMetrics(readMetrics);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of chunks delimiting this shard.
|
||||
* @return a list of chunks that contain data for this shard.
|
||||
*/
|
||||
public Map<SAMReaderID,SAMFileSpan> getFileSpans();
|
||||
public Map<SAMReaderID,SAMFileSpan> getFileSpans() {
|
||||
return Collections.unmodifiableMap(fileSpans);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets key read validation and filtering properties.
|
||||
* @return set of read properties associated with this shard.
|
||||
*/
|
||||
@Override
|
||||
public ReadProperties getReadProperties() {
|
||||
return readsDataSource.getReadsInfo();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if this shard is meant to buffer reads, rather
|
||||
* than just holding pointers to their locations.
|
||||
* @return True if this shard can buffer reads. False otherwise.
|
||||
*/
|
||||
public boolean buffersReads();
|
||||
|
||||
/**
|
||||
* Checks to see whether the buffer is empty.
|
||||
* @return True if the buffer is empty.
|
||||
*/
|
||||
public boolean isBufferEmpty();
|
||||
public boolean buffersReads() { return false; }
|
||||
|
||||
/**
|
||||
* Returns true if the read buffer is currently empty.
|
||||
* @return True if this shard's buffer is empty (and the shard can buffer reads).
|
||||
*/
|
||||
public boolean isBufferFull();
|
||||
public boolean isBufferEmpty() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* Returns true if the read buffer is currently full.
|
||||
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
||||
*/
|
||||
public boolean isBufferFull() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* Adds a read to the read buffer.
|
||||
* @param read Add a read to the internal shard buffer.
|
||||
*/
|
||||
public void addRead(SAMRecord read);
|
||||
public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* Assuming this iterator buffers reads, an iterator to the reads
|
||||
* stored in the shard.
|
||||
* @return An iterator over the reads stored in the shard.
|
||||
* Gets the iterator over the elements cached in the shard.
|
||||
* @return An iterator over the reads stored in the shard.
|
||||
*/
|
||||
public StingSAMIterator iterator();
|
||||
public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
|
||||
/**
|
||||
* Whether this shard points to an unmapped region.
|
||||
|
|
@ -60,5 +111,7 @@ public interface BAMFormatAwareShard extends Shard {
|
|||
* this case, isUnmapped should always return false.
|
||||
* @return True if this shard is unmapped. False otherwise.
|
||||
*/
|
||||
public boolean isUnmapped();
|
||||
public boolean isUnmapped() {
|
||||
return isUnmapped;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
|
|
@ -21,127 +22,14 @@ import net.sf.picard.filter.SamRecordFilter;
|
|||
* @version 1.0
|
||||
* @date Apr 7, 2009
|
||||
*/
|
||||
public class LocusShard implements BAMFormatAwareShard {
|
||||
/**
|
||||
* Source for read data.
|
||||
*/
|
||||
private SAMDataSource dataSource;
|
||||
|
||||
/**
|
||||
* A list of the chunks associated with this shard.
|
||||
*/
|
||||
private final Map<SAMReaderID,SAMFileSpan> fileSpans;
|
||||
|
||||
// currently our location
|
||||
private final List<GenomeLoc> loci;
|
||||
|
||||
/**
|
||||
* Statistics about which reads in this shards were used and which were filtered away.
|
||||
*/
|
||||
private final ReadMetrics readMetrics = new ReadMetrics();
|
||||
|
||||
public class LocusShard extends BAMFormatAwareShard {
|
||||
/**
|
||||
* Create a new locus shard, divided by index.
|
||||
* @param intervals List of intervals to process.
|
||||
* @param fileSpans File spans associated with that interval.
|
||||
*/
|
||||
public LocusShard(SAMDataSource dataSource, List<GenomeLoc> intervals, Map<SAMReaderID,SAMFileSpan> fileSpans) {
|
||||
this.dataSource = dataSource;
|
||||
this.loci = intervals;
|
||||
this.fileSpans = fileSpans;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the shard, tallying and incorporating read data.
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
dataSource.incorporateReadMetrics(readMetrics);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the file spans associated with this locus shard.
|
||||
* @return A list of the file spans to use when retrieving locus data.
|
||||
*/
|
||||
@Override
|
||||
public Map<SAMReaderID,SAMFileSpan> getFileSpans() {
|
||||
return fileSpans;
|
||||
}
|
||||
|
||||
/** @return the genome location represented by this shard */
|
||||
public List<GenomeLoc> getGenomeLocs() {
|
||||
return loci;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if this shard is meant to buffer reads, rather
|
||||
* than just holding pointers to their locations.
|
||||
* @return True if this shard can buffer reads. False otherwise.
|
||||
*/
|
||||
@Override
|
||||
public boolean buffersReads() { return false; }
|
||||
|
||||
/**
|
||||
* Returns true if the read buffer is currently full.
|
||||
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
||||
*/
|
||||
@Override
|
||||
public boolean isBufferEmpty() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* Returns true if the read buffer is currently full.
|
||||
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
||||
*/
|
||||
@Override
|
||||
public boolean isBufferFull() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* Adds a read to the read buffer.
|
||||
* @param read Add a read to the internal shard buffer.
|
||||
*/
|
||||
@Override
|
||||
public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* Gets the iterator over the elements cached in the shard.
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* returns the type of shard.
|
||||
*/
|
||||
@Override
|
||||
public ShardType getShardType() {
|
||||
return ShardType.LOCUS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Locus shards don't make sense as unmapped regions. Always return false.
|
||||
* @return False always.
|
||||
*/
|
||||
@Override
|
||||
public boolean isUnmapped() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets key read validation and filtering properties.
|
||||
* @return set of read properties associated with this shard.
|
||||
*/
|
||||
@Override
|
||||
public ReadProperties getReadProperties() {
|
||||
return dataSource.getReadsInfo();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
|
||||
* @return Storage space for metrics.
|
||||
*/
|
||||
@Override
|
||||
public ReadMetrics getReadMetrics() {
|
||||
return readMetrics;
|
||||
public LocusShard(GenomeLocParser parser, SAMDataSource dataSource, List<GenomeLoc> intervals, Map<SAMReaderID,SAMFileSpan> fileSpans) {
|
||||
super(parser, ShardType.LOCUS, intervals, dataSource, fileSpans, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -150,6 +38,6 @@ public class LocusShard implements BAMFormatAwareShard {
|
|||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return Utils.join(";",loci);
|
||||
return Utils.join(";",getGenomeLocs());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,6 +48,11 @@ public class LocusShardStrategy implements ShardStrategy {
|
|||
*/
|
||||
private final SAMDataSource reads;
|
||||
|
||||
/**
|
||||
* the parser for creating shards
|
||||
*/
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
/**
|
||||
* An iterator through the available file pointers.
|
||||
*/
|
||||
|
|
@ -60,6 +65,8 @@ public class LocusShardStrategy implements ShardStrategy {
|
|||
*/
|
||||
LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocParser genomeLocParser, GenomeLocSortedSet locations) {
|
||||
this.reads = reads;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
|
||||
if(!reads.isEmpty()) {
|
||||
GenomeLocSortedSet intervals;
|
||||
if(locations == null) {
|
||||
|
|
@ -124,7 +131,7 @@ public class LocusShardStrategy implements ShardStrategy {
|
|||
public LocusShard next() {
|
||||
FilePointer nextFilePointer = filePointerIterator.next();
|
||||
Map<SAMReaderID,SAMFileSpan> fileSpansBounding = nextFilePointer.fileSpans != null ? nextFilePointer.fileSpans : null;
|
||||
return new LocusShard(reads,nextFilePointer.locations,fileSpansBounding);
|
||||
return new LocusShard(genomeLocParser, reads,nextFilePointer.locations,fileSpansBounding);
|
||||
}
|
||||
|
||||
/** we don't support the remove command */
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||
|
|
@ -13,81 +14,16 @@ import java.util.List;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class MonolithicShard implements Shard {
|
||||
/**
|
||||
* Reads data, if applicable.
|
||||
*/
|
||||
private final SAMDataSource readsDataSource;
|
||||
|
||||
/**
|
||||
* What type of MonolithicShard is this? Read or locus?
|
||||
*/
|
||||
private final ShardType shardType;
|
||||
|
||||
/**
|
||||
* Locations. For the monolithic shard, should be a list of all available contigs in the reference.
|
||||
*/
|
||||
private final List<GenomeLoc> locs;
|
||||
|
||||
/**
|
||||
* Statistics about which reads in this shards were used and which were filtered away.
|
||||
*/
|
||||
private final ReadMetrics readMetrics = new ReadMetrics();
|
||||
|
||||
public class MonolithicShard extends BAMFormatAwareShard {
|
||||
/**
|
||||
* Creates a new monolithic shard of the given type.
|
||||
* @param shardType Type of the shard. Must be either read or locus; cannot be intervalic.
|
||||
* @param locs Intervals that this monolithic shard should process.
|
||||
*/
|
||||
public MonolithicShard(SAMDataSource readsDataSource, ShardType shardType, List<GenomeLoc> locs) {
|
||||
this.readsDataSource = readsDataSource;
|
||||
public MonolithicShard(GenomeLocParser parser, SAMDataSource readsDataSource, ShardType shardType, List<GenomeLoc> locs) {
|
||||
super(parser, shardType, locs, readsDataSource, null, false);
|
||||
if(shardType != ShardType.LOCUS && shardType != ShardType.READ)
|
||||
throw new ReviewedStingException("Invalid shard type for monolithic shard: " + shardType);
|
||||
this.shardType = shardType;
|
||||
this.locs = locs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the shard, tallying and incorporating read data.
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
readsDataSource.incorporateReadMetrics(readMetrics);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns null, indicating that (in this case) the entire genome is covered.
|
||||
* @return null.
|
||||
*/
|
||||
public List<GenomeLoc> getGenomeLocs() {
|
||||
return locs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reports the type of monolithic shard.
|
||||
* @return Type of monolithic shard.
|
||||
*/
|
||||
@Override
|
||||
public ShardType getShardType() {
|
||||
return shardType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets key read validation and filtering properties.
|
||||
* @return set of read properties associated with this shard.
|
||||
*/
|
||||
@Override
|
||||
public ReadProperties getReadProperties() {
|
||||
return readsDataSource.getReadsInfo();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
|
||||
* @return Storage space for metrics.
|
||||
*/
|
||||
@Override
|
||||
public ReadMetrics getReadMetrics() {
|
||||
return readMetrics;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
|
|
@ -23,8 +24,8 @@ public class MonolithicShardStrategy implements ShardStrategy {
|
|||
* Create a new shard strategy for shards of the given type.
|
||||
* @param shardType The shard type.
|
||||
*/
|
||||
public MonolithicShardStrategy(final SAMDataSource readsDataSource, final Shard.ShardType shardType, final List<GenomeLoc> region) {
|
||||
shard = new MonolithicShard(readsDataSource,shardType,region);
|
||||
public MonolithicShardStrategy(final GenomeLocParser parser, final SAMDataSource readsDataSource, final Shard.ShardType shardType, final List<GenomeLoc> region) {
|
||||
shard = new MonolithicShard(parser,readsDataSource,shardType,region);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import java.util.*;
|
|||
import net.sf.samtools.SAMFileSpan;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
@ -36,80 +37,21 @@ import net.sf.picard.filter.SamRecordFilter;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class ReadShard implements BAMFormatAwareShard {
|
||||
private final SAMDataSource readsDataSource;
|
||||
|
||||
/**
|
||||
* The data backing the next chunks to deliver to the traversal engine.
|
||||
*/
|
||||
private final Map<SAMReaderID,SAMFileSpan> fileSpans;
|
||||
|
||||
public class ReadShard extends BAMFormatAwareShard {
|
||||
/**
|
||||
* The reads making up this shard.
|
||||
*/
|
||||
private final Collection<SAMRecord> reads = new ArrayList<SAMRecord>(ReadShardStrategy.MAX_READS);
|
||||
|
||||
/**
|
||||
* currently our location
|
||||
*/
|
||||
private final List<GenomeLoc> loci;
|
||||
|
||||
/**
|
||||
* Whether the current location is unmapped.
|
||||
*/
|
||||
private final boolean isUnmapped;
|
||||
|
||||
/**
|
||||
* Statistics about which reads in this shards were used and which were filtered away.
|
||||
*/
|
||||
private final ReadMetrics readMetrics = new ReadMetrics();
|
||||
|
||||
public ReadShard(SAMDataSource readsDataSource, Map<SAMReaderID,SAMFileSpan> fileSpans, List<GenomeLoc> loci, boolean isUnmapped) {
|
||||
this.readsDataSource = readsDataSource;
|
||||
this.fileSpans = fileSpans;
|
||||
this.loci = loci;
|
||||
this.isUnmapped = isUnmapped;
|
||||
public ReadShard(GenomeLocParser parser, SAMDataSource readsDataSource, Map<SAMReaderID,SAMFileSpan> fileSpans, List<GenomeLoc> loci, boolean isUnmapped) {
|
||||
super(parser, ShardType.READ, loci, readsDataSource, fileSpans, isUnmapped);
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the shard, tallying and incorporating read data.
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
readsDataSource.incorporateReadMetrics(readMetrics);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of chunks delimiting this shard.
|
||||
* @return a list of chunks that contain data for this shard.
|
||||
*/
|
||||
@Override
|
||||
public Map<SAMReaderID,SAMFileSpan> getFileSpans() {
|
||||
return Collections.unmodifiableMap(fileSpans);
|
||||
}
|
||||
|
||||
/** @return the genome location represented by this shard */
|
||||
@Override
|
||||
public List<GenomeLoc> getGenomeLocs() {
|
||||
return loci;
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether this shard points to an unmapped region.
|
||||
* @return True if this shard is unmapped. False otherwise.
|
||||
*/
|
||||
@Override
|
||||
public boolean isUnmapped() {
|
||||
return isUnmapped;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns true if this shard is meant to buffer reads, rather
|
||||
* than just holding pointers to their locations.
|
||||
* @return True if this shard can buffer reads. False otherwise.
|
||||
*/
|
||||
@Override
|
||||
public boolean buffersReads() {
|
||||
return true;
|
||||
}
|
||||
|
|
@ -118,7 +60,6 @@ public class ReadShard implements BAMFormatAwareShard {
|
|||
* Returns true if the read buffer is currently empty.
|
||||
* @return True if this shard's buffer is empty (and the shard can buffer reads).
|
||||
*/
|
||||
@Override
|
||||
public boolean isBufferEmpty() {
|
||||
return reads.size() == 0;
|
||||
}
|
||||
|
|
@ -127,7 +68,6 @@ public class ReadShard implements BAMFormatAwareShard {
|
|||
* Returns true if the read buffer is currently full.
|
||||
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
||||
*/
|
||||
@Override
|
||||
public boolean isBufferFull() {
|
||||
return reads.size() > ReadShardStrategy.MAX_READS;
|
||||
}
|
||||
|
|
@ -136,7 +76,6 @@ public class ReadShard implements BAMFormatAwareShard {
|
|||
* Adds a read to the read buffer.
|
||||
* @param read Add a read to the internal shard buffer.
|
||||
*/
|
||||
@Override
|
||||
public void addRead(SAMRecord read) {
|
||||
// DO NOT validate that the buffer is full. Paired read sharding will occasionally have to stuff another
|
||||
// read or two into the buffer.
|
||||
|
|
@ -147,39 +86,10 @@ public class ReadShard implements BAMFormatAwareShard {
|
|||
* Creates an iterator over reads stored in this shard's read cache.
|
||||
* @return An iterator over the reads stored in this shard's read cache.
|
||||
*/
|
||||
@Override
|
||||
public StingSAMIterator iterator() {
|
||||
return StingSAMIteratorAdapter.adapt(reads.iterator());
|
||||
}
|
||||
|
||||
/**
|
||||
* what kind of shard do we return
|
||||
*
|
||||
* @return ShardType, indicating the type
|
||||
*/
|
||||
@Override
|
||||
public ShardType getShardType() {
|
||||
return ShardType.READ;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets key read validation and filtering properties.
|
||||
* @return set of read properties associated with this shard.
|
||||
*/
|
||||
@Override
|
||||
public ReadProperties getReadProperties() {
|
||||
return readsDataSource.getReadsInfo();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
|
||||
* @return Storage space for metrics.
|
||||
*/
|
||||
@Override
|
||||
public ReadMetrics getReadMetrics() {
|
||||
return readMetrics;
|
||||
}
|
||||
|
||||
/**
|
||||
* String representation of this shard.
|
||||
* @return A string representation of the boundaries of this shard.
|
||||
|
|
@ -187,7 +97,7 @@ public class ReadShard implements BAMFormatAwareShard {
|
|||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for(Map.Entry<SAMReaderID,SAMFileSpan> entry: fileSpans.entrySet()) {
|
||||
for(Map.Entry<SAMReaderID,SAMFileSpan> entry: getFileSpans().entrySet()) {
|
||||
sb.append(entry.getKey());
|
||||
sb.append(": ");
|
||||
sb.append(entry.getValue());
|
||||
|
|
@ -195,6 +105,4 @@ public class ReadShard implements BAMFormatAwareShard {
|
|||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import java.util.*;
|
|||
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
|
||||
/**
|
||||
|
|
@ -84,13 +85,16 @@ public class ReadShardStrategy implements ShardStrategy {
|
|||
*/
|
||||
private boolean isIntoUnmappedRegion = false;
|
||||
|
||||
private final GenomeLocParser parser;
|
||||
|
||||
/**
|
||||
* Create a new read shard strategy, loading read shards from the given BAM file.
|
||||
* @param dataSource Data source from which to load shards.
|
||||
* @param locations intervals to use for sharding.
|
||||
*/
|
||||
public ReadShardStrategy(SAMDataSource dataSource, GenomeLocSortedSet locations) {
|
||||
public ReadShardStrategy(GenomeLocParser parser, SAMDataSource dataSource, GenomeLocSortedSet locations) {
|
||||
this.dataSource = dataSource;
|
||||
this.parser = parser;
|
||||
this.position = this.dataSource.getCurrentPosition();
|
||||
this.locations = locations;
|
||||
|
||||
|
|
@ -155,7 +159,7 @@ public class ReadShardStrategy implements ShardStrategy {
|
|||
}
|
||||
|
||||
if(selectedReaders.size() > 0) {
|
||||
BAMFormatAwareShard shard = new ReadShard(dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
|
||||
BAMFormatAwareShard shard = new ReadShard(parser, dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
|
||||
dataSource.fillShard(shard);
|
||||
|
||||
if(!shard.isBufferEmpty()) {
|
||||
|
|
@ -169,7 +173,9 @@ public class ReadShardStrategy implements ShardStrategy {
|
|||
}
|
||||
}
|
||||
else {
|
||||
BAMFormatAwareShard shard = new ReadShard(dataSource,position,null,false);
|
||||
// todo -- this nulling of intervals is a bit annoying since readwalkers without
|
||||
// todo -- any -L values need to be special cased throughout the code.
|
||||
BAMFormatAwareShard shard = new ReadShard(parser,dataSource,position,null,false);
|
||||
dataSource.fillShard(shard);
|
||||
nextShard = !shard.isBufferEmpty() ? shard : null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
|
@ -29,41 +32,101 @@ import java.util.List;
|
|||
* <p/>
|
||||
* Class Shard
|
||||
* <p/>
|
||||
* The base interface for shards.
|
||||
* The base abstract class for shards.
|
||||
*/
|
||||
public interface Shard extends Serializable {
|
||||
enum ShardType {
|
||||
public abstract class Shard implements HasGenomeLocation {
|
||||
public enum ShardType {
|
||||
READ, LOCUS
|
||||
}
|
||||
|
||||
protected final GenomeLocParser parser; // incredibly annoying!
|
||||
|
||||
/**
|
||||
* What type of shard is this? Read or locus?
|
||||
*/
|
||||
protected final ShardType shardType;
|
||||
|
||||
/**
|
||||
* Locations. For the monolithic shard, should be a list of all available contigs in the reference.
|
||||
*/
|
||||
protected final List<GenomeLoc> locs;
|
||||
|
||||
/**
|
||||
* Statistics about which reads in this shard were used and which were filtered away.
|
||||
*/
|
||||
protected final ReadMetrics readMetrics = new ReadMetrics();
|
||||
|
||||
public Shard(GenomeLocParser parser, ShardType shardType, List<GenomeLoc> locs) {
|
||||
this.locs = locs;
|
||||
this.parser = parser;
|
||||
this.shardType = shardType;
|
||||
}
|
||||
|
||||
/**
|
||||
* If isUnmapped is true, then getGenomeLocs by
|
||||
* definition will return a singleton list with a GenomeLoc.UNMAPPED
|
||||
*
|
||||
* Can return null, indicating that the entire genome is covered.
|
||||
*
|
||||
* @return the genome location represented by this shard
|
||||
*/
|
||||
public List<GenomeLoc> getGenomeLocs();
|
||||
public List<GenomeLoc> getGenomeLocs() {
|
||||
return locs;
|
||||
}
|
||||
|
||||
/**
|
||||
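* Returns the span of the genomeLocs comprising this shard.
|
||||
* @return GenomeLoc.WHOLE_GENOME if getGenomeLocs() is null, or the unmapped marker loc if any loc is unmapped;
|
||||
* otherwise a GenomeLoc from the smallest start to the largest stop, on the contig of the last loc visited.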
|
||||
*/
|
||||
public GenomeLoc getLocation() {
|
||||
if ( getGenomeLocs() == null )
|
||||
return GenomeLoc.WHOLE_GENOME;
|
||||
|
||||
int start = Integer.MAX_VALUE;
|
||||
int stop = Integer.MIN_VALUE;
|
||||
String contig = null;
|
||||
|
||||
for ( GenomeLoc loc : getGenomeLocs() ) {
|
||||
if ( GenomeLoc.isUnmapped(loc) )
|
||||
// special case the unmapped region marker, just abort out
|
||||
return loc;
|
||||
contig = loc.getContig();
|
||||
if ( loc.getStart() < start ) start = loc.getStart();
|
||||
if ( loc.getStop() > stop ) stop = loc.getStop();
|
||||
}
|
||||
|
||||
return parser.createGenomeLoc(contig, start, stop);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* what kind of shard do we return
|
||||
* @return ShardType, indicating the type
|
||||
*/
|
||||
public ShardType getShardType();
|
||||
public ShardType getShardType() {
|
||||
return shardType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does any releasing / aggregation required when the shard is through being processed.
|
||||
*/
|
||||
public void close();
|
||||
public void close() {
|
||||
; // by default don't do anything
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets required configuration for validating and filtering reads.
|
||||
* @return read configuration properties.
|
||||
*/
|
||||
public ReadProperties getReadProperties();
|
||||
public abstract ReadProperties getReadProperties();
|
||||
|
||||
/**
|
||||
* Gets the runtime metrics associated with this shard.
|
||||
* @return metrics and read counts.
|
||||
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
|
||||
* @return Storage space for metrics.
|
||||
*/
|
||||
public ReadMetrics getReadMetrics();
|
||||
public ReadMetrics getReadMetrics() {
|
||||
return readMetrics;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ public class ShardStrategyFactory {
|
|||
case LOCUS_EXPERIMENTAL:
|
||||
return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,null);
|
||||
case READS_EXPERIMENTAL:
|
||||
return new ReadShardStrategy(readsDataSource,null);
|
||||
return new ReadShardStrategy(genomeLocParser,readsDataSource,null);
|
||||
default:
|
||||
throw new ReviewedStingException("Strategy: " + strat + " isn't implemented for this type of shatter request");
|
||||
}
|
||||
|
|
@ -108,7 +108,7 @@ public class ShardStrategyFactory {
|
|||
case LOCUS_EXPERIMENTAL:
|
||||
return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,lst);
|
||||
case READS_EXPERIMENTAL:
|
||||
return new ReadShardStrategy(readsDataSource,lst);
|
||||
return new ReadShardStrategy(genomeLocParser, readsDataSource,lst);
|
||||
default:
|
||||
throw new ReviewedStingException("Strategy: " + strat + " isn't implemented");
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue