Major refactoring of shards. Shards no longer use interfaces; they are now an actual object hierarchy, with most of the important and common functionality pushed up to base classes. This eliminates a lot of duplicated code and makes the shards much more understandable. Shards also now require a GenomeLocParser to work with their own GenomeLocs.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5030 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4d611e53e7
commit
cacdac3914
|
|
@ -3,11 +3,16 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import net.sf.picard.filter.SamRecordFilter;
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A common interface for shards that natively understand the BAM format.
|
* A common interface for shards that natively understand the BAM format.
|
||||||
|
|
@ -15,44 +20,90 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||||
* @author mhanna
|
* @author mhanna
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public interface BAMFormatAwareShard extends Shard {
|
public abstract class BAMFormatAwareShard extends Shard {
|
||||||
|
/**
|
||||||
|
* Whether the current location is unmapped.
|
||||||
|
*/
|
||||||
|
private final boolean isUnmapped;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads data, if applicable.
|
||||||
|
*/
|
||||||
|
private final SAMDataSource readsDataSource;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The data backing the next chunks to deliver to the traversal engine.
|
||||||
|
*/
|
||||||
|
private final Map<SAMReaderID,SAMFileSpan> fileSpans;
|
||||||
|
|
||||||
|
public BAMFormatAwareShard(GenomeLocParser parser,
|
||||||
|
ShardType shardType,
|
||||||
|
List<GenomeLoc> locs,
|
||||||
|
SAMDataSource readsDataSource,
|
||||||
|
Map<SAMReaderID,SAMFileSpan> fileSpans,
|
||||||
|
boolean isUnmapped) {
|
||||||
|
super(parser, shardType, locs);
|
||||||
|
this.readsDataSource = readsDataSource;
|
||||||
|
this.fileSpans = fileSpans;
|
||||||
|
this.isUnmapped = isUnmapped;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes the shard, tallying and incorporating read data.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
readsDataSource.incorporateReadMetrics(readMetrics);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the list of chunks delimiting this shard.
|
* Get the list of chunks delimiting this shard.
|
||||||
* @return a list of chunks that contain data for this shard.
|
* @return a list of chunks that contain data for this shard.
|
||||||
*/
|
*/
|
||||||
public Map<SAMReaderID,SAMFileSpan> getFileSpans();
|
public Map<SAMReaderID,SAMFileSpan> getFileSpans() {
|
||||||
|
return Collections.unmodifiableMap(fileSpans);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets key read validation and filtering properties.
|
||||||
|
* @return set of read properties associated with this shard.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public ReadProperties getReadProperties() {
|
||||||
|
return readsDataSource.getReadsInfo();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if this shard is meant to buffer reads, rather
|
* Returns true if this shard is meant to buffer reads, rather
|
||||||
* than just holding pointers to their locations.
|
* than just holding pointers to their locations.
|
||||||
* @return True if this shard can buffer reads. False otherwise.
|
* @return True if this shard can buffer reads. False otherwise.
|
||||||
*/
|
*/
|
||||||
public boolean buffersReads();
|
public boolean buffersReads() { return false; }
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks to see whether the buffer is empty.
|
|
||||||
* @return True if the buffer is empty.
|
|
||||||
*/
|
|
||||||
public boolean isBufferEmpty();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if the read buffer is currently full.
|
* Checks to see whether the read buffer is empty.
|
||||||
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
* @return True if this shard's buffer is empty (and the shard can buffer reads).
|
||||||
*/
|
*/
|
||||||
public boolean isBufferFull();
|
public boolean isBufferEmpty() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the read buffer is currently full.
|
||||||
|
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
||||||
|
*/
|
||||||
|
public boolean isBufferFull() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a read to the read buffer.
|
* Adds a read to the read buffer.
|
||||||
* @param read Add a read to the internal shard buffer.
|
* @param read Add a read to the internal shard buffer.
|
||||||
*/
|
*/
|
||||||
public void addRead(SAMRecord read);
|
public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Assuming this iterator buffers reads, an iterator to the reads
|
* Gets the iterator over the elements cached in the shard.
|
||||||
* stored in the shard.
|
* @return An iterator over the reads stored in the shard.
|
||||||
* @return An iterator over the reads stored in the shard.
|
|
||||||
*/
|
*/
|
||||||
public StingSAMIterator iterator();
|
public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether this shard points to an unmapped region.
|
* Whether this shard points to an unmapped region.
|
||||||
|
|
@ -60,5 +111,7 @@ public interface BAMFormatAwareShard extends Shard {
|
||||||
* this case, isUnmapped should always return false.
|
* this case, isUnmapped should always return false.
|
||||||
* @return True if this shard is unmapped. False otherwise.
|
* @return True if this shard is unmapped. False otherwise.
|
||||||
*/
|
*/
|
||||||
public boolean isUnmapped();
|
public boolean isUnmapped() {
|
||||||
|
return isUnmapped;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.shards;
|
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||||
|
|
@ -21,127 +22,14 @@ import net.sf.picard.filter.SamRecordFilter;
|
||||||
* @version 1.0
|
* @version 1.0
|
||||||
* @date Apr 7, 2009
|
* @date Apr 7, 2009
|
||||||
*/
|
*/
|
||||||
public class LocusShard implements BAMFormatAwareShard {
|
public class LocusShard extends BAMFormatAwareShard {
|
||||||
/**
|
|
||||||
* Source for read data.
|
|
||||||
*/
|
|
||||||
private SAMDataSource dataSource;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A list of the chunks associated with this shard.
|
|
||||||
*/
|
|
||||||
private final Map<SAMReaderID,SAMFileSpan> fileSpans;
|
|
||||||
|
|
||||||
// currently our location
|
|
||||||
private final List<GenomeLoc> loci;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Statistics about which reads in this shards were used and which were filtered away.
|
|
||||||
*/
|
|
||||||
private final ReadMetrics readMetrics = new ReadMetrics();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new locus shard, divided by index.
|
* Create a new locus shard, divided by index.
|
||||||
* @param intervals List of intervals to process.
|
* @param intervals List of intervals to process.
|
||||||
* @param fileSpans File spans associated with that interval.
|
* @param fileSpans File spans associated with that interval.
|
||||||
*/
|
*/
|
||||||
public LocusShard(SAMDataSource dataSource, List<GenomeLoc> intervals, Map<SAMReaderID,SAMFileSpan> fileSpans) {
|
public LocusShard(GenomeLocParser parser, SAMDataSource dataSource, List<GenomeLoc> intervals, Map<SAMReaderID,SAMFileSpan> fileSpans) {
|
||||||
this.dataSource = dataSource;
|
super(parser, ShardType.LOCUS, intervals, dataSource, fileSpans, false);
|
||||||
this.loci = intervals;
|
|
||||||
this.fileSpans = fileSpans;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Closes the shard, tallying and incorporating read data.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void close() {
|
|
||||||
dataSource.incorporateReadMetrics(readMetrics);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the file spans associated with this locus shard.
|
|
||||||
* @return A list of the file spans to use when retrieving locus data.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public Map<SAMReaderID,SAMFileSpan> getFileSpans() {
|
|
||||||
return fileSpans;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @return the genome location represented by this shard */
|
|
||||||
public List<GenomeLoc> getGenomeLocs() {
|
|
||||||
return loci;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if this shard is meant to buffer reads, rather
|
|
||||||
* than just holding pointers to their locations.
|
|
||||||
* @return True if this shard can buffer reads. False otherwise.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public boolean buffersReads() { return false; }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if the read buffer is currently full.
|
|
||||||
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public boolean isBufferEmpty() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if the read buffer is currently full.
|
|
||||||
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public boolean isBufferFull() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds a read to the read buffer.
|
|
||||||
* @param read Add a read to the internal shard buffer.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the iterator over the elements cached in the shard.
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* returns the type of shard.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public ShardType getShardType() {
|
|
||||||
return ShardType.LOCUS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Locus shards don't make sense as unmapped regions. Always return false.
|
|
||||||
* @return False always.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public boolean isUnmapped() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets key read validation and filtering properties.
|
|
||||||
* @return set of read properties associated with this shard.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public ReadProperties getReadProperties() {
|
|
||||||
return dataSource.getReadsInfo();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
|
|
||||||
* @return Storage space for metrics.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public ReadMetrics getReadMetrics() {
|
|
||||||
return readMetrics;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -150,6 +38,6 @@ public class LocusShard implements BAMFormatAwareShard {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return Utils.join(";",loci);
|
return Utils.join(";",getGenomeLocs());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,11 @@ public class LocusShardStrategy implements ShardStrategy {
|
||||||
*/
|
*/
|
||||||
private final SAMDataSource reads;
|
private final SAMDataSource reads;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* the parser for creating shards
|
||||||
|
*/
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An iterator through the available file pointers.
|
* An iterator through the available file pointers.
|
||||||
*/
|
*/
|
||||||
|
|
@ -60,6 +65,8 @@ public class LocusShardStrategy implements ShardStrategy {
|
||||||
*/
|
*/
|
||||||
LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocParser genomeLocParser, GenomeLocSortedSet locations) {
|
LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocParser genomeLocParser, GenomeLocSortedSet locations) {
|
||||||
this.reads = reads;
|
this.reads = reads;
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
|
|
||||||
if(!reads.isEmpty()) {
|
if(!reads.isEmpty()) {
|
||||||
GenomeLocSortedSet intervals;
|
GenomeLocSortedSet intervals;
|
||||||
if(locations == null) {
|
if(locations == null) {
|
||||||
|
|
@ -124,7 +131,7 @@ public class LocusShardStrategy implements ShardStrategy {
|
||||||
public LocusShard next() {
|
public LocusShard next() {
|
||||||
FilePointer nextFilePointer = filePointerIterator.next();
|
FilePointer nextFilePointer = filePointerIterator.next();
|
||||||
Map<SAMReaderID,SAMFileSpan> fileSpansBounding = nextFilePointer.fileSpans != null ? nextFilePointer.fileSpans : null;
|
Map<SAMReaderID,SAMFileSpan> fileSpansBounding = nextFilePointer.fileSpans != null ? nextFilePointer.fileSpans : null;
|
||||||
return new LocusShard(reads,nextFilePointer.locations,fileSpansBounding);
|
return new LocusShard(genomeLocParser, reads,nextFilePointer.locations,fileSpansBounding);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** we don't support the remove command */
|
/** we don't support the remove command */
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.shards;
|
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||||
|
|
@ -13,81 +14,16 @@ import java.util.List;
|
||||||
* @author mhanna
|
* @author mhanna
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public class MonolithicShard implements Shard {
|
public class MonolithicShard extends BAMFormatAwareShard {
|
||||||
/**
|
|
||||||
* Reads data, if applicable.
|
|
||||||
*/
|
|
||||||
private final SAMDataSource readsDataSource;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* What type of MonolithicShard is this? Read or locus?
|
|
||||||
*/
|
|
||||||
private final ShardType shardType;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Locations. For the monolithic shard, should be a list of all available contigs in the reference.
|
|
||||||
*/
|
|
||||||
private final List<GenomeLoc> locs;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Statistics about which reads in this shards were used and which were filtered away.
|
|
||||||
*/
|
|
||||||
private final ReadMetrics readMetrics = new ReadMetrics();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new monolithic shard of the given type.
|
* Creates a new monolithic shard of the given type.
|
||||||
* @param shardType Type of the shard. Must be either read or locus; cannot be intervalic.
|
* @param shardType Type of the shard. Must be either read or locus; cannot be intervalic.
|
||||||
* @param locs Intervals that this monolithic shard should process.
|
* @param locs Intervals that this monolithic shard should process.
|
||||||
*/
|
*/
|
||||||
public MonolithicShard(SAMDataSource readsDataSource, ShardType shardType, List<GenomeLoc> locs) {
|
public MonolithicShard(GenomeLocParser parser, SAMDataSource readsDataSource, ShardType shardType, List<GenomeLoc> locs) {
|
||||||
this.readsDataSource = readsDataSource;
|
super(parser, shardType, locs, readsDataSource, null, false);
|
||||||
if(shardType != ShardType.LOCUS && shardType != ShardType.READ)
|
if(shardType != ShardType.LOCUS && shardType != ShardType.READ)
|
||||||
throw new ReviewedStingException("Invalid shard type for monolithic shard: " + shardType);
|
throw new ReviewedStingException("Invalid shard type for monolithic shard: " + shardType);
|
||||||
this.shardType = shardType;
|
|
||||||
this.locs = locs;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Closes the shard, tallying and incorporating read data.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void close() {
|
|
||||||
readsDataSource.incorporateReadMetrics(readMetrics);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns null, indicating that (in this case) the entire genome is covered.
|
|
||||||
* @return null.
|
|
||||||
*/
|
|
||||||
public List<GenomeLoc> getGenomeLocs() {
|
|
||||||
return locs;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reports the type of monolithic shard.
|
|
||||||
* @return Type of monolithic shard.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public ShardType getShardType() {
|
|
||||||
return shardType;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets key read validation and filtering properties.
|
|
||||||
* @return set of read properties associated with this shard.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public ReadProperties getReadProperties() {
|
|
||||||
return readsDataSource.getReadsInfo();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
|
|
||||||
* @return Storage space for metrics.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public ReadMetrics getReadMetrics() {
|
|
||||||
return readMetrics;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
|
|
@ -23,8 +24,8 @@ public class MonolithicShardStrategy implements ShardStrategy {
|
||||||
* Create a new shard strategy for shards of the given type.
|
* Create a new shard strategy for shards of the given type.
|
||||||
* @param shardType The shard type.
|
* @param shardType The shard type.
|
||||||
*/
|
*/
|
||||||
public MonolithicShardStrategy(final SAMDataSource readsDataSource, final Shard.ShardType shardType, final List<GenomeLoc> region) {
|
public MonolithicShardStrategy(final GenomeLocParser parser, final SAMDataSource readsDataSource, final Shard.ShardType shardType, final List<GenomeLoc> region) {
|
||||||
shard = new MonolithicShard(readsDataSource,shardType,region);
|
shard = new MonolithicShard(parser,readsDataSource,shardType,region);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ import java.util.*;
|
||||||
import net.sf.samtools.SAMFileSpan;
|
import net.sf.samtools.SAMFileSpan;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.picard.filter.SamRecordFilter;
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
|
@ -36,80 +37,21 @@ import net.sf.picard.filter.SamRecordFilter;
|
||||||
* @author mhanna
|
* @author mhanna
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public class ReadShard implements BAMFormatAwareShard {
|
public class ReadShard extends BAMFormatAwareShard {
|
||||||
private final SAMDataSource readsDataSource;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The data backing the next chunks to deliver to the traversal engine.
|
|
||||||
*/
|
|
||||||
private final Map<SAMReaderID,SAMFileSpan> fileSpans;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The reads making up this shard.
|
* The reads making up this shard.
|
||||||
*/
|
*/
|
||||||
private final Collection<SAMRecord> reads = new ArrayList<SAMRecord>(ReadShardStrategy.MAX_READS);
|
private final Collection<SAMRecord> reads = new ArrayList<SAMRecord>(ReadShardStrategy.MAX_READS);
|
||||||
|
|
||||||
/**
|
public ReadShard(GenomeLocParser parser, SAMDataSource readsDataSource, Map<SAMReaderID,SAMFileSpan> fileSpans, List<GenomeLoc> loci, boolean isUnmapped) {
|
||||||
* currently our location
|
super(parser, ShardType.READ, loci, readsDataSource, fileSpans, isUnmapped);
|
||||||
*/
|
|
||||||
private final List<GenomeLoc> loci;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Whether the current location is unmapped.
|
|
||||||
*/
|
|
||||||
private final boolean isUnmapped;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Statistics about which reads in this shards were used and which were filtered away.
|
|
||||||
*/
|
|
||||||
private final ReadMetrics readMetrics = new ReadMetrics();
|
|
||||||
|
|
||||||
public ReadShard(SAMDataSource readsDataSource, Map<SAMReaderID,SAMFileSpan> fileSpans, List<GenomeLoc> loci, boolean isUnmapped) {
|
|
||||||
this.readsDataSource = readsDataSource;
|
|
||||||
this.fileSpans = fileSpans;
|
|
||||||
this.loci = loci;
|
|
||||||
this.isUnmapped = isUnmapped;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Closes the shard, tallying and incorporating read data.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void close() {
|
|
||||||
readsDataSource.incorporateReadMetrics(readMetrics);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the list of chunks delimiting this shard.
|
|
||||||
* @return a list of chunks that contain data for this shard.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public Map<SAMReaderID,SAMFileSpan> getFileSpans() {
|
|
||||||
return Collections.unmodifiableMap(fileSpans);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @return the genome location represented by this shard */
|
|
||||||
@Override
|
|
||||||
public List<GenomeLoc> getGenomeLocs() {
|
|
||||||
return loci;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Whether this shard points to an unmapped region.
|
|
||||||
* @return True if this shard is unmapped. False otherwise.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public boolean isUnmapped() {
|
|
||||||
return isUnmapped;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if this shard is meant to buffer reads, rather
|
* Returns true if this shard is meant to buffer reads, rather
|
||||||
* than just holding pointers to their locations.
|
* than just holding pointers to their locations.
|
||||||
* @return True if this shard can buffer reads. False otherwise.
|
* @return True if this shard can buffer reads. False otherwise.
|
||||||
*/
|
*/
|
||||||
@Override
|
|
||||||
public boolean buffersReads() {
|
public boolean buffersReads() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
@ -118,7 +60,6 @@ public class ReadShard implements BAMFormatAwareShard {
|
||||||
* Returns true if the read buffer is currently full.
|
* Returns true if the read buffer is currently full.
|
||||||
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
||||||
*/
|
*/
|
||||||
@Override
|
|
||||||
public boolean isBufferEmpty() {
|
public boolean isBufferEmpty() {
|
||||||
return reads.size() == 0;
|
return reads.size() == 0;
|
||||||
}
|
}
|
||||||
|
|
@ -127,7 +68,6 @@ public class ReadShard implements BAMFormatAwareShard {
|
||||||
* Returns true if the read buffer is currently full.
|
* Returns true if the read buffer is currently full.
|
||||||
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
* @return True if this shard's buffer is full (and the shard can buffer reads).
|
||||||
*/
|
*/
|
||||||
@Override
|
|
||||||
public boolean isBufferFull() {
|
public boolean isBufferFull() {
|
||||||
return reads.size() > ReadShardStrategy.MAX_READS;
|
return reads.size() > ReadShardStrategy.MAX_READS;
|
||||||
}
|
}
|
||||||
|
|
@ -136,7 +76,6 @@ public class ReadShard implements BAMFormatAwareShard {
|
||||||
* Adds a read to the read buffer.
|
* Adds a read to the read buffer.
|
||||||
* @param read Add a read to the internal shard buffer.
|
* @param read Add a read to the internal shard buffer.
|
||||||
*/
|
*/
|
||||||
@Override
|
|
||||||
public void addRead(SAMRecord read) {
|
public void addRead(SAMRecord read) {
|
||||||
// DO NOT validate that the buffer is full. Paired read sharding will occasionally have to stuff another
|
// DO NOT validate that the buffer is full. Paired read sharding will occasionally have to stuff another
|
||||||
// read or two into the buffer.
|
// read or two into the buffer.
|
||||||
|
|
@ -147,39 +86,10 @@ public class ReadShard implements BAMFormatAwareShard {
|
||||||
* Creates an iterator over reads stored in this shard's read cache.
|
* Creates an iterator over reads stored in this shard's read cache.
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
@Override
|
|
||||||
public StingSAMIterator iterator() {
|
public StingSAMIterator iterator() {
|
||||||
return StingSAMIteratorAdapter.adapt(reads.iterator());
|
return StingSAMIteratorAdapter.adapt(reads.iterator());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* what kind of shard do we return
|
|
||||||
*
|
|
||||||
* @return ShardType, indicating the type
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public ShardType getShardType() {
|
|
||||||
return ShardType.READ;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets key read validation and filtering properties.
|
|
||||||
* @return set of read properties associated with this shard.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public ReadProperties getReadProperties() {
|
|
||||||
return readsDataSource.getReadsInfo();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
|
|
||||||
* @return Storage space for metrics.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public ReadMetrics getReadMetrics() {
|
|
||||||
return readMetrics;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* String representation of this shard.
|
* String representation of this shard.
|
||||||
* @return A string representation of the boundaries of this shard.
|
* @return A string representation of the boundaries of this shard.
|
||||||
|
|
@ -187,7 +97,7 @@ public class ReadShard implements BAMFormatAwareShard {
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for(Map.Entry<SAMReaderID,SAMFileSpan> entry: fileSpans.entrySet()) {
|
for(Map.Entry<SAMReaderID,SAMFileSpan> entry: getFileSpans().entrySet()) {
|
||||||
sb.append(entry.getKey());
|
sb.append(entry.getKey());
|
||||||
sb.append(": ");
|
sb.append(": ");
|
||||||
sb.append(entry.getValue());
|
sb.append(entry.getValue());
|
||||||
|
|
@ -195,6 +105,4 @@ public class ReadShard implements BAMFormatAwareShard {
|
||||||
}
|
}
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ import java.util.*;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -84,13 +85,16 @@ public class ReadShardStrategy implements ShardStrategy {
|
||||||
*/
|
*/
|
||||||
private boolean isIntoUnmappedRegion = false;
|
private boolean isIntoUnmappedRegion = false;
|
||||||
|
|
||||||
|
private final GenomeLocParser parser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new read shard strategy, loading read shards from the given BAM file.
|
* Create a new read shard strategy, loading read shards from the given BAM file.
|
||||||
* @param dataSource Data source from which to load shards.
|
* @param dataSource Data source from which to load shards.
|
||||||
* @param locations intervals to use for sharding.
|
* @param locations intervals to use for sharding.
|
||||||
*/
|
*/
|
||||||
public ReadShardStrategy(SAMDataSource dataSource, GenomeLocSortedSet locations) {
|
public ReadShardStrategy(GenomeLocParser parser, SAMDataSource dataSource, GenomeLocSortedSet locations) {
|
||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
|
this.parser = parser;
|
||||||
this.position = this.dataSource.getCurrentPosition();
|
this.position = this.dataSource.getCurrentPosition();
|
||||||
this.locations = locations;
|
this.locations = locations;
|
||||||
|
|
||||||
|
|
@ -155,7 +159,7 @@ public class ReadShardStrategy implements ShardStrategy {
|
||||||
}
|
}
|
||||||
|
|
||||||
if(selectedReaders.size() > 0) {
|
if(selectedReaders.size() > 0) {
|
||||||
BAMFormatAwareShard shard = new ReadShard(dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
|
BAMFormatAwareShard shard = new ReadShard(parser, dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
|
||||||
dataSource.fillShard(shard);
|
dataSource.fillShard(shard);
|
||||||
|
|
||||||
if(!shard.isBufferEmpty()) {
|
if(!shard.isBufferEmpty()) {
|
||||||
|
|
@ -169,7 +173,9 @@ public class ReadShardStrategy implements ShardStrategy {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
BAMFormatAwareShard shard = new ReadShard(dataSource,position,null,false);
|
// todo -- this nulling of intervals is a bit annoying since readwalkers without
|
||||||
|
// todo -- any -L values need to be special cased throughout the code.
|
||||||
|
BAMFormatAwareShard shard = new ReadShard(parser,dataSource,position,null,false);
|
||||||
dataSource.fillShard(shard);
|
dataSource.fillShard(shard);
|
||||||
nextShard = !shard.isBufferEmpty() ? shard : null;
|
nextShard = !shard.isBufferEmpty() ? shard : null;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,9 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -29,41 +32,101 @@ import java.util.List;
|
||||||
* <p/>
|
* <p/>
|
||||||
* Interface Shard
|
* Interface Shard
|
||||||
* <p/>
|
* <p/>
|
||||||
* The base interface for shards.
|
* The base abstract class for shards.
|
||||||
*/
|
*/
|
||||||
public interface Shard extends Serializable {
|
public abstract class Shard implements HasGenomeLocation {
|
||||||
enum ShardType {
|
public enum ShardType {
|
||||||
READ, LOCUS
|
READ, LOCUS
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected final GenomeLocParser parser; // incredibly annoying!
|
||||||
|
|
||||||
|
/**
|
||||||
|
* What type of shard is this? Read or locus?
|
||||||
|
*/
|
||||||
|
protected final ShardType shardType;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Locations. For the monolithic shard, should be a list of all available contigs in the reference.
|
||||||
|
*/
|
||||||
|
protected final List<GenomeLoc> locs;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Statistics about which reads in this shard were used and which were filtered away.
|
||||||
|
*/
|
||||||
|
protected final ReadMetrics readMetrics = new ReadMetrics();
|
||||||
|
|
||||||
|
public Shard(GenomeLocParser parser, ShardType shardType, List<GenomeLoc> locs) {
|
||||||
|
this.locs = locs;
|
||||||
|
this.parser = parser;
|
||||||
|
this.shardType = shardType;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If isUnmapped is true, then getGenomeLocs by
|
* If isUnmapped is true, then getGenomeLocs by
|
||||||
* definition will return a singleton list with a GenomeLoc.UNMAPPED
|
* definition will return a singleton list with a GenomeLoc.UNMAPPED
|
||||||
*
|
*
|
||||||
|
* Can return null, indicating that the entire genome is covered.
|
||||||
|
*
|
||||||
* @return the genome location represented by this shard
|
* @return the genome location represented by this shard
|
||||||
*/
|
*/
|
||||||
public List<GenomeLoc> getGenomeLocs();
|
public List<GenomeLoc> getGenomeLocs() {
|
||||||
|
return locs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the span of the genomeLocs comprising this shard
|
||||||
|
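*
|
||||||
|
* @return a location running from the smallest start to the largest stop among this shard's locations; GenomeLoc.WHOLE_GENOME when getGenomeLocs() is null; the unmapped marker itself if one is encountered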
|
||||||
|
*/
|
||||||
|
public GenomeLoc getLocation() {
|
||||||
|
if ( getGenomeLocs() == null )
|
||||||
|
return GenomeLoc.WHOLE_GENOME;
|
||||||
|
|
||||||
|
int start = Integer.MAX_VALUE;
|
||||||
|
int stop = Integer.MIN_VALUE;
|
||||||
|
String contig = null;
|
||||||
|
|
||||||
|
for ( GenomeLoc loc : getGenomeLocs() ) {
|
||||||
|
if ( GenomeLoc.isUnmapped(loc) )
|
||||||
|
// special case the unmapped region marker, just abort out
|
||||||
|
return loc;
|
||||||
|
contig = loc.getContig();
|
||||||
|
if ( loc.getStart() < start ) start = loc.getStart();
|
||||||
|
if ( loc.getStop() > stop ) stop = loc.getStop();
|
||||||
|
}
|
||||||
|
|
||||||
|
return parser.createGenomeLoc(contig, start, stop);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* what kind of shard do we return
|
* what kind of shard do we return
|
||||||
* @return ShardType, indicating the type
|
* @return ShardType, indicating the type
|
||||||
*/
|
*/
|
||||||
public ShardType getShardType();
|
public ShardType getShardType() {
|
||||||
|
return shardType;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Does any releasing / aggregation required when the shard is through being processed.
|
* Does any releasing / aggregation required when the shard is through being processed.
|
||||||
*/
|
*/
|
||||||
public void close();
|
public void close() {
|
||||||
|
; // by default don't do anything
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets required configuration for validating and filtering reads.
|
* Gets required configuration for validating and filtering reads.
|
||||||
* @return read configuration properties.
|
* @return read configuration properties.
|
||||||
*/
|
*/
|
||||||
public ReadProperties getReadProperties();
|
public abstract ReadProperties getReadProperties();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the runtime metrics associated with this shard.
|
* Gets the runtime metrics associated with this shard.
|
||||||
* @return metrics and read counts.
|
* Retrieves a storage space of metrics about number of reads included, filtered, etc.
|
||||||
|
* @return Storage space for metrics.
|
||||||
*/
|
*/
|
||||||
public ReadMetrics getReadMetrics();
|
public ReadMetrics getReadMetrics() {
|
||||||
|
return readMetrics;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,7 @@ public class ShardStrategyFactory {
|
||||||
case LOCUS_EXPERIMENTAL:
|
case LOCUS_EXPERIMENTAL:
|
||||||
return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,null);
|
return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,null);
|
||||||
case READS_EXPERIMENTAL:
|
case READS_EXPERIMENTAL:
|
||||||
return new ReadShardStrategy(readsDataSource,null);
|
return new ReadShardStrategy(genomeLocParser,readsDataSource,null);
|
||||||
default:
|
default:
|
||||||
throw new ReviewedStingException("Strategy: " + strat + " isn't implemented for this type of shatter request");
|
throw new ReviewedStingException("Strategy: " + strat + " isn't implemented for this type of shatter request");
|
||||||
}
|
}
|
||||||
|
|
@ -108,7 +108,7 @@ public class ShardStrategyFactory {
|
||||||
case LOCUS_EXPERIMENTAL:
|
case LOCUS_EXPERIMENTAL:
|
||||||
return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,lst);
|
return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,lst);
|
||||||
case READS_EXPERIMENTAL:
|
case READS_EXPERIMENTAL:
|
||||||
return new ReadShardStrategy(readsDataSource,lst);
|
return new ReadShardStrategy(genomeLocParser, readsDataSource,lst);
|
||||||
default:
|
default:
|
||||||
throw new ReviewedStingException("Strategy: " + strat + " isn't implemented");
|
throw new ReviewedStingException("Strategy: " + strat + " isn't implemented");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue