Support for read interval sharding with proper filtering.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2902 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d8fedd59be
commit
80f5d2829d
|
|
@ -3,11 +3,11 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
|||
import net.sf.samtools.Chunk;
|
||||
import net.sf.samtools.SAMFileReader2;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
|
||||
/**
|
||||
|
|
@ -48,4 +48,10 @@ public interface BAMFormatAwareShard extends Shard {
|
|||
* @return An iterator over the reads stored in the shard.
|
||||
*/
|
||||
public StingSAMIterator iterator();
|
||||
|
||||
/**
|
||||
* Gets any filter associated with this shard. Useful for filtering out overlaps, etc.
|
||||
* @return A filter if one exists. Null if not.
|
||||
*/
|
||||
public SamRecordFilter getFilter();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,10 +3,10 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
|||
import net.sf.samtools.Chunk;
|
||||
import net.sf.samtools.SAMFileReader2;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
|
||||
|
|
@ -34,15 +34,21 @@ public class BlockDelimitedReadShard extends ReadShard implements BAMFormatAware
|
|||
*/
|
||||
private final Collection<SAMRecord> reads = new ArrayList<SAMRecord>(BlockDelimitedReadShardStrategy.MAX_READS);
|
||||
|
||||
/**
|
||||
* The filter to be applied to the reads of this shard; null if no filter is associated with it.
|
||||
*/
|
||||
private final SamRecordFilter filter;
|
||||
|
||||
/**
|
||||
* A BlockDelimitedReadShard can be used with more than one shard type.
|
||||
* Track which type is being used.
|
||||
*/
|
||||
private final Shard.ShardType shardType;
|
||||
|
||||
public BlockDelimitedReadShard(Reads sourceInfo, Map<SAMFileReader2,List<Chunk>> chunks, Shard.ShardType shardType) {
|
||||
public BlockDelimitedReadShard(Reads sourceInfo, Map<SAMFileReader2,List<Chunk>> chunks, SamRecordFilter filter, Shard.ShardType shardType) {
|
||||
this.sourceInfo = sourceInfo;
|
||||
this.chunks = chunks;
|
||||
this.filter = filter;
|
||||
this.shardType = shardType;
|
||||
}
|
||||
|
||||
|
|
@ -73,10 +79,18 @@ public class BlockDelimitedReadShard extends ReadShard implements BAMFormatAware
|
|||
reads.add(read);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an iterator over reads stored in this shard's read cache.
|
||||
* @return An iterator over the reads currently held in this shard's read cache, adapted via StingSAMIteratorAdapter.
|
||||
*/
|
||||
public StingSAMIterator iterator() {
|
||||
return StingSAMIteratorAdapter.adapt(sourceInfo,reads.iterator());
|
||||
}
|
||||
|
||||
/** Returns the filter stored in this shard's filter field, exactly as held; no copy or wrapping is performed. */
public SamRecordFilter getFilter() {
|
||||
return filter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of chunks delimiting this shard.
|
||||
* @return a list of chunks that contain data for this shard.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -72,6 +73,8 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
|
|||
throw new NoSuchElementException("No such element available: SAM reader has arrived at last shard.");
|
||||
|
||||
Map<SAMFileReader2,List<Chunk>> shardPosition = null;
|
||||
SamRecordFilter filter = null;
|
||||
|
||||
if(!filePointers.isEmpty()) {
|
||||
boolean foundData = false;
|
||||
for(FilePointer filePointer: filePointers) {
|
||||
|
|
@ -89,8 +92,10 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
|
|||
}
|
||||
}
|
||||
}
|
||||
if(foundData)
|
||||
if(foundData) {
|
||||
filter = new ReadOverlapFilter(filePointer.locations);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
|
@ -98,9 +103,10 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
|
|||
shardPosition = new HashMap<SAMFileReader2,List<Chunk>>();
|
||||
for(Map.Entry<SAMFileReader2,Chunk> entry: position.entrySet())
|
||||
shardPosition.put(entry.getKey(),Collections.singletonList(entry.getValue()));
|
||||
filter = null;
|
||||
}
|
||||
|
||||
BAMFormatAwareShard shard = new BlockDelimitedReadShard(dataSource.getReadsInfo(),shardPosition,Shard.ShardType.READ);
|
||||
BAMFormatAwareShard shard = new BlockDelimitedReadShard(dataSource.getReadsInfo(),shardPosition,filter,Shard.ShardType.READ);
|
||||
atEndOfStream = dataSource.fillShard(shard);
|
||||
|
||||
this.position = dataSource.getCurrentPosition();
|
||||
|
|
|
|||
|
|
@ -2,11 +2,11 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import net.sf.samtools.Chunk;
|
||||
import net.sf.samtools.SAMFileReader2;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
|
@ -66,6 +66,14 @@ public class IndexDelimitedLocusShard extends LocusShard implements BAMFormatAwa
|
|||
this.shardType = shardType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the chunks associated with this locus shard.
|
||||
* @return A list of the chunks to use when retrieving locus data.
|
||||
*/
|
||||
public Map<SAMFileReader2,List<Chunk>> getChunks() {
|
||||
return chunks;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if this shard is meant to buffer reads, rather
|
||||
* than just holding pointers to their locations.
|
||||
|
|
@ -85,14 +93,18 @@ public class IndexDelimitedLocusShard extends LocusShard implements BAMFormatAwa
|
|||
*/
|
||||
public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* Gets the iterator over the elements cached in the shard.
|
||||
* @return Never returns normally; this implementation always throws UnsupportedOperationException because the shard does not buffer reads.
|
||||
*/
|
||||
public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||
|
||||
/**
|
||||
* Gets the chunks associated with this locus shard.
|
||||
* @return A list of the chunks to use when retrieving locus data.
|
||||
* Gets a filter testing for overlap of this read with the given shard.
|
||||
* @return A filter capable of filtering out reads outside a given shard.
|
||||
*/
|
||||
public Map<SAMFileReader2,List<Chunk>> getChunks() {
|
||||
return chunks;
|
||||
public SamRecordFilter getFilter() {
|
||||
return new ReadOverlapFilter(loci);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -13,13 +12,13 @@ import java.util.List;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class IntervalOverlappingFilter implements SamRecordFilter {
|
||||
public class ReadOverlapFilter implements SamRecordFilter {
|
||||
/**
|
||||
* The list of locations containing reads to keep.
|
||||
*/
|
||||
private final List<GenomeLoc> intervals;
|
||||
|
||||
public IntervalOverlappingFilter(List<GenomeLoc> intervals) {
|
||||
public ReadOverlapFilter(List<GenomeLoc> intervals) {
|
||||
this.intervals = intervals;
|
||||
}
|
||||
|
||||
|
|
@ -198,8 +198,8 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
|
||||
// Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set.
|
||||
CloseableIterator<SAMRecord> iterator = new MergingSamRecordIterator(headerMerger,readerToIteratorMap,true);
|
||||
if(addIntervalFilter)
|
||||
iterator = new FilteringIterator(iterator,new IntervalOverlappingFilter(shard.getGenomeLocs()));
|
||||
if(shard.getFilter() != null)
|
||||
iterator = new FilteringIterator(iterator,shard.getFilter());
|
||||
|
||||
return applyDecoratingIterators(enableVerification,
|
||||
StingSAMIteratorAdapter.adapt(reads,iterator),
|
||||
|
|
|
|||
Loading…
Reference in New Issue