Support for read interval sharding with proper filtering.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2902 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d8fedd59be
commit
80f5d2829d
|
|
@ -3,11 +3,11 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
||||||
import net.sf.samtools.Chunk;
|
import net.sf.samtools.Chunk;
|
||||||
import net.sf.samtools.SAMFileReader2;
|
import net.sf.samtools.SAMFileReader2;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -48,4 +48,10 @@ public interface BAMFormatAwareShard extends Shard {
|
||||||
* @return An iterator over the reads stored in the shard.
|
* @return An iterator over the reads stored in the shard.
|
||||||
*/
|
*/
|
||||||
public StingSAMIterator iterator();
|
public StingSAMIterator iterator();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets any filter associated with this shard. Useful for filtering out overlaps, etc.
|
||||||
|
* @return A filter if one exists. Null if not.
|
||||||
|
*/
|
||||||
|
public SamRecordFilter getFilter();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,10 +3,10 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
||||||
import net.sf.samtools.Chunk;
|
import net.sf.samtools.Chunk;
|
||||||
import net.sf.samtools.SAMFileReader2;
|
import net.sf.samtools.SAMFileReader2;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
|
||||||
|
|
@ -34,15 +34,21 @@ public class BlockDelimitedReadShard extends ReadShard implements BAMFormatAware
|
||||||
*/
|
*/
|
||||||
private final Collection<SAMRecord> reads = new ArrayList<SAMRecord>(BlockDelimitedReadShardStrategy.MAX_READS);
|
private final Collection<SAMRecord> reads = new ArrayList<SAMRecord>(BlockDelimitedReadShardStrategy.MAX_READS);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The filter to be applied to all reads meeting these criteria.
|
||||||
|
*/
|
||||||
|
private final SamRecordFilter filter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An BlockDelimitedLocusShard can be used either for READ or READ shard types.
|
* An BlockDelimitedLocusShard can be used either for READ or READ shard types.
|
||||||
* Track which type is being used.
|
* Track which type is being used.
|
||||||
*/
|
*/
|
||||||
private final Shard.ShardType shardType;
|
private final Shard.ShardType shardType;
|
||||||
|
|
||||||
public BlockDelimitedReadShard(Reads sourceInfo, Map<SAMFileReader2,List<Chunk>> chunks, Shard.ShardType shardType) {
|
public BlockDelimitedReadShard(Reads sourceInfo, Map<SAMFileReader2,List<Chunk>> chunks, SamRecordFilter filter, Shard.ShardType shardType) {
|
||||||
this.sourceInfo = sourceInfo;
|
this.sourceInfo = sourceInfo;
|
||||||
this.chunks = chunks;
|
this.chunks = chunks;
|
||||||
|
this.filter = filter;
|
||||||
this.shardType = shardType;
|
this.shardType = shardType;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -73,10 +79,18 @@ public class BlockDelimitedReadShard extends ReadShard implements BAMFormatAware
|
||||||
reads.add(read);
|
reads.add(read);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an iterator over reads stored in this shard's read cache.
|
||||||
|
* @return An iterator over the reads stored in this shard's read cache.
|
||||||
|
*/
|
||||||
public StingSAMIterator iterator() {
|
public StingSAMIterator iterator() {
|
||||||
return StingSAMIteratorAdapter.adapt(sourceInfo,reads.iterator());
|
return StingSAMIteratorAdapter.adapt(sourceInfo,reads.iterator());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public SamRecordFilter getFilter() {
|
||||||
|
return filter;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the list of chunks delimiting this shard.
|
* Get the list of chunks delimiting this shard.
|
||||||
* @return a list of chunks that contain data for this shard.
|
* @return a list of chunks that contain data for this shard.
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.shards;
|
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -72,6 +73,8 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
|
||||||
throw new NoSuchElementException("No such element available: SAM reader has arrived at last shard.");
|
throw new NoSuchElementException("No such element available: SAM reader has arrived at last shard.");
|
||||||
|
|
||||||
Map<SAMFileReader2,List<Chunk>> shardPosition = null;
|
Map<SAMFileReader2,List<Chunk>> shardPosition = null;
|
||||||
|
SamRecordFilter filter = null;
|
||||||
|
|
||||||
if(!filePointers.isEmpty()) {
|
if(!filePointers.isEmpty()) {
|
||||||
boolean foundData = false;
|
boolean foundData = false;
|
||||||
for(FilePointer filePointer: filePointers) {
|
for(FilePointer filePointer: filePointers) {
|
||||||
|
|
@ -89,8 +92,10 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(foundData)
|
if(foundData) {
|
||||||
|
filter = new ReadOverlapFilter(filePointer.locations);
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
@ -98,9 +103,10 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
|
||||||
shardPosition = new HashMap<SAMFileReader2,List<Chunk>>();
|
shardPosition = new HashMap<SAMFileReader2,List<Chunk>>();
|
||||||
for(Map.Entry<SAMFileReader2,Chunk> entry: position.entrySet())
|
for(Map.Entry<SAMFileReader2,Chunk> entry: position.entrySet())
|
||||||
shardPosition.put(entry.getKey(),Collections.singletonList(entry.getValue()));
|
shardPosition.put(entry.getKey(),Collections.singletonList(entry.getValue()));
|
||||||
|
filter = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
BAMFormatAwareShard shard = new BlockDelimitedReadShard(dataSource.getReadsInfo(),shardPosition,Shard.ShardType.READ);
|
BAMFormatAwareShard shard = new BlockDelimitedReadShard(dataSource.getReadsInfo(),shardPosition,filter,Shard.ShardType.READ);
|
||||||
atEndOfStream = dataSource.fillShard(shard);
|
atEndOfStream = dataSource.fillShard(shard);
|
||||||
|
|
||||||
this.position = dataSource.getCurrentPosition();
|
this.position = dataSource.getCurrentPosition();
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,11 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
import net.sf.samtools.Chunk;
|
import net.sf.samtools.Chunk;
|
||||||
import net.sf.samtools.SAMFileReader2;
|
import net.sf.samtools.SAMFileReader2;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
@ -66,6 +66,14 @@ public class IndexDelimitedLocusShard extends LocusShard implements BAMFormatAwa
|
||||||
this.shardType = shardType;
|
this.shardType = shardType;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the chunks associated with this locus shard.
|
||||||
|
* @return A list of the chunks to use when retrieving locus data.
|
||||||
|
*/
|
||||||
|
public Map<SAMFileReader2,List<Chunk>> getChunks() {
|
||||||
|
return chunks;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if this shard is meant to buffer reads, rather
|
* Returns true if this shard is meant to buffer reads, rather
|
||||||
* than just holding pointers to their locations.
|
* than just holding pointers to their locations.
|
||||||
|
|
@ -85,14 +93,18 @@ public class IndexDelimitedLocusShard extends LocusShard implements BAMFormatAwa
|
||||||
*/
|
*/
|
||||||
public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the iterator over the elements cached in the shard.
|
||||||
|
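* @return Never returns normally; this shard does not buffer reads, so an UnsupportedOperationException is always thrown.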
|
||||||
|
*/
|
||||||
public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the chunks associated with this locus shard.
|
* Gets a filter testing whether a read overlaps the loci of this shard.
|
||||||
* @return A list of the chunks to use when retrieving locus data.
|
* @return A filter capable of filtering out reads outside a given shard.
|
||||||
*/
|
*/
|
||||||
public Map<SAMFileReader2,List<Chunk>> getChunks() {
|
public SamRecordFilter getFilter() {
|
||||||
return chunks;
|
return new ReadOverlapFilter(loci);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,8 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||||
|
|
||||||
import net.sf.picard.filter.SamRecordFilter;
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -13,13 +12,13 @@ import java.util.List;
|
||||||
* @author mhanna
|
* @author mhanna
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public class IntervalOverlappingFilter implements SamRecordFilter {
|
public class ReadOverlapFilter implements SamRecordFilter {
|
||||||
/**
|
/**
|
||||||
* The list of locations containing reads to keep.
|
* The list of locations containing reads to keep.
|
||||||
*/
|
*/
|
||||||
private final List<GenomeLoc> intervals;
|
private final List<GenomeLoc> intervals;
|
||||||
|
|
||||||
public IntervalOverlappingFilter(List<GenomeLoc> intervals) {
|
public ReadOverlapFilter(List<GenomeLoc> intervals) {
|
||||||
this.intervals = intervals;
|
this.intervals = intervals;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -198,8 +198,8 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
||||||
|
|
||||||
// Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set.
|
// Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set.
|
||||||
CloseableIterator<SAMRecord> iterator = new MergingSamRecordIterator(headerMerger,readerToIteratorMap,true);
|
CloseableIterator<SAMRecord> iterator = new MergingSamRecordIterator(headerMerger,readerToIteratorMap,true);
|
||||||
if(addIntervalFilter)
|
if(shard.getFilter() != null)
|
||||||
iterator = new FilteringIterator(iterator,new IntervalOverlappingFilter(shard.getGenomeLocs()));
|
iterator = new FilteringIterator(iterator,shard.getFilter());
|
||||||
|
|
||||||
return applyDecoratingIterators(enableVerification,
|
return applyDecoratingIterators(enableVerification,
|
||||||
StingSAMIteratorAdapter.adapt(reads,iterator),
|
StingSAMIteratorAdapter.adapt(reads,iterator),
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue