Support for read interval sharding with proper filtering.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2902 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-02-27 20:26:34 +00:00
parent d8fedd59be
commit 80f5d2829d
6 changed files with 53 additions and 16 deletions

View File

@ -3,11 +3,11 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import net.sf.samtools.Chunk; import net.sf.samtools.Chunk;
import net.sf.samtools.SAMFileReader2; import net.sf.samtools.SAMFileReader2;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.picard.filter.SamRecordFilter;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
/** /**
@ -48,4 +48,10 @@ public interface BAMFormatAwareShard extends Shard {
* @return An iterator over the reads stored in the shard. * @return An iterator over the reads stored in the shard.
*/ */
public StingSAMIterator iterator(); public StingSAMIterator iterator();
/**
* Gets any filter associated with this shard. Useful for filtering out overlaps, etc.
* @return A filter if one exists. Null if not.
*/
public SamRecordFilter getFilter();
} }

View File

@ -3,10 +3,10 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import net.sf.samtools.Chunk; import net.sf.samtools.Chunk;
import net.sf.samtools.SAMFileReader2; import net.sf.samtools.SAMFileReader2;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.picard.filter.SamRecordFilter;
import java.util.*; import java.util.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter; import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
@ -34,15 +34,21 @@ public class BlockDelimitedReadShard extends ReadShard implements BAMFormatAware
*/ */
private final Collection<SAMRecord> reads = new ArrayList<SAMRecord>(BlockDelimitedReadShardStrategy.MAX_READS); private final Collection<SAMRecord> reads = new ArrayList<SAMRecord>(BlockDelimitedReadShardStrategy.MAX_READS);
/**
* The filter to be applied to reads retrieved for this shard. May be null if no filter was supplied.
*/
private final SamRecordFilter filter;
/** /**
* An BlockDelimitedLocusShard can be used either for READ or READ shard types. * An BlockDelimitedLocusShard can be used either for READ or READ shard types.
* Track which type is being used. * Track which type is being used.
*/ */
private final Shard.ShardType shardType; private final Shard.ShardType shardType;
public BlockDelimitedReadShard(Reads sourceInfo, Map<SAMFileReader2,List<Chunk>> chunks, Shard.ShardType shardType) { public BlockDelimitedReadShard(Reads sourceInfo, Map<SAMFileReader2,List<Chunk>> chunks, SamRecordFilter filter, Shard.ShardType shardType) {
this.sourceInfo = sourceInfo; this.sourceInfo = sourceInfo;
this.chunks = chunks; this.chunks = chunks;
this.filter = filter;
this.shardType = shardType; this.shardType = shardType;
} }
@ -73,10 +79,18 @@ public class BlockDelimitedReadShard extends ReadShard implements BAMFormatAware
reads.add(read); reads.add(read);
} }
/**
* Creates an iterator over reads stored in this shard's read cache.
* @return An iterator over the reads currently held in this shard's read cache.
*/
public StingSAMIterator iterator() { public StingSAMIterator iterator() {
return StingSAMIteratorAdapter.adapt(sourceInfo,reads.iterator()); return StingSAMIteratorAdapter.adapt(sourceInfo,reads.iterator());
} }
/**
* Gets the filter that was handed to this shard at construction time.
* @return The filter associated with this shard, or null if none was supplied.
*/
public SamRecordFilter getFilter() {
return this.filter;
}
/** /**
* Get the list of chunks delimiting this shard. * Get the list of chunks delimiting this shard.
* @return a list of chunks that contain data for this shard. * @return a list of chunks that contain data for this shard.

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.datasources.shards; package org.broadinstitute.sting.gatk.datasources.shards;
import net.sf.samtools.*; import net.sf.samtools.*;
import net.sf.picard.filter.SamRecordFilter;
import java.util.*; import java.util.*;
@ -72,6 +73,8 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
throw new NoSuchElementException("No such element available: SAM reader has arrived at last shard."); throw new NoSuchElementException("No such element available: SAM reader has arrived at last shard.");
Map<SAMFileReader2,List<Chunk>> shardPosition = null; Map<SAMFileReader2,List<Chunk>> shardPosition = null;
SamRecordFilter filter = null;
if(!filePointers.isEmpty()) { if(!filePointers.isEmpty()) {
boolean foundData = false; boolean foundData = false;
for(FilePointer filePointer: filePointers) { for(FilePointer filePointer: filePointers) {
@ -89,8 +92,10 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
} }
} }
} }
if(foundData) if(foundData) {
filter = new ReadOverlapFilter(filePointer.locations);
break; break;
}
} }
} }
else { else {
@ -98,9 +103,10 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
shardPosition = new HashMap<SAMFileReader2,List<Chunk>>(); shardPosition = new HashMap<SAMFileReader2,List<Chunk>>();
for(Map.Entry<SAMFileReader2,Chunk> entry: position.entrySet()) for(Map.Entry<SAMFileReader2,Chunk> entry: position.entrySet())
shardPosition.put(entry.getKey(),Collections.singletonList(entry.getValue())); shardPosition.put(entry.getKey(),Collections.singletonList(entry.getValue()));
filter = null;
} }
BAMFormatAwareShard shard = new BlockDelimitedReadShard(dataSource.getReadsInfo(),shardPosition,Shard.ShardType.READ); BAMFormatAwareShard shard = new BlockDelimitedReadShard(dataSource.getReadsInfo(),shardPosition,filter,Shard.ShardType.READ);
atEndOfStream = dataSource.fillShard(shard); atEndOfStream = dataSource.fillShard(shard);
this.position = dataSource.getCurrentPosition(); this.position = dataSource.getCurrentPosition();

View File

@ -2,11 +2,11 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import net.sf.samtools.Chunk; import net.sf.samtools.Chunk;
import net.sf.samtools.SAMFileReader2; import net.sf.samtools.SAMFileReader2;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.picard.filter.SamRecordFilter;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -66,6 +66,14 @@ public class IndexDelimitedLocusShard extends LocusShard implements BAMFormatAwa
this.shardType = shardType; this.shardType = shardType;
} }
/**
* Retrieves the chunks backing this locus shard, grouped by the reader they belong to.
* @return A map from each reader to the list of chunks to use when retrieving locus data.
*/
public Map<SAMFileReader2,List<Chunk>> getChunks() {
return this.chunks;
}
/** /**
* Returns true if this shard is meant to buffer reads, rather * Returns true if this shard is meant to buffer reads, rather
* than just holding pointers to their locations. * than just holding pointers to their locations.
@ -85,14 +93,18 @@ public class IndexDelimitedLocusShard extends LocusShard implements BAMFormatAwa
*/ */
public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); } public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); }
/**
* Gets the iterator over the elements cached in the shard.
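* @return Nothing; this shard does not buffer reads, so the call always throws UnsupportedOperationException.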
*/
public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); } public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); }
/** /**
* Gets the chunks associated with this locus shard. * Gets a filter testing for overlap of this read with the given shard.
* @return A list of the chunks to use when retrieving locus data. * @return A filter capable of filtering out reads outside a given shard.
*/ */
public Map<SAMFileReader2,List<Chunk>> getChunks() { public SamRecordFilter getFilter() {
return chunks; return new ReadOverlapFilter(loci);
} }
/** /**

View File

@ -1,9 +1,8 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources; package org.broadinstitute.sting.gatk.datasources.shards;
import net.sf.picard.filter.SamRecordFilter; import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.List; import java.util.List;
@ -13,13 +12,13 @@ import java.util.List;
* @author mhanna * @author mhanna
* @version 0.1 * @version 0.1
*/ */
public class IntervalOverlappingFilter implements SamRecordFilter { public class ReadOverlapFilter implements SamRecordFilter {
/** /**
* The list of locations containing reads to keep. * The list of locations containing reads to keep.
*/ */
private final List<GenomeLoc> intervals; private final List<GenomeLoc> intervals;
public IntervalOverlappingFilter(List<GenomeLoc> intervals) { public ReadOverlapFilter(List<GenomeLoc> intervals) {
this.intervals = intervals; this.intervals = intervals;
} }

View File

@ -198,8 +198,8 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
// Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set. // Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set.
CloseableIterator<SAMRecord> iterator = new MergingSamRecordIterator(headerMerger,readerToIteratorMap,true); CloseableIterator<SAMRecord> iterator = new MergingSamRecordIterator(headerMerger,readerToIteratorMap,true);
if(addIntervalFilter) if(shard.getFilter() != null)
iterator = new FilteringIterator(iterator,new IntervalOverlappingFilter(shard.getGenomeLocs())); iterator = new FilteringIterator(iterator,shard.getFilter());
return applyDecoratingIterators(enableVerification, return applyDecoratingIterators(enableVerification,
StingSAMIteratorAdapter.adapt(reads,iterator), StingSAMIteratorAdapter.adapt(reads,iterator),