From f1109e90709da70b63032768760f5b89a7ffee8d Mon Sep 17 00:00:00 2001 From: aaron Date: Sat, 25 Jul 2009 22:36:29 +0000 Subject: [PATCH] Added the iterator to SAMDataSource to prevent seeing duplicate reads, only in a byReads traversal. The iterator discards any reads in the current interval that would have been seen in the previous interval. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1317 348d0f76-0448-11de-a6fe-93d51630548a --- .../simpleDataSources/SAMDataSource.java | 16 +++++++++++++++- .../gatk/iterators/IntervalOverlapIterator.java | 6 +++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index 3341d573b..d8921047a 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -81,6 +81,8 @@ public class SAMDataSource implements SimpleDataSource { // A pool of SAM iterators. private SAMResourcePool resourcePool = null; + private GenomeLoc mLastInterval = null; + /** * Returns a histogram of reads that were screened out, grouped by the nature of the error. * @return Histogram of reads. Will not be null. 
@@ -153,14 +155,26 @@ public class SAMDataSource implements SimpleDataSource { reads.getDownsamplingFraction(), reads.getSafetyChecking(), reads.getSupplementalFilters()); - } else if (shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.INTERVAL) { + } else if (shard.getShardType() == Shard.ShardType.LOCUS) { iterator = seekLocus(shard.getGenomeLoc()); iterator = applyDecoratingIterators(false, iterator, reads.getDownsamplingFraction(), reads.getSafetyChecking(), reads.getSupplementalFilters()); + } else if (shard.getShardType() == Shard.ShardType.INTERVAL) { + iterator = seekLocus(shard.getGenomeLoc()); + iterator = applyDecoratingIterators(false, + iterator, + reads.getDownsamplingFraction(), + reads.getSafetyChecking(), + reads.getSupplementalFilters()); + + // add the new overlapping detection iterator, if we have a last interval + if (mLastInterval != null && queryOverlapping) iterator = new IntervalOverlapIterator(iterator,mLastInterval,false); + mLastInterval = shard.getGenomeLoc(); } else { + throw new StingException("seek: Unknown shard type"); } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/IntervalOverlapIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/IntervalOverlapIterator.java index 8b4f197be..bb2caab9c 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/IntervalOverlapIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/IntervalOverlapIterator.java @@ -4,6 +4,7 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.StingException; import java.util.Iterator; @@ -103,7 +104,10 @@ public class IntervalOverlapIterator implements StingSAMIterator { * @return true if it overlaps, false otherwise */ private boolean isOverlaping(SAMRecord rec) { - return 
mLoc.overlapsP(GenomeLocParser.createGenomeLoc(rec)); + boolean overlap = mLoc.overlapsP(GenomeLocParser.createGenomeLoc(rec)); + if (overlap && this.throwException) + throw new StingException("IntervalOverlapIterator found a overlapping read " + rec.getReadName() + " with overlap " + this.mLoc.toString()); + return overlap; } }