Added the iterator to SAMDataSource to prevent seeing duplicate reads, only in a byReads traversal. The iterator discards any reads in the current interval that would have been seen in the previous interval.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1317 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-07-25 22:36:29 +00:00
parent 5eca4c353c
commit f1109e9070
2 changed files with 20 additions and 2 deletions

View File

@ -81,6 +81,8 @@ public class SAMDataSource implements SimpleDataSource {
// A pool of SAM iterators. // A pool of SAM iterators.
private SAMResourcePool resourcePool = null; private SAMResourcePool resourcePool = null;
private GenomeLoc mLastInterval = null;
/** /**
* Returns a histogram of reads that were screened out, grouped by the nature of the error. * Returns a histogram of reads that were screened out, grouped by the nature of the error.
* @return Histogram of reads. Will not be null. * @return Histogram of reads. Will not be null.
@ -153,14 +155,26 @@ public class SAMDataSource implements SimpleDataSource {
reads.getDownsamplingFraction(), reads.getDownsamplingFraction(),
reads.getSafetyChecking(), reads.getSafetyChecking(),
reads.getSupplementalFilters()); reads.getSupplementalFilters());
} else if (shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.INTERVAL) { } else if (shard.getShardType() == Shard.ShardType.LOCUS) {
iterator = seekLocus(shard.getGenomeLoc()); iterator = seekLocus(shard.getGenomeLoc());
iterator = applyDecoratingIterators(false, iterator = applyDecoratingIterators(false,
iterator, iterator,
reads.getDownsamplingFraction(), reads.getDownsamplingFraction(),
reads.getSafetyChecking(), reads.getSafetyChecking(),
reads.getSupplementalFilters()); reads.getSupplementalFilters());
} else if (shard.getShardType() == Shard.ShardType.INTERVAL) {
iterator = seekLocus(shard.getGenomeLoc());
iterator = applyDecoratingIterators(false,
iterator,
reads.getDownsamplingFraction(),
reads.getSafetyChecking(),
reads.getSupplementalFilters());
// add the new overlapping detection iterator, if we have a last interval
if (mLastInterval != null && queryOverlapping) iterator = new IntervalOverlapIterator(iterator,mLastInterval,false);
mLastInterval = shard.getGenomeLoc();
} else { } else {
throw new StingException("seek: Unknown shard type"); throw new StingException("seek: Unknown shard type");
} }

View File

@ -4,6 +4,7 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import java.util.Iterator; import java.util.Iterator;
@ -103,7 +104,10 @@ public class IntervalOverlapIterator implements StingSAMIterator {
* @return true if it overlaps, false otherwise * @return true if it overlaps, false otherwise
*/ */
private boolean isOverlaping(SAMRecord rec) { private boolean isOverlaping(SAMRecord rec) {
return mLoc.overlapsP(GenomeLocParser.createGenomeLoc(rec)); boolean overlap = mLoc.overlapsP(GenomeLocParser.createGenomeLoc(rec));
if (overlap && this.throwException)
throw new StingException("IntervalOverlapIterator found a overlapping read " + rec.getReadName() + " with overlap " + this.mLoc.toString());
return overlap;
} }
} }