Added the iterator to SAMDataSource to prevent seeing duplicate reads; this applies only in a byReads traversal. The iterator discards any reads in the current interval that would have been seen in the previous interval.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1317 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-07-25 22:36:29 +00:00
parent 5eca4c353c
commit f1109e9070
2 changed files with 20 additions and 2 deletions

View File

@ -81,6 +81,8 @@ public class SAMDataSource implements SimpleDataSource {
// A pool of SAM iterators.
private SAMResourcePool resourcePool = null;
private GenomeLoc mLastInterval = null;
/**
* Returns a histogram of reads that were screened out, grouped by the nature of the error.
* @return Histogram of reads. Will not be null.
@ -153,14 +155,26 @@ public class SAMDataSource implements SimpleDataSource {
reads.getDownsamplingFraction(),
reads.getSafetyChecking(),
reads.getSupplementalFilters());
} else if (shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.INTERVAL) {
} else if (shard.getShardType() == Shard.ShardType.LOCUS) {
iterator = seekLocus(shard.getGenomeLoc());
iterator = applyDecoratingIterators(false,
iterator,
reads.getDownsamplingFraction(),
reads.getSafetyChecking(),
reads.getSupplementalFilters());
} else if (shard.getShardType() == Shard.ShardType.INTERVAL) {
iterator = seekLocus(shard.getGenomeLoc());
iterator = applyDecoratingIterators(false,
iterator,
reads.getDownsamplingFraction(),
reads.getSafetyChecking(),
reads.getSupplementalFilters());
// add the new overlapping detection iterator, if we have a last interval
if (mLastInterval != null && queryOverlapping) iterator = new IntervalOverlapIterator(iterator,mLastInterval,false);
mLastInterval = shard.getGenomeLoc();
} else {
throw new StingException("seek: Unknown shard type");
}

View File

@ -4,6 +4,7 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import java.util.Iterator;
@ -103,7 +104,10 @@ public class IntervalOverlapIterator implements StingSAMIterator {
* @return true if it overlaps, false otherwise
*/
private boolean isOverlaping(SAMRecord rec) {
// NOTE(review): this section is a rendered diff without +/- markers; the bare return on the
// next line looks like the pre-change statement that the lines after it replace. Confirm
// against the actual file -- taken literally, everything after it would be unreachable.
return mLoc.overlapsP(GenomeLocParser.createGenomeLoc(rec));
// Build a GenomeLoc from the read and test it against this iterator's stored location (mLoc).
boolean overlap = mLoc.overlapsP(GenomeLocParser.createGenomeLoc(rec));
// When throwException is set, an overlapping read is reported by throwing a StingException
// (naming the read and the stored location) instead of returning true.
if (overlap && this.throwException)
throw new StingException("IntervalOverlapIterator found a overlapping read " + rec.getReadName() + " with overlap " + this.mLoc.toString());
return overlap;
}
}