diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java
index 53d0cba5c..83a520e22 100644
--- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java
@@ -4,7 +4,8 @@ import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
-import org.broadinstitute.sting.gatk.walkers.ReadWalker;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@@ -26,6 +27,7 @@ import java.util.*;
* Copies reads from the input stream into the outputBAM, replacing those
* reads which have been cleaned with their new clean copies.
*/
+@Requires({DataSource.READS})
public class CleanedReadInjector extends ReadWalker {
/**
@@ -45,6 +47,11 @@ public class CleanedReadInjector extends ReadWalker {
*/
private Queue cleanedReads = new LinkedList();
+ /**
+ * The intervals specified by the user
+ */
+ private HashMap> intervals = null;
+
/**
* A fast lookup table for uniquified read info
*/
@@ -63,6 +70,21 @@ public class CleanedReadInjector extends ReadWalker {
cleanedReadHash.add(getUniquifiedReadName(read));
}
allReads.close();
+
+ // If there are intervals specified by the user,record them so we can make sure not
+ // to emit reads outside the intervals. For now, we'll group them by chromosome to
+ // make lookup a bit faster.
+ if ( this.getToolkit().getArguments().intervals != null ) {
+ intervals = new HashMap>();
+ List locs = GenomeAnalysisEngine.parseIntervalRegion(this.getToolkit().getArguments().intervals);
+ Iterator iter = GenomeLocSortedSet.createSetFromList(locs).iterator();
+ while ( iter.hasNext() ) {
+ GenomeLoc loc = iter.next();
+ if ( intervals.get(loc.getContig()) == null )
+ intervals.put(loc.getContig(), new ArrayList());
+ intervals.get(loc.getContig()).add(loc);
+ }
+ }
}
/**
@@ -81,9 +103,9 @@ public class CleanedReadInjector extends ReadWalker {
while ( firstCleanedRead != null &&
firstCleanedRead.getReferenceIndex() <= read.getReferenceIndex() &&
firstCleanedRead.getAlignmentStart() <= read.getAlignmentStart() ) {
- outputBAM.addAlignment(firstCleanedRead);
- cleanedReadCount++;
- cleanedReads.remove();
+ if ( emit(firstCleanedRead) )
+ cleanedReadCount++;
+ cleanedReads.remove();
firstCleanedRead = cleanedReads.peek();
}
@@ -92,6 +114,37 @@ public class CleanedReadInjector extends ReadWalker {
return cleanedReadCount;
}
+ /**
+ * Determine whether to emit the given read; if so, return true.
+ */
+ private boolean emit(SAMRecord read) {
+ // if no intervals were specified, emit everything
+ if ( intervals == null ) {
+ outputBAM.addAlignment(read);
+ return true;
+ }
+
+ ArrayList intervalList = intervals.get(read.getReferenceName());
+ if ( intervalList == null )
+ return false;
+
+ GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
+ for ( GenomeLoc interval : intervalList ) {
+ // if it overlaps an interval, then we can emit it
+ if ( interval.overlapsP(readLoc) ) {
+ outputBAM.addAlignment(read);
+ return true;
+ }
+
+ // once we've passed any interval that could overlap it, just quit
+ if ( interval.isPast(readLoc) )
+ return false;
+ }
+
+ // it didn't overlap an interval
+ return false;
+ }
+
/**
* Initialize traversal with number of reads which have been replaced with a clean version.
* @return 0 to initialize the traversal.
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelIntervalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelIntervalWalker.java
index e06879097..5e2ef4ef2 100644
--- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelIntervalWalker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelIntervalWalker.java
@@ -1,8 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.indels;
-import org.broadinstitute.sting.gatk.walkers.ReadWalker;
-import org.broadinstitute.sting.gatk.walkers.WalkerName;
-import org.broadinstitute.sting.gatk.walkers.ReadFilters;
+import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@@ -18,6 +16,7 @@ import java.util.List;
// although this can easily be changed if necessary.
@WalkerName("IndelIntervals")
+@Requires({DataSource.READS})
@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class})
public class IndelIntervalWalker extends ReadWalker {
@Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false)
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/MismatchIntervalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/MismatchIntervalWalker.java
index 49aea4865..4817e05df 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/MismatchIntervalWalker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/MismatchIntervalWalker.java
@@ -1,4 +1,3 @@
-
package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.*;
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/SNPClusterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/SNPClusterWalker.java
index 1ced3f714..0d94d140c 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/SNPClusterWalker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/SNPClusterWalker.java
@@ -1,4 +1,3 @@
-
package org.broadinstitute.sting.gatk.walkers.indels;
import org.broadinstitute.sting.gatk.refdata.*;
@@ -9,7 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@WalkerName("SNPClusters")
-@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="snps",type=AllelicVariant.class)})
+@Requires(value={},referenceMetaData={@RMD(name="snps",type=AllelicVariant.class)})
public class SNPClusterWalker extends RefWalker {
@Argument(fullName="windowSize", shortName="window", doc="window size for calculating clusters", required=false)
int windowSize = 10;
@@ -60,4 +59,4 @@ public class SNPClusterWalker extends RefWalker {
out.println(sum);
return value;
}
-}
\ No newline at end of file
+}