diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java index 53d0cba5c..83a520e22 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java @@ -4,7 +4,8 @@ import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.cmdLine.Argument; @@ -26,6 +27,7 @@ import java.util.*; * Copies reads from the input stream into the outputBAM, replacing those * reads which have been cleaned with their new clean copies. */ +@Requires({DataSource.READS}) public class CleanedReadInjector extends ReadWalker { /** @@ -45,6 +47,11 @@ public class CleanedReadInjector extends ReadWalker { */ private Queue cleanedReads = new LinkedList(); + /** + * The intervals specified by the user + */ + private HashMap> intervals = null; + /** * A fast lookup table for uniquified read info */ @@ -63,6 +70,21 @@ public class CleanedReadInjector extends ReadWalker { cleanedReadHash.add(getUniquifiedReadName(read)); } allReads.close(); + + // If there are intervals specified by the user,record them so we can make sure not + // to emit reads outside the intervals. For now, we'll group them by chromosome to + // make lookup a bit faster. + if ( this.getToolkit().getArguments().intervals != null ) { + intervals = new HashMap>(); + List locs = GenomeAnalysisEngine.parseIntervalRegion(this.getToolkit().getArguments().intervals); + Iterator iter = GenomeLocSortedSet.createSetFromList(locs).iterator(); + while ( iter.hasNext() ) { + GenomeLoc loc = iter.next(); + if ( intervals.get(loc.getContig()) == null ) + intervals.put(loc.getContig(), new ArrayList()); + intervals.get(loc.getContig()).add(loc); + } + } } /** @@ -81,9 +103,9 @@ public class CleanedReadInjector extends ReadWalker { while ( firstCleanedRead != null && firstCleanedRead.getReferenceIndex() <= read.getReferenceIndex() && firstCleanedRead.getAlignmentStart() <= read.getAlignmentStart() ) { - outputBAM.addAlignment(firstCleanedRead); - cleanedReadCount++; - cleanedReads.remove(); + if ( emit(firstCleanedRead) ) + cleanedReadCount++; + cleanedReads.remove(); firstCleanedRead = cleanedReads.peek(); } @@ -92,6 +114,37 @@ public class CleanedReadInjector extends ReadWalker { return cleanedReadCount; } + /** + * Determine whether to emit the given read; if so, return true. + */ + private boolean emit(SAMRecord read) { + // if no intervals were specified, emit everything + if ( intervals == null ) { + outputBAM.addAlignment(read); + return true; + } + + ArrayList intervalList = intervals.get(read.getReferenceName()); + if ( intervalList == null ) + return false; + + GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read); + for ( GenomeLoc interval : intervalList ) { + // if it overlaps an interval, then we can emit it + if ( interval.overlapsP(readLoc) ) { + outputBAM.addAlignment(read); + return true; + } + + // once we've passed any interval that could overlap it, just quit + if ( interval.isPast(readLoc) ) + return false; + } + + // it didn't overlap an interval + return false; + } + /** * Initialize traversal with number of reads which have been replaced with a clean version. * @return 0 to initialize the traversal. diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelIntervalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelIntervalWalker.java index e06879097..5e2ef4ef2 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelIntervalWalker.java @@ -1,8 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.walkers.WalkerName; -import org.broadinstitute.sting.gatk.walkers.ReadFilters; +import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.filters.Platform454Filter; import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.utils.cmdLine.Argument; @@ -18,6 +16,7 @@ import java.util.List; // although this can easily be changed if necessary. @WalkerName("IndelIntervals") +@Requires({DataSource.READS}) @ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class}) public class IndelIntervalWalker extends ReadWalker { @Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/MismatchIntervalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/MismatchIntervalWalker.java index 49aea4865..4817e05df 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/MismatchIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/MismatchIntervalWalker.java @@ -1,4 +1,3 @@ - package org.broadinstitute.sting.gatk.walkers.indels; import net.sf.samtools.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/SNPClusterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/SNPClusterWalker.java index 1ced3f714..0d94d140c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/SNPClusterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/SNPClusterWalker.java @@ -1,4 +1,3 @@ - package org.broadinstitute.sting.gatk.walkers.indels; import org.broadinstitute.sting.gatk.refdata.*; @@ -9,7 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.cmdLine.Argument; @WalkerName("SNPClusters") -@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="snps",type=AllelicVariant.class)}) +@Requires(value={},referenceMetaData={@RMD(name="snps",type=AllelicVariant.class)}) public class SNPClusterWalker extends RefWalker { @Argument(fullName="windowSize", shortName="window", doc="window size for calculating clusters", required=false) int windowSize = 10; @@ -60,4 +59,4 @@ public class SNPClusterWalker extends RefWalker { out.println(sum); return value; } -} \ No newline at end of file +}