From cae54ec52d92ccfe0a1e2e8718985728d4b90971 Mon Sep 17 00:00:00 2001 From: ebanks Date: Thu, 23 Apr 2009 17:58:19 +0000 Subject: [PATCH] Walker for creating intervals to be used in the indel cleaner git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@508 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/IndelIntervalWalker.java | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 java/src/org/broadinstitute/sting/playground/gatk/walkers/IndelIntervalWalker.java diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/IndelIntervalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/IndelIntervalWalker.java new file mode 100644 index 000000000..700708de5 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/IndelIntervalWalker.java @@ -0,0 +1,88 @@ +package org.broadinstitute.sting.playground.gatk.walkers; + +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.WalkerName; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.GenomeLoc; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.AlignmentBlock; + +import java.util.List; + +// A walker to determine intervals within which reads should be cleaned and realigned +// because they contain one or more indels (which possibly caused them not to align perfectly). +// Note that the reduce step assumes that reductions occur in order (i.e. no hierarchical reductions), +// although this can easily be changed if necessary. + +@WalkerName("IndelIntervals") +public class IndelIntervalWalker extends ReadWalker { + @Argument(fullName="maxReadLength", shortName="maxRead", required=false, defaultValue="-1") + public int maxReadLength; + @Argument(fullName="minIndelsPerInterval", shortName="minIndels", required=false, defaultValue="1") + public int minIntervalIndelCount; + + public void initialize() {} + + public boolean filter(LocusContext context, SAMRecord read) { + return ( !read.getReadUnmappedFlag() && // mapped + read.getReadLength() <= maxReadLength && // not too big + read.getAlignmentBlocks().size() > 1); // indel + } + + public Interval map(LocusContext context, SAMRecord read) { + List blocks = read.getAlignmentBlocks(); + long indelLeftEdge = read.getAlignmentStart() + blocks.get(0).getLength() - 1; + long indelRightEdge = read.getAlignmentEnd() - blocks.get(blocks.size()-1).getLength() + 1; + GenomeLoc indelLoc = new GenomeLoc(context.getLocation().getContigIndex(), indelLeftEdge, indelRightEdge); + return new Interval(context.getLocation(), indelLoc); + } + + public Interval reduceInit() { + return null; + } + + public Interval reduce(Interval value, Interval sum) { + // if there is no interval to the left, then this is the first one + if ( sum == null ) + return value; + + //System.out.println(sum + " ==> " + value); + + // if the intervals don't overlap, print out the leftmost one and start a new one + if ( !sum.readLoc.overlapsP(value.readLoc) ) { + if ( sum.indelCount >= minIntervalIndelCount ) + out.println(sum); + return value; + } + // otherwise, merge them + return sum.merge(value); + } + + public void onTraversalDone(Interval result) {} + + public class Interval { + public GenomeLoc readLoc = null; + public GenomeLoc indelLoc = null; + public int indelCount; + + Interval(GenomeLoc readLoc, GenomeLoc indelLoc) { + this.indelLoc = indelLoc; + this.readLoc = readLoc; + indelCount = 1; + } + + public Interval merge(Interval i) { + long indelLeftEdge = Math.min(this.indelLoc.getStart(), i.indelLoc.getStart()); + long indelRightEdge = Math.max(this.indelLoc.getStop(), i.indelLoc.getStop()); + GenomeLoc mergedIndelLoc = new GenomeLoc(this.indelLoc.getContigIndex(), indelLeftEdge, indelRightEdge); + Interval mergedInterval = new Interval(this.readLoc.merge(i.readLoc), mergedIndelLoc); + mergedInterval.indelCount = this.indelCount + i.indelCount; + return mergedInterval; + } + + public String toString() { + return indelLoc.toString(); + } + } +} \ No newline at end of file