From 70335b2b0a967f31b1aa063ace4845bff413b086 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 21 Sep 2011 17:12:01 -0400 Subject: [PATCH] Hard clipping soft clipped reads to fix misalignments. Pre-softclipped reads (with high qual) are a complicated event to deal with in the Reduced Reads environment. I chose to hard clip them out for now and added a todo item to bring them back on in the future, perhaps as a variant region. --- .../sting/utils/clipreads/ReadClipper.java | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index a8a3fad9e..9b9ce4fdf 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.clipreads; import com.google.java.contract.Requires; +import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; @@ -8,6 +9,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; /** @@ -128,6 +130,39 @@ public class ReadClipper { return this.clipRead(ClippingRepresentation.HARDCLIP_BASES); } + public SAMRecord hardClipSoftClippedBases () { + int readIndex = 0; + int cutLeft = -1; // first position to hard clip (inclusive) + int cutRight = -1; // first position to hard clip (inclusive) + boolean rightTail = false; // trigger to stop clipping the left tail and start cutting the right tail + + for (CigarElement cigarElement : read.getCigar().getCigarElements()) { + if (cigarElement.getOperator() == CigarOperator.SOFT_CLIP) { + if (rightTail) { + cutRight = readIndex; + } + else { + cutLeft = readIndex + cigarElement.getLength() - 1; + } + } + else + rightTail = true; + + if (cigarElement.getOperator().consumesReadBases()) + readIndex += cigarElement.getLength(); + } + + // It is extremely important that we cut the end first otherwise the read coordinates change. + if (cutRight >= 0) + this.addOp(new ClippingOp(cutRight, read.getReadLength() - 1)); + if (cutLeft >= 0) + this.addOp(new ClippingOp(0, cutLeft)); + + return clipRead(ClippingRepresentation.HARDCLIP_BASES); + } + + + /** * Return a new read corresponding to this.read that's been clipped according to ops, if any are present. *