From 291d8c7596150bd4e91a94eeef8ed35e758286a1 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Sat, 13 Aug 2011 21:02:24 -0400 Subject: [PATCH] Fixed HardClipping and Interval containment * Hard clipping was wrongfully hard clipping unmapped reads while soft clipping then hard clipping mapped reads. Now we throw exception if we try to hard/soft clip unmapped reads and use the soft->hard clip procedure fore every mapped read. * Interval containment needed a <= and >= to make sure it caught the borders right. --- .../sting/utils/clipreads/ClippingOp.java | 76 +++++++++---------- .../sting/utils/sam/ReadUtils.java | 4 +- 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 5449906b2..5789b606a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -4,6 +4,7 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.Vector; @@ -72,48 +73,45 @@ public class ClippingOp { break; case HARDCLIP_BASES: case SOFTCLIP_BASES: - if ( ! clippedRead.getReadUnmappedFlag() ) { + if ( clippedRead.getReadUnmappedFlag() ) { // we can't process unmapped reads - - //System.out.printf("%d %d %d%n", stop, start, clippedRead.getReadLength()); - int myStop = stop; - if ( (stop + 1 - start) == clippedRead.getReadLength() ) { - // BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone - //Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", clippedRead.getReadName())); - //break; - myStop--; // just decrement stop - } - - if ( start > 0 && myStop != clippedRead.getReadLength() - 1 ) - throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d", - clippedRead.getReadName(), start, myStop)); - - Cigar oldCigar = clippedRead.getCigar(); - - int scLeft = 0, scRight = clippedRead.getReadLength(); - if ( start == 0 ) - scLeft = myStop + 1; - else - scRight = start; - - Cigar newCigar = softClip(oldCigar, scLeft, scRight); - clippedRead.setCigar(newCigar); - - int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar); - int newStart = clippedRead.getAlignmentStart() + newClippedStart; - clippedRead.setAlignmentStart(newStart); - - if ( algorithm == ClippingRepresentation.HARDCLIP_BASES ) - clippedRead = ReadUtils.hardClipSoftClippedBases(clippedRead); - //System.out.printf("%s clipping at %d %d / %d %d => %s and %d%n", oldCigar.toString(), start, stop, scLeft, scRight, newCigar.toString(), newStart); - } else if ( algorithm == ClippingRepresentation.HARDCLIP_BASES ) { - // we can hard clip unmapped reads - if ( clippedRead.getReadNegativeStrandFlag() ) - clippedRead = ReadUtils.hardClipBases(clippedRead, 0, start, null); - else - clippedRead = ReadUtils.hardClipBases(clippedRead, start, start + getLength(), null); + throw new UserException("Read Clipper cannot soft/hard clip unmapped reads"); } + + //System.out.printf("%d %d %d%n", stop, start, clippedRead.getReadLength()); + int myStop = stop; + if ( (stop + 1 - start) == clippedRead.getReadLength() ) { + // BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone + //Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", clippedRead.getReadName())); + //break; + myStop--; // just decrement stop + } + + if ( start > 0 && myStop != clippedRead.getReadLength() - 1 ) + throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d", + clippedRead.getReadName(), start, myStop)); + + Cigar oldCigar = clippedRead.getCigar(); + + int scLeft = 0, scRight = clippedRead.getReadLength(); + if ( start == 0 ) + scLeft = myStop + 1; + else + scRight = start; + + Cigar newCigar = softClip(oldCigar, scLeft, scRight); + clippedRead.setCigar(newCigar); + + int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar); + int newStart = clippedRead.getAlignmentStart() + newClippedStart; + clippedRead.setAlignmentStart(newStart); + + if ( algorithm == ClippingRepresentation.HARDCLIP_BASES ) + clippedRead = ReadUtils.hardClipSoftClippedBases(clippedRead); + //System.out.printf("%s clipping at %d %d / %d %d => %s and %d%n", oldCigar.toString(), start, stop, scLeft, scRight, newCigar.toString(), newStart); + break; + default: throw new IllegalStateException("Unexpected Clipping operator type " + algorithm); } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 448fc828f..16a02abdc 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -618,8 +618,8 @@ public class ReadUtils { (read.getUnclippedStart() > interval.getStop()) ) return ReadAndIntervalOverlap.NO_OVERLAP; - else if ( (read.getUnclippedStart() > interval.getStart()) && - (read.getUnclippedEnd() < interval.getStop()) ) + else if ( (read.getUnclippedStart() >= interval.getStart()) && + (read.getUnclippedEnd() <= interval.getStop()) ) return ReadAndIntervalOverlap.CONTAINED; else if ( (read.getUnclippedStart() < interval.getStart()) &&