From 5bba44d6934b06f944728cd3aad5ef191373a369 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 16 Dec 2011 16:36:31 -0500 Subject: [PATCH] Added hardClipByReferenceCoordinates UnitTest for the ReadClipper * fixed edge case when requested to hard clip beginning of a read that had hanging soft clipped bases on the left tail. * fixed edge case when requested to hard clip end of a read that had hanging soft clipped bases on the right tail. * fixed AlignmentStart of a clipped read that results in only hard clips and soft clips note: added tests to all these beautiful cases... --- .../sting/utils/clipreads/ClippingOp.java | 7 ++++-- .../sting/utils/clipreads/ReadClipper.java | 9 ++++++++ .../utils/clipreads/ReadClipperUnitTest.java | 23 ++++++++++++++++--- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 06985041e..0bb8c3e6f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -460,17 +460,20 @@ public class ClippingOp { int newShift = 0; int oldShift = 0; + boolean readHasStarted = false; // if the new cigar is composed of S and H only, we have to traverse the entire old cigar to calculate the shift for (CigarElement cigarElement : newCigar.getCigarElements()) { if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) newShift += cigarElement.getLength(); - else + else { + readHasStarted = true; break; + } } for (CigarElement cigarElement : oldCigar.getCigarElements()) { if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP ) oldShift += Math.min(cigarElement.getLength(), newShift - oldShift); - else + else if (readHasStarted) break; } return newShift - oldShift; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 4bb309a48..461c9aef3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -63,12 +63,18 @@ public class ReadClipper { int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); + if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) + return new GATKSAMRecord(read.getHeader()); + if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); if ( start > stop ) throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!"); + if ( start > 0 && stop < read.getReadLength() - 1) + throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString())); + this.addOp(new ClippingOp(start, stop)); GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); this.ops = null; @@ -76,6 +82,9 @@ public class ReadClipper { } public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { + if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) + return new GATKSAMRecord(read.getHeader()); + this.addOp(new ClippingOp(start, stop)); return clipRead(ClippingRepresentation.HARDCLIP_BASES); } diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java index d93e0ac08..193eec661 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -86,14 +87,30 @@ public class ReadClipperUnitTest extends BaseTest { @Test(enabled = true) public void testHardClipByReferenceCoordinates() { - logger.warn("PASSED"); + for (Cigar cigar : cigarList) { + GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); + int alnStart = read.getAlignmentStart(); + int alnEnd = read.getAlignmentEnd(); + for (int i=alnStart; i<=alnEnd; i++) { + if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side + GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i); + if (!clipLeft.isEmpty()) + Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); + } + if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side + GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd); + if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. + Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); + } + } + } + logger.warn("PASSED"); } @Test(enabled = true) public void testHardClipByReferenceCoordinatesLeftTail() { - logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail"); - + logger.warn("PASSED"); } @Test(enabled = true)