From 17efbbf8b18a758439192a1e606b32a277e2ae6a Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 3 Jul 2012 16:37:21 -0400 Subject: [PATCH] Fixed ReadClipperUnitTest The behavior of the clipping on weird cigar strings such as 1I1S1H and 9S56H has changed, and the test has to change accordingly. --- .../utils/clipping/ReadClipperTestUtils.java | 11 ++-- .../utils/clipping/ReadClipperUnitTest.java | 53 ++++++++++--------- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java index 16b141bc3..baa2f6218 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java @@ -84,14 +84,14 @@ public class ReadClipperTestUtils { } private static boolean isCigarValid(Cigar cigar) { - if (cigar.isValid(null, -1) == null) { // This should take care of most invalid Cigar Strings (picard's "exhaustive" implementation) + if (cigar.isValid(null, -1) == null) { // This should take care of most invalid Cigar Strings (picard's "exhaustive" implementation) - Stack cigarElementStack = new Stack(); // Stack to invert cigar string to find ending operator + Stack cigarElementStack = new Stack(); // Stack to invert cigar string to find ending operator CigarOperator startingOp = null; CigarOperator endingOp = null; // check if it doesn't start with deletions - boolean readHasStarted = false; // search the list of elements for the starting operator + boolean readHasStarted = false; // search the list of elements for the starting operator for (CigarElement cigarElement : cigar.getCigarElements()) { if (!readHasStarted) { if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) { @@ -102,19 +102,16 @@ public class ReadClipperTestUtils { cigarElementStack.push(cigarElement); } - readHasStarted = false; // search the inverted list of elements (stack) for the stopping operator while (!cigarElementStack.empty()) { CigarElement cigarElement = cigarElementStack.pop(); if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) { - readHasStarted = true; endingOp = cigarElement.getOperator(); break; } } -// if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION) if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION) - return true; // we don't accept reads starting or ending in deletions (add any other constraint here) + return true; // we don't accept reads starting or ending in deletions (add any other constraint here) } return false; diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index 44c9ab255..a819e41c7 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -151,51 +151,40 @@ public class ReadClipperUnitTest extends BaseTest { final byte LOW_QUAL = 2; final byte HIGH_QUAL = 30; - // create a read for every cigar permutation + /** create a read for every cigar permutation */ for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int readLength = read.getReadLength(); byte[] quals = new byte[readLength]; for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) { - Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail + + /** create a read with nLowQualBases in the left tail */ + Utils.fillArrayWithByte(quals, HIGH_QUAL); for (int addLeft = 0; addLeft < nLowQualBases; addLeft++) quals[addLeft] = LOW_QUAL; read.setBaseQualities(quals); GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); + checkClippedReadsForLowQualEnds(read, clipLeft, LOW_QUAL, nLowQualBases); - assertUnclippedLimits(read, clipLeft); // Make sure limits haven't changed - assertNoLowQualBases(clipLeft, LOW_QUAL); // Make sure the low qualities are gone - Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped - String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString())); - - - Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the right tail + /** create a read with nLowQualBases in the right tail */ + Utils.fillArrayWithByte(quals, HIGH_QUAL); for (int addRight = 0; addRight < nLowQualBases; addRight++) quals[readLength - addRight - 1] = LOW_QUAL; read.setBaseQualities(quals); GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); + checkClippedReadsForLowQualEnds(read, clipRight, LOW_QUAL, nLowQualBases); -// System.out.println(String.format("Debug [%d]: %s -> %s / %s", nLowQualBases, cigar.toString(), clipLeft.getCigarString(), clipRight.getCigarString())); - - assertUnclippedLimits(read, clipRight); // Make sure limits haven't changed - assertNoLowQualBases(clipRight, LOW_QUAL); // Make sure the low qualities are gone - Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped - String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString())); - + /** create a read with nLowQualBases on both tails */ if (nLowQualBases <= readLength / 2) { - Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases on both tails + Utils.fillArrayWithByte(quals, HIGH_QUAL); for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) { quals[addBoth] = LOW_QUAL; quals[readLength - addBoth - 1] = LOW_QUAL; } read.setBaseQualities(quals); GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); - - assertUnclippedLimits(read, clipBoth); // Make sure limits haven't changed - assertNoLowQualBases(clipBoth, LOW_QUAL); // Make sure the low qualities are gone - Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped - String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2 * nLowQualBases), read.getCigarString(), clipBoth.getCigarString())); + checkClippedReadsForLowQualEnds(read, clipBoth, LOW_QUAL, 2*nLowQualBases); } } } @@ -209,8 +198,8 @@ public class ReadClipperUnitTest extends BaseTest { CigarCounter original = new CigarCounter(read); CigarCounter clipped = new CigarCounter(clippedRead); - assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed - original.assertHardClippingSoftClips(clipped); // Make sure we have only clipped SOFT_CLIPS + assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed + original.assertHardClippingSoftClips(clipped); // Make sure we have only clipped SOFT_CLIPS } } @@ -286,11 +275,17 @@ public class ReadClipperUnitTest extends BaseTest { } } + private void checkClippedReadsForLowQualEnds(GATKSAMRecord read, GATKSAMRecord clippedRead, byte lowQual, int nLowQualBases) { + assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed + assertNoLowQualBases(clippedRead, lowQual); // Make sure the low qualities are gone + assertNumberOfBases(read, clippedRead, nLowQualBases); // Make sure only low quality bases were clipped + } + /** * Asserts that clipping doesn't change the getUnclippedStart / getUnclippedEnd * - * @param original - * @param clipped + * @param original original read + * @param clipped clipped read */ private void assertUnclippedLimits(GATKSAMRecord original, GATKSAMRecord clipped) { if (ReadClipperTestUtils.readHasNonClippedBases(clipped)) { @@ -299,6 +294,12 @@ public class ReadClipperUnitTest extends BaseTest { } } + private void assertNumberOfBases(GATKSAMRecord read, GATKSAMRecord clipLeft, int nLowQualBases) { + if (read.getCigarString().contains("M")) + Assert.assertEquals(clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, read.getCigarString(), clipLeft.getCigarString())); + } + + private boolean startsWithInsertion(Cigar cigar) { return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0; }