Fixed ReadClipperUnitTest

The behavior of the clipping on weird cigar strings such as 1I1S1H and 9S56H has changed, and the test has to change accordingly.
This commit is contained in:
Mauricio Carneiro 2012-07-03 16:37:21 -04:00
parent f09141bd2f
commit 17efbbf8b1
2 changed files with 31 additions and 33 deletions

View File

@ -84,14 +84,14 @@ public class ReadClipperTestUtils {
} }
private static boolean isCigarValid(Cigar cigar) { private static boolean isCigarValid(Cigar cigar) {
if (cigar.isValid(null, -1) == null) { // This should take care of most invalid Cigar Strings (picard's "exhaustive" implementation) if (cigar.isValid(null, -1) == null) { // This should take care of most invalid Cigar Strings (picard's "exhaustive" implementation)
Stack<CigarElement> cigarElementStack = new Stack<CigarElement>(); // Stack to invert cigar string to find ending operator Stack<CigarElement> cigarElementStack = new Stack<CigarElement>(); // Stack to invert cigar string to find ending operator
CigarOperator startingOp = null; CigarOperator startingOp = null;
CigarOperator endingOp = null; CigarOperator endingOp = null;
// check if it doesn't start with deletions // check if it doesn't start with deletions
boolean readHasStarted = false; // search the list of elements for the starting operator boolean readHasStarted = false; // search the list of elements for the starting operator
for (CigarElement cigarElement : cigar.getCigarElements()) { for (CigarElement cigarElement : cigar.getCigarElements()) {
if (!readHasStarted) { if (!readHasStarted) {
if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) { if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
@ -102,19 +102,16 @@ public class ReadClipperTestUtils {
cigarElementStack.push(cigarElement); cigarElementStack.push(cigarElement);
} }
readHasStarted = false; // search the inverted list of elements (stack) for the stopping operator
while (!cigarElementStack.empty()) { while (!cigarElementStack.empty()) {
CigarElement cigarElement = cigarElementStack.pop(); CigarElement cigarElement = cigarElementStack.pop();
if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) { if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
readHasStarted = true;
endingOp = cigarElement.getOperator(); endingOp = cigarElement.getOperator();
break; break;
} }
} }
// if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION) if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION)
return true; // we don't accept reads starting or ending in deletions (add any other constraint here) return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
} }
return false; return false;

View File

@ -151,51 +151,40 @@ public class ReadClipperUnitTest extends BaseTest {
final byte LOW_QUAL = 2; final byte LOW_QUAL = 2;
final byte HIGH_QUAL = 30; final byte HIGH_QUAL = 30;
// create a read for every cigar permutation /** create a read for every cigar permutation */
for (Cigar cigar : cigarList) { for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int readLength = read.getReadLength(); int readLength = read.getReadLength();
byte[] quals = new byte[readLength]; byte[] quals = new byte[readLength];
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) { for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail
/** create a read with nLowQualBases in the left tail */
Utils.fillArrayWithByte(quals, HIGH_QUAL);
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++) for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
quals[addLeft] = LOW_QUAL; quals[addLeft] = LOW_QUAL;
read.setBaseQualities(quals); read.setBaseQualities(quals);
GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
checkClippedReadsForLowQualEnds(read, clipLeft, LOW_QUAL, nLowQualBases);
assertUnclippedLimits(read, clipLeft); // Make sure limits haven't changed /** create a read with nLowQualBases in the right tail */
assertNoLowQualBases(clipLeft, LOW_QUAL); // Make sure the low qualities are gone Utils.fillArrayWithByte(quals, HIGH_QUAL);
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the right tail
for (int addRight = 0; addRight < nLowQualBases; addRight++) for (int addRight = 0; addRight < nLowQualBases; addRight++)
quals[readLength - addRight - 1] = LOW_QUAL; quals[readLength - addRight - 1] = LOW_QUAL;
read.setBaseQualities(quals); read.setBaseQualities(quals);
GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
checkClippedReadsForLowQualEnds(read, clipRight, LOW_QUAL, nLowQualBases);
// System.out.println(String.format("Debug [%d]: %s -> %s / %s", nLowQualBases, cigar.toString(), clipLeft.getCigarString(), clipRight.getCigarString())); /** create a read with nLowQualBases on both tails */
assertUnclippedLimits(read, clipRight); // Make sure limits haven't changed
assertNoLowQualBases(clipRight, LOW_QUAL); // Make sure the low qualities are gone
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
if (nLowQualBases <= readLength / 2) { if (nLowQualBases <= readLength / 2) {
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases on both tails Utils.fillArrayWithByte(quals, HIGH_QUAL);
for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) { for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
quals[addBoth] = LOW_QUAL; quals[addBoth] = LOW_QUAL;
quals[readLength - addBoth - 1] = LOW_QUAL; quals[readLength - addBoth - 1] = LOW_QUAL;
} }
read.setBaseQualities(quals); read.setBaseQualities(quals);
GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
checkClippedReadsForLowQualEnds(read, clipBoth, LOW_QUAL, 2*nLowQualBases);
assertUnclippedLimits(read, clipBoth); // Make sure limits haven't changed
assertNoLowQualBases(clipBoth, LOW_QUAL); // Make sure the low qualities are gone
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2 * nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
} }
} }
} }
@ -209,8 +198,8 @@ public class ReadClipperUnitTest extends BaseTest {
CigarCounter original = new CigarCounter(read); CigarCounter original = new CigarCounter(read);
CigarCounter clipped = new CigarCounter(clippedRead); CigarCounter clipped = new CigarCounter(clippedRead);
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
original.assertHardClippingSoftClips(clipped); // Make sure we have only clipped SOFT_CLIPS original.assertHardClippingSoftClips(clipped); // Make sure we have only clipped SOFT_CLIPS
} }
} }
@ -286,11 +275,17 @@ public class ReadClipperUnitTest extends BaseTest {
} }
} }
private void checkClippedReadsForLowQualEnds(GATKSAMRecord read, GATKSAMRecord clippedRead, byte lowQual, int nLowQualBases) {
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
assertNoLowQualBases(clippedRead, lowQual); // Make sure the low qualities are gone
assertNumberOfBases(read, clippedRead, nLowQualBases); // Make sure only low quality bases were clipped
}
/** /**
* Asserts that clipping doesn't change the getUnclippedStart / getUnclippedEnd * Asserts that clipping doesn't change the getUnclippedStart / getUnclippedEnd
* *
* @param original * @param original original read
* @param clipped * @param clipped clipped read
*/ */
private void assertUnclippedLimits(GATKSAMRecord original, GATKSAMRecord clipped) { private void assertUnclippedLimits(GATKSAMRecord original, GATKSAMRecord clipped) {
if (ReadClipperTestUtils.readHasNonClippedBases(clipped)) { if (ReadClipperTestUtils.readHasNonClippedBases(clipped)) {
@ -299,6 +294,12 @@ public class ReadClipperUnitTest extends BaseTest {
} }
} }
private void assertNumberOfBases(GATKSAMRecord read, GATKSAMRecord clipLeft, int nLowQualBases) {
if (read.getCigarString().contains("M"))
Assert.assertEquals(clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
}
private boolean startsWithInsertion(Cigar cigar) { private boolean startsWithInsertion(Cigar cigar) {
return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0; return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0;
} }