Fixed ReadClipperUnitTest

The behavior of the clipping on weird cigar strings such as 1I1S1H and 9S56H has changed, and the test has to change accordingly.
This commit is contained in:
Mauricio Carneiro 2012-07-03 16:37:21 -04:00
parent f09141bd2f
commit 17efbbf8b1
2 changed files with 31 additions and 33 deletions

View File

@ -84,14 +84,14 @@ public class ReadClipperTestUtils {
}
private static boolean isCigarValid(Cigar cigar) {
if (cigar.isValid(null, -1) == null) { // This should take care of most invalid Cigar Strings (picard's "exhaustive" implementation)
if (cigar.isValid(null, -1) == null) { // This should take care of most invalid Cigar Strings (picard's "exhaustive" implementation)
Stack<CigarElement> cigarElementStack = new Stack<CigarElement>(); // Stack to invert cigar string to find ending operator
Stack<CigarElement> cigarElementStack = new Stack<CigarElement>(); // Stack to invert cigar string to find ending operator
CigarOperator startingOp = null;
CigarOperator endingOp = null;
// check if it doesn't start with deletions
boolean readHasStarted = false; // search the list of elements for the starting operator
boolean readHasStarted = false; // search the list of elements for the starting operator
for (CigarElement cigarElement : cigar.getCigarElements()) {
if (!readHasStarted) {
if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
@ -102,19 +102,16 @@ public class ReadClipperTestUtils {
cigarElementStack.push(cigarElement);
}
readHasStarted = false; // search the inverted list of elements (stack) for the stopping operator
while (!cigarElementStack.empty()) {
CigarElement cigarElement = cigarElementStack.pop();
if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
readHasStarted = true;
endingOp = cigarElement.getOperator();
break;
}
}
// if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION)
return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
}
return false;

View File

@ -151,51 +151,40 @@ public class ReadClipperUnitTest extends BaseTest {
final byte LOW_QUAL = 2;
final byte HIGH_QUAL = 30;
// create a read for every cigar permutation
/** create a read for every cigar permutation */
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int readLength = read.getReadLength();
byte[] quals = new byte[readLength];
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail
/** create a read with nLowQualBases in the left tail */
Utils.fillArrayWithByte(quals, HIGH_QUAL);
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
quals[addLeft] = LOW_QUAL;
read.setBaseQualities(quals);
GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
checkClippedReadsForLowQualEnds(read, clipLeft, LOW_QUAL, nLowQualBases);
assertUnclippedLimits(read, clipLeft); // Make sure limits haven't changed
assertNoLowQualBases(clipLeft, LOW_QUAL); // Make sure the low qualities are gone
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the right tail
/** create a read with nLowQualBases in the right tail */
Utils.fillArrayWithByte(quals, HIGH_QUAL);
for (int addRight = 0; addRight < nLowQualBases; addRight++)
quals[readLength - addRight - 1] = LOW_QUAL;
read.setBaseQualities(quals);
GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
checkClippedReadsForLowQualEnds(read, clipRight, LOW_QUAL, nLowQualBases);
// System.out.println(String.format("Debug [%d]: %s -> %s / %s", nLowQualBases, cigar.toString(), clipLeft.getCigarString(), clipRight.getCigarString()));
assertUnclippedLimits(read, clipRight); // Make sure limits haven't changed
assertNoLowQualBases(clipRight, LOW_QUAL); // Make sure the low qualities are gone
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
/** create a read with nLowQualBases on both tails */
if (nLowQualBases <= readLength / 2) {
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases on both tails
Utils.fillArrayWithByte(quals, HIGH_QUAL);
for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
quals[addBoth] = LOW_QUAL;
quals[readLength - addBoth - 1] = LOW_QUAL;
}
read.setBaseQualities(quals);
GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
assertUnclippedLimits(read, clipBoth); // Make sure limits haven't changed
assertNoLowQualBases(clipBoth, LOW_QUAL); // Make sure the low qualities are gone
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2 * nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
checkClippedReadsForLowQualEnds(read, clipBoth, LOW_QUAL, 2*nLowQualBases);
}
}
}
@ -209,8 +198,8 @@ public class ReadClipperUnitTest extends BaseTest {
CigarCounter original = new CigarCounter(read);
CigarCounter clipped = new CigarCounter(clippedRead);
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
original.assertHardClippingSoftClips(clipped); // Make sure we have only clipped SOFT_CLIPS
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
original.assertHardClippingSoftClips(clipped); // Make sure we have only clipped SOFT_CLIPS
}
}
@ -286,11 +275,17 @@ public class ReadClipperUnitTest extends BaseTest {
}
}
private void checkClippedReadsForLowQualEnds(GATKSAMRecord read, GATKSAMRecord clippedRead, byte lowQual, int nLowQualBases) {
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
assertNoLowQualBases(clippedRead, lowQual); // Make sure the low qualities are gone
assertNumberOfBases(read, clippedRead, nLowQualBases); // Make sure only low quality bases were clipped
}
/**
* Asserts that clipping doesn't change the getUnclippedStart / getUnclippedEnd
*
* @param original
* @param clipped
* @param original original read
* @param clipped clipped read
*/
private void assertUnclippedLimits(GATKSAMRecord original, GATKSAMRecord clipped) {
if (ReadClipperTestUtils.readHasNonClippedBases(clipped)) {
@ -299,6 +294,12 @@ public class ReadClipperUnitTest extends BaseTest {
}
}
private void assertNumberOfBases(GATKSAMRecord read, GATKSAMRecord clipLeft, int nLowQualBases) {
if (read.getCigarString().contains("M"))
Assert.assertEquals(clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
}
private boolean startsWithInsertion(Cigar cigar) {
return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0;
}