Fixed ReadClipperUnitTest
The behavior of the clipping on weird cigar strings such as 1I1S1H and 9S56H has changed, and the test has to change accordingly.
This commit is contained in:
parent
f09141bd2f
commit
17efbbf8b1
|
|
@ -84,14 +84,14 @@ public class ReadClipperTestUtils {
|
|||
}
|
||||
|
||||
private static boolean isCigarValid(Cigar cigar) {
|
||||
if (cigar.isValid(null, -1) == null) { // This should take care of most invalid Cigar Strings (picard's "exhaustive" implementation)
|
||||
if (cigar.isValid(null, -1) == null) { // This should take care of most invalid Cigar Strings (picard's "exhaustive" implementation)
|
||||
|
||||
Stack<CigarElement> cigarElementStack = new Stack<CigarElement>(); // Stack to invert cigar string to find ending operator
|
||||
Stack<CigarElement> cigarElementStack = new Stack<CigarElement>(); // Stack to invert cigar string to find ending operator
|
||||
CigarOperator startingOp = null;
|
||||
CigarOperator endingOp = null;
|
||||
|
||||
// check if it doesn't start with deletions
|
||||
boolean readHasStarted = false; // search the list of elements for the starting operator
|
||||
boolean readHasStarted = false; // search the list of elements for the starting operator
|
||||
for (CigarElement cigarElement : cigar.getCigarElements()) {
|
||||
if (!readHasStarted) {
|
||||
if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
|
||||
|
|
@ -102,19 +102,16 @@ public class ReadClipperTestUtils {
|
|||
cigarElementStack.push(cigarElement);
|
||||
}
|
||||
|
||||
readHasStarted = false; // search the inverted list of elements (stack) for the stopping operator
|
||||
while (!cigarElementStack.empty()) {
|
||||
CigarElement cigarElement = cigarElementStack.pop();
|
||||
if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
|
||||
readHasStarted = true;
|
||||
endingOp = cigarElement.getOperator();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
|
||||
if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION)
|
||||
return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
|
||||
return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -151,51 +151,40 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
final byte LOW_QUAL = 2;
|
||||
final byte HIGH_QUAL = 30;
|
||||
|
||||
// create a read for every cigar permutation
|
||||
/** create a read for every cigar permutation */
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
int readLength = read.getReadLength();
|
||||
byte[] quals = new byte[readLength];
|
||||
|
||||
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
|
||||
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail
|
||||
|
||||
/** create a read with nLowQualBases in the left tail */
|
||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
||||
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
|
||||
quals[addLeft] = LOW_QUAL;
|
||||
read.setBaseQualities(quals);
|
||||
GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
||||
checkClippedReadsForLowQualEnds(read, clipLeft, LOW_QUAL, nLowQualBases);
|
||||
|
||||
assertUnclippedLimits(read, clipLeft); // Make sure limits haven't changed
|
||||
assertNoLowQualBases(clipLeft, LOW_QUAL); // Make sure the low qualities are gone
|
||||
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
|
||||
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
|
||||
|
||||
|
||||
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the right tail
|
||||
/** create a read with nLowQualBases in the right tail */
|
||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
||||
for (int addRight = 0; addRight < nLowQualBases; addRight++)
|
||||
quals[readLength - addRight - 1] = LOW_QUAL;
|
||||
read.setBaseQualities(quals);
|
||||
GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
||||
checkClippedReadsForLowQualEnds(read, clipRight, LOW_QUAL, nLowQualBases);
|
||||
|
||||
// System.out.println(String.format("Debug [%d]: %s -> %s / %s", nLowQualBases, cigar.toString(), clipLeft.getCigarString(), clipRight.getCigarString()));
|
||||
|
||||
assertUnclippedLimits(read, clipRight); // Make sure limits haven't changed
|
||||
assertNoLowQualBases(clipRight, LOW_QUAL); // Make sure the low qualities are gone
|
||||
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
|
||||
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
|
||||
|
||||
/** create a read with nLowQualBases on both tails */
|
||||
if (nLowQualBases <= readLength / 2) {
|
||||
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases on both tails
|
||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
||||
for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
|
||||
quals[addBoth] = LOW_QUAL;
|
||||
quals[readLength - addBoth - 1] = LOW_QUAL;
|
||||
}
|
||||
read.setBaseQualities(quals);
|
||||
GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
||||
|
||||
assertUnclippedLimits(read, clipBoth); // Make sure limits haven't changed
|
||||
assertNoLowQualBases(clipBoth, LOW_QUAL); // Make sure the low qualities are gone
|
||||
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
|
||||
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2 * nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
|
||||
checkClippedReadsForLowQualEnds(read, clipBoth, LOW_QUAL, 2*nLowQualBases);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -209,8 +198,8 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
CigarCounter original = new CigarCounter(read);
|
||||
CigarCounter clipped = new CigarCounter(clippedRead);
|
||||
|
||||
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
|
||||
original.assertHardClippingSoftClips(clipped); // Make sure we have only clipped SOFT_CLIPS
|
||||
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
|
||||
original.assertHardClippingSoftClips(clipped); // Make sure we have only clipped SOFT_CLIPS
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -286,11 +275,17 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
private void checkClippedReadsForLowQualEnds(GATKSAMRecord read, GATKSAMRecord clippedRead, byte lowQual, int nLowQualBases) {
|
||||
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
|
||||
assertNoLowQualBases(clippedRead, lowQual); // Make sure the low qualities are gone
|
||||
assertNumberOfBases(read, clippedRead, nLowQualBases); // Make sure only low quality bases were clipped
|
||||
}
|
||||
|
||||
/**
|
||||
* Asserts that clipping doesn't change the getUnclippedStart / getUnclippedEnd
|
||||
*
|
||||
* @param original
|
||||
* @param clipped
|
||||
* @param original original read
|
||||
* @param clipped clipped read
|
||||
*/
|
||||
private void assertUnclippedLimits(GATKSAMRecord original, GATKSAMRecord clipped) {
|
||||
if (ReadClipperTestUtils.readHasNonClippedBases(clipped)) {
|
||||
|
|
@ -299,6 +294,12 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
private void assertNumberOfBases(GATKSAMRecord read, GATKSAMRecord clipLeft, int nLowQualBases) {
|
||||
if (read.getCigarString().contains("M"))
|
||||
Assert.assertEquals(clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
|
||||
}
|
||||
|
||||
|
||||
private boolean startsWithInsertion(Cigar cigar) {
|
||||
return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue