Added hardClipByReferenceCoordinates UnitTest for the ReadClipper

* fixed edge case when requested to hard clip beginning of a read that had hanging soft clipped bases on the left tail.
* fixed edge case when requested to hard clip end of a read that had hanging soft clipped bases on the right tail.
* fixed AlignmentStart of a clipped read that results in only hard clips and soft clips

note: added tests to all these beautiful cases...
This commit is contained in:
Mauricio Carneiro 2011-12-16 16:36:31 -05:00
parent 5838ba529d
commit 5bba44d693
3 changed files with 34 additions and 5 deletions

View File

@ -460,17 +460,20 @@ public class ClippingOp {
int newShift = 0;
int oldShift = 0;
boolean readHasStarted = false; // if the new cigar is composed of S and H only, we have to traverse the entire old cigar to calculate the shift
for (CigarElement cigarElement : newCigar.getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
newShift += cigarElement.getLength();
else
else {
readHasStarted = true;
break;
}
}
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
oldShift += Math.min(cigarElement.getLength(), newShift - oldShift);
else
else if (readHasStarted)
break;
}
return newShift - oldShift;

View File

@ -63,12 +63,18 @@ public class ReadClipper {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
return new GATKSAMRecord(read.getHeader());
if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
if ( start > stop )
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
if ( start > 0 && stop < read.getReadLength() - 1)
throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString()));
this.addOp(new ClippingOp(start, stop));
GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
this.ops = null;
@ -76,6 +82,9 @@ public class ReadClipper {
}
public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
return new GATKSAMRecord(read.getHeader());
this.addOp(new ClippingOp(start, stop));
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@ -86,14 +87,30 @@ public class ReadClipperUnitTest extends BaseTest {
@Test(enabled = true)
public void testHardClipByReferenceCoordinates() {
logger.warn("PASSED");
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
for (int i=alnStart; i<=alnEnd; i++) {
if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
if (!clipLeft.isEmpty())
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
}
if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
}
}
}
logger.warn("PASSED");
}
@Test(enabled = true)
public void testHardClipByReferenceCoordinatesLeftTail() {
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
logger.warn("PASSED");
}
@Test(enabled = true)