Added REVERT_SOFTCLIPPED_BASES capability to ReadClipper
* New ClippingOp REVERT_SOFTCLIPPED_BASES turns soft clipped bases into matches.
* Added functionality to clipping op to revert all soft clip bases in a read into matches
* Added revertSoftClipBases function to the ReadClipper for public use
* Wrote systematic unit tests
This commit is contained in:
parent
24585062f8
commit
78d9bf7196
|
|
@ -104,6 +104,10 @@ public class ClippingOp {
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case REVERT_SOFTCLIPPED_BASES:
|
||||||
|
read = revertSoftClippedBases(read);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
throw new IllegalStateException("Unexpected Clipping operator type " + algorithm);
|
throw new IllegalStateException("Unexpected Clipping operator type " + algorithm);
|
||||||
}
|
}
|
||||||
|
|
@ -111,6 +115,38 @@ public class ClippingOp {
|
||||||
return read;
|
return read;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
|
||||||
|
GATKSAMRecord unclipped;
|
||||||
|
|
||||||
|
// shallow copy of the read bases and quals should be fine here because they are immutable in the original read
|
||||||
|
try {
|
||||||
|
unclipped = (GATKSAMRecord) read.clone();
|
||||||
|
} catch (CloneNotSupportedException e) {
|
||||||
|
throw new ReviewedStingException("Where did the clone go?");
|
||||||
|
}
|
||||||
|
|
||||||
|
Cigar unclippedCigar = new Cigar();
|
||||||
|
int matchesCount = 0;
|
||||||
|
for (CigarElement element : read.getCigar().getCigarElements()) {
|
||||||
|
if (element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
|
||||||
|
matchesCount += element.getLength();
|
||||||
|
else if (matchesCount > 0) {
|
||||||
|
unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
|
||||||
|
matchesCount = 0;
|
||||||
|
unclippedCigar.add(element);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
unclippedCigar.add(element);
|
||||||
|
}
|
||||||
|
if (matchesCount > 0)
|
||||||
|
unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
|
||||||
|
|
||||||
|
unclipped.setCigar(unclippedCigar);
|
||||||
|
unclipped.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar));
|
||||||
|
|
||||||
|
return unclipped;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a cigar string, get the number of bases hard or soft clipped at the start
|
* Given a cigar string, get the number of bases hard or soft clipped at the start
|
||||||
*/
|
*/
|
||||||
|
|
@ -472,7 +508,7 @@ public class ClippingOp {
|
||||||
|
|
||||||
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
|
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
|
||||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
|
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
|
||||||
oldShift += Math.min(cigarElement.getLength(), newShift - oldShift);
|
oldShift += cigarElement.getLength();
|
||||||
else if (readHasStarted)
|
else if (readHasStarted)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,5 +29,10 @@ public enum ClippingRepresentation {
|
||||||
* lossy) operation. Note that this can only be applied to cases where the clipped
|
* lossy) operation. Note that this can only be applied to cases where the clipped
|
||||||
* bases occur at the start or end of a read.
|
* bases occur at the start or end of a read.
|
||||||
*/
|
*/
|
||||||
HARDCLIP_BASES
|
HARDCLIP_BASES,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Turn all soft-clipped bases into matches
|
||||||
|
*/
|
||||||
|
REVERT_SOFTCLIPPED_BASES,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -213,4 +213,9 @@ public class ReadClipper {
|
||||||
}
|
}
|
||||||
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public GATKSAMRecord revertSoftClippedBases() {
|
||||||
|
this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates
|
||||||
|
return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -279,9 +279,9 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipLeadingInsertions();
|
GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipLeadingInsertions();
|
||||||
|
|
||||||
int expectedLength = read.getReadLength() - leadingInsertionLength(read.getCigar());
|
int expectedLength = read.getReadLength() - leadingCigarElementLength(read.getCigar(), CigarOperator.INSERTION);
|
||||||
if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
|
if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
|
||||||
expectedLength -= leadingInsertionLength(ReadClipperTestUtils.invertCigar(read.getCigar()));
|
expectedLength -= leadingCigarElementLength(ReadClipperTestUtils.invertCigar(read.getCigar()), CigarOperator.INSERTION);
|
||||||
|
|
||||||
if (! clippedRead.isEmpty()) {
|
if (! clippedRead.isEmpty()) {
|
||||||
Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there
|
Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there
|
||||||
|
|
@ -293,6 +293,27 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testRevertSoftClippedBases()
|
||||||
|
{
|
||||||
|
for (Cigar cigar: cigarList) {
|
||||||
|
final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
|
||||||
|
final int tailSoftClips = leadingCigarElementLength(ReadClipperTestUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP);
|
||||||
|
|
||||||
|
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
|
final GATKSAMRecord unclipped = (new ReadClipper(read)).revertSoftClippedBases();
|
||||||
|
|
||||||
|
if ( leadingSoftClips > 0 || tailSoftClips > 0) {
|
||||||
|
final int expectedStart = read.getAlignmentStart() - leadingSoftClips;
|
||||||
|
final int expectedEnd = read.getAlignmentEnd() + tailSoftClips;
|
||||||
|
|
||||||
|
Assert.assertEquals(unclipped.getAlignmentStart(), expectedStart);
|
||||||
|
Assert.assertEquals(unclipped.getAlignmentEnd(), expectedEnd);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
Assert.assertEquals(read.getCigarString(), unclipped.getCigarString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
|
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
|
||||||
|
|
@ -304,12 +325,12 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean startsWithInsertion(Cigar cigar) {
|
private boolean startsWithInsertion(Cigar cigar) {
|
||||||
return leadingInsertionLength(cigar) > 0;
|
return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int leadingInsertionLength(Cigar cigar) {
|
private int leadingCigarElementLength(Cigar cigar, CigarOperator operator) {
|
||||||
for (CigarElement cigarElement : cigar.getCigarElements()) {
|
for (CigarElement cigarElement : cigar.getCigarElements()) {
|
||||||
if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
if (cigarElement.getOperator() == operator)
|
||||||
return cigarElement.getLength();
|
return cigarElement.getLength();
|
||||||
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue