Fix for the "Removed too many insertions, header is now negative" bug in ReduceReads.
The problem ultimately was that ReadUtils.readStartsWithInsertion() ignores leading hard/softclips, but ReduceReads does not. So I refactored that method to include a boolean argument as to whether or not clips should be ignored. Also rebased so that return type is no longer a Pair. Added unit test to cover this situation.
This commit is contained in:
parent
684c91c2e7
commit
a5a68c09fa
|
|
@ -207,7 +207,7 @@ public class HeaderElement {
|
|||
public void removeInsertionToTheRight() {
|
||||
this.insertionsToTheRight--;
|
||||
if (insertionsToTheRight < 0)
|
||||
throw new ReviewedStingException("Removed too many insertions, header is now negative!");
|
||||
throw new ReviewedStingException("Removed too many insertions, header is now negative at position " + location);
|
||||
}
|
||||
|
||||
public boolean hasInsertionToTheRight() {
|
||||
|
|
|
|||
|
|
@ -1199,7 +1199,7 @@ public class SlidingWindow {
|
|||
}
|
||||
|
||||
// Special case for leading insertions before the beginning of the sliding read
|
||||
if ( ReadUtils.readStartsWithInsertion(read).getFirst() && (readStart == headerStart || headerStart < 0) ) {
|
||||
if ( (readStart == headerStart || headerStart < 0) && ReadUtils.readStartsWithInsertion(read.getCigar(), false) != null ) {
|
||||
// create a new first element to the window header with no bases added
|
||||
header.addFirst(new HeaderElement(readStart - 1));
|
||||
// this allows the first element (I) to look at locationIndex - 1 when we update the header and do the right thing
|
||||
|
|
|
|||
|
|
@ -89,6 +89,25 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
return variantRegionBitset;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
//// Test for leading softclips immediately followed by an insertion in the CIGAR ////
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testLeadingClipThenInsertion() {
|
||||
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 10);
|
||||
read.setReadBases(Utils.dupBytes((byte) 'A', 10));
|
||||
read.setBaseQualities(Utils.dupBytes((byte)30, 10));
|
||||
read.setMappingQuality(30);
|
||||
read.setCigarString("2S2I6M");
|
||||
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 1);
|
||||
slidingWindow.addRead(read);
|
||||
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(null);
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
//// This section tests the findVariantRegions() method and related functionality ////
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
|||
|
|
@ -424,9 +424,9 @@ public class ReadUtils {
|
|||
// clipping the left tail and first base is insertion, go to the next read coordinate
|
||||
// with the same reference coordinate. Advance to the next cigar element, or to the
|
||||
// end of the read if there is no next element.
|
||||
Pair<Boolean, CigarElement> firstElementIsInsertion = readStartsWithInsertion(cigar);
|
||||
if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion.getFirst())
|
||||
readCoord = Math.min(firstElementIsInsertion.getSecond().getLength(), cigar.getReadLength() - 1);
|
||||
final CigarElement firstElementIsInsertion = readStartsWithInsertion(cigar);
|
||||
if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion != null)
|
||||
readCoord = Math.min(firstElementIsInsertion.getLength(), cigar.getReadLength() - 1);
|
||||
|
||||
return readCoord;
|
||||
}
|
||||
|
|
@ -595,25 +595,28 @@ public class ReadUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Checks if a read starts with an insertion. It looks beyond Hard and Soft clips
|
||||
* if there are any.
|
||||
*
|
||||
* @param read
|
||||
* @return A pair with the answer (true/false) and the element or null if it doesn't exist
|
||||
* @see #readStartsWithInsertion(net.sf.samtools.Cigar, boolean) with ignoreClipOps set to true
|
||||
*/
|
||||
public static Pair<Boolean, CigarElement> readStartsWithInsertion(GATKSAMRecord read) {
|
||||
return readStartsWithInsertion(read.getCigar());
|
||||
public static CigarElement readStartsWithInsertion(final Cigar cigarForRead) {
|
||||
return readStartsWithInsertion(cigarForRead, true);
|
||||
}
|
||||
|
||||
public static Pair<Boolean, CigarElement> readStartsWithInsertion(final Cigar cigar) {
|
||||
for (CigarElement cigarElement : cigar.getCigarElements()) {
|
||||
if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
||||
return new Pair<Boolean, CigarElement>(true, cigarElement);
|
||||
/**
|
||||
* Checks if a read starts with an insertion.
|
||||
*
|
||||
* @param cigarForRead the CIGAR to evaluate
|
||||
* @param ignoreClipOps should we ignore S and H operators when evaluating whether an I operator is at the beginning?
|
||||
* @return the element if it's a leading insertion or null otherwise
|
||||
*/
|
||||
public static CigarElement readStartsWithInsertion(final Cigar cigarForRead, final boolean ignoreClipOps) {
|
||||
for ( final CigarElement cigarElement : cigarForRead.getCigarElements() ) {
|
||||
if ( cigarElement.getOperator() == CigarOperator.INSERTION )
|
||||
return cigarElement;
|
||||
|
||||
else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP)
|
||||
else if ( !ignoreClipOps || (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP) )
|
||||
break;
|
||||
}
|
||||
return new Pair<Boolean, CigarElement>(false, null);
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue