Fix for the "Removed too many insertions, header is now negative" bug in ReduceReads.
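The underlying problem was that ReadUtils.readStartsWithInsertion() ignores leading hard/soft clips, whereas ReduceReads does not. I therefore refactored that method to take a boolean argument specifying whether clips should be ignored. Also reworked it so that the return type is no longer a Pair: it now returns the leading insertion's CigarElement, or null if the read does not start with an insertion. Added a unit test to cover this situation (leading soft clips immediately followed by an insertion).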
This commit is contained in:
parent
684c91c2e7
commit
a5a68c09fa
|
|
@ -207,7 +207,7 @@ public class HeaderElement {
|
||||||
public void removeInsertionToTheRight() {
|
public void removeInsertionToTheRight() {
|
||||||
this.insertionsToTheRight--;
|
this.insertionsToTheRight--;
|
||||||
if (insertionsToTheRight < 0)
|
if (insertionsToTheRight < 0)
|
||||||
throw new ReviewedStingException("Removed too many insertions, header is now negative!");
|
throw new ReviewedStingException("Removed too many insertions, header is now negative at position " + location);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasInsertionToTheRight() {
|
public boolean hasInsertionToTheRight() {
|
||||||
|
|
|
||||||
|
|
@ -1199,7 +1199,7 @@ public class SlidingWindow {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Special case for leading insertions before the beginning of the sliding read
|
// Special case for leading insertions before the beginning of the sliding read
|
||||||
if ( ReadUtils.readStartsWithInsertion(read).getFirst() && (readStart == headerStart || headerStart < 0) ) {
|
if ( (readStart == headerStart || headerStart < 0) && ReadUtils.readStartsWithInsertion(read.getCigar(), false) != null ) {
|
||||||
// create a new first element to the window header with no bases added
|
// create a new first element to the window header with no bases added
|
||||||
header.addFirst(new HeaderElement(readStart - 1));
|
header.addFirst(new HeaderElement(readStart - 1));
|
||||||
// this allows the first element (I) to look at locationIndex - 1 when we update the header and do the right thing
|
// this allows the first element (I) to look at locationIndex - 1 when we update the header and do the right thing
|
||||||
|
|
|
||||||
|
|
@ -89,6 +89,25 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
return variantRegionBitset;
|
return variantRegionBitset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//// Test for leading softclips immediately followed by an insertion in the CIGAR ////
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testLeadingClipThenInsertion() {
|
||||||
|
|
||||||
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 10);
|
||||||
|
read.setReadBases(Utils.dupBytes((byte) 'A', 10));
|
||||||
|
read.setBaseQualities(Utils.dupBytes((byte)30, 10));
|
||||||
|
read.setMappingQuality(30);
|
||||||
|
read.setCigarString("2S2I6M");
|
||||||
|
|
||||||
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 1);
|
||||||
|
slidingWindow.addRead(read);
|
||||||
|
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(null);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
//// This section tests the findVariantRegions() method and related functionality ////
|
//// This section tests the findVariantRegions() method and related functionality ////
|
||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
|
||||||
|
|
@ -424,9 +424,9 @@ public class ReadUtils {
|
||||||
// clipping the left tail and first base is insertion, go to the next read coordinate
|
// clipping the left tail and first base is insertion, go to the next read coordinate
|
||||||
// with the same reference coordinate. Advance to the next cigar element, or to the
|
// with the same reference coordinate. Advance to the next cigar element, or to the
|
||||||
// end of the read if there is no next element.
|
// end of the read if there is no next element.
|
||||||
Pair<Boolean, CigarElement> firstElementIsInsertion = readStartsWithInsertion(cigar);
|
final CigarElement firstElementIsInsertion = readStartsWithInsertion(cigar);
|
||||||
if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion.getFirst())
|
if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion != null)
|
||||||
readCoord = Math.min(firstElementIsInsertion.getSecond().getLength(), cigar.getReadLength() - 1);
|
readCoord = Math.min(firstElementIsInsertion.getLength(), cigar.getReadLength() - 1);
|
||||||
|
|
||||||
return readCoord;
|
return readCoord;
|
||||||
}
|
}
|
||||||
|
|
@ -595,25 +595,28 @@ public class ReadUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if a read starts with an insertion. It looks beyond Hard and Soft clips
|
* @see #readStartsWithInsertion(net.sf.samtools.Cigar, boolean) with ignoreClipOps set to true
|
||||||
* if there are any.
|
|
||||||
*
|
|
||||||
* @param read
|
|
||||||
* @return A pair with the answer (true/false) and the element or null if it doesn't exist
|
|
||||||
*/
|
*/
|
||||||
public static Pair<Boolean, CigarElement> readStartsWithInsertion(GATKSAMRecord read) {
|
public static CigarElement readStartsWithInsertion(final Cigar cigarForRead) {
|
||||||
return readStartsWithInsertion(read.getCigar());
|
return readStartsWithInsertion(cigarForRead, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Pair<Boolean, CigarElement> readStartsWithInsertion(final Cigar cigar) {
|
/**
|
||||||
for (CigarElement cigarElement : cigar.getCigarElements()) {
|
* Checks if a read starts with an insertion.
|
||||||
|
*
|
||||||
|
* @param cigarForRead the CIGAR to evaluate
|
||||||
|
* @param ignoreClipOps should we ignore S and H operators when evaluating whether an I operator is at the beginning?
|
||||||
|
* @return the element if it's a leading insertion or null otherwise
|
||||||
|
*/
|
||||||
|
public static CigarElement readStartsWithInsertion(final Cigar cigarForRead, final boolean ignoreClipOps) {
|
||||||
|
for ( final CigarElement cigarElement : cigarForRead.getCigarElements() ) {
|
||||||
if ( cigarElement.getOperator() == CigarOperator.INSERTION )
|
if ( cigarElement.getOperator() == CigarOperator.INSERTION )
|
||||||
return new Pair<Boolean, CigarElement>(true, cigarElement);
|
return cigarElement;
|
||||||
|
|
||||||
else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP)
|
else if ( !ignoreClipOps || (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP) )
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return new Pair<Boolean, CigarElement>(false, null);
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue