Merge pull request #247 from broadinstitute/eb_fix_RR_negative_header_problem

Fix for the "Removed too many insertions, header is now negative" bug in ReduceReads.
This commit is contained in:
Mark DePristo 2013-05-29 18:10:19 -07:00
commit 56b14be4bc
4 changed files with 40 additions and 18 deletions

View File

@ -207,7 +207,7 @@ public class HeaderElement {
public void removeInsertionToTheRight() {
this.insertionsToTheRight--;
if (insertionsToTheRight < 0)
throw new ReviewedStingException("Removed too many insertions, header is now negative!");
throw new ReviewedStingException("Removed too many insertions, header is now negative at position " + location);
}
public boolean hasInsertionToTheRight() {

View File

@ -1199,7 +1199,7 @@ public class SlidingWindow {
}
// Special case for leading insertions before the beginning of the sliding read
if ( ReadUtils.readStartsWithInsertion(read).getFirst() && (readStart == headerStart || headerStart < 0) ) {
if ( (readStart == headerStart || headerStart < 0) && ReadUtils.readStartsWithInsertion(read.getCigar(), false) != null ) {
// create a new first element to the window header with no bases added
header.addFirst(new HeaderElement(readStart - 1));
// this allows the first element (I) to look at locationIndex - 1 when we update the header and do the right thing

View File

@ -89,6 +89,25 @@ public class SlidingWindowUnitTest extends BaseTest {
return variantRegionBitset;
}
//////////////////////////////////////////////////////////////////////////////////////
//// Test for leading softclips immediately followed by an insertion in the CIGAR ////
//////////////////////////////////////////////////////////////////////////////////////
@Test(enabled = true)
public void testLeadingClipThenInsertion() {
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 10);
read.setReadBases(Utils.dupBytes((byte) 'A', 10));
read.setBaseQualities(Utils.dupBytes((byte)30, 10));
read.setMappingQuality(30);
read.setCigarString("2S2I6M");
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 1);
slidingWindow.addRead(read);
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(null);
}
//////////////////////////////////////////////////////////////////////////////////////
//// This section tests the findVariantRegions() method and related functionality ////
//////////////////////////////////////////////////////////////////////////////////////

View File

@ -424,9 +424,9 @@ public class ReadUtils {
// clipping the left tail and first base is insertion, go to the next read coordinate
// with the same reference coordinate. Advance to the next cigar element, or to the
// end of the read if there is no next element.
Pair<Boolean, CigarElement> firstElementIsInsertion = readStartsWithInsertion(cigar);
if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion.getFirst())
readCoord = Math.min(firstElementIsInsertion.getSecond().getLength(), cigar.getReadLength() - 1);
final CigarElement firstElementIsInsertion = readStartsWithInsertion(cigar);
if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion != null)
readCoord = Math.min(firstElementIsInsertion.getLength(), cigar.getReadLength() - 1);
return readCoord;
}
@ -595,25 +595,28 @@ public class ReadUtils {
}
/**
* Checks if a read starts with an insertion. It looks beyond Hard and Soft clips
* if there are any.
*
* @param read
* @return A pair with the answer (true/false) and the element or null if it doesn't exist
* @see #readStartsWithInsertion(net.sf.samtools.Cigar, boolean) with ignoreClipOps set to true
*/
public static Pair<Boolean, CigarElement> readStartsWithInsertion(GATKSAMRecord read) {
return readStartsWithInsertion(read.getCigar());
public static CigarElement readStartsWithInsertion(final Cigar cigarForRead) {
return readStartsWithInsertion(cigarForRead, true);
}
public static Pair<Boolean, CigarElement> readStartsWithInsertion(final Cigar cigar) {
for (CigarElement cigarElement : cigar.getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.INSERTION)
return new Pair<Boolean, CigarElement>(true, cigarElement);
/**
* Checks if a read starts with an insertion.
*
* @param cigarForRead the CIGAR to evaluate
* @param ignoreClipOps should we ignore S and H operators when evaluating whether an I operator is at the beginning?
* @return the element if it's a leading insertion or null otherwise
*/
public static CigarElement readStartsWithInsertion(final Cigar cigarForRead, final boolean ignoreClipOps) {
for ( final CigarElement cigarElement : cigarForRead.getCigarElements() ) {
if ( cigarElement.getOperator() == CigarOperator.INSERTION )
return cigarElement;
else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP)
else if ( !ignoreClipOps || (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP) )
break;
}
return new Pair<Boolean, CigarElement>(false, null);
return null;
}
/**