From a5a68c09fac06d2ebc3ee6b9b94b31bded7cbfdd Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 29 May 2013 14:42:15 -0400 Subject: [PATCH] Fix for the "Removed too many insertions, header is now negative" bug in ReduceReads. The problem ultimately was that ReadUtils.readStartsWithInsertion() ignores leading hard/softclips, but ReduceReads does not. So I refactored that method to include a boolean argument as to whether or not clips should be ignored. Also rebased so that return type is no longer a Pair. Added unit test to cover this situation. --- .../reducereads/HeaderElement.java | 2 +- .../reducereads/SlidingWindow.java | 2 +- .../reducereads/SlidingWindowUnitTest.java | 19 ++++++++++ .../sting/utils/sam/ReadUtils.java | 35 ++++++++++--------- 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java index 38b9e957b..ba2c2ae56 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java @@ -207,7 +207,7 @@ public class HeaderElement { public void removeInsertionToTheRight() { this.insertionsToTheRight--; if (insertionsToTheRight < 0) - throw new ReviewedStingException("Removed too many insertions, header is now negative!"); + throw new ReviewedStingException("Removed too many insertions, header is now negative at position " + location); } public boolean hasInsertionToTheRight() { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index 8843d6270..0425af3df 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -1199,7 +1199,7 @@ public class SlidingWindow { } // Special case for leading insertions before the beginning of the sliding read - if ( ReadUtils.readStartsWithInsertion(read).getFirst() && (readStart == headerStart || headerStart < 0) ) { + if ( (readStart == headerStart || headerStart < 0) && ReadUtils.readStartsWithInsertion(read.getCigar(), false) != null ) { // create a new first element to the window header with no bases added header.addFirst(new HeaderElement(readStart - 1)); // this allows the first element (I) to look at locationIndex - 1 when we update the header and do the right thing diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java index 56ad02084..c9bb2f084 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java @@ -89,6 +89,25 @@ public class SlidingWindowUnitTest extends BaseTest { return variantRegionBitset; } + ////////////////////////////////////////////////////////////////////////////////////// + //// Test for leading softclips immediately followed by an insertion in the CIGAR //// + ////////////////////////////////////////////////////////////////////////////////////// + + @Test(enabled = true) + public void testLeadingClipThenInsertion() { + + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 10); + read.setReadBases(Utils.dupBytes((byte) 'A', 10)); + read.setBaseQualities(Utils.dupBytes((byte)30, 10)); + read.setMappingQuality(30); + read.setCigarString("2S2I6M"); + + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 1); + slidingWindow.addRead(read); + Pair, CompressionStash> result = slidingWindow.close(null); + + } + ////////////////////////////////////////////////////////////////////////////////////// //// This section tests the findVariantRegions() method and related functionality //// ////////////////////////////////////////////////////////////////////////////////////// diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 0db3aa043..5b15fdd1b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -424,9 +424,9 @@ public class ReadUtils { // clipping the left tail and first base is insertion, go to the next read coordinate // with the same reference coordinate. Advance to the next cigar element, or to the // end of the read if there is no next element. - Pair firstElementIsInsertion = readStartsWithInsertion(cigar); - if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion.getFirst()) - readCoord = Math.min(firstElementIsInsertion.getSecond().getLength(), cigar.getReadLength() - 1); + final CigarElement firstElementIsInsertion = readStartsWithInsertion(cigar); + if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion != null) + readCoord = Math.min(firstElementIsInsertion.getLength(), cigar.getReadLength() - 1); return readCoord; } @@ -595,25 +595,28 @@ public class ReadUtils { } /** - * Checks if a read starts with an insertion. It looks beyond Hard and Soft clips - * if there are any. - * - * @param read - * @return A pair with the answer (true/false) and the element or null if it doesn't exist + * @see #readStartsWithInsertion(net.sf.samtools.Cigar, boolean) with ignoreClipOps set to true */ - public static Pair readStartsWithInsertion(GATKSAMRecord read) { - return readStartsWithInsertion(read.getCigar()); + public static CigarElement readStartsWithInsertion(final Cigar cigarForRead) { + return readStartsWithInsertion(cigarForRead, true); } - public static Pair readStartsWithInsertion(final Cigar cigar) { - for (CigarElement cigarElement : cigar.getCigarElements()) { - if (cigarElement.getOperator() == CigarOperator.INSERTION) - return new Pair(true, cigarElement); + /** + * Checks if a read starts with an insertion. + * + * @param cigarForRead the CIGAR to evaluate + * @param ignoreClipOps should we ignore S and H operators when evaluating whether an I operator is at the beginning? + * @return the element if it's a leading insertion or null otherwise + */ + public static CigarElement readStartsWithInsertion(final Cigar cigarForRead, final boolean ignoreClipOps) { + for ( final CigarElement cigarElement : cigarForRead.getCigarElements() ) { + if ( cigarElement.getOperator() == CigarOperator.INSERTION ) + return cigarElement; - else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP) + else if ( !ignoreClipOps || (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP) ) break; } - return new Pair(false, null); + return null; } /**