diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index d934bb972..d438de900 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -422,13 +422,24 @@ public class ReadClipper { /** * Reverts only soft clipped bases with quality score greater than or equal to minQual * - * Note: Will write a temporary field with the number of soft clips that were undone on each side (left: 'SL', right: 'SR') + * todo -- Note: Will write a temporary field with the number of soft clips that were undone on each side (left: 'SL', right: 'SR') -- THIS HAS BEEN REMOVED TEMPORARILY SHOULD HAPPEN INSIDE THE CLIPPING ROUTINE! * * @param read the read * @param minQual the mininum base quality score to revert the base (inclusive) - * @return the read with high quality soft clips reverted + * @return a new read with high quality soft clips reverted */ public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read, byte minQual) { + return revertSoftClippedBases(hardClipLowQualitySoftClips(read, minQual)); + } + + /** + * Hard clips away soft clipped bases that are below the given quality threshold + * + * @param read the read + * @param minQual the mininum base quality score to revert the base (inclusive) + * @return a new read without low quality soft clipped bases + */ + public static GATKSAMRecord hardClipLowQualitySoftClips(GATKSAMRecord read, byte minQual) { int nLeadingSoftClips = read.getAlignmentStart() - read.getSoftStart(); if (read.isEmpty() || nLeadingSoftClips > read.getReadLength()) return GATKSAMRecord.emptyRead(read); @@ -457,17 +468,12 @@ public class ReadClipper { } GATKSAMRecord clippedRead = read; - if (right >= 0) { - if (right + 1 < clippedRead.getReadLength()) - clippedRead = hardClipByReadCoordinates(clippedRead, right+1, clippedRead.getReadLength()-1); // first we hard clip the low quality soft clips on the left tail - clippedRead.setTemporaryAttribute("SR", nTailingSoftClips - (read.getReadLength() - right - 1)); // keep track of how may bases to 're-softclip' after processing - } - if (left >= 0) { - if (left - 1 > 0) - clippedRead = hardClipByReadCoordinates(clippedRead, 0, left-1); // then we hard clip the low quality soft clips on the right tail - clippedRead.setTemporaryAttribute("SL", nLeadingSoftClips - left); // keep track of how may bases to 're-softclip' after processing - } - return revertSoftClippedBases(clippedRead); // now that we have only good bases in the soft clips, we can revert them all + if (right >= 0 && right + 1 < clippedRead.getReadLength()) // only clip if there are softclipped bases (right >= 0) and the first high quality soft clip is not the last base (right+1 < readlength) + clippedRead = hardClipByReadCoordinates(clippedRead, right+1, clippedRead.getReadLength()-1); // first we hard clip the low quality soft clips on the right tail + if (left >= 0 && left - 1 > 0) // only clip if there are softclipped bases (left >= 0) and the first high quality soft clip is not the last base (left-1 > 0) + clippedRead = hardClipByReadCoordinates(clippedRead, 0, left-1); // then we hard clip the low quality soft clips on the left tail + + return clippedRead; } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index b6cc97c9f..5fbe12eed 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -194,12 +194,35 @@ public class GATKSAMRecord extends BAMRecord { } } + /** + * @return whether or not this read has base insertion or deletion qualities (one of the two is sufficient to return true) + */ public boolean hasBaseIndelQualities() { return getAttribute( BQSR_BASE_INSERTION_QUALITIES ) != null || getAttribute( BQSR_BASE_DELETION_QUALITIES ) != null; } + /** + * @return the base deletion quality or null if read doesn't have one + */ + public byte[] getExistingBaseInsertionQualities() { + return SAMUtils.fastqToPhred( getStringAttribute(BQSR_BASE_INSERTION_QUALITIES)); + } + + /** + * @return the base deletion quality or null if read doesn't have one + */ + public byte[] getExistingBaseDeletionQualities() { + return SAMUtils.fastqToPhred( getStringAttribute(BQSR_BASE_DELETION_QUALITIES)); + } + + /** + * Default utility to query the base insertion quality of a read. If the read doesn't have one, it creates an array of default qualities (currently Q45) + * and assigns it to the read. + * + * @return the base insertion quality array + */ public byte[] getBaseInsertionQualities() { - byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_INSERTION_QUALITIES ) ); + byte [] quals = getExistingBaseInsertionQualities(); if( quals == null ) { quals = new byte[getBaseQualities().length]; Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will @@ -209,8 +232,14 @@ public class GATKSAMRecord extends BAMRecord { return quals; } + /** + * Default utility to query the base deletion quality of a read. If the read doesn't have one, it creates an array of default qualities (currently Q45) + * and assigns it to the read. + * + * @return the base deletion quality array + */ public byte[] getBaseDeletionQualities() { - byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_DELETION_QUALITIES ) ); + byte[] quals = getExistingBaseDeletionQualities(); if( quals == null ) { quals = new byte[getBaseQualities().length]; Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will @@ -478,6 +507,7 @@ public class GATKSAMRecord extends BAMRecord { public Object clone() throws CloneNotSupportedException { final GATKSAMRecord clone = (GATKSAMRecord) super.clone(); if (temporaryAttributes != null) { + clone.temporaryAttributes = new HashMap(); for (Object attribute : temporaryAttributes.keySet()) clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute)); } diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index bc918c0a4..44c9ab255 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -45,7 +45,7 @@ import java.util.List; public class ReadClipperUnitTest extends BaseTest { List cigarList; - int maximumCigarSize = 6; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2 + int maximumCigarSize = 6; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2 @BeforeClass public void init() { @@ -92,22 +92,15 @@ public class ReadClipperUnitTest extends BaseTest { int start = read.getSoftStart(); int stop = read.getSoftEnd(); -// System.out.println(String.format("CIGAR: %s (%d, %d)", cigar.toString(), start, stop)); - -// if (ReadUtils.readIsEntirelyInsertion(read)) -// System.out.println("debug"); - for (int i = start; i <= stop; i++) { GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, i); if (!clipLeft.isEmpty()) { -// System.out.println(String.format("\t left [%d] %s -> %s ", i-start+1, cigar.toString(), clipLeft.getCigarString())); Assert.assertTrue(clipLeft.getAlignmentStart() >= Math.min(read.getAlignmentEnd(), i + 1), String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); assertUnclippedLimits(read, clipLeft); } GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, -1); - if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. -// System.out.println(String.format("\t right [%d] %s -> %s ", i-start+1, cigar.toString(), clipRight.getCigarString())); + if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getAlignmentEnd() <= Math.max(read.getAlignmentStart(), i - 1), String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); assertUnclippedLimits(read, clipRight); } @@ -121,7 +114,7 @@ public class ReadClipperUnitTest extends BaseTest { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); - if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side + if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side for (int i = alnStart; i <= alnEnd; i++) { GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i); @@ -141,7 +134,7 @@ public class ReadClipperUnitTest extends BaseTest { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); - if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side + if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side for (int i = alnStart; i <= alnEnd; i++) { GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i); if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. @@ -165,7 +158,7 @@ public class ReadClipperUnitTest extends BaseTest { byte[] quals = new byte[readLength]; for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) { - Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail + Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail for (int addLeft = 0; addLeft < nLowQualBases; addLeft++) quals[addLeft] = LOW_QUAL; read.setBaseQualities(quals); @@ -252,7 +245,7 @@ public class ReadClipperUnitTest extends BaseTest { final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read); - assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed + assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed if (leadingSoftClips > 0 || tailSoftClips > 0) { final int expectedStart = read.getAlignmentStart() - leadingSoftClips; @@ -265,6 +258,25 @@ public class ReadClipperUnitTest extends BaseTest { } } + @Test(enabled = true) + public void testRevertSoftClippedBasesWithThreshold() { + for (Cigar cigar : cigarList) { + final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP); + final int tailSoftClips = leadingCigarElementLength(ReadClipperTestUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP); + + final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); + final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read); + + assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed + Assert.assertNull(read.getCigar().isValid(null, -1)); + Assert.assertNull(unclipped.getCigar().isValid(null, -1)); + + if (!(leadingSoftClips > 0 || tailSoftClips > 0)) + Assert.assertEquals(read.getCigarString(), unclipped.getCigarString()); + + } + } + private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) { if (!read.isEmpty()) {