diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java index c28e205bf..74d8a8180 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java @@ -43,7 +43,6 @@ import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.sam.ReadUtils; import java.io.File; import java.io.PrintStream; @@ -300,7 +299,7 @@ public class ClipReadsWalker extends ReadWalker= 0 && stop <= read.getReadLength() - 1", // start and stop have to be within the read "start == 0 || stop == read.getReadLength() - 1"}) // cannot clip the middle of the read - public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { + private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) - return new GATKSAMRecord(read.getHeader()); + return GATKSAMRecord.emptyRead(read); +// return new GATKSAMRecord(read.getHeader()); this.addOp(new ClippingOp(start, stop)); return clipRead(ClippingRepresentation.HARDCLIP_BASES); @@ -208,15 +210,17 @@ public class ReadClipper { @Requires({"left <= right", // tails cannot overlap "left >= read.getAlignmentStart()", // coordinate has to be within the mapped read "right <= read.getAlignmentEnd()"}) // coordinate has to be within the mapped read - public GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { + private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { if (read.isEmpty() || left == right) - return new GATKSAMRecord(read.getHeader()); + return GATKSAMRecord.emptyRead(read); +// return new GATKSAMRecord(read.getHeader()); GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); // after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions // make the left cut index no longer part of the read. In that case, clip the read entirely. if (left > leftTailRead.getAlignmentEnd()) - return new GATKSAMRecord(read.getHeader()); + return GATKSAMRecord.emptyRead(read); +// return new GATKSAMRecord(read.getHeader()); ReadClipper clipper = new ReadClipper(leftTailRead); return clipper.hardClipByReferenceCoordinatesLeftTail(left); @@ -235,7 +239,7 @@ public class ReadClipper { * @param lowQual every base quality lower than or equal to this in the tail of the read will be hard clipped * @return a new read without low quality tails */ - public GATKSAMRecord hardClipLowQualEnds(byte lowQual) { + private GATKSAMRecord hardClipLowQualEnds(byte lowQual) { if (read.isEmpty()) return read; @@ -249,7 +253,8 @@ public class ReadClipper { // if the entire read should be clipped, then return an empty read. if (leftClipIndex > rightClipIndex) - return (new GATKSAMRecord(read.getHeader())); + return GATKSAMRecord.emptyRead(read); +// return (new GATKSAMRecord(read.getHeader())); if (rightClipIndex < read.getReadLength() - 1) { this.addOp(new ClippingOp(rightClipIndex + 1, read.getReadLength() - 1)); @@ -269,7 +274,7 @@ public class ReadClipper { * * @return a new read without the soft clipped bases */ - public GATKSAMRecord hardClipSoftClippedBases () { + private GATKSAMRecord hardClipSoftClippedBases () { if (read.isEmpty()) return read; @@ -314,7 +319,7 @@ public class ReadClipper { * * @return a new read without adaptor sequence */ - public GATKSAMRecord hardClipAdaptorSequence () { + private GATKSAMRecord hardClipAdaptorSequence () { final Integer adaptorBoundary = ReadUtils.getAdaptorBoundary(read); if (adaptorBoundary == null || !ReadUtils.isInsideRead(read, adaptorBoundary)) @@ -332,7 +337,7 @@ public class ReadClipper { * * @return a new read without leading insertions */ - public GATKSAMRecord hardClipLeadingInsertions() { + private GATKSAMRecord hardClipLeadingInsertions() { if (read.isEmpty()) return read; @@ -357,7 +362,7 @@ public class ReadClipper { * * @return a new read with every soft clip turned into a match */ - public GATKSAMRecord revertSoftClippedBases() { + private GATKSAMRecord revertSoftClippedBases() { this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES); } @@ -379,7 +384,8 @@ public class ReadClipper { int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) - return new GATKSAMRecord(read.getHeader()); + return GATKSAMRecord.emptyRead(read); +// return new GATKSAMRecord(read.getHeader()); if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index e8df869e6..96713edc2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -45,7 +45,6 @@ import java.util.Map; */ public class GATKSAMRecord extends BAMRecord { public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; - public static final String REDUCED_READ_FILTERED_TAG = "RF"; // the SAMRecord data we're caching private String mReadString = null; @@ -317,6 +316,46 @@ public class GATKSAMRecord extends BAMRecord { return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; } + /** + * Creates an empty GATKSAMRecord with the read's header, read group and mate + * information, but empty (not-null) fields: + * - Cigar String + * - Read Bases + * - Base Qualities + * + * Use this method if you want to create a new empty GATKSAMRecord based on + * another GATKSAMRecord + * + * @param read + * @return + */ + public static GATKSAMRecord emptyRead(GATKSAMRecord read) { + GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(), + read.getReferenceIndex(), + 0, + (short) 0, + (short) 0, + 0, + 0, + read.getFlags(), + 0, + read.getMateReferenceIndex(), + read.getMateAlignmentStart(), + read.getInferredInsertSize(), + null); + emptyRead.setCigarString(""); + emptyRead.setReadBases(new byte[0]); + emptyRead.setBaseQualities(new byte[0]); + + SAMReadGroupRecord samRG = read.getReadGroup(); + emptyRead.clearAttributes(); + if (samRG != null) { + GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG); + emptyRead.setReadGroup(rg); + } + + return emptyRead; + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index d1d5ddd8f..4dad68dc5 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -60,7 +60,7 @@ public class ReadClipperUnitTest extends BaseTest { int alnEnd = read.getAlignmentEnd(); int readLength = alnStart - alnEnd; for (int i=0; i= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString())); Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString())); } @@ -73,10 +73,10 @@ public class ReadClipperUnitTest extends BaseTest { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int readLength = read.getReadLength(); for (int i=0; i %s", i, read.getCigarString(), clipLeft.getCigarString())); - GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReadCoordinates(i, readLength-1); + GATKSAMRecord clipRight = ReadClipper.hardClipByReadCoordinates(read, i, readLength-1); Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString())); } } @@ -112,7 +112,7 @@ public class ReadClipperUnitTest extends BaseTest { int alnEnd = read.getAlignmentEnd(); if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side for (int i=alnStart; i<=alnEnd; i++) { - GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i); + GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i); if (!clipLeft.isEmpty()) Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); } @@ -126,9 +126,9 @@ public class ReadClipperUnitTest extends BaseTest { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); - if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side + if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side for (int i=alnStart; i<=alnEnd; i++) { - GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd); + GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i); if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); } @@ -154,7 +154,7 @@ public class ReadClipperUnitTest extends BaseTest { for (int addLeft = 0; addLeft < nLowQualBases; addLeft++) quals[addLeft] = LOW_QUAL; read.setBaseQualities(quals); - GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL); + GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); // Tests @@ -162,14 +162,14 @@ public class ReadClipperUnitTest extends BaseTest { assertNoLowQualBases(clipLeft, LOW_QUAL); // Can't run this test with the current contract of no hanging insertions - //Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString())); +// Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString())); // create a read with nLowQualBases in the right tail Utils.fillArrayWithByte(quals, HIGH_QUAL); for (int addRight = 0; addRight < nLowQualBases; addRight++) quals[readLength - addRight - 1] = LOW_QUAL; read.setBaseQualities(quals); - GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL); + GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); // Tests @@ -187,7 +187,7 @@ public class ReadClipperUnitTest extends BaseTest { quals[readLength - addBoth - 1] = LOW_QUAL; } read.setBaseQualities(quals); - GATKSAMRecord clipBoth = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL); + GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); // Tests @@ -214,8 +214,7 @@ public class ReadClipperUnitTest extends BaseTest { GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR); GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR); - ReadClipper lowQualClipper = new ReadClipper(read); - ReadClipperTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected); + ReadClipperTestUtils.assertEqualReads(ReadClipper.hardClipLowQualEnds(read, (byte) 2), expected); } @Test(enabled = true) @@ -224,8 +223,7 @@ public class ReadClipperUnitTest extends BaseTest { // Generate a list of cigars to test for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); - ReadClipper readClipper = new ReadClipper(read); - GATKSAMRecord clippedRead = readClipper.hardClipSoftClippedBases(); + GATKSAMRecord clippedRead = ReadClipper.hardClipSoftClippedBases(read); int sumHardClips = 0; int sumMatches = 0; @@ -276,7 +274,7 @@ public class ReadClipperUnitTest extends BaseTest { for (Cigar cigar : cigarList) { if (startsWithInsertion(cigar)) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); - GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipLeadingInsertions(); + GATKSAMRecord clippedRead = ReadClipper.hardClipLeadingInsertions(read); int expectedLength = read.getReadLength() - leadingCigarElementLength(read.getCigar(), CigarOperator.INSERTION); if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar())) @@ -300,7 +298,7 @@ public class ReadClipperUnitTest extends BaseTest { final int tailSoftClips = leadingCigarElementLength(ReadClipperTestUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP); final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); - final GATKSAMRecord unclipped = (new ReadClipper(read)).revertSoftClippedBases(); + final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read); if ( leadingSoftClips > 0 || tailSoftClips > 0) { final int expectedStart = read.getAlignmentStart() - leadingSoftClips;