From 4633637af61da1586c892a6b0588171279d2a596 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 26 Dec 2011 01:05:24 -0500 Subject: [PATCH 04/14] Moved ReduceReads to static ReadClipper * all clipping done in ReduceReads is done using the static methods of the ReadClipper now. --- ivy.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ivy.xml b/ivy.xml index ee24bc367..ef885d24d 100644 --- a/ivy.xml +++ b/ivy.xml @@ -78,6 +78,9 @@ + + + From f7a57520253c7c87d8730f740acccd13a0ad586d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 26 Dec 2011 21:55:02 -0500 Subject: [PATCH 06/14] Let this one slip through my commits. --- ivy.xml | 3 --- 1 file changed, 3 deletions(-) diff --git a/ivy.xml b/ivy.xml index ef885d24d..ee24bc367 100644 --- a/ivy.xml +++ b/ivy.xml @@ -78,9 +78,6 @@ - - - From 17bfe48d5efafb29e18922dd6c3bb681912c3e34 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 27 Dec 2011 01:29:13 -0500 Subject: [PATCH 08/14] Made all class methods private in the ReadClipper * ReadClipperUnitTest now uses static methods * Haplotype caller now uses static methods * Exon Junction Genotyper now uses static methods --- .../sting/gatk/walkers/ClipReadsWalker.java | 3 +- .../sting/utils/clipping/ReadClipper.java | 18 +++++------ .../utils/clipping/ReadClipperUnitTest.java | 30 +++++++++---------- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java index c28e205bf..74d8a8180 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java @@ -43,7 +43,6 @@ import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.sam.ReadUtils; import java.io.File; import java.io.PrintStream; @@ -300,7 +299,7 @@ public class ClipReadsWalker extends ReadWalker= 0 && stop <= read.getReadLength() - 1", // start and stop have to be within the read "start == 0 || stop == read.getReadLength() - 1"}) // cannot clip the middle of the read - public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { + private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) return new GATKSAMRecord(read.getHeader()); @@ -208,7 +208,7 @@ public class ReadClipper { @Requires({"left <= right", // tails cannot overlap "left >= read.getAlignmentStart()", // coordinate has to be within the mapped read "right <= read.getAlignmentEnd()"}) // coordinate has to be within the mapped read - public GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { + private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { if (read.isEmpty() || left == right) return new GATKSAMRecord(read.getHeader()); GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); @@ -235,7 +235,7 @@ public class ReadClipper { * @param lowQual every base quality lower than or equal to this in the tail of the read will be hard clipped * @return a new read without low quality tails */ - public GATKSAMRecord hardClipLowQualEnds(byte lowQual) { + private GATKSAMRecord hardClipLowQualEnds(byte lowQual) { if (read.isEmpty()) return read; @@ -269,7 +269,7 @@ public class ReadClipper { * * @return a new read without the soft clipped bases */ - public GATKSAMRecord hardClipSoftClippedBases () { + private GATKSAMRecord hardClipSoftClippedBases () { if (read.isEmpty()) return read; @@ -314,7 +314,7 @@ public class ReadClipper { * * @return a new read without adaptor sequence */ - public GATKSAMRecord hardClipAdaptorSequence () { + private GATKSAMRecord hardClipAdaptorSequence () { final Integer adaptorBoundary = ReadUtils.getAdaptorBoundary(read); if (adaptorBoundary == null || !ReadUtils.isInsideRead(read, adaptorBoundary)) @@ -332,7 +332,7 @@ public class ReadClipper { * * @return a new read without leading insertions */ - public GATKSAMRecord hardClipLeadingInsertions() { + private GATKSAMRecord hardClipLeadingInsertions() { if (read.isEmpty()) return read; @@ -357,7 +357,7 @@ public class ReadClipper { * * @return a new read with every soft clip turned into a match */ - public GATKSAMRecord revertSoftClippedBases() { + private GATKSAMRecord revertSoftClippedBases() { this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES); } diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index d1d5ddd8f..4dad68dc5 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -60,7 +60,7 @@ public class ReadClipperUnitTest extends BaseTest { int alnEnd = read.getAlignmentEnd(); int readLength = alnStart - alnEnd; for (int i=0; i= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString())); Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString())); } @@ -73,10 +73,10 @@ public class ReadClipperUnitTest extends BaseTest { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int readLength = read.getReadLength(); for (int i=0; i %s", i, read.getCigarString(), clipLeft.getCigarString())); - GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReadCoordinates(i, readLength-1); + GATKSAMRecord clipRight = ReadClipper.hardClipByReadCoordinates(read, i, readLength-1); Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString())); } } @@ -112,7 +112,7 @@ public class ReadClipperUnitTest extends BaseTest { int alnEnd = read.getAlignmentEnd(); if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side for (int i=alnStart; i<=alnEnd; i++) { - GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i); + GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i); if (!clipLeft.isEmpty()) Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); } @@ -126,9 +126,9 @@ public class ReadClipperUnitTest extends BaseTest { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); - if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side + if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side for (int i=alnStart; i<=alnEnd; i++) { - GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd); + GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i); if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); } @@ -154,7 +154,7 @@ public class ReadClipperUnitTest extends BaseTest { for (int addLeft = 0; addLeft < nLowQualBases; addLeft++) quals[addLeft] = LOW_QUAL; read.setBaseQualities(quals); - GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL); + GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); // Tests @@ -162,14 +162,14 @@ public class ReadClipperUnitTest extends BaseTest { assertNoLowQualBases(clipLeft, LOW_QUAL); // Can't run this test with the current contract of no hanging insertions - //Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString())); +// Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString())); // create a read with nLowQualBases in the right tail Utils.fillArrayWithByte(quals, HIGH_QUAL); for (int addRight = 0; addRight < nLowQualBases; addRight++) quals[readLength - addRight - 1] = LOW_QUAL; read.setBaseQualities(quals); - GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL); + GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); // Tests @@ -187,7 +187,7 @@ public class ReadClipperUnitTest extends BaseTest { quals[readLength - addBoth - 1] = LOW_QUAL; } read.setBaseQualities(quals); - GATKSAMRecord clipBoth = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL); + GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); // Tests @@ -214,8 +214,7 @@ public class ReadClipperUnitTest extends BaseTest { GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR); GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR); - ReadClipper lowQualClipper = new ReadClipper(read); - ReadClipperTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected); + ReadClipperTestUtils.assertEqualReads(ReadClipper.hardClipLowQualEnds(read, (byte) 2), expected); } @Test(enabled = true) @@ -224,8 +223,7 @@ public class ReadClipperUnitTest extends BaseTest { // Generate a list of cigars to test for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); - ReadClipper readClipper = new ReadClipper(read); - GATKSAMRecord clippedRead = readClipper.hardClipSoftClippedBases(); + GATKSAMRecord clippedRead = ReadClipper.hardClipSoftClippedBases(read); int sumHardClips = 0; int sumMatches = 0; @@ -276,7 +274,7 @@ public class ReadClipperUnitTest extends BaseTest { for (Cigar cigar : cigarList) { if (startsWithInsertion(cigar)) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); - GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipLeadingInsertions(); + GATKSAMRecord clippedRead = ReadClipper.hardClipLeadingInsertions(read); int expectedLength = read.getReadLength() - leadingCigarElementLength(read.getCigar(), CigarOperator.INSERTION); if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar())) @@ -300,7 +298,7 @@ public class ReadClipperUnitTest extends BaseTest { final int tailSoftClips = leadingCigarElementLength(ReadClipperTestUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP); final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); - final GATKSAMRecord unclipped = (new ReadClipper(read)).revertSoftClippedBases(); + final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read); if ( leadingSoftClips > 0 || tailSoftClips > 0) { final int expectedStart = read.getAlignmentStart() - leadingSoftClips; From 8259c748f228ba2a5b5175f6b468682bc6da32bb Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 27 Dec 2011 14:54:35 -0500 Subject: [PATCH 10/14] No more Filtered Reads tag. All synthetic reads are marked with the reduced read tag. --- .../src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java | 1 - 1 file changed, 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index e8df869e6..aadfb9ff1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -45,7 +45,6 @@ import java.util.Map; */ public class GATKSAMRecord extends BAMRecord { public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; - public static final String REDUCED_READ_FILTERED_TAG = "RF"; // the SAMRecord data we're caching private String mReadString = null; From f6929119030c31a1263831099b9115bfdd94ec33 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 27 Dec 2011 17:00:47 -0500 Subject: [PATCH 11/14] GATKSAMRecord emptyRead static constructor * Creates an empty GATKSAMRecord with empty (not null) Cigar, bases and quals. Allows empty reads to be probed without breaking. * All ReadClipper utilities now emit empty reads for fully clipped reads --- .../sting/utils/clipping/ClippingOp.java | 4 +- .../sting/utils/clipping/ReadClipper.java | 18 ++++++--- .../sting/utils/sam/GATKSAMRecord.java | 40 +++++++++++++++++++ 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index 7eb853097..921a0a599 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -286,7 +286,9 @@ public class ClippingOp { @Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"}) private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) { if (start == 0 && stop == read.getReadLength() - 1) - return new GATKSAMRecord(read.getHeader()); + return GATKSAMRecord.emptyRead(read); +// return new GATKSAMRecord(read.getHeader()); + // If the read is unmapped there is no Cigar string and neither should we create a new cigar string CigarShift cigarShift = (read.getReadUnmappedFlag()) ? new CigarShift(new Cigar(), 0, 0) : hardClipCigar(read.getCigar(), start, stop); diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index e41e66518..afe7fa975 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -134,7 +134,8 @@ public class ReadClipper { wasClipped = true; ops.clear(); if ( clippedRead.isEmpty() ) - return new GATKSAMRecord( clippedRead.getHeader() ); + return GATKSAMRecord.emptyRead(clippedRead); +// return new GATKSAMRecord( clippedRead.getHeader() ); return clippedRead; } catch (CloneNotSupportedException e) { throw new RuntimeException(e); // this should never happen @@ -186,7 +187,8 @@ public class ReadClipper { "start == 0 || stop == read.getReadLength() - 1"}) // cannot clip the middle of the read private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) - return new GATKSAMRecord(read.getHeader()); + return GATKSAMRecord.emptyRead(read); +// return new GATKSAMRecord(read.getHeader()); this.addOp(new ClippingOp(start, stop)); return clipRead(ClippingRepresentation.HARDCLIP_BASES); @@ -210,13 +212,15 @@ public class ReadClipper { "right <= read.getAlignmentEnd()"}) // coordinate has to be within the mapped read private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { if (read.isEmpty() || left == right) - return new GATKSAMRecord(read.getHeader()); + return GATKSAMRecord.emptyRead(read); +// return new GATKSAMRecord(read.getHeader()); GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); // after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions // make the left cut index no longer part of the read. In that case, clip the read entirely. if (left > leftTailRead.getAlignmentEnd()) - return new GATKSAMRecord(read.getHeader()); + return GATKSAMRecord.emptyRead(read); +// return new GATKSAMRecord(read.getHeader()); ReadClipper clipper = new ReadClipper(leftTailRead); return clipper.hardClipByReferenceCoordinatesLeftTail(left); @@ -249,7 +253,8 @@ public class ReadClipper { // if the entire read should be clipped, then return an empty read. if (leftClipIndex > rightClipIndex) - return (new GATKSAMRecord(read.getHeader())); + return GATKSAMRecord.emptyRead(read); +// return (new GATKSAMRecord(read.getHeader())); if (rightClipIndex < read.getReadLength() - 1) { this.addOp(new ClippingOp(rightClipIndex + 1, read.getReadLength() - 1)); @@ -379,7 +384,8 @@ public class ReadClipper { int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) - return new GATKSAMRecord(read.getHeader()); + return GATKSAMRecord.emptyRead(read); +// return new GATKSAMRecord(read.getHeader()); if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index aadfb9ff1..96713edc2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -316,6 +316,46 @@ public class GATKSAMRecord extends BAMRecord { return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; } + /** + * Creates an empty GATKSAMRecord with the read's header, read group and mate + * information, but empty (not-null) fields: + * - Cigar String + * - Read Bases + * - Base Qualities + * + * Use this method if you want to create a new empty GATKSAMRecord based on + * another GATKSAMRecord + * + * @param read + * @return + */ + public static GATKSAMRecord emptyRead(GATKSAMRecord read) { + GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(), + read.getReferenceIndex(), + 0, + (short) 0, + (short) 0, + 0, + 0, + read.getFlags(), + 0, + read.getMateReferenceIndex(), + read.getMateAlignmentStart(), + read.getInferredInsertSize(), + null); + emptyRead.setCigarString(""); + emptyRead.setReadBases(new byte[0]); + emptyRead.setBaseQualities(new byte[0]); + + SAMReadGroupRecord samRG = read.getReadGroup(); + emptyRead.clearAttributes(); + if (samRG != null) { + GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG); + emptyRead.setReadGroup(rg); + } + + return emptyRead; + } }