From f6929119030c31a1263831099b9115bfdd94ec33 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 27 Dec 2011 17:00:47 -0500 Subject: [PATCH] GATKSAMRecord emptyRead static constructor * Creates an empty GATKSAMRecord with empty (not null) Cigar, bases and quals. Allows empty reads to be probed without breaking. * All ReadClipper utilities now emit empty reads for fully clipped reads --- .../sting/utils/clipping/ClippingOp.java | 4 +- .../sting/utils/clipping/ReadClipper.java | 18 ++++++--- .../sting/utils/sam/GATKSAMRecord.java | 40 +++++++++++++++++++ 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index 7eb853097..921a0a599 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -286,7 +286,9 @@ public class ClippingOp { @Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"}) private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) { if (start == 0 && stop == read.getReadLength() - 1) - return new GATKSAMRecord(read.getHeader()); + return GATKSAMRecord.emptyRead(read); +// return new GATKSAMRecord(read.getHeader()); + // If the read is unmapped there is no Cigar string and neither should we create a new cigar string CigarShift cigarShift = (read.getReadUnmappedFlag()) ? new CigarShift(new Cigar(), 0, 0) : hardClipCigar(read.getCigar(), start, stop); diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index e41e66518..afe7fa975 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -134,7 +134,8 @@ public class ReadClipper { wasClipped = true; ops.clear(); if ( clippedRead.isEmpty() ) - return new GATKSAMRecord( clippedRead.getHeader() ); + return GATKSAMRecord.emptyRead(clippedRead); +// return new GATKSAMRecord( clippedRead.getHeader() ); return clippedRead; } catch (CloneNotSupportedException e) { throw new RuntimeException(e); // this should never happen @@ -186,7 +187,8 @@ public class ReadClipper { "start == 0 || stop == read.getReadLength() - 1"}) // cannot clip the middle of the read private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) - return new GATKSAMRecord(read.getHeader()); + return GATKSAMRecord.emptyRead(read); +// return new GATKSAMRecord(read.getHeader()); this.addOp(new ClippingOp(start, stop)); return clipRead(ClippingRepresentation.HARDCLIP_BASES); @@ -210,13 +212,15 @@ public class ReadClipper { "right <= read.getAlignmentEnd()"}) // coordinate has to be within the mapped read private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { if (read.isEmpty() || left == right) - return new GATKSAMRecord(read.getHeader()); + return GATKSAMRecord.emptyRead(read); +// return new GATKSAMRecord(read.getHeader()); GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); // after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions // make the left cut index no longer part of the read. In that case, clip the read entirely. if (left > leftTailRead.getAlignmentEnd()) - return new GATKSAMRecord(read.getHeader()); + return GATKSAMRecord.emptyRead(read); +// return new GATKSAMRecord(read.getHeader()); ReadClipper clipper = new ReadClipper(leftTailRead); return clipper.hardClipByReferenceCoordinatesLeftTail(left); @@ -249,7 +253,8 @@ public class ReadClipper { // if the entire read should be clipped, then return an empty read. if (leftClipIndex > rightClipIndex) - return (new GATKSAMRecord(read.getHeader())); + return GATKSAMRecord.emptyRead(read); +// return (new GATKSAMRecord(read.getHeader())); if (rightClipIndex < read.getReadLength() - 1) { this.addOp(new ClippingOp(rightClipIndex + 1, read.getReadLength() - 1)); @@ -379,7 +384,8 @@ public class ReadClipper { int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) - return new GATKSAMRecord(read.getHeader()); + return GATKSAMRecord.emptyRead(read); +// return new GATKSAMRecord(read.getHeader()); if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index aadfb9ff1..96713edc2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -316,6 +316,46 @@ public class GATKSAMRecord extends BAMRecord { return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; } + /** + * Creates an empty GATKSAMRecord with the read's header, read group and mate + * information, but empty (not-null) fields: + * - Cigar String + * - Read Bases + * - Base Qualities + * + * Use this method if you want to create a new empty GATKSAMRecord based on + * another GATKSAMRecord + * + * @param read + * @return + */ + public static GATKSAMRecord emptyRead(GATKSAMRecord read) { + GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(), + read.getReferenceIndex(), + 0, + (short) 0, + (short) 0, + 0, + 0, + read.getFlags(), + 0, + read.getMateReferenceIndex(), + read.getMateAlignmentStart(), + read.getInferredInsertSize(), + null); + emptyRead.setCigarString(""); + emptyRead.setReadBases(new byte[0]); + emptyRead.setBaseQualities(new byte[0]); + + SAMReadGroupRecord samRG = read.getReadGroup(); + emptyRead.clearAttributes(); + if (samRG != null) { + GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG); + emptyRead.setReadGroup(rg); + } + + return emptyRead; + } }