From 3cea080aa8a0feab210a0c2e00f73cc30a090ef7 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 2 Jul 2012 12:29:31 -0400 Subject: [PATCH] Cache SoftStart() and SoftEnd() in the GATKSAMRecord these are costly operations when done repeatedly on the same read. --- .../sting/utils/clipping/ClippingOp.java | 1 + .../sting/utils/sam/GATKSAMRecord.java | 66 +++++++++++++------ 2 files changed, 46 insertions(+), 21 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index d12d2c2ec..a4383c3ae 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -312,6 +312,7 @@ public class ClippingOp { throw new ReviewedStingException("Where did the clone go?"); } + hardClippedRead.resetSoftStartAndEnd(); // reset the cached soft start and end because they may have changed now that the read was hard clipped. No need to calculate them now. 
They'll be lazily calculated on the next call to getSoftStart()/End() hardClippedRead.setBaseQualities(newQuals); hardClippedRead.setReadBases(newBases); hardClippedRead.setCigar(cigarShift.cigar); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 5fbe12eed..a925c7577 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -59,6 +59,8 @@ public class GATKSAMRecord extends BAMRecord { private String mReadString = null; private GATKSAMReadGroupRecord mReadGroup = null; private byte[] reducedReadCounts = null; + private int softStart = -1; + private int softEnd = -1; // because some values can be null, we don't want to duplicate effort private boolean retrievedReadGroup = false; @@ -385,15 +387,17 @@ public class GATKSAMRecord extends BAMRecord { * @return the unclipped start of the read taking soft clips (but not hard clips) into account */ public int getSoftStart() { - int start = this.getUnclippedStart(); - for (CigarElement cigarElement : this.getCigar().getCigarElements()) { - if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) - start += cigarElement.getLength(); - else - break; + if (softStart < 0) { + int start = this.getUnclippedStart(); + for (CigarElement cigarElement : this.getCigar().getCigarElements()) { + if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) + start += cigarElement.getLength(); + else + break; + } + softStart = start; } - - return start; + return softStart; } /** @@ -404,23 +408,43 @@ public class GATKSAMRecord extends BAMRecord { * @return the unclipped end of the read taking soft clips (but not hard clips) into account */ public int getSoftEnd() { - int stop = this.getUnclippedStart(); + if (softEnd < 0) { + int stop = this.getUnclippedStart(); - if (ReadUtils.readIsEntirelyInsertion(this)) - return stop; + if 
(ReadUtils.readIsEntirelyInsertion(this)) + return stop; - int shift = 0; - CigarOperator lastOperator = null; - for (CigarElement cigarElement : this.getCigar().getCigarElements()) { - stop += shift; - lastOperator = cigarElement.getOperator(); - if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP) - shift = cigarElement.getLength(); - else - shift = 0; + int shift = 0; + CigarOperator lastOperator = null; + for (CigarElement cigarElement : this.getCigar().getCigarElements()) { + stop += shift; + lastOperator = cigarElement.getOperator(); + if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP) + shift = cigarElement.getLength(); + else + shift = 0; + } + softEnd = (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; } + return softEnd; + } - return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; + /** + * If the read is hard clipped, the soft start and end will change. You can set manually or just reset the cache + * so that the next call to getSoftStart/End will recalculate it lazily. + */ + public void resetSoftStartAndEnd() { + softStart = -1; + softEnd = -1; + } + + /** + * Manually sets the cached soft start and end, e.g. when the caller already knows the values after hard clipping the read. + * The next call to getSoftStart/End returns these values as-is; a negative value makes that getter recalculate lazily instead. + */ + public void resetSoftStartAndEnd(int softStart, int softEnd) { + this.softStart = softStart; + this.softEnd = softEnd; } /**