Cache SoftStart() and SoftEnd() in the GATKSAMRecord

These are costly operations when done repeatedly on the same read.
This commit is contained in:
Mauricio Carneiro 2012-07-02 12:29:31 -04:00
parent 88a02fa2cb
commit 3cea080aa8
2 changed files with 46 additions and 21 deletions

View File

@ -312,6 +312,7 @@ public class ClippingOp {
throw new ReviewedStingException("Where did the clone go?"); throw new ReviewedStingException("Where did the clone go?");
} }
hardClippedRead.resetSoftStartAndEnd(); // reset the cached soft start and end because they may have changed now that the read was hard clipped. No need to calculate them now. They'll be lazily calculated on the next call to getSoftStart()/End()
hardClippedRead.setBaseQualities(newQuals); hardClippedRead.setBaseQualities(newQuals);
hardClippedRead.setReadBases(newBases); hardClippedRead.setReadBases(newBases);
hardClippedRead.setCigar(cigarShift.cigar); hardClippedRead.setCigar(cigarShift.cigar);

View File

@ -59,6 +59,8 @@ public class GATKSAMRecord extends BAMRecord {
private String mReadString = null; private String mReadString = null;
private GATKSAMReadGroupRecord mReadGroup = null; private GATKSAMReadGroupRecord mReadGroup = null;
private byte[] reducedReadCounts = null; private byte[] reducedReadCounts = null;
private int softStart = -1;
private int softEnd = -1;
// because some values can be null, we don't want to duplicate effort // because some values can be null, we don't want to duplicate effort
private boolean retrievedReadGroup = false; private boolean retrievedReadGroup = false;
@ -385,6 +387,7 @@ public class GATKSAMRecord extends BAMRecord {
* @return the unclipped start of the read taking soft clips (but not hard clips) into account * @return the unclipped start of the read taking soft clips (but not hard clips) into account
*/ */
public int getSoftStart() { public int getSoftStart() {
if (softStart < 0) {
int start = this.getUnclippedStart(); int start = this.getUnclippedStart();
for (CigarElement cigarElement : this.getCigar().getCigarElements()) { for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
@ -392,8 +395,9 @@ public class GATKSAMRecord extends BAMRecord {
else else
break; break;
} }
softStart = start;
return start; }
return softStart;
} }
/** /**
@ -404,6 +408,7 @@ public class GATKSAMRecord extends BAMRecord {
* @return the unclipped end of the read taking soft clips (but not hard clips) into account * @return the unclipped end of the read taking soft clips (but not hard clips) into account
*/ */
public int getSoftEnd() { public int getSoftEnd() {
if (softEnd < 0) {
int stop = this.getUnclippedStart(); int stop = this.getUnclippedStart();
if (ReadUtils.readIsEntirelyInsertion(this)) if (ReadUtils.readIsEntirelyInsertion(this))
@ -419,8 +424,27 @@ public class GATKSAMRecord extends BAMRecord {
else else
shift = 0; shift = 0;
} }
softEnd = (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
return softEnd;
}
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; /**
* If the read is hard clipped, the soft start and end will change. You can set manually or just reset the cache
* so that the next call to getSoftStart/End will recalculate it lazily.
*/
public void resetSoftStartAndEnd() {
// -1 marks "not cached": getSoftStart()/getSoftEnd() recompute whenever the stored value is negative
softStart = -1;
softEnd = -1;
}
/**
* Manually sets the cached soft start and end, e.g. when the read was hard clipped and the new values are already known.
* Subsequent calls to getSoftStart/End return these values instead of recalculating them; a negative value leaves that side to be recalculated lazily.
*/
public void resetSoftStartAndEnd(int softStart, int softEnd) {
// overwrite the cached values directly; the getters treat a negative value as "not cached" and recompute it on the next call
this.softStart = softStart;
this.softEnd = softEnd;
} }
/** /**