Cache SoftStart() and SoftEnd() in the GATKSAMRecord

These are costly operations when done repeatedly on the same read.
This commit is contained in:
Mauricio Carneiro 2012-07-02 12:29:31 -04:00
parent 88a02fa2cb
commit 3cea080aa8
2 changed files with 46 additions and 21 deletions

View File

@ -312,6 +312,7 @@ public class ClippingOp {
throw new ReviewedStingException("Where did the clone go?"); throw new ReviewedStingException("Where did the clone go?");
} }
hardClippedRead.resetSoftStartAndEnd(); // reset the cached soft start and end because they may have changed now that the read was hard clipped. No need to calculate them now. They'll be lazily calculated on the next call to getSoftStart()/End()
hardClippedRead.setBaseQualities(newQuals); hardClippedRead.setBaseQualities(newQuals);
hardClippedRead.setReadBases(newBases); hardClippedRead.setReadBases(newBases);
hardClippedRead.setCigar(cigarShift.cigar); hardClippedRead.setCigar(cigarShift.cigar);

View File

@ -59,6 +59,8 @@ public class GATKSAMRecord extends BAMRecord {
private String mReadString = null; private String mReadString = null;
private GATKSAMReadGroupRecord mReadGroup = null; private GATKSAMReadGroupRecord mReadGroup = null;
private byte[] reducedReadCounts = null; private byte[] reducedReadCounts = null;
private int softStart = -1;
private int softEnd = -1;
// because some values can be null, we don't want to duplicate effort // because some values can be null, we don't want to duplicate effort
private boolean retrievedReadGroup = false; private boolean retrievedReadGroup = false;
@ -385,15 +387,17 @@ public class GATKSAMRecord extends BAMRecord {
* @return the unclipped start of the read taking soft clips (but not hard clips) into account * @return the unclipped start of the read taking soft clips (but not hard clips) into account
*/ */
public int getSoftStart() { public int getSoftStart() {
int start = this.getUnclippedStart(); if (softStart < 0) {
for (CigarElement cigarElement : this.getCigar().getCigarElements()) { int start = this.getUnclippedStart();
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
start += cigarElement.getLength(); if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
else start += cigarElement.getLength();
break; else
break;
}
softStart = start;
} }
return softStart;
return start;
} }
/** /**
@ -404,23 +408,43 @@ public class GATKSAMRecord extends BAMRecord {
* @return the unclipped end of the read taking soft clips (but not hard clips) into account * @return the unclipped end of the read taking soft clips (but not hard clips) into account
*/ */
public int getSoftEnd() { public int getSoftEnd() {
int stop = this.getUnclippedStart(); if (softEnd < 0) {
int stop = this.getUnclippedStart();
if (ReadUtils.readIsEntirelyInsertion(this)) if (ReadUtils.readIsEntirelyInsertion(this))
return stop; return stop;
int shift = 0; int shift = 0;
CigarOperator lastOperator = null; CigarOperator lastOperator = null;
for (CigarElement cigarElement : this.getCigar().getCigarElements()) { for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
stop += shift; stop += shift;
lastOperator = cigarElement.getOperator(); lastOperator = cigarElement.getOperator();
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP) if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
shift = cigarElement.getLength(); shift = cigarElement.getLength();
else else
shift = 0; shift = 0;
}
softEnd = (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
} }
return softEnd;
}
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; /**
* If the read is hard clipped, the soft start and end will change. You can set manually or just reset the cache
* so that the next call to getSoftStart/End will recalculate it lazily.
*/
public void resetSoftStartAndEnd() {
softStart = -1;
softEnd = -1;
}
/**
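* If the read is hard clipped, the soft start and end will change. This overload sets the cached values manually
* to the given softStart and softEnd, so that the next call to getSoftStart/End returns them without recalculating.
* A negative value is treated as "not cached" and will be recalculated lazily on the next call.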
*/
public void resetSoftStartAndEnd(int softStart, int softEnd) {
this.softStart = softStart;
this.softEnd = softEnd;
} }
/** /**