Fixed memory leak and bug with deletions in clipping

The ClippingOp clip cigar function would run into a endless loop if the parameter were out of the reads range, I stopped the bug.
* There is no check to make sure the read coordinate are covered by the read though
When Hard clipping to interval, I added a check for deletions.
NOTE: method works for NA12878 WEx but needs to be more thoroughly tested/optimized
This commit is contained in:
Roger Zurawicki 2011-09-18 19:21:51 -04:00
parent 9a6b1f2681
commit 091c7197cd
2 changed files with 26 additions and 3 deletions

View File

@ -324,6 +324,8 @@ public class ClippingOp {
if (index <= stop && cigarElementIterator.hasNext())
cigarElement = cigarElementIterator.next();
else
break;
}
// add the remaining cigar elements
@ -363,6 +365,8 @@ public class ClippingOp {
index += shift;
if (index < start && cigarElementIterator.hasNext())
cigarElement = cigarElementIterator.next();
else
break;
}
// check if we are hard clipping indels

View File

@ -1,6 +1,8 @@
package org.broadinstitute.sting.utils.clipreads;
import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -56,17 +58,34 @@ public class ReadClipper {
return hardClipByReferenceCoordinates(refStart, -1);
}
private int numDeletions(SAMRecord read) {
int result = 0;
for (CigarElement e: read.getCigar().getCigarElements()) {
if ( e.getOperator() == CigarOperator.DELETION || e.getOperator() == CigarOperator.D )
result =+ e.getLength();
}
return result;
}
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
int stop = (refStop < 0) ? read.getReadLength() - 1: ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
if (start < 0 || stop > read.getReadLength() - 1)
if (start < 0 || stop > read.getReadLength() - 1 + numDeletions(read))
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
// TODO add requires statement/check in the Hardclip function
// TODO add check in the Hardclip function
if ( start > stop )
stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
//This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into
//an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
stop -= numDeletions(read);
if ( start > stop )
start -= numDeletions(read);
//System.out.println("Clipping start/stop: " + start + "/" + stop);
this.addOp(new ClippingOp(start, stop));
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);