Fixed memory leak and bug with deletions in clipping
The ClippingOp clip cigar function would run into a endless loop if the parameter were out of the reads range, I stopped the bug. * There is no check to make sure the read coordinate are covered by the read though When Hard clipping to interval, I added a check for deletions. NOTE: method works for NA12878 WEx but needs to be more thoroughly tested/optimized
This commit is contained in:
parent
9a6b1f2681
commit
091c7197cd
|
|
@ -324,6 +324,8 @@ public class ClippingOp {
|
||||||
|
|
||||||
if (index <= stop && cigarElementIterator.hasNext())
|
if (index <= stop && cigarElementIterator.hasNext())
|
||||||
cigarElement = cigarElementIterator.next();
|
cigarElement = cigarElementIterator.next();
|
||||||
|
else
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// add the remaining cigar elements
|
// add the remaining cigar elements
|
||||||
|
|
@ -363,6 +365,8 @@ public class ClippingOp {
|
||||||
index += shift;
|
index += shift;
|
||||||
if (index < start && cigarElementIterator.hasNext())
|
if (index < start && cigarElementIterator.hasNext())
|
||||||
cigarElement = cigarElementIterator.next();
|
cigarElement = cigarElementIterator.next();
|
||||||
|
else
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if we are hard clipping indels
|
// check if we are hard clipping indels
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
package org.broadinstitute.sting.utils.clipreads;
|
package org.broadinstitute.sting.utils.clipreads;
|
||||||
|
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
@ -56,17 +58,34 @@ public class ReadClipper {
|
||||||
return hardClipByReferenceCoordinates(refStart, -1);
|
return hardClipByReferenceCoordinates(refStart, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private int numDeletions(SAMRecord read) {
|
||||||
|
int result = 0;
|
||||||
|
for (CigarElement e: read.getCigar().getCigarElements()) {
|
||||||
|
if ( e.getOperator() == CigarOperator.DELETION || e.getOperator() == CigarOperator.D )
|
||||||
|
result =+ e.getLength();
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
|
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
|
||||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
|
int stop = (refStop < 0) ? read.getReadLength() - 1: ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
|
||||||
|
|
||||||
if (start < 0 || stop > read.getReadLength() - 1)
|
if (start < 0 || stop > read.getReadLength() - 1 + numDeletions(read))
|
||||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||||
|
|
||||||
// TODO add requires statement/check in the Hardclip function
|
// TODO add check in the Hardclip function
|
||||||
if ( start > stop )
|
if ( start > stop )
|
||||||
stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
|
stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
|
||||||
|
|
||||||
|
|
||||||
|
//This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into
|
||||||
|
//an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
|
||||||
|
stop -= numDeletions(read);
|
||||||
|
if ( start > stop )
|
||||||
|
start -= numDeletions(read);
|
||||||
|
|
||||||
|
|
||||||
//System.out.println("Clipping start/stop: " + start + "/" + stop);
|
//System.out.println("Clipping start/stop: " + start + "/" + stop);
|
||||||
this.addOp(new ClippingOp(start, stop));
|
this.addOp(new ClippingOp(start, stop));
|
||||||
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue