Allowing to clip after AlignmentEnd if end is soft clipped.
Read clipper now identifies and clips even if the requested coordinate is outside the alignment but the read contains soft clipped bases in that region.
This commit is contained in:
parent
90a1f5e15c
commit
7532be7f5a
|
|
@ -632,7 +632,7 @@ public class ReadUtils {
|
||||||
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
|
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
|
||||||
|
|
||||||
int start = getRefCoordSoftUnclippedStart(read);
|
int start = getRefCoordSoftUnclippedStart(read);
|
||||||
int stop = getRefCoordSoftUnclippedStop(read);
|
int stop = getRefCoordSoftUnclippedEnd(read);
|
||||||
|
|
||||||
if ( !read.getReferenceName().equals(interval.getContig()) )
|
if ( !read.getReferenceName().equals(interval.getContig()) )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
|
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
|
||||||
|
|
@ -658,6 +658,7 @@ public class ReadUtils {
|
||||||
return ReadAndIntervalOverlap.OVERLAP_RIGHT;
|
return ReadAndIntervalOverlap.OVERLAP_RIGHT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
|
||||||
public static int getRefCoordSoftUnclippedStart(SAMRecord read) {
|
public static int getRefCoordSoftUnclippedStart(SAMRecord read) {
|
||||||
int start = read.getUnclippedStart();
|
int start = read.getUnclippedStart();
|
||||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||||
|
|
@ -669,51 +670,93 @@ public class ReadUtils {
|
||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int getRefCoordSoftUnclippedStop(SAMRecord read) {
|
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
|
||||||
int stop = read.getAlignmentEnd();
|
public static int getRefCoordSoftUnclippedEnd(SAMRecord read) {
|
||||||
List<CigarElement> cigarElementList = read.getCigar().getCigarElements();
|
int stop = read.getUnclippedStart();
|
||||||
CigarElement lastCigarElement = cigarElementList.get(cigarElementList.size()-1);
|
int shift = 0;
|
||||||
if (lastCigarElement.getOperator() == CigarOperator.SOFT_CLIP)
|
for (CigarElement cigarElement : read.getCigar().getCigarElements())
|
||||||
stop += lastCigarElement.getLength();
|
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
|
||||||
return stop;
|
shift += cigarElement.getLength();
|
||||||
|
|
||||||
|
return (shift == 0) ? stop : stop+shift-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before
|
||||||
|
* the alignment start of the read.
|
||||||
|
*
|
||||||
|
* @param read
|
||||||
|
* @param refCoord
|
||||||
|
* @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before)
|
||||||
|
*/
|
||||||
|
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord < read.getAlignmentStart()"})
|
||||||
|
private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(SAMRecord read, int refCoord) {
|
||||||
|
if (getRefCoordSoftUnclippedStart(read) <= refCoord)
|
||||||
|
return refCoord - getRefCoordSoftUnclippedStart(read) + 1;
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region after
|
||||||
|
* the alignment end of the read.
|
||||||
|
*
|
||||||
|
* @param read
|
||||||
|
* @param refCoord
|
||||||
|
* @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before)
|
||||||
|
*/
|
||||||
|
@Requires({"refCoord <= read.getUnclippedEnd()", "refCoord > read.getAlignmentEnd()"})
|
||||||
|
private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(SAMRecord read, int refCoord) {
|
||||||
|
if (getRefCoordSoftUnclippedEnd(read) > refCoord)
|
||||||
|
return refCoord - read.getAlignmentEnd() + 1;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
@Requires({"refCoord >= read.getAlignmentStart()", "refCoord <= read.getAlignmentEnd()"})
|
|
||||||
|
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
|
||||||
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
||||||
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
|
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
|
||||||
int readBases = 0;
|
int readBases = 0;
|
||||||
int refBases = 0;
|
int refBases = 0;
|
||||||
int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases
|
|
||||||
boolean goalReached = refBases == goal;
|
|
||||||
|
|
||||||
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
|
if (refCoord < read.getAlignmentStart()) {
|
||||||
while (!goalReached && cigarElementIterator.hasNext()) {
|
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord);
|
||||||
CigarElement cigarElement = cigarElementIterator.next();
|
if (readBases < 0)
|
||||||
int shift = 0;
|
throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate.");
|
||||||
//if (refBases == 0 && readBases == 0 && cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
|
|
||||||
// goal -= cigarElement.getLength();
|
|
||||||
//}
|
|
||||||
|
|
||||||
if (cigarElement.getOperator().consumesReferenceBases()) {
|
|
||||||
if (refBases + cigarElement.getLength() < goal) {
|
|
||||||
shift = cigarElement.getLength();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
shift = goal - refBases;
|
|
||||||
}
|
|
||||||
refBases += shift;
|
|
||||||
}
|
|
||||||
goalReached = refBases == goal;
|
|
||||||
|
|
||||||
if (cigarElement.getOperator().consumesReadBases()) {
|
|
||||||
readBases += goalReached ? shift : cigarElement.getLength();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
else if (refCoord > read.getAlignmentEnd()) {
|
||||||
|
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(read, refCoord);
|
||||||
|
if (readBases < 0)
|
||||||
|
throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate.");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases
|
||||||
|
boolean goalReached = refBases == goal;
|
||||||
|
|
||||||
if (!goalReached)
|
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
|
||||||
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
while (!goalReached && cigarElementIterator.hasNext()) {
|
||||||
|
CigarElement cigarElement = cigarElementIterator.next();
|
||||||
|
int shift = 0;
|
||||||
|
|
||||||
|
if (cigarElement.getOperator().consumesReferenceBases()) {
|
||||||
|
if (refBases + cigarElement.getLength() < goal) {
|
||||||
|
shift = cigarElement.getLength();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
shift = goal - refBases;
|
||||||
|
}
|
||||||
|
refBases += shift;
|
||||||
|
}
|
||||||
|
goalReached = refBases == goal;
|
||||||
|
|
||||||
|
if (cigarElement.getOperator().consumesReadBases()) {
|
||||||
|
readBases += goalReached ? shift : cigarElement.getLength();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!goalReached)
|
||||||
|
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
||||||
|
}
|
||||||
|
|
||||||
return readBases;
|
return readBases;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue