Use reads that span multiple intervals
* RR will now compress reads that span across multiple intervals correctly and output them in the correct order. * Fixed bug in getReadCoordinateForReferenceCoordinate where if the requested reference coordinate fell inside a deletion in the read the read would be clipped up to one element past the deletion.
This commit is contained in:
parent
84bd355690
commit
3b6e43b7c4
|
|
@ -249,7 +249,7 @@ public class ClippingOp {
|
|||
|
||||
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"})
|
||||
private SAMRecord hardClip (SAMRecord read, int start, int stop) {
|
||||
if (start == 0 && stop == read.getReadLength() -1)
|
||||
if (start == 0 && stop == read.getReadLength() - 1)
|
||||
return new SAMRecord(read.getHeader());
|
||||
|
||||
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import com.google.java.contract.Requires;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
|
|
@ -68,25 +69,15 @@ public class ReadClipper {
|
|||
}
|
||||
|
||||
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
|
||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
|
||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
|
||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||
|
||||
if (start < 0 || stop > read.getReadLength() - 1)
|
||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||
|
||||
if ( start > stop ) {
|
||||
// stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
|
||||
if ( start > stop )
|
||||
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
|
||||
}
|
||||
|
||||
//This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into
|
||||
//an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
|
||||
// stop -= numDeletions(read);
|
||||
// if ( start > stop )
|
||||
// start -= numDeletions(read);
|
||||
|
||||
|
||||
//System.out.println("Clipping start/stop: " + start + "/" + stop);
|
||||
this.addOp(new ClippingOp(start, stop));
|
||||
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
this.ops = null;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,46 @@
|
|||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
public class AlignmentStartWithNoTiesComparator implements Comparator<SAMRecord> {
|
||||
@Requires("c1 >= 0 && c2 >= 0")
|
||||
@Ensures("result == 0 || result == 1 || result == -1")
|
||||
private int compareContigs(int c1, int c2) {
|
||||
if (c1 == c2)
|
||||
return 0;
|
||||
else if (c1 > c2)
|
||||
return 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Requires("r1 != null && r2 != null")
|
||||
@Ensures("result == 0 || result == 1 || result == -1")
|
||||
public int compare(SAMRecord r1, SAMRecord r2) {
|
||||
int result;
|
||||
|
||||
if (r1 == r2)
|
||||
result = 0;
|
||||
|
||||
else if (r1.getReadUnmappedFlag())
|
||||
result = 1;
|
||||
else if (r2.getReadUnmappedFlag())
|
||||
result = -1;
|
||||
else {
|
||||
final int cmpContig = compareContigs(r1.getReferenceIndex(), r2.getReferenceIndex());
|
||||
|
||||
if (cmpContig != 0)
|
||||
result = cmpContig;
|
||||
|
||||
else {
|
||||
if (r1.getAlignmentStart() < r2.getAlignmentStart()) result = -1;
|
||||
else result = 1;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
@ -780,11 +780,56 @@ public class ReadUtils {
|
|||
}
|
||||
|
||||
|
||||
public enum ClippingTail {
|
||||
LEFT_TAIL,
|
||||
RIGHT_TAIL
|
||||
}
|
||||
|
||||
/**
|
||||
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(SAMRecord, int) in case it falls in
|
||||
* a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns
|
||||
* the base prior to the deletion. If clipping the right tail (end of the read) returns the base after the
|
||||
* deletion.
|
||||
*
|
||||
* @param read
|
||||
* @param refCoord
|
||||
* @param tail
|
||||
* @return the read coordinate corresponding to the requested reference coordinate for clipping.
|
||||
*/
|
||||
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
|
||||
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
||||
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
|
||||
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord, ClippingTail tail) {
|
||||
Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate(read, refCoord);
|
||||
int readCoord = result.getFirst();
|
||||
|
||||
if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL)
|
||||
readCoord++;
|
||||
|
||||
return readCoord;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the read coordinate corresponding to the requested reference coordinate.
|
||||
*
|
||||
* WARNING: if the requested reference coordinate happens to fall inside a deletion in the read, this function
|
||||
* will return the last read base before the deletion. This function returns a
|
||||
* Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with
|
||||
* a deletion.
|
||||
*
|
||||
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(SAMRecord, int, ClippingTail) instead to get a
|
||||
* pre-processed result according to normal clipping needs. Or you can use this function and tailor the
|
||||
* behavior to your needs.
|
||||
*
|
||||
* @param read
|
||||
* @param refCoord
|
||||
* @return the read coordinate corresponding to the requested reference coordinate. (see warning!)
|
||||
*/
|
||||
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
|
||||
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
||||
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
|
||||
int readBases = 0;
|
||||
int refBases = 0;
|
||||
boolean fallsInsideDeletion = false;
|
||||
|
||||
if (refCoord < read.getAlignmentStart()) {
|
||||
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord);
|
||||
|
|
@ -806,26 +851,56 @@ public class ReadUtils {
|
|||
int shift = 0;
|
||||
|
||||
if (cigarElement.getOperator().consumesReferenceBases()) {
|
||||
if (refBases + cigarElement.getLength() < goal) {
|
||||
if (refBases + cigarElement.getLength() < goal)
|
||||
shift = cigarElement.getLength();
|
||||
}
|
||||
else {
|
||||
else
|
||||
shift = goal - refBases;
|
||||
}
|
||||
|
||||
refBases += shift;
|
||||
}
|
||||
goalReached = refBases == goal;
|
||||
|
||||
if (cigarElement.getOperator().consumesReadBases()) {
|
||||
readBases += goalReached ? shift : cigarElement.getLength();
|
||||
}
|
||||
if (!goalReached && cigarElement.getOperator().consumesReadBases())
|
||||
readBases += cigarElement.getLength();
|
||||
|
||||
if (goalReached) {
|
||||
// Is this base's reference position within this cigar element? Or did we use it all?
|
||||
boolean endsWithinCigar = shift < cigarElement.getLength();
|
||||
|
||||
// If it isn't, we need to check the next one. There should *ALWAYS* be a next one
|
||||
// since we checked if the goal coordinate is within the read length, so this is just a sanity check.
|
||||
if (!endsWithinCigar && !cigarElementIterator.hasNext())
|
||||
throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
|
||||
|
||||
CigarElement nextCigarElement;
|
||||
|
||||
// if we end inside the current cigar element, we just have to check if it is a deletion
|
||||
if (endsWithinCigar)
|
||||
fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
|
||||
|
||||
// if we end outside the current cigar element, we need to check if the next element is a deletion.
|
||||
else {
|
||||
nextCigarElement = cigarElementIterator.next();
|
||||
fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION;
|
||||
}
|
||||
|
||||
// If we reached our goal outside a deletion, add the shift
|
||||
if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
|
||||
readBases += shift;
|
||||
|
||||
// If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
|
||||
// to add the shift of the current cigar element but go back to it's last element to return the last
|
||||
// base before the deletion (see warning in function contracts)
|
||||
else if (fallsInsideDeletion && !endsWithinCigar)
|
||||
readBases += shift - 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!goalReached)
|
||||
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
||||
}
|
||||
|
||||
return readBases;
|
||||
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
|
||||
}
|
||||
|
||||
public static SAMRecord unclipSoftClippedBases(SAMRecord rec) {
|
||||
|
|
@ -871,4 +946,19 @@ public class ReadUtils {
|
|||
|
||||
return rec;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two SAMRecords only the basis on alignment start. Note that
|
||||
* comparisons are performed ONLY on the basis of alignment start; any
|
||||
* two SAM records with the same alignment start will be considered equal.
|
||||
*
|
||||
* Unmapped alignments will all be considered equal.
|
||||
*/
|
||||
|
||||
@Requires({"read1 != null", "read2 != null"})
|
||||
@Ensures("result == 0 || result == 1 || result == -1")
|
||||
public static int compareSAMRecords(SAMRecord read1, SAMRecord read2) {
|
||||
AlignmentStartComparator comp = new AlignmentStartComparator();
|
||||
return comp.compare(read1, read2);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue