Use reads that span multiple intervals
* RR will now compress reads that span across multiple intervals correctly and output them in the correct order. * Fixed bug in getReadCoordinateForReferenceCoordinate where, if the requested reference coordinate fell inside a deletion in the read, the read would be clipped up to one element past the deletion.
This commit is contained in:
parent
84bd355690
commit
3b6e43b7c4
|
|
@ -249,7 +249,7 @@ public class ClippingOp {
|
||||||
|
|
||||||
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"})
|
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"})
|
||||||
private SAMRecord hardClip (SAMRecord read, int start, int stop) {
|
private SAMRecord hardClip (SAMRecord read, int start, int stop) {
|
||||||
if (start == 0 && stop == read.getReadLength() -1)
|
if (start == 0 && stop == read.getReadLength() - 1)
|
||||||
return new SAMRecord(read.getHeader());
|
return new SAMRecord(read.getHeader());
|
||||||
|
|
||||||
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
|
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import com.google.java.contract.Requires;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
|
|
@ -68,25 +69,15 @@ public class ReadClipper {
|
||||||
}
|
}
|
||||||
|
|
||||||
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
|
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
|
||||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
|
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||||
|
|
||||||
if (start < 0 || stop > read.getReadLength() - 1)
|
if (start < 0 || stop > read.getReadLength() - 1)
|
||||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||||
|
|
||||||
if ( start > stop ) {
|
if ( start > stop )
|
||||||
// stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
|
|
||||||
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
|
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
|
||||||
}
|
|
||||||
|
|
||||||
//This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into
|
|
||||||
//an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
|
|
||||||
// stop -= numDeletions(read);
|
|
||||||
// if ( start > stop )
|
|
||||||
// start -= numDeletions(read);
|
|
||||||
|
|
||||||
|
|
||||||
//System.out.println("Clipping start/stop: " + start + "/" + stop);
|
|
||||||
this.addOp(new ClippingOp(start, stop));
|
this.addOp(new ClippingOp(start, stop));
|
||||||
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||||
this.ops = null;
|
this.ops = null;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,46 @@
|
||||||
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
public class AlignmentStartWithNoTiesComparator implements Comparator<SAMRecord> {
|
||||||
|
@Requires("c1 >= 0 && c2 >= 0")
|
||||||
|
@Ensures("result == 0 || result == 1 || result == -1")
|
||||||
|
private int compareContigs(int c1, int c2) {
|
||||||
|
if (c1 == c2)
|
||||||
|
return 0;
|
||||||
|
else if (c1 > c2)
|
||||||
|
return 1;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires("r1 != null && r2 != null")
|
||||||
|
@Ensures("result == 0 || result == 1 || result == -1")
|
||||||
|
public int compare(SAMRecord r1, SAMRecord r2) {
|
||||||
|
int result;
|
||||||
|
|
||||||
|
if (r1 == r2)
|
||||||
|
result = 0;
|
||||||
|
|
||||||
|
else if (r1.getReadUnmappedFlag())
|
||||||
|
result = 1;
|
||||||
|
else if (r2.getReadUnmappedFlag())
|
||||||
|
result = -1;
|
||||||
|
else {
|
||||||
|
final int cmpContig = compareContigs(r1.getReferenceIndex(), r2.getReferenceIndex());
|
||||||
|
|
||||||
|
if (cmpContig != 0)
|
||||||
|
result = cmpContig;
|
||||||
|
|
||||||
|
else {
|
||||||
|
if (r1.getAlignmentStart() < r2.getAlignmentStart()) result = -1;
|
||||||
|
else result = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -780,11 +780,56 @@ public class ReadUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public enum ClippingTail {
|
||||||
|
LEFT_TAIL,
|
||||||
|
RIGHT_TAIL
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(SAMRecord, int) in case it falls in
|
||||||
|
* a deletion following the typical clipping needs. If clipping the left tail (beginning of the read), returns
|
||||||
|
* the base prior to the deletion. If clipping the right tail (end of the read), returns the base after the
|
||||||
|
* deletion.
|
||||||
|
*
|
||||||
|
* @param read
|
||||||
|
* @param refCoord
|
||||||
|
* @param tail
|
||||||
|
* @return the read coordinate corresponding to the requested reference coordinate for clipping.
|
||||||
|
*/
|
||||||
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
|
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
|
||||||
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
||||||
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
|
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord, ClippingTail tail) {
|
||||||
|
Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate(read, refCoord);
|
||||||
|
int readCoord = result.getFirst();
|
||||||
|
|
||||||
|
if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL)
|
||||||
|
readCoord++;
|
||||||
|
|
||||||
|
return readCoord;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the read coordinate corresponding to the requested reference coordinate.
|
||||||
|
*
|
||||||
|
* WARNING: if the requested reference coordinate happens to fall inside a deletion in the read, this function
|
||||||
|
* will return the last read base before the deletion. This function returns a
|
||||||
|
* Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with
|
||||||
|
* a deletion.
|
||||||
|
*
|
||||||
|
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(SAMRecord, int, ClippingTail) instead to get a
|
||||||
|
* pre-processed result according to normal clipping needs. Or you can use this function and tailor the
|
||||||
|
* behavior to your needs.
|
||||||
|
*
|
||||||
|
* @param read
|
||||||
|
* @param refCoord
|
||||||
|
* @return the read coordinate corresponding to the requested reference coordinate. (see warning!)
|
||||||
|
*/
|
||||||
|
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
|
||||||
|
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
||||||
|
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
|
||||||
int readBases = 0;
|
int readBases = 0;
|
||||||
int refBases = 0;
|
int refBases = 0;
|
||||||
|
boolean fallsInsideDeletion = false;
|
||||||
|
|
||||||
if (refCoord < read.getAlignmentStart()) {
|
if (refCoord < read.getAlignmentStart()) {
|
||||||
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord);
|
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord);
|
||||||
|
|
@ -806,26 +851,56 @@ public class ReadUtils {
|
||||||
int shift = 0;
|
int shift = 0;
|
||||||
|
|
||||||
if (cigarElement.getOperator().consumesReferenceBases()) {
|
if (cigarElement.getOperator().consumesReferenceBases()) {
|
||||||
if (refBases + cigarElement.getLength() < goal) {
|
if (refBases + cigarElement.getLength() < goal)
|
||||||
shift = cigarElement.getLength();
|
shift = cigarElement.getLength();
|
||||||
}
|
else
|
||||||
else {
|
|
||||||
shift = goal - refBases;
|
shift = goal - refBases;
|
||||||
}
|
|
||||||
refBases += shift;
|
refBases += shift;
|
||||||
}
|
}
|
||||||
goalReached = refBases == goal;
|
goalReached = refBases == goal;
|
||||||
|
|
||||||
if (cigarElement.getOperator().consumesReadBases()) {
|
if (!goalReached && cigarElement.getOperator().consumesReadBases())
|
||||||
readBases += goalReached ? shift : cigarElement.getLength();
|
readBases += cigarElement.getLength();
|
||||||
}
|
|
||||||
|
if (goalReached) {
|
||||||
|
// Is this base's reference position within this cigar element? Or did we use it all?
|
||||||
|
boolean endsWithinCigar = shift < cigarElement.getLength();
|
||||||
|
|
||||||
|
// If it isn't, we need to check the next one. There should *ALWAYS* be a next one
|
||||||
|
// since we checked if the goal coordinate is within the read length, so this is just a sanity check.
|
||||||
|
if (!endsWithinCigar && !cigarElementIterator.hasNext())
|
||||||
|
throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
|
||||||
|
|
||||||
|
CigarElement nextCigarElement;
|
||||||
|
|
||||||
|
// if we end inside the current cigar element, we just have to check if it is a deletion
|
||||||
|
if (endsWithinCigar)
|
||||||
|
fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
|
||||||
|
|
||||||
|
// if we end outside the current cigar element, we need to check if the next element is a deletion.
|
||||||
|
else {
|
||||||
|
nextCigarElement = cigarElementIterator.next();
|
||||||
|
fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we reached our goal outside a deletion, add the shift
|
||||||
|
if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
|
||||||
|
readBases += shift;
|
||||||
|
|
||||||
|
// If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
|
||||||
|
// to add the shift of the current cigar element but go back to its last base to return the last
|
||||||
|
// base before the deletion (see warning in function contracts)
|
||||||
|
else if (fallsInsideDeletion && !endsWithinCigar)
|
||||||
|
readBases += shift - 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!goalReached)
|
if (!goalReached)
|
||||||
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
||||||
}
|
}
|
||||||
|
|
||||||
return readBases;
|
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static SAMRecord unclipSoftClippedBases(SAMRecord rec) {
|
public static SAMRecord unclipSoftClippedBases(SAMRecord rec) {
|
||||||
|
|
@ -871,4 +946,19 @@ public class ReadUtils {
|
||||||
|
|
||||||
return rec;
|
return rec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compares two SAMRecords only on the basis of alignment start. Note that
|
||||||
|
* comparisons are performed ONLY on the basis of alignment start; any
|
||||||
|
* two SAM records with the same alignment start will be considered equal.
|
||||||
|
*
|
||||||
|
* Unmapped alignments will all be considered equal.
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Requires({"read1 != null", "read2 != null"})
|
||||||
|
@Ensures("result == 0 || result == 1 || result == -1")
|
||||||
|
public static int compareSAMRecords(SAMRecord read1, SAMRecord read2) {
|
||||||
|
AlignmentStartComparator comp = new AlignmentStartComparator();
|
||||||
|
return comp.compare(read1, read2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue