Add support for reads starting with insertion
* Modified cleanCigarShift to allow insertions at the beginning and end of the read
* Allowed cigars starting/ending in insertions in the systematic ReadClipper tests
* Updated all ReadClipper unit tests
* ReduceReads does not hard clip leading insertions by default anymore
* SlidingWindow adjusts start location if read starts with insertion
* SlidingWindow creates an empty element with insertions to the right
* Fixed all potential divide-by-zero errors with totalCount() (from BaseCounts)
* Updated all Integration tests
* Added new integration test for multiple interval reducing
This commit is contained in:
parent
3ecb9a0bf7
commit
94791a2a75
|
|
@ -70,27 +70,27 @@ public class ClippingOp {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SOFTCLIP_BASES:
|
case SOFTCLIP_BASES:
|
||||||
if ( read.getReadUnmappedFlag() ) {
|
if (read.getReadUnmappedFlag()) {
|
||||||
// we can't process unmapped reads
|
// we can't process unmapped reads
|
||||||
throw new UserException("Read Clipper cannot soft clip unmapped reads");
|
throw new UserException("Read Clipper cannot soft clip unmapped reads");
|
||||||
}
|
}
|
||||||
|
|
||||||
//System.out.printf("%d %d %d%n", stop, start, read.getReadLength());
|
//System.out.printf("%d %d %d%n", stop, start, read.getReadLength());
|
||||||
int myStop = stop;
|
int myStop = stop;
|
||||||
if ( (stop + 1 - start) == read.getReadLength() ) {
|
if ((stop + 1 - start) == read.getReadLength()) {
|
||||||
// BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
|
// BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
|
||||||
//Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", read.getReadName()));
|
//Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", read.getReadName()));
|
||||||
//break;
|
//break;
|
||||||
myStop--; // just decrement stop
|
myStop--; // just decrement stop
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( start > 0 && myStop != read.getReadLength() - 1 )
|
if (start > 0 && myStop != read.getReadLength() - 1)
|
||||||
throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d", read.getReadName(), start, myStop));
|
throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d", read.getReadName(), start, myStop));
|
||||||
|
|
||||||
Cigar oldCigar = read.getCigar();
|
Cigar oldCigar = read.getCigar();
|
||||||
|
|
||||||
int scLeft = 0, scRight = read.getReadLength();
|
int scLeft = 0, scRight = read.getReadLength();
|
||||||
if ( start == 0 )
|
if (start == 0)
|
||||||
scLeft = myStop + 1;
|
scLeft = myStop + 1;
|
||||||
else
|
else
|
||||||
scRight = start;
|
scRight = start;
|
||||||
|
|
@ -134,8 +134,7 @@ public class ClippingOp {
|
||||||
unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
|
unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
|
||||||
matchesCount = 0;
|
matchesCount = 0;
|
||||||
unclippedCigar.add(element);
|
unclippedCigar.add(element);
|
||||||
}
|
} else
|
||||||
else
|
|
||||||
unclippedCigar.add(element);
|
unclippedCigar.add(element);
|
||||||
}
|
}
|
||||||
if (matchesCount > 0)
|
if (matchesCount > 0)
|
||||||
|
|
@ -284,10 +283,9 @@ public class ClippingOp {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
|
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
|
||||||
private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) {
|
private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) {
|
||||||
if (start == 0 && stop == read.getReadLength() - 1)
|
if (start == 0 && stop == read.getReadLength() - 1)
|
||||||
return GATKSAMRecord.emptyRead(read);
|
return GATKSAMRecord.emptyRead(read);
|
||||||
// return new GATKSAMRecord(read.getHeader());
|
|
||||||
|
|
||||||
|
|
||||||
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
|
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
|
||||||
|
|
@ -296,8 +294,8 @@ public class ClippingOp {
|
||||||
// the cigar may force a shift left or right (or both) in case we are left with insertions
|
// the cigar may force a shift left or right (or both) in case we are left with insertions
|
||||||
// starting or ending the read after applying the hard clip on start/stop.
|
// starting or ending the read after applying the hard clip on start/stop.
|
||||||
int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
|
int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
|
||||||
byte [] newBases = new byte[newLength];
|
byte[] newBases = new byte[newLength];
|
||||||
byte [] newQuals = new byte[newLength];
|
byte[] newQuals = new byte[newLength];
|
||||||
int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;
|
int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;
|
||||||
|
|
||||||
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
|
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
|
||||||
|
|
@ -321,11 +319,11 @@ public class ClippingOp {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires({"!cigar.isEmpty()"})
|
@Requires({"!cigar.isEmpty()"})
|
||||||
private CigarShift hardClipCigar (Cigar cigar, int start, int stop) {
|
private CigarShift hardClipCigar(Cigar cigar, int start, int stop) {
|
||||||
Cigar newCigar = new Cigar();
|
Cigar newCigar = new Cigar();
|
||||||
int index = 0;
|
int index = 0;
|
||||||
int totalHardClipCount = stop - start + 1;
|
int totalHardClipCount = stop - start + 1;
|
||||||
int alignmentShift = 0; // caused by hard clipping insertions or deletions
|
int alignmentShift = 0; // caused by hard clipping deletions
|
||||||
|
|
||||||
// hard clip the beginning of the cigar string
|
// hard clip the beginning of the cigar string
|
||||||
if (start == 0) {
|
if (start == 0) {
|
||||||
|
|
@ -353,7 +351,7 @@ public class ClippingOp {
|
||||||
// element goes beyond what we need to clip
|
// element goes beyond what we need to clip
|
||||||
else if (index + shift > stop + 1) {
|
else if (index + shift > stop + 1) {
|
||||||
int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
|
int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
|
||||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop-index+1);
|
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop - index + 1);
|
||||||
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
|
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
|
||||||
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
|
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
|
||||||
}
|
}
|
||||||
|
|
@ -388,7 +386,7 @@ public class ClippingOp {
|
||||||
if (index + shift < start)
|
if (index + shift < start)
|
||||||
newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator()));
|
newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator()));
|
||||||
|
|
||||||
// element goes beyond our clip starting position
|
// element goes beyond our clip starting position
|
||||||
else {
|
else {
|
||||||
int elementLengthAfterChopping = start - index;
|
int elementLengthAfterChopping = start - index;
|
||||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength() - (start - index));
|
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength() - (start - index));
|
||||||
|
|
@ -396,7 +394,7 @@ public class ClippingOp {
|
||||||
// if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later
|
// if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later
|
||||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||||
totalHardClipCount += elementLengthAfterChopping;
|
totalHardClipCount += elementLengthAfterChopping;
|
||||||
// otherwise, maintain what's left of this last operator
|
// otherwise, maintain what's left of this last operator
|
||||||
else
|
else
|
||||||
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
|
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
|
||||||
}
|
}
|
||||||
|
|
@ -408,7 +406,7 @@ public class ClippingOp {
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if we are hard clipping indels
|
// check if we are hard clipping indels
|
||||||
while(cigarElementIterator.hasNext()) {
|
while (cigarElementIterator.hasNext()) {
|
||||||
cigarElement = cigarElementIterator.next();
|
cigarElement = cigarElementIterator.next();
|
||||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
|
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
|
||||||
|
|
||||||
|
|
@ -444,34 +442,30 @@ public class ClippingOp {
|
||||||
boolean readHasStarted = false;
|
boolean readHasStarted = false;
|
||||||
boolean addedHardClips = false;
|
boolean addedHardClips = false;
|
||||||
|
|
||||||
while(!cigarStack.empty()) {
|
while (!cigarStack.empty()) {
|
||||||
CigarElement cigarElement = cigarStack.pop();
|
CigarElement cigarElement = cigarStack.pop();
|
||||||
|
|
||||||
if ( !readHasStarted &&
|
if (!readHasStarted &&
|
||||||
cigarElement.getOperator() != CigarOperator.INSERTION &&
|
// cigarElement.getOperator() != CigarOperator.INSERTION &&
|
||||||
cigarElement.getOperator() != CigarOperator.DELETION &&
|
cigarElement.getOperator() != CigarOperator.DELETION &&
|
||||||
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||||
readHasStarted = true;
|
readHasStarted = true;
|
||||||
|
|
||||||
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||||
totalHardClip += cigarElement.getLength();
|
totalHardClip += cigarElement.getLength();
|
||||||
|
|
||||||
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION)
|
else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.DELETION)
|
||||||
shift += cigarElement.getLength();
|
|
||||||
|
|
||||||
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.DELETION)
|
|
||||||
totalHardClip += cigarElement.getLength();
|
totalHardClip += cigarElement.getLength();
|
||||||
|
|
||||||
if (readHasStarted) {
|
if (readHasStarted) {
|
||||||
if (i==1) {
|
if (i == 1) {
|
||||||
if (!addedHardClips) {
|
if (!addedHardClips) {
|
||||||
if (totalHardClip > 0)
|
if (totalHardClip > 0)
|
||||||
inverseCigarStack.push(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
|
inverseCigarStack.push(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
|
||||||
addedHardClips = true;
|
addedHardClips = true;
|
||||||
}
|
}
|
||||||
inverseCigarStack.push(cigarElement);
|
inverseCigarStack.push(cigarElement);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
if (!addedHardClips) {
|
if (!addedHardClips) {
|
||||||
if (totalHardClip > 0)
|
if (totalHardClip > 0)
|
||||||
cleanCigar.add(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
|
cleanCigar.add(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
|
||||||
|
|
@ -498,7 +492,7 @@ public class ClippingOp {
|
||||||
int newShift = 0;
|
int newShift = 0;
|
||||||
int oldShift = 0;
|
int oldShift = 0;
|
||||||
|
|
||||||
boolean readHasStarted = false; // if the new cigar is composed of S and H only, we have to traverse the entire old cigar to calculate the shift
|
boolean readHasStarted = false; // if the new cigar is composed of S and H only, we have to traverse the entire old cigar to calculate the shift
|
||||||
for (CigarElement cigarElement : newCigar.getCigarElements()) {
|
for (CigarElement cigarElement : newCigar.getCigarElements()) {
|
||||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
|
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
|
||||||
newShift += cigarElement.getLength();
|
newShift += cigarElement.getLength();
|
||||||
|
|
@ -509,7 +503,7 @@ public class ClippingOp {
|
||||||
}
|
}
|
||||||
|
|
||||||
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
|
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
|
||||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
|
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
|
||||||
oldShift += cigarElement.getLength();
|
oldShift += cigarElement.getLength();
|
||||||
else if (readHasStarted)
|
else if (readHasStarted)
|
||||||
break;
|
break;
|
||||||
|
|
@ -522,7 +516,7 @@ public class ClippingOp {
|
||||||
if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
||||||
return -clippedLength;
|
return -clippedLength;
|
||||||
|
|
||||||
// Deletions should be added to the total hard clip count
|
// Deletions should be added to the total hard clip count
|
||||||
else if (cigarElement.getOperator() == CigarOperator.DELETION)
|
else if (cigarElement.getOperator() == CigarOperator.DELETION)
|
||||||
return cigarElement.getLength();
|
return cigarElement.getLength();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -374,24 +374,43 @@ public class ReadClipper {
|
||||||
* Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail
|
* Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail
|
||||||
* and hardClipByReferenceCoordinatesRightTail. Should not be used directly.
|
* and hardClipByReferenceCoordinatesRightTail. Should not be used directly.
|
||||||
*
|
*
|
||||||
|
* Note, it REQUIRES you to give the directionality of your hard clip (i.e. whether you're clipping the
|
||||||
|
* left or right tail) by specifying either refStart < 0 or refStop < 0.
|
||||||
|
*
|
||||||
* @param refStart first base to clip (inclusive)
|
* @param refStart first base to clip (inclusive)
|
||||||
* @param refStop last base to clip (inclusive)
|
* @param refStop last base to clip (inclusive)
|
||||||
* @return a new read, without the clipped bases
|
* @return a new read, without the clipped bases
|
||||||
*/
|
*/
|
||||||
@Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip
|
@Requires({"!read.getReadUnmappedFlag()", "refStart < 0 || refStop < 0"}) // can't handle unmapped reads, as we're using reference coordinates to clip
|
||||||
protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
|
if (read.isEmpty())
|
||||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
|
return read;
|
||||||
|
|
||||||
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
|
int start;
|
||||||
return GATKSAMRecord.emptyRead(read);
|
int stop;
|
||||||
// return new GATKSAMRecord(read.getHeader());
|
|
||||||
|
// Determine the read coordinate to start and stop hard clipping
|
||||||
|
if (refStart < 0) {
|
||||||
|
if (refStop < 0)
|
||||||
|
throw new ReviewedStingException("Only one of refStart or refStop must be < 0, not both (" + refStart + ", " + refStop + ")");
|
||||||
|
start = 0;
|
||||||
|
stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (refStop >= 0)
|
||||||
|
throw new ReviewedStingException("Either refStart or refStop must be < 0 (" + refStart + ", " + refStop + ")");
|
||||||
|
start = ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
|
||||||
|
stop = read.getReadLength() - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if ((start == 0 && stop == read.getReadLength() - 1))
|
||||||
|
// return GATKSAMRecord.emptyRead(read);
|
||||||
|
|
||||||
if (start < 0 || stop > read.getReadLength() - 1)
|
if (start < 0 || stop > read.getReadLength() - 1)
|
||||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||||
|
|
||||||
if ( start > stop )
|
if ( start > stop )
|
||||||
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
|
throw new ReviewedStingException(String.format("START (%d) > (%d) STOP -- this should never happen -- call Mauricio!", start, stop));
|
||||||
|
|
||||||
if ( start > 0 && stop < read.getReadLength() - 1)
|
if ( start > 0 && stop < read.getReadLength() - 1)
|
||||||
throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString()));
|
throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString()));
|
||||||
|
|
|
||||||
|
|
@ -238,7 +238,7 @@ public class ArtificialSAMUtils {
|
||||||
*/
|
*/
|
||||||
public static GATKSAMRecord createArtificialRead( byte[] bases, byte[] qual, String cigar ) {
|
public static GATKSAMRecord createArtificialRead( byte[] bases, byte[] qual, String cigar ) {
|
||||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
|
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
|
||||||
return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 1, bases, qual, cigar);
|
return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 10000, bases, qual, cigar);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,6 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.sam;
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||||
|
|
||||||
|
|
@ -277,7 +276,6 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
*
|
*
|
||||||
* @return the unclipped start of the read taking soft clips (but not hard clips) into account
|
* @return the unclipped start of the read taking soft clips (but not hard clips) into account
|
||||||
*/
|
*/
|
||||||
@Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"})
|
|
||||||
public int getSoftStart() {
|
public int getSoftStart() {
|
||||||
int start = this.getUnclippedStart();
|
int start = this.getUnclippedStart();
|
||||||
for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
|
for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
|
||||||
|
|
@ -286,17 +284,17 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates the reference coordinate for the end of the read taking into account soft clips but not hard clips.
|
* Calculates the reference coordinate for the end of the read taking into account soft clips but not hard clips.
|
||||||
*
|
*
|
||||||
* Note: getUnclippedStart() adds soft and hard clips, this function only adds soft clips.
|
* Note: getUnclippedEnd() adds soft and hard clips, this function only adds soft clips.
|
||||||
*
|
*
|
||||||
* @return the unclipped end of the read taking soft clips (but not hard clips) into account
|
* @return the unclipped end of the read taking soft clips (but not hard clips) into account
|
||||||
*/
|
*/
|
||||||
@Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"})
|
|
||||||
public int getSoftEnd() {
|
public int getSoftEnd() {
|
||||||
int stop = this.getUnclippedStart();
|
int stop = this.getUnclippedStart();
|
||||||
|
|
||||||
|
|
@ -313,6 +311,7 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
else
|
else
|
||||||
shift = 0;
|
shift = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,7 @@ public class ReadUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A HashMap of the SAM spec read flag names
|
* A HashMap of the SAM spec read flag names
|
||||||
* <p/>
|
*
|
||||||
* Note: This is not being used right now, but can be useful in the future
|
* Note: This is not being used right now, but can be useful in the future
|
||||||
*/
|
*/
|
||||||
private static final Map<Integer, String> readFlagNames = new HashMap<Integer, String>();
|
private static final Map<Integer, String> readFlagNames = new HashMap<Integer, String>();
|
||||||
|
|
@ -79,49 +79,47 @@ public class ReadUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This enum represents all the different ways in which a read can overlap an interval.
|
* This enum represents all the different ways in which a read can overlap an interval.
|
||||||
* <p/>
|
*
|
||||||
* NO_OVERLAP_CONTIG:
|
* NO_OVERLAP_CONTIG:
|
||||||
* read and interval are in different contigs.
|
* read and interval are in different contigs.
|
||||||
* <p/>
|
*
|
||||||
* NO_OVERLAP_LEFT:
|
* NO_OVERLAP_LEFT:
|
||||||
* the read does not overlap the interval.
|
* the read does not overlap the interval.
|
||||||
* <p/>
|
*
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
* <----------------> (read)
|
* <----------------> (read)
|
||||||
* <p/>
|
*
|
||||||
* NO_OVERLAP_RIGHT:
|
* NO_OVERLAP_RIGHT:
|
||||||
* the read does not overlap the interval.
|
* the read does not overlap the interval.
|
||||||
* <p/>
|
*
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
* <----------------> (read)
|
* <----------------> (read)
|
||||||
* <p/>
|
*
|
||||||
* OVERLAP_LEFT:
|
* OVERLAP_LEFT:
|
||||||
* the read starts before the beginning of the interval but ends inside of it
|
* the read starts before the beginning of the interval but ends inside of it
|
||||||
* <p/>
|
*
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
* <----------------> (read)
|
* <----------------> (read)
|
||||||
* <p/>
|
*
|
||||||
* OVERLAP_RIGHT:
|
* OVERLAP_RIGHT:
|
||||||
* the read starts inside the interval but ends outside of it
|
* the read starts inside the interval but ends outside of it
|
||||||
* <p/>
|
*
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
* <----------------> (read)
|
* <----------------> (read)
|
||||||
* <p/>
|
*
|
||||||
* OVERLAP_LEFT_AND_RIGHT:
|
* OVERLAP_LEFT_AND_RIGHT:
|
||||||
* the read starts before the interval and ends after the interval
|
* the read starts before the interval and ends after the interval
|
||||||
* <p/>
|
*
|
||||||
* |-----------| (interval)
|
* |-----------| (interval)
|
||||||
* <-------------------> (read)
|
* <-------------------> (read)
|
||||||
* <p/>
|
*
|
||||||
* OVERLAP_CONTAINED:
|
* OVERLAP_CONTAINED:
|
||||||
* the read starts and ends inside the interval
|
* the read starts and ends inside the interval
|
||||||
* <p/>
|
*
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
* <--------> (read)
|
* <--------> (read)
|
||||||
*/
|
*/
|
||||||
public enum ReadAndIntervalOverlap {
|
public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}
|
||||||
NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a SAMFileWriter with the given compression level if you request a bam file. Creates a regular
|
* Creates a SAMFileWriter with the given compression level if you request a bam file. Creates a regular
|
||||||
|
|
@ -141,15 +139,15 @@ public class ReadUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* is this base inside the adaptor of the read?
|
* is this base inside the adaptor of the read?
|
||||||
* <p/>
|
*
|
||||||
* There are two cases to treat here:
|
* There are two cases to treat here:
|
||||||
* <p/>
|
*
|
||||||
* 1) Read is in the negative strand => Adaptor boundary is on the left tail
|
* 1) Read is in the negative strand => Adaptor boundary is on the left tail
|
||||||
* 2) Read is in the positive strand => Adaptor boundary is on the right tail
|
* 2) Read is in the positive strand => Adaptor boundary is on the right tail
|
||||||
* <p/>
|
*
|
||||||
* Note: We return false for all reads that are UNMAPPED or have a weird big insert size (probably due to mismapping or bigger event)
|
* Note: We return false for all reads that are UNMAPPED or have a weird big insert size (probably due to mismapping or bigger event)
|
||||||
*
|
*
|
||||||
* @param read the read to test
|
* @param read the read to test
|
||||||
* @param basePos base position in REFERENCE coordinates (not read coordinates)
|
* @param basePos base position in REFERENCE coordinates (not read coordinates)
|
||||||
* @return whether or not the base is in the adaptor
|
* @return whether or not the base is in the adaptor
|
||||||
*/
|
*/
|
||||||
|
|
@ -166,22 +164,22 @@ public class ReadUtils {
|
||||||
* the read boundary. If the read is in the positive strand, this is the first base after the end of the
|
* the read boundary. If the read is in the positive strand, this is the first base after the end of the
|
||||||
* fragment (Picard calls it 'insert'), if the read is in the negative strand, this is the first base before the
|
* fragment (Picard calls it 'insert'), if the read is in the negative strand, this is the first base before the
|
||||||
* beginning of the fragment.
|
* beginning of the fragment.
|
||||||
* <p/>
|
*
|
||||||
* There are two cases we need to treat here:
|
* There are two cases we need to treat here:
|
||||||
* <p/>
|
*
|
||||||
* 1) Our read is in the reverse strand :
|
* 1) Our read is in the reverse strand :
|
||||||
* <p/>
|
*
|
||||||
* <----------------------| *
|
* <----------------------| *
|
||||||
* |--------------------->
|
* |--------------------->
|
||||||
* <p/>
|
*
|
||||||
* in these cases, the adaptor boundary is at the mate start (minus one)
|
* in these cases, the adaptor boundary is at the mate start (minus one)
|
||||||
* <p/>
|
*
|
||||||
* 2) Our read is in the forward strand :
|
* 2) Our read is in the forward strand :
|
||||||
* <p/>
|
*
|
||||||
* |----------------------> *
|
* |----------------------> *
|
||||||
* <----------------------|
|
* <----------------------|
|
||||||
* <p/>
|
*
|
||||||
* in these cases the adaptor boundary is at the start of the read plus the inferred insert size (plus one)
|
* in these cases the adaptor boundary is at the start of the read plus the inferred insert size (plus one)
|
||||||
*
|
*
|
||||||
* @param read the read being tested for the adaptor boundary
|
* @param read the read being tested for the adaptor boundary
|
||||||
* @return the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read). NULL if the read is unmapped or the mate is mapped to another contig.
|
* @return the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read). NULL if the read is unmapped or the mate is mapped to another contig.
|
||||||
|
|
@ -264,7 +262,7 @@ public class ReadUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If a read starts in INSERTION, returns the first element length.
|
* If a read starts in INSERTION, returns the first element length.
|
||||||
* <p/>
|
*
|
||||||
* Warning: If the read has Hard or Soft clips before the insertion this function will return 0.
|
* Warning: If the read has Hard or Soft clips before the insertion this function will return 0.
|
||||||
*
|
*
|
||||||
* @param read
|
* @param read
|
||||||
|
|
@ -272,7 +270,7 @@ public class ReadUtils {
|
||||||
*/
|
*/
|
||||||
public final static int getFirstInsertionOffset(SAMRecord read) {
|
public final static int getFirstInsertionOffset(SAMRecord read) {
|
||||||
CigarElement e = read.getCigar().getCigarElement(0);
|
CigarElement e = read.getCigar().getCigarElement(0);
|
||||||
if (e.getOperator() == CigarOperator.I)
|
if ( e.getOperator() == CigarOperator.I )
|
||||||
return e.getLength();
|
return e.getLength();
|
||||||
else
|
else
|
||||||
return 0;
|
return 0;
|
||||||
|
|
@ -280,7 +278,7 @@ public class ReadUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If a read ends in INSERTION, returns the last element length.
|
* If a read ends in INSERTION, returns the last element length.
|
||||||
* <p/>
|
*
|
||||||
* Warning: If the read has Hard or Soft clips after the insertion this function will return 0.
|
* Warning: If the read has Hard or Soft clips after the insertion this function will return 0.
|
||||||
*
|
*
|
||||||
* @param read
|
* @param read
|
||||||
|
|
@ -288,7 +286,7 @@ public class ReadUtils {
|
||||||
*/
|
*/
|
||||||
public final static int getLastInsertionOffset(SAMRecord read) {
|
public final static int getLastInsertionOffset(SAMRecord read) {
|
||||||
CigarElement e = read.getCigar().getCigarElement(read.getCigarLength() - 1);
|
CigarElement e = read.getCigar().getCigarElement(read.getCigarLength() - 1);
|
||||||
if (e.getOperator() == CigarOperator.I)
|
if ( e.getOperator() == CigarOperator.I )
|
||||||
return e.getLength();
|
return e.getLength();
|
||||||
else
|
else
|
||||||
return 0;
|
return 0;
|
||||||
|
|
@ -297,8 +295,7 @@ public class ReadUtils {
|
||||||
/**
|
/**
|
||||||
* Determines what is the position of the read in relation to the interval.
|
* Determines what is the position of the read in relation to the interval.
|
||||||
* Note: This function uses the UNCLIPPED ENDS of the reads for the comparison.
|
* Note: This function uses the UNCLIPPED ENDS of the reads for the comparison.
|
||||||
*
|
* @param read the read
|
||||||
* @param read the read
|
|
||||||
* @param interval the interval
|
* @param interval the interval
|
||||||
* @return the overlap type as described by ReadAndIntervalOverlap enum (see above)
|
* @return the overlap type as described by ReadAndIntervalOverlap enum (see above)
|
||||||
*/
|
*/
|
||||||
|
|
@ -309,30 +306,30 @@ public class ReadUtils {
|
||||||
int uStart = read.getUnclippedStart();
|
int uStart = read.getUnclippedStart();
|
||||||
int uStop = read.getUnclippedEnd();
|
int uStop = read.getUnclippedEnd();
|
||||||
|
|
||||||
if (!read.getReferenceName().equals(interval.getContig()))
|
if ( !read.getReferenceName().equals(interval.getContig()) )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
|
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
|
||||||
|
|
||||||
else if (uStop < interval.getStart())
|
else if ( uStop < interval.getStart() )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;
|
return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;
|
||||||
|
|
||||||
else if (uStart > interval.getStop())
|
else if ( uStart > interval.getStop() )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;
|
return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;
|
||||||
|
|
||||||
else if (sStop < interval.getStart())
|
else if ( sStop < interval.getStart() )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT;
|
return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT;
|
||||||
|
|
||||||
else if (sStart > interval.getStop())
|
else if ( sStart > interval.getStop() )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT;
|
return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT;
|
||||||
|
|
||||||
else if ((sStart >= interval.getStart()) &&
|
else if ( (sStart >= interval.getStart()) &&
|
||||||
(sStop <= interval.getStop()))
|
(sStop <= interval.getStop()) )
|
||||||
return ReadAndIntervalOverlap.OVERLAP_CONTAINED;
|
return ReadAndIntervalOverlap.OVERLAP_CONTAINED;
|
||||||
|
|
||||||
else if ((sStart < interval.getStart()) &&
|
else if ( (sStart < interval.getStart()) &&
|
||||||
(sStop > interval.getStop()))
|
(sStop > interval.getStop()) )
|
||||||
return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;
|
return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;
|
||||||
|
|
||||||
else if ((sStart < interval.getStart()))
|
else if ( (sStart < interval.getStart()) )
|
||||||
return ReadAndIntervalOverlap.OVERLAP_LEFT;
|
return ReadAndIntervalOverlap.OVERLAP_LEFT;
|
||||||
|
|
||||||
else
|
else
|
||||||
|
|
@ -340,36 +337,52 @@ public class ReadUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) in case it falls in
|
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) to take care of
|
||||||
* a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns
|
* two corner cases:
|
||||||
* the base prior to the deletion. If clipping the right tail (end of the read) returns the base after the
|
*
|
||||||
* deletion.
|
* 1. If clipping the right tail (end of the read) and the requested reference coordinate falls inside
|
||||||
|
* a deletion return the base after the deletion. If clipping the left tail (beginning of the read) it
|
||||||
|
* doesn't matter because it already returns the previous base by default.
|
||||||
|
*
|
||||||
|
* 2. If clipping the left tail (beginning of the read) with getReadCoordinateForReferenceCoordinate and the
|
||||||
|
* read starts with an insertion, and you're requesting the first read based coordinate, it will skip
|
||||||
|
* the leading insertion (because it has the same reference coordinate as the following base).
|
||||||
*
|
*
|
||||||
* @param read
|
* @param read
|
||||||
* @param refCoord
|
* @param refCoord
|
||||||
* @param tail
|
* @param tail
|
||||||
* @return the read coordinate corresponding to the requested reference coordinate for clipping.
|
* @return the read coordinate corresponding to the requested reference coordinate for clipping.
|
||||||
*/
|
*/
|
||||||
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
|
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd() || (read.getUnclippedEnd() < read.getUnclippedStart())"})
|
||||||
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
||||||
public static int getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord, ClippingTail tail) {
|
public static int getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord, ClippingTail tail) {
|
||||||
Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate(read, refCoord);
|
Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate(read, refCoord);
|
||||||
int readCoord = result.getFirst();
|
int readCoord = result.getFirst();
|
||||||
|
|
||||||
|
// Corner case one: clipping the right tail and falls on deletion, move to the next
|
||||||
|
// read coordinate. It is not a problem for the left tail because the default answer
|
||||||
|
// from getReadCoordinateForReferenceCoordinate is to give the previous read coordinate.
|
||||||
if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL)
|
if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL)
|
||||||
readCoord++;
|
readCoord++;
|
||||||
|
|
||||||
|
// clipping the left tail and first base is insertion, go to the next read coordinate
|
||||||
|
// with the same reference coordinate. Advance to the next cigar element, or to the
|
||||||
|
// end of the read if there is no next element.
|
||||||
|
Pair<Boolean, CigarElement> firstElementIsInsertion = readStartsWithInsertion(read);
|
||||||
|
if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion.getFirst())
|
||||||
|
readCoord = Math.min(firstElementIsInsertion.getSecond().getLength(), read.getReadLength() - 1);
|
||||||
|
|
||||||
return readCoord;
|
return readCoord;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the read coordinate corresponding to the requested reference coordinate.
|
* Returns the read coordinate corresponding to the requested reference coordinate.
|
||||||
* <p/>
|
*
|
||||||
* WARNING: if the requested reference coordinate happens to fall inside a deletion in the read, this function
|
* WARNING: if the requested reference coordinate happens to fall inside a deletion in the read, this function
|
||||||
* will return the last read base before the deletion. This function returns a
|
* will return the last read base before the deletion. This function returns a
|
||||||
* Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with
|
* Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with
|
||||||
* a deletion.
|
* a deletion.
|
||||||
* <p/>
|
*
|
||||||
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail) instead to get a
|
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail) instead to get a
|
||||||
* pre-processed result according to normal clipping needs. Or you can use this function and tailor the
|
* pre-processed result according to normal clipping needs. Or you can use this function and tailor the
|
||||||
* behavior to your needs.
|
* behavior to your needs.
|
||||||
|
|
@ -421,7 +434,7 @@ public class ReadUtils {
|
||||||
if (endsWithinCigar)
|
if (endsWithinCigar)
|
||||||
fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
|
fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
|
||||||
|
|
||||||
// if we end outside the current cigar element, we need to check if the next element is an insertion or deletion.
|
// if we end outside the current cigar element, we need to check if the next element is an insertion or deletion.
|
||||||
else {
|
else {
|
||||||
nextCigarElement = cigarElementIterator.next();
|
nextCigarElement = cigarElementIterator.next();
|
||||||
|
|
||||||
|
|
@ -442,13 +455,13 @@ public class ReadUtils {
|
||||||
if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
|
if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
|
||||||
readBases += shift;
|
readBases += shift;
|
||||||
|
|
||||||
// If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
|
// If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
|
||||||
// to add the shift of the current cigar element but go back to its last element to return the last
|
// to add the shift of the current cigar element but go back to its last element to return the last
|
||||||
// base before the deletion (see warning in function contracts)
|
// base before the deletion (see warning in function contracts)
|
||||||
else if (fallsInsideDeletion && !endsWithinCigar)
|
else if (fallsInsideDeletion && !endsWithinCigar)
|
||||||
readBases += shift - 1;
|
readBases += shift - 1;
|
||||||
|
|
||||||
// If we reached our goal inside a deletion then we must backtrack to the last base before the deletion
|
// If we reached our goal inside a deletion then we must backtrack to the last base before the deletion
|
||||||
else if (fallsInsideDeletion && endsWithinCigar)
|
else if (fallsInsideDeletion && endsWithinCigar)
|
||||||
readBases--;
|
readBases--;
|
||||||
}
|
}
|
||||||
|
|
@ -457,7 +470,6 @@ public class ReadUtils {
|
||||||
if (!goalReached)
|
if (!goalReached)
|
||||||
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
||||||
|
|
||||||
|
|
||||||
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
|
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -465,12 +477,11 @@ public class ReadUtils {
|
||||||
* Compares two SAMRecords only on the basis of alignment start. Note that
|
* Compares two SAMRecords only on the basis of alignment start. Note that
|
||||||
* comparisons are performed ONLY on the basis of alignment start; any
|
* comparisons are performed ONLY on the basis of alignment start; any
|
||||||
* two SAM records with the same alignment start will be considered equal.
|
* two SAM records with the same alignment start will be considered equal.
|
||||||
* <p/>
|
*
|
||||||
* Unmapped alignments will all be considered equal.
|
* Unmapped alignments will all be considered equal.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@Requires({"read1 != null", "read2 != null"})
|
@Requires({"read1 != null", "read2 != null"})
|
||||||
@Ensures("result == 0 || result == 1 || result == -1")
|
|
||||||
public static int compareSAMRecords(GATKSAMRecord read1, GATKSAMRecord read2) {
|
public static int compareSAMRecords(GATKSAMRecord read1, GATKSAMRecord read2) {
|
||||||
AlignmentStartComparator comp = new AlignmentStartComparator();
|
AlignmentStartComparator comp = new AlignmentStartComparator();
|
||||||
return comp.compare(read1, read2);
|
return comp.compare(read1, read2);
|
||||||
|
|
@ -479,7 +490,7 @@ public class ReadUtils {
|
||||||
/**
|
/**
|
||||||
* Is a base inside a read?
|
* Is a base inside a read?
|
||||||
*
|
*
|
||||||
* @param read the read to evaluate
|
* @param read the read to evaluate
|
||||||
* @param referenceCoordinate the reference coordinate of the base to test
|
* @param referenceCoordinate the reference coordinate of the base to test
|
||||||
* @return true if it is inside the read, false otherwise.
|
* @return true if it is inside the read, false otherwise.
|
||||||
*/
|
*/
|
||||||
|
|
@ -502,4 +513,22 @@ public class ReadUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if a read starts with an insertion. It looks beyond Hard and Soft clips
|
||||||
|
* if there are any.
|
||||||
|
*
|
||||||
|
* @param read
|
||||||
|
* @return A pair with the answer (true/false) and the element or null if it doesn't exist
|
||||||
|
*/
|
||||||
|
public static Pair<Boolean, CigarElement> readStartsWithInsertion(GATKSAMRecord read) {
|
||||||
|
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||||
|
if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
||||||
|
return new Pair<Boolean, CigarElement>(true, cigarElement);
|
||||||
|
|
||||||
|
else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return new Pair<Boolean, CigarElement>(false, null);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -112,8 +112,9 @@ public class ReadClipperTestUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
|
// if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
|
||||||
return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
|
if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION)
|
||||||
|
return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -190,4 +191,18 @@ public class ReadClipperTestUtils {
|
||||||
return invertedCigar;
|
return invertedCigar;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether or not the read has any cigar element that is not H or S
|
||||||
|
*
|
||||||
|
* @param read
|
||||||
|
* @return true if it has any M, I or D, false otherwise
|
||||||
|
*/
|
||||||
|
public static boolean readHasNonClippedBases(GATKSAMRecord read) {
|
||||||
|
for (CigarElement cigarElement : read.getCigar().getCigarElements())
|
||||||
|
if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -30,12 +30,12 @@ import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeClass;
|
import org.testng.annotations.BeforeClass;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -59,10 +59,11 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
int alnStart = read.getAlignmentStart();
|
int alnStart = read.getAlignmentStart();
|
||||||
int alnEnd = read.getAlignmentEnd();
|
int alnEnd = read.getAlignmentEnd();
|
||||||
int readLength = alnStart - alnEnd;
|
int readLength = alnStart - alnEnd;
|
||||||
for (int i=0; i<readLength/2; i++) {
|
for (int i = 0; i < readLength / 2; i++) {
|
||||||
GATKSAMRecord clippedRead = ReadClipper.hardClipBothEndsByReferenceCoordinates(read, alnStart + i, alnEnd - i);
|
GATKSAMRecord clippedRead = ReadClipper.hardClipBothEndsByReferenceCoordinates(read, alnStart + i, alnEnd - i);
|
||||||
Assert.assertTrue(clippedRead.getAlignmentStart() >= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
|
Assert.assertTrue(clippedRead.getAlignmentStart() >= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
|
||||||
Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
|
Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
|
||||||
|
assertUnclippedLimits(read, clippedRead);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -72,12 +73,14 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
for (Cigar cigar : cigarList) {
|
for (Cigar cigar : cigarList) {
|
||||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
int readLength = read.getReadLength();
|
int readLength = read.getReadLength();
|
||||||
for (int i=0; i<readLength; i++) {
|
for (int i = 0; i < readLength; i++) {
|
||||||
GATKSAMRecord clipLeft = ReadClipper.hardClipByReadCoordinates(read, 0, i);
|
GATKSAMRecord clipLeft = ReadClipper.hardClipByReadCoordinates(read, 0, i);
|
||||||
Assert.assertTrue(clipLeft.getReadLength() <= readLength - i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipLeft.getCigarString()));
|
Assert.assertTrue(clipLeft.getReadLength() <= readLength - i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipLeft.getCigarString()));
|
||||||
|
assertUnclippedLimits(read, clipLeft);
|
||||||
|
|
||||||
GATKSAMRecord clipRight = ReadClipper.hardClipByReadCoordinates(read, i, readLength-1);
|
GATKSAMRecord clipRight = ReadClipper.hardClipByReadCoordinates(read, i, readLength - 1);
|
||||||
Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString()));
|
Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString()));
|
||||||
|
assertUnclippedLimits(read, clipRight);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -86,19 +89,27 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
public void testHardClipByReferenceCoordinates() {
|
public void testHardClipByReferenceCoordinates() {
|
||||||
for (Cigar cigar : cigarList) {
|
for (Cigar cigar : cigarList) {
|
||||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
int alnStart = read.getAlignmentStart();
|
int start = read.getSoftStart();
|
||||||
int alnEnd = read.getAlignmentEnd();
|
int stop = read.getSoftEnd();
|
||||||
for (int i=alnStart; i<=alnEnd; i++) {
|
|
||||||
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
// System.out.println(String.format("CIGAR: %s (%d, %d)", cigar.toString(), start, stop));
|
||||||
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
|
|
||||||
if (!clipLeft.isEmpty())
|
// if (ReadUtils.readIsEntirelyInsertion(read))
|
||||||
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
// System.out.println("debug");
|
||||||
|
|
||||||
|
for (int i = start; i <= stop; i++) {
|
||||||
|
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, i);
|
||||||
|
if (!clipLeft.isEmpty()) {
|
||||||
|
// System.out.println(String.format("\t left [%d] %s -> %s ", i-start+1, cigar.toString(), clipLeft.getCigarString()));
|
||||||
|
Assert.assertTrue(clipLeft.getAlignmentStart() >= Math.min(read.getAlignmentEnd(), i + 1), String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
||||||
|
assertUnclippedLimits(read, clipLeft);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, -1);
|
||||||
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
|
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
// System.out.println(String.format("\t right [%d] %s -> %s ", i-start+1, cigar.toString(), clipRight.getCigarString()));
|
||||||
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
Assert.assertTrue(clipRight.getAlignmentEnd() <= Math.max(read.getAlignmentStart(), i - 1), String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
||||||
|
assertUnclippedLimits(read, clipRight);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -111,10 +122,14 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
int alnStart = read.getAlignmentStart();
|
int alnStart = read.getAlignmentStart();
|
||||||
int alnEnd = read.getAlignmentEnd();
|
int alnEnd = read.getAlignmentEnd();
|
||||||
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
||||||
for (int i=alnStart; i<=alnEnd; i++) {
|
for (int i = alnStart; i <= alnEnd; i++) {
|
||||||
GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i);
|
GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i);
|
||||||
if (!clipLeft.isEmpty())
|
|
||||||
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
if (!clipLeft.isEmpty()) {
|
||||||
|
// System.out.println(String.format("Left Tail [%d]: %s (%d,%d,%d : %d,%d,%d) -> %s (%d,%d,%d : %d,%d,%d)", i, cigar.toString(), read.getUnclippedStart(), read.getSoftStart(), read.getAlignmentStart(), read.getAlignmentEnd(), read.getSoftEnd(), read.getUnclippedEnd(), clipLeft.getCigarString(), clipLeft.getUnclippedStart(), clipLeft.getSoftStart(), clipLeft.getAlignmentStart(), clipLeft.getAlignmentEnd(), clipLeft.getSoftEnd(), clipLeft.getUnclippedEnd()));
|
||||||
|
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
||||||
|
assertUnclippedLimits(read, clipLeft);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -127,10 +142,12 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
int alnStart = read.getAlignmentStart();
|
int alnStart = read.getAlignmentStart();
|
||||||
int alnEnd = read.getAlignmentEnd();
|
int alnEnd = read.getAlignmentEnd();
|
||||||
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
||||||
for (int i=alnStart; i<=alnEnd; i++) {
|
for (int i = alnStart; i <= alnEnd; i++) {
|
||||||
GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i);
|
GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i);
|
||||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||||
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
||||||
|
assertUnclippedLimits(read, clipRight);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -145,43 +162,36 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
for (Cigar cigar : cigarList) {
|
for (Cigar cigar : cigarList) {
|
||||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
int readLength = read.getReadLength();
|
int readLength = read.getReadLength();
|
||||||
byte [] quals = new byte[readLength];
|
byte[] quals = new byte[readLength];
|
||||||
|
|
||||||
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
|
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
|
||||||
|
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail
|
||||||
// create a read with nLowQualBases in the left tail
|
|
||||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
|
||||||
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
|
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
|
||||||
quals[addLeft] = LOW_QUAL;
|
quals[addLeft] = LOW_QUAL;
|
||||||
read.setBaseQualities(quals);
|
read.setBaseQualities(quals);
|
||||||
GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
||||||
|
|
||||||
// Tests
|
assertUnclippedLimits(read, clipLeft); // Make sure limits haven't changed
|
||||||
|
assertNoLowQualBases(clipLeft, LOW_QUAL); // Make sure the low qualities are gone
|
||||||
|
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
|
||||||
|
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
|
||||||
|
|
||||||
// Make sure the low qualities are gone
|
|
||||||
assertNoLowQualBases(clipLeft, LOW_QUAL);
|
|
||||||
|
|
||||||
// Can't run this test with the current contract of no hanging insertions
|
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the right tail
|
||||||
// Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
|
|
||||||
|
|
||||||
// create a read with nLowQualBases in the right tail
|
|
||||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
|
||||||
for (int addRight = 0; addRight < nLowQualBases; addRight++)
|
for (int addRight = 0; addRight < nLowQualBases; addRight++)
|
||||||
quals[readLength - addRight - 1] = LOW_QUAL;
|
quals[readLength - addRight - 1] = LOW_QUAL;
|
||||||
read.setBaseQualities(quals);
|
read.setBaseQualities(quals);
|
||||||
GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
||||||
|
|
||||||
// Tests
|
// System.out.println(String.format("Debug [%d]: %s -> %s / %s", nLowQualBases, cigar.toString(), clipLeft.getCigarString(), clipRight.getCigarString()));
|
||||||
|
|
||||||
// Make sure the low qualities are gone
|
assertUnclippedLimits(read, clipRight); // Make sure limits haven't changed
|
||||||
assertNoLowQualBases(clipRight, LOW_QUAL);
|
assertNoLowQualBases(clipRight, LOW_QUAL); // Make sure the low qualities are gone
|
||||||
|
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
|
||||||
|
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
|
||||||
|
|
||||||
// Make sure we haven't clipped any high quals -- Can't run this test with the current contract of no hanging insertions
|
if (nLowQualBases <= readLength / 2) {
|
||||||
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
|
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases on both tails
|
||||||
|
|
||||||
// create a read with nLowQualBases in the both tails
|
|
||||||
if (nLowQualBases <= readLength/2) {
|
|
||||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
|
||||||
for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
|
for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
|
||||||
quals[addBoth] = LOW_QUAL;
|
quals[addBoth] = LOW_QUAL;
|
||||||
quals[readLength - addBoth - 1] = LOW_QUAL;
|
quals[readLength - addBoth - 1] = LOW_QUAL;
|
||||||
|
|
@ -189,83 +199,25 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
read.setBaseQualities(quals);
|
read.setBaseQualities(quals);
|
||||||
GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
||||||
|
|
||||||
// Tests
|
assertUnclippedLimits(read, clipBoth); // Make sure limits haven't changed
|
||||||
|
assertNoLowQualBases(clipBoth, LOW_QUAL); // Make sure the low qualities are gone
|
||||||
// Make sure the low qualities are gone
|
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
|
||||||
assertNoLowQualBases(clipBoth, LOW_QUAL);
|
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2 * nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
|
||||||
|
|
||||||
// Can't run this test with the current contract of no hanging insertions
|
|
||||||
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2*nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// logger.warn(String.format("Testing %s for all combinations of low/high qual... PASSED", read.getCigarString()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ONE OFF Testing clipping that ends inside an insertion ( Ryan's bug )
|
|
||||||
final byte[] BASES = {'A','C','G','T','A','C','G','T'};
|
|
||||||
final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2};
|
|
||||||
final String CIGAR = "1S1M5I1S";
|
|
||||||
|
|
||||||
final byte[] CLIPPED_BASES = {};
|
|
||||||
final byte[] CLIPPED_QUALS = {};
|
|
||||||
final String CLIPPED_CIGAR = "";
|
|
||||||
|
|
||||||
|
|
||||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR);
|
|
||||||
GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR);
|
|
||||||
|
|
||||||
ReadClipperTestUtils.assertEqualReads(ReadClipper.hardClipLowQualEnds(read, (byte) 2), expected);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testHardClipSoftClippedBases() {
|
public void testHardClipSoftClippedBases() {
|
||||||
|
|
||||||
// Generate a list of cigars to test
|
|
||||||
for (Cigar cigar : cigarList) {
|
for (Cigar cigar : cigarList) {
|
||||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
GATKSAMRecord clippedRead = ReadClipper.hardClipSoftClippedBases(read);
|
GATKSAMRecord clippedRead = ReadClipper.hardClipSoftClippedBases(read);
|
||||||
|
CigarCounter original = new CigarCounter(read);
|
||||||
|
CigarCounter clipped = new CigarCounter(clippedRead);
|
||||||
|
|
||||||
int sumHardClips = 0;
|
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
|
||||||
int sumMatches = 0;
|
original.assertHardClippingSoftClips(clipped); // Make sure we have only clipped SOFT_CLIPS
|
||||||
|
|
||||||
boolean tail = true;
|
|
||||||
for (CigarElement element : read.getCigar().getCigarElements()) {
|
|
||||||
// Assuming cigars are well formed, if we see S or H, it means we're on the tail (left or right)
|
|
||||||
if (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP)
|
|
||||||
tail = true;
|
|
||||||
|
|
||||||
// Adds all H, S and D's (next to hard/soft clips).
|
|
||||||
// All these should be hard clips after clipping.
|
|
||||||
if (tail && (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.DELETION))
|
|
||||||
sumHardClips += element.getLength();
|
|
||||||
|
|
||||||
// this means we're no longer on the tail (insertions can still potentially be the tail because
|
|
||||||
// of the current contract of clipping out hanging insertions
|
|
||||||
else if (element.getOperator() != CigarOperator.INSERTION)
|
|
||||||
tail = false;
|
|
||||||
|
|
||||||
// Adds all matches to verify that they remain the same after clipping
|
|
||||||
if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
|
|
||||||
sumMatches += element.getLength();
|
|
||||||
}
|
|
||||||
|
|
||||||
for (CigarElement element : clippedRead.getCigar().getCigarElements()) {
|
|
||||||
// Test if clipped read has Soft Clips (shouldn't have any!)
|
|
||||||
Assert.assertTrue( element.getOperator() != CigarOperator.SOFT_CLIP, String.format("Cigar %s -> %s -- FAILED (resulting cigar has soft clips)", read.getCigarString(), clippedRead.getCigarString()));
|
|
||||||
|
|
||||||
// Keep track of the total number of Hard Clips after clipping to make sure everything was accounted for
|
|
||||||
if (element.getOperator() == CigarOperator.HARD_CLIP)
|
|
||||||
sumHardClips -= element.getLength();
|
|
||||||
|
|
||||||
// Make sure all matches are still there
|
|
||||||
if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
|
|
||||||
sumMatches -= element.getLength();
|
|
||||||
}
|
|
||||||
Assert.assertTrue( sumHardClips == 0, String.format("Cigar %s -> %s -- FAILED (number of hard clips mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumHardClips));
|
|
||||||
Assert.assertTrue( sumMatches == 0, String.format("Cigar %s -> %s -- FAILED (number of matches mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumMatches));
|
|
||||||
|
|
||||||
|
|
||||||
// logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString()));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -276,38 +228,39 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
GATKSAMRecord clippedRead = ReadClipper.hardClipLeadingInsertions(read);
|
GATKSAMRecord clippedRead = ReadClipper.hardClipLeadingInsertions(read);
|
||||||
|
|
||||||
|
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
|
||||||
|
|
||||||
int expectedLength = read.getReadLength() - leadingCigarElementLength(read.getCigar(), CigarOperator.INSERTION);
|
int expectedLength = read.getReadLength() - leadingCigarElementLength(read.getCigar(), CigarOperator.INSERTION);
|
||||||
if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
|
if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
|
||||||
expectedLength -= leadingCigarElementLength(ReadClipperTestUtils.invertCigar(read.getCigar()), CigarOperator.INSERTION);
|
expectedLength -= leadingCigarElementLength(ReadClipperTestUtils.invertCigar(read.getCigar()), CigarOperator.INSERTION);
|
||||||
|
|
||||||
if (! clippedRead.isEmpty()) {
|
if (!clippedRead.isEmpty()) {
|
||||||
Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there
|
Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there
|
||||||
Assert.assertFalse(startsWithInsertion(clippedRead.getCigar())); // check that the insertions are gone
|
Assert.assertFalse(startsWithInsertion(clippedRead.getCigar())); // check that the insertions are gone
|
||||||
}
|
} else
|
||||||
else
|
|
||||||
Assert.assertTrue(expectedLength == 0, String.format("expected length: %d", expectedLength)); // check that the read was expected to be fully clipped
|
Assert.assertTrue(expectedLength == 0, String.format("expected length: %d", expectedLength)); // check that the read was expected to be fully clipped
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testRevertSoftClippedBases()
|
public void testRevertSoftClippedBases() {
|
||||||
{
|
for (Cigar cigar : cigarList) {
|
||||||
for (Cigar cigar: cigarList) {
|
|
||||||
final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
|
final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
|
||||||
final int tailSoftClips = leadingCigarElementLength(ReadClipperTestUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP);
|
final int tailSoftClips = leadingCigarElementLength(ReadClipperTestUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP);
|
||||||
|
|
||||||
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read);
|
final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read);
|
||||||
|
|
||||||
if ( leadingSoftClips > 0 || tailSoftClips > 0) {
|
assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed
|
||||||
|
|
||||||
|
if (leadingSoftClips > 0 || tailSoftClips > 0) {
|
||||||
final int expectedStart = read.getAlignmentStart() - leadingSoftClips;
|
final int expectedStart = read.getAlignmentStart() - leadingSoftClips;
|
||||||
final int expectedEnd = read.getAlignmentEnd() + tailSoftClips;
|
final int expectedEnd = read.getAlignmentEnd() + tailSoftClips;
|
||||||
|
|
||||||
Assert.assertEquals(unclipped.getAlignmentStart(), expectedStart);
|
Assert.assertEquals(unclipped.getAlignmentStart(), expectedStart);
|
||||||
Assert.assertEquals(unclipped.getAlignmentEnd(), expectedEnd);
|
Assert.assertEquals(unclipped.getAlignmentEnd(), expectedEnd);
|
||||||
}
|
} else
|
||||||
else
|
|
||||||
Assert.assertEquals(read.getCigarString(), unclipped.getCigarString());
|
Assert.assertEquals(read.getCigarString(), unclipped.getCigarString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -315,12 +268,25 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
|
|
||||||
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
|
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
|
||||||
if (!read.isEmpty()) {
|
if (!read.isEmpty()) {
|
||||||
byte [] quals = read.getBaseQualities();
|
byte[] quals = read.getBaseQualities();
|
||||||
for (int i=0; i<quals.length; i++)
|
for (int i = 0; i < quals.length; i++)
|
||||||
Assert.assertFalse(quals[i] <= low_qual, String.format("Found low qual (%d) base after hard clipping. Position: %d -- %s", low_qual, i, read.getCigarString()));
|
Assert.assertFalse(quals[i] <= low_qual, String.format("Found low qual (%d) base after hard clipping. Position: %d -- %s", low_qual, i, read.getCigarString()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Asserts that clipping doesn't change the getUnclippedStart / getUnclippedEnd
|
||||||
|
*
|
||||||
|
* @param original
|
||||||
|
* @param clipped
|
||||||
|
*/
|
||||||
|
private void assertUnclippedLimits(GATKSAMRecord original, GATKSAMRecord clipped) {
|
||||||
|
if (ReadClipperTestUtils.readHasNonClippedBases(clipped)) {
|
||||||
|
Assert.assertEquals(original.getUnclippedStart(), clipped.getUnclippedStart());
|
||||||
|
Assert.assertEquals(original.getUnclippedEnd(), clipped.getUnclippedEnd());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private boolean startsWithInsertion(Cigar cigar) {
|
private boolean startsWithInsertion(Cigar cigar) {
|
||||||
return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0;
|
return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0;
|
||||||
}
|
}
|
||||||
|
|
@ -335,10 +301,46 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean cigarHasElementsDifferentThanInsertionsAndHardClips (Cigar cigar) {
|
private boolean cigarHasElementsDifferentThanInsertionsAndHardClips(Cigar cigar) {
|
||||||
for (CigarElement cigarElement : cigar.getCigarElements())
|
for (CigarElement cigarElement : cigar.getCigarElements())
|
||||||
if (cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
if (cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private class CigarCounter {
|
||||||
|
private HashMap<CigarOperator, Integer> counter;
|
||||||
|
|
||||||
|
public Integer getCounterForOp(CigarOperator operator) {
|
||||||
|
return counter.get(operator);
|
||||||
|
}
|
||||||
|
|
||||||
|
public CigarCounter(GATKSAMRecord read) {
|
||||||
|
CigarOperator[] operators = CigarOperator.values();
|
||||||
|
counter = new HashMap<CigarOperator, Integer>(operators.length);
|
||||||
|
|
||||||
|
for (CigarOperator op : operators)
|
||||||
|
counter.put(op, 0);
|
||||||
|
|
||||||
|
for (CigarElement cigarElement : read.getCigar().getCigarElements())
|
||||||
|
counter.put(cigarElement.getOperator(), counter.get(cigarElement.getOperator()) + cigarElement.getLength());
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean assertHardClippingSoftClips(CigarCounter clipped) {
|
||||||
|
for (CigarOperator op : counter.keySet()) {
|
||||||
|
if (op == CigarOperator.HARD_CLIP || op == CigarOperator.SOFT_CLIP) {
|
||||||
|
int counterTotal = counter.get(CigarOperator.HARD_CLIP) + counter.get(CigarOperator.SOFT_CLIP);
|
||||||
|
int clippedHard = clipped.getCounterForOp(CigarOperator.HARD_CLIP);
|
||||||
|
int clippedSoft = clipped.getCounterForOp(CigarOperator.SOFT_CLIP);
|
||||||
|
|
||||||
|
Assert.assertEquals(counterTotal, clippedHard);
|
||||||
|
Assert.assertTrue(clippedSoft == 0);
|
||||||
|
} else
|
||||||
|
Assert.assertEquals(counter.get(op), clipped.getCounterForOp(op));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue