Add support for reads starting with insertion
* Modified cleanCigarShift to allow insertions in the beginning and end of the read
* Allowed cigars starting/ending in insertions in the systematic ReadClipper tests
* Updated all ReadClipper unit tests
* ReduceReads does not hard clip leading insertions by default anymore
* SlidingWindow adjusts start location if read starts with insertion
* SlidingWindow creates an empty element with insertions to the right
* Fixed all potential divide by zero with totalCount() (from BaseCounts)
* Updated all Integration tests
* Added new integration test for multiple interval reducing
This commit is contained in:
parent
3ecb9a0bf7
commit
94791a2a75
|
|
@ -70,27 +70,27 @@ public class ClippingOp {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SOFTCLIP_BASES:
|
case SOFTCLIP_BASES:
|
||||||
if ( read.getReadUnmappedFlag() ) {
|
if (read.getReadUnmappedFlag()) {
|
||||||
// we can't process unmapped reads
|
// we can't process unmapped reads
|
||||||
throw new UserException("Read Clipper cannot soft clip unmapped reads");
|
throw new UserException("Read Clipper cannot soft clip unmapped reads");
|
||||||
}
|
}
|
||||||
|
|
||||||
//System.out.printf("%d %d %d%n", stop, start, read.getReadLength());
|
//System.out.printf("%d %d %d%n", stop, start, read.getReadLength());
|
||||||
int myStop = stop;
|
int myStop = stop;
|
||||||
if ( (stop + 1 - start) == read.getReadLength() ) {
|
if ((stop + 1 - start) == read.getReadLength()) {
|
||||||
// BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
|
// BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
|
||||||
//Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", read.getReadName()));
|
//Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", read.getReadName()));
|
||||||
//break;
|
//break;
|
||||||
myStop--; // just decrement stop
|
myStop--; // just decrement stop
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( start > 0 && myStop != read.getReadLength() - 1 )
|
if (start > 0 && myStop != read.getReadLength() - 1)
|
||||||
throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d", read.getReadName(), start, myStop));
|
throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d", read.getReadName(), start, myStop));
|
||||||
|
|
||||||
Cigar oldCigar = read.getCigar();
|
Cigar oldCigar = read.getCigar();
|
||||||
|
|
||||||
int scLeft = 0, scRight = read.getReadLength();
|
int scLeft = 0, scRight = read.getReadLength();
|
||||||
if ( start == 0 )
|
if (start == 0)
|
||||||
scLeft = myStop + 1;
|
scLeft = myStop + 1;
|
||||||
else
|
else
|
||||||
scRight = start;
|
scRight = start;
|
||||||
|
|
@ -134,8 +134,7 @@ public class ClippingOp {
|
||||||
unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
|
unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
|
||||||
matchesCount = 0;
|
matchesCount = 0;
|
||||||
unclippedCigar.add(element);
|
unclippedCigar.add(element);
|
||||||
}
|
} else
|
||||||
else
|
|
||||||
unclippedCigar.add(element);
|
unclippedCigar.add(element);
|
||||||
}
|
}
|
||||||
if (matchesCount > 0)
|
if (matchesCount > 0)
|
||||||
|
|
@ -284,10 +283,9 @@ public class ClippingOp {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
|
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
|
||||||
private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) {
|
private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) {
|
||||||
if (start == 0 && stop == read.getReadLength() - 1)
|
if (start == 0 && stop == read.getReadLength() - 1)
|
||||||
return GATKSAMRecord.emptyRead(read);
|
return GATKSAMRecord.emptyRead(read);
|
||||||
// return new GATKSAMRecord(read.getHeader());
|
|
||||||
|
|
||||||
|
|
||||||
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
|
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
|
||||||
|
|
@ -296,8 +294,8 @@ public class ClippingOp {
|
||||||
// the cigar may force a shift left or right (or both) in case we are left with insertions
|
// the cigar may force a shift left or right (or both) in case we are left with insertions
|
||||||
// starting or ending the read after applying the hard clip on start/stop.
|
// starting or ending the read after applying the hard clip on start/stop.
|
||||||
int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
|
int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
|
||||||
byte [] newBases = new byte[newLength];
|
byte[] newBases = new byte[newLength];
|
||||||
byte [] newQuals = new byte[newLength];
|
byte[] newQuals = new byte[newLength];
|
||||||
int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;
|
int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;
|
||||||
|
|
||||||
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
|
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
|
||||||
|
|
@ -321,11 +319,11 @@ public class ClippingOp {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires({"!cigar.isEmpty()"})
|
@Requires({"!cigar.isEmpty()"})
|
||||||
private CigarShift hardClipCigar (Cigar cigar, int start, int stop) {
|
private CigarShift hardClipCigar(Cigar cigar, int start, int stop) {
|
||||||
Cigar newCigar = new Cigar();
|
Cigar newCigar = new Cigar();
|
||||||
int index = 0;
|
int index = 0;
|
||||||
int totalHardClipCount = stop - start + 1;
|
int totalHardClipCount = stop - start + 1;
|
||||||
int alignmentShift = 0; // caused by hard clipping insertions or deletions
|
int alignmentShift = 0; // caused by hard clipping deletions
|
||||||
|
|
||||||
// hard clip the beginning of the cigar string
|
// hard clip the beginning of the cigar string
|
||||||
if (start == 0) {
|
if (start == 0) {
|
||||||
|
|
@ -353,7 +351,7 @@ public class ClippingOp {
|
||||||
// element goes beyond what we need to clip
|
// element goes beyond what we need to clip
|
||||||
else if (index + shift > stop + 1) {
|
else if (index + shift > stop + 1) {
|
||||||
int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
|
int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
|
||||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop-index+1);
|
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop - index + 1);
|
||||||
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
|
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
|
||||||
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
|
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
|
||||||
}
|
}
|
||||||
|
|
@ -408,7 +406,7 @@ public class ClippingOp {
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if we are hard clipping indels
|
// check if we are hard clipping indels
|
||||||
while(cigarElementIterator.hasNext()) {
|
while (cigarElementIterator.hasNext()) {
|
||||||
cigarElement = cigarElementIterator.next();
|
cigarElement = cigarElementIterator.next();
|
||||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
|
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
|
||||||
|
|
||||||
|
|
@ -444,34 +442,30 @@ public class ClippingOp {
|
||||||
boolean readHasStarted = false;
|
boolean readHasStarted = false;
|
||||||
boolean addedHardClips = false;
|
boolean addedHardClips = false;
|
||||||
|
|
||||||
while(!cigarStack.empty()) {
|
while (!cigarStack.empty()) {
|
||||||
CigarElement cigarElement = cigarStack.pop();
|
CigarElement cigarElement = cigarStack.pop();
|
||||||
|
|
||||||
if ( !readHasStarted &&
|
if (!readHasStarted &&
|
||||||
cigarElement.getOperator() != CigarOperator.INSERTION &&
|
// cigarElement.getOperator() != CigarOperator.INSERTION &&
|
||||||
cigarElement.getOperator() != CigarOperator.DELETION &&
|
cigarElement.getOperator() != CigarOperator.DELETION &&
|
||||||
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||||
readHasStarted = true;
|
readHasStarted = true;
|
||||||
|
|
||||||
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||||
totalHardClip += cigarElement.getLength();
|
totalHardClip += cigarElement.getLength();
|
||||||
|
|
||||||
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION)
|
else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.DELETION)
|
||||||
shift += cigarElement.getLength();
|
|
||||||
|
|
||||||
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.DELETION)
|
|
||||||
totalHardClip += cigarElement.getLength();
|
totalHardClip += cigarElement.getLength();
|
||||||
|
|
||||||
if (readHasStarted) {
|
if (readHasStarted) {
|
||||||
if (i==1) {
|
if (i == 1) {
|
||||||
if (!addedHardClips) {
|
if (!addedHardClips) {
|
||||||
if (totalHardClip > 0)
|
if (totalHardClip > 0)
|
||||||
inverseCigarStack.push(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
|
inverseCigarStack.push(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
|
||||||
addedHardClips = true;
|
addedHardClips = true;
|
||||||
}
|
}
|
||||||
inverseCigarStack.push(cigarElement);
|
inverseCigarStack.push(cigarElement);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
if (!addedHardClips) {
|
if (!addedHardClips) {
|
||||||
if (totalHardClip > 0)
|
if (totalHardClip > 0)
|
||||||
cleanCigar.add(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
|
cleanCigar.add(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
|
||||||
|
|
@ -509,7 +503,7 @@ public class ClippingOp {
|
||||||
}
|
}
|
||||||
|
|
||||||
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
|
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
|
||||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
|
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
|
||||||
oldShift += cigarElement.getLength();
|
oldShift += cigarElement.getLength();
|
||||||
else if (readHasStarted)
|
else if (readHasStarted)
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
|
|
@ -374,24 +374,43 @@ public class ReadClipper {
|
||||||
* Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail
|
* Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail
|
||||||
* and hardClipByReferenceCoordinatesRightTail. Should not be used directly.
|
* and hardClipByReferenceCoordinatesRightTail. Should not be used directly.
|
||||||
*
|
*
|
||||||
|
* Note, it REQUIRES you to give the directionality of your hard clip (i.e. whether you're clipping the
|
||||||
|
* left of right tail) by specifying either refStart < 0 or refStop < 0.
|
||||||
|
*
|
||||||
* @param refStart first base to clip (inclusive)
|
* @param refStart first base to clip (inclusive)
|
||||||
* @param refStop last base to clip (inclusive)
|
* @param refStop last base to clip (inclusive)
|
||||||
* @return a new read, without the clipped bases
|
* @return a new read, without the clipped bases
|
||||||
*/
|
*/
|
||||||
@Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip
|
@Requires({"!read.getReadUnmappedFlag()", "refStart < 0 || refStop < 0"}) // can't handle unmapped reads, as we're using reference coordinates to clip
|
||||||
protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
|
if (read.isEmpty())
|
||||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
|
return read;
|
||||||
|
|
||||||
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
|
int start;
|
||||||
return GATKSAMRecord.emptyRead(read);
|
int stop;
|
||||||
// return new GATKSAMRecord(read.getHeader());
|
|
||||||
|
// Determine the read coordinate to start and stop hard clipping
|
||||||
|
if (refStart < 0) {
|
||||||
|
if (refStop < 0)
|
||||||
|
throw new ReviewedStingException("Only one of refStart or refStop must be < 0, not both (" + refStart + ", " + refStop + ")");
|
||||||
|
start = 0;
|
||||||
|
stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (refStop >= 0)
|
||||||
|
throw new ReviewedStingException("Either refStart or refStop must be < 0 (" + refStart + ", " + refStop + ")");
|
||||||
|
start = ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
|
||||||
|
stop = read.getReadLength() - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if ((start == 0 && stop == read.getReadLength() - 1))
|
||||||
|
// return GATKSAMRecord.emptyRead(read);
|
||||||
|
|
||||||
if (start < 0 || stop > read.getReadLength() - 1)
|
if (start < 0 || stop > read.getReadLength() - 1)
|
||||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||||
|
|
||||||
if ( start > stop )
|
if ( start > stop )
|
||||||
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
|
throw new ReviewedStingException(String.format("START (%d) > (%d) STOP -- this should never happen -- call Mauricio!", start, stop));
|
||||||
|
|
||||||
if ( start > 0 && stop < read.getReadLength() - 1)
|
if ( start > 0 && stop < read.getReadLength() - 1)
|
||||||
throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString()));
|
throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString()));
|
||||||
|
|
|
||||||
|
|
@ -238,7 +238,7 @@ public class ArtificialSAMUtils {
|
||||||
*/
|
*/
|
||||||
public static GATKSAMRecord createArtificialRead( byte[] bases, byte[] qual, String cigar ) {
|
public static GATKSAMRecord createArtificialRead( byte[] bases, byte[] qual, String cigar ) {
|
||||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
|
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
|
||||||
return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 1, bases, qual, cigar);
|
return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 10000, bases, qual, cigar);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,6 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.sam;
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||||
|
|
||||||
|
|
@ -277,7 +276,6 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
*
|
*
|
||||||
* @return the unclipped start of the read taking soft clips (but not hard clips) into account
|
* @return the unclipped start of the read taking soft clips (but not hard clips) into account
|
||||||
*/
|
*/
|
||||||
@Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"})
|
|
||||||
public int getSoftStart() {
|
public int getSoftStart() {
|
||||||
int start = this.getUnclippedStart();
|
int start = this.getUnclippedStart();
|
||||||
for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
|
for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
|
||||||
|
|
@ -286,17 +284,17 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates the reference coordinate for the end of the read taking into account soft clips but not hard clips.
|
* Calculates the reference coordinate for the end of the read taking into account soft clips but not hard clips.
|
||||||
*
|
*
|
||||||
* Note: getUnclippedStart() adds soft and hard clips, this function only adds soft clips.
|
* Note: getUnclippedEnd() adds soft and hard clips, this function only adds soft clips.
|
||||||
*
|
*
|
||||||
* @return the unclipped end of the read taking soft clips (but not hard clips) into account
|
* @return the unclipped end of the read taking soft clips (but not hard clips) into account
|
||||||
*/
|
*/
|
||||||
@Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"})
|
|
||||||
public int getSoftEnd() {
|
public int getSoftEnd() {
|
||||||
int stop = this.getUnclippedStart();
|
int stop = this.getUnclippedStart();
|
||||||
|
|
||||||
|
|
@ -313,6 +311,7 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
else
|
else
|
||||||
shift = 0;
|
shift = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,7 @@ public class ReadUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A HashMap of the SAM spec read flag names
|
* A HashMap of the SAM spec read flag names
|
||||||
* <p/>
|
*
|
||||||
* Note: This is not being used right now, but can be useful in the future
|
* Note: This is not being used right now, but can be useful in the future
|
||||||
*/
|
*/
|
||||||
private static final Map<Integer, String> readFlagNames = new HashMap<Integer, String>();
|
private static final Map<Integer, String> readFlagNames = new HashMap<Integer, String>();
|
||||||
|
|
@ -79,49 +79,47 @@ public class ReadUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This enum represents all the different ways in which a read can overlap an interval.
|
* This enum represents all the different ways in which a read can overlap an interval.
|
||||||
* <p/>
|
*
|
||||||
* NO_OVERLAP_CONTIG:
|
* NO_OVERLAP_CONTIG:
|
||||||
* read and interval are in different contigs.
|
* read and interval are in different contigs.
|
||||||
* <p/>
|
*
|
||||||
* NO_OVERLAP_LEFT:
|
* NO_OVERLAP_LEFT:
|
||||||
* the read does not overlap the interval.
|
* the read does not overlap the interval.
|
||||||
* <p/>
|
*
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
* <----------------> (read)
|
* <----------------> (read)
|
||||||
* <p/>
|
*
|
||||||
* NO_OVERLAP_RIGHT:
|
* NO_OVERLAP_RIGHT:
|
||||||
* the read does not overlap the interval.
|
* the read does not overlap the interval.
|
||||||
* <p/>
|
*
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
* <----------------> (read)
|
* <----------------> (read)
|
||||||
* <p/>
|
*
|
||||||
* OVERLAP_LEFT:
|
* OVERLAP_LEFT:
|
||||||
* the read starts before the beginning of the interval but ends inside of it
|
* the read starts before the beginning of the interval but ends inside of it
|
||||||
* <p/>
|
*
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
* <----------------> (read)
|
* <----------------> (read)
|
||||||
* <p/>
|
*
|
||||||
* OVERLAP_RIGHT:
|
* OVERLAP_RIGHT:
|
||||||
* the read starts inside the interval but ends outside of it
|
* the read starts inside the interval but ends outside of it
|
||||||
* <p/>
|
*
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
* <----------------> (read)
|
* <----------------> (read)
|
||||||
* <p/>
|
*
|
||||||
* OVERLAP_LEFT_AND_RIGHT:
|
* OVERLAP_LEFT_AND_RIGHT:
|
||||||
* the read starts before the interval and ends after the interval
|
* the read starts before the interval and ends after the interval
|
||||||
* <p/>
|
*
|
||||||
* |-----------| (interval)
|
* |-----------| (interval)
|
||||||
* <-------------------> (read)
|
* <-------------------> (read)
|
||||||
* <p/>
|
*
|
||||||
* OVERLAP_CONTAINED:
|
* OVERLAP_CONTAINED:
|
||||||
* the read starts and ends inside the interval
|
* the read starts and ends inside the interval
|
||||||
* <p/>
|
*
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
* <--------> (read)
|
* <--------> (read)
|
||||||
*/
|
*/
|
||||||
public enum ReadAndIntervalOverlap {
|
public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}
|
||||||
NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a SAMFileWriter with the given compression level if you request a bam file. Creates a regular
|
* Creates a SAMFileWriter with the given compression level if you request a bam file. Creates a regular
|
||||||
|
|
@ -141,12 +139,12 @@ public class ReadUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* is this base inside the adaptor of the read?
|
* is this base inside the adaptor of the read?
|
||||||
* <p/>
|
*
|
||||||
* There are two cases to treat here:
|
* There are two cases to treat here:
|
||||||
* <p/>
|
*
|
||||||
* 1) Read is in the negative strand => Adaptor boundary is on the left tail
|
* 1) Read is in the negative strand => Adaptor boundary is on the left tail
|
||||||
* 2) Read is in the positive strand => Adaptor boundary is on the right tail
|
* 2) Read is in the positive strand => Adaptor boundary is on the right tail
|
||||||
* <p/>
|
*
|
||||||
* Note: We return false to all reads that are UNMAPPED or have an weird big insert size (probably due to mismapping or bigger event)
|
* Note: We return false to all reads that are UNMAPPED or have an weird big insert size (probably due to mismapping or bigger event)
|
||||||
*
|
*
|
||||||
* @param read the read to test
|
* @param read the read to test
|
||||||
|
|
@ -166,21 +164,21 @@ public class ReadUtils {
|
||||||
* the read boundary. If the read is in the positive strand, this is the first base after the end of the
|
* the read boundary. If the read is in the positive strand, this is the first base after the end of the
|
||||||
* fragment (Picard calls it 'insert'), if the read is in the negative strand, this is the first base before the
|
* fragment (Picard calls it 'insert'), if the read is in the negative strand, this is the first base before the
|
||||||
* beginning of the fragment.
|
* beginning of the fragment.
|
||||||
* <p/>
|
*
|
||||||
* There are two cases we need to treat here:
|
* There are two cases we need to treat here:
|
||||||
* <p/>
|
*
|
||||||
* 1) Our read is in the reverse strand :
|
* 1) Our read is in the reverse strand :
|
||||||
* <p/>
|
*
|
||||||
* <----------------------| *
|
* <----------------------| *
|
||||||
* |--------------------->
|
* |--------------------->
|
||||||
* <p/>
|
*
|
||||||
* in these cases, the adaptor boundary is at the mate start (minus one)
|
* in these cases, the adaptor boundary is at the mate start (minus one)
|
||||||
* <p/>
|
*
|
||||||
* 2) Our read is in the forward strand :
|
* 2) Our read is in the forward strand :
|
||||||
* <p/>
|
*
|
||||||
* |----------------------> *
|
* |----------------------> *
|
||||||
* <----------------------|
|
* <----------------------|
|
||||||
* <p/>
|
*
|
||||||
* in these cases the adaptor boundary is at the start of the read plus the inferred insert size (plus one)
|
* in these cases the adaptor boundary is at the start of the read plus the inferred insert size (plus one)
|
||||||
*
|
*
|
||||||
* @param read the read being tested for the adaptor boundary
|
* @param read the read being tested for the adaptor boundary
|
||||||
|
|
@ -264,7 +262,7 @@ public class ReadUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If a read starts in INSERTION, returns the first element length.
|
* If a read starts in INSERTION, returns the first element length.
|
||||||
* <p/>
|
*
|
||||||
* Warning: If the read has Hard or Soft clips before the insertion this function will return 0.
|
* Warning: If the read has Hard or Soft clips before the insertion this function will return 0.
|
||||||
*
|
*
|
||||||
* @param read
|
* @param read
|
||||||
|
|
@ -272,7 +270,7 @@ public class ReadUtils {
|
||||||
*/
|
*/
|
||||||
public final static int getFirstInsertionOffset(SAMRecord read) {
|
public final static int getFirstInsertionOffset(SAMRecord read) {
|
||||||
CigarElement e = read.getCigar().getCigarElement(0);
|
CigarElement e = read.getCigar().getCigarElement(0);
|
||||||
if (e.getOperator() == CigarOperator.I)
|
if ( e.getOperator() == CigarOperator.I )
|
||||||
return e.getLength();
|
return e.getLength();
|
||||||
else
|
else
|
||||||
return 0;
|
return 0;
|
||||||
|
|
@ -280,7 +278,7 @@ public class ReadUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If a read ends in INSERTION, returns the last element length.
|
* If a read ends in INSERTION, returns the last element length.
|
||||||
* <p/>
|
*
|
||||||
* Warning: If the read has Hard or Soft clips after the insertion this function will return 0.
|
* Warning: If the read has Hard or Soft clips after the insertion this function will return 0.
|
||||||
*
|
*
|
||||||
* @param read
|
* @param read
|
||||||
|
|
@ -288,7 +286,7 @@ public class ReadUtils {
|
||||||
*/
|
*/
|
||||||
public final static int getLastInsertionOffset(SAMRecord read) {
|
public final static int getLastInsertionOffset(SAMRecord read) {
|
||||||
CigarElement e = read.getCigar().getCigarElement(read.getCigarLength() - 1);
|
CigarElement e = read.getCigar().getCigarElement(read.getCigarLength() - 1);
|
||||||
if (e.getOperator() == CigarOperator.I)
|
if ( e.getOperator() == CigarOperator.I )
|
||||||
return e.getLength();
|
return e.getLength();
|
||||||
else
|
else
|
||||||
return 0;
|
return 0;
|
||||||
|
|
@ -297,7 +295,6 @@ public class ReadUtils {
|
||||||
/**
|
/**
|
||||||
* Determines what is the position of the read in relation to the interval.
|
* Determines what is the position of the read in relation to the interval.
|
||||||
* Note: This function uses the UNCLIPPED ENDS of the reads for the comparison.
|
* Note: This function uses the UNCLIPPED ENDS of the reads for the comparison.
|
||||||
*
|
|
||||||
* @param read the read
|
* @param read the read
|
||||||
* @param interval the interval
|
* @param interval the interval
|
||||||
* @return the overlap type as described by ReadAndIntervalOverlap enum (see above)
|
* @return the overlap type as described by ReadAndIntervalOverlap enum (see above)
|
||||||
|
|
@ -309,30 +306,30 @@ public class ReadUtils {
|
||||||
int uStart = read.getUnclippedStart();
|
int uStart = read.getUnclippedStart();
|
||||||
int uStop = read.getUnclippedEnd();
|
int uStop = read.getUnclippedEnd();
|
||||||
|
|
||||||
if (!read.getReferenceName().equals(interval.getContig()))
|
if ( !read.getReferenceName().equals(interval.getContig()) )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
|
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
|
||||||
|
|
||||||
else if (uStop < interval.getStart())
|
else if ( uStop < interval.getStart() )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;
|
return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;
|
||||||
|
|
||||||
else if (uStart > interval.getStop())
|
else if ( uStart > interval.getStop() )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;
|
return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;
|
||||||
|
|
||||||
else if (sStop < interval.getStart())
|
else if ( sStop < interval.getStart() )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT;
|
return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT;
|
||||||
|
|
||||||
else if (sStart > interval.getStop())
|
else if ( sStart > interval.getStop() )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT;
|
return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT;
|
||||||
|
|
||||||
else if ((sStart >= interval.getStart()) &&
|
else if ( (sStart >= interval.getStart()) &&
|
||||||
(sStop <= interval.getStop()))
|
(sStop <= interval.getStop()) )
|
||||||
return ReadAndIntervalOverlap.OVERLAP_CONTAINED;
|
return ReadAndIntervalOverlap.OVERLAP_CONTAINED;
|
||||||
|
|
||||||
else if ((sStart < interval.getStart()) &&
|
else if ( (sStart < interval.getStart()) &&
|
||||||
(sStop > interval.getStop()))
|
(sStop > interval.getStop()) )
|
||||||
return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;
|
return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;
|
||||||
|
|
||||||
else if ((sStart < interval.getStart()))
|
else if ( (sStart < interval.getStart()) )
|
||||||
return ReadAndIntervalOverlap.OVERLAP_LEFT;
|
return ReadAndIntervalOverlap.OVERLAP_LEFT;
|
||||||
|
|
||||||
else
|
else
|
||||||
|
|
@ -340,36 +337,52 @@ public class ReadUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) in case it falls in
|
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) to take care of
|
||||||
* a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns
|
* two corner cases:
|
||||||
* the base prior to the deletion. If clipping the right tail (end of the read) returns the base after the
|
*
|
||||||
* deletion.
|
* 1. If clipping the right tail (end of the read) getReadCoordinateForReferenceCoordinate and fall inside
|
||||||
|
* a deletion return the base after the deletion. If clipping the left tail (beginning of the read) it
|
||||||
|
* doesn't matter because it already returns the previous base by default.
|
||||||
|
*
|
||||||
|
* 2. If clipping the left tail (beginning of the read) getReadCoordinateForReferenceCoordinate and the
|
||||||
|
* read starts with an insertion, and you're requesting the first read based coordinate, it will skip
|
||||||
|
* the leading insertion (because it has the same reference coordinate as the following base).
|
||||||
*
|
*
|
||||||
* @param read
|
* @param read
|
||||||
* @param refCoord
|
* @param refCoord
|
||||||
* @param tail
|
* @param tail
|
||||||
* @return the read coordinate corresponding to the requested reference coordinate for clipping.
|
* @return the read coordinate corresponding to the requested reference coordinate for clipping.
|
||||||
*/
|
*/
|
||||||
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
|
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd() || (read.getUnclippedEnd() < read.getUnclippedStart())"})
|
||||||
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
||||||
public static int getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord, ClippingTail tail) {
|
public static int getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord, ClippingTail tail) {
|
||||||
Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate(read, refCoord);
|
Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate(read, refCoord);
|
||||||
int readCoord = result.getFirst();
|
int readCoord = result.getFirst();
|
||||||
|
|
||||||
|
// Corner case one: clipping the right tail and falls on deletion, move to the next
|
||||||
|
// read coordinate. It is not a problem for the left tail because the default answer
|
||||||
|
// from getReadCoordinateForReferenceCoordinate is to give the previous read coordinate.
|
||||||
if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL)
|
if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL)
|
||||||
readCoord++;
|
readCoord++;
|
||||||
|
|
||||||
|
// clipping the left tail and first base is insertion, go to the next read coordinate
|
||||||
|
// with the same reference coordinate. Advance to the next cigar element, or to the
|
||||||
|
// end of the read if there is no next element.
|
||||||
|
Pair<Boolean, CigarElement> firstElementIsInsertion = readStartsWithInsertion(read);
|
||||||
|
if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion.getFirst())
|
||||||
|
readCoord = Math.min(firstElementIsInsertion.getSecond().getLength(), read.getReadLength() - 1);
|
||||||
|
|
||||||
return readCoord;
|
return readCoord;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the read coordinate corresponding to the requested reference coordinate.
|
* Returns the read coordinate corresponding to the requested reference coordinate.
|
||||||
* <p/>
|
*
|
||||||
* WARNING: if the requested reference coordinate happens to fall inside a deletion in the read, this function
|
* WARNING: if the requested reference coordinate happens to fall inside a deletion in the read, this function
|
||||||
* will return the last read base before the deletion. This function returns a
|
* will return the last read base before the deletion. This function returns a
|
||||||
* Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with
|
* Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with
|
||||||
* a deletion.
|
* a deletion.
|
||||||
* <p/>
|
*
|
||||||
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail) instead to get a
|
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail) instead to get a
|
||||||
* pre-processed result according to normal clipping needs. Or you can use this function and tailor the
|
* pre-processed result according to normal clipping needs. Or you can use this function and tailor the
|
||||||
* behavior to your needs.
|
* behavior to your needs.
|
||||||
|
|
@ -457,7 +470,6 @@ public class ReadUtils {
|
||||||
if (!goalReached)
|
if (!goalReached)
|
||||||
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
||||||
|
|
||||||
|
|
||||||
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
|
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -465,12 +477,11 @@ public class ReadUtils {
|
||||||
* Compares two SAMRecords only the basis on alignment start. Note that
|
* Compares two SAMRecords only the basis on alignment start. Note that
|
||||||
* comparisons are performed ONLY on the basis of alignment start; any
|
* comparisons are performed ONLY on the basis of alignment start; any
|
||||||
* two SAM records with the same alignment start will be considered equal.
|
* two SAM records with the same alignment start will be considered equal.
|
||||||
* <p/>
|
*
|
||||||
* Unmapped alignments will all be considered equal.
|
* Unmapped alignments will all be considered equal.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@Requires({"read1 != null", "read2 != null"})
|
@Requires({"read1 != null", "read2 != null"})
|
||||||
@Ensures("result == 0 || result == 1 || result == -1")
|
|
||||||
public static int compareSAMRecords(GATKSAMRecord read1, GATKSAMRecord read2) {
|
public static int compareSAMRecords(GATKSAMRecord read1, GATKSAMRecord read2) {
|
||||||
AlignmentStartComparator comp = new AlignmentStartComparator();
|
AlignmentStartComparator comp = new AlignmentStartComparator();
|
||||||
return comp.compare(read1, read2);
|
return comp.compare(read1, read2);
|
||||||
|
|
@ -502,4 +513,22 @@ public class ReadUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if a read starts with an insertion. It looks beyond Hard and Soft clips
|
||||||
|
* if there are any.
|
||||||
|
*
|
||||||
|
* @param read
|
||||||
|
* @return A pair with the answer (true/false) and the element or null if it doesn't exist
|
||||||
|
*/
|
||||||
|
public static Pair<Boolean, CigarElement> readStartsWithInsertion(GATKSAMRecord read) {
|
||||||
|
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||||
|
if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
||||||
|
return new Pair<Boolean, CigarElement>(true, cigarElement);
|
||||||
|
|
||||||
|
else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return new Pair<Boolean, CigarElement>(false, null);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -112,7 +112,8 @@ public class ReadClipperTestUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
|
// if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
|
||||||
|
if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION)
|
||||||
return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
|
return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -190,4 +191,18 @@ public class ReadClipperTestUtils {
|
||||||
return invertedCigar;
|
return invertedCigar;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether or not the read has any cigar element that is not H or S
|
||||||
|
*
|
||||||
|
* @param read
|
||||||
|
* @return true if it has any M, I or D, false otherwise
|
||||||
|
*/
|
||||||
|
public static boolean readHasNonClippedBases(GATKSAMRecord read) {
|
||||||
|
for (CigarElement cigarElement : read.getCigar().getCigarElements())
|
||||||
|
if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -30,12 +30,12 @@ import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeClass;
|
import org.testng.annotations.BeforeClass;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -59,10 +59,11 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
int alnStart = read.getAlignmentStart();
|
int alnStart = read.getAlignmentStart();
|
||||||
int alnEnd = read.getAlignmentEnd();
|
int alnEnd = read.getAlignmentEnd();
|
||||||
int readLength = alnStart - alnEnd;
|
int readLength = alnStart - alnEnd;
|
||||||
for (int i=0; i<readLength/2; i++) {
|
for (int i = 0; i < readLength / 2; i++) {
|
||||||
GATKSAMRecord clippedRead = ReadClipper.hardClipBothEndsByReferenceCoordinates(read, alnStart + i, alnEnd - i);
|
GATKSAMRecord clippedRead = ReadClipper.hardClipBothEndsByReferenceCoordinates(read, alnStart + i, alnEnd - i);
|
||||||
Assert.assertTrue(clippedRead.getAlignmentStart() >= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
|
Assert.assertTrue(clippedRead.getAlignmentStart() >= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
|
||||||
Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
|
Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
|
||||||
|
assertUnclippedLimits(read, clippedRead);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -72,12 +73,14 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
for (Cigar cigar : cigarList) {
|
for (Cigar cigar : cigarList) {
|
||||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
int readLength = read.getReadLength();
|
int readLength = read.getReadLength();
|
||||||
for (int i=0; i<readLength; i++) {
|
for (int i = 0; i < readLength; i++) {
|
||||||
GATKSAMRecord clipLeft = ReadClipper.hardClipByReadCoordinates(read, 0, i);
|
GATKSAMRecord clipLeft = ReadClipper.hardClipByReadCoordinates(read, 0, i);
|
||||||
Assert.assertTrue(clipLeft.getReadLength() <= readLength - i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipLeft.getCigarString()));
|
Assert.assertTrue(clipLeft.getReadLength() <= readLength - i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipLeft.getCigarString()));
|
||||||
|
assertUnclippedLimits(read, clipLeft);
|
||||||
|
|
||||||
GATKSAMRecord clipRight = ReadClipper.hardClipByReadCoordinates(read, i, readLength-1);
|
GATKSAMRecord clipRight = ReadClipper.hardClipByReadCoordinates(read, i, readLength - 1);
|
||||||
Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString()));
|
Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString()));
|
||||||
|
assertUnclippedLimits(read, clipRight);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -86,19 +89,27 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
public void testHardClipByReferenceCoordinates() {
|
public void testHardClipByReferenceCoordinates() {
|
||||||
for (Cigar cigar : cigarList) {
|
for (Cigar cigar : cigarList) {
|
||||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
int alnStart = read.getAlignmentStart();
|
int start = read.getSoftStart();
|
||||||
int alnEnd = read.getAlignmentEnd();
|
int stop = read.getSoftEnd();
|
||||||
for (int i=alnStart; i<=alnEnd; i++) {
|
|
||||||
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
// System.out.println(String.format("CIGAR: %s (%d, %d)", cigar.toString(), start, stop));
|
||||||
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
|
|
||||||
if (!clipLeft.isEmpty())
|
// if (ReadUtils.readIsEntirelyInsertion(read))
|
||||||
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
// System.out.println("debug");
|
||||||
|
|
||||||
|
for (int i = start; i <= stop; i++) {
|
||||||
|
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, i);
|
||||||
|
if (!clipLeft.isEmpty()) {
|
||||||
|
// System.out.println(String.format("\t left [%d] %s -> %s ", i-start+1, cigar.toString(), clipLeft.getCigarString()));
|
||||||
|
Assert.assertTrue(clipLeft.getAlignmentStart() >= Math.min(read.getAlignmentEnd(), i + 1), String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
||||||
|
assertUnclippedLimits(read, clipLeft);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, -1);
|
||||||
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
|
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
// System.out.println(String.format("\t right [%d] %s -> %s ", i-start+1, cigar.toString(), clipRight.getCigarString()));
|
||||||
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
Assert.assertTrue(clipRight.getAlignmentEnd() <= Math.max(read.getAlignmentStart(), i - 1), String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
||||||
|
assertUnclippedLimits(read, clipRight);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -111,10 +122,14 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
int alnStart = read.getAlignmentStart();
|
int alnStart = read.getAlignmentStart();
|
||||||
int alnEnd = read.getAlignmentEnd();
|
int alnEnd = read.getAlignmentEnd();
|
||||||
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
||||||
for (int i=alnStart; i<=alnEnd; i++) {
|
for (int i = alnStart; i <= alnEnd; i++) {
|
||||||
GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i);
|
GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i);
|
||||||
if (!clipLeft.isEmpty())
|
|
||||||
|
if (!clipLeft.isEmpty()) {
|
||||||
|
// System.out.println(String.format("Left Tail [%d]: %s (%d,%d,%d : %d,%d,%d) -> %s (%d,%d,%d : %d,%d,%d)", i, cigar.toString(), read.getUnclippedStart(), read.getSoftStart(), read.getAlignmentStart(), read.getAlignmentEnd(), read.getSoftEnd(), read.getUnclippedEnd(), clipLeft.getCigarString(), clipLeft.getUnclippedStart(), clipLeft.getSoftStart(), clipLeft.getAlignmentStart(), clipLeft.getAlignmentEnd(), clipLeft.getSoftEnd(), clipLeft.getUnclippedEnd()));
|
||||||
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
||||||
|
assertUnclippedLimits(read, clipLeft);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -127,10 +142,12 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
int alnStart = read.getAlignmentStart();
|
int alnStart = read.getAlignmentStart();
|
||||||
int alnEnd = read.getAlignmentEnd();
|
int alnEnd = read.getAlignmentEnd();
|
||||||
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
||||||
for (int i=alnStart; i<=alnEnd; i++) {
|
for (int i = alnStart; i <= alnEnd; i++) {
|
||||||
GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i);
|
GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i);
|
||||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||||
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
||||||
|
assertUnclippedLimits(read, clipRight);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -145,43 +162,36 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
for (Cigar cigar : cigarList) {
|
for (Cigar cigar : cigarList) {
|
||||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
int readLength = read.getReadLength();
|
int readLength = read.getReadLength();
|
||||||
byte [] quals = new byte[readLength];
|
byte[] quals = new byte[readLength];
|
||||||
|
|
||||||
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
|
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
|
||||||
|
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail
|
||||||
// create a read with nLowQualBases in the left tail
|
|
||||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
|
||||||
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
|
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
|
||||||
quals[addLeft] = LOW_QUAL;
|
quals[addLeft] = LOW_QUAL;
|
||||||
read.setBaseQualities(quals);
|
read.setBaseQualities(quals);
|
||||||
GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
||||||
|
|
||||||
// Tests
|
assertUnclippedLimits(read, clipLeft); // Make sure limits haven't changed
|
||||||
|
assertNoLowQualBases(clipLeft, LOW_QUAL); // Make sure the low qualities are gone
|
||||||
|
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
|
||||||
|
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
|
||||||
|
|
||||||
// Make sure the low qualities are gone
|
|
||||||
assertNoLowQualBases(clipLeft, LOW_QUAL);
|
|
||||||
|
|
||||||
// Can't run this test with the current contract of no hanging insertions
|
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the right tail
|
||||||
// Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
|
|
||||||
|
|
||||||
// create a read with nLowQualBases in the right tail
|
|
||||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
|
||||||
for (int addRight = 0; addRight < nLowQualBases; addRight++)
|
for (int addRight = 0; addRight < nLowQualBases; addRight++)
|
||||||
quals[readLength - addRight - 1] = LOW_QUAL;
|
quals[readLength - addRight - 1] = LOW_QUAL;
|
||||||
read.setBaseQualities(quals);
|
read.setBaseQualities(quals);
|
||||||
GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
||||||
|
|
||||||
// Tests
|
// System.out.println(String.format("Debug [%d]: %s -> %s / %s", nLowQualBases, cigar.toString(), clipLeft.getCigarString(), clipRight.getCigarString()));
|
||||||
|
|
||||||
// Make sure the low qualities are gone
|
assertUnclippedLimits(read, clipRight); // Make sure limits haven't changed
|
||||||
assertNoLowQualBases(clipRight, LOW_QUAL);
|
assertNoLowQualBases(clipRight, LOW_QUAL); // Make sure the low qualities are gone
|
||||||
|
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
|
||||||
|
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
|
||||||
|
|
||||||
// Make sure we haven't clipped any high quals -- Can't run this test with the current contract of no hanging insertions
|
if (nLowQualBases <= readLength / 2) {
|
||||||
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
|
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases on both tails
|
||||||
|
|
||||||
// create a read with nLowQualBases in the both tails
|
|
||||||
if (nLowQualBases <= readLength/2) {
|
|
||||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
|
||||||
for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
|
for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
|
||||||
quals[addBoth] = LOW_QUAL;
|
quals[addBoth] = LOW_QUAL;
|
||||||
quals[readLength - addBoth - 1] = LOW_QUAL;
|
quals[readLength - addBoth - 1] = LOW_QUAL;
|
||||||
|
|
@ -189,83 +199,25 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
read.setBaseQualities(quals);
|
read.setBaseQualities(quals);
|
||||||
GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
||||||
|
|
||||||
// Tests
|
assertUnclippedLimits(read, clipBoth); // Make sure limits haven't changed
|
||||||
|
assertNoLowQualBases(clipBoth, LOW_QUAL); // Make sure the low qualities are gone
|
||||||
// Make sure the low qualities are gone
|
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
|
||||||
assertNoLowQualBases(clipBoth, LOW_QUAL);
|
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2 * nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
|
||||||
|
|
||||||
// Can't run this test with the current contract of no hanging insertions
|
|
||||||
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2*nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// logger.warn(String.format("Testing %s for all combinations of low/high qual... PASSED", read.getCigarString()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ONE OFF Testing clipping that ends inside an insertion ( Ryan's bug )
|
|
||||||
final byte[] BASES = {'A','C','G','T','A','C','G','T'};
|
|
||||||
final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2};
|
|
||||||
final String CIGAR = "1S1M5I1S";
|
|
||||||
|
|
||||||
final byte[] CLIPPED_BASES = {};
|
|
||||||
final byte[] CLIPPED_QUALS = {};
|
|
||||||
final String CLIPPED_CIGAR = "";
|
|
||||||
|
|
||||||
|
|
||||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR);
|
|
||||||
GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR);
|
|
||||||
|
|
||||||
ReadClipperTestUtils.assertEqualReads(ReadClipper.hardClipLowQualEnds(read, (byte) 2), expected);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testHardClipSoftClippedBases() {
|
public void testHardClipSoftClippedBases() {
|
||||||
|
|
||||||
// Generate a list of cigars to test
|
|
||||||
for (Cigar cigar : cigarList) {
|
for (Cigar cigar : cigarList) {
|
||||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
GATKSAMRecord clippedRead = ReadClipper.hardClipSoftClippedBases(read);
|
GATKSAMRecord clippedRead = ReadClipper.hardClipSoftClippedBases(read);
|
||||||
|
CigarCounter original = new CigarCounter(read);
|
||||||
|
CigarCounter clipped = new CigarCounter(clippedRead);
|
||||||
|
|
||||||
int sumHardClips = 0;
|
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
|
||||||
int sumMatches = 0;
|
original.assertHardClippingSoftClips(clipped); // Make sure we have only clipped SOFT_CLIPS
|
||||||
|
|
||||||
boolean tail = true;
|
|
||||||
for (CigarElement element : read.getCigar().getCigarElements()) {
|
|
||||||
// Assuming cigars are well formed, if we see S or H, it means we're on the tail (left or right)
|
|
||||||
if (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP)
|
|
||||||
tail = true;
|
|
||||||
|
|
||||||
// Adds all H, S and D's (next to hard/soft clips).
|
|
||||||
// All these should be hard clips after clipping.
|
|
||||||
if (tail && (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.DELETION))
|
|
||||||
sumHardClips += element.getLength();
|
|
||||||
|
|
||||||
// this means we're no longer on the tail (insertions can still potentially be the tail because
|
|
||||||
// of the current contract of clipping out hanging insertions
|
|
||||||
else if (element.getOperator() != CigarOperator.INSERTION)
|
|
||||||
tail = false;
|
|
||||||
|
|
||||||
// Adds all matches to verify that they remain the same after clipping
|
|
||||||
if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
|
|
||||||
sumMatches += element.getLength();
|
|
||||||
}
|
|
||||||
|
|
||||||
for (CigarElement element : clippedRead.getCigar().getCigarElements()) {
|
|
||||||
// Test if clipped read has Soft Clips (shouldn't have any!)
|
|
||||||
Assert.assertTrue( element.getOperator() != CigarOperator.SOFT_CLIP, String.format("Cigar %s -> %s -- FAILED (resulting cigar has soft clips)", read.getCigarString(), clippedRead.getCigarString()));
|
|
||||||
|
|
||||||
// Keep track of the total number of Hard Clips after clipping to make sure everything was accounted for
|
|
||||||
if (element.getOperator() == CigarOperator.HARD_CLIP)
|
|
||||||
sumHardClips -= element.getLength();
|
|
||||||
|
|
||||||
// Make sure all matches are still there
|
|
||||||
if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
|
|
||||||
sumMatches -= element.getLength();
|
|
||||||
}
|
|
||||||
Assert.assertTrue( sumHardClips == 0, String.format("Cigar %s -> %s -- FAILED (number of hard clips mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumHardClips));
|
|
||||||
Assert.assertTrue( sumMatches == 0, String.format("Cigar %s -> %s -- FAILED (number of matches mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumMatches));
|
|
||||||
|
|
||||||
|
|
||||||
// logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString()));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -276,38 +228,39 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
GATKSAMRecord clippedRead = ReadClipper.hardClipLeadingInsertions(read);
|
GATKSAMRecord clippedRead = ReadClipper.hardClipLeadingInsertions(read);
|
||||||
|
|
||||||
|
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
|
||||||
|
|
||||||
int expectedLength = read.getReadLength() - leadingCigarElementLength(read.getCigar(), CigarOperator.INSERTION);
|
int expectedLength = read.getReadLength() - leadingCigarElementLength(read.getCigar(), CigarOperator.INSERTION);
|
||||||
if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
|
if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
|
||||||
expectedLength -= leadingCigarElementLength(ReadClipperTestUtils.invertCigar(read.getCigar()), CigarOperator.INSERTION);
|
expectedLength -= leadingCigarElementLength(ReadClipperTestUtils.invertCigar(read.getCigar()), CigarOperator.INSERTION);
|
||||||
|
|
||||||
if (! clippedRead.isEmpty()) {
|
if (!clippedRead.isEmpty()) {
|
||||||
Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there
|
Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there
|
||||||
Assert.assertFalse(startsWithInsertion(clippedRead.getCigar())); // check that the insertions are gone
|
Assert.assertFalse(startsWithInsertion(clippedRead.getCigar())); // check that the insertions are gone
|
||||||
}
|
} else
|
||||||
else
|
|
||||||
Assert.assertTrue(expectedLength == 0, String.format("expected length: %d", expectedLength)); // check that the read was expected to be fully clipped
|
Assert.assertTrue(expectedLength == 0, String.format("expected length: %d", expectedLength)); // check that the read was expected to be fully clipped
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testRevertSoftClippedBases()
|
public void testRevertSoftClippedBases() {
|
||||||
{
|
for (Cigar cigar : cigarList) {
|
||||||
for (Cigar cigar: cigarList) {
|
|
||||||
final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
|
final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
|
||||||
final int tailSoftClips = leadingCigarElementLength(ReadClipperTestUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP);
|
final int tailSoftClips = leadingCigarElementLength(ReadClipperTestUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP);
|
||||||
|
|
||||||
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read);
|
final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read);
|
||||||
|
|
||||||
if ( leadingSoftClips > 0 || tailSoftClips > 0) {
|
assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed
|
||||||
|
|
||||||
|
if (leadingSoftClips > 0 || tailSoftClips > 0) {
|
||||||
final int expectedStart = read.getAlignmentStart() - leadingSoftClips;
|
final int expectedStart = read.getAlignmentStart() - leadingSoftClips;
|
||||||
final int expectedEnd = read.getAlignmentEnd() + tailSoftClips;
|
final int expectedEnd = read.getAlignmentEnd() + tailSoftClips;
|
||||||
|
|
||||||
Assert.assertEquals(unclipped.getAlignmentStart(), expectedStart);
|
Assert.assertEquals(unclipped.getAlignmentStart(), expectedStart);
|
||||||
Assert.assertEquals(unclipped.getAlignmentEnd(), expectedEnd);
|
Assert.assertEquals(unclipped.getAlignmentEnd(), expectedEnd);
|
||||||
}
|
} else
|
||||||
else
|
|
||||||
Assert.assertEquals(read.getCigarString(), unclipped.getCigarString());
|
Assert.assertEquals(read.getCigarString(), unclipped.getCigarString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -315,12 +268,25 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
|
|
||||||
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
|
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
|
||||||
if (!read.isEmpty()) {
|
if (!read.isEmpty()) {
|
||||||
byte [] quals = read.getBaseQualities();
|
byte[] quals = read.getBaseQualities();
|
||||||
for (int i=0; i<quals.length; i++)
|
for (int i = 0; i < quals.length; i++)
|
||||||
Assert.assertFalse(quals[i] <= low_qual, String.format("Found low qual (%d) base after hard clipping. Position: %d -- %s", low_qual, i, read.getCigarString()));
|
Assert.assertFalse(quals[i] <= low_qual, String.format("Found low qual (%d) base after hard clipping. Position: %d -- %s", low_qual, i, read.getCigarString()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Asserts that clipping doesn't change the getUnclippedStart / getUnclippedEnd
|
||||||
|
*
|
||||||
|
* @param original
|
||||||
|
* @param clipped
|
||||||
|
*/
|
||||||
|
private void assertUnclippedLimits(GATKSAMRecord original, GATKSAMRecord clipped) {
|
||||||
|
if (ReadClipperTestUtils.readHasNonClippedBases(clipped)) {
|
||||||
|
Assert.assertEquals(original.getUnclippedStart(), clipped.getUnclippedStart());
|
||||||
|
Assert.assertEquals(original.getUnclippedEnd(), clipped.getUnclippedEnd());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private boolean startsWithInsertion(Cigar cigar) {
|
private boolean startsWithInsertion(Cigar cigar) {
|
||||||
return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0;
|
return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0;
|
||||||
}
|
}
|
||||||
|
|
@ -335,10 +301,46 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean cigarHasElementsDifferentThanInsertionsAndHardClips (Cigar cigar) {
|
private boolean cigarHasElementsDifferentThanInsertionsAndHardClips(Cigar cigar) {
|
||||||
for (CigarElement cigarElement : cigar.getCigarElements())
|
for (CigarElement cigarElement : cigar.getCigarElements())
|
||||||
if (cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
if (cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private class CigarCounter {
|
||||||
|
private HashMap<CigarOperator, Integer> counter;
|
||||||
|
|
||||||
|
public Integer getCounterForOp(CigarOperator operator) {
|
||||||
|
return counter.get(operator);
|
||||||
|
}
|
||||||
|
|
||||||
|
public CigarCounter(GATKSAMRecord read) {
|
||||||
|
CigarOperator[] operators = CigarOperator.values();
|
||||||
|
counter = new HashMap<CigarOperator, Integer>(operators.length);
|
||||||
|
|
||||||
|
for (CigarOperator op : operators)
|
||||||
|
counter.put(op, 0);
|
||||||
|
|
||||||
|
for (CigarElement cigarElement : read.getCigar().getCigarElements())
|
||||||
|
counter.put(cigarElement.getOperator(), counter.get(cigarElement.getOperator()) + cigarElement.getLength());
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean assertHardClippingSoftClips(CigarCounter clipped) {
|
||||||
|
for (CigarOperator op : counter.keySet()) {
|
||||||
|
if (op == CigarOperator.HARD_CLIP || op == CigarOperator.SOFT_CLIP) {
|
||||||
|
int counterTotal = counter.get(CigarOperator.HARD_CLIP) + counter.get(CigarOperator.SOFT_CLIP);
|
||||||
|
int clippedHard = clipped.getCounterForOp(CigarOperator.HARD_CLIP);
|
||||||
|
int clippedSoft = clipped.getCounterForOp(CigarOperator.SOFT_CLIP);
|
||||||
|
|
||||||
|
Assert.assertEquals(counterTotal, clippedHard);
|
||||||
|
Assert.assertTrue(clippedSoft == 0);
|
||||||
|
} else
|
||||||
|
Assert.assertEquals(counter.get(op), clipped.getCounterForOp(op));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue