diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java
index 921a0a599..fb133d902 100644
--- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java
@@ -70,27 +70,27 @@ public class ClippingOp {
break;
case SOFTCLIP_BASES:
- if ( read.getReadUnmappedFlag() ) {
+ if (read.getReadUnmappedFlag()) {
// we can't process unmapped reads
throw new UserException("Read Clipper cannot soft clip unmapped reads");
}
//System.out.printf("%d %d %d%n", stop, start, read.getReadLength());
int myStop = stop;
- if ( (stop + 1 - start) == read.getReadLength() ) {
+ if ((stop + 1 - start) == read.getReadLength()) {
// BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
//Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", read.getReadName()));
//break;
myStop--; // just decrement stop
}
- if ( start > 0 && myStop != read.getReadLength() - 1 )
+ if (start > 0 && myStop != read.getReadLength() - 1)
throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d", read.getReadName(), start, myStop));
Cigar oldCigar = read.getCigar();
int scLeft = 0, scRight = read.getReadLength();
- if ( start == 0 )
+ if (start == 0)
scLeft = myStop + 1;
else
scRight = start;
@@ -134,8 +134,7 @@ public class ClippingOp {
unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
matchesCount = 0;
unclippedCigar.add(element);
- }
- else
+ } else
unclippedCigar.add(element);
}
if (matchesCount > 0)
@@ -284,10 +283,9 @@ public class ClippingOp {
}
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
- private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) {
+ private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) {
if (start == 0 && stop == read.getReadLength() - 1)
return GATKSAMRecord.emptyRead(read);
-// return new GATKSAMRecord(read.getHeader());
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
@@ -296,8 +294,8 @@ public class ClippingOp {
// the cigar may force a shift left or right (or both) in case we are left with insertions
// starting or ending the read after applying the hard clip on start/stop.
int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
- byte [] newBases = new byte[newLength];
- byte [] newQuals = new byte[newLength];
+ byte[] newBases = new byte[newLength];
+ byte[] newQuals = new byte[newLength];
int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
@@ -321,11 +319,11 @@ public class ClippingOp {
}
@Requires({"!cigar.isEmpty()"})
- private CigarShift hardClipCigar (Cigar cigar, int start, int stop) {
+ private CigarShift hardClipCigar(Cigar cigar, int start, int stop) {
Cigar newCigar = new Cigar();
int index = 0;
int totalHardClipCount = stop - start + 1;
- int alignmentShift = 0; // caused by hard clipping insertions or deletions
+ int alignmentShift = 0; // caused by hard clipping deletions
// hard clip the beginning of the cigar string
if (start == 0) {
@@ -353,7 +351,7 @@ public class ClippingOp {
// element goes beyond what we need to clip
else if (index + shift > stop + 1) {
int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
- alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop-index+1);
+ alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop - index + 1);
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
}
@@ -388,7 +386,7 @@ public class ClippingOp {
if (index + shift < start)
newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator()));
- // element goes beyond our clip starting position
+ // element goes beyond our clip starting position
else {
int elementLengthAfterChopping = start - index;
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength() - (start - index));
@@ -396,7 +394,7 @@ public class ClippingOp {
// if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
totalHardClipCount += elementLengthAfterChopping;
- // otherwise, maintain what's left of this last operator
+ // otherwise, maintain what's left of this last operator
else
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
}
@@ -408,7 +406,7 @@ public class ClippingOp {
}
// check if we are hard clipping indels
- while(cigarElementIterator.hasNext()) {
+ while (cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
@@ -444,34 +442,30 @@ public class ClippingOp {
boolean readHasStarted = false;
boolean addedHardClips = false;
- while(!cigarStack.empty()) {
+ while (!cigarStack.empty()) {
CigarElement cigarElement = cigarStack.pop();
- if ( !readHasStarted &&
- cigarElement.getOperator() != CigarOperator.INSERTION &&
+ if (!readHasStarted &&
+// cigarElement.getOperator() != CigarOperator.INSERTION &&
cigarElement.getOperator() != CigarOperator.DELETION &&
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
readHasStarted = true;
- else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.HARD_CLIP)
+ else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.HARD_CLIP)
totalHardClip += cigarElement.getLength();
- else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION)
- shift += cigarElement.getLength();
-
- else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.DELETION)
+ else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.DELETION)
totalHardClip += cigarElement.getLength();
if (readHasStarted) {
- if (i==1) {
+ if (i == 1) {
if (!addedHardClips) {
if (totalHardClip > 0)
inverseCigarStack.push(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
addedHardClips = true;
}
inverseCigarStack.push(cigarElement);
- }
- else {
+ } else {
if (!addedHardClips) {
if (totalHardClip > 0)
cleanCigar.add(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
@@ -498,7 +492,7 @@ public class ClippingOp {
int newShift = 0;
int oldShift = 0;
- boolean readHasStarted = false; // if the new cigar is composed of S and H only, we have to traverse the entire old cigar to calculate the shift
+ boolean readHasStarted = false; // if the new cigar is composed of S and H only, we have to traverse the entire old cigar to calculate the shift
for (CigarElement cigarElement : newCigar.getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
newShift += cigarElement.getLength();
@@ -509,7 +503,7 @@ public class ClippingOp {
}
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
- if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
+ if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
oldShift += cigarElement.getLength();
else if (readHasStarted)
break;
@@ -522,7 +516,7 @@ public class ClippingOp {
if (cigarElement.getOperator() == CigarOperator.INSERTION)
return -clippedLength;
- // Deletions should be added to the total hard clip count
+ // Deletions should be added to the total hard clip count
else if (cigarElement.getOperator() == CigarOperator.DELETION)
return cigarElement.getLength();
diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java
index afe7fa975..7a664bd61 100644
--- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java
@@ -374,24 +374,43 @@ public class ReadClipper {
* Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail
* and hardClipByReferenceCoordinatesRightTail. Should not be used directly.
*
+ * Note, it REQUIRES you to give the directionality of your hard clip (i.e. whether you're clipping the
+ * left of right tail) by specifying either refStart < 0 or refStop < 0.
+ *
* @param refStart first base to clip (inclusive)
* @param refStop last base to clip (inclusive)
* @return a new read, without the clipped bases
*/
- @Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip
+ @Requires({"!read.getReadUnmappedFlag()", "refStart < 0 || refStop < 0"}) // can't handle unmapped reads, as we're using reference coordinates to clip
protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
- int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
- int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
+ if (read.isEmpty())
+ return read;
- if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
- return GATKSAMRecord.emptyRead(read);
-// return new GATKSAMRecord(read.getHeader());
+ int start;
+ int stop;
+
+ // Determine the read coordinate to start and stop hard clipping
+ if (refStart < 0) {
+ if (refStop < 0)
+ throw new ReviewedStingException("Only one of refStart or refStop must be < 0, not both (" + refStart + ", " + refStop + ")");
+ start = 0;
+ stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
+ }
+ else {
+ if (refStop >= 0)
+ throw new ReviewedStingException("Either refStart or refStop must be < 0 (" + refStart + ", " + refStop + ")");
+ start = ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
+ stop = read.getReadLength() - 1;
+ }
+
+// if ((start == 0 && stop == read.getReadLength() - 1))
+// return GATKSAMRecord.emptyRead(read);
if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
if ( start > stop )
- throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
+ throw new ReviewedStingException(String.format("START (%d) > (%d) STOP -- this should never happen -- call Mauricio!", start, stop));
if ( start > 0 && stop < read.getReadLength() - 1)
throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString()));
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
index cedd56bdf..542adea77 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
@@ -238,7 +238,7 @@ public class ArtificialSAMUtils {
*/
public static GATKSAMRecord createArtificialRead( byte[] bases, byte[] qual, String cigar ) {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
- return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 1, bases, qual, cigar);
+ return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 10000, bases, qual, cigar);
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
index 96713edc2..5e0802fa6 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
@@ -24,7 +24,6 @@
package org.broadinstitute.sting.utils.sam;
-import com.google.java.contract.Ensures;
import net.sf.samtools.*;
import org.broadinstitute.sting.utils.NGSPlatform;
@@ -277,7 +276,6 @@ public class GATKSAMRecord extends BAMRecord {
*
* @return the unclipped start of the read taking soft clips (but not hard clips) into account
*/
- @Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"})
public int getSoftStart() {
int start = this.getUnclippedStart();
for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
@@ -286,17 +284,17 @@ public class GATKSAMRecord extends BAMRecord {
else
break;
}
+
return start;
}
/**
* Calculates the reference coordinate for the end of the read taking into account soft clips but not hard clips.
*
- * Note: getUnclippedStart() adds soft and hard clips, this function only adds soft clips.
+ * Note: getUnclippedEnd() adds soft and hard clips, this function only adds soft clips.
*
* @return the unclipped end of the read taking soft clips (but not hard clips) into account
*/
- @Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"})
public int getSoftEnd() {
int stop = this.getUnclippedStart();
@@ -313,6 +311,7 @@ public class GATKSAMRecord extends BAMRecord {
else
shift = 0;
}
+
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
index f2e54713f..d52814ef7 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
@@ -58,7 +58,7 @@ public class ReadUtils {
/**
* A HashMap of the SAM spec read flag names
- *
+ *
* Note: This is not being used right now, but can be useful in the future
*/
private static final Map readFlagNames = new HashMap();
@@ -79,49 +79,47 @@ public class ReadUtils {
/**
* This enum represents all the different ways in which a read can overlap an interval.
- *
+ *
* NO_OVERLAP_CONTIG:
* read and interval are in different contigs.
- *
+ *
* NO_OVERLAP_LEFT:
* the read does not overlap the interval.
- *
- * |----------------| (interval)
- * <----------------> (read)
- *
+ *
+ * |----------------| (interval)
+ * <----------------> (read)
+ *
* NO_OVERLAP_RIGHT:
* the read does not overlap the interval.
- *
- * |----------------| (interval)
- * <----------------> (read)
- *
+ *
+ * |----------------| (interval)
+ * <----------------> (read)
+ *
* OVERLAP_LEFT:
* the read starts before the beginning of the interval but ends inside of it
- *
- * |----------------| (interval)
- * <----------------> (read)
- *
+ *
+ * |----------------| (interval)
+ * <----------------> (read)
+ *
* OVERLAP_RIGHT:
* the read starts inside the interval but ends outside of it
- *
- * |----------------| (interval)
- * <----------------> (read)
- *
+ *
+ * |----------------| (interval)
+ * <----------------> (read)
+ *
* OVERLAP_LEFT_AND_RIGHT:
* the read starts before the interval and ends after the interval
- *
- * |-----------| (interval)
- * <-------------------> (read)
- *
+ *
+ * |-----------| (interval)
+ * <-------------------> (read)
+ *
* OVERLAP_CONTAINED:
* the read starts and ends inside the interval
- *
- * |----------------| (interval)
- * <--------> (read)
+ *
+ * |----------------| (interval)
+ * <--------> (read)
*/
- public enum ReadAndIntervalOverlap {
- NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED
- }
+ public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}
/**
* Creates a SAMFileWriter with the given compression level if you request a bam file. Creates a regular
@@ -141,15 +139,15 @@ public class ReadUtils {
/**
* is this base inside the adaptor of the read?
- *
+ *
* There are two cases to treat here:
- *
+ *
* 1) Read is in the negative strand => Adaptor boundary is on the left tail
* 2) Read is in the positive strand => Adaptor boundary is on the right tail
- *
+ *
* Note: We return false to all reads that are UNMAPPED or have an weird big insert size (probably due to mismapping or bigger event)
*
- * @param read the read to test
+ * @param read the read to test
* @param basePos base position in REFERENCE coordinates (not read coordinates)
* @return whether or not the base is in the adaptor
*/
@@ -166,22 +164,22 @@ public class ReadUtils {
* the read boundary. If the read is in the positive strand, this is the first base after the end of the
* fragment (Picard calls it 'insert'), if the read is in the negative strand, this is the first base before the
* beginning of the fragment.
- *
+ *
* There are two cases we need to treat here:
- *
+ *
* 1) Our read is in the reverse strand :
- *
- * <----------------------| *
- * |--------------------->
- *
- * in these cases, the adaptor boundary is at the mate start (minus one)
- *
+ *
+ * <----------------------| *
+ * |--------------------->
+ *
+ * in these cases, the adaptor boundary is at the mate start (minus one)
+ *
* 2) Our read is in the forward strand :
- *
- * |----------------------> *
- * <----------------------|
- *
- * in these cases the adaptor boundary is at the start of the read plus the inferred insert size (plus one)
+ *
+ * |----------------------> *
+ * <----------------------|
+ *
+ * in these cases the adaptor boundary is at the start of the read plus the inferred insert size (plus one)
*
* @param read the read being tested for the adaptor boundary
* @return the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read. NULL if the read is unmapped or the mate is mapped to another contig.
@@ -264,7 +262,7 @@ public class ReadUtils {
/**
* If a read starts in INSERTION, returns the first element length.
- *
+ *
* Warning: If the read has Hard or Soft clips before the insertion this function will return 0.
*
* @param read
@@ -272,7 +270,7 @@ public class ReadUtils {
*/
public final static int getFirstInsertionOffset(SAMRecord read) {
CigarElement e = read.getCigar().getCigarElement(0);
- if (e.getOperator() == CigarOperator.I)
+ if ( e.getOperator() == CigarOperator.I )
return e.getLength();
else
return 0;
@@ -280,7 +278,7 @@ public class ReadUtils {
/**
* If a read ends in INSERTION, returns the last element length.
- *
+ *
* Warning: If the read has Hard or Soft clips after the insertion this function will return 0.
*
* @param read
@@ -288,7 +286,7 @@ public class ReadUtils {
*/
public final static int getLastInsertionOffset(SAMRecord read) {
CigarElement e = read.getCigar().getCigarElement(read.getCigarLength() - 1);
- if (e.getOperator() == CigarOperator.I)
+ if ( e.getOperator() == CigarOperator.I )
return e.getLength();
else
return 0;
@@ -297,8 +295,7 @@ public class ReadUtils {
/**
* Determines what is the position of the read in relation to the interval.
* Note: This function uses the UNCLIPPED ENDS of the reads for the comparison.
- *
- * @param read the read
+ * @param read the read
* @param interval the interval
* @return the overlap type as described by ReadAndIntervalOverlap enum (see above)
*/
@@ -309,30 +306,30 @@ public class ReadUtils {
int uStart = read.getUnclippedStart();
int uStop = read.getUnclippedEnd();
- if (!read.getReferenceName().equals(interval.getContig()))
+ if ( !read.getReferenceName().equals(interval.getContig()) )
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
- else if (uStop < interval.getStart())
+ else if ( uStop < interval.getStart() )
return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;
- else if (uStart > interval.getStop())
+ else if ( uStart > interval.getStop() )
return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;
- else if (sStop < interval.getStart())
+ else if ( sStop < interval.getStart() )
return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT;
- else if (sStart > interval.getStop())
+ else if ( sStart > interval.getStop() )
return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT;
- else if ((sStart >= interval.getStart()) &&
- (sStop <= interval.getStop()))
+ else if ( (sStart >= interval.getStart()) &&
+ (sStop <= interval.getStop()) )
return ReadAndIntervalOverlap.OVERLAP_CONTAINED;
- else if ((sStart < interval.getStart()) &&
- (sStop > interval.getStop()))
+ else if ( (sStart < interval.getStart()) &&
+ (sStop > interval.getStop()) )
return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;
- else if ((sStart < interval.getStart()))
+ else if ( (sStart < interval.getStart()) )
return ReadAndIntervalOverlap.OVERLAP_LEFT;
else
@@ -340,36 +337,52 @@ public class ReadUtils {
}
/**
- * Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) in case it falls in
- * a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns
- * the base prior to the deletion. If clipping the right tail (end of the read) returns the base after the
- * deletion.
+ * Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) to take care of
+ * two corner cases:
+ *
+ * 1. If clipping the right tail (end of the read) getReadCoordinateForReferenceCoordinate and fall inside
+ * a deletion return the base after the deletion. If clipping the left tail (beginning of the read) it
+ * doesn't matter because it already returns the previous base by default.
+ *
+ * 2. If clipping the left tail (beginning of the read) getReadCoordinateForReferenceCoordinate and the
+ * read starts with an insertion, and you're requesting the first read based coordinate, it will skip
+ * the leading insertion (because it has the same reference coordinate as the following base).
*
* @param read
* @param refCoord
* @param tail
* @return the read coordinate corresponding to the requested reference coordinate for clipping.
*/
- @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
+ @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd() || (read.getUnclippedEnd() < read.getUnclippedStart())"})
@Ensures({"result >= 0", "result < read.getReadLength()"})
public static int getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord, ClippingTail tail) {
Pair result = getReadCoordinateForReferenceCoordinate(read, refCoord);
int readCoord = result.getFirst();
+ // Corner case one: clipping the right tail and falls on deletion, move to the next
+ // read coordinate. It is not a problem for the left tail because the default answer
+ // from getReadCoordinateForReferenceCoordinate is to give the previous read coordinate.
if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL)
readCoord++;
+ // clipping the left tail and first base is insertion, go to the next read coordinate
+ // with the same reference coordinate. Advance to the next cigar element, or to the
+ // end of the read if there is no next element.
+ Pair firstElementIsInsertion = readStartsWithInsertion(read);
+ if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion.getFirst())
+ readCoord = Math.min(firstElementIsInsertion.getSecond().getLength(), read.getReadLength() - 1);
+
return readCoord;
}
/**
* Returns the read coordinate corresponding to the requested reference coordinate.
- *
+ *
* WARNING: if the requested reference coordinate happens to fall inside a deletion in the read, this function
* will return the last read base before the deletion. This function returns a
* Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with
* a deletion.
- *
+ *
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail) instead to get a
* pre-processed result according to normal clipping needs. Or you can use this function and tailor the
* behavior to your needs.
@@ -421,7 +434,7 @@ public class ReadUtils {
if (endsWithinCigar)
fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
- // if we end outside the current cigar element, we need to check if the next element is an insertion or deletion.
+ // if we end outside the current cigar element, we need to check if the next element is an insertion or deletion.
else {
nextCigarElement = cigarElementIterator.next();
@@ -442,13 +455,13 @@ public class ReadUtils {
if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
readBases += shift;
- // If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
- // to add the shift of the current cigar element but go back to it's last element to return the last
- // base before the deletion (see warning in function contracts)
+ // If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
+ // to add the shift of the current cigar element but go back to it's last element to return the last
+ // base before the deletion (see warning in function contracts)
else if (fallsInsideDeletion && !endsWithinCigar)
readBases += shift - 1;
- // If we reached our goal inside a deletion then we must backtrack to the last base before the deletion
+ // If we reached our goal inside a deletion then we must backtrack to the last base before the deletion
else if (fallsInsideDeletion && endsWithinCigar)
readBases--;
}
@@ -457,7 +470,6 @@ public class ReadUtils {
if (!goalReached)
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
-
return new Pair(readBases, fallsInsideDeletion);
}
@@ -465,12 +477,11 @@ public class ReadUtils {
* Compares two SAMRecords only the basis on alignment start. Note that
* comparisons are performed ONLY on the basis of alignment start; any
* two SAM records with the same alignment start will be considered equal.
- *
+ *
* Unmapped alignments will all be considered equal.
*/
@Requires({"read1 != null", "read2 != null"})
- @Ensures("result == 0 || result == 1 || result == -1")
public static int compareSAMRecords(GATKSAMRecord read1, GATKSAMRecord read2) {
AlignmentStartComparator comp = new AlignmentStartComparator();
return comp.compare(read1, read2);
@@ -479,7 +490,7 @@ public class ReadUtils {
/**
* Is a base inside a read?
*
- * @param read the read to evaluate
+ * @param read the read to evaluate
* @param referenceCoordinate the reference coordinate of the base to test
* @return true if it is inside the read, false otherwise.
*/
@@ -502,4 +513,22 @@ public class ReadUtils {
}
+ /**
+ * Checks if a read starts with an insertion. It looks beyond Hard and Soft clips
+ * if there are any.
+ *
+ * @param read
+ * @return A pair with the answer (true/false) and the element or null if it doesn't exist
+ */
+ public static Pair readStartsWithInsertion(GATKSAMRecord read) {
+ for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
+ if (cigarElement.getOperator() == CigarOperator.INSERTION)
+ return new Pair(true, cigarElement);
+
+ else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP)
+ break;
+ }
+ return new Pair(false, null);
+ }
+
}
diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java
index 18108e0a1..16b141bc3 100644
--- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java
+++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java
@@ -112,8 +112,9 @@ public class ReadClipperTestUtils {
}
}
- if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
- return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
+// if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
+ if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION)
+ return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
}
return false;
@@ -190,4 +191,18 @@ public class ReadClipperTestUtils {
return invertedCigar;
}
+ /**
+ * Checks whether or not the read has any cigar element that is not H or S
+ *
+ * @param read
+ * @return true if it has any M, I or D, false otherwise
+ */
+ public static boolean readHasNonClippedBases(GATKSAMRecord read) {
+ for (CigarElement cigarElement : read.getCigar().getCigarElements())
+ if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP)
+ return true;
+ return false;
+ }
+
+
}
diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java
index 4dad68dc5..bc918c0a4 100644
--- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java
@@ -30,12 +30,12 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
+import java.util.HashMap;
import java.util.List;
/**
@@ -59,10 +59,11 @@ public class ReadClipperUnitTest extends BaseTest {
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
int readLength = alnStart - alnEnd;
- for (int i=0; i= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
+ assertUnclippedLimits(read, clippedRead);
}
}
}
@@ -72,12 +73,14 @@ public class ReadClipperUnitTest extends BaseTest {
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int readLength = read.getReadLength();
- for (int i=0; i %s", i, read.getCigarString(), clipLeft.getCigarString()));
+ Assert.assertTrue(clipLeft.getReadLength() <= readLength - i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipLeft.getCigarString()));
+ assertUnclippedLimits(read, clipLeft);
- GATKSAMRecord clipRight = ReadClipper.hardClipByReadCoordinates(read, i, readLength-1);
+ GATKSAMRecord clipRight = ReadClipper.hardClipByReadCoordinates(read, i, readLength - 1);
Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString()));
+ assertUnclippedLimits(read, clipRight);
}
}
}
@@ -86,19 +89,27 @@ public class ReadClipperUnitTest extends BaseTest {
public void testHardClipByReferenceCoordinates() {
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
- int alnStart = read.getAlignmentStart();
- int alnEnd = read.getAlignmentEnd();
- for (int i=alnStart; i<=alnEnd; i++) {
- if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
- GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
- if (!clipLeft.isEmpty())
- Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
+ int start = read.getSoftStart();
+ int stop = read.getSoftEnd();
+
+// System.out.println(String.format("CIGAR: %s (%d, %d)", cigar.toString(), start, stop));
+
+// if (ReadUtils.readIsEntirelyInsertion(read))
+// System.out.println("debug");
+
+ for (int i = start; i <= stop; i++) {
+ GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, i);
+ if (!clipLeft.isEmpty()) {
+// System.out.println(String.format("\t left [%d] %s -> %s ", i-start+1, cigar.toString(), clipLeft.getCigarString()));
+ Assert.assertTrue(clipLeft.getAlignmentStart() >= Math.min(read.getAlignmentEnd(), i + 1), String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
+ assertUnclippedLimits(read, clipLeft);
}
- if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
- GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
- if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
- Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
+ GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, -1);
+ if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
+// System.out.println(String.format("\t right [%d] %s -> %s ", i-start+1, cigar.toString(), clipRight.getCigarString()));
+ Assert.assertTrue(clipRight.getAlignmentEnd() <= Math.max(read.getAlignmentStart(), i - 1), String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
+ assertUnclippedLimits(read, clipRight);
}
}
}
@@ -111,10 +122,14 @@ public class ReadClipperUnitTest extends BaseTest {
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
- for (int i=alnStart; i<=alnEnd; i++) {
- GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i);
- if (!clipLeft.isEmpty())
- Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
+ for (int i = alnStart; i <= alnEnd; i++) {
+ GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i);
+
+ if (!clipLeft.isEmpty()) {
+// System.out.println(String.format("Left Tail [%d]: %s (%d,%d,%d : %d,%d,%d) -> %s (%d,%d,%d : %d,%d,%d)", i, cigar.toString(), read.getUnclippedStart(), read.getSoftStart(), read.getAlignmentStart(), read.getAlignmentEnd(), read.getSoftEnd(), read.getUnclippedEnd(), clipLeft.getCigarString(), clipLeft.getUnclippedStart(), clipLeft.getSoftStart(), clipLeft.getAlignmentStart(), clipLeft.getAlignmentEnd(), clipLeft.getSoftEnd(), clipLeft.getUnclippedEnd()));
+ Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
+ assertUnclippedLimits(read, clipLeft);
+ }
}
}
}
@@ -127,10 +142,12 @@ public class ReadClipperUnitTest extends BaseTest {
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
- for (int i=alnStart; i<=alnEnd; i++) {
+ for (int i = alnStart; i <= alnEnd; i++) {
GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i);
- if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
+ if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
+ assertUnclippedLimits(read, clipRight);
+ }
}
}
}
@@ -145,43 +162,36 @@ public class ReadClipperUnitTest extends BaseTest {
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int readLength = read.getReadLength();
- byte [] quals = new byte[readLength];
+ byte[] quals = new byte[readLength];
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
-
- // create a read with nLowQualBases in the left tail
- Utils.fillArrayWithByte(quals, HIGH_QUAL);
+ Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
quals[addLeft] = LOW_QUAL;
read.setBaseQualities(quals);
GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
- // Tests
+ assertUnclippedLimits(read, clipLeft); // Make sure limits haven't changed
+ assertNoLowQualBases(clipLeft, LOW_QUAL); // Make sure the low qualities are gone
+ Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
+ String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
- // Make sure the low qualities are gone
- assertNoLowQualBases(clipLeft, LOW_QUAL);
- // Can't run this test with the current contract of no hanging insertions
-// Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
-
- // create a read with nLowQualBases in the right tail
- Utils.fillArrayWithByte(quals, HIGH_QUAL);
+ Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the right tail
for (int addRight = 0; addRight < nLowQualBases; addRight++)
quals[readLength - addRight - 1] = LOW_QUAL;
read.setBaseQualities(quals);
GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
- // Tests
+// System.out.println(String.format("Debug [%d]: %s -> %s / %s", nLowQualBases, cigar.toString(), clipLeft.getCigarString(), clipRight.getCigarString()));
- // Make sure the low qualities are gone
- assertNoLowQualBases(clipRight, LOW_QUAL);
+ assertUnclippedLimits(read, clipRight); // Make sure limits haven't changed
+ assertNoLowQualBases(clipRight, LOW_QUAL); // Make sure the low qualities are gone
+ Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
+ String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
- // Make sure we haven't clipped any high quals -- Can't run this test with the current contract of no hanging insertions
- //Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
-
- // create a read with nLowQualBases in the both tails
- if (nLowQualBases <= readLength/2) {
- Utils.fillArrayWithByte(quals, HIGH_QUAL);
+ if (nLowQualBases <= readLength / 2) {
+ Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases on both tails
for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
quals[addBoth] = LOW_QUAL;
quals[readLength - addBoth - 1] = LOW_QUAL;
@@ -189,83 +199,25 @@ public class ReadClipperUnitTest extends BaseTest {
read.setBaseQualities(quals);
GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
- // Tests
-
- // Make sure the low qualities are gone
- assertNoLowQualBases(clipBoth, LOW_QUAL);
-
- // Can't run this test with the current contract of no hanging insertions
- //Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2*nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
+ assertUnclippedLimits(read, clipBoth); // Make sure limits haven't changed
+ assertNoLowQualBases(clipBoth, LOW_QUAL); // Make sure the low qualities are gone
+ Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
+ String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2 * nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
}
}
-// logger.warn(String.format("Testing %s for all combinations of low/high qual... PASSED", read.getCigarString()));
}
-
- // ONE OFF Testing clipping that ends inside an insertion ( Ryan's bug )
- final byte[] BASES = {'A','C','G','T','A','C','G','T'};
- final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2};
- final String CIGAR = "1S1M5I1S";
-
- final byte[] CLIPPED_BASES = {};
- final byte[] CLIPPED_QUALS = {};
- final String CLIPPED_CIGAR = "";
-
-
- GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR);
- GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR);
-
- ReadClipperTestUtils.assertEqualReads(ReadClipper.hardClipLowQualEnds(read, (byte) 2), expected);
}
@Test(enabled = true)
public void testHardClipSoftClippedBases() {
-
- // Generate a list of cigars to test
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord clippedRead = ReadClipper.hardClipSoftClippedBases(read);
+ CigarCounter original = new CigarCounter(read);
+ CigarCounter clipped = new CigarCounter(clippedRead);
- int sumHardClips = 0;
- int sumMatches = 0;
-
- boolean tail = true;
- for (CigarElement element : read.getCigar().getCigarElements()) {
- // Assuming cigars are well formed, if we see S or H, it means we're on the tail (left or right)
- if (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP)
- tail = true;
-
- // Adds all H, S and D's (next to hard/soft clips).
- // All these should be hard clips after clipping.
- if (tail && (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.DELETION))
- sumHardClips += element.getLength();
-
- // this means we're no longer on the tail (insertions can still potentially be the tail because
- // of the current contract of clipping out hanging insertions
- else if (element.getOperator() != CigarOperator.INSERTION)
- tail = false;
-
- // Adds all matches to verify that they remain the same after clipping
- if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
- sumMatches += element.getLength();
- }
-
- for (CigarElement element : clippedRead.getCigar().getCigarElements()) {
- // Test if clipped read has Soft Clips (shouldn't have any!)
- Assert.assertTrue( element.getOperator() != CigarOperator.SOFT_CLIP, String.format("Cigar %s -> %s -- FAILED (resulting cigar has soft clips)", read.getCigarString(), clippedRead.getCigarString()));
-
- // Keep track of the total number of Hard Clips after clipping to make sure everything was accounted for
- if (element.getOperator() == CigarOperator.HARD_CLIP)
- sumHardClips -= element.getLength();
-
- // Make sure all matches are still there
- if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
- sumMatches -= element.getLength();
- }
- Assert.assertTrue( sumHardClips == 0, String.format("Cigar %s -> %s -- FAILED (number of hard clips mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumHardClips));
- Assert.assertTrue( sumMatches == 0, String.format("Cigar %s -> %s -- FAILED (number of matches mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumMatches));
-
-
-// logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString()));
+ assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
+ original.assertHardClippingSoftClips(clipped); // Make sure we have only clipped SOFT_CLIPS
}
}
@@ -276,38 +228,39 @@ public class ReadClipperUnitTest extends BaseTest {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord clippedRead = ReadClipper.hardClipLeadingInsertions(read);
+ assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
+
int expectedLength = read.getReadLength() - leadingCigarElementLength(read.getCigar(), CigarOperator.INSERTION);
if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
expectedLength -= leadingCigarElementLength(ReadClipperTestUtils.invertCigar(read.getCigar()), CigarOperator.INSERTION);
- if (! clippedRead.isEmpty()) {
+ if (!clippedRead.isEmpty()) {
Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there
Assert.assertFalse(startsWithInsertion(clippedRead.getCigar())); // check that the insertions are gone
- }
- else
+ } else
Assert.assertTrue(expectedLength == 0, String.format("expected length: %d", expectedLength)); // check that the read was expected to be fully clipped
}
}
}
@Test(enabled = true)
- public void testRevertSoftClippedBases()
- {
- for (Cigar cigar: cigarList) {
+ public void testRevertSoftClippedBases() {
+ for (Cigar cigar : cigarList) {
final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
final int tailSoftClips = leadingCigarElementLength(ReadClipperTestUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP);
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read);
- if ( leadingSoftClips > 0 || tailSoftClips > 0) {
+ assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed
+
+ if (leadingSoftClips > 0 || tailSoftClips > 0) {
final int expectedStart = read.getAlignmentStart() - leadingSoftClips;
final int expectedEnd = read.getAlignmentEnd() + tailSoftClips;
Assert.assertEquals(unclipped.getAlignmentStart(), expectedStart);
Assert.assertEquals(unclipped.getAlignmentEnd(), expectedEnd);
- }
- else
+ } else
Assert.assertEquals(read.getCigarString(), unclipped.getCigarString());
}
}
@@ -315,12 +268,25 @@ public class ReadClipperUnitTest extends BaseTest {
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
if (!read.isEmpty()) {
- byte [] quals = read.getBaseQualities();
- for (int i=0; i 0;
}
@@ -335,10 +301,46 @@ public class ReadClipperUnitTest extends BaseTest {
return 0;
}
- private boolean cigarHasElementsDifferentThanInsertionsAndHardClips (Cigar cigar) {
+ private boolean cigarHasElementsDifferentThanInsertionsAndHardClips(Cigar cigar) {
for (CigarElement cigarElement : cigar.getCigarElements())
if (cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.HARD_CLIP)
return true;
return false;
}
+
+ private class CigarCounter {
+ private HashMap counter;
+
+ public Integer getCounterForOp(CigarOperator operator) {
+ return counter.get(operator);
+ }
+
+ public CigarCounter(GATKSAMRecord read) {
+ CigarOperator[] operators = CigarOperator.values();
+ counter = new HashMap(operators.length);
+
+ for (CigarOperator op : operators)
+ counter.put(op, 0);
+
+ for (CigarElement cigarElement : read.getCigar().getCigarElements())
+ counter.put(cigarElement.getOperator(), counter.get(cigarElement.getOperator()) + cigarElement.getLength());
+ }
+
+ public boolean assertHardClippingSoftClips(CigarCounter clipped) {
+ for (CigarOperator op : counter.keySet()) {
+ if (op == CigarOperator.HARD_CLIP || op == CigarOperator.SOFT_CLIP) {
+ int counterTotal = counter.get(CigarOperator.HARD_CLIP) + counter.get(CigarOperator.SOFT_CLIP);
+ int clippedHard = clipped.getCounterForOp(CigarOperator.HARD_CLIP);
+ int clippedSoft = clipped.getCounterForOp(CigarOperator.SOFT_CLIP);
+
+ Assert.assertEquals(counterTotal, clippedHard);
+ Assert.assertTrue(clippedSoft == 0);
+ } else
+ Assert.assertEquals(counter.get(op), clipped.getCounterForOp(op));
+ }
+ return true;
+ }
+
+ }
+
}
\ No newline at end of file