From 5c9b659c02aef41527ee07cd9c2e472a23de977b Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 28 Sep 2011 12:10:47 -0400 Subject: [PATCH 1/3] clipping both ends of the reads was modifying the original read This goes against the ReadClipper contract, and was affecting the second part of the read that spans over multiple intervals. Fixed. --- .../broadinstitute/sting/utils/clipreads/ReadClipper.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 83db20238..275f476dc 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -93,8 +93,9 @@ public class ReadClipper { public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { if (left == right) return new SAMRecord(read.getHeader()); - this.read = hardClipByReferenceCoordinates(right, -1); - return hardClipByReferenceCoordinates(-1, left); + SAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); + ReadClipper clipper = new ReadClipper(leftTailRead); + return clipper.hardClipByReferenceCoordinatesLeftTail(left); } public SAMRecord hardClipLowQualEnds(byte lowQual) { From 3c7b7f74ef235ed9933c8227b2c89cb68b3374b0 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 28 Sep 2011 14:41:30 -0400 Subject: [PATCH 2/3] Optimized interval iteration Using a TreedSet to manipulate getToolkit.getIntervals() and being smart about which intervals to test makes interval clipping O(1) instead of O(n). --- .../sting/utils/GenomeLocComparator.java | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 public/java/src/org/broadinstitute/sting/utils/GenomeLocComparator.java diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocComparator.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocComparator.java new file mode 100644 index 000000000..7aa9fdd65 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocComparator.java @@ -0,0 +1,56 @@ +package org.broadinstitute.sting.utils; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; + +import java.util.Comparator; + +/** + * + * @author Mauricio Carneiro + * @since 9/28/11 + */ +public class GenomeLocComparator implements Comparator { + /** + * compares genomeLoc's contigs + * + * @param gl1 the genome loc to compare contigs + * @param gl2 the genome loc to compare contigs + * @return 0 if equal, -1 if gl2.contig is greater, 1 if gl1.contig is greater + */ + @Requires("gl2 != null") + @Ensures("result == 0 || result == 1 || result == -1") + public final int compareContigs( GenomeLoc gl1, GenomeLoc gl2 ) { + if (gl1.contigIndex == gl2.contigIndex) + return 0; + else if (gl1.contigIndex > gl2.contigIndex) + return 1; + return -1; + } + + @Requires("gl2 != null") + @Ensures("result == 0 || result == 1 || result == -1") + public int compare ( GenomeLoc gl1, GenomeLoc gl2 ) { + int result = 0; + + if ( gl1 == gl2 ) { + result = 0; + } + else if(GenomeLoc.isUnmapped(gl1)) + result = 1; + else if(GenomeLoc.isUnmapped(gl2)) + result = -1; + else { + final int cmpContig = compareContigs(gl1, gl2); + + if ( cmpContig != 0 ) { + result = cmpContig; + } else { + if ( gl1.getStart() < gl2.getStart() ) result = -1; + if ( gl1.getStart() > gl2.getStart() ) result = 1; + } + } + + return result; + } +} From ff2f4df043eae05bfc338898155a34bb50becd06 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 28 Sep 2011 16:07:00 -0400 Subject: [PATCH 3/3] Fixed hardclipping inside indel (right tail) when hard clipping the right tail of a read falls inside a deletion, clipping should fall back to the last base before the deletion to follow the ReadClipper's contract. --- .../src/org/broadinstitute/sting/utils/sam/ReadUtils.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 14ebbaa6f..e0a3a5a53 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -893,6 +893,10 @@ public class ReadUtils { // base before the deletion (see warning in function contracts) else if (fallsInsideDeletion && !endsWithinCigar) readBases += shift - 1; + + // If we reached our goal inside a deletion then we must backtrack to the last base before the deletion + else if (fallsInsideDeletion && endsWithinCigar) + readBases--; } }