diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index 9e7ee9dac..d934bb972 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.clipping; import com.google.java.contract.Requires; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -135,10 +136,9 @@ public class ReadClipper { ops.clear(); if ( clippedRead.isEmpty() ) return GATKSAMRecord.emptyRead(clippedRead); -// return new GATKSAMRecord( clippedRead.getHeader() ); return clippedRead; } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); // this should never happen + throw new RuntimeException(e); // this should never happen } } } @@ -188,7 +188,6 @@ public class ReadClipper { private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) return GATKSAMRecord.emptyRead(read); -// return new GATKSAMRecord(read.getHeader()); this.addOp(new ClippingOp(start, stop)); return clipRead(ClippingRepresentation.HARDCLIP_BASES); @@ -213,14 +212,12 @@ public class ReadClipper { private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { if (read.isEmpty() || left == right) return GATKSAMRecord.emptyRead(read); -// return new GATKSAMRecord(read.getHeader()); GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); // after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions // make the left cut index no longer part of the read. In that case, clip the read entirely. if (left > leftTailRead.getAlignmentEnd()) return GATKSAMRecord.emptyRead(read); -// return new GATKSAMRecord(read.getHeader()); ReadClipper clipper = new ReadClipper(leftTailRead); return clipper.hardClipByReferenceCoordinatesLeftTail(left); @@ -402,17 +399,77 @@ public class ReadClipper { /** * Turns soft clipped bases into matches - * * @return a new read with every soft clip turned into a match */ private GATKSAMRecord revertSoftClippedBases() { - this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates + if (read.isEmpty()) + return read; + + this.addOp(new ClippingOp(0, 0)); return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES); } + + /** + * Reverts ALL soft-clipped bases + * + * @param read the read + * @return the read with all soft-clipped bases turned into matches + */ public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) { return (new ReadClipper(read)).revertSoftClippedBases(); } + /** + * Reverts only soft clipped bases with quality score greater than or equal to minQual + * + * Note: Will write a temporary field with the number of soft clips that were undone on each side (left: 'SL', right: 'SR') + * + * @param read the read + * @param minQual the mininum base quality score to revert the base (inclusive) + * @return the read with high quality soft clips reverted + */ + public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read, byte minQual) { + int nLeadingSoftClips = read.getAlignmentStart() - read.getSoftStart(); + if (read.isEmpty() || nLeadingSoftClips > read.getReadLength()) + return GATKSAMRecord.emptyRead(read); + + byte [] quals = read.getBaseQualities(EventType.BASE_SUBSTITUTION); + int left = -1; + + if (nLeadingSoftClips > 0) { + for (int i = nLeadingSoftClips - 1; i >= 0; i--) { + if (quals[i] >= minQual) + left = i; + else + break; + } + } + + int right = -1; + int nTailingSoftClips = read.getSoftEnd() - read.getAlignmentEnd(); + if (nTailingSoftClips > 0) { + for (int i = read.getReadLength() - nTailingSoftClips; i < read.getReadLength() ; i++) { + if (quals[i] >= minQual) + right = i; + else + break; + } + } + + GATKSAMRecord clippedRead = read; + if (right >= 0) { + if (right + 1 < clippedRead.getReadLength()) + clippedRead = hardClipByReadCoordinates(clippedRead, right+1, clippedRead.getReadLength()-1); // first we hard clip the low quality soft clips on the left tail + clippedRead.setTemporaryAttribute("SR", nTailingSoftClips - (read.getReadLength() - right - 1)); // keep track of how may bases to 're-softclip' after processing + } + if (left >= 0) { + if (left - 1 > 0) + clippedRead = hardClipByReadCoordinates(clippedRead, 0, left-1); // then we hard clip the low quality soft clips on the right tail + clippedRead.setTemporaryAttribute("SL", nLeadingSoftClips - left); // keep track of how may bases to 're-softclip' after processing + } + return revertSoftClippedBases(clippedRead); // now that we have only good bases in the soft clips, we can revert them all + } + /** * Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail * and hardClipByReferenceCoordinatesRightTail. Should not be used directly. @@ -446,9 +503,6 @@ public class ReadClipper { stop = read.getReadLength() - 1; } -// if ((start == 0 && stop == read.getReadLength() - 1)) -// return GATKSAMRecord.emptyRead(read); - if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 7d3477a7b..c6d14a8bc 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -428,8 +428,8 @@ public class GATKSAMRecord extends BAMRecord { * Use this method if you want to create a new empty GATKSAMRecord based on * another GATKSAMRecord * - * @param read - * @return + * @param read a read to copy the header from + * @return a read with no bases but safe for the GATK */ public static GATKSAMRecord emptyRead(GATKSAMRecord read) { GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(), diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUnclippedStartWithNoTiesComparator.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUnclippedStartWithNoTiesComparator.java new file mode 100644 index 000000000..f961857e1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUnclippedStartWithNoTiesComparator.java @@ -0,0 +1,48 @@ +package org.broadinstitute.sting.utils.sam; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import net.sf.samtools.SAMRecord; + +import java.util.Comparator; + +public class ReadUnclippedStartWithNoTiesComparator implements Comparator { + @Requires("c1 >= 0 && c2 >= 0") + @Ensures("result == 0 || result == 1 || result == -1") + private int compareContigs(int c1, int c2) { + if (c1 == c2) + return 0; + else if (c1 > c2) + return 1; + return -1; + } + + @Requires("r1 != null && r2 != null") + @Ensures("result == 0 || result == 1 || result == -1") + public int compare(SAMRecord r1, SAMRecord r2) { + int result; + + if (r1 == r2) + result = 0; + + else if (r1.getReadUnmappedFlag()) + result = 1; + else if (r2.getReadUnmappedFlag()) + result = -1; + else { + final int cmpContig = compareContigs(r1.getReferenceIndex(), r2.getReferenceIndex()); + + if (cmpContig != 0) + result = cmpContig; + + else { + if (r1.getUnclippedStart() < r2.getUnclippedStart()) + result = -1; + else + result = 1; + } + } + + return result; + } +} \ No newline at end of file