diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index e197bb973..f4c057c15 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils; import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; -import org.apache.lucene.messages.NLS; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -409,6 +408,17 @@ public class MathUtils { return Math.sqrt(rms); } + public static double rms(Collection l) { + if (l.size() == 0) + return 0.0; + + double rms = 0.0; + for (Double i : l) + rms += i*i; + rms /= l.size(); + return Math.sqrt(rms); + } + public static double distanceSquared( final double[] x, final double[] y ) { double dist = 0.0; for(int iii = 0; iii < x.length; iii++) { diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index d899613d7..faa3cf34e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -5,15 +5,11 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; -import org.apache.poi.hssf.record.PageBreakRecord; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.ReadUtils; -import java.io.PipedOutputStream; -import java.lang.reflect.Array; import java.util.Iterator; -import java.util.List; +import java.util.Stack; import java.util.Vector; /** @@ -256,13 +252,17 @@ public class ClippingOp { if (start == 0 && stop == read.getReadLength() -1) return new SAMRecord(read.getHeader()); - int newLength = read.getReadLength() - (stop - start + 1); + CigarShift cigarShift = hardClipCigar(read.getCigar(), start, stop); + + // the cigar may force a shift left or right (or both) in case we are left with insertions + // starting or ending the read after applying the hard clip on start/stop. + int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd; byte [] newBases = new byte[newLength]; byte [] newQuals = new byte[newLength]; - int copyStart = (start == 0) ? stop + 1 : 0; + int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart; System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength); - System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength); Cigar newCigar = hardClipCigar(read.getCigar(), start, stop); + System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength); SAMRecord hardClippedRead; try { @@ -273,19 +273,20 @@ public class ClippingOp { hardClippedRead.setBaseQualities(newQuals); hardClippedRead.setReadBases(newBases); - hardClippedRead.setCigar(newCigar); + hardClippedRead.setCigar(cigarShift.cigar); if (start == 0) - hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), newCigar)); + hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar)); return hardClippedRead; } @Requires({"!cigar.isEmpty()"}) - private Cigar hardClipCigar (Cigar cigar, int start, int stop) { + private CigarShift hardClipCigar (Cigar cigar, int start, int stop) { Cigar newCigar = new Cigar(); int index = 0; int totalHardClipCount = stop - start + 1; + int alignmentShift = 0; // caused by hard clipping insertions or deletions // hard clip the beginning of the cigar string if (start == 0) { @@ -307,15 +308,19 @@ public class ClippingOp { // we're still clipping or just finished perfectly if (index + shift == stop + 1) { - newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP)); + alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength()); + newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP)); } // element goes beyond what we need to clip else if (index + shift > stop + 1) { int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1); - newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP)); + alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop-index+1); + newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP)); newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator())); } index += shift; + alignmentShift += calculateHardClippingAlignmentShift(cigarElement, shift); + if (index <= stop && cigarElementIterator.hasNext()) cigarElement = cigarElementIterator.next(); } @@ -345,6 +350,8 @@ public class ClippingOp { // element goes beyond our clip starting position else { int elementLengthAfterChopping = start - index; + alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength() - (start - index)); + // if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) totalHardClipCount += elementLengthAfterChopping; @@ -356,9 +363,67 @@ public class ClippingOp { if (index < start && cigarElementIterator.hasNext()) cigarElement = cigarElementIterator.next(); } - newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP)); + + // check if we are hard clipping indels + while(cigarElementIterator.hasNext()) { + cigarElement = cigarElementIterator.next(); + alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength()); + } + newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP)); } - return newCigar; + return cleanHardClippedCigar(newCigar); + } + + /** + * Checks if a hard clipped cigar left a read starting or ending with insertions/deletions + * and cleans it up accordingly. + * + * @param cigar + * @return + */ + private CigarShift cleanHardClippedCigar(Cigar cigar) { + Cigar cleanCigar = new Cigar(); + int shiftFromStart = 0; + int shiftFromEnd = 0; + Stack cigarStack = new Stack(); + Stack inverseCigarStack = new Stack(); + + for (CigarElement cigarElement : cigar.getCigarElements()) + cigarStack.push(cigarElement); + + for (int i = 1; i <= 2; i++) { + int shift = 0; + boolean readHasStarted = false; + + while(!cigarStack.empty()) { + CigarElement cigarElement = cigarStack.pop(); + + if ( !readHasStarted && + cigarElement.getOperator() != CigarOperator.INSERTION && + cigarElement.getOperator() != CigarOperator.DELETION && + cigarElement.getOperator() != CigarOperator.HARD_CLIP) + readHasStarted = true; + else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION) + shift += cigarElement.getLength(); + + if (readHasStarted || cigarElement.getOperator() == CigarOperator.HARD_CLIP) { + if (i==1) + inverseCigarStack.push(cigarElement); + else + cleanCigar.add(cigarElement); + } + } + // first pass (i=1) is from end to start of the cigar elements + if (i == 1) { + shiftFromEnd = shift; + cigarStack = inverseCigarStack; + } + // second pass (i=2) is from start to end with the end already cleaned + else { + shiftFromStart = shift; + } + } + return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd); } private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) { @@ -379,6 +444,34 @@ public class ClippingOp { else break; } + return shift; } -} + + private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) { + if (cigarElement.getOperator() == CigarOperator.INSERTION) { + int cigarElementLength = cigarElement.getLength(); + if (clippedLength >= cigarElementLength) + return -cigarElement.getLength(); + else + return -clippedLength; + } + + if (cigarElement.getOperator() == CigarOperator.DELETION) + return cigarElement.getLength(); + + return 0; + } + + private class CigarShift { + private Cigar cigar; + private int shiftFromStart; + private int shiftFromEnd; + + private CigarShift(Cigar cigar, int shiftFromStart, int shiftFromEnd) { + this.cigar = cigar; + this.shiftFromStart = shiftFromStart; + this.shiftFromEnd = shiftFromEnd; + } + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 6248849f9..be045309a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -1,11 +1,9 @@ package org.broadinstitute.sting.utils.clipreads; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; +import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; -import org.broad.tribble.util.PositionalStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.jets3t.service.multi.ThreadedStorageService; import java.util.ArrayList; import java.util.List; @@ -50,13 +48,26 @@ public class ReadClipper { return read; } - public SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { + public SAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) { + return hardClipByReferenceCoordinates(-1, refStop); + } + + public SAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) { + return hardClipByReferenceCoordinates(refStart, -1); + } + + private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop); - System.out.println("Clipping start/stop: " + start + "/" + stop); + if (start < 0 || stop > read.getReadLength() - 1) + throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); + + //System.out.println("Clipping start/stop: " + start + "/" + stop); this.addOp(new ClippingOp(start, stop)); - return clipRead(ClippingRepresentation.HARDCLIP_BASES); + SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); + this.ops = null; + return clippedRead; } public SAMRecord hardClipByReadCoordinates(int start, int stop) { @@ -64,10 +75,12 @@ public class ReadClipper { return clipRead(ClippingRepresentation.HARDCLIP_BASES); } + @Requires("left <= right") public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { - this.read = hardClipByReferenceCoordinates(-1, left); - this.ops = null; // reset the operations - return hardClipByReferenceCoordinates(right, -1); + if (left == right) + return new SAMRecord(read.getHeader()); + this.read = hardClipByReferenceCoordinates(right, -1); + return hardClipByReferenceCoordinates(-1, left); } public SAMRecord hardClipLowQualEnds(byte lowQual) { diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 6be37cf52..62bbb0307 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -632,7 +632,7 @@ public class ReadUtils { public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) { int start = getRefCoordSoftUnclippedStart(read); - int stop = getRefCoordSoftUnclippedStop(read); + int stop = getRefCoordSoftUnclippedEnd(read); if ( !read.getReferenceName().equals(interval.getContig()) ) return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG; @@ -658,6 +658,7 @@ public class ReadUtils { return ReadAndIntervalOverlap.OVERLAP_RIGHT; } + @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"}) public static int getRefCoordSoftUnclippedStart(SAMRecord read) { int start = read.getUnclippedStart(); for (CigarElement cigarElement : read.getCigar().getCigarElements()) { @@ -669,51 +670,98 @@ public class ReadUtils { return start; } - public static int getRefCoordSoftUnclippedStop(SAMRecord read) { - int stop = read.getAlignmentEnd(); - List cigarElementList = read.getCigar().getCigarElements(); - CigarElement lastCigarElement = cigarElementList.get(cigarElementList.size()-1); - if (lastCigarElement.getOperator() == CigarOperator.SOFT_CLIP) - stop += lastCigarElement.getLength(); - return stop; + @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"}) + public static int getRefCoordSoftUnclippedEnd(SAMRecord read) { + int stop = read.getUnclippedStart(); + int shift = 0; + CigarOperator lastOperator = null; + for (CigarElement cigarElement : read.getCigar().getCigarElements()) { + stop += shift; + lastOperator = cigarElement.getOperator(); + if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP) + shift = cigarElement.getLength(); + else + shift = 0; + } + return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; + } + + /** + * Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before + * the alignment start of the read. + * + * @param read + * @param refCoord + * @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before) + */ + @Requires({"refCoord >= read.getUnclippedStart()", "refCoord < read.getAlignmentStart()"}) + private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(SAMRecord read, int refCoord) { + if (getRefCoordSoftUnclippedStart(read) <= refCoord) + return refCoord - getRefCoordSoftUnclippedStart(read) + 1; + return -1; } + /** + * Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region after + * the alignment end of the read. + * + * @param read + * @param refCoord + * @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before) + */ + @Requires({"refCoord <= read.getUnclippedEnd()", "refCoord > read.getAlignmentEnd()"}) + private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(SAMRecord read, int refCoord) { + if (getRefCoordSoftUnclippedEnd(read) >= refCoord) + return refCoord - getRefCoordSoftUnclippedStart(read) + 1; + return -1; + } - @Requires({"refCoord >= read.getAlignmentStart()", "refCoord <= read.getAlignmentEnd()"}) + + @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"}) @Ensures({"result >= 0", "result < read.getReadLength()"}) public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) { int readBases = 0; int refBases = 0; - int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases - boolean goalReached = refBases == goal; - Iterator cigarElementIterator = read.getCigar().getCigarElements().iterator(); - while (!goalReached && cigarElementIterator.hasNext()) { - CigarElement cigarElement = cigarElementIterator.next(); - int shift = 0; - //if (refBases == 0 && readBases == 0 && cigarElement.getOperator() == CigarOperator.HARD_CLIP) { - // goal -= cigarElement.getLength(); - //} - - if (cigarElement.getOperator().consumesReferenceBases()) { - if (refBases + cigarElement.getLength() < goal) { - shift = cigarElement.getLength(); - } - else { - shift = goal - refBases; - } - refBases += shift; - } - goalReached = refBases == goal; - - if (cigarElement.getOperator().consumesReadBases()) { - readBases += goalReached ? shift : cigarElement.getLength(); - } + if (refCoord < read.getAlignmentStart()) { + readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord); + if (readBases < 0) + throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate."); } + else if (refCoord > read.getAlignmentEnd()) { + readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(read, refCoord); + if (readBases < 0) + throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate."); + } + else { + int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases + boolean goalReached = refBases == goal; - if (!goalReached) - throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?"); + Iterator cigarElementIterator = read.getCigar().getCigarElements().iterator(); + while (!goalReached && cigarElementIterator.hasNext()) { + CigarElement cigarElement = cigarElementIterator.next(); + int shift = 0; + + if (cigarElement.getOperator().consumesReferenceBases()) { + if (refBases + cigarElement.getLength() < goal) { + shift = cigarElement.getLength(); + } + else { + shift = goal - refBases; + } + refBases += shift; + } + goalReached = refBases == goal; + + if (cigarElement.getOperator().consumesReadBases()) { + readBases += goalReached ? shift : cigarElement.getLength(); + } + } + + if (!goalReached) + throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?"); + } return readBases; } diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1959.jar b/settings/repository/edu.mit.broad/picard-private-parts-2034.jar similarity index 62% rename from settings/repository/edu.mit.broad/picard-private-parts-1959.jar rename to settings/repository/edu.mit.broad/picard-private-parts-2034.jar index ae11e636b..11f59420c 100644 Binary files a/settings/repository/edu.mit.broad/picard-private-parts-1959.jar and b/settings/repository/edu.mit.broad/picard-private-parts-2034.jar differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1959.xml b/settings/repository/edu.mit.broad/picard-private-parts-2034.xml similarity index 64% rename from settings/repository/edu.mit.broad/picard-private-parts-1959.xml rename to settings/repository/edu.mit.broad/picard-private-parts-2034.xml index e7c7e3a21..1a60a2015 100644 --- a/settings/repository/edu.mit.broad/picard-private-parts-1959.xml +++ b/settings/repository/edu.mit.broad/picard-private-parts-2034.xml @@ -1,3 +1,3 @@ - + diff --git a/settings/repository/net.sf/picard-1.49.895.xml b/settings/repository/net.sf/picard-1.49.895.xml deleted file mode 100644 index 52d4900c5..000000000 --- a/settings/repository/net.sf/picard-1.49.895.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/picard-1.49.895.jar b/settings/repository/net.sf/picard-1.52.944.jar similarity index 82% rename from settings/repository/net.sf/picard-1.49.895.jar rename to settings/repository/net.sf/picard-1.52.944.jar index 3ee1f2090..e147ebbba 100644 Binary files a/settings/repository/net.sf/picard-1.49.895.jar and b/settings/repository/net.sf/picard-1.52.944.jar differ diff --git a/settings/repository/net.sf/picard-1.52.944.xml b/settings/repository/net.sf/picard-1.52.944.xml new file mode 100644 index 000000000..61fac6644 --- /dev/null +++ b/settings/repository/net.sf/picard-1.52.944.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/sam-1.49.895.xml b/settings/repository/net.sf/sam-1.49.895.xml deleted file mode 100644 index 0436ce881..000000000 --- a/settings/repository/net.sf/sam-1.49.895.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/sam-1.49.895.jar b/settings/repository/net.sf/sam-1.52.944.jar similarity index 76% rename from settings/repository/net.sf/sam-1.49.895.jar rename to settings/repository/net.sf/sam-1.52.944.jar index c55ab0b72..7037c7a96 100644 Binary files a/settings/repository/net.sf/sam-1.49.895.jar and b/settings/repository/net.sf/sam-1.52.944.jar differ diff --git a/settings/repository/net.sf/sam-1.52.944.xml b/settings/repository/net.sf/sam-1.52.944.xml new file mode 100644 index 000000000..2229395b2 --- /dev/null +++ b/settings/repository/net.sf/sam-1.52.944.xml @@ -0,0 +1,3 @@ + + +