Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
bdf04b8057
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils;
|
|||
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.lucene.messages.NLS;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
|
|
@ -409,6 +408,17 @@ public class MathUtils {
|
|||
return Math.sqrt(rms);
|
||||
}
|
||||
|
||||
public static double rms(Collection<Double> l) {
|
||||
if (l.size() == 0)
|
||||
return 0.0;
|
||||
|
||||
double rms = 0.0;
|
||||
for (Double i : l)
|
||||
rms += i*i;
|
||||
rms /= l.size();
|
||||
return Math.sqrt(rms);
|
||||
}
|
||||
|
||||
public static double distanceSquared( final double[] x, final double[] y ) {
|
||||
double dist = 0.0;
|
||||
for(int iii = 0; iii < x.length; iii++) {
|
||||
|
|
|
|||
|
|
@ -5,15 +5,11 @@ import net.sf.samtools.Cigar;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.poi.hssf.record.PageBreakRecord;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.io.PipedOutputStream;
|
||||
import java.lang.reflect.Array;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Stack;
|
||||
import java.util.Vector;
|
||||
|
||||
/**
|
||||
|
|
@ -256,13 +252,17 @@ public class ClippingOp {
|
|||
if (start == 0 && stop == read.getReadLength() -1)
|
||||
return new SAMRecord(read.getHeader());
|
||||
|
||||
int newLength = read.getReadLength() - (stop - start + 1);
|
||||
CigarShift cigarShift = hardClipCigar(read.getCigar(), start, stop);
|
||||
|
||||
// the cigar may force a shift left or right (or both) in case we are left with insertions
|
||||
// starting or ending the read after applying the hard clip on start/stop.
|
||||
int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
|
||||
byte [] newBases = new byte[newLength];
|
||||
byte [] newQuals = new byte[newLength];
|
||||
int copyStart = (start == 0) ? stop + 1 : 0;
|
||||
int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;
|
||||
|
||||
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
|
||||
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength); Cigar newCigar = hardClipCigar(read.getCigar(), start, stop);
|
||||
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
|
||||
|
||||
SAMRecord hardClippedRead;
|
||||
try {
|
||||
|
|
@ -273,19 +273,20 @@ public class ClippingOp {
|
|||
|
||||
hardClippedRead.setBaseQualities(newQuals);
|
||||
hardClippedRead.setReadBases(newBases);
|
||||
hardClippedRead.setCigar(newCigar);
|
||||
hardClippedRead.setCigar(cigarShift.cigar);
|
||||
if (start == 0)
|
||||
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), newCigar));
|
||||
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));
|
||||
|
||||
return hardClippedRead;
|
||||
|
||||
}
|
||||
|
||||
@Requires({"!cigar.isEmpty()"})
|
||||
private Cigar hardClipCigar (Cigar cigar, int start, int stop) {
|
||||
private CigarShift hardClipCigar (Cigar cigar, int start, int stop) {
|
||||
Cigar newCigar = new Cigar();
|
||||
int index = 0;
|
||||
int totalHardClipCount = stop - start + 1;
|
||||
int alignmentShift = 0; // caused by hard clipping insertions or deletions
|
||||
|
||||
// hard clip the beginning of the cigar string
|
||||
if (start == 0) {
|
||||
|
|
@ -307,15 +308,19 @@ public class ClippingOp {
|
|||
|
||||
// we're still clipping or just finished perfectly
|
||||
if (index + shift == stop + 1) {
|
||||
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP));
|
||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
|
||||
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
|
||||
}
|
||||
// element goes beyond what we need to clip
|
||||
else if (index + shift > stop + 1) {
|
||||
int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
|
||||
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP));
|
||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop-index+1);
|
||||
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
|
||||
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
|
||||
}
|
||||
index += shift;
|
||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, shift);
|
||||
|
||||
if (index <= stop && cigarElementIterator.hasNext())
|
||||
cigarElement = cigarElementIterator.next();
|
||||
}
|
||||
|
|
@ -345,6 +350,8 @@ public class ClippingOp {
|
|||
// element goes beyond our clip starting position
|
||||
else {
|
||||
int elementLengthAfterChopping = start - index;
|
||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength() - (start - index));
|
||||
|
||||
// if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later
|
||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||
totalHardClipCount += elementLengthAfterChopping;
|
||||
|
|
@ -356,9 +363,67 @@ public class ClippingOp {
|
|||
if (index < start && cigarElementIterator.hasNext())
|
||||
cigarElement = cigarElementIterator.next();
|
||||
}
|
||||
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP));
|
||||
|
||||
// check if we are hard clipping indels
|
||||
while(cigarElementIterator.hasNext()) {
|
||||
cigarElement = cigarElementIterator.next();
|
||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
|
||||
}
|
||||
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
|
||||
}
|
||||
return newCigar;
|
||||
return cleanHardClippedCigar(newCigar);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a hard clipped cigar left a read starting or ending with insertions/deletions
|
||||
* and cleans it up accordingly.
|
||||
*
|
||||
* @param cigar
|
||||
* @return
|
||||
*/
|
||||
private CigarShift cleanHardClippedCigar(Cigar cigar) {
|
||||
Cigar cleanCigar = new Cigar();
|
||||
int shiftFromStart = 0;
|
||||
int shiftFromEnd = 0;
|
||||
Stack<CigarElement> cigarStack = new Stack<CigarElement>();
|
||||
Stack<CigarElement> inverseCigarStack = new Stack<CigarElement>();
|
||||
|
||||
for (CigarElement cigarElement : cigar.getCigarElements())
|
||||
cigarStack.push(cigarElement);
|
||||
|
||||
for (int i = 1; i <= 2; i++) {
|
||||
int shift = 0;
|
||||
boolean readHasStarted = false;
|
||||
|
||||
while(!cigarStack.empty()) {
|
||||
CigarElement cigarElement = cigarStack.pop();
|
||||
|
||||
if ( !readHasStarted &&
|
||||
cigarElement.getOperator() != CigarOperator.INSERTION &&
|
||||
cigarElement.getOperator() != CigarOperator.DELETION &&
|
||||
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||
readHasStarted = true;
|
||||
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION)
|
||||
shift += cigarElement.getLength();
|
||||
|
||||
if (readHasStarted || cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
|
||||
if (i==1)
|
||||
inverseCigarStack.push(cigarElement);
|
||||
else
|
||||
cleanCigar.add(cigarElement);
|
||||
}
|
||||
}
|
||||
// first pass (i=1) is from end to start of the cigar elements
|
||||
if (i == 1) {
|
||||
shiftFromEnd = shift;
|
||||
cigarStack = inverseCigarStack;
|
||||
}
|
||||
// second pass (i=2) is from start to end with the end already cleaned
|
||||
else {
|
||||
shiftFromStart = shift;
|
||||
}
|
||||
}
|
||||
return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd);
|
||||
}
|
||||
|
||||
private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) {
|
||||
|
|
@ -379,6 +444,34 @@ public class ClippingOp {
|
|||
else
|
||||
break;
|
||||
}
|
||||
|
||||
return shift;
|
||||
}
|
||||
}
|
||||
|
||||
private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) {
|
||||
if (cigarElement.getOperator() == CigarOperator.INSERTION) {
|
||||
int cigarElementLength = cigarElement.getLength();
|
||||
if (clippedLength >= cigarElementLength)
|
||||
return -cigarElement.getLength();
|
||||
else
|
||||
return -clippedLength;
|
||||
}
|
||||
|
||||
if (cigarElement.getOperator() == CigarOperator.DELETION)
|
||||
return cigarElement.getLength();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
private class CigarShift {
|
||||
private Cigar cigar;
|
||||
private int shiftFromStart;
|
||||
private int shiftFromEnd;
|
||||
|
||||
private CigarShift(Cigar cigar, int shiftFromStart, int shiftFromEnd) {
|
||||
this.cigar = cigar;
|
||||
this.shiftFromStart = shiftFromStart;
|
||||
this.shiftFromEnd = shiftFromEnd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,11 +1,9 @@
|
|||
package org.broadinstitute.sting.utils.clipreads;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broad.tribble.util.PositionalStream;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.jets3t.service.multi.ThreadedStorageService;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
|
@ -50,13 +48,26 @@ public class ReadClipper {
|
|||
return read;
|
||||
}
|
||||
|
||||
public SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||
public SAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) {
|
||||
return hardClipByReferenceCoordinates(-1, refStop);
|
||||
}
|
||||
|
||||
public SAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) {
|
||||
return hardClipByReferenceCoordinates(refStart, -1);
|
||||
}
|
||||
|
||||
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
|
||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
|
||||
|
||||
System.out.println("Clipping start/stop: " + start + "/" + stop);
|
||||
if (start < 0 || stop > read.getReadLength() - 1)
|
||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||
|
||||
//System.out.println("Clipping start/stop: " + start + "/" + stop);
|
||||
this.addOp(new ClippingOp(start, stop));
|
||||
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
this.ops = null;
|
||||
return clippedRead;
|
||||
}
|
||||
|
||||
public SAMRecord hardClipByReadCoordinates(int start, int stop) {
|
||||
|
|
@ -64,10 +75,12 @@ public class ReadClipper {
|
|||
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
}
|
||||
|
||||
@Requires("left <= right")
|
||||
public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
|
||||
this.read = hardClipByReferenceCoordinates(-1, left);
|
||||
this.ops = null; // reset the operations
|
||||
return hardClipByReferenceCoordinates(right, -1);
|
||||
if (left == right)
|
||||
return new SAMRecord(read.getHeader());
|
||||
this.read = hardClipByReferenceCoordinates(right, -1);
|
||||
return hardClipByReferenceCoordinates(-1, left);
|
||||
}
|
||||
|
||||
public SAMRecord hardClipLowQualEnds(byte lowQual) {
|
||||
|
|
|
|||
|
|
@ -632,7 +632,7 @@ public class ReadUtils {
|
|||
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
|
||||
|
||||
int start = getRefCoordSoftUnclippedStart(read);
|
||||
int stop = getRefCoordSoftUnclippedStop(read);
|
||||
int stop = getRefCoordSoftUnclippedEnd(read);
|
||||
|
||||
if ( !read.getReferenceName().equals(interval.getContig()) )
|
||||
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
|
||||
|
|
@ -658,6 +658,7 @@ public class ReadUtils {
|
|||
return ReadAndIntervalOverlap.OVERLAP_RIGHT;
|
||||
}
|
||||
|
||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
|
||||
public static int getRefCoordSoftUnclippedStart(SAMRecord read) {
|
||||
int start = read.getUnclippedStart();
|
||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
|
|
@ -669,51 +670,98 @@ public class ReadUtils {
|
|||
return start;
|
||||
}
|
||||
|
||||
public static int getRefCoordSoftUnclippedStop(SAMRecord read) {
|
||||
int stop = read.getAlignmentEnd();
|
||||
List<CigarElement> cigarElementList = read.getCigar().getCigarElements();
|
||||
CigarElement lastCigarElement = cigarElementList.get(cigarElementList.size()-1);
|
||||
if (lastCigarElement.getOperator() == CigarOperator.SOFT_CLIP)
|
||||
stop += lastCigarElement.getLength();
|
||||
return stop;
|
||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
|
||||
public static int getRefCoordSoftUnclippedEnd(SAMRecord read) {
|
||||
int stop = read.getUnclippedStart();
|
||||
int shift = 0;
|
||||
CigarOperator lastOperator = null;
|
||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
stop += shift;
|
||||
lastOperator = cigarElement.getOperator();
|
||||
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||
shift = cigarElement.getLength();
|
||||
else
|
||||
shift = 0;
|
||||
}
|
||||
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before
|
||||
* the alignment start of the read.
|
||||
*
|
||||
* @param read
|
||||
* @param refCoord
|
||||
* @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before)
|
||||
*/
|
||||
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord < read.getAlignmentStart()"})
|
||||
private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(SAMRecord read, int refCoord) {
|
||||
if (getRefCoordSoftUnclippedStart(read) <= refCoord)
|
||||
return refCoord - getRefCoordSoftUnclippedStart(read) + 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region after
|
||||
* the alignment end of the read.
|
||||
*
|
||||
* @param read
|
||||
* @param refCoord
|
||||
* @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before)
|
||||
*/
|
||||
@Requires({"refCoord <= read.getUnclippedEnd()", "refCoord > read.getAlignmentEnd()"})
|
||||
private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(SAMRecord read, int refCoord) {
|
||||
if (getRefCoordSoftUnclippedEnd(read) >= refCoord)
|
||||
return refCoord - getRefCoordSoftUnclippedStart(read) + 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Requires({"refCoord >= read.getAlignmentStart()", "refCoord <= read.getAlignmentEnd()"})
|
||||
|
||||
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
|
||||
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
||||
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
|
||||
int readBases = 0;
|
||||
int refBases = 0;
|
||||
int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases
|
||||
boolean goalReached = refBases == goal;
|
||||
|
||||
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
|
||||
while (!goalReached && cigarElementIterator.hasNext()) {
|
||||
CigarElement cigarElement = cigarElementIterator.next();
|
||||
int shift = 0;
|
||||
//if (refBases == 0 && readBases == 0 && cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
|
||||
// goal -= cigarElement.getLength();
|
||||
//}
|
||||
|
||||
if (cigarElement.getOperator().consumesReferenceBases()) {
|
||||
if (refBases + cigarElement.getLength() < goal) {
|
||||
shift = cigarElement.getLength();
|
||||
}
|
||||
else {
|
||||
shift = goal - refBases;
|
||||
}
|
||||
refBases += shift;
|
||||
}
|
||||
goalReached = refBases == goal;
|
||||
|
||||
if (cigarElement.getOperator().consumesReadBases()) {
|
||||
readBases += goalReached ? shift : cigarElement.getLength();
|
||||
}
|
||||
if (refCoord < read.getAlignmentStart()) {
|
||||
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord);
|
||||
if (readBases < 0)
|
||||
throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate.");
|
||||
}
|
||||
else if (refCoord > read.getAlignmentEnd()) {
|
||||
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(read, refCoord);
|
||||
if (readBases < 0)
|
||||
throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate.");
|
||||
}
|
||||
else {
|
||||
int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases
|
||||
boolean goalReached = refBases == goal;
|
||||
|
||||
if (!goalReached)
|
||||
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
||||
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
|
||||
while (!goalReached && cigarElementIterator.hasNext()) {
|
||||
CigarElement cigarElement = cigarElementIterator.next();
|
||||
int shift = 0;
|
||||
|
||||
if (cigarElement.getOperator().consumesReferenceBases()) {
|
||||
if (refBases + cigarElement.getLength() < goal) {
|
||||
shift = cigarElement.getLength();
|
||||
}
|
||||
else {
|
||||
shift = goal - refBases;
|
||||
}
|
||||
refBases += shift;
|
||||
}
|
||||
goalReached = refBases == goal;
|
||||
|
||||
if (cigarElement.getOperator().consumesReadBases()) {
|
||||
readBases += goalReached ? shift : cigarElement.getLength();
|
||||
}
|
||||
}
|
||||
|
||||
if (!goalReached)
|
||||
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
||||
}
|
||||
|
||||
return readBases;
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="edu.mit.broad" module="picard-private-parts" revision="1959" status="integration" publication="20110718185300" />
|
||||
<info organisation="edu.mit.broad" module="picard-private-parts" revision="2034" status="integration" publication="20110718185300" />
|
||||
</ivy-module>
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="picard" revision="1.49.895" status="release" />
|
||||
</ivy-module>
|
||||
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="picard" revision="1.52.944" status="release" />
|
||||
</ivy-module>
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="sam" revision="1.49.895" status="release" />
|
||||
</ivy-module>
|
||||
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="sam" revision="1.52.944" status="release" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue