Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Mark DePristo 2011-08-30 11:08:23 -04:00
commit bdf04b8057
12 changed files with 233 additions and 69 deletions

View File

@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.apache.lucene.messages.NLS;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -409,6 +408,17 @@ public class MathUtils {
return Math.sqrt(rms);
}
public static double rms(Collection<Double> l) {
if (l.size() == 0)
return 0.0;
double rms = 0.0;
for (Double i : l)
rms += i*i;
rms /= l.size();
return Math.sqrt(rms);
}
public static double distanceSquared( final double[] x, final double[] y ) {
double dist = 0.0;
for(int iii = 0; iii < x.length; iii++) {

View File

@ -5,15 +5,11 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.apache.poi.hssf.record.PageBreakRecord;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.PipedOutputStream;
import java.lang.reflect.Array;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;
import java.util.Vector;
/**
@ -256,13 +252,17 @@ public class ClippingOp {
if (start == 0 && stop == read.getReadLength() -1)
return new SAMRecord(read.getHeader());
int newLength = read.getReadLength() - (stop - start + 1);
CigarShift cigarShift = hardClipCigar(read.getCigar(), start, stop);
// the cigar may force a shift left or right (or both) in case we are left with insertions
// starting or ending the read after applying the hard clip on start/stop.
int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
byte [] newBases = new byte[newLength];
byte [] newQuals = new byte[newLength];
int copyStart = (start == 0) ? stop + 1 : 0;
int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength); Cigar newCigar = hardClipCigar(read.getCigar(), start, stop);
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
SAMRecord hardClippedRead;
try {
@ -273,19 +273,20 @@ public class ClippingOp {
hardClippedRead.setBaseQualities(newQuals);
hardClippedRead.setReadBases(newBases);
hardClippedRead.setCigar(newCigar);
hardClippedRead.setCigar(cigarShift.cigar);
if (start == 0)
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), newCigar));
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));
return hardClippedRead;
}
@Requires({"!cigar.isEmpty()"})
private Cigar hardClipCigar (Cigar cigar, int start, int stop) {
private CigarShift hardClipCigar (Cigar cigar, int start, int stop) {
Cigar newCigar = new Cigar();
int index = 0;
int totalHardClipCount = stop - start + 1;
int alignmentShift = 0; // caused by hard clipping insertions or deletions
// hard clip the beginning of the cigar string
if (start == 0) {
@ -307,15 +308,19 @@ public class ClippingOp {
// we're still clipping or just finished perfectly
if (index + shift == stop + 1) {
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP));
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
}
// element goes beyond what we need to clip
else if (index + shift > stop + 1) {
int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP));
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop-index+1);
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
}
index += shift;
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, shift);
if (index <= stop && cigarElementIterator.hasNext())
cigarElement = cigarElementIterator.next();
}
@ -345,6 +350,8 @@ public class ClippingOp {
// element goes beyond our clip starting position
else {
int elementLengthAfterChopping = start - index;
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength() - (start - index));
// if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
totalHardClipCount += elementLengthAfterChopping;
@ -356,9 +363,67 @@ public class ClippingOp {
if (index < start && cigarElementIterator.hasNext())
cigarElement = cigarElementIterator.next();
}
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP));
// check if we are hard clipping indels
while(cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
}
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
}
return newCigar;
return cleanHardClippedCigar(newCigar);
}
/**
* Checks if a hard clipped cigar left a read starting or ending with insertions/deletions
* and cleans it up accordingly.
*
* @param cigar
* @return
*/
private CigarShift cleanHardClippedCigar(Cigar cigar) {
Cigar cleanCigar = new Cigar();
int shiftFromStart = 0;
int shiftFromEnd = 0;
Stack<CigarElement> cigarStack = new Stack<CigarElement>();
Stack<CigarElement> inverseCigarStack = new Stack<CigarElement>();
for (CigarElement cigarElement : cigar.getCigarElements())
cigarStack.push(cigarElement);
for (int i = 1; i <= 2; i++) {
int shift = 0;
boolean readHasStarted = false;
while(!cigarStack.empty()) {
CigarElement cigarElement = cigarStack.pop();
if ( !readHasStarted &&
cigarElement.getOperator() != CigarOperator.INSERTION &&
cigarElement.getOperator() != CigarOperator.DELETION &&
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
readHasStarted = true;
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION)
shift += cigarElement.getLength();
if (readHasStarted || cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
if (i==1)
inverseCigarStack.push(cigarElement);
else
cleanCigar.add(cigarElement);
}
}
// first pass (i=1) is from end to start of the cigar elements
if (i == 1) {
shiftFromEnd = shift;
cigarStack = inverseCigarStack;
}
// second pass (i=2) is from start to end with the end already cleaned
else {
shiftFromStart = shift;
}
}
return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd);
}
private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) {
@ -379,6 +444,34 @@ public class ClippingOp {
else
break;
}
return shift;
}
}
private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) {
if (cigarElement.getOperator() == CigarOperator.INSERTION) {
int cigarElementLength = cigarElement.getLength();
if (clippedLength >= cigarElementLength)
return -cigarElement.getLength();
else
return -clippedLength;
}
if (cigarElement.getOperator() == CigarOperator.DELETION)
return cigarElement.getLength();
return 0;
}
private class CigarShift {
private Cigar cigar;
private int shiftFromStart;
private int shiftFromEnd;
private CigarShift(Cigar cigar, int shiftFromStart, int shiftFromEnd) {
this.cigar = cigar;
this.shiftFromStart = shiftFromStart;
this.shiftFromEnd = shiftFromEnd;
}
}
}

View File

@ -1,11 +1,9 @@
package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broad.tribble.util.PositionalStream;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.jets3t.service.multi.ThreadedStorageService;
import java.util.ArrayList;
import java.util.List;
@ -50,13 +48,26 @@ public class ReadClipper {
return read;
}
public SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
public SAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) {
return hardClipByReferenceCoordinates(-1, refStop);
}
public SAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) {
return hardClipByReferenceCoordinates(refStart, -1);
}
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
System.out.println("Clipping start/stop: " + start + "/" + stop);
if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
//System.out.println("Clipping start/stop: " + start + "/" + stop);
this.addOp(new ClippingOp(start, stop));
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
this.ops = null;
return clippedRead;
}
public SAMRecord hardClipByReadCoordinates(int start, int stop) {
@ -64,10 +75,12 @@ public class ReadClipper {
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
@Requires("left <= right")
public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
this.read = hardClipByReferenceCoordinates(-1, left);
this.ops = null; // reset the operations
return hardClipByReferenceCoordinates(right, -1);
if (left == right)
return new SAMRecord(read.getHeader());
this.read = hardClipByReferenceCoordinates(right, -1);
return hardClipByReferenceCoordinates(-1, left);
}
public SAMRecord hardClipLowQualEnds(byte lowQual) {

View File

@ -632,7 +632,7 @@ public class ReadUtils {
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
int start = getRefCoordSoftUnclippedStart(read);
int stop = getRefCoordSoftUnclippedStop(read);
int stop = getRefCoordSoftUnclippedEnd(read);
if ( !read.getReferenceName().equals(interval.getContig()) )
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
@ -658,6 +658,7 @@ public class ReadUtils {
return ReadAndIntervalOverlap.OVERLAP_RIGHT;
}
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
public static int getRefCoordSoftUnclippedStart(SAMRecord read) {
int start = read.getUnclippedStart();
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
@ -669,51 +670,98 @@ public class ReadUtils {
return start;
}
public static int getRefCoordSoftUnclippedStop(SAMRecord read) {
int stop = read.getAlignmentEnd();
List<CigarElement> cigarElementList = read.getCigar().getCigarElements();
CigarElement lastCigarElement = cigarElementList.get(cigarElementList.size()-1);
if (lastCigarElement.getOperator() == CigarOperator.SOFT_CLIP)
stop += lastCigarElement.getLength();
return stop;
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
public static int getRefCoordSoftUnclippedEnd(SAMRecord read) {
int stop = read.getUnclippedStart();
int shift = 0;
CigarOperator lastOperator = null;
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
stop += shift;
lastOperator = cigarElement.getOperator();
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
shift = cigarElement.getLength();
else
shift = 0;
}
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
/**
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before
* the alignment start of the read.
*
* @param read
* @param refCoord
* @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before)
*/
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord < read.getAlignmentStart()"})
private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(SAMRecord read, int refCoord) {
if (getRefCoordSoftUnclippedStart(read) <= refCoord)
return refCoord - getRefCoordSoftUnclippedStart(read) + 1;
return -1;
}
/**
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region after
* the alignment end of the read.
*
* @param read
* @param refCoord
* @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before)
*/
@Requires({"refCoord <= read.getUnclippedEnd()", "refCoord > read.getAlignmentEnd()"})
private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(SAMRecord read, int refCoord) {
if (getRefCoordSoftUnclippedEnd(read) >= refCoord)
return refCoord - getRefCoordSoftUnclippedStart(read) + 1;
return -1;
}
@Requires({"refCoord >= read.getAlignmentStart()", "refCoord <= read.getAlignmentEnd()"})
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
@Ensures({"result >= 0", "result < read.getReadLength()"})
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
int readBases = 0;
int refBases = 0;
int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases
boolean goalReached = refBases == goal;
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
while (!goalReached && cigarElementIterator.hasNext()) {
CigarElement cigarElement = cigarElementIterator.next();
int shift = 0;
//if (refBases == 0 && readBases == 0 && cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
// goal -= cigarElement.getLength();
//}
if (cigarElement.getOperator().consumesReferenceBases()) {
if (refBases + cigarElement.getLength() < goal) {
shift = cigarElement.getLength();
}
else {
shift = goal - refBases;
}
refBases += shift;
}
goalReached = refBases == goal;
if (cigarElement.getOperator().consumesReadBases()) {
readBases += goalReached ? shift : cigarElement.getLength();
}
if (refCoord < read.getAlignmentStart()) {
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord);
if (readBases < 0)
throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate.");
}
else if (refCoord > read.getAlignmentEnd()) {
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(read, refCoord);
if (readBases < 0)
throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate.");
}
else {
int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases
boolean goalReached = refBases == goal;
if (!goalReached)
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
while (!goalReached && cigarElementIterator.hasNext()) {
CigarElement cigarElement = cigarElementIterator.next();
int shift = 0;
if (cigarElement.getOperator().consumesReferenceBases()) {
if (refBases + cigarElement.getLength() < goal) {
shift = cigarElement.getLength();
}
else {
shift = goal - refBases;
}
refBases += shift;
}
goalReached = refBases == goal;
if (cigarElement.getOperator().consumesReadBases()) {
readBases += goalReached ? shift : cigarElement.getLength();
}
}
if (!goalReached)
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
}
return readBases;
}

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0">
<info organisation="edu.mit.broad" module="picard-private-parts" revision="1959" status="integration" publication="20110718185300" />
<info organisation="edu.mit.broad" module="picard-private-parts" revision="2034" status="integration" publication="20110718185300" />
</ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="picard" revision="1.49.895" status="release" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="picard" revision="1.52.944" status="release" />
</ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="sam" revision="1.49.895" status="release" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="sam" revision="1.52.944" status="release" />
</ivy-module>