Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Mark DePristo 2011-08-30 11:08:23 -04:00
commit bdf04b8057
12 changed files with 233 additions and 69 deletions

View File

@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.apache.lucene.messages.NLS;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
@ -409,6 +408,17 @@ public class MathUtils {
return Math.sqrt(rms); return Math.sqrt(rms);
} }
public static double rms(Collection<Double> l) {
if (l.size() == 0)
return 0.0;
double rms = 0.0;
for (Double i : l)
rms += i*i;
rms /= l.size();
return Math.sqrt(rms);
}
public static double distanceSquared( final double[] x, final double[] y ) { public static double distanceSquared( final double[] x, final double[] y ) {
double dist = 0.0; double dist = 0.0;
for(int iii = 0; iii < x.length; iii++) { for(int iii = 0; iii < x.length; iii++) {

View File

@ -5,15 +5,11 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement; import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator; import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.apache.poi.hssf.record.PageBreakRecord;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.PipedOutputStream;
import java.lang.reflect.Array;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.Stack;
import java.util.Vector; import java.util.Vector;
/** /**
@ -256,13 +252,17 @@ public class ClippingOp {
if (start == 0 && stop == read.getReadLength() -1) if (start == 0 && stop == read.getReadLength() -1)
return new SAMRecord(read.getHeader()); return new SAMRecord(read.getHeader());
int newLength = read.getReadLength() - (stop - start + 1); CigarShift cigarShift = hardClipCigar(read.getCigar(), start, stop);
// the cigar may force a shift left or right (or both) in case we are left with insertions
// starting or ending the read after applying the hard clip on start/stop.
int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
byte [] newBases = new byte[newLength]; byte [] newBases = new byte[newLength];
byte [] newQuals = new byte[newLength]; byte [] newQuals = new byte[newLength];
int copyStart = (start == 0) ? stop + 1 : 0; int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength); System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength); Cigar newCigar = hardClipCigar(read.getCigar(), start, stop); System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
SAMRecord hardClippedRead; SAMRecord hardClippedRead;
try { try {
@ -273,19 +273,20 @@ public class ClippingOp {
hardClippedRead.setBaseQualities(newQuals); hardClippedRead.setBaseQualities(newQuals);
hardClippedRead.setReadBases(newBases); hardClippedRead.setReadBases(newBases);
hardClippedRead.setCigar(newCigar); hardClippedRead.setCigar(cigarShift.cigar);
if (start == 0) if (start == 0)
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), newCigar)); hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));
return hardClippedRead; return hardClippedRead;
} }
@Requires({"!cigar.isEmpty()"}) @Requires({"!cigar.isEmpty()"})
private Cigar hardClipCigar (Cigar cigar, int start, int stop) { private CigarShift hardClipCigar (Cigar cigar, int start, int stop) {
Cigar newCigar = new Cigar(); Cigar newCigar = new Cigar();
int index = 0; int index = 0;
int totalHardClipCount = stop - start + 1; int totalHardClipCount = stop - start + 1;
int alignmentShift = 0; // caused by hard clipping insertions or deletions
// hard clip the beginning of the cigar string // hard clip the beginning of the cigar string
if (start == 0) { if (start == 0) {
@ -307,15 +308,19 @@ public class ClippingOp {
// we're still clipping or just finished perfectly // we're still clipping or just finished perfectly
if (index + shift == stop + 1) { if (index + shift == stop + 1) {
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP)); alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
} }
// element goes beyond what we need to clip // element goes beyond what we need to clip
else if (index + shift > stop + 1) { else if (index + shift > stop + 1) {
int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1); int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP)); alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop-index+1);
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator())); newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
} }
index += shift; index += shift;
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, shift);
if (index <= stop && cigarElementIterator.hasNext()) if (index <= stop && cigarElementIterator.hasNext())
cigarElement = cigarElementIterator.next(); cigarElement = cigarElementIterator.next();
} }
@ -345,6 +350,8 @@ public class ClippingOp {
// element goes beyond our clip starting position // element goes beyond our clip starting position
else { else {
int elementLengthAfterChopping = start - index; int elementLengthAfterChopping = start - index;
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength() - (start - index));
// if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later // if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
totalHardClipCount += elementLengthAfterChopping; totalHardClipCount += elementLengthAfterChopping;
@ -356,9 +363,67 @@ public class ClippingOp {
if (index < start && cigarElementIterator.hasNext()) if (index < start && cigarElementIterator.hasNext())
cigarElement = cigarElementIterator.next(); cigarElement = cigarElementIterator.next();
} }
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP));
// check if we are hard clipping indels
while(cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
}
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
} }
return newCigar; return cleanHardClippedCigar(newCigar);
}
/**
* Checks if a hard clipped cigar left a read starting or ending with insertions/deletions
* and cleans it up accordingly.
*
* @param cigar
* @return
*/
private CigarShift cleanHardClippedCigar(Cigar cigar) {
Cigar cleanCigar = new Cigar();
int shiftFromStart = 0;
int shiftFromEnd = 0;
Stack<CigarElement> cigarStack = new Stack<CigarElement>();
Stack<CigarElement> inverseCigarStack = new Stack<CigarElement>();
for (CigarElement cigarElement : cigar.getCigarElements())
cigarStack.push(cigarElement);
for (int i = 1; i <= 2; i++) {
int shift = 0;
boolean readHasStarted = false;
while(!cigarStack.empty()) {
CigarElement cigarElement = cigarStack.pop();
if ( !readHasStarted &&
cigarElement.getOperator() != CigarOperator.INSERTION &&
cigarElement.getOperator() != CigarOperator.DELETION &&
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
readHasStarted = true;
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION)
shift += cigarElement.getLength();
if (readHasStarted || cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
if (i==1)
inverseCigarStack.push(cigarElement);
else
cleanCigar.add(cigarElement);
}
}
// first pass (i=1) is from end to start of the cigar elements
if (i == 1) {
shiftFromEnd = shift;
cigarStack = inverseCigarStack;
}
// second pass (i=2) is from start to end with the end already cleaned
else {
shiftFromStart = shift;
}
}
return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd);
} }
private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) { private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) {
@ -379,6 +444,34 @@ public class ClippingOp {
else else
break; break;
} }
return shift; return shift;
} }
}
private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) {
if (cigarElement.getOperator() == CigarOperator.INSERTION) {
int cigarElementLength = cigarElement.getLength();
if (clippedLength >= cigarElementLength)
return -cigarElement.getLength();
else
return -clippedLength;
}
if (cigarElement.getOperator() == CigarOperator.DELETION)
return cigarElement.getLength();
return 0;
}
private class CigarShift {
private Cigar cigar;
private int shiftFromStart;
private int shiftFromEnd;
private CigarShift(Cigar cigar, int shiftFromStart, int shiftFromEnd) {
this.cigar = cigar;
this.shiftFromStart = shiftFromStart;
this.shiftFromEnd = shiftFromEnd;
}
}
}

View File

@ -1,11 +1,9 @@
package org.broadinstitute.sting.utils.clipreads; package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.Cigar; import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broad.tribble.util.PositionalStream; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.jets3t.service.multi.ThreadedStorageService;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -50,13 +48,26 @@ public class ReadClipper {
return read; return read;
} }
public SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { public SAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) {
return hardClipByReferenceCoordinates(-1, refStop);
}
public SAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) {
return hardClipByReferenceCoordinates(refStart, -1);
}
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart); int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
System.out.println("Clipping start/stop: " + start + "/" + stop); if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
//System.out.println("Clipping start/stop: " + start + "/" + stop);
this.addOp(new ClippingOp(start, stop)); this.addOp(new ClippingOp(start, stop));
return clipRead(ClippingRepresentation.HARDCLIP_BASES); SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
this.ops = null;
return clippedRead;
} }
public SAMRecord hardClipByReadCoordinates(int start, int stop) { public SAMRecord hardClipByReadCoordinates(int start, int stop) {
@ -64,10 +75,12 @@ public class ReadClipper {
return clipRead(ClippingRepresentation.HARDCLIP_BASES); return clipRead(ClippingRepresentation.HARDCLIP_BASES);
} }
@Requires("left <= right")
public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
this.read = hardClipByReferenceCoordinates(-1, left); if (left == right)
this.ops = null; // reset the operations return new SAMRecord(read.getHeader());
return hardClipByReferenceCoordinates(right, -1); this.read = hardClipByReferenceCoordinates(right, -1);
return hardClipByReferenceCoordinates(-1, left);
} }
public SAMRecord hardClipLowQualEnds(byte lowQual) { public SAMRecord hardClipLowQualEnds(byte lowQual) {

View File

@ -632,7 +632,7 @@ public class ReadUtils {
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) { public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
int start = getRefCoordSoftUnclippedStart(read); int start = getRefCoordSoftUnclippedStart(read);
int stop = getRefCoordSoftUnclippedStop(read); int stop = getRefCoordSoftUnclippedEnd(read);
if ( !read.getReferenceName().equals(interval.getContig()) ) if ( !read.getReferenceName().equals(interval.getContig()) )
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG; return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
@ -658,6 +658,7 @@ public class ReadUtils {
return ReadAndIntervalOverlap.OVERLAP_RIGHT; return ReadAndIntervalOverlap.OVERLAP_RIGHT;
} }
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
public static int getRefCoordSoftUnclippedStart(SAMRecord read) { public static int getRefCoordSoftUnclippedStart(SAMRecord read) {
int start = read.getUnclippedStart(); int start = read.getUnclippedStart();
for (CigarElement cigarElement : read.getCigar().getCigarElements()) { for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
@ -669,51 +670,98 @@ public class ReadUtils {
return start; return start;
} }
public static int getRefCoordSoftUnclippedStop(SAMRecord read) { @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
int stop = read.getAlignmentEnd(); public static int getRefCoordSoftUnclippedEnd(SAMRecord read) {
List<CigarElement> cigarElementList = read.getCigar().getCigarElements(); int stop = read.getUnclippedStart();
CigarElement lastCigarElement = cigarElementList.get(cigarElementList.size()-1); int shift = 0;
if (lastCigarElement.getOperator() == CigarOperator.SOFT_CLIP) CigarOperator lastOperator = null;
stop += lastCigarElement.getLength(); for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
return stop; stop += shift;
lastOperator = cigarElement.getOperator();
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
shift = cigarElement.getLength();
else
shift = 0;
}
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
/**
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before
* the alignment start of the read.
*
* @param read
* @param refCoord
* @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before)
*/
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord < read.getAlignmentStart()"})
private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(SAMRecord read, int refCoord) {
if (getRefCoordSoftUnclippedStart(read) <= refCoord)
return refCoord - getRefCoordSoftUnclippedStart(read) + 1;
return -1;
} }
/**
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region after
* the alignment end of the read.
*
* @param read
* @param refCoord
* @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before)
*/
@Requires({"refCoord <= read.getUnclippedEnd()", "refCoord > read.getAlignmentEnd()"})
private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(SAMRecord read, int refCoord) {
if (getRefCoordSoftUnclippedEnd(read) >= refCoord)
return refCoord - getRefCoordSoftUnclippedStart(read) + 1;
return -1;
}
@Requires({"refCoord >= read.getAlignmentStart()", "refCoord <= read.getAlignmentEnd()"})
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
@Ensures({"result >= 0", "result < read.getReadLength()"}) @Ensures({"result >= 0", "result < read.getReadLength()"})
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) { public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
int readBases = 0; int readBases = 0;
int refBases = 0; int refBases = 0;
int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases
boolean goalReached = refBases == goal;
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator(); if (refCoord < read.getAlignmentStart()) {
while (!goalReached && cigarElementIterator.hasNext()) { readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord);
CigarElement cigarElement = cigarElementIterator.next(); if (readBases < 0)
int shift = 0; throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate.");
//if (refBases == 0 && readBases == 0 && cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
// goal -= cigarElement.getLength();
//}
if (cigarElement.getOperator().consumesReferenceBases()) {
if (refBases + cigarElement.getLength() < goal) {
shift = cigarElement.getLength();
}
else {
shift = goal - refBases;
}
refBases += shift;
}
goalReached = refBases == goal;
if (cigarElement.getOperator().consumesReadBases()) {
readBases += goalReached ? shift : cigarElement.getLength();
}
} }
else if (refCoord > read.getAlignmentEnd()) {
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(read, refCoord);
if (readBases < 0)
throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate.");
}
else {
int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases
boolean goalReached = refBases == goal;
if (!goalReached) Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?"); while (!goalReached && cigarElementIterator.hasNext()) {
CigarElement cigarElement = cigarElementIterator.next();
int shift = 0;
if (cigarElement.getOperator().consumesReferenceBases()) {
if (refBases + cigarElement.getLength() < goal) {
shift = cigarElement.getLength();
}
else {
shift = goal - refBases;
}
refBases += shift;
}
goalReached = refBases == goal;
if (cigarElement.getOperator().consumesReadBases()) {
readBases += goalReached ? shift : cigarElement.getLength();
}
}
if (!goalReached)
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
}
return readBases; return readBases;
} }

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0"> <ivy-module version="1.0">
<info organisation="edu.mit.broad" module="picard-private-parts" revision="1959" status="integration" publication="20110718185300" /> <info organisation="edu.mit.broad" module="picard-private-parts" revision="2034" status="integration" publication="20110718185300" />
</ivy-module> </ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="picard" revision="1.49.895" status="release" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="picard" revision="1.52.944" status="release" />
</ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="sam" revision="1.49.895" status="release" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="sam" revision="1.52.944" status="release" />
</ivy-module>