Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
ff5749599d
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.clipping;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
@ -135,10 +136,9 @@ public class ReadClipper {
|
||||||
ops.clear();
|
ops.clear();
|
||||||
if ( clippedRead.isEmpty() )
|
if ( clippedRead.isEmpty() )
|
||||||
return GATKSAMRecord.emptyRead(clippedRead);
|
return GATKSAMRecord.emptyRead(clippedRead);
|
||||||
// return new GATKSAMRecord( clippedRead.getHeader() );
|
|
||||||
return clippedRead;
|
return clippedRead;
|
||||||
} catch (CloneNotSupportedException e) {
|
} catch (CloneNotSupportedException e) {
|
||||||
throw new RuntimeException(e); // this should never happen
|
throw new RuntimeException(e); // this should never happen
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -188,7 +188,6 @@ public class ReadClipper {
|
||||||
private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
|
private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
|
||||||
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
|
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
|
||||||
return GATKSAMRecord.emptyRead(read);
|
return GATKSAMRecord.emptyRead(read);
|
||||||
// return new GATKSAMRecord(read.getHeader());
|
|
||||||
|
|
||||||
this.addOp(new ClippingOp(start, stop));
|
this.addOp(new ClippingOp(start, stop));
|
||||||
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||||
|
|
@ -213,14 +212,12 @@ public class ReadClipper {
|
||||||
private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
|
private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
|
||||||
if (read.isEmpty() || left == right)
|
if (read.isEmpty() || left == right)
|
||||||
return GATKSAMRecord.emptyRead(read);
|
return GATKSAMRecord.emptyRead(read);
|
||||||
// return new GATKSAMRecord(read.getHeader());
|
|
||||||
GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);
|
GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);
|
||||||
|
|
||||||
// after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions
|
// after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions
|
||||||
// make the left cut index no longer part of the read. In that case, clip the read entirely.
|
// make the left cut index no longer part of the read. In that case, clip the read entirely.
|
||||||
if (left > leftTailRead.getAlignmentEnd())
|
if (left > leftTailRead.getAlignmentEnd())
|
||||||
return GATKSAMRecord.emptyRead(read);
|
return GATKSAMRecord.emptyRead(read);
|
||||||
// return new GATKSAMRecord(read.getHeader());
|
|
||||||
|
|
||||||
ReadClipper clipper = new ReadClipper(leftTailRead);
|
ReadClipper clipper = new ReadClipper(leftTailRead);
|
||||||
return clipper.hardClipByReferenceCoordinatesLeftTail(left);
|
return clipper.hardClipByReferenceCoordinatesLeftTail(left);
|
||||||
|
|
@ -402,17 +399,77 @@ public class ReadClipper {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Turns soft clipped bases into matches
|
* Turns soft clipped bases into matches
|
||||||
*
|
|
||||||
* @return a new read with every soft clip turned into a match
|
* @return a new read with every soft clip turned into a match
|
||||||
*/
|
*/
|
||||||
private GATKSAMRecord revertSoftClippedBases() {
|
private GATKSAMRecord revertSoftClippedBases() {
|
||||||
this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates
|
if (read.isEmpty())
|
||||||
|
return read;
|
||||||
|
|
||||||
|
this.addOp(new ClippingOp(0, 0));
|
||||||
return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES);
|
return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reverts ALL soft-clipped bases
|
||||||
|
*
|
||||||
|
* @param read the read
|
||||||
|
* @return the read with all soft-clipped bases turned into matches
|
||||||
|
*/
|
||||||
public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
|
public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
|
||||||
return (new ReadClipper(read)).revertSoftClippedBases();
|
return (new ReadClipper(read)).revertSoftClippedBases();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reverts only soft clipped bases with quality score greater than or equal to minQual
|
||||||
|
*
|
||||||
|
* Note: Will write a temporary field with the number of soft clips that were undone on each side (left: 'SL', right: 'SR')
|
||||||
|
*
|
||||||
|
* @param read the read
|
||||||
|
* @param minQual the mininum base quality score to revert the base (inclusive)
|
||||||
|
* @return the read with high quality soft clips reverted
|
||||||
|
*/
|
||||||
|
public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read, byte minQual) {
|
||||||
|
int nLeadingSoftClips = read.getAlignmentStart() - read.getSoftStart();
|
||||||
|
if (read.isEmpty() || nLeadingSoftClips > read.getReadLength())
|
||||||
|
return GATKSAMRecord.emptyRead(read);
|
||||||
|
|
||||||
|
byte [] quals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);
|
||||||
|
int left = -1;
|
||||||
|
|
||||||
|
if (nLeadingSoftClips > 0) {
|
||||||
|
for (int i = nLeadingSoftClips - 1; i >= 0; i--) {
|
||||||
|
if (quals[i] >= minQual)
|
||||||
|
left = i;
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int right = -1;
|
||||||
|
int nTailingSoftClips = read.getSoftEnd() - read.getAlignmentEnd();
|
||||||
|
if (nTailingSoftClips > 0) {
|
||||||
|
for (int i = read.getReadLength() - nTailingSoftClips; i < read.getReadLength() ; i++) {
|
||||||
|
if (quals[i] >= minQual)
|
||||||
|
right = i;
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
GATKSAMRecord clippedRead = read;
|
||||||
|
if (right >= 0) {
|
||||||
|
if (right + 1 < clippedRead.getReadLength())
|
||||||
|
clippedRead = hardClipByReadCoordinates(clippedRead, right+1, clippedRead.getReadLength()-1); // first we hard clip the low quality soft clips on the left tail
|
||||||
|
clippedRead.setTemporaryAttribute("SR", nTailingSoftClips - (read.getReadLength() - right - 1)); // keep track of how may bases to 're-softclip' after processing
|
||||||
|
}
|
||||||
|
if (left >= 0) {
|
||||||
|
if (left - 1 > 0)
|
||||||
|
clippedRead = hardClipByReadCoordinates(clippedRead, 0, left-1); // then we hard clip the low quality soft clips on the right tail
|
||||||
|
clippedRead.setTemporaryAttribute("SL", nLeadingSoftClips - left); // keep track of how may bases to 're-softclip' after processing
|
||||||
|
}
|
||||||
|
return revertSoftClippedBases(clippedRead); // now that we have only good bases in the soft clips, we can revert them all
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail
|
* Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail
|
||||||
* and hardClipByReferenceCoordinatesRightTail. Should not be used directly.
|
* and hardClipByReferenceCoordinatesRightTail. Should not be used directly.
|
||||||
|
|
@ -446,9 +503,6 @@ public class ReadClipper {
|
||||||
stop = read.getReadLength() - 1;
|
stop = read.getReadLength() - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// if ((start == 0 && stop == read.getReadLength() - 1))
|
|
||||||
// return GATKSAMRecord.emptyRead(read);
|
|
||||||
|
|
||||||
if (start < 0 || stop > read.getReadLength() - 1)
|
if (start < 0 || stop > read.getReadLength() - 1)
|
||||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -135,7 +135,7 @@ public class FragmentUtils {
|
||||||
GATKSAMRecord firstRead = overlappingPair.get(0);
|
GATKSAMRecord firstRead = overlappingPair.get(0);
|
||||||
GATKSAMRecord secondRead = overlappingPair.get(1);
|
GATKSAMRecord secondRead = overlappingPair.get(1);
|
||||||
if( !(secondRead.getUnclippedStart() <= firstRead.getUnclippedEnd() && secondRead.getUnclippedStart() >= firstRead.getUnclippedStart() && secondRead.getUnclippedEnd() >= firstRead.getUnclippedEnd()) ) {
|
if( !(secondRead.getUnclippedStart() <= firstRead.getUnclippedEnd() && secondRead.getUnclippedStart() >= firstRead.getUnclippedStart() && secondRead.getUnclippedEnd() >= firstRead.getUnclippedEnd()) ) {
|
||||||
firstRead = overlappingPair.get(1);
|
firstRead = overlappingPair.get(1); // swap them
|
||||||
secondRead = overlappingPair.get(0);
|
secondRead = overlappingPair.get(0);
|
||||||
}
|
}
|
||||||
if( !(secondRead.getUnclippedStart() <= firstRead.getUnclippedEnd() && secondRead.getUnclippedStart() >= firstRead.getUnclippedStart() && secondRead.getUnclippedEnd() >= firstRead.getUnclippedEnd()) ) {
|
if( !(secondRead.getUnclippedStart() <= firstRead.getUnclippedEnd() && secondRead.getUnclippedStart() >= firstRead.getUnclippedStart() && secondRead.getUnclippedEnd() >= firstRead.getUnclippedEnd()) ) {
|
||||||
|
|
@ -177,12 +177,13 @@ public class FragmentUtils {
|
||||||
quals[iii] = secondReadQuals[iii-firstReadStop];
|
quals[iii] = secondReadQuals[iii-firstReadStop];
|
||||||
}
|
}
|
||||||
|
|
||||||
final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader());
|
final GATKSAMRecord returnRead = new GATKSAMRecord( firstRead.getHeader() );
|
||||||
returnRead.setAlignmentStart(firstRead.getUnclippedStart());
|
returnRead.setAlignmentStart( firstRead.getUnclippedStart() );
|
||||||
returnRead.setReadBases( bases );
|
returnRead.setReadBases( bases );
|
||||||
returnRead.setBaseQualities( quals );
|
returnRead.setBaseQualities( quals );
|
||||||
returnRead.setReadGroup( firstRead.getReadGroup() );
|
returnRead.setReadGroup( firstRead.getReadGroup() );
|
||||||
returnRead.setReferenceName( firstRead.getReferenceName() );
|
returnRead.setReferenceName( firstRead.getReferenceName() );
|
||||||
|
returnRead.setReadName( firstRead.getReadName() );
|
||||||
final CigarElement c = new CigarElement(bases.length, CigarOperator.M);
|
final CigarElement c = new CigarElement(bases.length, CigarOperator.M);
|
||||||
final ArrayList<CigarElement> cList = new ArrayList<CigarElement>();
|
final ArrayList<CigarElement> cList = new ArrayList<CigarElement>();
|
||||||
cList.add(c);
|
cList.add(c);
|
||||||
|
|
|
||||||
|
|
@ -428,8 +428,8 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
* Use this method if you want to create a new empty GATKSAMRecord based on
|
* Use this method if you want to create a new empty GATKSAMRecord based on
|
||||||
* another GATKSAMRecord
|
* another GATKSAMRecord
|
||||||
*
|
*
|
||||||
* @param read
|
* @param read a read to copy the header from
|
||||||
* @return
|
* @return a read with no bases but safe for the GATK
|
||||||
*/
|
*/
|
||||||
public static GATKSAMRecord emptyRead(GATKSAMRecord read) {
|
public static GATKSAMRecord emptyRead(GATKSAMRecord read) {
|
||||||
GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(),
|
GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(),
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
public class ReadUnclippedStartWithNoTiesComparator implements Comparator<SAMRecord> {
|
||||||
|
@Requires("c1 >= 0 && c2 >= 0")
|
||||||
|
@Ensures("result == 0 || result == 1 || result == -1")
|
||||||
|
private int compareContigs(int c1, int c2) {
|
||||||
|
if (c1 == c2)
|
||||||
|
return 0;
|
||||||
|
else if (c1 > c2)
|
||||||
|
return 1;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires("r1 != null && r2 != null")
|
||||||
|
@Ensures("result == 0 || result == 1 || result == -1")
|
||||||
|
public int compare(SAMRecord r1, SAMRecord r2) {
|
||||||
|
int result;
|
||||||
|
|
||||||
|
if (r1 == r2)
|
||||||
|
result = 0;
|
||||||
|
|
||||||
|
else if (r1.getReadUnmappedFlag())
|
||||||
|
result = 1;
|
||||||
|
else if (r2.getReadUnmappedFlag())
|
||||||
|
result = -1;
|
||||||
|
else {
|
||||||
|
final int cmpContig = compareContigs(r1.getReferenceIndex(), r2.getReferenceIndex());
|
||||||
|
|
||||||
|
if (cmpContig != 0)
|
||||||
|
result = cmpContig;
|
||||||
|
|
||||||
|
else {
|
||||||
|
if (r1.getUnclippedStart() < r2.getUnclippedStart())
|
||||||
|
result = -1;
|
||||||
|
else
|
||||||
|
result = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue