getRefCoordSoftUnclippedStart and End refactor
These functions are methods of the read, and supplement getAlignmentStart() and getUnclippedStart() by calculating the unclipped start counting only soft clips. * Removed from ReadUtils * Added to GATKSAMRecord * Changed name to getSoftStart() and getSoftEnd * Updated third party code accordingly.
This commit is contained in:
parent
07128a2ad2
commit
cadff40247
|
|
@ -24,10 +24,8 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.BAMRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import com.google.java.contract.Ensures;
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
|
@ -273,5 +271,52 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
setReadGroup(rg);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the reference coordinate for the beginning of the read taking into account soft clips but not hard clips.
|
||||
*
|
||||
* Note: getUnclippedStart() adds soft and hard clips, this function only adds soft clips.
|
||||
*
|
||||
* @return the unclipped start of the read taking soft clips (but not hard clips) into account
|
||||
*/
|
||||
@Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"})
|
||||
public int getSoftStart() {
|
||||
int start = this.getUnclippedStart();
|
||||
for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
|
||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||
start += cigarElement.getLength();
|
||||
else
|
||||
break;
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the reference coordinate for the end of the read taking into account soft clips but not hard clips.
|
||||
*
|
||||
* Note: getUnclippedStart() adds soft and hard clips, this function only adds soft clips.
|
||||
*
|
||||
* @return the unclipped end of the read taking soft clips (but not hard clips) into account
|
||||
*/
|
||||
@Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"})
|
||||
public int getSoftEnd() {
|
||||
int stop = this.getUnclippedStart();
|
||||
|
||||
if (ReadUtils.readIsEntirelyInsertion(this))
|
||||
return stop;
|
||||
|
||||
int shift = 0;
|
||||
CigarOperator lastOperator = null;
|
||||
for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
|
||||
stop += shift;
|
||||
lastOperator = cigarElement.getOperator();
|
||||
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||
shift = cigarElement.getLength();
|
||||
else
|
||||
shift = 0;
|
||||
}
|
||||
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -299,8 +299,8 @@ public class ReadUtils {
|
|||
*/
|
||||
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(GATKSAMRecord read, GenomeLoc interval) {
|
||||
|
||||
int sStart = getRefCoordSoftUnclippedStart(read);
|
||||
int sStop = getRefCoordSoftUnclippedEnd(read);
|
||||
int sStart = read.getSoftStart();
|
||||
int sStop = read.getSoftEnd();
|
||||
int uStart = read.getUnclippedStart();
|
||||
int uStop = read.getUnclippedEnd();
|
||||
|
||||
|
|
@ -334,40 +334,6 @@ public class ReadUtils {
|
|||
return ReadAndIntervalOverlap.OVERLAP_RIGHT;
|
||||
}
|
||||
|
||||
|
||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
|
||||
public static int getRefCoordSoftUnclippedStart(GATKSAMRecord read) {
|
||||
int start = read.getUnclippedStart();
|
||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||
start += cigarElement.getLength();
|
||||
else
|
||||
break;
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
|
||||
public static int getRefCoordSoftUnclippedEnd(GATKSAMRecord read) {
|
||||
int stop = read.getUnclippedStart();
|
||||
|
||||
if (readIsEntirelyInsertion(read))
|
||||
return stop;
|
||||
|
||||
int shift = 0;
|
||||
CigarOperator lastOperator = null;
|
||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
stop += shift;
|
||||
lastOperator = cigarElement.getOperator();
|
||||
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||
shift = cigarElement.getLength();
|
||||
else
|
||||
shift = 0;
|
||||
}
|
||||
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) in case it falls in
|
||||
* a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns
|
||||
|
|
@ -407,14 +373,14 @@ public class ReadUtils {
|
|||
* @param refCoord
|
||||
* @return the read coordinate corresponding to the requested reference coordinate. (see warning!)
|
||||
*/
|
||||
@Requires({"refCoord >= getRefCoordSoftUnclippedStart(read)", "refCoord <= getRefCoordSoftUnclippedEnd(read)"})
|
||||
@Requires({"refCoord >= read.getSoftStart()", "refCoord <= read.getSoftEnd()"})
|
||||
@Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
|
||||
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord) {
|
||||
int readBases = 0;
|
||||
int refBases = 0;
|
||||
boolean fallsInsideDeletion = false;
|
||||
|
||||
int goal = refCoord - getRefCoordSoftUnclippedStart(read); // The goal is to move this many reference bases
|
||||
int goal = refCoord - read.getSoftStart(); // The goal is to move this many reference bases
|
||||
boolean goalReached = refBases == goal;
|
||||
|
||||
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -90,13 +89,13 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
int alnStart = read.getAlignmentStart();
|
||||
int alnEnd = read.getAlignmentEnd();
|
||||
for (int i=alnStart; i<=alnEnd; i++) {
|
||||
if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
||||
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
||||
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
|
||||
if (!clipLeft.isEmpty())
|
||||
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
||||
}
|
||||
|
||||
if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
||||
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
||||
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
|
||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
||||
|
|
@ -111,7 +110,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
int alnStart = read.getAlignmentStart();
|
||||
int alnEnd = read.getAlignmentEnd();
|
||||
if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
||||
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
||||
for (int i=alnStart; i<=alnEnd; i++) {
|
||||
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
|
||||
if (!clipLeft.isEmpty())
|
||||
|
|
@ -127,7 +126,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
int alnStart = read.getAlignmentStart();
|
||||
int alnEnd = read.getAlignmentEnd();
|
||||
if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
||||
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
||||
for (int i=alnStart; i<=alnEnd; i++) {
|
||||
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
|
||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||
|
|
|
|||
Loading…
Reference in New Issue