getRefCoordSoftUnclippedStart and End refactor

These functions are methods of the read, and supplement getAlignmentStart() and getUnclippedStart() by calculating the unclipped start counting only soft clips.

* Removed from ReadUtils
* Added to GATKSAMRecord
* Changed name to getSoftStart() and getSoftEnd
* Updated third party code accordingly.
This commit is contained in:
Mauricio Carneiro 2011-12-20 14:58:18 -05:00
parent 07128a2ad2
commit cadff40247
3 changed files with 57 additions and 47 deletions

View File

@ -24,10 +24,8 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.BAMRecord;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import com.google.java.contract.Ensures;
import net.sf.samtools.*;
import org.broadinstitute.sting.utils.NGSPlatform;
import java.util.HashMap;
@ -273,5 +271,52 @@ public class GATKSAMRecord extends BAMRecord {
setReadGroup(rg);
}
/**
* Calculates the reference coordinate for the beginning of the read taking into account soft clips but not hard clips.
*
* Note: getUnclippedStart() adds soft and hard clips, this function only adds soft clips.
*
* @return the unclipped start of the read taking soft clips (but not hard clips) into account
*/
@Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"})
public int getSoftStart() {
int start = this.getUnclippedStart();
for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
start += cigarElement.getLength();
else
break;
}
return start;
}
/**
* Calculates the reference coordinate for the end of the read taking into account soft clips but not hard clips.
*
* Note: getUnclippedStart() adds soft and hard clips, this function only adds soft clips.
*
* @return the unclipped end of the read taking soft clips (but not hard clips) into account
*/
@Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"})
public int getSoftEnd() {
int stop = this.getUnclippedStart();
if (ReadUtils.readIsEntirelyInsertion(this))
return stop;
int shift = 0;
CigarOperator lastOperator = null;
for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
stop += shift;
lastOperator = cigarElement.getOperator();
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
shift = cigarElement.getLength();
else
shift = 0;
}
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
}

View File

@ -299,8 +299,8 @@ public class ReadUtils {
*/
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(GATKSAMRecord read, GenomeLoc interval) {
int sStart = getRefCoordSoftUnclippedStart(read);
int sStop = getRefCoordSoftUnclippedEnd(read);
int sStart = read.getSoftStart();
int sStop = read.getSoftEnd();
int uStart = read.getUnclippedStart();
int uStop = read.getUnclippedEnd();
@ -334,40 +334,6 @@ public class ReadUtils {
return ReadAndIntervalOverlap.OVERLAP_RIGHT;
}
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
public static int getRefCoordSoftUnclippedStart(GATKSAMRecord read) {
int start = read.getUnclippedStart();
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
start += cigarElement.getLength();
else
break;
}
return start;
}
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
public static int getRefCoordSoftUnclippedEnd(GATKSAMRecord read) {
int stop = read.getUnclippedStart();
if (readIsEntirelyInsertion(read))
return stop;
int shift = 0;
CigarOperator lastOperator = null;
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
stop += shift;
lastOperator = cigarElement.getOperator();
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
shift = cigarElement.getLength();
else
shift = 0;
}
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
/**
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) in case it falls in
* a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns
@ -407,14 +373,14 @@ public class ReadUtils {
* @param refCoord
* @return the read coordinate corresponding to the requested reference coordinate. (see warning!)
*/
@Requires({"refCoord >= getRefCoordSoftUnclippedStart(read)", "refCoord <= getRefCoordSoftUnclippedEnd(read)"})
@Requires({"refCoord >= read.getSoftStart()", "refCoord <= read.getSoftEnd()"})
@Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord) {
int readBases = 0;
int refBases = 0;
boolean fallsInsideDeletion = false;
int goal = refCoord - getRefCoordSoftUnclippedStart(read); // The goal is to move this many reference bases
int goal = refCoord - read.getSoftStart(); // The goal is to move this many reference bases
boolean goalReached = refBases == goal;
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();

View File

@ -32,7 +32,6 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@ -90,13 +89,13 @@ public class ReadClipperUnitTest extends BaseTest {
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
for (int i=alnStart; i<=alnEnd; i++) {
if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
if (!clipLeft.isEmpty())
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
}
if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
@ -111,7 +110,7 @@ public class ReadClipperUnitTest extends BaseTest {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
for (int i=alnStart; i<=alnEnd; i++) {
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
if (!clipLeft.isEmpty())
@ -127,7 +126,7 @@ public class ReadClipperUnitTest extends BaseTest {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
for (int i=alnStart; i<=alnEnd; i++) {
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.