Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
7c58d8e37d
|
|
@ -247,7 +247,7 @@ public class ClippingOp {
|
||||||
return newCigar;
|
return newCigar;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"})
|
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
|
||||||
private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) {
|
private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) {
|
||||||
if (start == 0 && stop == read.getReadLength() - 1)
|
if (start == 0 && stop == read.getReadLength() - 1)
|
||||||
return new GATKSAMRecord(read.getHeader());
|
return new GATKSAMRecord(read.getHeader());
|
||||||
|
|
@ -373,6 +373,10 @@ public class ClippingOp {
|
||||||
while(cigarElementIterator.hasNext()) {
|
while(cigarElementIterator.hasNext()) {
|
||||||
cigarElement = cigarElementIterator.next();
|
cigarElement = cigarElementIterator.next();
|
||||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
|
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
|
||||||
|
|
||||||
|
// if the read had a HardClip operator in the end, combine it with the Hard Clip we are adding
|
||||||
|
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||||
|
totalHardClipCount += cigarElement.getLength();
|
||||||
}
|
}
|
||||||
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
|
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -58,6 +58,7 @@ public class ReadClipper {
|
||||||
return hardClipByReferenceCoordinates(refStart, -1);
|
return hardClipByReferenceCoordinates(refStart, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Requires("!read.getReadUnmappedFlag()")
|
||||||
protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
|
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
|
||||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
|
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||||
|
|
|
||||||
|
|
@ -252,77 +252,56 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = true)
|
||||||
public void testHardClipSoftClippedBases() {
|
public void testHardClipSoftClippedBases() {
|
||||||
|
|
||||||
// Generate a list of cigars to test
|
// Generate a list of cigars to test
|
||||||
for (Cigar cigar : ClipReadsTestUtils.generateCigars()) {
|
for (Cigar cigar : ClipReadsTestUtils.generateCigars()) {
|
||||||
//logger.warn("Testing Cigar: "+cigar.toString());
|
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
|
||||||
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(cigar));
|
readClipper = new ReadClipper(read);
|
||||||
|
GATKSAMRecord clippedRead = readClipper.hardClipSoftClippedBases();
|
||||||
|
|
||||||
int clipStart = 0;
|
int sumHardClips = 0;
|
||||||
int clipEnd = 0;
|
int sumMatches = 0;
|
||||||
boolean expectEmptyRead = false;
|
|
||||||
|
|
||||||
List<CigarElement> cigarElements = cigar.getCigarElements();
|
boolean tail = true;
|
||||||
int CigarListLength = cigarElements.size();
|
for (CigarElement element : read.getCigar().getCigarElements()) {
|
||||||
|
// Assuming cigars are well formed, if we see S or H, it means we're on the tail (left or right)
|
||||||
|
if (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP)
|
||||||
|
tail = true;
|
||||||
|
|
||||||
// It will know what needs to be clipped based on the start and end of the string, hardclips and softclips
|
// Adds all H, S and D's (next to hard/soft clips).
|
||||||
// are added to the amount to clip
|
// All these should be hard clips after clipping.
|
||||||
if (cigarElements.get(0).getOperator() == CigarOperator.HARD_CLIP) {
|
if (tail && (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.DELETION))
|
||||||
//clipStart += cigarElements.get(0).getLength();
|
sumHardClips += element.getLength();
|
||||||
if (cigarElements.get(1).getOperator() == CigarOperator.SOFT_CLIP) {
|
|
||||||
clipStart += cigarElements.get(1).getLength();
|
// this means we're no longer on the tail (insertions can still potentially be the tail because
|
||||||
// Check for leading indel
|
// of the current contract of clipping out hanging insertions
|
||||||
if (cigarElements.get(2).getOperator() == CigarOperator.INSERTION) {
|
else if (element.getOperator() != CigarOperator.INSERTION)
|
||||||
expectEmptyRead = true;
|
tail = false;
|
||||||
}
|
|
||||||
}
|
// Adds all matches to verify that they remain the same after clipping
|
||||||
// Check for leading indel
|
if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
|
||||||
else if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) {
|
sumMatches += element.getLength();
|
||||||
expectEmptyRead = true;
|
|
||||||
}
|
|
||||||
} else if (cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP) {
|
|
||||||
clipStart += cigarElements.get(0).getLength();
|
|
||||||
// Check for leading indel
|
|
||||||
if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) {
|
|
||||||
expectEmptyRead = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//Check for leading indel
|
|
||||||
else if (cigarElements.get(0).getOperator() == CigarOperator.INSERTION) {
|
|
||||||
expectEmptyRead = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.HARD_CLIP) {
|
for (CigarElement element : clippedRead.getCigar().getCigarElements()) {
|
||||||
//clipEnd += cigarElements.get(CigarListLength - 1).getLength();
|
// Test if clipped read has Soft Clips (shouldn't have any!)
|
||||||
if (cigarElements.get(CigarListLength - 2).getOperator() == CigarOperator.SOFT_CLIP)
|
Assert.assertTrue( element.getOperator() != CigarOperator.SOFT_CLIP, String.format("Cigar %s -> %s -- FAILED (resulting cigar has soft clips)", read.getCigarString(), clippedRead.getCigarString()));
|
||||||
clipEnd += cigarElements.get(CigarListLength - 2).getLength();
|
|
||||||
} else if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.SOFT_CLIP)
|
|
||||||
clipEnd += cigarElements.get(CigarListLength - 1).getLength();
|
|
||||||
|
|
||||||
String readBases = readClipper.read.getReadString();
|
// Keep track of the total number of Hard Clips after clipping to make sure everything was accounted for
|
||||||
String baseQuals = readClipper.read.getBaseQualityString();
|
if (element.getOperator() == CigarOperator.HARD_CLIP)
|
||||||
|
sumHardClips -= element.getLength();
|
||||||
|
|
||||||
// "*" is the default empty-sequence-string and for our test it needs to be changed to ""
|
// Make sure all matches are still there
|
||||||
if (readBases.equals("*"))
|
if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
|
||||||
readBases = "";
|
sumMatches -= element.getLength();
|
||||||
if (baseQuals.equals("*"))
|
}
|
||||||
baseQuals = "";
|
Assert.assertTrue( sumHardClips == 0, String.format("Cigar %s -> %s -- FAILED (number of hard clips mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumHardClips));
|
||||||
|
Assert.assertTrue( sumMatches == 0, String.format("Cigar %s -> %s -- FAILED (number of matches mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumMatches));
|
||||||
|
|
||||||
logger.warn(String.format("Testing cigar %s, expecting Base: %s and Qual: %s",
|
|
||||||
cigar.toString(), readBases.substring(clipStart, readBases.length() - clipEnd),
|
logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString()));
|
||||||
baseQuals.substring(clipStart, baseQuals.length() - clipEnd)));
|
|
||||||
//if (expectEmptyRead)
|
|
||||||
// testBaseQual( readClipper.hardClipSoftClippedBases(), new byte[0], new byte[0] );
|
|
||||||
//else
|
|
||||||
ClipReadsTestUtils.testBaseQual(readClipper.hardClipSoftClippedBases(),
|
|
||||||
readBases.substring(clipStart, readBases.length() - clipEnd).getBytes(),
|
|
||||||
baseQuals.substring(clipStart, baseQuals.length() - clipEnd).getBytes());
|
|
||||||
logger.warn("Cigar: " + cigar.toString() + " PASSED!");
|
|
||||||
}
|
}
|
||||||
// We will use testParameter in the following way
|
|
||||||
// Right tail, left tail,
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue