Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2011-12-15 12:52:46 -05:00
commit 7c58d8e37d
3 changed files with 44 additions and 60 deletions

View File

@ -247,7 +247,7 @@ public class ClippingOp {
return newCigar; return newCigar;
} }
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"}) @Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) { private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) {
if (start == 0 && stop == read.getReadLength() - 1) if (start == 0 && stop == read.getReadLength() - 1)
return new GATKSAMRecord(read.getHeader()); return new GATKSAMRecord(read.getHeader());
@ -373,6 +373,10 @@ public class ClippingOp {
while(cigarElementIterator.hasNext()) { while(cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next(); cigarElement = cigarElementIterator.next();
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength()); alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
// if the read had a HardClip operator in the end, combine it with the Hard Clip we are adding
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
totalHardClipCount += cigarElement.getLength();
} }
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP)); newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
} }

View File

@ -58,6 +58,7 @@ public class ReadClipper {
return hardClipByReferenceCoordinates(refStart, -1); return hardClipByReferenceCoordinates(refStart, -1);
} }
@Requires("!read.getReadUnmappedFlag()")
protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);

View File

@ -252,77 +252,56 @@ public class ReadClipperUnitTest extends BaseTest {
} }
@Test(enabled = false) @Test(enabled = true)
public void testHardClipSoftClippedBases() { public void testHardClipSoftClippedBases() {
// Generate a list of cigars to test // Generate a list of cigars to test
for (Cigar cigar : ClipReadsTestUtils.generateCigars()) { for (Cigar cigar : ClipReadsTestUtils.generateCigars()) {
//logger.warn("Testing Cigar: "+cigar.toString()); GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(cigar)); readClipper = new ReadClipper(read);
GATKSAMRecord clippedRead = readClipper.hardClipSoftClippedBases();
int clipStart = 0; int sumHardClips = 0;
int clipEnd = 0; int sumMatches = 0;
boolean expectEmptyRead = false;
List<CigarElement> cigarElements = cigar.getCigarElements(); boolean tail = true;
int CigarListLength = cigarElements.size(); for (CigarElement element : read.getCigar().getCigarElements()) {
// Assuming cigars are well formed, if we see S or H, it means we're on the tail (left or right)
if (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP)
tail = true;
// It will know what needs to be clipped based on the start and end of the string, hardclips and softclips // Adds all H, S and D's (next to hard/soft clips).
// are added to the amount to clip // All these should be hard clips after clipping.
if (cigarElements.get(0).getOperator() == CigarOperator.HARD_CLIP) { if (tail && (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.DELETION))
//clipStart += cigarElements.get(0).getLength(); sumHardClips += element.getLength();
if (cigarElements.get(1).getOperator() == CigarOperator.SOFT_CLIP) {
clipStart += cigarElements.get(1).getLength(); // this means we're no longer on the tail (insertions can still potentially be the tail because
// Check for leading indel // of the current contract of clipping out hanging insertions
if (cigarElements.get(2).getOperator() == CigarOperator.INSERTION) { else if (element.getOperator() != CigarOperator.INSERTION)
expectEmptyRead = true; tail = false;
}
} // Adds all matches to verify that they remain the same after clipping
// Check for leading indel if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
else if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) { sumMatches += element.getLength();
expectEmptyRead = true;
}
} else if (cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP) {
clipStart += cigarElements.get(0).getLength();
// Check for leading indel
if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) {
expectEmptyRead = true;
}
}
//Check for leading indel
else if (cigarElements.get(0).getOperator() == CigarOperator.INSERTION) {
expectEmptyRead = true;
} }
if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.HARD_CLIP) { for (CigarElement element : clippedRead.getCigar().getCigarElements()) {
//clipEnd += cigarElements.get(CigarListLength - 1).getLength(); // Test if clipped read has Soft Clips (shouldn't have any!)
if (cigarElements.get(CigarListLength - 2).getOperator() == CigarOperator.SOFT_CLIP) Assert.assertTrue( element.getOperator() != CigarOperator.SOFT_CLIP, String.format("Cigar %s -> %s -- FAILED (resulting cigar has soft clips)", read.getCigarString(), clippedRead.getCigarString()));
clipEnd += cigarElements.get(CigarListLength - 2).getLength();
} else if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.SOFT_CLIP)
clipEnd += cigarElements.get(CigarListLength - 1).getLength();
String readBases = readClipper.read.getReadString(); // Keep track of the total number of Hard Clips after clipping to make sure everything was accounted for
String baseQuals = readClipper.read.getBaseQualityString(); if (element.getOperator() == CigarOperator.HARD_CLIP)
sumHardClips -= element.getLength();
// "*" is the default empty-sequence-string and for our test it needs to be changed to "" // Make sure all matches are still there
if (readBases.equals("*")) if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
readBases = ""; sumMatches -= element.getLength();
if (baseQuals.equals("*")) }
baseQuals = ""; Assert.assertTrue( sumHardClips == 0, String.format("Cigar %s -> %s -- FAILED (number of hard clips mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumHardClips));
Assert.assertTrue( sumMatches == 0, String.format("Cigar %s -> %s -- FAILED (number of matches mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumMatches));
logger.warn(String.format("Testing cigar %s, expecting Base: %s and Qual: %s",
cigar.toString(), readBases.substring(clipStart, readBases.length() - clipEnd), logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString()));
baseQuals.substring(clipStart, baseQuals.length() - clipEnd)));
//if (expectEmptyRead)
// testBaseQual( readClipper.hardClipSoftClippedBases(), new byte[0], new byte[0] );
//else
ClipReadsTestUtils.testBaseQual(readClipper.hardClipSoftClippedBases(),
readBases.substring(clipStart, readBases.length() - clipEnd).getBytes(),
baseQuals.substring(clipStart, baseQuals.length() - clipEnd).getBytes());
logger.warn("Cigar: " + cigar.toString() + " PASSED!");
} }
// We will use testParameter in the following way
// Right tail, left tail,
} }
} }