Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2011-12-15 12:52:46 -05:00
commit 7c58d8e37d
3 changed files with 44 additions and 60 deletions

View File

@ -247,7 +247,7 @@ public class ClippingOp {
return newCigar;
}
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"})
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) {
if (start == 0 && stop == read.getReadLength() - 1)
return new GATKSAMRecord(read.getHeader());
@ -373,6 +373,10 @@ public class ClippingOp {
while(cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
// if the read had a HardClip operator at the end, combine it with the Hard Clip we are adding
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
totalHardClipCount += cigarElement.getLength();
}
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
}

View File

@ -58,6 +58,7 @@ public class ReadClipper {
return hardClipByReferenceCoordinates(refStart, -1);
}
@Requires("!read.getReadUnmappedFlag()")
protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);

View File

@ -252,77 +252,56 @@ public class ReadClipperUnitTest extends BaseTest {
}
@Test(enabled = false)
@Test(enabled = true)
public void testHardClipSoftClippedBases() {
// Generate a list of cigars to test
for (Cigar cigar : ClipReadsTestUtils.generateCigars()) {
//logger.warn("Testing Cigar: "+cigar.toString());
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(cigar));
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
readClipper = new ReadClipper(read);
GATKSAMRecord clippedRead = readClipper.hardClipSoftClippedBases();
int clipStart = 0;
int clipEnd = 0;
boolean expectEmptyRead = false;
int sumHardClips = 0;
int sumMatches = 0;
List<CigarElement> cigarElements = cigar.getCigarElements();
int CigarListLength = cigarElements.size();
boolean tail = true;
for (CigarElement element : read.getCigar().getCigarElements()) {
// Assuming cigars are well formed, if we see S or H, it means we're on the tail (left or right)
if (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP)
tail = true;
// It will know what needs to be clipped based on the start and end of the string, hardclips and softclips
// are added to the amount to clip
if (cigarElements.get(0).getOperator() == CigarOperator.HARD_CLIP) {
//clipStart += cigarElements.get(0).getLength();
if (cigarElements.get(1).getOperator() == CigarOperator.SOFT_CLIP) {
clipStart += cigarElements.get(1).getLength();
// Check for leading indel
if (cigarElements.get(2).getOperator() == CigarOperator.INSERTION) {
expectEmptyRead = true;
}
}
// Check for leading indel
else if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) {
expectEmptyRead = true;
}
} else if (cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP) {
clipStart += cigarElements.get(0).getLength();
// Check for leading indel
if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) {
expectEmptyRead = true;
}
}
//Check for leading indel
else if (cigarElements.get(0).getOperator() == CigarOperator.INSERTION) {
expectEmptyRead = true;
// Adds all H, S and D's (next to hard/soft clips).
// All these should be hard clips after clipping.
if (tail && (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.DELETION))
sumHardClips += element.getLength();
// this means we're no longer on the tail (insertions can still potentially be the tail because
// of the current contract of clipping out hanging insertions)
else if (element.getOperator() != CigarOperator.INSERTION)
tail = false;
// Adds all matches to verify that they remain the same after clipping
if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
sumMatches += element.getLength();
}
if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.HARD_CLIP) {
//clipEnd += cigarElements.get(CigarListLength - 1).getLength();
if (cigarElements.get(CigarListLength - 2).getOperator() == CigarOperator.SOFT_CLIP)
clipEnd += cigarElements.get(CigarListLength - 2).getLength();
} else if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.SOFT_CLIP)
clipEnd += cigarElements.get(CigarListLength - 1).getLength();
for (CigarElement element : clippedRead.getCigar().getCigarElements()) {
// Test if clipped read has Soft Clips (shouldn't have any!)
Assert.assertTrue( element.getOperator() != CigarOperator.SOFT_CLIP, String.format("Cigar %s -> %s -- FAILED (resulting cigar has soft clips)", read.getCigarString(), clippedRead.getCigarString()));
String readBases = readClipper.read.getReadString();
String baseQuals = readClipper.read.getBaseQualityString();
// Keep track of the total number of Hard Clips after clipping to make sure everything was accounted for
if (element.getOperator() == CigarOperator.HARD_CLIP)
sumHardClips -= element.getLength();
// "*" is the default empty-sequence-string and for our test it needs to be changed to ""
if (readBases.equals("*"))
readBases = "";
if (baseQuals.equals("*"))
baseQuals = "";
// Make sure all matches are still there
if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
sumMatches -= element.getLength();
}
Assert.assertTrue( sumHardClips == 0, String.format("Cigar %s -> %s -- FAILED (number of hard clips mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumHardClips));
Assert.assertTrue( sumMatches == 0, String.format("Cigar %s -> %s -- FAILED (number of matches mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumMatches));
logger.warn(String.format("Testing cigar %s, expecting Base: %s and Qual: %s",
cigar.toString(), readBases.substring(clipStart, readBases.length() - clipEnd),
baseQuals.substring(clipStart, baseQuals.length() - clipEnd)));
//if (expectEmptyRead)
// testBaseQual( readClipper.hardClipSoftClippedBases(), new byte[0], new byte[0] );
//else
ClipReadsTestUtils.testBaseQual(readClipper.hardClipSoftClippedBases(),
readBases.substring(clipStart, readBases.length() - clipEnd).getBytes(),
baseQuals.substring(clipStart, baseQuals.length() - clipEnd).getBytes());
logger.warn("Cigar: " + cigar.toString() + " PASSED!");
logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString()));
}
// We will use testParameter in the following way
// Right tail, left tail,
}
}