Bugfix for ReadClipper with ReducedReads

-- The previous version of the read clipping operations wouldn't modify the reduced reads counts, so hardClipToRegion would result in a read with, say, 50 bp of sequence and base qualities but 250 bp of reduced read counts.  Updated the hardClip operation to handle reduced reads, and added a unit test to make sure this works properly.  Also had to update GATKSAMRecord.emptyRead() to set the reduced count to new byte[0] if the template read is a reduced read
-- Update md5s, where the new code recovers a TP variant with count 2 that was missed previously
This commit is contained in:
Mark DePristo 2013-04-29 10:05:18 -04:00
parent 5dd73ba2d1
commit 0387ea8df9
4 changed files with 42 additions and 2 deletions

View File

@ -188,7 +188,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("3c87eb93ffe3a0166aca753050b981e1"));
Arrays.asList("0df626cd0d76aca8a05a545d0b36bf23"));
executeTest("HC calling on a ReducedRead BAM", spec);
}

View File

@ -378,7 +378,13 @@ public class ClippingOp {
hardClippedRead.setBaseQualities(newBaseInsertionQuals, EventType.BASE_INSERTION);
hardClippedRead.setBaseQualities(newBaseDeletionQuals, EventType.BASE_DELETION);
}
if (read.isReducedRead()) {
final byte[] reducedCounts = new byte[newLength];
System.arraycopy(read.getReducedReadCounts(), copyStart, reducedCounts, 0, newLength);
hardClippedRead.setReducedReadCounts(reducedCounts);
}
return hardClippedRead;
}

View File

@ -393,6 +393,22 @@ public class GATKSAMRecord extends BAMRecord {
setAttribute(REDUCED_READ_CONSENSUS_TAG, counts);
}
/**
 * Replace this record's reduced read counts with {@code counts}.
 *
 * This only updates the cached underlying reduced-read count array; it does
 * NOT write the REDUCED_READ_CONSENSUS_TAG attribute on the record.
 *
 * TODO -- this function needs to be fixed when the RR spec is set to 2.0
 *
 * @param counts the count array; must have exactly one entry per read base
 */
public void setReducedReadCounts(final byte[] counts) {
    final int nBases = getReadBases().length;
    if ( counts.length != nBases ) {
        throw new IllegalArgumentException("Reduced counts length " + counts.length + " != bases length " + nBases);
    }
    reducedReadCounts = counts;
    retrievedReduceReadCounts = true;
}
/**
* The number of bases corresponding the i'th base of the reduced read.
*
@ -678,6 +694,7 @@ public class GATKSAMRecord extends BAMRecord {
emptyRead.setCigarString("");
emptyRead.setReadBases(new byte[0]);
emptyRead.setBaseQualities(new byte[0]);
if ( read.isReducedRead() ) emptyRead.setReducedReadCounts(new byte[0]);
SAMReadGroupRecord samRG = read.getReadGroup();
emptyRead.clearAttributes();

View File

@ -35,6 +35,7 @@ import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
@ -348,4 +349,20 @@ public class ReadClipperUnitTest extends BaseTest {
}
@Test(enabled = true)
public void testHardClipReducedRead() {
    final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar("10M");

    // Tag the read with distinguishable per-base reduced counts: 0, 1, 2, ...
    // so we can tell exactly which entries survive the clip.
    final int readLength = read.getReadLength();
    final byte[] reducedCounts = new byte[readLength];
    for ( int base = 0; base < readLength; base++ )
        reducedCounts[base] = (byte) base;
    read.setReducedReadCounts(reducedCounts);

    final int alnStart = read.getAlignmentStart();
    final int alnEnd = read.getAlignmentEnd();

    // Clip progressively more off both ends; the surviving reduced counts must
    // track the surviving bases (one extra base removed per end per iteration).
    for ( int offset = 0; offset < readLength / 2; offset++ ) {
        final GATKSAMRecord clipped = ReadClipper.hardClipBothEndsByReferenceCoordinates(read, alnStart + offset, alnEnd - offset);
        final byte[] expected = Arrays.copyOfRange(reducedCounts, offset + 1, readLength - offset - 1);
        Assert.assertEquals(clipped.getReducedReadCounts(), expected);
    }
}
}