Fixed up edge condition for clipping long reads in the HMM.

MD5s change because some reads were incorrectly getting clipped before.

[delivers #62584746]
This commit is contained in:
Eric Banks 2014-01-01 00:34:11 -05:00
parent 9355598129
commit bb4c4b1fcd
5 changed files with 15 additions and 7 deletions

View File

@ -262,7 +262,7 @@ public class PairHMMIndelErrorModel {
* @return true if the read needs to be clipped, false otherwise
*/
protected static boolean mustClipDownstream(final GATKSAMRecord read, final int refWindowStop) {
// Pre-change form of the check (the line this commit replaces). It compares
// softStart + readLength against refWindowStop.
// NOTE(review): assuming getSoftStart() is the coordinate of the read's first base,
// that sum is one position past the read's last base, so a read ending exactly on
// refWindowStop was still reported as needing a clip.
return ( !read.isEmpty() && read.getSoftStart() < refWindowStop && read.getSoftStart() + read.getReadLength() > refWindowStop );
// Post-change form of the check. The added "- 1" compares softStart + readLength - 1
// against refWindowStop instead, so a read is flagged only when that value is strictly
// greater than refWindowStop. The first two conditions are unchanged: the read must be
// non-empty and its soft start must be strictly less than refWindowStop.
return ( !read.isEmpty() && read.getSoftStart() < refWindowStop && read.getSoftStart() + read.getReadLength() - 1 > refWindowStop );
}
/**
@ -316,7 +316,7 @@ public class PairHMMIndelErrorModel {
// if the read extends beyond the downstream (right) end of the reference window, clip it
if ( mustClipDownstream(read, refWindowStop) )
read = ReadClipper.hardClipByReadCoordinates(read, read.getSoftStart() + read.getReadLength() - refWindowStop + 1, read.getReadLength() - 1);
read = ReadClipper.hardClipByReadCoordinates(read, refWindowStop - read.getSoftStart() + 1, read.getReadLength() - 1);
// if the read extends beyond the upstream (left) end of the reference window, clip it
if ( mustClipUpstream(read, refWindowStart) )

View File

@ -101,7 +101,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("3d12bdb816d27bf7c9efb4c13dc2aec7"));
Arrays.asList("e10c49fcf9a128745c2b050a52798e58"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
Arrays.asList("a2c8e83f37cd1e114b42af4b873f57bc"));
Arrays.asList("903af514f70db9238064da311c4ea0de"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}

View File

@ -260,7 +260,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("150b31ba05113ca1996b548be5170d6d"));
Arrays.asList("c4248f02103e37e89b0f22c0d9c98492"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -279,7 +279,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("7d0ee85cd89f4addd84c5511daaaa5c5"));
Arrays.asList("96c7862d55e933b274cabe45c9c443d9"));
executeTest(String.format("test calling with BAQ"), spec);
}

View File

@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
// Expected output digest before this commit (presumably the line being replaced; the
// diff markers are not present in this view).
Arrays.asList("ec0977e3fd3e2ac29c9821f0ca830455"));
// Expected output digest after this commit. The commit message states that MD5s change
// because some reads were incorrectly getting clipped before.
Arrays.asList("710d379607129935b1b7b6960ca7b213"));
executeTest("test MultiSample Pilot1", spec);
}

View File

@ -130,4 +130,12 @@ public class PairHMMIndelErrorModelUnitTest extends BaseTest {
final boolean result = PairHMMIndelErrorModel.mustClipDownstream(read, refWindowEnd);
Assert.assertEquals(result, read.getSoftStart() < refWindowEnd && read.getSoftStart() + readLength > refWindowEnd);
}
@Test
public void clipDownstreamAtBorderTest() {
    // Boundary check for mustClipDownstream: the same read is tested against two
    // adjacent reference-window stops, one that requires clipping and one that does not.
    // NOTE(review): the trailing arguments 5 and 10 are presumably the alignment start
    // and the read length (the cigar set below is also 10 bases long) — confirm against
    // ArtificialSAMUtils.createArtificialRead.
    final GATKSAMRecord borderRead =
            ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, 5, 10);
    borderRead.setCigarString("10M");

    // Window stop of 13: the read is expected to need downstream clipping.
    final boolean clipsAtThirteen = PairHMMIndelErrorModel.mustClipDownstream(borderRead, 13);
    Assert.assertEquals(clipsAtThirteen, true);

    // Window stop of 14: the read is expected to be left alone.
    final boolean clipsAtFourteen = PairHMMIndelErrorModel.mustClipDownstream(borderRead, 14);
    Assert.assertEquals(clipsAtFourteen, false);
}
}