Bug fix for the case of merging two VCs when a deletion deletes the padding base for a consecutive indel. Added unit test to cover this case.

This commit is contained in:
Ryan Poplin 2012-07-30 12:09:23 -04:00
parent 7630c929a7
commit 7a73042cd3
3 changed files with 16 additions and 9 deletions

View File

@ -423,16 +423,17 @@ public class GenotypingEngine {
protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) {
final int thisStart = thisVC.getStart();
final int nextStart = nextVC.getStart();
byte[] refBases = ( new byte[]{} );
byte[] altBases = ( new byte[]{} );
byte[] refBases = new byte[]{};
byte[] altBases = new byte[]{};
refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases());
altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases());
for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
int locus;
for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
final byte refByte = ref[locus - refLoc.getStart()];
refBases = ArrayUtils.add(refBases, refByte);
altBases = ArrayUtils.add(altBases, refByte);
}
refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases());
refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel
altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
int iii = 0;

View File

@ -353,6 +353,16 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
// deletion + insertion (abutting)
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make();
truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
// complex + complex
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make();
nextVC = new VariantContextBuilder().loc("2", 1706, 1707).alleles("GG","AC").make();

View File

@ -30,10 +30,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGA() {
// TODO -- Ryan, do you know why the md5s changed just for the rank sum tests?
final String RyansMd5 = "ff370c42c8b09a29f1aeff5ac57c7ea6";
final String EricsMd5 = "d8317f4589e8e0c48bcd087cdb75ce88";
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", EricsMd5);
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "d8317f4589e8e0c48bcd087cdb75ce88");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -46,6 +43,5 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b");
}
}