From 6230315ff2ae7e7a000d064d98b403d7ef6383ec Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 27 Jul 2011 22:51:21 -0400 Subject: [PATCH 1/2] Along with my half-written commit message from earlier, I also forgot to commit the integration test updates. This is what happens when you try to do things 30 seconds before you leave for the day. To finish up from before: complex events weren't being padded with the reference base as per the VCF spec. They are now. --- .../VariantContextIntegrationTest.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index a344817a0..ced2bf00b 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -19,14 +19,14 @@ public class VariantContextIntegrationTest extends WalkerTest { static HashMap expectations = new HashMap(); static { - expectations.put("-L 1:1-10000 --printPerLocus", "e4ee2eaa3114888e918a1c82df7a027a"); - expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "5b5635e4877d82e8a27d70dac24bda2f"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "ceced3f270b4fe407ee83bc9028becde"); - expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9a9b9e283553c28bf58de1cafa38fe92"); + expectations.put("-L 1:1-10000 --printPerLocus", "e9d96677a57bc3a10fb6d9ba942c19f0"); + expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "8a1174d2b18b98e624abbe93e6af8fdd"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "3933f1fae5453c54c3f791a23de07599"); + expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "c9cf2f01bf045a58dcc7649fd6ea2396"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "2097e32988d603d3b353b50218c86d3b"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "033bd952fca048fe1a4f6422b57ab2ed"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5e40980c02797f90821317874426a87a"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "e5a00766f8c1ff9cf92310bafdec3126"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "a103d856e8bc558c949c6e3f184e8913"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5f2265ac6c6d80d64dc6e69a05c1250b"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "06a3ae4c0afa23b429a9491ab7707f3c"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc"); } @@ -58,7 +58,7 @@ public class VariantContextIntegrationTest extends WalkerTest { // this really just tests that we are seeing the same number of objects over all of chr1 WalkerTestSpec spec = new WalkerTestSpec( root + " -L 1" + " -o %s", 1, // just one output file - Arrays.asList("529f936aa6c303658b23caf4e527782f")); + Arrays.asList("2532234d2c934a5e14849655dd7b5f4f")); executeTest("testLargeScaleConversion", spec); } } From 1afc49a2973b35eab207e6d7e13707975742dace Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 28 Jul 2011 13:55:58 -0400 Subject: [PATCH 2/2] There are some really 'interesting' (but apparently valid) records in the Mus musculus dbSNP file. Generalized the handling of complex cases in the dbSNP adaptor to handle it all. I just grabbed the actual Mus musculus dbSNP file as a test, ran it whole genome, and confirmed that we finally produce a valid VCF on it. Should be the last commit needed on this adaptor. --- .../sting/gatk/refdata/VariantContextAdaptors.java | 11 +++++++---- .../variantcontext/VariantContextIntegrationTest.java | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index dedd2f26e..ba9a10d8b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -112,26 +112,29 @@ public class VariantContextAdaptors { alleles.add(refAllele); // add all of the alt alleles + boolean sawNullAllele = false; for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) { if ( ! Allele.acceptableAlleleBases(alt) ) { //System.out.printf("Excluding dbsnp record %s%n", dbsnp); return null; } - alleles.add(Allele.create(alt, false)); + Allele altAllele = Allele.create(alt, false); + alleles.add(altAllele); + if ( altAllele.isNull() ) + sawNullAllele = true; } Map attributes = new HashMap(); attributes.put(VariantContext.ID_KEY, dbsnp.getRsID()); - boolean vcIsDeletion = DbSNPHelper.isDeletion(dbsnp) || DbSNPHelper.isComplexIndel(dbsnp); - if ( vcIsDeletion ) { + if ( sawNullAllele ) { int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; if ( index < 0 ) return null; // we weren't given enough reference context to create the VariantContext attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index])); } Collection genotypes = null; - VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (vcIsDeletion ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes); + VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes); return vc; } else return null; // can't handle anything else diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index ced2bf00b..6ed00f0ea 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -58,7 +58,7 @@ public class VariantContextIntegrationTest extends WalkerTest { // this really just tests that we are seeing the same number of objects over all of chr1 WalkerTestSpec spec = new WalkerTestSpec( root + " -L 1" + " -o %s", 1, // just one output file - Arrays.asList("2532234d2c934a5e14849655dd7b5f4f")); + Arrays.asList("045a5b02c86aeb9301dc0b724da0c8f7")); executeTest("testLargeScaleConversion", spec); } }