From e24c8d00d56afbeeb0bd8c2f4ec7c87cbe25bda6 Mon Sep 17 00:00:00 2001 From: ebanks Date: Thu, 10 Sep 2009 14:28:43 +0000 Subject: [PATCH] So, the VCF spec allows for an optional meta field in the header representing the date. However, using this field means that integration tests run on the vcf file will fail the MD5 test (which is what happened to the VariantFiltration test this morning after working just fine yesterday). After consulting our resident expert (Aaron), we're going to (temporarily) remove the date from the vcf output until we can come up with a better solution. However, this shouldn't cause any short-term problems because the data truly is optional. VF test's MD5s are updated. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1580 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/variantstovcf/VariantsToVCF.java | 6 +++-- .../VariantFiltrationIntegrationTest.java | 22 +++++++++---------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java index 145b844ed..2e5a5407b 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java @@ -46,10 +46,12 @@ public class VariantsToVCF extends RefWalker { Map metaData = new HashMap(); List additionalColumns = new ArrayList(); - Calendar cal = Calendar.getInstance(); + // Don't output the data for now because it kills our unit test MD5s and is optional + // TODO - figure out what to do here + //Calendar cal = Calendar.getInstance(); + //metaData.put("fileDate", String.format("%d%02d%02d", cal.get(Calendar.YEAR), cal.get(Calendar.MONTH), cal.get(Calendar.DAY_OF_MONTH))); metaData.put("format", "VCRv3.2"); - metaData.put("fileDate", String.format("%d%02d%02d", cal.get(Calendar.YEAR), cal.get(Calendar.MONTH), cal.get(Calendar.DAY_OF_MONTH))); metaData.put("source", "VariantsToVCF"); metaData.put("reference", args.referenceFile.getAbsolutePath()); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index abe61d420..8832f13b8 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -9,63 +9,63 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testIntervals() { - String[] md5DoC = {"c6ac824171bc2abcd6a896ba07d0c072", "21c8e1f9dc65fdfb39347547f9b04011"}; + String[] md5DoC = {"492599c4d7d6dfca29659a7be3e3b7d4", "21c8e1f9dc65fdfb39347547f9b04011"}; WalkerTestSpec spec1 = new WalkerTestSpec( "-T VariantFiltration -X DepthOfCoverage:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5DoC)); executeTest("testDoCFilter", spec1); - String[] md5AlleleBalance = {"06fa8901c6823766b89ec28acc435c47", "a13e4ce6260bf9f33ca99dc808b8e6ad"}; + String[] md5AlleleBalance = {"33ce9e974efa7c095e3124f0ecad14b3", "a13e4ce6260bf9f33ca99dc808b8e6ad"}; WalkerTestSpec spec2 = new WalkerTestSpec( "-T VariantFiltration -X AlleleBalance:low=0.25,high=0.75 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5AlleleBalance)); executeTest("testAlleleBalanceFilter", spec2); - String[] md5Strand = {"3f77d329a2f9223423a19665cf7eebbd", "0f7db0aad764268ee8fa3b857df8d87d"}; + String[] md5Strand = {"f30ca865290e1c3537a8a30e4c3a5df2", "0f7db0aad764268ee8fa3b857df8d87d"}; WalkerTestSpec spec3 = new WalkerTestSpec( "-T VariantFiltration -X FisherStrand:pvalue=0.0001 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5Strand)); executeTest("testStrandFilter", spec3); - String[] md5Lod = {"2c4251fc21998705458a9d25386f522f", "7e0c4f2b0fda85fd2891eee76c396a55"}; + String[] md5Lod = {"948963861d9d2260e9f5ed6447aa30cb", "7e0c4f2b0fda85fd2891eee76c396a55"}; WalkerTestSpec spec4 = new WalkerTestSpec( "-T VariantFiltration -X LodThreshold:lod=10 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5Lod)); executeTest("testLodFilter", spec4); - String[] md5MQ0 = {"8d0dbd951241177dddc4aab8b747ed86", "3203de335621851bccf596242b079e23"}; + String[] md5MQ0 = {"57ac3b2df0590fe189e4462560cba686", "3203de335621851bccf596242b079e23"}; WalkerTestSpec spec5 = new WalkerTestSpec( "-T VariantFiltration -X MappingQualityZero:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5MQ0)); executeTest("testMappingQuality0Filter", spec5); - String[] md5MQ = {"638719c44704fbac599dfc957a3dce06", "07c5e5b8b3b49a53a6617e63c9e56c71"}; + String[] md5MQ = {"b8306b693dfee52113d8ecb405ddf25a", "ecc777feedea61f7b570d114c2ab89b1"}; WalkerTestSpec spec6 = new WalkerTestSpec( - "-T VariantFiltration -X MappingQuality:min=5 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", + "-T VariantFiltration -X MappingQuality:min=20 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5MQ)); executeTest("testRMSMappingQualityFilter", spec6); - String[] md5OnOff = {"638719c44704fbac599dfc957a3dce06", "07c5e5b8b3b49a53a6617e63c9e56c71"}; + String[] md5OnOff = {"8f35d20e9b53ed5c5a161b15501705bf", "67f2e1bc025833b0fa31f47195198997"}; WalkerTestSpec spec7 = new WalkerTestSpec( - "-T VariantFiltration -X OnOffGenotypeRatio:threshold=0.8 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", + "-T VariantFiltration -X OnOffGenotypeRatio:threshold=0.9 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5OnOff)); executeTest("testOnOffGenotypeFilter", spec7); - String[] md5Clusters = {"22d6c46e3971573159d195c8d93e9293", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"}; + String[] md5Clusters = {"f5c7c5da0198c4aaafdfcfb3c356eedc", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"}; WalkerTestSpec spec8 = new WalkerTestSpec( "-T VariantFiltration -X ClusteredSnps:window=10,snps=3 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5Clusters)); executeTest("testClusteredSnpsFilter", spec8); - String[] md5Indels = {"3d9ebd5ada1c355e44245a64bc801b11", "8e0e915a1cb63d7049e0671ed00101fe"}; + String[] md5Indels = {"30bf4c764f6dfd006d919ecaceee0166", "8e0e915a1cb63d7049e0671ed00101fe"}; WalkerTestSpec spec9 = new WalkerTestSpec( "-T VariantFiltration -X IndelArtifact -B indels,PointIndel,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.indels -B cleaned,CleanedOutSNP,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.realigner_badsnps -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2,