diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java index ca310e4ab..363ec2412 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -107,7 +107,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("5120f5f419cb32b37d5c56101f4c5da2")); + Arrays.asList("d584454019f21729757ed7685ff8d02c")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -141,7 +141,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("8436bf4acb81f2c03bb5ecf8a514d90a")); + Arrays.asList("fe6a7fdd3f7c1c77b31c2676aeb0ed8d")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index c4b994f71..153b5ffaa 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -88,6 +88,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe //TODO the old MD5 is kept for the record. //TODO this should be revisit once we get into addressing inaccuracies by the independent allele approach. // executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "b5ff7530827f4b9039a58bdc8a3560d2"); - executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "8449ea4fa85eac8834b049a0e3daee5a"); + executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "6fcffeb383100c1a11f0f0bff0a6e426"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 54a5fe803..f69a353a0 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -63,7 +63,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","2a0dcd92f3a07f45b7927e66404bc37e"); + executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","62ef94ffa0670c6467cf90b27cc59a5d"); } @Test(enabled = true) diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index ae5fc6850..12d775bb5 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -78,7 +78,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("f78975d22ac27a841d807765242d3c3f")); + Arrays.asList("8893a0ef99744757333ec6a85c31b753")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -105,7 +105,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("e89c2a0d30a409087af25fc06fd46404")); + Arrays.asList("92cbf13fd3ca797c2cacccf12890f40d")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -115,7 +115,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("af5475e45b6ffca990dd035448e6b0fe")); + Arrays.asList("618d6cdd823c9f88c03b58fc7ebf47b4")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -125,7 +125,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("f9b4ed435a53198d30199e8a5c98252d")); + Arrays.asList("889e43aa0d2c9ce07064817268c965a4")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -181,7 +181,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("0369f9c03df86486709a35a5a2f23192")); + Arrays.asList("8861c367dd20505e120af174a9d4f9f4")); executeTest("test minIndelFraction 0.0", spec); } @@ -189,7 +189,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("8bd1f903445255f24bd9ef677e1ba9a8")); + Arrays.asList("a3bb6d95764d6f5a7d2f78e09699d4ef")); executeTest("test minIndelFraction 0.25", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 36c1ba0e1..822be96b5 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -94,7 +94,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSLOD() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("45f867c73dc3008e874eb3c009cd9674")); + Arrays.asList("06d166ecf40563c234902d1b340a3ff1")); executeTest("test SLOD", spec); } @@ -102,7 +102,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testNDA() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("905e9a2ae183fc152101314774707372")); + Arrays.asList("0538b7363d271f1f37d73dc3bd5b0071")); executeTest("test NDA", spec); } @@ -110,7 +110,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testCompTrack() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("ac14f19fa1e9cecc8246814eac393662")); + Arrays.asList("f54dafe8e9ac1402d230e4d83766d6cf")); executeTest("test using comp track", spec); } @@ -124,17 +124,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameterSitesOnly() { - testOutputParameters("-sites_only", "be100d3f04592d0865d907177ee8c7e4"); + testOutputParameters("-sites_only", "dfd11f2f33dc4da50a2725bfb6f64dba"); } @Test public void testOutputParameterAllConfident() { - testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "9024aa3ccf7339b873444b792ff5cb91"); + testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "a9c30d2bb179ea7a69dd0e7367fcd465"); } @Test public void testOutputParameterAllSites() { - testOutputParameters("--output_mode EMIT_ALL_SITES", "0d1e1fcc70fb4c159eac016f3085e5e9"); + testOutputParameters("--output_mode EMIT_ALL_SITES", "791fbabf4a8a7a1d0735d57c4c6b4a5a"); } private void testOutputParameters(final String args, final String md5) { @@ -148,7 +148,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("13d2cc1e0871f85a97fb1f73c052ea3e")); + Arrays.asList("38a2f8e5542f78c324e89d09eb545f07")); executeTest("test confidence 1", spec1); } @@ -156,7 +156,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testNoPrior() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -inputPrior 0.33333 -inputPrior 0.33333", 1, - Arrays.asList("d94ab69be622fb23c97f524234eb05b3")); + Arrays.asList("6f96d079f5889d8555880e6cb614a41d")); executeTest("test no prior 1", spec1); } @@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testUserPrior() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -inputPrior 0.001 -inputPrior 0.495", 1, - Arrays.asList("2f3c5fa06537c8a3f440fb70837702f6")); + Arrays.asList("6e48c44b58960954e7e49810d9e3b3ad")); executeTest("test user prior 1", spec1); } @@ -174,7 +174,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void emitPLsAtAllSites() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --output_mode EMIT_ALL_SITES -allSitePLs", 1, - Arrays.asList("76f7a8508ad40cf35b68b07bc7e29a72")); + Arrays.asList("04630efda71e0e804cfa4b2e7461c083")); // GDA: TODO: BCF encoder/decoder doesn't seem to support non-standard values in genotype fields. IE even if there is a field defined in FORMAT and in the header the BCF2 encoder will still fail spec1.disableShadowBCF(); @@ -190,12 +190,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity1() { - testHeterozosity( 0.01, "e3c708bfeea99835512c1fac477d02e4" ); + testHeterozosity( 0.01, "3e6328e822a92911e591733438fe023c" ); } @Test public void testHeterozyosity2() { - testHeterozosity( 1.0 / 1850, "1226aaa771b8f057646474d306a84866" ); + testHeterozosity( 1.0 / 1850, "e87e2ad4a6d86ba9cc627b397af9c681" ); } private void testHeterozosity(final double arg, final String md5) { @@ -274,7 +274,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("589f9dd567bb86ce74e5d48fd7cae0b2")); + Arrays.asList("9363de1caf569d565e0c377fe234f170")); executeTest(String.format("test multiple technologies"), spec); } @@ -293,7 +293,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("be978fc8807fcbd514b2678adcdc2a28")); + Arrays.asList("5e9d48c296d50d553ae13f283b75e1ee")); executeTest(String.format("test calling with BAQ"), spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index af2480722..857474b55 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -94,7 +94,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("21b346710ccdbbfc904eafda2b0c7443")); + Arrays.asList("4cc795e0e2ec1ce31fd6545fb24a850e")); executeTest("test SingleSample Pilot2", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 569db99dd..d828a1c20 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -270,7 +270,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testWrongGVCFNonVariantRecordOrderBugFix() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("a22bafa2f91c197ea1ab13e02a0c08fb")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("1c3d390b467a9b0e1e307419796142fd")); spec.disableShadowBCF(); executeTest("testMissingGVCFIndexingStrategyException", spec); } @@ -320,7 +320,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testAlleleSpecificAnnotations() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -G Standard -G AS_Standard --disableDithering", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9db19d4706e32ce883c6e3f71e5f4178")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("169c2145c9981b8a1bb1d64a6d776d66")); spec.disableShadowBCF(); executeTest(" testAlleleSpecificAnnotations", spec); } @@ -329,7 +329,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testASMQMateRankSumAnnotation() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -A AS_MQMateRankSumTest --disableDithering", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("85b26109c4586ba33c44e253297050be")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("75b78770469c5aaa73f1c95db8fda574")); spec.disableShadowBCF(); executeTest(" testASMQMateRankSumAnnotation", spec); } @@ -338,7 +338,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testASInsertSizeRankSum() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -G Standard -G AS_Standard --disableDithering -A AS_InsertSizeRankSum", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("f1531ed03d6f9538d4d6af7630ff2502")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4fd0fcf44b1121c15e9d38de4171002c")); spec.disableShadowBCF(); executeTest(" testASInsertSizeRankSum", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 7e5ae8fdf..21b3ba5f9 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -106,7 +106,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeBAMOutFlags() throws IOException { - HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "b49e15b4680d197db9b9f71644a5a1f9", "f84c8bc44b6af548bd3b8555a068b59e"); + HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "1ad8934dc0ea624ffeb89d3e877176b2", "f84c8bc44b6af548bd3b8555a068b59e"); } @Test @@ -202,7 +202,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "6f5678f8fb6a4c0b21df1e6b0d1019ee"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "a048ea02eeb9610660e31f36e6114bf4"); } private void HCTestNearbySmallIntervals(String bam, String args, String md5) { @@ -249,7 +249,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("392ac1966cfa1cae78e8d674065a0d28")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("188ce2d74ee42f4187c7b41a01a193bb")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java index 2de0af25b..91edcc02b 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java @@ -566,7 +566,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { public void testAlleleSpecificAnnotations() { final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V " + privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.chr20snippet.g.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("7261cde50d4556c6dc190b10222a8538")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("d4dd179d8a53c4a550d4a22cc9ef1aa8")); spec.disableShadowBCF(); executeTest("testAlleleSpecificAnnotations", spec); } @@ -575,7 +575,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { public void testASMateRankSumAnnotation() { final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard -A AS_MQMateRankSumTest --disableDithering -V " + privateTestDir + "NA12878.AS.MateRankSum.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.MateRankSum.chr20snippet.g.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("08dc30527c98d78d09f8575efc3b84ae")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("515771fa88b0c4bc2a40e9c233806fb1")); spec.disableShadowBCF(); executeTest("testASMateRankSumAnnotation", spec); } @@ -584,7 +584,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { public void testASInsertSizeRankSumAnnotation() { final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V " + privateTestDir + "NA12878.AS.InsertSizeRankSum.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.InsertSizeRankSum.chr20snippet.g.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("723522fe1d05f2d11efda008b65884d2")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("c8a096f2533d06c28ec115a24ffb7ca0")); spec.disableShadowBCF(); executeTest("testASInsertSizeRankSumAnnotation", spec); } @@ -597,7 +597,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { public void testAlleleSpecificAnnotations_oneSample() { final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V " + privateTestDir + "NA12878.AS.chr20snippet.g.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("cc2fe42eafd9789be49b550a639a7a1b")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("558486182a84d0a274534fc00fee326e")); spec.disableShadowBCF(); executeTest("testAlleleSpecificAnnotations_oneSample", spec); } @@ -625,7 +625,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { public void testFractionInformativeReads() { final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -G AS_Standard -o %s --no_cmdline_in_header -A FractionInformativeReads --disableDithering -V " + privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.chr20snippet.g.vcf"; - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList("16dbeec9d640e61453e902918427343a")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList("70fda103f68709e32d691393e9228a9b")); spec.disableShadowBCF(); executeTest("testAlleleSpecificAnnotations", spec); } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/MannWhitneyU.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/MannWhitneyU.java index 9ddd62645..f16178d26 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/MannWhitneyU.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/MannWhitneyU.java @@ -27,6 +27,7 @@ package org.broadinstitute.gatk.utils; import htsjdk.samtools.util.Histogram; import org.apache.commons.math3.distribution.NormalDistribution; +import org.apache.log4j.Logger; import java.util.*; @@ -39,6 +40,8 @@ import java.util.concurrent.ConcurrentHashMap; */ public class MannWhitneyU { + protected static Logger logger = Logger.getLogger(MannWhitneyU.class); + private static final class Rank implements Comparable { final double value; float rank; @@ -188,10 +191,9 @@ public class MannWhitneyU { } } - // Constructs a normal distribution; actual values of mean and SD don't matter since it's - // just used to convert a z-score into a cumulative probability - private static final double NORMAL_MEAN = 100; - private static final double NORMAL_SD = 15; + // Constructs a normal distribution; this needs to be a standard normal in order to get a Z-score in the exact case + private static final double NORMAL_MEAN = 0; + private static final double NORMAL_SD = 1; private static final NormalDistribution NORMAL = new NormalDistribution(NORMAL_MEAN, NORMAL_SD); /** @@ -206,11 +208,6 @@ public class MannWhitneyU { */ private int minimumNormalN = 10; - /** - * If the exact p-value is 1 then the z-score is infinite, so we need a max cutoff instead. - */ - private double maxZScore = 9.0; - /** * Sets the minimum number of values in each data series to use the normal distribution approximation. */ @@ -289,7 +286,8 @@ public class MannWhitneyU { for (int count : numOfTies) { //If every single datapoint is tied then we want to return a p-value of .5 and //the formula for sigma that includes the number of ties breaks down. Setting - //the number of ties to 0 in this case gets the desired result. + //the number of ties to 0 in this case gets the desired result in the normal + //approximation case. if (count != ranks.length) { transformedTies.add((count * count * count) - count); } @@ -424,15 +422,11 @@ public class MannWhitneyU { } } else { // TODO -- This exact test is only implemented for the one sided test, but we currently don't call the two sided version - Histogram distribution = permutationTest(series1, series2); - p = distribution.getCumulativeProbability(u); - if (p == 1) { - z = maxZScore; - } else if (p == 0) { - z = -1.0 * maxZScore; - } else { - z = NORMAL.inverseCumulativeProbability(p); + if (whichSide != TestType.FIRST_DOMINATES) { + logger.warn("An exact two-sided MannWhitneyU test was called. Only the one-sided exact test is implemented, use the approximation instead by setting minimumNormalN to 0."); } + p = permutationTest(series1, series2, u); + z = NORMAL.inverseCumulativeProbability(p); } return new Result(u, z, p, Math.abs(median(series1) - median(series2))); @@ -505,9 +499,10 @@ public class MannWhitneyU { * * @param series1 Data from group 1 * @param series2 Data from group 2 - * @return Histogram with u calculated for every possible permutation of group tag. + * @param testStatU Test statistic U from observed data + * @return P-value based on histogram with u calculated for every possible permutation of group tag. */ - public Histogram permutationTest(final double[] series1, final double[] series2) { + public double permutationTest(final double[] series1, final double[] series2, final double testStatU) { final Histogram histo = new Histogram(); final int n1 = series1.length; final int n2 = series2.length; @@ -552,7 +547,19 @@ public class MannWhitneyU { histo.increment(newU); } - return histo; + /** + * In order to deal with edge cases where the observed value is also the most extreme value, we are taking half + * of the count in the observed bin plus everything more extreme (in the FIRST_DOMINATES case the smaller bins) + * and dividing by the total count of everything in the histogram. Just using getCumulativeDistribution() gives + * a p-value of 1 in the most extreme case which doesn't result in a usable z-score. + */ + double sumOfAllSmallerBins = histo.get(testStatU).getValue() / 2.0; + + for (final Histogram.Bin bin : histo.values()) { + if (bin.getId() < testStatU) sumOfAllSmallerBins += bin.getValue(); + } + + return sumOfAllSmallerBins / histo.getCount(); } } diff --git a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/MWUnitTest.java b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/MWUnitTest.java index 4e1bad337..a3e781343 100644 --- a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/MWUnitTest.java +++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/MWUnitTest.java @@ -47,30 +47,6 @@ public class MWUnitTest extends BaseTest { private static final MannWhitneyU rst = new MannWhitneyU(); - @Test(dataProvider = "rankSumTestData") - public void testSimpleU(String name, double[] series1, double[] series2, double U) { - MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.TWO_SIDED); - System.out.println("=========================================================="); - System.out.println("Series 1: " + Arrays.toString(series1)); - System.out.println("Series 2: " + Arrays.toString(series2)); - System.out.println(" U: " + test.getU()); - System.out.println(" Z: " + test.getZ()); - System.out.println("2-side p: " + test.getP()); - Assert.assertEquals(test.getU(), U, name); - } - - @Test(dataProvider = "oneSidedPTestData") - public void testOnesidedP(String name, double[] series1, double[] series2, double P) { - MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.FIRST_DOMINATES); - System.out.println("=========================================================="); - System.out.println("Series 1: " + Arrays.toString(series1)); - System.out.println("Series 2: " + Arrays.toString(series2)); - System.out.println(" U: " + test.getU()); - System.out.println(" Z: " + test.getZ()); - System.out.println("1-side p: " + test.getP()); - Assert.assertEquals(test.getP(), P, DELTA_PRECISION, name); - } - @DataProvider(name="rankSumTestData") public Object[][] dataProvider() { return new Object[][] { @@ -94,21 +70,73 @@ public class MWUnitTest extends BaseTest { }; } + @Test(dataProvider = "rankSumTestData") + public void testSimpleU(String name, double[] series1, double[] series2, double U) { + MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.TWO_SIDED); + System.out.println("=========================================================="); + System.out.println("Series 1: " + Arrays.toString(series1)); + System.out.println("Series 2: " + Arrays.toString(series2)); + System.out.println(" U: " + test.getU()); + System.out.println(" Z: " + test.getZ()); + System.out.println("2-side p: " + test.getP()); + Assert.assertEquals(test.getU(), U, name); + } + @DataProvider(name="oneSidedPTestData") public Object[][] oneSidedDataProvider() { return new Object[][] { - new Object[] {"test0", new double[] {0,0}, new double[] {1,1}, .16666666}, - new Object[] {"test1", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,21}, .5}, - new Object[] {"test2", new double[] {20,20,20,20,21}, new double[] {20,20,20,20,21}, .77778}, - new Object[] {"test3", new double[] {13,14,15,15,16}, new double[] {16,20,20,21,21}, 0.0079365}, + new Object[] {"test0", new double[] {0,0}, new double[] {1,1}, 0.083333333}, + new Object[] {"test1", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,21}, .25}, + new Object[] {"test2", new double[] {20,20,20,20,21}, new double[] {20,20,20,20,21}, .5}, + new Object[] {"test3", new double[] {13,14,15,15,16}, new double[] {16,20,20,21,21}, 0.00396825}, new Object[] {"test4", new double[] {13,14,15,15,16}, new double[] {16,20,20,21,21,21,21,22,23,27}, 0.001469192}, new Object[] {"test5", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,20,20,20,20,20,20}, .5}, - new Object[] {"test6", new double[] {1,2,3,4,5}, new double[] {6,7,8,9,10}, .00396}, - new Object[] {"test7", new double[] {6,7,8,9,10}, new double[] {1,2,3,4,5}, 1}, + new Object[] {"test6", new double[] {1,2,3,4,5}, new double[] {6,7,8,9,10}, 0.001984}, + new Object[] {"test7", new double[] {6,7,8,9,10}, new double[] {1,2,3,4,5}, 0.99801587}, new Object[] {"test8", new double[] {16,20,20,21,21,21,21,22,23,27,16,20,20,21,21,21,21,22,23,27}, - new double[] {22,23,27,16,20,20,21,21,21,21,22,23,27,60,60}, .08303102}, - new Object[] {"test8", new double[] {16,20,20,21,21,21,21,21,20}, - new double[] {22,23,27,16,20,20,20,20,21}, .395763}, + new double[] {22,23,27,16,20,20,21,21,21,21,22,23,27,60,60}, .08303102}, + new Object[] {"test9", new double[] {16,20,20,21,21,21,21,21,20}, + new double[] {22,23,27,16,20,20,20,20,21}, 0.388204}, }; } + + @Test(dataProvider = "oneSidedPTestData") + public void testOnesidedP(String name, double[] series1, double[] series2, double P) { + MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.FIRST_DOMINATES); + System.out.println("=========================================================="); + System.out.println("Series 1: " + Arrays.toString(series1)); + System.out.println("Series 2: " + Arrays.toString(series2)); + System.out.println(" U: " + test.getU()); + System.out.println(" Z: " + test.getZ()); + System.out.println("1-side p: " + test.getP()); + Assert.assertEquals(test.getP(), P, DELTA_PRECISION, name); + } + + @DataProvider(name="oneSidedZTestData") + public Object[][] oneSidedZDataProvider() { + return new Object[][] { + new Object[] {"test1", new double[] {20}, new double[] {20,20,20}, 0}, + new Object[] {"test2", new double[] {1}, new double[] {1,2,3}, -0.67448975}, + new Object[] {"test3", new double[] {1,2,3}, new double[] {3}, -0.67448975}, + new Object[] {"test4", new double[] {1,2,3}, new double[] {1}, 0.67448975}, + new Object[] {"test5", new double[] {3,3}, new double[] {1,2,3}, 1.036433}, + new Object[] {"test6", new double[] {20,20,20}, new double[] {20,20,20}, 0}, + new Object[] {"test7", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,20,20,20,20,20,20,20,20,20}, 0}, + new Object[] {"test8", new double[] {1}, new double[] {2}, -0.67448975}, + new Object[] {"test9", new double[] {1}, new double[] {1}, 0}, + new Object[] {"test10", new double[] {60,70,70,60,60,60,60,60}, new double[] {60,60,60,60,60}, .91732119} + }; + } + + @Test(dataProvider = "oneSidedZTestData") + public void testOnesidedZ(String name, double[] series1, double[] series2, double Z) { + MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.FIRST_DOMINATES); + System.out.println("=========================================================="); + System.out.println("Series 1: " + Arrays.toString(series1)); + System.out.println("Series 2: " + Arrays.toString(series2)); + System.out.println(" U: " + test.getU()); + System.out.println(" Z: " + test.getZ()); + System.out.println("1-side p: " + test.getP()); + Assert.assertEquals(test.getZ(), Z, DELTA_PRECISION, name); + } }