Merge pull request #1349 from broadinstitute/ms_RankSum

Fixes RankSum Small Sample Size Edge Case #1341
This commit is contained in:
meganshand 2016-04-22 16:55:39 -04:00
commit b54904dd42
11 changed files with 126 additions and 91 deletions

View File

@ -107,7 +107,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("5120f5f419cb32b37d5c56101f4c5da2"));
Arrays.asList("d584454019f21729757ed7685ff8d02c"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -141,7 +141,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("8436bf4acb81f2c03bb5ecf8a514d90a"));
Arrays.asList("fe6a7fdd3f7c1c77b31c2676aeb0ed8d"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}

View File

@ -88,6 +88,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
//TODO the old MD5 is kept for the record.
//TODO this should be revisit once we get into addressing inaccuracies by the independent allele approach.
// executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "b5ff7530827f4b9039a58bdc8a3560d2");
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "8449ea4fa85eac8834b049a0e3daee5a");
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "6fcffeb383100c1a11f0f0bff0a6e426");
}
}

View File

@ -63,7 +63,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","2a0dcd92f3a07f45b7927e66404bc37e");
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","62ef94ffa0670c6467cf90b27cc59a5d");
}
@Test(enabled = true)

View File

@ -78,7 +78,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("f78975d22ac27a841d807765242d3c3f"));
Arrays.asList("8893a0ef99744757333ec6a85c31b753"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -105,7 +105,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("e89c2a0d30a409087af25fc06fd46404"));
Arrays.asList("92cbf13fd3ca797c2cacccf12890f40d"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -115,7 +115,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("af5475e45b6ffca990dd035448e6b0fe"));
Arrays.asList("618d6cdd823c9f88c03b58fc7ebf47b4"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
}
@ -125,7 +125,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("f9b4ed435a53198d30199e8a5c98252d"));
Arrays.asList("889e43aa0d2c9ce07064817268c965a4"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
}
@ -181,7 +181,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
Arrays.asList("0369f9c03df86486709a35a5a2f23192"));
Arrays.asList("8861c367dd20505e120af174a9d4f9f4"));
executeTest("test minIndelFraction 0.0", spec);
}
@ -189,7 +189,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
Arrays.asList("8bd1f903445255f24bd9ef677e1ba9a8"));
Arrays.asList("a3bb6d95764d6f5a7d2f78e09699d4ef"));
executeTest("test minIndelFraction 0.25", spec);
}

View File

@ -94,7 +94,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSLOD() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("45f867c73dc3008e874eb3c009cd9674"));
Arrays.asList("06d166ecf40563c234902d1b340a3ff1"));
executeTest("test SLOD", spec);
}
@ -102,7 +102,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNDA() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("905e9a2ae183fc152101314774707372"));
Arrays.asList("0538b7363d271f1f37d73dc3bd5b0071"));
executeTest("test NDA", spec);
}
@ -110,7 +110,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testCompTrack() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("ac14f19fa1e9cecc8246814eac393662"));
Arrays.asList("f54dafe8e9ac1402d230e4d83766d6cf"));
executeTest("test using comp track", spec);
}
@ -124,17 +124,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameterSitesOnly() {
testOutputParameters("-sites_only", "be100d3f04592d0865d907177ee8c7e4");
testOutputParameters("-sites_only", "dfd11f2f33dc4da50a2725bfb6f64dba");
}
@Test
public void testOutputParameterAllConfident() {
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "9024aa3ccf7339b873444b792ff5cb91");
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "a9c30d2bb179ea7a69dd0e7367fcd465");
}
@Test
public void testOutputParameterAllSites() {
testOutputParameters("--output_mode EMIT_ALL_SITES", "0d1e1fcc70fb4c159eac016f3085e5e9");
testOutputParameters("--output_mode EMIT_ALL_SITES", "791fbabf4a8a7a1d0735d57c4c6b4a5a");
}
private void testOutputParameters(final String args, final String md5) {
@ -148,7 +148,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("13d2cc1e0871f85a97fb1f73c052ea3e"));
Arrays.asList("38a2f8e5542f78c324e89d09eb545f07"));
executeTest("test confidence 1", spec1);
}
@ -156,7 +156,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNoPrior() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -inputPrior 0.33333 -inputPrior 0.33333", 1,
Arrays.asList("d94ab69be622fb23c97f524234eb05b3"));
Arrays.asList("6f96d079f5889d8555880e6cb614a41d"));
executeTest("test no prior 1", spec1);
}
@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testUserPrior() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -inputPrior 0.001 -inputPrior 0.495", 1,
Arrays.asList("2f3c5fa06537c8a3f440fb70837702f6"));
Arrays.asList("6e48c44b58960954e7e49810d9e3b3ad"));
executeTest("test user prior 1", spec1);
}
@ -174,7 +174,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void emitPLsAtAllSites() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --output_mode EMIT_ALL_SITES -allSitePLs", 1,
Arrays.asList("76f7a8508ad40cf35b68b07bc7e29a72"));
Arrays.asList("04630efda71e0e804cfa4b2e7461c083"));
// GDA: TODO: BCF encoder/decoder doesn't seem to support non-standard values in genotype fields. IE even if there is a field defined in FORMAT and in the header the BCF2 encoder will still fail
spec1.disableShadowBCF();
@ -190,12 +190,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity1() {
testHeterozosity( 0.01, "e3c708bfeea99835512c1fac477d02e4" );
testHeterozosity( 0.01, "3e6328e822a92911e591733438fe023c" );
}
@Test
public void testHeterozyosity2() {
testHeterozosity( 1.0 / 1850, "1226aaa771b8f057646474d306a84866" );
testHeterozosity( 1.0 / 1850, "e87e2ad4a6d86ba9cc627b397af9c681" );
}
private void testHeterozosity(final double arg, final String md5) {
@ -274,7 +274,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("589f9dd567bb86ce74e5d48fd7cae0b2"));
Arrays.asList("9363de1caf569d565e0c377fe234f170"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -293,7 +293,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("be978fc8807fcbd514b2678adcdc2a28"));
Arrays.asList("5e9d48c296d50d553ae13f283b75e1ee"));
executeTest(String.format("test calling with BAQ"), spec);
}

View File

@ -94,7 +94,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("21b346710ccdbbfc904eafda2b0c7443"));
Arrays.asList("4cc795e0e2ec1ce31fd6545fb24a850e"));
executeTest("test SingleSample Pilot2", spec);
}

View File

@ -270,7 +270,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
public void testWrongGVCFNonVariantRecordOrderBugFix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("a22bafa2f91c197ea1ab13e02a0c08fb"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("1c3d390b467a9b0e1e307419796142fd"));
spec.disableShadowBCF();
executeTest("testMissingGVCFIndexingStrategyException", spec);
}
@ -320,7 +320,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
public void testAlleleSpecificAnnotations() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -G Standard -G AS_Standard --disableDithering",
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9db19d4706e32ce883c6e3f71e5f4178"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("169c2145c9981b8a1bb1d64a6d776d66"));
spec.disableShadowBCF();
executeTest(" testAlleleSpecificAnnotations", spec);
}
@ -329,7 +329,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
public void testASMQMateRankSumAnnotation() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -A AS_MQMateRankSumTest --disableDithering",
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("85b26109c4586ba33c44e253297050be"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("75b78770469c5aaa73f1c95db8fda574"));
spec.disableShadowBCF();
executeTest(" testASMQMateRankSumAnnotation", spec);
}
@ -338,7 +338,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
public void testASInsertSizeRankSum() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -G Standard -G AS_Standard --disableDithering -A AS_InsertSizeRankSum",
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("f1531ed03d6f9538d4d6af7630ff2502"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4fd0fcf44b1121c15e9d38de4171002c"));
spec.disableShadowBCF();
executeTest(" testASInsertSizeRankSum", spec);
}

View File

@ -106,7 +106,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeBAMOutFlags() throws IOException {
HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "b49e15b4680d197db9b9f71644a5a1f9", "f84c8bc44b6af548bd3b8555a068b59e");
HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "1ad8934dc0ea624ffeb89d3e877176b2", "f84c8bc44b6af548bd3b8555a068b59e");
}
@Test
@ -202,7 +202,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "6f5678f8fb6a4c0b21df1e6b0d1019ee");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "a048ea02eeb9610660e31f36e6114bf4");
}
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
@ -249,7 +249,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("392ac1966cfa1cae78e8d674065a0d28"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("188ce2d74ee42f4187c7b41a01a193bb"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}

View File

@ -566,7 +566,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
public void testAlleleSpecificAnnotations() {
final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V "
+ privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.chr20snippet.g.vcf";
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("7261cde50d4556c6dc190b10222a8538"));
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("d4dd179d8a53c4a550d4a22cc9ef1aa8"));
spec.disableShadowBCF();
executeTest("testAlleleSpecificAnnotations", spec);
}
@ -575,7 +575,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
public void testASMateRankSumAnnotation() {
final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard -A AS_MQMateRankSumTest --disableDithering -V "
+ privateTestDir + "NA12878.AS.MateRankSum.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.MateRankSum.chr20snippet.g.vcf";
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("08dc30527c98d78d09f8575efc3b84ae"));
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("515771fa88b0c4bc2a40e9c233806fb1"));
spec.disableShadowBCF();
executeTest("testASMateRankSumAnnotation", spec);
}
@ -584,7 +584,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
public void testASInsertSizeRankSumAnnotation() {
final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V "
+ privateTestDir + "NA12878.AS.InsertSizeRankSum.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.InsertSizeRankSum.chr20snippet.g.vcf";
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("723522fe1d05f2d11efda008b65884d2"));
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("c8a096f2533d06c28ec115a24ffb7ca0"));
spec.disableShadowBCF();
executeTest("testASInsertSizeRankSumAnnotation", spec);
}
@ -597,7 +597,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
public void testAlleleSpecificAnnotations_oneSample() {
final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V "
+ privateTestDir + "NA12878.AS.chr20snippet.g.vcf";
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("cc2fe42eafd9789be49b550a639a7a1b"));
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("558486182a84d0a274534fc00fee326e"));
spec.disableShadowBCF();
executeTest("testAlleleSpecificAnnotations_oneSample", spec);
}
@ -625,7 +625,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
public void testFractionInformativeReads() {
final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -G AS_Standard -o %s --no_cmdline_in_header -A FractionInformativeReads --disableDithering -V "
+ privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.chr20snippet.g.vcf";
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList("16dbeec9d640e61453e902918427343a"));
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList("70fda103f68709e32d691393e9228a9b"));
spec.disableShadowBCF();
executeTest("testAlleleSpecificAnnotations", spec);
}

View File

@ -27,6 +27,7 @@ package org.broadinstitute.gatk.utils;
import htsjdk.samtools.util.Histogram;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.log4j.Logger;
import java.util.*;
@ -39,6 +40,8 @@ import java.util.concurrent.ConcurrentHashMap;
*/
public class MannWhitneyU {
protected static Logger logger = Logger.getLogger(MannWhitneyU.class);
private static final class Rank implements Comparable<Rank> {
final double value;
float rank;
@ -188,10 +191,9 @@ public class MannWhitneyU {
}
}
// Constructs a normal distribution; actual values of mean and SD don't matter since it's
// just used to convert a z-score into a cumulative probability
private static final double NORMAL_MEAN = 100;
private static final double NORMAL_SD = 15;
// Constructs a normal distribution; this needs to be a standard normal in order to get a Z-score in the exact case
private static final double NORMAL_MEAN = 0;
private static final double NORMAL_SD = 1;
private static final NormalDistribution NORMAL = new NormalDistribution(NORMAL_MEAN, NORMAL_SD);
/**
@ -206,11 +208,6 @@ public class MannWhitneyU {
*/
private int minimumNormalN = 10;
/**
* If the exact p-value is 1 then the z-score is infinite, so we need a max cutoff instead.
*/
private double maxZScore = 9.0;
/**
* Sets the minimum number of values in each data series to use the normal distribution approximation.
*/
@ -289,7 +286,8 @@ public class MannWhitneyU {
for (int count : numOfTies) {
//If every single datapoint is tied then we want to return a p-value of .5 and
//the formula for sigma that includes the number of ties breaks down. Setting
//the number of ties to 0 in this case gets the desired result.
//the number of ties to 0 in this case gets the desired result in the normal
//approximation case.
if (count != ranks.length) {
transformedTies.add((count * count * count) - count);
}
@ -424,15 +422,11 @@ public class MannWhitneyU {
}
} else {
// TODO -- This exact test is only implemented for the one sided test, but we currently don't call the two sided version
Histogram<Double> distribution = permutationTest(series1, series2);
p = distribution.getCumulativeProbability(u);
if (p == 1) {
z = maxZScore;
} else if (p == 0) {
z = -1.0 * maxZScore;
} else {
z = NORMAL.inverseCumulativeProbability(p);
if (whichSide != TestType.FIRST_DOMINATES) {
logger.warn("An exact two-sided MannWhitneyU test was called. Only the one-sided exact test is implemented, use the approximation instead by setting minimumNormalN to 0.");
}
p = permutationTest(series1, series2, u);
z = NORMAL.inverseCumulativeProbability(p);
}
return new Result(u, z, p, Math.abs(median(series1) - median(series2)));
@ -505,9 +499,10 @@ public class MannWhitneyU {
*
* @param series1 Data from group 1
* @param series2 Data from group 2
* @return Histogram with u calculated for every possible permutation of group tag.
* @param testStatU Test statistic U from observed data
* @return P-value based on histogram with u calculated for every possible permutation of group tag.
*/
public Histogram permutationTest(final double[] series1, final double[] series2) {
public double permutationTest(final double[] series1, final double[] series2, final double testStatU) {
final Histogram<Double> histo = new Histogram<Double>();
final int n1 = series1.length;
final int n2 = series2.length;
@ -552,7 +547,19 @@ public class MannWhitneyU {
histo.increment(newU);
}
return histo;
/**
* In order to deal with edge cases where the observed value is also the most extreme value, we are taking half
* of the count in the observed bin plus everything more extreme (in the FIRST_DOMINATES case the smaller bins)
* and dividing by the total count of everything in the histogram. Just using getCumulativeDistribution() gives
* a p-value of 1 in the most extreme case which doesn't result in a usable z-score.
*/
double sumOfAllSmallerBins = histo.get(testStatU).getValue() / 2.0;
for (final Histogram<Double>.Bin bin : histo.values()) {
if (bin.getId() < testStatU) sumOfAllSmallerBins += bin.getValue();
}
return sumOfAllSmallerBins / histo.getCount();
}
}

View File

@ -47,30 +47,6 @@ public class MWUnitTest extends BaseTest {
private static final MannWhitneyU rst = new MannWhitneyU();
@Test(dataProvider = "rankSumTestData")
public void testSimpleU(String name, double[] series1, double[] series2, double U) {
MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.TWO_SIDED);
System.out.println("==========================================================");
System.out.println("Series 1: " + Arrays.toString(series1));
System.out.println("Series 2: " + Arrays.toString(series2));
System.out.println(" U: " + test.getU());
System.out.println(" Z: " + test.getZ());
System.out.println("2-side p: " + test.getP());
Assert.assertEquals(test.getU(), U, name);
}
@Test(dataProvider = "oneSidedPTestData")
public void testOnesidedP(String name, double[] series1, double[] series2, double P) {
MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.FIRST_DOMINATES);
System.out.println("==========================================================");
System.out.println("Series 1: " + Arrays.toString(series1));
System.out.println("Series 2: " + Arrays.toString(series2));
System.out.println(" U: " + test.getU());
System.out.println(" Z: " + test.getZ());
System.out.println("1-side p: " + test.getP());
Assert.assertEquals(test.getP(), P, DELTA_PRECISION, name);
}
@DataProvider(name="rankSumTestData")
public Object[][] dataProvider() {
return new Object[][] {
@ -94,21 +70,73 @@ public class MWUnitTest extends BaseTest {
};
}
@Test(dataProvider = "rankSumTestData")
public void testSimpleU(String name, double[] series1, double[] series2, double U) {
MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.TWO_SIDED);
System.out.println("==========================================================");
System.out.println("Series 1: " + Arrays.toString(series1));
System.out.println("Series 2: " + Arrays.toString(series2));
System.out.println(" U: " + test.getU());
System.out.println(" Z: " + test.getZ());
System.out.println("2-side p: " + test.getP());
Assert.assertEquals(test.getU(), U, name);
}
@DataProvider(name="oneSidedPTestData")
public Object[][] oneSidedDataProvider() {
return new Object[][] {
new Object[] {"test0", new double[] {0,0}, new double[] {1,1}, .16666666},
new Object[] {"test1", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,21}, .5},
new Object[] {"test2", new double[] {20,20,20,20,21}, new double[] {20,20,20,20,21}, .77778},
new Object[] {"test3", new double[] {13,14,15,15,16}, new double[] {16,20,20,21,21}, 0.0079365},
new Object[] {"test0", new double[] {0,0}, new double[] {1,1}, 0.083333333},
new Object[] {"test1", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,21}, .25},
new Object[] {"test2", new double[] {20,20,20,20,21}, new double[] {20,20,20,20,21}, .5},
new Object[] {"test3", new double[] {13,14,15,15,16}, new double[] {16,20,20,21,21}, 0.00396825},
new Object[] {"test4", new double[] {13,14,15,15,16}, new double[] {16,20,20,21,21,21,21,22,23,27}, 0.001469192},
new Object[] {"test5", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,20,20,20,20,20,20}, .5},
new Object[] {"test6", new double[] {1,2,3,4,5}, new double[] {6,7,8,9,10}, .00396},
new Object[] {"test7", new double[] {6,7,8,9,10}, new double[] {1,2,3,4,5}, 1},
new Object[] {"test6", new double[] {1,2,3,4,5}, new double[] {6,7,8,9,10}, 0.001984},
new Object[] {"test7", new double[] {6,7,8,9,10}, new double[] {1,2,3,4,5}, 0.99801587},
new Object[] {"test8", new double[] {16,20,20,21,21,21,21,22,23,27,16,20,20,21,21,21,21,22,23,27},
new double[] {22,23,27,16,20,20,21,21,21,21,22,23,27,60,60}, .08303102},
new Object[] {"test8", new double[] {16,20,20,21,21,21,21,21,20},
new double[] {22,23,27,16,20,20,20,20,21}, .395763},
new double[] {22,23,27,16,20,20,21,21,21,21,22,23,27,60,60}, .08303102},
new Object[] {"test9", new double[] {16,20,20,21,21,21,21,21,20},
new double[] {22,23,27,16,20,20,20,20,21}, 0.388204},
};
}
@Test(dataProvider = "oneSidedPTestData")
public void testOnesidedP(String name, double[] series1, double[] series2, double P) {
MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.FIRST_DOMINATES);
System.out.println("==========================================================");
System.out.println("Series 1: " + Arrays.toString(series1));
System.out.println("Series 2: " + Arrays.toString(series2));
System.out.println(" U: " + test.getU());
System.out.println(" Z: " + test.getZ());
System.out.println("1-side p: " + test.getP());
Assert.assertEquals(test.getP(), P, DELTA_PRECISION, name);
}
@DataProvider(name="oneSidedZTestData")
public Object[][] oneSidedZDataProvider() {
return new Object[][] {
new Object[] {"test1", new double[] {20}, new double[] {20,20,20}, 0},
new Object[] {"test2", new double[] {1}, new double[] {1,2,3}, -0.67448975},
new Object[] {"test3", new double[] {1,2,3}, new double[] {3}, -0.67448975},
new Object[] {"test4", new double[] {1,2,3}, new double[] {1}, 0.67448975},
new Object[] {"test5", new double[] {3,3}, new double[] {1,2,3}, 1.036433},
new Object[] {"test6", new double[] {20,20,20}, new double[] {20,20,20}, 0},
new Object[] {"test7", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,20,20,20,20,20,20,20,20,20}, 0},
new Object[] {"test8", new double[] {1}, new double[] {2}, -0.67448975},
new Object[] {"test9", new double[] {1}, new double[] {1}, 0},
new Object[] {"test10", new double[] {60,70,70,60,60,60,60,60}, new double[] {60,60,60,60,60}, .91732119}
};
}
@Test(dataProvider = "oneSidedZTestData")
public void testOnesidedZ(String name, double[] series1, double[] series2, double Z) {
MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.FIRST_DOMINATES);
System.out.println("==========================================================");
System.out.println("Series 1: " + Arrays.toString(series1));
System.out.println("Series 2: " + Arrays.toString(series2));
System.out.println(" U: " + test.getU());
System.out.println(" Z: " + test.getZ());
System.out.println("1-side p: " + test.getP());
Assert.assertEquals(test.getZ(), Z, DELTA_PRECISION, name);
}
}