Merge pull request #1349 from broadinstitute/ms_RankSum
Fixes RankSum Small Sample Size Edge Case #1341
This commit is contained in:
commit
b54904dd42
|
|
@ -107,7 +107,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("5120f5f419cb32b37d5c56101f4c5da2"));
|
||||
Arrays.asList("d584454019f21729757ed7685ff8d02c"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -141,7 +141,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + STANDARD_ANNOTATIONS + "--variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("8436bf4acb81f2c03bb5ecf8a514d90a"));
|
||||
Arrays.asList("fe6a7fdd3f7c1c77b31c2676aeb0ed8d"));
|
||||
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -88,6 +88,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
|
|||
//TODO the old MD5 is kept for the record.
|
||||
//TODO this should be revisit once we get into addressing inaccuracies by the independent allele approach.
|
||||
// executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "b5ff7530827f4b9039a58bdc8a3560d2");
|
||||
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "8449ea4fa85eac8834b049a0e3daee5a");
|
||||
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "6fcffeb383100c1a11f0f0bff0a6e426");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","2a0dcd92f3a07f45b7927e66404bc37e");
|
||||
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","62ef94ffa0670c6467cf90b27cc59a5d");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("f78975d22ac27a841d807765242d3c3f"));
|
||||
Arrays.asList("8893a0ef99744757333ec6a85c31b753"));
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
||||
|
|
@ -105,7 +105,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("e89c2a0d30a409087af25fc06fd46404"));
|
||||
Arrays.asList("92cbf13fd3ca797c2cacccf12890f40d"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -115,7 +115,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("af5475e45b6ffca990dd035448e6b0fe"));
|
||||
Arrays.asList("618d6cdd823c9f88c03b58fc7ebf47b4"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -125,7 +125,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("f9b4ed435a53198d30199e8a5c98252d"));
|
||||
Arrays.asList("889e43aa0d2c9ce07064817268c965a4"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -181,7 +181,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("0369f9c03df86486709a35a5a2f23192"));
|
||||
Arrays.asList("8861c367dd20505e120af174a9d4f9f4"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -189,7 +189,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("8bd1f903445255f24bd9ef677e1ba9a8"));
|
||||
Arrays.asList("a3bb6d95764d6f5a7d2f78e09699d4ef"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSLOD() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("45f867c73dc3008e874eb3c009cd9674"));
|
||||
Arrays.asList("06d166ecf40563c234902d1b340a3ff1"));
|
||||
executeTest("test SLOD", spec);
|
||||
}
|
||||
|
||||
|
|
@ -102,7 +102,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNDA() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("905e9a2ae183fc152101314774707372"));
|
||||
Arrays.asList("0538b7363d271f1f37d73dc3bd5b0071"));
|
||||
executeTest("test NDA", spec);
|
||||
}
|
||||
|
||||
|
|
@ -110,7 +110,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testCompTrack() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("ac14f19fa1e9cecc8246814eac393662"));
|
||||
Arrays.asList("f54dafe8e9ac1402d230e4d83766d6cf"));
|
||||
executeTest("test using comp track", spec);
|
||||
}
|
||||
|
||||
|
|
@ -124,17 +124,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testOutputParameterSitesOnly() {
|
||||
testOutputParameters("-sites_only", "be100d3f04592d0865d907177ee8c7e4");
|
||||
testOutputParameters("-sites_only", "dfd11f2f33dc4da50a2725bfb6f64dba");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllConfident() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "9024aa3ccf7339b873444b792ff5cb91");
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "a9c30d2bb179ea7a69dd0e7367fcd465");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllSites() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "0d1e1fcc70fb4c159eac016f3085e5e9");
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "791fbabf4a8a7a1d0735d57c4c6b4a5a");
|
||||
}
|
||||
|
||||
private void testOutputParameters(final String args, final String md5) {
|
||||
|
|
@ -148,7 +148,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("13d2cc1e0871f85a97fb1f73c052ea3e"));
|
||||
Arrays.asList("38a2f8e5542f78c324e89d09eb545f07"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -156,7 +156,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNoPrior() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -inputPrior 0.33333 -inputPrior 0.33333", 1,
|
||||
Arrays.asList("d94ab69be622fb23c97f524234eb05b3"));
|
||||
Arrays.asList("6f96d079f5889d8555880e6cb614a41d"));
|
||||
executeTest("test no prior 1", spec1);
|
||||
|
||||
}
|
||||
|
|
@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testUserPrior() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -inputPrior 0.001 -inputPrior 0.495", 1,
|
||||
Arrays.asList("2f3c5fa06537c8a3f440fb70837702f6"));
|
||||
Arrays.asList("6e48c44b58960954e7e49810d9e3b3ad"));
|
||||
executeTest("test user prior 1", spec1);
|
||||
|
||||
}
|
||||
|
|
@ -174,7 +174,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void emitPLsAtAllSites() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --output_mode EMIT_ALL_SITES -allSitePLs", 1,
|
||||
Arrays.asList("76f7a8508ad40cf35b68b07bc7e29a72"));
|
||||
Arrays.asList("04630efda71e0e804cfa4b2e7461c083"));
|
||||
// GDA: TODO: BCF encoder/decoder doesn't seem to support non-standard values in genotype fields. IE even if there is a field defined in FORMAT and in the header the BCF2 encoder will still fail
|
||||
spec1.disableShadowBCF();
|
||||
|
||||
|
|
@ -190,12 +190,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHeterozyosity1() {
|
||||
testHeterozosity( 0.01, "e3c708bfeea99835512c1fac477d02e4" );
|
||||
testHeterozosity( 0.01, "3e6328e822a92911e591733438fe023c" );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeterozyosity2() {
|
||||
testHeterozosity( 1.0 / 1850, "1226aaa771b8f057646474d306a84866" );
|
||||
testHeterozosity( 1.0 / 1850, "e87e2ad4a6d86ba9cc627b397af9c681" );
|
||||
}
|
||||
|
||||
private void testHeterozosity(final double arg, final String md5) {
|
||||
|
|
@ -274,7 +274,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("589f9dd567bb86ce74e5d48fd7cae0b2"));
|
||||
Arrays.asList("9363de1caf569d565e0c377fe234f170"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -293,7 +293,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("be978fc8807fcbd514b2678adcdc2a28"));
|
||||
Arrays.asList("5e9d48c296d50d553ae13f283b75e1ee"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("21b346710ccdbbfc904eafda2b0c7443"));
|
||||
Arrays.asList("4cc795e0e2ec1ce31fd6545fb24a850e"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -270,7 +270,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
public void testWrongGVCFNonVariantRecordOrderBugFix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("a22bafa2f91c197ea1ab13e02a0c08fb"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("1c3d390b467a9b0e1e307419796142fd"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testMissingGVCFIndexingStrategyException", spec);
|
||||
}
|
||||
|
|
@ -320,7 +320,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
public void testAlleleSpecificAnnotations() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -G Standard -G AS_Standard --disableDithering",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9db19d4706e32ce883c6e3f71e5f4178"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("169c2145c9981b8a1bb1d64a6d776d66"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest(" testAlleleSpecificAnnotations", spec);
|
||||
}
|
||||
|
|
@ -329,7 +329,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
public void testASMQMateRankSumAnnotation() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -A AS_MQMateRankSumTest --disableDithering",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("85b26109c4586ba33c44e253297050be"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("75b78770469c5aaa73f1c95db8fda574"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest(" testASMQMateRankSumAnnotation", spec);
|
||||
}
|
||||
|
|
@ -338,7 +338,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
public void testASInsertSizeRankSum() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -G Standard -G AS_Standard --disableDithering -A AS_InsertSizeRankSum",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReference, privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "20:10433000-10437000", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("f1531ed03d6f9538d4d6af7630ff2502"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4fd0fcf44b1121c15e9d38de4171002c"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest(" testASInsertSizeRankSum", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeBAMOutFlags() throws IOException {
|
||||
HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "b49e15b4680d197db9b9f71644a5a1f9", "f84c8bc44b6af548bd3b8555a068b59e");
|
||||
HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "1ad8934dc0ea624ffeb89d3e877176b2", "f84c8bc44b6af548bd3b8555a068b59e");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -202,7 +202,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "6f5678f8fb6a4c0b21df1e6b0d1019ee");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "a048ea02eeb9610660e31f36e6114bf4");
|
||||
}
|
||||
|
||||
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
|
||||
|
|
@ -249,7 +249,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("392ac1966cfa1cae78e8d674065a0d28"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("188ce2d74ee42f4187c7b41a01a193bb"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -566,7 +566,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
public void testAlleleSpecificAnnotations() {
|
||||
final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V "
|
||||
+ privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.chr20snippet.g.vcf";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("7261cde50d4556c6dc190b10222a8538"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("d4dd179d8a53c4a550d4a22cc9ef1aa8"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testAlleleSpecificAnnotations", spec);
|
||||
}
|
||||
|
|
@ -575,7 +575,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
public void testASMateRankSumAnnotation() {
|
||||
final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard -A AS_MQMateRankSumTest --disableDithering -V "
|
||||
+ privateTestDir + "NA12878.AS.MateRankSum.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.MateRankSum.chr20snippet.g.vcf";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("08dc30527c98d78d09f8575efc3b84ae"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("515771fa88b0c4bc2a40e9c233806fb1"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testASMateRankSumAnnotation", spec);
|
||||
}
|
||||
|
|
@ -584,7 +584,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
public void testASInsertSizeRankSumAnnotation() {
|
||||
final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V "
|
||||
+ privateTestDir + "NA12878.AS.InsertSizeRankSum.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.InsertSizeRankSum.chr20snippet.g.vcf";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("723522fe1d05f2d11efda008b65884d2"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("c8a096f2533d06c28ec115a24ffb7ca0"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testASInsertSizeRankSumAnnotation", spec);
|
||||
}
|
||||
|
|
@ -597,7 +597,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
public void testAlleleSpecificAnnotations_oneSample() {
|
||||
final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -G Standard -G AS_Standard --disableDithering -V "
|
||||
+ privateTestDir + "NA12878.AS.chr20snippet.g.vcf";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("cc2fe42eafd9789be49b550a639a7a1b"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("558486182a84d0a274534fc00fee326e"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testAlleleSpecificAnnotations_oneSample", spec);
|
||||
}
|
||||
|
|
@ -625,7 +625,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
public void testFractionInformativeReads() {
|
||||
final String cmd = "-T GenotypeGVCFs -R " + b37KGReference + " -G AS_Standard -o %s --no_cmdline_in_header -A FractionInformativeReads --disableDithering -V "
|
||||
+ privateTestDir + "NA12878.AS.chr20snippet.g.vcf -V " + privateTestDir + "NA12891.AS.chr20snippet.g.vcf";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList("16dbeec9d640e61453e902918427343a"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Collections.singletonList("70fda103f68709e32d691393e9228a9b"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testAlleleSpecificAnnotations", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.gatk.utils;
|
|||
|
||||
import htsjdk.samtools.util.Histogram;
|
||||
import org.apache.commons.math3.distribution.NormalDistribution;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -39,6 +40,8 @@ import java.util.concurrent.ConcurrentHashMap;
|
|||
*/
|
||||
public class MannWhitneyU {
|
||||
|
||||
protected static Logger logger = Logger.getLogger(MannWhitneyU.class);
|
||||
|
||||
private static final class Rank implements Comparable<Rank> {
|
||||
final double value;
|
||||
float rank;
|
||||
|
|
@ -188,10 +191,9 @@ public class MannWhitneyU {
|
|||
}
|
||||
}
|
||||
|
||||
// Constructs a normal distribution; actual values of mean and SD don't matter since it's
|
||||
// just used to convert a z-score into a cumulative probability
|
||||
private static final double NORMAL_MEAN = 100;
|
||||
private static final double NORMAL_SD = 15;
|
||||
// Constructs a normal distribution; this needs to be a standard normal in order to get a Z-score in the exact case
|
||||
private static final double NORMAL_MEAN = 0;
|
||||
private static final double NORMAL_SD = 1;
|
||||
private static final NormalDistribution NORMAL = new NormalDistribution(NORMAL_MEAN, NORMAL_SD);
|
||||
|
||||
/**
|
||||
|
|
@ -206,11 +208,6 @@ public class MannWhitneyU {
|
|||
*/
|
||||
private int minimumNormalN = 10;
|
||||
|
||||
/**
|
||||
* If the exact p-value is 1 then the z-score is infinite, so we need a max cutoff instead.
|
||||
*/
|
||||
private double maxZScore = 9.0;
|
||||
|
||||
/**
|
||||
* Sets the minimum number of values in each data series to use the normal distribution approximation.
|
||||
*/
|
||||
|
|
@ -289,7 +286,8 @@ public class MannWhitneyU {
|
|||
for (int count : numOfTies) {
|
||||
//If every single datapoint is tied then we want to return a p-value of .5 and
|
||||
//the formula for sigma that includes the number of ties breaks down. Setting
|
||||
//the number of ties to 0 in this case gets the desired result.
|
||||
//the number of ties to 0 in this case gets the desired result in the normal
|
||||
//approximation case.
|
||||
if (count != ranks.length) {
|
||||
transformedTies.add((count * count * count) - count);
|
||||
}
|
||||
|
|
@ -424,15 +422,11 @@ public class MannWhitneyU {
|
|||
}
|
||||
} else {
|
||||
// TODO -- This exact test is only implemented for the one sided test, but we currently don't call the two sided version
|
||||
Histogram<Double> distribution = permutationTest(series1, series2);
|
||||
p = distribution.getCumulativeProbability(u);
|
||||
if (p == 1) {
|
||||
z = maxZScore;
|
||||
} else if (p == 0) {
|
||||
z = -1.0 * maxZScore;
|
||||
} else {
|
||||
z = NORMAL.inverseCumulativeProbability(p);
|
||||
if (whichSide != TestType.FIRST_DOMINATES) {
|
||||
logger.warn("An exact two-sided MannWhitneyU test was called. Only the one-sided exact test is implemented, use the approximation instead by setting minimumNormalN to 0.");
|
||||
}
|
||||
p = permutationTest(series1, series2, u);
|
||||
z = NORMAL.inverseCumulativeProbability(p);
|
||||
}
|
||||
|
||||
return new Result(u, z, p, Math.abs(median(series1) - median(series2)));
|
||||
|
|
@ -505,9 +499,10 @@ public class MannWhitneyU {
|
|||
*
|
||||
* @param series1 Data from group 1
|
||||
* @param series2 Data from group 2
|
||||
* @return Histogram with u calculated for every possible permutation of group tag.
|
||||
* @param testStatU Test statistic U from observed data
|
||||
* @return P-value based on histogram with u calculated for every possible permutation of group tag.
|
||||
*/
|
||||
public Histogram permutationTest(final double[] series1, final double[] series2) {
|
||||
public double permutationTest(final double[] series1, final double[] series2, final double testStatU) {
|
||||
final Histogram<Double> histo = new Histogram<Double>();
|
||||
final int n1 = series1.length;
|
||||
final int n2 = series2.length;
|
||||
|
|
@ -552,7 +547,19 @@ public class MannWhitneyU {
|
|||
histo.increment(newU);
|
||||
}
|
||||
|
||||
return histo;
|
||||
/**
|
||||
* In order to deal with edge cases where the observed value is also the most extreme value, we are taking half
|
||||
* of the count in the observed bin plus everything more extreme (in the FIRST_DOMINATES case the smaller bins)
|
||||
* and dividing by the total count of everything in the histogram. Just using getCumulativeDistribution() gives
|
||||
* a p-value of 1 in the most extreme case which doesn't result in a usable z-score.
|
||||
*/
|
||||
double sumOfAllSmallerBins = histo.get(testStatU).getValue() / 2.0;
|
||||
|
||||
for (final Histogram<Double>.Bin bin : histo.values()) {
|
||||
if (bin.getId() < testStatU) sumOfAllSmallerBins += bin.getValue();
|
||||
}
|
||||
|
||||
return sumOfAllSmallerBins / histo.getCount();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,30 +47,6 @@ public class MWUnitTest extends BaseTest {
|
|||
|
||||
private static final MannWhitneyU rst = new MannWhitneyU();
|
||||
|
||||
@Test(dataProvider = "rankSumTestData")
|
||||
public void testSimpleU(String name, double[] series1, double[] series2, double U) {
|
||||
MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.TWO_SIDED);
|
||||
System.out.println("==========================================================");
|
||||
System.out.println("Series 1: " + Arrays.toString(series1));
|
||||
System.out.println("Series 2: " + Arrays.toString(series2));
|
||||
System.out.println(" U: " + test.getU());
|
||||
System.out.println(" Z: " + test.getZ());
|
||||
System.out.println("2-side p: " + test.getP());
|
||||
Assert.assertEquals(test.getU(), U, name);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "oneSidedPTestData")
|
||||
public void testOnesidedP(String name, double[] series1, double[] series2, double P) {
|
||||
MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.FIRST_DOMINATES);
|
||||
System.out.println("==========================================================");
|
||||
System.out.println("Series 1: " + Arrays.toString(series1));
|
||||
System.out.println("Series 2: " + Arrays.toString(series2));
|
||||
System.out.println(" U: " + test.getU());
|
||||
System.out.println(" Z: " + test.getZ());
|
||||
System.out.println("1-side p: " + test.getP());
|
||||
Assert.assertEquals(test.getP(), P, DELTA_PRECISION, name);
|
||||
}
|
||||
|
||||
@DataProvider(name="rankSumTestData")
|
||||
public Object[][] dataProvider() {
|
||||
return new Object[][] {
|
||||
|
|
@ -94,21 +70,73 @@ public class MWUnitTest extends BaseTest {
|
|||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "rankSumTestData")
|
||||
public void testSimpleU(String name, double[] series1, double[] series2, double U) {
|
||||
MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.TWO_SIDED);
|
||||
System.out.println("==========================================================");
|
||||
System.out.println("Series 1: " + Arrays.toString(series1));
|
||||
System.out.println("Series 2: " + Arrays.toString(series2));
|
||||
System.out.println(" U: " + test.getU());
|
||||
System.out.println(" Z: " + test.getZ());
|
||||
System.out.println("2-side p: " + test.getP());
|
||||
Assert.assertEquals(test.getU(), U, name);
|
||||
}
|
||||
|
||||
@DataProvider(name="oneSidedPTestData")
|
||||
public Object[][] oneSidedDataProvider() {
|
||||
return new Object[][] {
|
||||
new Object[] {"test0", new double[] {0,0}, new double[] {1,1}, .16666666},
|
||||
new Object[] {"test1", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,21}, .5},
|
||||
new Object[] {"test2", new double[] {20,20,20,20,21}, new double[] {20,20,20,20,21}, .77778},
|
||||
new Object[] {"test3", new double[] {13,14,15,15,16}, new double[] {16,20,20,21,21}, 0.0079365},
|
||||
new Object[] {"test0", new double[] {0,0}, new double[] {1,1}, 0.083333333},
|
||||
new Object[] {"test1", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,21}, .25},
|
||||
new Object[] {"test2", new double[] {20,20,20,20,21}, new double[] {20,20,20,20,21}, .5},
|
||||
new Object[] {"test3", new double[] {13,14,15,15,16}, new double[] {16,20,20,21,21}, 0.00396825},
|
||||
new Object[] {"test4", new double[] {13,14,15,15,16}, new double[] {16,20,20,21,21,21,21,22,23,27}, 0.001469192},
|
||||
new Object[] {"test5", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,20,20,20,20,20,20}, .5},
|
||||
new Object[] {"test6", new double[] {1,2,3,4,5}, new double[] {6,7,8,9,10}, .00396},
|
||||
new Object[] {"test7", new double[] {6,7,8,9,10}, new double[] {1,2,3,4,5}, 1},
|
||||
new Object[] {"test6", new double[] {1,2,3,4,5}, new double[] {6,7,8,9,10}, 0.001984},
|
||||
new Object[] {"test7", new double[] {6,7,8,9,10}, new double[] {1,2,3,4,5}, 0.99801587},
|
||||
new Object[] {"test8", new double[] {16,20,20,21,21,21,21,22,23,27,16,20,20,21,21,21,21,22,23,27},
|
||||
new double[] {22,23,27,16,20,20,21,21,21,21,22,23,27,60,60}, .08303102},
|
||||
new Object[] {"test8", new double[] {16,20,20,21,21,21,21,21,20},
|
||||
new double[] {22,23,27,16,20,20,20,20,21}, .395763},
|
||||
new double[] {22,23,27,16,20,20,21,21,21,21,22,23,27,60,60}, .08303102},
|
||||
new Object[] {"test9", new double[] {16,20,20,21,21,21,21,21,20},
|
||||
new double[] {22,23,27,16,20,20,20,20,21}, 0.388204},
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "oneSidedPTestData")
|
||||
public void testOnesidedP(String name, double[] series1, double[] series2, double P) {
|
||||
MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.FIRST_DOMINATES);
|
||||
System.out.println("==========================================================");
|
||||
System.out.println("Series 1: " + Arrays.toString(series1));
|
||||
System.out.println("Series 2: " + Arrays.toString(series2));
|
||||
System.out.println(" U: " + test.getU());
|
||||
System.out.println(" Z: " + test.getZ());
|
||||
System.out.println("1-side p: " + test.getP());
|
||||
Assert.assertEquals(test.getP(), P, DELTA_PRECISION, name);
|
||||
}
|
||||
|
||||
@DataProvider(name="oneSidedZTestData")
|
||||
public Object[][] oneSidedZDataProvider() {
|
||||
return new Object[][] {
|
||||
new Object[] {"test1", new double[] {20}, new double[] {20,20,20}, 0},
|
||||
new Object[] {"test2", new double[] {1}, new double[] {1,2,3}, -0.67448975},
|
||||
new Object[] {"test3", new double[] {1,2,3}, new double[] {3}, -0.67448975},
|
||||
new Object[] {"test4", new double[] {1,2,3}, new double[] {1}, 0.67448975},
|
||||
new Object[] {"test5", new double[] {3,3}, new double[] {1,2,3}, 1.036433},
|
||||
new Object[] {"test6", new double[] {20,20,20}, new double[] {20,20,20}, 0},
|
||||
new Object[] {"test7", new double[] {20,20,20,20,20}, new double[] {20,20,20,20,20,20,20,20,20,20,20,20,20}, 0},
|
||||
new Object[] {"test8", new double[] {1}, new double[] {2}, -0.67448975},
|
||||
new Object[] {"test9", new double[] {1}, new double[] {1}, 0},
|
||||
new Object[] {"test10", new double[] {60,70,70,60,60,60,60,60}, new double[] {60,60,60,60,60}, .91732119}
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "oneSidedZTestData")
|
||||
public void testOnesidedZ(String name, double[] series1, double[] series2, double Z) {
|
||||
MannWhitneyU.Result test = rst.test(series1, series2, MannWhitneyU.TestType.FIRST_DOMINATES);
|
||||
System.out.println("==========================================================");
|
||||
System.out.println("Series 1: " + Arrays.toString(series1));
|
||||
System.out.println("Series 2: " + Arrays.toString(series2));
|
||||
System.out.println(" U: " + test.getU());
|
||||
System.out.println(" Z: " + test.getZ());
|
||||
System.out.println("1-side p: " + test.getP());
|
||||
Assert.assertEquals(test.getZ(), Z, DELTA_PRECISION, name);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue