From 511cd48d7a5df03c8873fb2ec124e319ace19d5f Mon Sep 17 00:00:00 2001 From: chartl Date: Wed, 1 Jun 2011 17:30:09 +0000 Subject: [PATCH] There is an edge case ( |Set1| = 5, |Set2| = 4) where the exact p-value exceeds the range of the normal distribution we want to invert. For the edge cases, this happens exactly at the mean, and so this can be safely replaced with a z value of 0.0 git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5915 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/utils/MannWhitneyU.java | 18 ++++++++++++++---- .../broadinstitute/sting/utils/MWUnitTest.java | 6 ++++-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java b/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java index 8ac80b281..f9dbc36c4 100755 --- a/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java +++ b/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java @@ -305,11 +305,16 @@ public class MannWhitneyU { if ( mode == ExactMode.CUMULATIVE ) { z = APACHE_NORMAL.inverseCumulativeProbability(p); } else { - double sd = Math.sqrt((1.0/(n+m))*(n*m*(1+n+m))/12); // biased variance empirically better fit to distribution then asymptotic variance - if ( u >= n*m/2 ) { - z = (1.0/sd)*Math.sqrt(-2.0*sd*(Math.log(sd)+Math.log(p)+LNSQRT2PI)); + double sd = Math.sqrt((1.0+1.0/(1+n+m))*(n*m)*(1.0+n+m)/12); // biased variance empirically better fit to distribution then asymptotic variance + //System.out.printf("SD is %f and Max is %f and prob is %f%n",sd,1.0/Math.sqrt(sd*sd*2.0*Math.PI),p); + if ( p > 1.0/Math.sqrt(sd*sd*2.0*Math.PI) ) { // possible for p-value to be outside the range of the normal. Happens at the mean, so z is 0. + z = 0.0; } else { - z = -(1.0/sd)*Math.sqrt(-2.0*sd*(Math.log(sd)+Math.log(p)+LNSQRT2PI)); + if ( u >= n*m/2 ) { + z = Math.sqrt(-2.0*(Math.log(sd)+Math.log(p)+LNSQRT2PI)); + } else { + z = -Math.sqrt(-2.0*(Math.log(sd)+Math.log(p)+LNSQRT2PI)); + } } } @@ -398,6 +403,11 @@ public class MannWhitneyU { return new Pair(sizeSet1,sizeSet2); } + /** + * Validates that observations are in the correct format for a MWU test -- this is only called by the contracts API during testing + * @param tree - the collection of labeled observations + * @return true iff the tree set is valid (no INFs or NaNs, at least one data point in each set) + */ protected static boolean validateObservations(TreeSet> tree) { boolean seen1 = false; boolean seen2 = false; diff --git a/java/test/org/broadinstitute/sting/utils/MWUnitTest.java b/java/test/org/broadinstitute/sting/utils/MWUnitTest.java index ec10700a9..6ca4b4365 100755 --- a/java/test/org/broadinstitute/sting/utils/MWUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/MWUnitTest.java @@ -71,8 +71,10 @@ public class MWUnitTest extends BaseTest { Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,cm).second,0.08547008547008,1e-14); // r does a correction, subtracting 1 from U Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,cm).first,-1.36918910442,1e-2); // apache inversion set to be good only to 1e-2 Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,29L,false,cm).first,1.36918910442,1e-2); // apache inversion set to be good only to 1e-2 - Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,29L,false,pm).first,1.4908546184916176,1e-8); // PDF should be similar - Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,pm).first,-1.4908546184916176,1e-8); // PDF should be similar + Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,29L,false,pm).first,1.2558754796642067,1e-8); // PDF should be similar + Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,pm).first,-1.2558754796642067,1e-8); // PDF should be similar + Assert.assertEquals(MannWhitneyU.calculatePRecursively(4,5,10L,false,pm).second,0.0952381,1e-5); + Assert.assertEquals(MannWhitneyU.calculatePRecursively(4,5,10L,false,pm).first,0.0,1e-14); logger.warn("Set 1"); Assert.assertEquals((double)mwu2.runOneSidedTest(MannWhitneyU.USet.SET1).second,0.021756021756021756,1e-8);