There is an edge case ( |Set1| = 5, |Set2| = 4) where the exact p-value exceeds the range of the normal distribution we want to invert. For the edge cases, this happens exactly at the mean, and so this can be safely replaced with a z value of 0.0

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5915 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2011-06-01 17:30:09 +00:00
parent dcd13060e1
commit 511cd48d7a
2 changed files with 18 additions and 6 deletions

View File

@ -305,11 +305,16 @@ public class MannWhitneyU {
if ( mode == ExactMode.CUMULATIVE ) {
z = APACHE_NORMAL.inverseCumulativeProbability(p);
} else {
double sd = Math.sqrt((1.0/(n+m))*(n*m*(1+n+m))/12); // biased variance empirically better fit to distribution then asymptotic variance
if ( u >= n*m/2 ) {
z = (1.0/sd)*Math.sqrt(-2.0*sd*(Math.log(sd)+Math.log(p)+LNSQRT2PI));
double sd = Math.sqrt((1.0+1.0/(1+n+m))*(n*m)*(1.0+n+m)/12); // biased variance empirically better fit to distribution then asymptotic variance
//System.out.printf("SD is %f and Max is %f and prob is %f%n",sd,1.0/Math.sqrt(sd*sd*2.0*Math.PI),p);
if ( p > 1.0/Math.sqrt(sd*sd*2.0*Math.PI) ) { // possible for p-value to be outside the range of the normal. Happens at the mean, so z is 0.
z = 0.0;
} else {
z = -(1.0/sd)*Math.sqrt(-2.0*sd*(Math.log(sd)+Math.log(p)+LNSQRT2PI));
if ( u >= n*m/2 ) {
z = Math.sqrt(-2.0*(Math.log(sd)+Math.log(p)+LNSQRT2PI));
} else {
z = -Math.sqrt(-2.0*(Math.log(sd)+Math.log(p)+LNSQRT2PI));
}
}
}
@ -398,6 +403,11 @@ public class MannWhitneyU {
return new Pair<Integer,Integer>(sizeSet1,sizeSet2);
}
/**
* Validates that observations are in the correct format for a MWU test -- this is only called by the contracts API during testing
* @param tree - the collection of labeled observations
* @return true iff the tree set is valid (no INFs or NaNs, at least one data point in each set)
*/
protected static boolean validateObservations(TreeSet<Pair<Number,USet>> tree) {
boolean seen1 = false;
boolean seen2 = false;

View File

@ -71,8 +71,10 @@ public class MWUnitTest extends BaseTest {
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,cm).second,0.08547008547008,1e-14); // r does a correction, subtracting 1 from U
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,cm).first,-1.36918910442,1e-2); // apache inversion set to be good only to 1e-2
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,29L,false,cm).first,1.36918910442,1e-2); // apache inversion set to be good only to 1e-2
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,29L,false,pm).first,1.4908546184916176,1e-8); // PDF should be similar
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,pm).first,-1.4908546184916176,1e-8); // PDF should be similar
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,29L,false,pm).first,1.2558754796642067,1e-8); // PDF should be similar
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,pm).first,-1.2558754796642067,1e-8); // PDF should be similar
Assert.assertEquals(MannWhitneyU.calculatePRecursively(4,5,10L,false,pm).second,0.0952381,1e-5);
Assert.assertEquals(MannWhitneyU.calculatePRecursively(4,5,10L,false,pm).first,0.0,1e-14);
logger.warn("Set 1");
Assert.assertEquals((double)mwu2.runOneSidedTest(MannWhitneyU.USet.SET1).second,0.021756021756021756,1e-8);