There is an edge case ( |Set1| = 5, |Set2| = 4) where the exact p-value exceeds the range of the normal distribution we want to invert. For the edge cases, this happens exactly at the mean, and so this can be safely replaced with a z value of 0.0
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5915 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
dcd13060e1
commit
511cd48d7a
|
|
@ -305,11 +305,16 @@ public class MannWhitneyU {
|
|||
if ( mode == ExactMode.CUMULATIVE ) {
|
||||
z = APACHE_NORMAL.inverseCumulativeProbability(p);
|
||||
} else {
|
||||
double sd = Math.sqrt((1.0/(n+m))*(n*m*(1+n+m))/12); // biased variance empirically better fit to distribution then asymptotic variance
|
||||
if ( u >= n*m/2 ) {
|
||||
z = (1.0/sd)*Math.sqrt(-2.0*sd*(Math.log(sd)+Math.log(p)+LNSQRT2PI));
|
||||
double sd = Math.sqrt((1.0+1.0/(1+n+m))*(n*m)*(1.0+n+m)/12); // biased variance empirically better fit to distribution then asymptotic variance
|
||||
//System.out.printf("SD is %f and Max is %f and prob is %f%n",sd,1.0/Math.sqrt(sd*sd*2.0*Math.PI),p);
|
||||
if ( p > 1.0/Math.sqrt(sd*sd*2.0*Math.PI) ) { // possible for p-value to be outside the range of the normal. Happens at the mean, so z is 0.
|
||||
z = 0.0;
|
||||
} else {
|
||||
z = -(1.0/sd)*Math.sqrt(-2.0*sd*(Math.log(sd)+Math.log(p)+LNSQRT2PI));
|
||||
if ( u >= n*m/2 ) {
|
||||
z = Math.sqrt(-2.0*(Math.log(sd)+Math.log(p)+LNSQRT2PI));
|
||||
} else {
|
||||
z = -Math.sqrt(-2.0*(Math.log(sd)+Math.log(p)+LNSQRT2PI));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -398,6 +403,11 @@ public class MannWhitneyU {
|
|||
return new Pair<Integer,Integer>(sizeSet1,sizeSet2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates that observations are in the correct format for a MWU test -- this is only called by the contracts API during testing
|
||||
* @param tree - the collection of labeled observations
|
||||
* @return true iff the tree set is valid (no INFs or NaNs, at least one data point in each set)
|
||||
*/
|
||||
protected static boolean validateObservations(TreeSet<Pair<Number,USet>> tree) {
|
||||
boolean seen1 = false;
|
||||
boolean seen2 = false;
|
||||
|
|
|
|||
|
|
@ -71,8 +71,10 @@ public class MWUnitTest extends BaseTest {
|
|||
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,cm).second,0.08547008547008,1e-14); // r does a correction, subtracting 1 from U
|
||||
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,cm).first,-1.36918910442,1e-2); // apache inversion set to be good only to 1e-2
|
||||
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,29L,false,cm).first,1.36918910442,1e-2); // apache inversion set to be good only to 1e-2
|
||||
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,29L,false,pm).first,1.4908546184916176,1e-8); // PDF should be similar
|
||||
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,pm).first,-1.4908546184916176,1e-8); // PDF should be similar
|
||||
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,29L,false,pm).first,1.2558754796642067,1e-8); // PDF should be similar
|
||||
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,11L,false,pm).first,-1.2558754796642067,1e-8); // PDF should be similar
|
||||
Assert.assertEquals(MannWhitneyU.calculatePRecursively(4,5,10L,false,pm).second,0.0952381,1e-5);
|
||||
Assert.assertEquals(MannWhitneyU.calculatePRecursively(4,5,10L,false,pm).first,0.0,1e-14);
|
||||
|
||||
logger.warn("Set 1");
|
||||
Assert.assertEquals((double)mwu2.runOneSidedTest(MannWhitneyU.USet.SET1).second,0.021756021756021756,1e-8);
|
||||
|
|
|
|||
Loading…
Reference in New Issue