Fixed an edge case where an exception was thrown if either of the sets was empty for the MWU test. Also altered the output format so U itself is not printed (which though interesting, isn't so useful for recalibration), but rather a value I call V (really the deviation of U from its expectation).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5490 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
748787c509
commit
5a79f16ea4
|
|
@ -134,14 +134,16 @@ public class AssociationTestRunner {
|
|||
}
|
||||
|
||||
public static String runU(UStatistic context) {
|
||||
Pair<Long,Double> results = mannWhitneyUTest(context);
|
||||
return String.format("U: %d\tP: %.2e\tQ: %d",results.first,results.second,(int)Math.floor(QualityUtils.phredScaleErrorRate(results.second)));
|
||||
// note: u statistic (U) is relatively useless for recalibrating outside of the context of m and n
|
||||
// thus we report V = (U - (m*n+1)/2)/(n*m*(n+m+1)/12)
|
||||
Pair<Double,Double> results = mannWhitneyUTest(context);
|
||||
return String.format("V: %.2f\tP: %.2e\tQ: %d",results.first,results.second,(int)Math.floor(QualityUtils.phredScaleErrorRate(results.second)));
|
||||
}
|
||||
|
||||
public static Pair<Long,Double> mannWhitneyUTest(UStatistic context) {
|
||||
public static Pair<Double,Double> mannWhitneyUTest(UStatistic context) {
|
||||
Map<CaseControl.Cohort,Collection<Number>> caseControlVectors = context.getCaseControl();
|
||||
if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) {
|
||||
return new Pair<Long,Double>(-1l,Double.NaN);
|
||||
return new Pair<Double,Double>(Double.NaN,Double.NaN);
|
||||
}
|
||||
MannWhitneyU mwu = new MannWhitneyU();
|
||||
for ( Number n : caseControlVectors.get(CaseControl.Cohort.CASE) ) {
|
||||
|
|
|
|||
|
|
@ -45,13 +45,13 @@ public class MannWhitneyU {
|
|||
* returns the u and p values.
|
||||
* @Returns a pair holding the u and p-value.
|
||||
*/
|
||||
public Pair<Long,Double> runTwoSidedTest() {
|
||||
public Pair<Double,Double> runTwoSidedTest() {
|
||||
Pair<Long,USet> uPair = calculateTwoSidedU(observations);
|
||||
long u = uPair.first;
|
||||
int n = uPair.second == USet.SET1 ? sizeSet1 : sizeSet2;
|
||||
int m = uPair.second == USet.SET1 ? sizeSet2 : sizeSet1;
|
||||
double pval = calculateP(n,m,u,true);
|
||||
return new Pair<Long,Double>(u,pval);
|
||||
return new Pair<Double,Double>(getZApprox(n,m,u),pval);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -65,7 +65,9 @@ public class MannWhitneyU {
|
|||
*/
|
||||
public static double calculateP(int n, int m, long u, boolean twoSided) {
|
||||
double pval;
|
||||
if ( n > 8 && m > 8 ) {
|
||||
if ( m == 0 || n == 0 ) {
|
||||
pval = 1.0;
|
||||
} else if ( n > 8 && m > 8 ) {
|
||||
// large m and n - normal approx
|
||||
pval = calculatePNormalApproximation(n,m,u);
|
||||
} else if ( n > 4 && m > 7 ) {
|
||||
|
|
@ -96,10 +98,22 @@ public class MannWhitneyU {
|
|||
* @return p-value associated with the normal approximation
|
||||
*/
|
||||
public static double calculatePNormalApproximation(int n,int m,long u) {
|
||||
double z = getZApprox(n,m,u);
|
||||
return z < 0 ? STANDARD_NORMAL.cdf(z) : 1.0-STANDARD_NORMAL.cdf(z);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the Z-score approximation of the u-statistic
|
||||
* @param n - The number of entries in the DOMINATED set
|
||||
* @param m - The number of entries in the DOMINANT set
|
||||
* @param u - the Mann-Whitney U value
|
||||
* @return z-score associated with the normal approximation
|
||||
*/
|
||||
private static double getZApprox(int n, int m, long u) {
|
||||
double mean = ( ((long)m)*n+1.0)/2;
|
||||
double var = (((long) n)*m*(n+m+1.0))/12;
|
||||
double z = ( u - mean )/Math.sqrt(var);
|
||||
return z < 0 ? STANDARD_NORMAL.cdf(z) : 1.0-STANDARD_NORMAL.cdf(z);
|
||||
return z;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -76,22 +76,22 @@ public class RegionalAssociationUnitTest extends BaseTest {
|
|||
UTest test1 = new UTest();
|
||||
test1.setCaseData((Collection) Arrays.asList(2,4,5,6,8));
|
||||
test1.setControlData((Collection) Arrays.asList(1,3,7,9,10,11,12,13));
|
||||
Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test1).first,10l);
|
||||
Assert.assertEquals((double) AssociationTestRunner.mannWhitneyUTest(test1).first,-1.537,1e-4);
|
||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test1).second,0.092292,5e-2); // z-approximation, off by about 0.05
|
||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test1).second,0.044444,1e-3); // recursive calculation
|
||||
UTest test2 = new UTest();
|
||||
test2.setCaseData((Collection) Arrays.asList(1,7,8,9,10,11,15,18));
|
||||
test2.setControlData((Collection) Arrays.asList(2,3,4,5,6,12,13,14,16,17));
|
||||
Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test2).first,37l);
|
||||
Assert.assertEquals((double) AssociationTestRunner.mannWhitneyUTest(test2).first,-0.3109831608,1e-10);
|
||||
UTest test3 = new UTest();
|
||||
test3.setCaseData((Collection)Arrays.asList(13,14,7,18,5,2,9,17,8,10,3,15,19,6,20,16,11,4,12,1));
|
||||
test3.setControlData((Collection) Arrays.asList(29,21,14,10,12,11,28,19,18,13,7,27,20,5,17,16,9,23,22,26));
|
||||
Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test3).first,93l);
|
||||
Assert.assertEquals((double) AssociationTestRunner.mannWhitneyUTest(test3).first,-2.907884571802469,1e-14);
|
||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test3).second,2*0.00302,1e-3);
|
||||
UTest test4 = new UTest();
|
||||
test4.setCaseData((Collection) Arrays.asList(1,2,4,5,6,9));
|
||||
test4.setControlData((Collection) Arrays.asList(3,8,11,12,13));
|
||||
Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test4).first,5l);
|
||||
Assert.assertEquals((double) AssociationTestRunner.mannWhitneyUTest(test4).first,-1.9170289512680814,1e-14);
|
||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test4).second,0.0303,1e-4);
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue