Fixed an edge case where an exception was thrown if either of the sets was empty for the MWU test. Also altered the output format so that U itself is not printed (which, though interesting, isn't so useful for recalibration), but rather a value I call V (really the deviation of U from its expectation, scaled by its standard deviation, i.e. the z-score of the normal approximation).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5490 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
748787c509
commit
5a79f16ea4
|
|
@ -134,14 +134,16 @@ public class AssociationTestRunner {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String runU(UStatistic context) {
|
public static String runU(UStatistic context) {
|
||||||
Pair<Long,Double> results = mannWhitneyUTest(context);
|
// note: u statistic (U) is relatively useless for recalibrating outside of the context of m and n
|
||||||
return String.format("U: %d\tP: %.2e\tQ: %d",results.first,results.second,(int)Math.floor(QualityUtils.phredScaleErrorRate(results.second)));
|
// thus we report V = (U - (m*n+1)/2)/sqrt(n*m*(n+m+1)/12)
|
||||||
|
Pair<Double,Double> results = mannWhitneyUTest(context);
|
||||||
|
return String.format("V: %.2f\tP: %.2e\tQ: %d",results.first,results.second,(int)Math.floor(QualityUtils.phredScaleErrorRate(results.second)));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Pair<Long,Double> mannWhitneyUTest(UStatistic context) {
|
public static Pair<Double,Double> mannWhitneyUTest(UStatistic context) {
|
||||||
Map<CaseControl.Cohort,Collection<Number>> caseControlVectors = context.getCaseControl();
|
Map<CaseControl.Cohort,Collection<Number>> caseControlVectors = context.getCaseControl();
|
||||||
if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) {
|
if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) {
|
||||||
return new Pair<Long,Double>(-1l,Double.NaN);
|
return new Pair<Double,Double>(Double.NaN,Double.NaN);
|
||||||
}
|
}
|
||||||
MannWhitneyU mwu = new MannWhitneyU();
|
MannWhitneyU mwu = new MannWhitneyU();
|
||||||
for ( Number n : caseControlVectors.get(CaseControl.Cohort.CASE) ) {
|
for ( Number n : caseControlVectors.get(CaseControl.Cohort.CASE) ) {
|
||||||
|
|
|
||||||
|
|
@ -45,13 +45,13 @@ public class MannWhitneyU {
|
||||||
* returns the u and p values.
|
* returns the u and p values.
|
||||||
* @Returns a pair holding the u and p-value.
|
* @Returns a pair holding the u and p-value.
|
||||||
*/
|
*/
|
||||||
public Pair<Long,Double> runTwoSidedTest() {
|
public Pair<Double,Double> runTwoSidedTest() {
|
||||||
Pair<Long,USet> uPair = calculateTwoSidedU(observations);
|
Pair<Long,USet> uPair = calculateTwoSidedU(observations);
|
||||||
long u = uPair.first;
|
long u = uPair.first;
|
||||||
int n = uPair.second == USet.SET1 ? sizeSet1 : sizeSet2;
|
int n = uPair.second == USet.SET1 ? sizeSet1 : sizeSet2;
|
||||||
int m = uPair.second == USet.SET1 ? sizeSet2 : sizeSet1;
|
int m = uPair.second == USet.SET1 ? sizeSet2 : sizeSet1;
|
||||||
double pval = calculateP(n,m,u,true);
|
double pval = calculateP(n,m,u,true);
|
||||||
return new Pair<Long,Double>(u,pval);
|
return new Pair<Double,Double>(getZApprox(n,m,u),pval);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -65,7 +65,9 @@ public class MannWhitneyU {
|
||||||
*/
|
*/
|
||||||
public static double calculateP(int n, int m, long u, boolean twoSided) {
|
public static double calculateP(int n, int m, long u, boolean twoSided) {
|
||||||
double pval;
|
double pval;
|
||||||
if ( n > 8 && m > 8 ) {
|
if ( m == 0 || n == 0 ) {
|
||||||
|
pval = 1.0;
|
||||||
|
} else if ( n > 8 && m > 8 ) {
|
||||||
// large m and n - normal approx
|
// large m and n - normal approx
|
||||||
pval = calculatePNormalApproximation(n,m,u);
|
pval = calculatePNormalApproximation(n,m,u);
|
||||||
} else if ( n > 4 && m > 7 ) {
|
} else if ( n > 4 && m > 7 ) {
|
||||||
|
|
@ -96,10 +98,22 @@ public class MannWhitneyU {
|
||||||
* @return p-value associated with the normal approximation
|
* @return p-value associated with the normal approximation
|
||||||
*/
|
*/
|
||||||
public static double calculatePNormalApproximation(int n,int m,long u) {
|
public static double calculatePNormalApproximation(int n,int m,long u) {
|
||||||
|
double z = getZApprox(n,m,u);
|
||||||
|
return z < 0 ? STANDARD_NORMAL.cdf(z) : 1.0-STANDARD_NORMAL.cdf(z);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the Z-score approximation of the u-statistic
|
||||||
|
* @param n - The number of entries in the DOMINATED set
|
||||||
|
* @param m - The number of entries in the DOMINANT set
|
||||||
|
* @param u - the Mann-Whitney U value
|
||||||
|
* @return z-score associated with the normal approximation
|
||||||
|
*/
|
||||||
|
private static double getZApprox(int n, int m, long u) {
|
||||||
double mean = ( ((long)m)*n+1.0)/2;
|
double mean = ( ((long)m)*n+1.0)/2;
|
||||||
double var = (((long) n)*m*(n+m+1.0))/12;
|
double var = (((long) n)*m*(n+m+1.0))/12;
|
||||||
double z = ( u - mean )/Math.sqrt(var);
|
double z = ( u - mean )/Math.sqrt(var);
|
||||||
return z < 0 ? STANDARD_NORMAL.cdf(z) : 1.0-STANDARD_NORMAL.cdf(z);
|
return z;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -76,22 +76,22 @@ public class RegionalAssociationUnitTest extends BaseTest {
|
||||||
UTest test1 = new UTest();
|
UTest test1 = new UTest();
|
||||||
test1.setCaseData((Collection) Arrays.asList(2,4,5,6,8));
|
test1.setCaseData((Collection) Arrays.asList(2,4,5,6,8));
|
||||||
test1.setControlData((Collection) Arrays.asList(1,3,7,9,10,11,12,13));
|
test1.setControlData((Collection) Arrays.asList(1,3,7,9,10,11,12,13));
|
||||||
Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test1).first,10l);
|
Assert.assertEquals((double) AssociationTestRunner.mannWhitneyUTest(test1).first,-1.537,1e-4);
|
||||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test1).second,0.092292,5e-2); // z-approximation, off by about 0.05
|
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test1).second,0.092292,5e-2); // z-approximation, off by about 0.05
|
||||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test1).second,0.044444,1e-3); // recursive calculation
|
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test1).second,0.044444,1e-3); // recursive calculation
|
||||||
UTest test2 = new UTest();
|
UTest test2 = new UTest();
|
||||||
test2.setCaseData((Collection) Arrays.asList(1,7,8,9,10,11,15,18));
|
test2.setCaseData((Collection) Arrays.asList(1,7,8,9,10,11,15,18));
|
||||||
test2.setControlData((Collection) Arrays.asList(2,3,4,5,6,12,13,14,16,17));
|
test2.setControlData((Collection) Arrays.asList(2,3,4,5,6,12,13,14,16,17));
|
||||||
Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test2).first,37l);
|
Assert.assertEquals((double) AssociationTestRunner.mannWhitneyUTest(test2).first,-0.3109831608,1e-10);
|
||||||
UTest test3 = new UTest();
|
UTest test3 = new UTest();
|
||||||
test3.setCaseData((Collection)Arrays.asList(13,14,7,18,5,2,9,17,8,10,3,15,19,6,20,16,11,4,12,1));
|
test3.setCaseData((Collection)Arrays.asList(13,14,7,18,5,2,9,17,8,10,3,15,19,6,20,16,11,4,12,1));
|
||||||
test3.setControlData((Collection) Arrays.asList(29,21,14,10,12,11,28,19,18,13,7,27,20,5,17,16,9,23,22,26));
|
test3.setControlData((Collection) Arrays.asList(29,21,14,10,12,11,28,19,18,13,7,27,20,5,17,16,9,23,22,26));
|
||||||
Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test3).first,93l);
|
Assert.assertEquals((double) AssociationTestRunner.mannWhitneyUTest(test3).first,-2.907884571802469,1e-14);
|
||||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test3).second,2*0.00302,1e-3);
|
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test3).second,2*0.00302,1e-3);
|
||||||
UTest test4 = new UTest();
|
UTest test4 = new UTest();
|
||||||
test4.setCaseData((Collection) Arrays.asList(1,2,4,5,6,9));
|
test4.setCaseData((Collection) Arrays.asList(1,2,4,5,6,9));
|
||||||
test4.setControlData((Collection) Arrays.asList(3,8,11,12,13));
|
test4.setControlData((Collection) Arrays.asList(3,8,11,12,13));
|
||||||
Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test4).first,5l);
|
Assert.assertEquals((double) AssociationTestRunner.mannWhitneyUTest(test4).first,-1.9170289512680814,1e-14);
|
||||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test4).second,0.0303,1e-4);
|
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test4).second,0.0303,1e-4);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue