Single-tailed hypothesis testing in MWU

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5533 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2011-03-29 15:53:40 +00:00
parent f443137dda
commit f6dfdc7f3b
2 changed files with 69 additions and 2 deletions

View File

@ -41,8 +41,22 @@ public class MannWhitneyU {
}
/**
* temporary method that will be generalized. Runs the standard two-sided test,
* returns the u and p values.
* Runs the one-sided test under the hypothesis that the data in set "lessThanOther" stochastically
* dominates the other set
* @param lessThanOther - either Set1 or Set2
* @return - u-based z-approximation, and p-value associated with the test (p-value is exact for small n,m)
*/
public Pair<Double,Double> runOneSidedTest(USet lessThanOther) {
long u = calculateOneSidedU(observations, lessThanOther);
int n = lessThanOther == USet.SET1 ? sizeSet1 : sizeSet2;
int m = lessThanOther == USet.SET1 ? sizeSet2 : sizeSet1;
double pval = calculateP(n,m,u,false);
return new Pair<Double,Double>(getZApprox(n,m,u),pval);
}
/**
* Runs the standard two-sided test,
* returns the u-based z-approximate and p values.
* @Returns a pair holding the u and p-value.
*/
public Pair<Double,Double> runTwoSidedTest() {
@ -185,6 +199,27 @@ public class MannWhitneyU {
return uSet1DomSet2 < uSet2DomSet1 ? new Pair<Long,USet>(uSet1DomSet2,USet.SET1) : new Pair<Long,USet>(uSet2DomSet1,USet.SET2);
}
/**
* Calculates the U-statistic associated with the one-sided hypothesis that "dominator" stochastically dominates
* the other U-set
* @param observed - the observed data points, tagged by each set
* @param dominator - the set that is hypothesized to be stochastically dominating
* @return the u-statistic associated with the hypothesis
*/
public static long calculateOneSidedU(TreeSet<Pair<Number,USet>> observed,USet dominator) {
long domninatorBeforeOther = 0l;
int domSeenSoFar = 0;
for ( Pair<Number,USet> dataPoint : observed ) {
if ( dataPoint.second == dominator ) {
++domSeenSoFar;
} else {
domninatorBeforeOther += domSeenSoFar;
}
}
return domninatorBeforeOther;
}
/**
* The Mann-Whitney U statistic follows a recursive equation (that enumerates the proportion of possible
* binary strings of "n" zeros, and "m" ones, where a one precedes a zero "u" times). This accessor
@ -219,6 +254,16 @@ public class MannWhitneyU {
return (((double)n)/(n+m))*cpr(n-1,m,u-m) + (((double)m)/(n+m))*cpr(n,m-1,u);
}
/**
* hook into the data tree, for testing purposes only
* @return observations
* @deprecated - only for testing
*/
@Deprecated
public TreeSet<Pair<Number,USet>> getObservations() {
return observations;
}
/**
* A comparator class which uses dithering on tie-breaking to ensure that the internal treeset drops no values
* and to ensure that rank ties are broken at random.

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic;
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.ZStatistic;
import org.broadinstitute.sting.utils.MannWhitneyU;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationTestRunner;
@ -96,6 +97,27 @@ public class RegionalAssociationUnitTest extends BaseTest {
}
@Test
private void testMWU() {
logger.warn("Testing MWU");
MannWhitneyU mwu = new MannWhitneyU();
mwu.add(0, MannWhitneyU.USet.SET1);
mwu.add(1,MannWhitneyU.USet.SET2);
mwu.add(2,MannWhitneyU.USet.SET2);
mwu.add(3,MannWhitneyU.USet.SET2);
mwu.add(4,MannWhitneyU.USet.SET2);
mwu.add(5,MannWhitneyU.USet.SET2);
mwu.add(6,MannWhitneyU.USet.SET1);
mwu.add(7,MannWhitneyU.USet.SET1);
mwu.add(8,MannWhitneyU.USet.SET1);
mwu.add(9,MannWhitneyU.USet.SET1);
mwu.add(10,MannWhitneyU.USet.SET1);
mwu.add(11,MannWhitneyU.USet.SET2);
Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu.getObservations(), MannWhitneyU.USet.SET1),11l);
Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu.getObservations(),MannWhitneyU.USet.SET2),25l);
}
private class TTest extends TStatistic {
Map<Cohort,Collection<Number>> toTest = new HashMap<Cohort,Collection<Number>>(2);