Single-tailed hypothesis testing in MWU
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5533 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f443137dda
commit
f6dfdc7f3b
|
|
@ -41,8 +41,22 @@ public class MannWhitneyU {
|
|||
}
|
||||
|
||||
/**
|
||||
* temporary method that will be generalized. Runs the standard two-sided test,
|
||||
* returns the u and p values.
|
||||
* Runs the one-sided test under the hypothesis that the data in set "lessThanOther" stochastically
|
||||
* dominates the other set
|
||||
* @param lessThanOther - either Set1 or Set2
|
||||
* @return - u-based z-approximation, and p-value associated with the test (p-value is exact for small n,m)
|
||||
*/
|
||||
public Pair<Double,Double> runOneSidedTest(USet lessThanOther) {
|
||||
long u = calculateOneSidedU(observations, lessThanOther);
|
||||
int n = lessThanOther == USet.SET1 ? sizeSet1 : sizeSet2;
|
||||
int m = lessThanOther == USet.SET1 ? sizeSet2 : sizeSet1;
|
||||
double pval = calculateP(n,m,u,false);
|
||||
return new Pair<Double,Double>(getZApprox(n,m,u),pval);
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs the standard two-sided test,
|
||||
* returns the u-based z-approximate and p values.
|
||||
* @Returns a pair holding the u and p-value.
|
||||
*/
|
||||
public Pair<Double,Double> runTwoSidedTest() {
|
||||
|
|
@ -185,6 +199,27 @@ public class MannWhitneyU {
|
|||
return uSet1DomSet2 < uSet2DomSet1 ? new Pair<Long,USet>(uSet1DomSet2,USet.SET1) : new Pair<Long,USet>(uSet2DomSet1,USet.SET2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the U-statistic associated with the one-sided hypothesis that "dominator" stochastically dominates
|
||||
* the other U-set
|
||||
* @param observed - the observed data points, tagged by each set
|
||||
* @param dominator - the set that is hypothesized to be stochastically dominating
|
||||
* @return the u-statistic associated with the hypothesis
|
||||
*/
|
||||
public static long calculateOneSidedU(TreeSet<Pair<Number,USet>> observed,USet dominator) {
|
||||
long domninatorBeforeOther = 0l;
|
||||
int domSeenSoFar = 0;
|
||||
for ( Pair<Number,USet> dataPoint : observed ) {
|
||||
if ( dataPoint.second == dominator ) {
|
||||
++domSeenSoFar;
|
||||
} else {
|
||||
domninatorBeforeOther += domSeenSoFar;
|
||||
}
|
||||
}
|
||||
|
||||
return domninatorBeforeOther;
|
||||
}
|
||||
|
||||
/**
|
||||
* The Mann-Whitney U statistic follows a recursive equation (that enumerates the proportion of possible
|
||||
* binary strings of "n" zeros, and "m" ones, where a one precedes a zero "u" times). This accessor
|
||||
|
|
@ -219,6 +254,16 @@ public class MannWhitneyU {
|
|||
return (((double)n)/(n+m))*cpr(n-1,m,u-m) + (((double)m)/(n+m))*cpr(n,m-1,u);
|
||||
}
|
||||
|
||||
/**
|
||||
* hook into the data tree, for testing purposes only
|
||||
* @return observations
|
||||
* @deprecated - only for testing
|
||||
*/
|
||||
@Deprecated
|
||||
public TreeSet<Pair<Number,USet>> getObservations() {
|
||||
return observations;
|
||||
}
|
||||
|
||||
/**
|
||||
* A comparator class which uses dithering on tie-breaking to ensure that the internal treeset drops no values
|
||||
* and to ensure that rank ties are broken at random.
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers;
|
|||
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.ZStatistic;
|
||||
import org.broadinstitute.sting.utils.MannWhitneyU;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationTestRunner;
|
||||
|
|
@ -96,6 +97,27 @@ public class RegionalAssociationUnitTest extends BaseTest {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
private void testMWU() {
|
||||
logger.warn("Testing MWU");
|
||||
MannWhitneyU mwu = new MannWhitneyU();
|
||||
mwu.add(0, MannWhitneyU.USet.SET1);
|
||||
mwu.add(1,MannWhitneyU.USet.SET2);
|
||||
mwu.add(2,MannWhitneyU.USet.SET2);
|
||||
mwu.add(3,MannWhitneyU.USet.SET2);
|
||||
mwu.add(4,MannWhitneyU.USet.SET2);
|
||||
mwu.add(5,MannWhitneyU.USet.SET2);
|
||||
mwu.add(6,MannWhitneyU.USet.SET1);
|
||||
mwu.add(7,MannWhitneyU.USet.SET1);
|
||||
mwu.add(8,MannWhitneyU.USet.SET1);
|
||||
mwu.add(9,MannWhitneyU.USet.SET1);
|
||||
mwu.add(10,MannWhitneyU.USet.SET1);
|
||||
mwu.add(11,MannWhitneyU.USet.SET2);
|
||||
Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu.getObservations(), MannWhitneyU.USet.SET1),11l);
|
||||
Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu.getObservations(),MannWhitneyU.USet.SET2),25l);
|
||||
|
||||
}
|
||||
|
||||
|
||||
private class TTest extends TStatistic {
|
||||
Map<Cohort,Collection<Number>> toTest = new HashMap<Cohort,Collection<Number>>(2);
|
||||
|
|
|
|||
Loading…
Reference in New Issue