Added a boatload of new case-control association modules. Switched the U-test to use longs rather than ints (it just so happened that I overflowed and started getting negative U statistics. Not good.) Added the ALL association type for ease of specifying that we want to throw the book at something. Added an svn-commit.tmp~ because i can't get rid of it even with --force. Hopefully I can remove it after.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5386 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5c979633f0
commit
60ddc08cdf
|
|
@ -10,6 +10,7 @@ import cern.jet.random.StudentT;
|
|||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.*;
|
||||
import org.broadinstitute.sting.utils.MannWhitneyU;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.WilcoxonRankSum;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
|
|
@ -53,11 +54,14 @@ public class AssociationTestRunner {
|
|||
Pair<Double,Double> stats = testStudentT(context);
|
||||
double t = stats.first;
|
||||
double p = stats.second;
|
||||
return String.format("T: %.2f\tP: %.2e",t,p);
|
||||
return String.format("T: %.2f\tP: %.2e\tQ: %d",t,p,(int)Math.floor(QualityUtils.phredScaleErrorRate(p)));
|
||||
}
|
||||
|
||||
public static Pair<Double,Double> testStudentT(TStatistic context) {
|
||||
Map<CaseControl.Cohort,Collection<Number>> caseControlVectors = context.getCaseControl();
|
||||
if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) {
|
||||
return new Pair<Double,Double>(Double.NaN,Double.NaN);
|
||||
}
|
||||
double meanCase = MathUtils.average(caseControlVectors.get(CaseControl.Cohort.CASE));
|
||||
double varCase = MathUtils.variance(caseControlVectors.get(CaseControl.Cohort.CASE),meanCase);
|
||||
double nCase = caseControlVectors.get(CaseControl.Cohort.CASE).size();
|
||||
|
|
@ -79,11 +83,14 @@ public class AssociationTestRunner {
|
|||
Pair<Double,Double> stats = testZ(context);
|
||||
double z = stats.first;
|
||||
double p = stats.second;
|
||||
return String.format("Z: %.2f\tP: %.2e",z,p);
|
||||
return String.format("Z: %.2f\tP: %.2e\tQ: %d",z,p,(int)Math.floor(QualityUtils.phredScaleErrorRate(p)));
|
||||
}
|
||||
|
||||
public static Pair<Double,Double> testZ(ZStatistic context) {
|
||||
Map<CaseControl.Cohort,Pair<Number,Number>> caseControlCounts = context.getCaseControl();
|
||||
if ( caseControlCounts == null || caseControlCounts.get(CaseControl.Cohort.CASE) == null || caseControlCounts.get(CaseControl.Cohort.CONTROL) == null ) {
|
||||
return new Pair<Double,Double>(Double.NaN,Double.NaN);
|
||||
}
|
||||
double pCase = caseControlCounts.get(CaseControl.Cohort.CASE).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue();
|
||||
double pControl = caseControlCounts.get(CaseControl.Cohort.CONTROL).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue();
|
||||
double nCase = caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue();
|
||||
|
|
@ -100,12 +107,15 @@ public class AssociationTestRunner {
|
|||
}
|
||||
|
||||
public static String runU(UStatistic context) {
|
||||
Pair<Integer,Double> results = mannWhitneyUTest(context);
|
||||
return String.format("U: %d\tP: %.2e",results.first,results.second);
|
||||
Pair<Long,Double> results = mannWhitneyUTest(context);
|
||||
return String.format("U: %d\tP: %.2e\tQ: %d",results.first,results.second,(int)Math.floor(QualityUtils.phredScaleErrorRate(results.second)));
|
||||
}
|
||||
|
||||
public static Pair<Integer,Double> mannWhitneyUTest(UStatistic context) {
|
||||
public static Pair<Long,Double> mannWhitneyUTest(UStatistic context) {
|
||||
Map<CaseControl.Cohort,Collection<Number>> caseControlVectors = context.getCaseControl();
|
||||
if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) {
|
||||
return new Pair<Long,Double>(-1l,Double.NaN);
|
||||
}
|
||||
MannWhitneyU mwu = new MannWhitneyU();
|
||||
for ( Number n : caseControlVectors.get(CaseControl.Cohort.CASE) ) {
|
||||
mwu.add(n, MannWhitneyU.USet.SET1);
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ public class RegionalAssociationHandler {
|
|||
for ( AssociationContext w : associations ) {
|
||||
if ( w.isFull() ) {
|
||||
testResults.put(w,String.format("%s\t%d\t%d\t%s",maps.getReferenceContext().getLocus().getContig(),
|
||||
maps.getReferenceContext().getLocus().getStart(),maps.getReferenceContext().getLocus().getStop(),AssociationTestRunner.runTests(w)));
|
||||
maps.getReferenceContext().getLocus().getStart(),maps.getReferenceContext().getLocus().getStart()+1,AssociationTestRunner.runTests(w)));
|
||||
w.slide();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,11 @@ public class RegionalAssociationMultiplexer implements Multiplexer<Class<? exten
|
|||
|
||||
private Set<Class<? extends AssociationContext>> getAssociations(String[] associationsToUse) {
|
||||
List<Class<? extends AssociationContext>> contexts = new PluginManager<AssociationContext>(AssociationContext.class).getPlugins();
|
||||
|
||||
if ( associationsToUse.length > 0 && associationsToUse[0].equals("ALL") ) {
|
||||
return new HashSet<Class<? extends AssociationContext>>((Collection)contexts);
|
||||
}
|
||||
|
||||
Map<String,Class<? extends AssociationContext>> classNameToClass = new HashMap<String,Class<? extends AssociationContext>>(contexts.size());
|
||||
for ( Class<? extends AssociationContext> clazz : contexts ) {
|
||||
classNameToClass.put(clazz.getSimpleName(),clazz);
|
||||
|
|
|
|||
|
|
@ -62,6 +62,25 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
|
|||
|
||||
public Set<AssociationContext> getAssociations() {
|
||||
List<Class<? extends AssociationContext>> contexts = new PluginManager<AssociationContext>(AssociationContext.class).getPlugins();
|
||||
|
||||
|
||||
|
||||
if ( associationsToUse.length > 0 && associationsToUse[0].equals("ALL") ) {
|
||||
HashSet<AssociationContext> allAssoc = new HashSet<AssociationContext>(contexts.size());
|
||||
for ( Class<? extends AssociationContext> clazz : contexts ) {
|
||||
AssociationContext context;
|
||||
try {
|
||||
context = clazz.newInstance();
|
||||
} catch (Exception e ) {
|
||||
throw new StingException("The class "+clazz.getSimpleName()+" could not be instantiated",e);
|
||||
}
|
||||
context.init(this);
|
||||
allAssoc.add(context);
|
||||
}
|
||||
return allAssoc;
|
||||
}
|
||||
|
||||
|
||||
Map<String,Class<? extends AssociationContext>> classNameToClass = new HashMap<String,Class<? extends AssociationContext>>(contexts.size());
|
||||
for ( Class<? extends AssociationContext> clazz : contexts ) {
|
||||
classNameToClass.put(clazz.getSimpleName(),clazz);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,31 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 3/6/11
|
||||
* Time: 1:52 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class BaseQualityScore extends TStatistic {
|
||||
|
||||
public Collection<Number> map(ReadBackedPileup rbp) {
|
||||
ArrayList<Integer> baseQuals = new ArrayList<Integer>(rbp.size());
|
||||
for (PileupElement e : rbp ) {
|
||||
baseQuals.add((int)e.getQual());
|
||||
}
|
||||
|
||||
return (Collection) baseQuals;
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 5; }
|
||||
public int slideByValue() { return 1; }
|
||||
public boolean usePreviouslySeenReads() { return true; }
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
|
|
@ -11,7 +12,7 @@ import java.util.List;
|
|||
/**
|
||||
* @author chartl
|
||||
*/
|
||||
public class InsertSizeDistribution extends TStatistic {
|
||||
public class InsertSizeDistribution extends UStatistic {
|
||||
public int getWindowSize() { return 100; }
|
||||
public int slideByValue() { return 10; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
|
|
|
|||
|
|
@ -0,0 +1,33 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.ZStatistic;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 3/6/11
|
||||
* Time: 1:29 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MappingQuality0 extends ZStatistic {
|
||||
|
||||
public Pair<Number,Number> map(ReadBackedPileup rbp) {
|
||||
int total = 0;
|
||||
int mq0 = 0;
|
||||
for ( PileupElement e : rbp ) {
|
||||
++total;
|
||||
if (e.getMappingQual() == 0)
|
||||
++mq0;
|
||||
}
|
||||
|
||||
return new Pair<Number,Number>(mq0,total);
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 50; }
|
||||
public int slideByValue() { return 10; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 3/6/11
|
||||
* Time: 1:43 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MateMappingQuality extends UStatistic {
|
||||
|
||||
public Collection<Number> map(ReadBackedPileup rbp) {
|
||||
ArrayList<Integer> mateMapQ = new ArrayList<Integer>(rbp.size());
|
||||
for ( PileupElement e : rbp ) {
|
||||
if ( e.getRead().getReadPairedFlag() && e.getRead().getAttribute("MQ") != null) {
|
||||
mateMapQ.add((Integer) e.getRead().getAttribute("MQ"));
|
||||
}
|
||||
}
|
||||
|
||||
return (Collection) mateMapQ;
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 40; }
|
||||
public int slideByValue() { return 20; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
}
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.ZStatistic;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 3/6/11
|
||||
* Time: 1:47 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MateSameStrand extends ZStatistic {
|
||||
|
||||
public Pair<Number,Number> map(ReadBackedPileup rbp) {
|
||||
int numPairs = 0;
|
||||
int mateSameStrand = 0;
|
||||
for (PileupElement e : rbp ) {
|
||||
if ( e.getRead().getReadPairedFlag() ) {
|
||||
++numPairs;
|
||||
if ( e.getRead().getMateNegativeStrandFlag() == e.getRead().getReadNegativeStrandFlag() ) {
|
||||
++mateSameStrand;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<Number,Number>(mateSameStrand,numPairs);
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 40; }
|
||||
public int slideByValue() { return 5; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.ZStatistic;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 3/6/11
|
||||
* Time: 1:50 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ProperPairs extends ZStatistic {
|
||||
|
||||
public Pair<Number,Number> map(ReadBackedPileup rbp) {
|
||||
int numReads = 0;
|
||||
int numPropPair = 0;
|
||||
for (PileupElement e : rbp ) {
|
||||
++numReads;
|
||||
if ( e.getRead().getReadPairedFlag() && e.getRead().getProperPairFlag() ) {
|
||||
++numPropPair;
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<Number,Number>(numPropPair,numReads);
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 30; }
|
||||
public int slideByValue() { return 10; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
}
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 3/6/11
|
||||
* Time: 1:31 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadClipping extends TStatistic {
|
||||
|
||||
public Collection<Number> map(ReadBackedPileup rbp) {
|
||||
ArrayList<Integer> clipping = new ArrayList<Integer>(rbp.size());
|
||||
for ( PileupElement e : rbp ) {
|
||||
int clips = 0;
|
||||
for( CigarElement c : e.getRead().getCigar().getCigarElements() ) {
|
||||
if ( c.getOperator() == CigarOperator.SOFT_CLIP || c.getOperator() == CigarOperator.HARD_CLIP ) {
|
||||
clips += c.getLength();
|
||||
}
|
||||
}
|
||||
clipping.add(clips);
|
||||
}
|
||||
|
||||
return (Collection) clipping;
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 30; }
|
||||
public int slideByValue() { return 5; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 3/6/11
|
||||
* Time: 1:54 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadIndels extends UStatistic {
|
||||
|
||||
public Collection<Number> map(ReadBackedPileup rbp) {
|
||||
ArrayList<Integer> indelElements = new ArrayList<Integer>(rbp.size());
|
||||
for (PileupElement e : rbp ) {
|
||||
int indelOps = 0;
|
||||
for ( CigarElement c : e.getRead().getCigar().getCigarElements()) {
|
||||
if ( c.getOperator() == CigarOperator.DELETION || c.getOperator() == CigarOperator.INSERTION )
|
||||
++indelOps;
|
||||
}
|
||||
indelElements.add(indelOps);
|
||||
}
|
||||
|
||||
return (Collection) indelElements;
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 50; }
|
||||
public int slideByValue() { return 10; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
}
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 3/6/11
|
||||
* Time: 1:41 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadMappingQuality extends UStatistic {
|
||||
|
||||
public Collection<Number> map(ReadBackedPileup rbp) {
|
||||
ArrayList<Integer> mapQuals = new ArrayList<Integer>(rbp.size());
|
||||
for ( PileupElement e : rbp ) {
|
||||
mapQuals.add(e.getMappingQual());
|
||||
}
|
||||
|
||||
return (Collection) mapQuals;
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 40; }
|
||||
public int slideByValue() { return 5; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
}
|
||||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.MapExtender;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.RegionalAssociationWalker;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
|
@ -16,7 +17,7 @@ import java.util.*;
|
|||
/**
|
||||
* @author chartl
|
||||
*/
|
||||
public class SampleDepth extends UStatistic {
|
||||
public class SampleDepth extends TStatistic {
|
||||
|
||||
public Map<Sample,Object> sampleStats = null;
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,4 @@
|
|||
|
||||
--This line, and those below, will be ignored--
|
||||
|
||||
AM SampleDepth.java
|
||||
|
|
@ -20,8 +20,8 @@ public abstract class TStatistic extends CaseControl<Collection<Number>> {
|
|||
public abstract Collection<Number> map(ReadBackedPileup rbp );
|
||||
|
||||
public Collection<Number> add(Collection<Number> left, Collection<Number> right) {
|
||||
if ( left instanceof List) {
|
||||
((List) left).addAll(right);
|
||||
if ( left instanceof ArrayList) {
|
||||
((ArrayList) left).addAll(right);
|
||||
return left;
|
||||
} else if ( left instanceof Set ) {
|
||||
((Set) left).addAll(right);
|
||||
|
|
|
|||
|
|
@ -45,13 +45,13 @@ public class MannWhitneyU {
|
|||
* returns the u and p values.
|
||||
* @Returns a pair holding the u and p-value.
|
||||
*/
|
||||
public Pair<Integer,Double> runTwoSidedTest() {
|
||||
Pair<Integer,USet> uPair = calculateTwoSidedU(observations);
|
||||
int u = uPair.first;
|
||||
public Pair<Long,Double> runTwoSidedTest() {
|
||||
Pair<Long,USet> uPair = calculateTwoSidedU(observations);
|
||||
long u = uPair.first;
|
||||
int n = uPair.second == USet.SET1 ? sizeSet1 : sizeSet2;
|
||||
int m = uPair.second == USet.SET1 ? sizeSet2 : sizeSet1;
|
||||
double pval = calculateP(n,m,u,true);
|
||||
return new Pair<Integer,Double>(u,pval);
|
||||
return new Pair<Long,Double>(u,pval);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -61,14 +61,18 @@ public class MannWhitneyU {
|
|||
* @param u - the Mann-Whitney U value
|
||||
* @param twoSided - is the test twosided
|
||||
* @return the (possibly approximate) p-value associated with the MWU test
|
||||
* todo -- there must be an approximation for small m and large n
|
||||
*/
|
||||
public static double calculateP(int n, int m, int u, boolean twoSided) {
|
||||
public static double calculateP(int n, int m, long u, boolean twoSided) {
|
||||
double pval;
|
||||
if ( n > 8 && m > 8 ) {
|
||||
// large m and n - normal approx
|
||||
pval = calculatePNormalApproximation(n,m,u);
|
||||
} else if ( n > 4 && m > 7 ) {
|
||||
// large m, small n - sum uniform approx
|
||||
pval = calculatePUniformApproximation(n,m,u);
|
||||
} else {
|
||||
// small m [possibly small n] - full approx
|
||||
pval = calculatePRecursively(n,m,u);
|
||||
}
|
||||
|
||||
|
|
@ -82,9 +86,9 @@ public class MannWhitneyU {
|
|||
* @param u - the Mann-Whitney U value
|
||||
* @return p-value associated with the normal approximation
|
||||
*/
|
||||
public static double calculatePNormalApproximation(int n,int m,int u) {
|
||||
double mean = ((double) m*n+1)/2;
|
||||
double var = (n*m*(n+m+1))/12;
|
||||
public static double calculatePNormalApproximation(int n,int m,long u) {
|
||||
double mean = ( ((long)m)*n+1.0)/2;
|
||||
double var = (((long) n)*m*(n+m+1.0))/12;
|
||||
double z = ( u - mean )/Math.sqrt(var);
|
||||
return z < 0 ? STANDARD_NORMAL.cdf(z) : 1.0-STANDARD_NORMAL.cdf(z);
|
||||
}
|
||||
|
|
@ -97,8 +101,8 @@ public class MannWhitneyU {
|
|||
* @param u -
|
||||
* @return
|
||||
*/
|
||||
public static double calculatePUniformApproximation(int n, int m, int u) {
|
||||
int R = u + (n*(n+1))/2;
|
||||
public static double calculatePUniformApproximation(int n, int m, long u) {
|
||||
long R = u + (n*(n+1))/2;
|
||||
double a = Math.sqrt(m*(n+m+1));
|
||||
double b = (n/2.0)*(1-Math.sqrt((n+m+1)/m));
|
||||
double z = b + R/a;
|
||||
|
|
@ -130,11 +134,11 @@ public class MannWhitneyU {
|
|||
* @param observed
|
||||
* @return the minimum of the U counts (set1 dominates 2, set 2 dominates 1)
|
||||
*/
|
||||
public static Pair<Integer,USet> calculateTwoSidedU(TreeSet<Pair<Number,USet>> observed ) {
|
||||
public static Pair<Long,USet> calculateTwoSidedU(TreeSet<Pair<Number,USet>> observed ) {
|
||||
int set1SeenSoFar = 0;
|
||||
int set2SeenSoFar = 0;
|
||||
int uSet1DomSet2 = 0;
|
||||
int uSet2DomSet1 = 0;
|
||||
long uSet1DomSet2 = 0;
|
||||
long uSet2DomSet1 = 0;
|
||||
USet previous = null;
|
||||
for ( Pair<Number,USet> dataPoint : observed ) {
|
||||
|
||||
|
|
@ -155,7 +159,7 @@ public class MannWhitneyU {
|
|||
previous = dataPoint.second;
|
||||
}
|
||||
|
||||
return uSet1DomSet2 < uSet2DomSet1 ? new Pair<Integer,USet>(uSet1DomSet2,USet.SET1) : new Pair<Integer,USet>(uSet2DomSet1,USet.SET2);
|
||||
return uSet1DomSet2 < uSet2DomSet1 ? new Pair<Long,USet>(uSet1DomSet2,USet.SET1) : new Pair<Long,USet>(uSet2DomSet1,USet.SET2);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -167,8 +171,8 @@ public class MannWhitneyU {
|
|||
* @param u: number of set-two entries that precede set-one entries (e.g. 0,1,0,1,0 -> 3 )
|
||||
* @return the probability under the hypothesis that all sequences are equally likely of finding a set-two entry preceding a set-one entry "u" times.
|
||||
*/
|
||||
public static double calculatePRecursively(int n, int m, int u) {
|
||||
if ( m + n > 16 ) { throw new StingException("Please use the appropriate (normal or sum of uniform) approximation"); }
|
||||
public static double calculatePRecursively(int n, int m, long u) {
|
||||
if ( m > 7 && n > 4 ) { throw new StingException(String.format("Please use the appropriate (normal or sum of uniform) approximation. Values n: %d, m: %d",n,m)); }
|
||||
return cpr(n,m,u);
|
||||
}
|
||||
|
||||
|
|
@ -178,7 +182,7 @@ public class MannWhitneyU {
|
|||
* @m: number of set-2 entries
|
||||
* @u: number of times a set-2 entry as preceded a set-1 entry
|
||||
*/
|
||||
private static double cpr(int n, int m, int u) {
|
||||
private static double cpr(int n, int m, long u) {
|
||||
if ( u < 0 || n == 0 && m == 0 ) {
|
||||
return 0.0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -76,22 +76,22 @@ public class RegionalAssociationUnitTest extends BaseTest {
|
|||
UTest test1 = new UTest();
|
||||
test1.setCaseData((Collection) Arrays.asList(2,4,5,6,8));
|
||||
test1.setControlData((Collection) Arrays.asList(1,3,7,9,10,11,12,13));
|
||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test1).first,10,0);
|
||||
Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test1).first,10l);
|
||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test1).second,0.092292,5e-2); // z-approximation, off by about 0.05
|
||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test1).second,0.044444,1e-3); // recursive calculation
|
||||
UTest test2 = new UTest();
|
||||
test2.setCaseData((Collection) Arrays.asList(1,7,8,9,10,11,15,18));
|
||||
test2.setControlData((Collection) Arrays.asList(2,3,4,5,6,12,13,14,16,17));
|
||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test2).first,37,0);
|
||||
Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test2).first,37l);
|
||||
UTest test3 = new UTest();
|
||||
test3.setCaseData((Collection)Arrays.asList(13,14,7,18,5,2,9,17,8,10,3,15,19,6,20,16,11,4,12,1));
|
||||
test3.setControlData((Collection) Arrays.asList(29,21,14,10,12,11,28,19,18,13,7,27,20,5,17,16,9,23,22,26));
|
||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test3).first,93,0);
|
||||
Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test3).first,93l);
|
||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test3).second,2*0.00302,1e-3);
|
||||
UTest test4 = new UTest();
|
||||
test4.setCaseData((Collection) Arrays.asList(1,2,4,5,6,9));
|
||||
test4.setControlData((Collection) Arrays.asList(3,8,11,12,13));
|
||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test4).first,5,0);
|
||||
Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test4).first,5l);
|
||||
Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test4).second,0.0303,1e-4);
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue