diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java index 30f6c6b57..13f4e6c09 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java @@ -10,6 +10,7 @@ import cern.jet.random.StudentT; import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.*; import org.broadinstitute.sting.utils.MannWhitneyU; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.WilcoxonRankSum; import org.broadinstitute.sting.utils.collections.Pair; @@ -53,11 +54,14 @@ public class AssociationTestRunner { Pair stats = testStudentT(context); double t = stats.first; double p = stats.second; - return String.format("T: %.2f\tP: %.2e",t,p); + return String.format("T: %.2f\tP: %.2e\tQ: %d",t,p,(int)Math.floor(QualityUtils.phredScaleErrorRate(p))); } public static Pair testStudentT(TStatistic context) { Map> caseControlVectors = context.getCaseControl(); + if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) { + return new Pair(Double.NaN,Double.NaN); + } double meanCase = MathUtils.average(caseControlVectors.get(CaseControl.Cohort.CASE)); double varCase = MathUtils.variance(caseControlVectors.get(CaseControl.Cohort.CASE),meanCase); double nCase = caseControlVectors.get(CaseControl.Cohort.CASE).size(); @@ -79,11 +83,14 @@ public class AssociationTestRunner { Pair stats = testZ(context); double z = stats.first; double p = stats.second; - return String.format("Z: %.2f\tP: %.2e",z,p); + return String.format("Z: %.2f\tP: %.2e\tQ: %d",z,p,(int)Math.floor(QualityUtils.phredScaleErrorRate(p))); } public static Pair testZ(ZStatistic context) { Map> caseControlCounts = context.getCaseControl(); + if ( caseControlCounts == null || caseControlCounts.get(CaseControl.Cohort.CASE) == null || caseControlCounts.get(CaseControl.Cohort.CONTROL) == null ) { + return new Pair(Double.NaN,Double.NaN); + } double pCase = caseControlCounts.get(CaseControl.Cohort.CASE).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue(); double pControl = caseControlCounts.get(CaseControl.Cohort.CONTROL).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue(); double nCase = caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue(); @@ -100,12 +107,15 @@ public class AssociationTestRunner { } public static String runU(UStatistic context) { - Pair results = mannWhitneyUTest(context); - return String.format("U: %d\tP: %.2e",results.first,results.second); + Pair results = mannWhitneyUTest(context); + return String.format("U: %d\tP: %.2e\tQ: %d",results.first,results.second,(int)Math.floor(QualityUtils.phredScaleErrorRate(results.second))); } - public static Pair mannWhitneyUTest(UStatistic context) { + public static Pair mannWhitneyUTest(UStatistic context) { Map> caseControlVectors = context.getCaseControl(); + if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) { + return new Pair(-1l,Double.NaN); + } MannWhitneyU mwu = new MannWhitneyU(); for ( Number n : caseControlVectors.get(CaseControl.Cohort.CASE) ) { mwu.add(n, MannWhitneyU.USet.SET1); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java index 5b38933aa..4bbc1c1aa 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java @@ -49,7 +49,7 @@ public class RegionalAssociationHandler { for ( AssociationContext w : associations ) { if ( w.isFull() ) { testResults.put(w,String.format("%s\t%d\t%d\t%s",maps.getReferenceContext().getLocus().getContig(), - maps.getReferenceContext().getLocus().getStart(),maps.getReferenceContext().getLocus().getStop(),AssociationTestRunner.runTests(w))); + maps.getReferenceContext().getLocus().getStart(),maps.getReferenceContext().getLocus().getStart()+1,AssociationTestRunner.runTests(w))); w.slide(); } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java index 3598ac5b9..5969754b2 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java @@ -33,6 +33,11 @@ public class RegionalAssociationMultiplexer implements Multiplexer> getAssociations(String[] associationsToUse) { List> contexts = new PluginManager(AssociationContext.class).getPlugins(); + + if ( associationsToUse.length > 0 && associationsToUse[0].equals("ALL") ) { + return new HashSet>((Collection)contexts); + } + Map> classNameToClass = new HashMap>(contexts.size()); for ( Class clazz : contexts ) { classNameToClass.put(clazz.getSimpleName(),clazz); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java index 157c88472..dcff890b6 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java @@ -62,6 +62,25 @@ public class RegionalAssociationWalker extends LocusWalker getAssociations() { List> contexts = new PluginManager(AssociationContext.class).getPlugins(); + + + + if ( associationsToUse.length > 0 && associationsToUse[0].equals("ALL") ) { + HashSet allAssoc = new HashSet(contexts.size()); + for ( Class clazz : contexts ) { + AssociationContext context; + try { + context = clazz.newInstance(); + } catch (Exception e ) { + throw new StingException("The class "+clazz.getSimpleName()+" could not be instantiated",e); + } + context.init(this); + allAssoc.add(context); + } + return allAssoc; + } + + Map> classNameToClass = new HashMap>(contexts.size()); for ( Class clazz : contexts ) { classNameToClass.put(clazz.getSimpleName(),clazz); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/BaseQualityScore.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/BaseQualityScore.java new file mode 100755 index 000000000..a15600322 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/BaseQualityScore.java @@ -0,0 +1,31 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; + +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +import java.util.ArrayList; +import java.util.Collection; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 3/6/11 + * Time: 1:52 PM + * To change this template use File | Settings | File Templates. + */ +public class BaseQualityScore extends TStatistic { + + public Collection map(ReadBackedPileup rbp) { + ArrayList baseQuals = new ArrayList(rbp.size()); + for (PileupElement e : rbp ) { + baseQuals.add((int)e.getQual()); + } + + return (Collection) baseQuals; + } + + public int getWindowSize() { return 5; } + public int slideByValue() { return 1; } + public boolean usePreviouslySeenReads() { return true; } +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/InsertSizeDistribution.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/InsertSizeDistribution.java index d1f6fdf6a..449a61c99 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/InsertSizeDistribution.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/InsertSizeDistribution.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -11,7 +12,7 @@ import java.util.List; /** * @author chartl */ -public class InsertSizeDistribution extends TStatistic { +public class InsertSizeDistribution extends UStatistic { public int getWindowSize() { return 100; } public int slideByValue() { return 10; } public boolean usePreviouslySeenReads() { return false; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MappingQuality0.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MappingQuality0.java new file mode 100755 index 000000000..531cba002 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MappingQuality0.java @@ -0,0 +1,33 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; + +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.ZStatistic; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 3/6/11 + * Time: 1:29 PM + * To change this template use File | Settings | File Templates. + */ +public class MappingQuality0 extends ZStatistic { + + public Pair map(ReadBackedPileup rbp) { + int total = 0; + int mq0 = 0; + for ( PileupElement e : rbp ) { + ++total; + if (e.getMappingQual() == 0) + ++mq0; + } + + return new Pair(mq0,total); + } + + public int getWindowSize() { return 50; } + public int slideByValue() { return 10; } + public boolean usePreviouslySeenReads() { return false; } + +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateMappingQuality.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateMappingQuality.java new file mode 100755 index 000000000..f3718bce3 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateMappingQuality.java @@ -0,0 +1,33 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; + +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +import java.util.ArrayList; +import java.util.Collection; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 3/6/11 + * Time: 1:43 PM + * To change this template use File | Settings | File Templates. + */ +public class MateMappingQuality extends UStatistic { + + public Collection map(ReadBackedPileup rbp) { + ArrayList mateMapQ = new ArrayList(rbp.size()); + for ( PileupElement e : rbp ) { + if ( e.getRead().getReadPairedFlag() && e.getRead().getAttribute("MQ") != null) { + mateMapQ.add((Integer) e.getRead().getAttribute("MQ")); + } + } + + return (Collection) mateMapQ; + } + + public int getWindowSize() { return 40; } + public int slideByValue() { return 20; } + public boolean usePreviouslySeenReads() { return false; } +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateSameStrand.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateSameStrand.java new file mode 100755 index 000000000..143c71c71 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateSameStrand.java @@ -0,0 +1,35 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; + +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.ZStatistic; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 3/6/11 + * Time: 1:47 PM + * To change this template use File | Settings | File Templates. + */ +public class MateSameStrand extends ZStatistic { + + public Pair map(ReadBackedPileup rbp) { + int numPairs = 0; + int mateSameStrand = 0; + for (PileupElement e : rbp ) { + if ( e.getRead().getReadPairedFlag() ) { + ++numPairs; + if ( e.getRead().getMateNegativeStrandFlag() == e.getRead().getReadNegativeStrandFlag() ) { + ++mateSameStrand; + } + } + } + + return new Pair(mateSameStrand,numPairs); + } + + public int getWindowSize() { return 40; } + public int slideByValue() { return 5; } + public boolean usePreviouslySeenReads() { return false; } +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ProperPairs.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ProperPairs.java new file mode 100755 index 000000000..6045fcf4b --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ProperPairs.java @@ -0,0 +1,33 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; + +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.ZStatistic; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 3/6/11 + * Time: 1:50 PM + * To change this template use File | Settings | File Templates. + */ +public class ProperPairs extends ZStatistic { + + public Pair map(ReadBackedPileup rbp) { + int numReads = 0; + int numPropPair = 0; + for (PileupElement e : rbp ) { + ++numReads; + if ( e.getRead().getReadPairedFlag() && e.getRead().getProperPairFlag() ) { + ++numPropPair; + } + } + + return new Pair(numPropPair,numReads); + } + + public int getWindowSize() { return 30; } + public int slideByValue() { return 10; } + public boolean usePreviouslySeenReads() { return false; } +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadClipping.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadClipping.java new file mode 100755 index 000000000..8692da8db --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadClipping.java @@ -0,0 +1,39 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; + +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +import java.util.ArrayList; +import java.util.Collection; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 3/6/11 + * Time: 1:31 PM + * To change this template use File | Settings | File Templates. + */ +public class ReadClipping extends TStatistic { + + public Collection map(ReadBackedPileup rbp) { + ArrayList clipping = new ArrayList(rbp.size()); + for ( PileupElement e : rbp ) { + int clips = 0; + for( CigarElement c : e.getRead().getCigar().getCigarElements() ) { + if ( c.getOperator() == CigarOperator.SOFT_CLIP || c.getOperator() == CigarOperator.HARD_CLIP ) { + clips += c.getLength(); + } + } + clipping.add(clips); + } + + return (Collection) clipping; + } + + public int getWindowSize() { return 30; } + public int slideByValue() { return 5; } + public boolean usePreviouslySeenReads() { return false; } +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadIndels.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadIndels.java new file mode 100755 index 000000000..f5ffd6fe0 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadIndels.java @@ -0,0 +1,38 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; + +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +import java.util.ArrayList; +import java.util.Collection; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 3/6/11 + * Time: 1:54 PM + * To change this template use File | Settings | File Templates. + */ +public class ReadIndels extends UStatistic { + + public Collection map(ReadBackedPileup rbp) { + ArrayList indelElements = new ArrayList(rbp.size()); + for (PileupElement e : rbp ) { + int indelOps = 0; + for ( CigarElement c : e.getRead().getCigar().getCigarElements()) { + if ( c.getOperator() == CigarOperator.DELETION || c.getOperator() == CigarOperator.INSERTION ) + ++indelOps; + } + indelElements.add(indelOps); + } + + return (Collection) indelElements; + } + + public int getWindowSize() { return 50; } + public int slideByValue() { return 10; } + public boolean usePreviouslySeenReads() { return false; } +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadMappingQuality.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadMappingQuality.java new file mode 100755 index 000000000..9d6daab09 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadMappingQuality.java @@ -0,0 +1,31 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; + +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +import java.util.ArrayList; +import java.util.Collection; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 3/6/11 + * Time: 1:41 PM + * To change this template use File | Settings | File Templates. + */ +public class ReadMappingQuality extends UStatistic { + + public Collection map(ReadBackedPileup rbp) { + ArrayList mapQuals = new ArrayList(rbp.size()); + for ( PileupElement e : rbp ) { + mapQuals.add(e.getMappingQual()); + } + + return (Collection) mapQuals; + } + + public int getWindowSize() { return 40; } + public int slideByValue() { return 5; } + public boolean usePreviouslySeenReads() { return false; } +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/SampleDepth.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/SampleDepth.java index 1315dc315..f1001115e 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/SampleDepth.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/SampleDepth.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.oneoffprojects.walkers.association.MapExtender; import org.broadinstitute.sting.oneoffprojects.walkers.association.RegionalAssociationWalker; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic; import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.Pair; @@ -16,7 +17,7 @@ import java.util.*; /** * @author chartl */ -public class SampleDepth extends UStatistic { +public class SampleDepth extends TStatistic { public Map sampleStats = null; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/svn-commit.tmp~ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/svn-commit.tmp~ new file mode 100644 index 000000000..299864283 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/svn-commit.tmp~ @@ -0,0 +1,4 @@ + +--This line, and those below, will be ignored-- + +AM SampleDepth.java diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/casecontrol/TStatistic.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/casecontrol/TStatistic.java index 650f8ff4e..1eddb495e 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/casecontrol/TStatistic.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/casecontrol/TStatistic.java @@ -20,8 +20,8 @@ public abstract class TStatistic extends CaseControl> { public abstract Collection map(ReadBackedPileup rbp ); public Collection add(Collection left, Collection right) { - if ( left instanceof List) { - ((List) left).addAll(right); + if ( left instanceof ArrayList) { + ((ArrayList) left).addAll(right); return left; } else if ( left instanceof Set ) { ((Set) left).addAll(right); diff --git a/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java b/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java index 825374982..4d89d4894 100755 --- a/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java +++ b/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java @@ -45,13 +45,13 @@ public class MannWhitneyU { * returns the u and p values. * @Returns a pair holding the u and p-value. */ - public Pair runTwoSidedTest() { - Pair uPair = calculateTwoSidedU(observations); - int u = uPair.first; + public Pair runTwoSidedTest() { + Pair uPair = calculateTwoSidedU(observations); + long u = uPair.first; int n = uPair.second == USet.SET1 ? sizeSet1 : sizeSet2; int m = uPair.second == USet.SET1 ? sizeSet2 : sizeSet1; double pval = calculateP(n,m,u,true); - return new Pair(u,pval); + return new Pair(u,pval); } /** @@ -61,14 +61,18 @@ public class MannWhitneyU { * @param u - the Mann-Whitney U value * @param twoSided - is the test twosided * @return the (possibly approximate) p-value associated with the MWU test + * todo -- there must be an approximation for small m and large n */ - public static double calculateP(int n, int m, int u, boolean twoSided) { + public static double calculateP(int n, int m, long u, boolean twoSided) { double pval; if ( n > 8 && m > 8 ) { + // large m and n - normal approx pval = calculatePNormalApproximation(n,m,u); } else if ( n > 4 && m > 7 ) { + // large m, small n - sum uniform approx pval = calculatePUniformApproximation(n,m,u); } else { + // small m [possibly small n] - full approx pval = calculatePRecursively(n,m,u); } @@ -82,9 +86,9 @@ public class MannWhitneyU { * @param u - the Mann-Whitney U value * @return p-value associated with the normal approximation */ - public static double calculatePNormalApproximation(int n,int m,int u) { - double mean = ((double) m*n+1)/2; - double var = (n*m*(n+m+1))/12; + public static double calculatePNormalApproximation(int n,int m,long u) { + double mean = ( ((long)m)*n+1.0)/2; + double var = (((long) n)*m*(n+m+1.0))/12; double z = ( u - mean )/Math.sqrt(var); return z < 0 ? STANDARD_NORMAL.cdf(z) : 1.0-STANDARD_NORMAL.cdf(z); } @@ -97,8 +101,8 @@ public class MannWhitneyU { * @param u - * @return */ - public static double calculatePUniformApproximation(int n, int m, int u) { - int R = u + (n*(n+1))/2; + public static double calculatePUniformApproximation(int n, int m, long u) { + long R = u + (n*(n+1))/2; double a = Math.sqrt(m*(n+m+1)); double b = (n/2.0)*(1-Math.sqrt((n+m+1)/m)); double z = b + R/a; @@ -130,11 +134,11 @@ public class MannWhitneyU { * @param observed * @return the minimum of the U counts (set1 dominates 2, set 2 dominates 1) */ - public static Pair calculateTwoSidedU(TreeSet> observed ) { + public static Pair calculateTwoSidedU(TreeSet> observed ) { int set1SeenSoFar = 0; int set2SeenSoFar = 0; - int uSet1DomSet2 = 0; - int uSet2DomSet1 = 0; + long uSet1DomSet2 = 0; + long uSet2DomSet1 = 0; USet previous = null; for ( Pair dataPoint : observed ) { @@ -155,7 +159,7 @@ public class MannWhitneyU { previous = dataPoint.second; } - return uSet1DomSet2 < uSet2DomSet1 ? new Pair(uSet1DomSet2,USet.SET1) : new Pair(uSet2DomSet1,USet.SET2); + return uSet1DomSet2 < uSet2DomSet1 ? new Pair(uSet1DomSet2,USet.SET1) : new Pair(uSet2DomSet1,USet.SET2); } /** @@ -167,8 +171,8 @@ public class MannWhitneyU { * @param u: number of set-two entries that precede set-one entries (e.g. 0,1,0,1,0 -> 3 ) * @return the probability under the hypothesis that all sequences are equally likely of finding a set-two entry preceding a set-one entry "u" times. */ - public static double calculatePRecursively(int n, int m, int u) { - if ( m + n > 16 ) { throw new StingException("Please use the appropriate (normal or sum of uniform) approximation"); } + public static double calculatePRecursively(int n, int m, long u) { + if ( m > 7 && n > 4 ) { throw new StingException(String.format("Please use the appropriate (normal or sum of uniform) approximation. Values n: %d, m: %d",n,m)); } return cpr(n,m,u); } @@ -178,7 +182,7 @@ public class MannWhitneyU { * @m: number of set-2 entries * @u: number of times a set-2 entry as preceded a set-1 entry */ - private static double cpr(int n, int m, int u) { + private static double cpr(int n, int m, long u) { if ( u < 0 || n == 0 && m == 0 ) { return 0.0; } diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/RegionalAssociationUnitTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/RegionalAssociationUnitTest.java index c3be42103..dd0401b7b 100755 --- a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/RegionalAssociationUnitTest.java +++ b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/RegionalAssociationUnitTest.java @@ -76,22 +76,22 @@ public class RegionalAssociationUnitTest extends BaseTest { UTest test1 = new UTest(); test1.setCaseData((Collection) Arrays.asList(2,4,5,6,8)); test1.setControlData((Collection) Arrays.asList(1,3,7,9,10,11,12,13)); - Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test1).first,10,0); + Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test1).first,10l); Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test1).second,0.092292,5e-2); // z-approximation, off by about 0.05 Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test1).second,0.044444,1e-3); // recursive calculation UTest test2 = new UTest(); test2.setCaseData((Collection) Arrays.asList(1,7,8,9,10,11,15,18)); test2.setControlData((Collection) Arrays.asList(2,3,4,5,6,12,13,14,16,17)); - Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test2).first,37,0); + Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test2).first,37l); UTest test3 = new UTest(); test3.setCaseData((Collection)Arrays.asList(13,14,7,18,5,2,9,17,8,10,3,15,19,6,20,16,11,4,12,1)); test3.setControlData((Collection) Arrays.asList(29,21,14,10,12,11,28,19,18,13,7,27,20,5,17,16,9,23,22,26)); - Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test3).first,93,0); + Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test3).first,93l); Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test3).second,2*0.00302,1e-3); UTest test4 = new UTest(); test4.setCaseData((Collection) Arrays.asList(1,2,4,5,6,9)); test4.setControlData((Collection) Arrays.asList(3,8,11,12,13)); - Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test4).first,5,0); + Assert.assertEquals((long) AssociationTestRunner.mannWhitneyUTest(test4).first,5l); Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test4).second,0.0303,1e-4); }