From 79b5fa6cc5b7b475cd8dda850f1be86f3cd846cd Mon Sep 17 00:00:00 2001 From: chartl Date: Tue, 5 Apr 2011 18:52:32 +0000 Subject: [PATCH] Structural refactoring in advance of dichotomization statistics; generalization of statistical test infrastructure. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5579 348d0f76-0448-11de-a6fe-93d51630548a --- .../association/AssociationTestRunner.java | 120 ++---------------- .../modules/casecontrol/BaseQualityScore.java | 2 +- .../casecontrol/InsertSizeDistribution.java | 2 +- .../modules/casecontrol/MappingQuality0.java | 2 +- .../casecontrol/MateMappingQuality.java | 2 +- .../modules/casecontrol/MateOtherContig.java | 2 +- .../modules/casecontrol/MateSameStrand.java | 2 +- .../modules/casecontrol/MateUnmapped.java | 2 +- .../modules/casecontrol/MismatchRate.java | 2 +- .../modules/casecontrol/ProperPairs.java | 2 +- .../modules/casecontrol/ProportionTest.java | 71 +++++++++++ .../modules/casecontrol/ReadClipping.java | 2 +- .../modules/casecontrol/ReadIndels.java | 2 +- .../casecontrol/ReadMappingQuality.java | 2 +- .../casecontrol/ReferenceMismatches.java | 2 +- .../modules/casecontrol/SampleDepth.java | 2 +- .../modules/casecontrol/TStatistic.java | 36 ------ .../modules/casecontrol/UStatistic.java | 36 ------ .../modules/casecontrol/ValueTest.java | 103 +++++++++++++++ .../modules/casecontrol/ZStatistic.java | 21 --- .../statistics/Dichotomizable.java | 16 +++ .../statistics/Dichotomizer1D.java | 56 ++++++++ .../statistics/StatisticalTest.java | 15 +++ .../sting/utils/MannWhitneyU.java | 11 ++ .../walkers/RegionalAssociationUnitTest.java | 38 +++--- 25 files changed, 318 insertions(+), 233 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ProportionTest.java delete mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/TStatistic.java delete mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/UStatistic.java create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ValueTest.java delete mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ZStatistic.java create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizable.java create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizer1D.java create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/StatisticalTest.java diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java index dfb5fbef9..fb0b6d6c2 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java @@ -1,15 +1,13 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association; -import java.util.ArrayList; import java.util.Collection; -import java.util.List; import java.util.Map; import cern.jet.random.Normal; import cern.jet.random.StudentT; import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.*; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.StatisticalTest; import org.broadinstitute.sting.utils.MannWhitneyU; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.Pair; @@ -23,122 +21,34 @@ import org.broadinstitute.sting.utils.collections.Pair; public class AssociationTestRunner { final static int MAX_Q_VALUE = Integer.MAX_VALUE; // todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance - static Normal standardNormal = new Normal(0.0,1.0,null); public static int pToQ(double p) { return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE); } public static Pair> getTestValues(AssociationContext context) { - if ( context instanceof TStatistic) { - Pair t = testStudentT((TStatistic) context); - return new Pair> (t.first,new Pair(t.second,pToQ(t.second))); - } - - if ( context instanceof ZStatistic) { - Pair z = testZ((ZStatistic) context); - return new Pair> (z.first,new Pair(z.second,pToQ(z.second))); - } - - if ( context instanceof UStatistic ) { - Pair u = mannWhitneyUTest((UStatistic) context); - return new Pair> ( u.first, new Pair(u.second,pToQ(u.second))); + if ( context instanceof StatisticalTest ) { + Pair statAndP = ((StatisticalTest) context).getStatisticAndPValue(); + return new Pair>(statAndP.first, + new Pair(statAndP.second,pToQ(statAndP.second))); } return null; } public static String runTests(AssociationContext context) { - List results = new ArrayList(); - if ( context instanceof TStatistic) { - results.add(runStudentT((TStatistic) context)); + if ( context instanceof StatisticalTest ) { + Pair> results = getTestValues(context); + return String.format("%s: %.2f\tP: %.2e\tQ: %d", + ((StatisticalTest) context).getStatisticName() , + results.first,results.second.first,results.second.second); } - if ( context instanceof ZStatistic) { - results.add(runZ((ZStatistic) context)); - } - - if ( context instanceof UStatistic ) { - results.add(runU((UStatistic) context)); - } - - StringBuffer buf = new StringBuffer(); - if ( results.size() > 0 ) { - buf.append(results.remove(0)); - for ( String s : results ) { - buf.append('\t'); - buf.append(s); - } - } - - return buf.toString(); + return null; } - - public static String runStudentT(TStatistic context) { - Pair stats = testStudentT(context); - double t = stats.first; - double p = stats.second; - return String.format("T: %.2f\tP: %.2e\tQ: %d",t,p,pToQ(p)); - } - - public static Pair testStudentT(TStatistic context) { - Map> caseControlVectors = context.getCaseControl(); - if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) { - return new Pair(Double.NaN,Double.NaN); - } - double meanCase = MathUtils.average(caseControlVectors.get(CaseControl.Cohort.CASE)); - double varCase = MathUtils.variance(caseControlVectors.get(CaseControl.Cohort.CASE),meanCase); - double nCase = caseControlVectors.get(CaseControl.Cohort.CASE).size(); - double meanControl = MathUtils.average(caseControlVectors.get(CaseControl.Cohort.CONTROL)); - double varControl = MathUtils.variance(caseControlVectors.get(CaseControl.Cohort.CONTROL),meanControl); - double nControl = caseControlVectors.get(CaseControl.Cohort.CONTROL).size(); - - double df_num = Math.pow(varCase/nCase + varControl/nControl,2); - double df_denom = Math.pow(varCase/nCase,2)/(nCase-1) + Math.pow(varControl/nControl,2)/(nControl-1); - double t = (meanCase-meanControl)/Math.sqrt(varCase/nCase+varControl/nControl); - - StudentT studentT = new StudentT(df_num/df_denom,null); - double p = t < 0 ? 2*studentT.cdf(t) : 2*(1-studentT.cdf(t)); - - return new Pair(t,p); - } - - public static String runZ(ZStatistic context) { - Pair stats = testZ(context); - double z = stats.first; - double p = stats.second; - return String.format("Z: %.2f\tP: %.2e\tQ: %d",z,p,pToQ(p)); - } - - public static Pair testZ(ZStatistic context) { - Map> caseControlCounts = context.getCaseControl(); - if ( caseControlCounts == null || caseControlCounts.get(CaseControl.Cohort.CASE) == null || caseControlCounts.get(CaseControl.Cohort.CONTROL) == null ) { - return new Pair(Double.NaN,Double.NaN); - } - double pCase = caseControlCounts.get(CaseControl.Cohort.CASE).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue(); - double pControl = caseControlCounts.get(CaseControl.Cohort.CONTROL).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue(); - double nCase = caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue(); - double nControl = caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue(); - - double p2 = (caseControlCounts.get(CaseControl.Cohort.CASE).first.doubleValue()+caseControlCounts.get(CaseControl.Cohort.CONTROL).first.doubleValue())/ - (caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue()+caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue()); - double se = Math.sqrt(p2*(1-p2)*(1/nCase + 1/nControl)); - - double z = (pCase-pControl)/se; - double p = z < 0 ? 2*standardNormal.cdf(z) : 2*(1-standardNormal.cdf(z)); - - return new Pair(z,p); - } - - public static String runU(UStatistic context) { - // note: u statistic (U) is relatively useless for recalibrating outside of the context of m and n - // thus we report V = (U - (m*n+1)/2)/(n*m*(n+m+1)/12) - Pair results = mannWhitneyUTest(context); - return String.format("V: %.2f\tP: %.2e\tQ: %d",results.first,results.second,pToQ(results.second)); - } - - public static Pair mannWhitneyUTest(UStatistic context) { + @Deprecated // this is used for testing only at the moment + public static Pair mannWhitneyUTest(ValueTest context) { Map> caseControlVectors = context.getCaseControl(); if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) { return new Pair(Double.NaN,Double.NaN); @@ -152,8 +62,4 @@ public class AssociationTestRunner { } return mwu.runTwoSidedTest(); } - - public static String runFisherExact(AssociationContext context) { - return "Test not yet implemented"; - } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/BaseQualityScore.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/BaseQualityScore.java index dae99951a..f8c85f511 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/BaseQualityScore.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/BaseQualityScore.java @@ -13,7 +13,7 @@ import java.util.Collection; * Time: 1:52 PM * To change this template use File | Settings | File Templates. */ -public class BaseQualityScore extends TStatistic { +public class BaseQualityScore extends ValueTest { public Collection map(ReadBackedPileup rbp) { ArrayList baseQuals = new ArrayList(rbp.size()); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/InsertSizeDistribution.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/InsertSizeDistribution.java index b6aba38c4..e4ed6008f 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/InsertSizeDistribution.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/InsertSizeDistribution.java @@ -10,7 +10,7 @@ import java.util.List; /** * @author chartl */ -public class InsertSizeDistribution extends UStatistic { +public class InsertSizeDistribution extends ValueTest { public boolean usePreviouslySeenReads() { return false; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MappingQuality0.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MappingQuality0.java index 83fb7a6e7..83081aaef 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MappingQuality0.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MappingQuality0.java @@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; * Time: 1:29 PM * To change this template use File | Settings | File Templates. */ -public class MappingQuality0 extends ZStatistic { +public class MappingQuality0 extends ProportionTest { public Pair map(ReadBackedPileup rbp) { int total = 0; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateMappingQuality.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateMappingQuality.java index ccd9b6d48..2e27a69e2 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateMappingQuality.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateMappingQuality.java @@ -13,7 +13,7 @@ import java.util.Collection; * Time: 1:43 PM * To change this template use File | Settings | File Templates. */ -public class MateMappingQuality extends UStatistic { +public class MateMappingQuality extends ValueTest { public Collection map(ReadBackedPileup rbp) { ArrayList mateMapQ = new ArrayList(rbp.size()); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateOtherContig.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateOtherContig.java index a71c88ae7..2fe423c5a 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateOtherContig.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateOtherContig.java @@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; * Time: 3:00 PM * To change this template use File | Settings | File Templates. */ -public class MateOtherContig extends ZStatistic { +public class MateOtherContig extends ProportionTest { public boolean usePreviouslySeenReads() { return false; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateSameStrand.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateSameStrand.java index c8b5cb9e9..371e4897b 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateSameStrand.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateSameStrand.java @@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; * Time: 1:47 PM * To change this template use File | Settings | File Templates. */ -public class MateSameStrand extends ZStatistic { +public class MateSameStrand extends ProportionTest { public Pair map(ReadBackedPileup rbp) { int numPairs = 0; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateUnmapped.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateUnmapped.java index 07e75ae06..70b210ee8 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateUnmapped.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateUnmapped.java @@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; * Time: 2:09 PM * To change this template use File | Settings | File Templates. */ -public class MateUnmapped extends ZStatistic { +public class MateUnmapped extends ProportionTest { public Pair map(ReadBackedPileup pileup) { int numMatedReads = 0; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MismatchRate.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MismatchRate.java index 95bda5ea3..c05bd0bd2 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MismatchRate.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MismatchRate.java @@ -17,7 +17,7 @@ import java.util.*; * Time: 2:32 PM * To change this template use File | Settings | File Templates. */ -public class MismatchRate extends UStatistic { +public class MismatchRate extends ValueTest { private Map sampleStats = new HashMap(); private int currentRefBase = 0; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ProperPairs.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ProperPairs.java index cf23cd0d8..989adfabf 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ProperPairs.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ProperPairs.java @@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; * Time: 1:50 PM * To change this template use File | Settings | File Templates. */ -public class ProperPairs extends ZStatistic { +public class ProperPairs extends ProportionTest { public Pair map(ReadBackedPileup rbp) { int numReads = 0; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ProportionTest.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ProportionTest.java new file mode 100755 index 000000000..1204ff39e --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ProportionTest.java @@ -0,0 +1,71 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; + +import cern.jet.random.Normal; +import org.broadinstitute.sting.gatk.datasources.sample.Sample; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.Dichotomizable; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.StatisticalTest; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Map; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Feb 24, 2011 + * Time: 12:58:38 PM + * To change this template use File | Settings | File Templates. + */ +public abstract class ProportionTest extends CaseControl> implements Dichotomizable, StatisticalTest { + public static final Normal standardNormal = new Normal(0.0,1.0,null); + + public abstract Pair map(ReadBackedPileup rbp ); + + public Pair add(Pair left, Pair right) { + return new Pair(left.first.doubleValue()+right.first.doubleValue(), + left.second.doubleValue()+right.second.doubleValue()); + } + + public Pair,Collection> getDichotomizedData() { + Collection caseProps = new ArrayList(); + Collection controlProps = new ArrayList(); + + for (Map> sampleNumberMap : window ) { + for ( Map.Entry> samplePairEntry : sampleNumberMap.entrySet() ) { + Pair val = samplePairEntry.getValue(); + if ( samplePairEntry.getKey().getProperty("cohort").equals("case")) { + caseProps.add(val.first.doubleValue()/val.second.doubleValue()); + } else if ( samplePairEntry.getKey().getProperty("cohort").equals("control") ) { + controlProps.add(val.first.doubleValue()/val.second.doubleValue()); + } + } + } + + return new Pair,Collection>(caseProps,controlProps); + } + + public Pair getStatisticAndPValue() { + Map> caseControlCounts = getCaseControl(); + if ( caseControlCounts == null || caseControlCounts.get(CaseControl.Cohort.CASE) == null || caseControlCounts.get(CaseControl.Cohort.CONTROL) == null ) { + return new Pair(Double.NaN,Double.NaN); + } + double pCase = caseControlCounts.get(CaseControl.Cohort.CASE).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue(); + double pControl = caseControlCounts.get(CaseControl.Cohort.CONTROL).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue(); + double nCase = caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue(); + double nControl = caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue(); + + double p2 = (caseControlCounts.get(CaseControl.Cohort.CASE).first.doubleValue()+caseControlCounts.get(CaseControl.Cohort.CONTROL).first.doubleValue())/ + (caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue()+caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue()); + double se = Math.sqrt(p2*(1-p2)*(1/nCase + 1/nControl)); + + double z = (pCase-pControl)/se; + double p = z < 0 ? 2*standardNormal.cdf(z) : 2*(1-standardNormal.cdf(z)); + + return new Pair(z,p); + } + + public String getStatisticName() { return "Z"; } + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadClipping.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadClipping.java index fa234700e..6d29f92ef 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadClipping.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadClipping.java @@ -15,7 +15,7 @@ import java.util.Collection; * Time: 1:31 PM * To change this template use File | Settings | File Templates. */ -public class ReadClipping extends TStatistic { +public class ReadClipping extends ValueTest { public Collection map(ReadBackedPileup rbp) { ArrayList clipping = new ArrayList(rbp.size()); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadIndels.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadIndels.java index 64478d33b..db85acad0 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadIndels.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadIndels.java @@ -15,7 +15,7 @@ import java.util.Collection; * Time: 1:54 PM * To change this template use File | Settings | File Templates. */ -public class ReadIndels extends UStatistic { +public class ReadIndels extends ValueTest { public Collection map(ReadBackedPileup rbp) { ArrayList indelElements = new ArrayList(rbp.size()); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadMappingQuality.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadMappingQuality.java index 782a45adb..e5e2ba727 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadMappingQuality.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadMappingQuality.java @@ -13,7 +13,7 @@ import java.util.Collection; * Time: 1:41 PM * To change this template use File | Settings | File Templates. */ -public class ReadMappingQuality extends TStatistic { +public class ReadMappingQuality extends ValueTest { public Collection map(ReadBackedPileup rbp) { ArrayList mapQuals = new ArrayList(rbp.size()); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReferenceMismatches.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReferenceMismatches.java index 455138cd8..0abc9930f 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReferenceMismatches.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReferenceMismatches.java @@ -15,7 +15,7 @@ import java.util.Map; * Time: 1:17 PM * To change this template use File | Settings | File Templates. */ -public class ReferenceMismatches extends ZStatistic { +public class ReferenceMismatches extends ProportionTest { final static int[] BASE_INDEX = {0,1,2,3}; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/SampleDepth.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/SampleDepth.java index fd7661e8a..99c5e5ddb 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/SampleDepth.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/SampleDepth.java @@ -12,7 +12,7 @@ import java.util.*; /** * @author chartl */ -public class SampleDepth extends UStatistic { +public class SampleDepth extends ValueTest { public Map sampleStats = null; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/TStatistic.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/TStatistic.java deleted file mode 100755 index 4037ca3bf..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/TStatistic.java +++ /dev/null @@ -1,36 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Set; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: Feb 24, 2011 - * Time: 12:58:16 PM - * To change this template use File | Settings | File Templates. - */ -public abstract class TStatistic extends CaseControl> { - - public abstract Collection map(ReadBackedPileup rbp ); - - public Collection add(Collection left, Collection right) { - if ( left instanceof ArrayList) { - ((ArrayList) left).addAll(right); - return left; - } else if ( left instanceof Set ) { - ((Set) left).addAll(right); - return left; - } else { - List newList = new ArrayList(left.size()+right.size()); - newList.addAll(left); - newList.addAll(right); - return newList; - } - } - -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/UStatistic.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/UStatistic.java deleted file mode 100755 index 77001262a..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/UStatistic.java +++ /dev/null @@ -1,36 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Set; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/2/11 - * Time: 1:53 PM - * To change this template use File | Settings | File Templates. - */ -public abstract class UStatistic extends CaseControl> { - - public abstract Collection map(ReadBackedPileup rbp ); - - public Collection add(Collection left, Collection right) { - if ( left instanceof ArrayList ) { - ((ArrayList) left).addAll(right); - return left; - } else if ( left instanceof Set) { - ((Set) left).addAll(right); - return left; - } else { - List newList = new ArrayList(left.size()+right.size()); - newList.addAll(left); - newList.addAll(right); - return newList; - } - } - -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ValueTest.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ValueTest.java new file mode 100755 index 000000000..6a21ecb7f --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ValueTest.java @@ -0,0 +1,103 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; + +import cern.jet.random.StudentT; +import org.broadinstitute.sting.gatk.datasources.sample.Sample; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.Dichotomizable; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.StatisticalTest; +import org.broadinstitute.sting.utils.MannWhitneyU; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 3/2/11 + * Time: 1:53 PM + * To change this template use File | Settings | File Templates. + */ +public abstract class ValueTest extends CaseControl> implements StatisticalTest, Dichotomizable { + + public abstract Collection map(ReadBackedPileup rbp ); + + public boolean useTStatistic() { return false; } // default to using the U statistic + + public Collection add(Collection left, Collection right) { + if ( left instanceof ArrayList ) { + ((ArrayList) left).addAll(right); + return left; + } else if ( left instanceof Set) { + ((Set) left).addAll(right); + return left; + } else { + List newList = new ArrayList(left.size()+right.size()); + newList.addAll(left); + newList.addAll(right); + return newList; + } + } + + public Pair,Collection> getDichotomizedData() { + Collection caseMeans = new ArrayList(); + Collection controlMeans = new ArrayList(); + + for ( Map> sampleMap : window ) { + for ( Map.Entry> sampleEntry : sampleMap.entrySet() ) { + if ( sampleEntry.getKey().getProperty("cohort").equals("case") ) { + caseMeans.add(MathUtils.average(sampleEntry.getValue())); + } else if ( sampleEntry.getKey().getProperty("cohort").equals("control") ) { + controlMeans.add(MathUtils.average(sampleEntry.getValue())); + } + } + } + + return new Pair,Collection>(caseMeans,controlMeans); + } + + public Pair getUStatisticAndPValue() { + MannWhitneyU mwu = new MannWhitneyU(); + + for ( Map.Entry> cohortEntry : getCaseControl().entrySet() ) { + if ( cohortEntry.getKey().equals(Cohort.CASE) ) { + for ( Number n : cohortEntry.getValue() ) { + mwu.add(n,MannWhitneyU.USet.SET1); + } + } else if ( cohortEntry.getKey().equals(Cohort.CONTROL)) { + for ( Number n : cohortEntry.getValue() ) { + mwu.add(n,MannWhitneyU.USet.SET2); + } + } + } + + return mwu.runTwoSidedTest(); + } + + public Pair getStatisticAndPValue() { return useTStatistic() ? getTStatisticAndPValue() : getUStatisticAndPValue(); } + + public String getStatisticName() { return useTStatistic() ? "T" : "V"; } + + public Pair getTStatisticAndPValue() { + Map> caseControlVectors = getCaseControl(); + if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) { + return new Pair(Double.NaN,Double.NaN); + } + double meanCase = MathUtils.average(caseControlVectors.get(CaseControl.Cohort.CASE)); + double varCase = MathUtils.variance(caseControlVectors.get(CaseControl.Cohort.CASE),meanCase); + double nCase = caseControlVectors.get(CaseControl.Cohort.CASE).size(); + double meanControl = MathUtils.average(caseControlVectors.get(CaseControl.Cohort.CONTROL)); + double varControl = MathUtils.variance(caseControlVectors.get(CaseControl.Cohort.CONTROL),meanControl); + double nControl = caseControlVectors.get(CaseControl.Cohort.CONTROL).size(); + + double df_num = Math.pow(varCase/nCase + varControl/nControl,2); + double df_denom = Math.pow(varCase/nCase,2)/(nCase-1) + Math.pow(varControl/nControl,2)/(nControl-1); + double t = (meanCase-meanControl)/Math.sqrt(varCase/nCase+varControl/nControl); + + StudentT studentT = new StudentT(df_num/df_denom,null); + double p = t < 0 ? 2*studentT.cdf(t) : 2*(1-studentT.cdf(t)); + + return new Pair(t,p); + } + +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ZStatistic.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ZStatistic.java deleted file mode 100755 index f69c990e3..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ZStatistic.java +++ /dev/null @@ -1,21 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: Feb 24, 2011 - * Time: 12:58:38 PM - * To change this template use File | Settings | File Templates. - */ -public abstract class ZStatistic extends CaseControl> { - public abstract Pair map(ReadBackedPileup rbp ); - - public Pair add(Pair left, Pair right) { - return new Pair(left.first.doubleValue()+right.first.doubleValue(), - left.second.doubleValue()+right.second.doubleValue()); - } - -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizable.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizable.java new file mode 100755 index 000000000..29e985295 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizable.java @@ -0,0 +1,16 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.statistics; + +import org.broadinstitute.sting.utils.collections.Pair; + +import java.util.Collection; + +/** + * Created by IntelliJ IDEA. + * User: Ghost + * Date: 4/5/11 + * Time: 11:35 AM + * To change this template use File | Settings | File Templates. + */ +public interface Dichotomizable { + public abstract Pair,Collection> getDichotomizedData(); +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizer1D.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizer1D.java new file mode 100755 index 000000000..c6b977f9c --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizer1D.java @@ -0,0 +1,56 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.statistics; + +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.collections.Pair; + +import java.util.ArrayList; +import java.util.Collection; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 4/4/11 + * Time: 7:18 PM + * To change this template use File | Settings | File Templates. + */ +public class Dichotomizer1D { + + private Dichotomizer1D() { } // no instantiation + + public abstract class Transform { + public abstract double transform(double n); + + public Collection apply(Collection collection) { + ArrayList tData = new ArrayList(collection.size()); + for ( Number n : collection ) { + tData.add(transform(n.doubleValue())); + } + + return tData; + } + } + + public static double simpleGaussianDichotomy(Collection setOne, Collection setTwo) { + double meanOne = MathUtils.sum(setOne)/setOne.size(); + double meanTwo = MathUtils.sum(setTwo)/setTwo.size(); + double stdOne = Math.sqrt(MathUtils.variance(setOne, meanOne)); + double stdTwo = Math.sqrt(MathUtils.variance(setTwo, meanTwo)); + + return 1.0 - (3.0*(stdOne+stdTwo))/Math.abs(meanOne-meanTwo); + } + + public static double simpleGaussianDichotomy(Collection setOne, Collection setTwo, Transform transform) { + return simpleGaussianDichotomy(transform.apply(setOne),transform.apply(setTwo)); + } + + public static double simpleGaussianDichotomy(Dichotomizable dichotomizable) { + Pair,Collection> dichotomizedData = dichotomizable.getDichotomizedData(); + return simpleGaussianDichotomy(dichotomizedData.first,dichotomizedData.second); + } + + public static double simpleGaussianDichotomy(Dichotomizable dichotomizable, Transform transform) { + Pair,Collection> dichotomizedData = dichotomizable.getDichotomizedData(); + return simpleGaussianDichotomy(dichotomizedData.first,dichotomizedData.second, transform); + } + +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/StatisticalTest.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/StatisticalTest.java new file mode 100755 index 000000000..50ee8ad05 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/StatisticalTest.java @@ -0,0 +1,15 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.statistics; + +import org.broadinstitute.sting.utils.collections.Pair; + +/** + * Created by IntelliJ IDEA. + * User: Ghost + * Date: 4/5/11 + * Time: 12:26 PM + * To change this template use File | Settings | File Templates. + */ +public interface StatisticalTest { + public abstract Pair getStatisticAndPValue(); + public abstract String getStatisticName(); +} diff --git a/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java b/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java index b196524e8..d97a98868 100755 --- a/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java +++ b/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java @@ -40,6 +40,17 @@ public class MannWhitneyU { } } + public Pair getR1R2() { + long u1 = calculateOneSidedU(observations,MannWhitneyU.USet.SET1); + long n1 = sizeSet1*(sizeSet1+1)/2; + long r1 = u1 + n1; + long n2 = sizeSet2*(sizeSet2+1)/2; + long u2 = n1*n2-u1; + long r2 = u2 + n2; + + return new Pair(r1,r2); + } + /** * Runs the one-sided test under the hypothesis that the data in set "lessThanOther" stochastically * dominates the other set diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/RegionalAssociationUnitTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/RegionalAssociationUnitTest.java index a94be934f..077388946 100755 --- a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/RegionalAssociationUnitTest.java +++ b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/RegionalAssociationUnitTest.java @@ -1,18 +1,15 @@ package org.broadinstitute.sting.oneoffprojects.walkers; -import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.UStatistic; -import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.ZStatistic; +import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.ProportionTest; +import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.ValueTest; import org.broadinstitute.sting.utils.MannWhitneyU; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationTestRunner; -import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.TStatistic; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -import org.broadinstitute.sting.BaseTest; import org.testng.Assert; import java.util.Arrays; import java.util.Collection; @@ -36,20 +33,20 @@ public class RegionalAssociationUnitTest extends BaseTest { TTest test1 = new TTest(); test1.setCaseData((Collection) Arrays.asList(1,1,2,3,4)); test1.setControlData((Collection) Arrays.asList(10, 10, 20, 30, 40)); - Assert.assertEquals(AssociationTestRunner.testStudentT(test1).second,0.1702,1e-2); + Assert.assertEquals(AssociationTestRunner.getTestValues(test1).second.first,0.1702,1e-2); TTest test2 = new TTest(); test2.setCaseData((Collection) Arrays.asList(5, 6, 5, 2, 3, 8, 7, 12, 10, 6, 4, 2, 8, 7, 3)); test2.setControlData((Collection) Arrays.asList(1, 6, 7, 2, 3, 3, 4, 1, 2, 5, 7, 3, 10, 3, 3, 2, 3)); - Assert.assertEquals(AssociationTestRunner.testStudentT(test2).second, 0.5805, 1e-2); + Assert.assertEquals(AssociationTestRunner.getTestValues(test2).second.first, 0.5805, 1e-2); TTest test3 = new TTest(); test3.setCaseData((Collection) Arrays.asList(94,25,68,4,27,51,9,10,91,61,61,37,39,44,36,27,86,33,3,38,5,6,28,93,30,56,81,8,40,44)); test3.setControlData((Collection) Arrays.asList(6,64,96,85,20,74,93,18,31,20,88,38,80,50,33,81,35,8,2,69,49,6,26,74,79,63,63,96,45,18)); - Assert.assertEquals(AssociationTestRunner.testStudentT(test3).second,0.8229,1e-4); + Assert.assertEquals(AssociationTestRunner.getTestValues(test3).second.first,0.8229,1e-4); TTest test4 = new TTest(); test4.setCaseData((Collection) Arrays.asList(14,8,8,17,8,12,10,10,13,9,13,9,9,12,12,11,16,12,13,16,10,13,11,16,13,16,11,13,9,16,16,14,9,14,17,10,15,15,9,15,17,15,17,12,10,13,11,14,8,14)); test4.setControlData((Collection) Arrays.asList(7,1,4,2,3,7,8,5,5,4,10,6,4,9,2,9,9,3,3,10,1,8,9,5,3,7,2,7,10,9,4,9,2,10,10,3,2,3,4,4,5,10,9,4,3,5,6,10,5,10)); - Assert.assertEquals(AssociationTestRunner.testStudentT(test4).second,0.1006,1e-4); - Assert.assertEquals(AssociationTestRunner.testStudentT(test4).first,1.657989,1e-6); + Assert.assertEquals(AssociationTestRunner.getTestValues(test4).second.first,0.1006,1e-4); + Assert.assertEquals(AssociationTestRunner.getTestValues(test4).first,1.657989,1e-6); } @Test @@ -58,17 +55,17 @@ public class RegionalAssociationUnitTest extends BaseTest { ZTest test1 = new ZTest(); test1.setCaseData(new Pair(100,500)); test1.setControlData(new Pair(55,300)); - Assert.assertEquals(AssociationTestRunner.testZ(test1).first,0.57742362050306,2e-6); - Assert.assertEquals(AssociationTestRunner.testZ(test1).second,0.56367,2e-5); + Assert.assertEquals(AssociationTestRunner.getTestValues(test1).first,0.57742362050306,2e-6); + Assert.assertEquals(AssociationTestRunner.getTestValues(test1).second.first,0.56367,2e-5); ZTest test2 = new ZTest(); test1.setCaseData(new Pair(1020, 1800)); test1.setControlData(new Pair(680, 1670)); - Assert.assertEquals(AssociationTestRunner.testZ(test1).first,9.3898178216531,2e-6); + Assert.assertEquals(AssociationTestRunner.getTestValues(test1).first,9.3898178216531,2e-6); ZTest test3 = new ZTest(); test3.setCaseData(new Pair(20,60)); test3.setControlData(new Pair(30,80)); - Assert.assertEquals(AssociationTestRunner.testZ(test3).first,-0.50917511840392,2e-6); - Assert.assertEquals(AssociationTestRunner.testZ(test3).second,0.610643593,2e-4); + Assert.assertEquals(AssociationTestRunner.getTestValues(test3).first,-0.50917511840392,2e-6); + Assert.assertEquals(AssociationTestRunner.getTestValues(test3).second.first,0.610643593,2e-4); } @Test @@ -160,7 +157,7 @@ public class RegionalAssociationUnitTest extends BaseTest { } - private class TTest extends TStatistic { + private class TTest extends ValueTest { Map> toTest = new HashMap>(2); @Override @@ -178,11 +175,14 @@ public class RegionalAssociationUnitTest extends BaseTest { public Collection map(ReadBackedPileup rbp) { return null; } public int getWindowSize() { return 1; } - public int slideByValue() { return 1; } + + @Override + public boolean useTStatistic() { return true; } + public boolean usePreviouslySeenReads() { return false; } } - private class ZTest extends ZStatistic { + private class ZTest extends ProportionTest { Map> toTest = new HashMap>(2); @Override @@ -204,7 +204,7 @@ public class RegionalAssociationUnitTest extends BaseTest { public boolean usePreviouslySeenReads() { return true; } } - private class UTest extends UStatistic { + private class UTest extends ValueTest { TTest test = new TTest(); public boolean usePreviouslySeenReads() { return false; } public int getWindowSize() { return 1; }