Structural refactoring in advance of dichotomization statistics; generalization of statistical test infrastructure.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5579 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
77ca4eef31
commit
79b5fa6cc5
|
|
@ -1,15 +1,13 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import cern.jet.random.Normal;
|
||||
import cern.jet.random.StudentT;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.*;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.StatisticalTest;
|
||||
import org.broadinstitute.sting.utils.MannWhitneyU;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
|
|
@ -23,122 +21,34 @@ import org.broadinstitute.sting.utils.collections.Pair;
|
|||
public class AssociationTestRunner {
|
||||
final static int MAX_Q_VALUE = Integer.MAX_VALUE;
|
||||
// todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance
|
||||
static Normal standardNormal = new Normal(0.0,1.0,null);
|
||||
|
||||
public static int pToQ(double p) {
|
||||
return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE);
|
||||
}
|
||||
|
||||
public static Pair<Double,Pair<Double,Integer>> getTestValues(AssociationContext context) {
|
||||
if ( context instanceof TStatistic) {
|
||||
Pair<Double,Double> t = testStudentT((TStatistic) context);
|
||||
return new Pair<Double,Pair<Double,Integer>> (t.first,new Pair<Double,Integer>(t.second,pToQ(t.second)));
|
||||
}
|
||||
|
||||
if ( context instanceof ZStatistic) {
|
||||
Pair<Double,Double> z = testZ((ZStatistic) context);
|
||||
return new Pair<Double,Pair<Double,Integer>> (z.first,new Pair<Double,Integer>(z.second,pToQ(z.second)));
|
||||
}
|
||||
|
||||
if ( context instanceof UStatistic ) {
|
||||
Pair<Double,Double> u = mannWhitneyUTest((UStatistic) context);
|
||||
return new Pair<Double,Pair<Double,Integer>> ( u.first, new Pair<Double,Integer>(u.second,pToQ(u.second)));
|
||||
if ( context instanceof StatisticalTest ) {
|
||||
Pair<Double,Double> statAndP = ((StatisticalTest) context).getStatisticAndPValue();
|
||||
return new Pair<Double,Pair<Double,Integer>>(statAndP.first,
|
||||
new Pair<Double,Integer>(statAndP.second,pToQ(statAndP.second)));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public static String runTests(AssociationContext context) {
|
||||
List<String> results = new ArrayList<String>();
|
||||
if ( context instanceof TStatistic) {
|
||||
results.add(runStudentT((TStatistic) context));
|
||||
if ( context instanceof StatisticalTest ) {
|
||||
Pair<Double,Pair<Double,Integer>> results = getTestValues(context);
|
||||
return String.format("%s: %.2f\tP: %.2e\tQ: %d",
|
||||
((StatisticalTest) context).getStatisticName() ,
|
||||
results.first,results.second.first,results.second.second);
|
||||
}
|
||||
|
||||
if ( context instanceof ZStatistic) {
|
||||
results.add(runZ((ZStatistic) context));
|
||||
}
|
||||
|
||||
if ( context instanceof UStatistic ) {
|
||||
results.add(runU((UStatistic) context));
|
||||
}
|
||||
|
||||
StringBuffer buf = new StringBuffer();
|
||||
if ( results.size() > 0 ) {
|
||||
buf.append(results.remove(0));
|
||||
for ( String s : results ) {
|
||||
buf.append('\t');
|
||||
buf.append(s);
|
||||
}
|
||||
}
|
||||
|
||||
return buf.toString();
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
public static String runStudentT(TStatistic context) {
|
||||
Pair<Double,Double> stats = testStudentT(context);
|
||||
double t = stats.first;
|
||||
double p = stats.second;
|
||||
return String.format("T: %.2f\tP: %.2e\tQ: %d",t,p,pToQ(p));
|
||||
}
|
||||
|
||||
public static Pair<Double,Double> testStudentT(TStatistic context) {
|
||||
Map<CaseControl.Cohort,Collection<Number>> caseControlVectors = context.getCaseControl();
|
||||
if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) {
|
||||
return new Pair<Double,Double>(Double.NaN,Double.NaN);
|
||||
}
|
||||
double meanCase = MathUtils.average(caseControlVectors.get(CaseControl.Cohort.CASE));
|
||||
double varCase = MathUtils.variance(caseControlVectors.get(CaseControl.Cohort.CASE),meanCase);
|
||||
double nCase = caseControlVectors.get(CaseControl.Cohort.CASE).size();
|
||||
double meanControl = MathUtils.average(caseControlVectors.get(CaseControl.Cohort.CONTROL));
|
||||
double varControl = MathUtils.variance(caseControlVectors.get(CaseControl.Cohort.CONTROL),meanControl);
|
||||
double nControl = caseControlVectors.get(CaseControl.Cohort.CONTROL).size();
|
||||
|
||||
double df_num = Math.pow(varCase/nCase + varControl/nControl,2);
|
||||
double df_denom = Math.pow(varCase/nCase,2)/(nCase-1) + Math.pow(varControl/nControl,2)/(nControl-1);
|
||||
double t = (meanCase-meanControl)/Math.sqrt(varCase/nCase+varControl/nControl);
|
||||
|
||||
StudentT studentT = new StudentT(df_num/df_denom,null);
|
||||
double p = t < 0 ? 2*studentT.cdf(t) : 2*(1-studentT.cdf(t));
|
||||
|
||||
return new Pair<Double,Double>(t,p);
|
||||
}
|
||||
|
||||
public static String runZ(ZStatistic context) {
|
||||
Pair<Double,Double> stats = testZ(context);
|
||||
double z = stats.first;
|
||||
double p = stats.second;
|
||||
return String.format("Z: %.2f\tP: %.2e\tQ: %d",z,p,pToQ(p));
|
||||
}
|
||||
|
||||
public static Pair<Double,Double> testZ(ZStatistic context) {
|
||||
Map<CaseControl.Cohort,Pair<Number,Number>> caseControlCounts = context.getCaseControl();
|
||||
if ( caseControlCounts == null || caseControlCounts.get(CaseControl.Cohort.CASE) == null || caseControlCounts.get(CaseControl.Cohort.CONTROL) == null ) {
|
||||
return new Pair<Double,Double>(Double.NaN,Double.NaN);
|
||||
}
|
||||
double pCase = caseControlCounts.get(CaseControl.Cohort.CASE).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue();
|
||||
double pControl = caseControlCounts.get(CaseControl.Cohort.CONTROL).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue();
|
||||
double nCase = caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue();
|
||||
double nControl = caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue();
|
||||
|
||||
double p2 = (caseControlCounts.get(CaseControl.Cohort.CASE).first.doubleValue()+caseControlCounts.get(CaseControl.Cohort.CONTROL).first.doubleValue())/
|
||||
(caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue()+caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue());
|
||||
double se = Math.sqrt(p2*(1-p2)*(1/nCase + 1/nControl));
|
||||
|
||||
double z = (pCase-pControl)/se;
|
||||
double p = z < 0 ? 2*standardNormal.cdf(z) : 2*(1-standardNormal.cdf(z));
|
||||
|
||||
return new Pair<Double,Double>(z,p);
|
||||
}
|
||||
|
||||
public static String runU(UStatistic context) {
|
||||
// note: u statistic (U) is relatively useless for recalibrating outside of the context of m and n
|
||||
// thus we report V = (U - (m*n+1)/2)/(n*m*(n+m+1)/12)
|
||||
Pair<Double,Double> results = mannWhitneyUTest(context);
|
||||
return String.format("V: %.2f\tP: %.2e\tQ: %d",results.first,results.second,pToQ(results.second));
|
||||
}
|
||||
|
||||
public static Pair<Double,Double> mannWhitneyUTest(UStatistic context) {
|
||||
@Deprecated // this is used for testing only at the moment
|
||||
public static Pair<Double,Double> mannWhitneyUTest(ValueTest context) {
|
||||
Map<CaseControl.Cohort,Collection<Number>> caseControlVectors = context.getCaseControl();
|
||||
if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) {
|
||||
return new Pair<Double,Double>(Double.NaN,Double.NaN);
|
||||
|
|
@ -152,8 +62,4 @@ public class AssociationTestRunner {
|
|||
}
|
||||
return mwu.runTwoSidedTest();
|
||||
}
|
||||
|
||||
public static String runFisherExact(AssociationContext context) {
|
||||
return "Test not yet implemented";
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ import java.util.Collection;
|
|||
* Time: 1:52 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class BaseQualityScore extends TStatistic {
|
||||
public class BaseQualityScore extends ValueTest {
|
||||
|
||||
public Collection<Number> map(ReadBackedPileup rbp) {
|
||||
ArrayList<Integer> baseQuals = new ArrayList<Integer>(rbp.size());
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ import java.util.List;
|
|||
/**
|
||||
* @author chartl
|
||||
*/
|
||||
public class InsertSizeDistribution extends UStatistic {
|
||||
public class InsertSizeDistribution extends ValueTest {
|
||||
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
* Time: 1:29 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MappingQuality0 extends ZStatistic {
|
||||
public class MappingQuality0 extends ProportionTest {
|
||||
|
||||
public Pair<Number,Number> map(ReadBackedPileup rbp) {
|
||||
int total = 0;
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ import java.util.Collection;
|
|||
* Time: 1:43 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MateMappingQuality extends UStatistic {
|
||||
public class MateMappingQuality extends ValueTest {
|
||||
|
||||
public Collection<Number> map(ReadBackedPileup rbp) {
|
||||
ArrayList<Integer> mateMapQ = new ArrayList<Integer>(rbp.size());
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
* Time: 3:00 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MateOtherContig extends ZStatistic {
|
||||
public class MateOtherContig extends ProportionTest {
|
||||
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
* Time: 1:47 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MateSameStrand extends ZStatistic {
|
||||
public class MateSameStrand extends ProportionTest {
|
||||
|
||||
public Pair<Number,Number> map(ReadBackedPileup rbp) {
|
||||
int numPairs = 0;
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
* Time: 2:09 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MateUnmapped extends ZStatistic {
|
||||
public class MateUnmapped extends ProportionTest {
|
||||
|
||||
public Pair<Number,Number> map(ReadBackedPileup pileup) {
|
||||
int numMatedReads = 0;
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ import java.util.*;
|
|||
* Time: 2:32 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MismatchRate extends UStatistic {
|
||||
public class MismatchRate extends ValueTest {
|
||||
|
||||
private Map<Sample,Object> sampleStats = new HashMap<Sample,Object>();
|
||||
private int currentRefBase = 0;
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
* Time: 1:50 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ProperPairs extends ZStatistic {
|
||||
public class ProperPairs extends ProportionTest {
|
||||
|
||||
public Pair<Number,Number> map(ReadBackedPileup rbp) {
|
||||
int numReads = 0;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,71 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol;
|
||||
|
||||
import cern.jet.random.Normal;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.Dichotomizable;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.StatisticalTest;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Feb 24, 2011
|
||||
* Time: 12:58:38 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public abstract class ProportionTest extends CaseControl<Pair<Number,Number>> implements Dichotomizable, StatisticalTest {
|
||||
public static final Normal standardNormal = new Normal(0.0,1.0,null);
|
||||
|
||||
public abstract Pair<Number,Number> map(ReadBackedPileup rbp );
|
||||
|
||||
public Pair<Number,Number> add(Pair<Number,Number> left, Pair<Number,Number> right) {
|
||||
return new Pair<Number,Number>(left.first.doubleValue()+right.first.doubleValue(),
|
||||
left.second.doubleValue()+right.second.doubleValue());
|
||||
}
|
||||
|
||||
public Pair<Collection<Number>,Collection<Number>> getDichotomizedData() {
|
||||
Collection<Number> caseProps = new ArrayList<Number>();
|
||||
Collection<Number> controlProps = new ArrayList<Number>();
|
||||
|
||||
for (Map<Sample,Pair<Number,Number>> sampleNumberMap : window ) {
|
||||
for ( Map.Entry<Sample,Pair<Number,Number>> samplePairEntry : sampleNumberMap.entrySet() ) {
|
||||
Pair<Number,Number> val = samplePairEntry.getValue();
|
||||
if ( samplePairEntry.getKey().getProperty("cohort").equals("case")) {
|
||||
caseProps.add(val.first.doubleValue()/val.second.doubleValue());
|
||||
} else if ( samplePairEntry.getKey().getProperty("cohort").equals("control") ) {
|
||||
controlProps.add(val.first.doubleValue()/val.second.doubleValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<Collection<Number>,Collection<Number>>(caseProps,controlProps);
|
||||
}
|
||||
|
||||
public Pair<Double,Double> getStatisticAndPValue() {
|
||||
Map<CaseControl.Cohort,Pair<Number,Number>> caseControlCounts = getCaseControl();
|
||||
if ( caseControlCounts == null || caseControlCounts.get(CaseControl.Cohort.CASE) == null || caseControlCounts.get(CaseControl.Cohort.CONTROL) == null ) {
|
||||
return new Pair<Double,Double>(Double.NaN,Double.NaN);
|
||||
}
|
||||
double pCase = caseControlCounts.get(CaseControl.Cohort.CASE).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue();
|
||||
double pControl = caseControlCounts.get(CaseControl.Cohort.CONTROL).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue();
|
||||
double nCase = caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue();
|
||||
double nControl = caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue();
|
||||
|
||||
double p2 = (caseControlCounts.get(CaseControl.Cohort.CASE).first.doubleValue()+caseControlCounts.get(CaseControl.Cohort.CONTROL).first.doubleValue())/
|
||||
(caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue()+caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue());
|
||||
double se = Math.sqrt(p2*(1-p2)*(1/nCase + 1/nControl));
|
||||
|
||||
double z = (pCase-pControl)/se;
|
||||
double p = z < 0 ? 2*standardNormal.cdf(z) : 2*(1-standardNormal.cdf(z));
|
||||
|
||||
return new Pair<Double,Double>(z,p);
|
||||
}
|
||||
|
||||
public String getStatisticName() { return "Z"; }
|
||||
|
||||
}
|
||||
|
|
@ -15,7 +15,7 @@ import java.util.Collection;
|
|||
* Time: 1:31 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadClipping extends TStatistic {
|
||||
public class ReadClipping extends ValueTest {
|
||||
|
||||
public Collection<Number> map(ReadBackedPileup rbp) {
|
||||
ArrayList<Integer> clipping = new ArrayList<Integer>(rbp.size());
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ import java.util.Collection;
|
|||
* Time: 1:54 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadIndels extends UStatistic {
|
||||
public class ReadIndels extends ValueTest {
|
||||
|
||||
public Collection<Number> map(ReadBackedPileup rbp) {
|
||||
ArrayList<Integer> indelElements = new ArrayList<Integer>(rbp.size());
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ import java.util.Collection;
|
|||
* Time: 1:41 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadMappingQuality extends TStatistic {
|
||||
public class ReadMappingQuality extends ValueTest {
|
||||
|
||||
public Collection<Number> map(ReadBackedPileup rbp) {
|
||||
ArrayList<Integer> mapQuals = new ArrayList<Integer>(rbp.size());
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ import java.util.Map;
|
|||
* Time: 1:17 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReferenceMismatches extends ZStatistic {
|
||||
public class ReferenceMismatches extends ProportionTest {
|
||||
|
||||
final static int[] BASE_INDEX = {0,1,2,3};
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import java.util.*;
|
|||
/**
|
||||
* @author chartl
|
||||
*/
|
||||
public class SampleDepth extends UStatistic {
|
||||
public class SampleDepth extends ValueTest {
|
||||
|
||||
public Map<Sample,Object> sampleStats = null;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,36 +0,0 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol;
|
||||
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Feb 24, 2011
|
||||
* Time: 12:58:16 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public abstract class TStatistic extends CaseControl<Collection<Number>> {
|
||||
|
||||
public abstract Collection<Number> map(ReadBackedPileup rbp );
|
||||
|
||||
public Collection<Number> add(Collection<Number> left, Collection<Number> right) {
|
||||
if ( left instanceof ArrayList) {
|
||||
((ArrayList) left).addAll(right);
|
||||
return left;
|
||||
} else if ( left instanceof Set ) {
|
||||
((Set) left).addAll(right);
|
||||
return left;
|
||||
} else {
|
||||
List<Number> newList = new ArrayList<Number>(left.size()+right.size());
|
||||
newList.addAll(left);
|
||||
newList.addAll(right);
|
||||
return newList;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol;
|
||||
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 3/2/11
|
||||
* Time: 1:53 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public abstract class UStatistic extends CaseControl<Collection<Number>> {
|
||||
|
||||
public abstract Collection<Number> map(ReadBackedPileup rbp );
|
||||
|
||||
public Collection<Number> add(Collection<Number> left, Collection<Number> right) {
|
||||
if ( left instanceof ArrayList ) {
|
||||
((ArrayList) left).addAll(right);
|
||||
return left;
|
||||
} else if ( left instanceof Set) {
|
||||
((Set) left).addAll(right);
|
||||
return left;
|
||||
} else {
|
||||
List<Number> newList = new ArrayList<Number>(left.size()+right.size());
|
||||
newList.addAll(left);
|
||||
newList.addAll(right);
|
||||
return newList;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol;
|
||||
|
||||
import cern.jet.random.StudentT;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.Dichotomizable;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.StatisticalTest;
|
||||
import org.broadinstitute.sting.utils.MannWhitneyU;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 3/2/11
|
||||
* Time: 1:53 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public abstract class ValueTest extends CaseControl<Collection<Number>> implements StatisticalTest, Dichotomizable {
|
||||
|
||||
public abstract Collection<Number> map(ReadBackedPileup rbp );
|
||||
|
||||
public boolean useTStatistic() { return false; } // default to using the U statistic
|
||||
|
||||
public Collection<Number> add(Collection<Number> left, Collection<Number> right) {
|
||||
if ( left instanceof ArrayList ) {
|
||||
((ArrayList) left).addAll(right);
|
||||
return left;
|
||||
} else if ( left instanceof Set) {
|
||||
((Set) left).addAll(right);
|
||||
return left;
|
||||
} else {
|
||||
List<Number> newList = new ArrayList<Number>(left.size()+right.size());
|
||||
newList.addAll(left);
|
||||
newList.addAll(right);
|
||||
return newList;
|
||||
}
|
||||
}
|
||||
|
||||
public Pair<Collection<Number>,Collection<Number>> getDichotomizedData() {
|
||||
Collection<Number> caseMeans = new ArrayList<Number>();
|
||||
Collection<Number> controlMeans = new ArrayList<Number>();
|
||||
|
||||
for ( Map<Sample,Collection<Number>> sampleMap : window ) {
|
||||
for ( Map.Entry<Sample,Collection<Number>> sampleEntry : sampleMap.entrySet() ) {
|
||||
if ( sampleEntry.getKey().getProperty("cohort").equals("case") ) {
|
||||
caseMeans.add(MathUtils.average(sampleEntry.getValue()));
|
||||
} else if ( sampleEntry.getKey().getProperty("cohort").equals("control") ) {
|
||||
controlMeans.add(MathUtils.average(sampleEntry.getValue()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<Collection<Number>,Collection<Number>>(caseMeans,controlMeans);
|
||||
}
|
||||
|
||||
public Pair<Double,Double> getUStatisticAndPValue() {
|
||||
MannWhitneyU mwu = new MannWhitneyU();
|
||||
|
||||
for ( Map.Entry<Cohort,Collection<Number>> cohortEntry : getCaseControl().entrySet() ) {
|
||||
if ( cohortEntry.getKey().equals(Cohort.CASE) ) {
|
||||
for ( Number n : cohortEntry.getValue() ) {
|
||||
mwu.add(n,MannWhitneyU.USet.SET1);
|
||||
}
|
||||
} else if ( cohortEntry.getKey().equals(Cohort.CONTROL)) {
|
||||
for ( Number n : cohortEntry.getValue() ) {
|
||||
mwu.add(n,MannWhitneyU.USet.SET2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mwu.runTwoSidedTest();
|
||||
}
|
||||
|
||||
public Pair<Double,Double> getStatisticAndPValue() { return useTStatistic() ? getTStatisticAndPValue() : getUStatisticAndPValue(); }
|
||||
|
||||
public String getStatisticName() { return useTStatistic() ? "T" : "V"; }
|
||||
|
||||
public Pair<Double,Double> getTStatisticAndPValue() {
|
||||
Map<CaseControl.Cohort,Collection<Number>> caseControlVectors = getCaseControl();
|
||||
if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) {
|
||||
return new Pair<Double,Double>(Double.NaN,Double.NaN);
|
||||
}
|
||||
double meanCase = MathUtils.average(caseControlVectors.get(CaseControl.Cohort.CASE));
|
||||
double varCase = MathUtils.variance(caseControlVectors.get(CaseControl.Cohort.CASE),meanCase);
|
||||
double nCase = caseControlVectors.get(CaseControl.Cohort.CASE).size();
|
||||
double meanControl = MathUtils.average(caseControlVectors.get(CaseControl.Cohort.CONTROL));
|
||||
double varControl = MathUtils.variance(caseControlVectors.get(CaseControl.Cohort.CONTROL),meanControl);
|
||||
double nControl = caseControlVectors.get(CaseControl.Cohort.CONTROL).size();
|
||||
|
||||
double df_num = Math.pow(varCase/nCase + varControl/nControl,2);
|
||||
double df_denom = Math.pow(varCase/nCase,2)/(nCase-1) + Math.pow(varControl/nControl,2)/(nControl-1);
|
||||
double t = (meanCase-meanControl)/Math.sqrt(varCase/nCase+varControl/nControl);
|
||||
|
||||
StudentT studentT = new StudentT(df_num/df_denom,null);
|
||||
double p = t < 0 ? 2*studentT.cdf(t) : 2*(1-studentT.cdf(t));
|
||||
|
||||
return new Pair<Double,Double>(t,p);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Feb 24, 2011
|
||||
* Time: 12:58:38 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public abstract class ZStatistic extends CaseControl<Pair<Number,Number>> {
|
||||
public abstract Pair<Number,Number> map(ReadBackedPileup rbp );
|
||||
|
||||
public Pair<Number,Number> add(Pair<Number,Number> left, Pair<Number,Number> right) {
|
||||
return new Pair<Number,Number>(left.first.doubleValue()+right.first.doubleValue(),
|
||||
left.second.doubleValue()+right.second.doubleValue());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.statistics;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: Ghost
|
||||
* Date: 4/5/11
|
||||
* Time: 11:35 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public interface Dichotomizable {
|
||||
public abstract Pair<Collection<Number>,Collection<Number>> getDichotomizedData();
|
||||
}
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.statistics;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 4/4/11
|
||||
* Time: 7:18 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class Dichotomizer1D {
|
||||
|
||||
private Dichotomizer1D() { } // no instantiation
|
||||
|
||||
public abstract class Transform {
|
||||
public abstract double transform(double n);
|
||||
|
||||
public Collection<Number> apply(Collection<Number> collection) {
|
||||
ArrayList<Number> tData = new ArrayList<Number>(collection.size());
|
||||
for ( Number n : collection ) {
|
||||
tData.add(transform(n.doubleValue()));
|
||||
}
|
||||
|
||||
return tData;
|
||||
}
|
||||
}
|
||||
|
||||
public static double simpleGaussianDichotomy(Collection<Number> setOne, Collection<Number> setTwo) {
|
||||
double meanOne = MathUtils.sum(setOne)/setOne.size();
|
||||
double meanTwo = MathUtils.sum(setTwo)/setTwo.size();
|
||||
double stdOne = Math.sqrt(MathUtils.variance(setOne, meanOne));
|
||||
double stdTwo = Math.sqrt(MathUtils.variance(setTwo, meanTwo));
|
||||
|
||||
return 1.0 - (3.0*(stdOne+stdTwo))/Math.abs(meanOne-meanTwo);
|
||||
}
|
||||
|
||||
public static double simpleGaussianDichotomy(Collection<Number> setOne, Collection<Number> setTwo, Transform transform) {
|
||||
return simpleGaussianDichotomy(transform.apply(setOne),transform.apply(setTwo));
|
||||
}
|
||||
|
||||
public static double simpleGaussianDichotomy(Dichotomizable dichotomizable) {
|
||||
Pair<Collection<Number>,Collection<Number>> dichotomizedData = dichotomizable.getDichotomizedData();
|
||||
return simpleGaussianDichotomy(dichotomizedData.first,dichotomizedData.second);
|
||||
}
|
||||
|
||||
public static double simpleGaussianDichotomy(Dichotomizable dichotomizable, Transform transform) {
|
||||
Pair<Collection<Number>,Collection<Number>> dichotomizedData = dichotomizable.getDichotomizedData();
|
||||
return simpleGaussianDichotomy(dichotomizedData.first,dichotomizedData.second, transform);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.statistics;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: Ghost
|
||||
* Date: 4/5/11
|
||||
* Time: 12:26 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public interface StatisticalTest {
|
||||
public abstract Pair<Double,Double> getStatisticAndPValue();
|
||||
public abstract String getStatisticName();
|
||||
}
|
||||
|
|
@ -40,6 +40,17 @@ public class MannWhitneyU {
|
|||
}
|
||||
}
|
||||
|
||||
public Pair<Long,Long> getR1R2() {
|
||||
long u1 = calculateOneSidedU(observations,MannWhitneyU.USet.SET1);
|
||||
long n1 = sizeSet1*(sizeSet1+1)/2;
|
||||
long r1 = u1 + n1;
|
||||
long n2 = sizeSet2*(sizeSet2+1)/2;
|
||||
long u2 = n1*n2-u1;
|
||||
long r2 = u2 + n2;
|
||||
|
||||
return new Pair<Long,Long>(r1,r2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs the one-sided test under the hypothesis that the data in set "lessThanOther" stochastically
|
||||
* dominates the other set
|
||||
|
|
|
|||
|
|
@ -1,18 +1,15 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.UStatistic;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.ZStatistic;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.ProportionTest;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.ValueTest;
|
||||
import org.broadinstitute.sting.utils.MannWhitneyU;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationTestRunner;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.TStatistic;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
|
@ -36,20 +33,20 @@ public class RegionalAssociationUnitTest extends BaseTest {
|
|||
TTest test1 = new TTest();
|
||||
test1.setCaseData((Collection) Arrays.asList(1,1,2,3,4));
|
||||
test1.setControlData((Collection) Arrays.asList(10, 10, 20, 30, 40));
|
||||
Assert.assertEquals(AssociationTestRunner.testStudentT(test1).second,0.1702,1e-2);
|
||||
Assert.assertEquals(AssociationTestRunner.getTestValues(test1).second.first,0.1702,1e-2);
|
||||
TTest test2 = new TTest();
|
||||
test2.setCaseData((Collection) Arrays.asList(5, 6, 5, 2, 3, 8, 7, 12, 10, 6, 4, 2, 8, 7, 3));
|
||||
test2.setControlData((Collection) Arrays.asList(1, 6, 7, 2, 3, 3, 4, 1, 2, 5, 7, 3, 10, 3, 3, 2, 3));
|
||||
Assert.assertEquals(AssociationTestRunner.testStudentT(test2).second, 0.5805, 1e-2);
|
||||
Assert.assertEquals(AssociationTestRunner.getTestValues(test2).second.first, 0.5805, 1e-2);
|
||||
TTest test3 = new TTest();
|
||||
test3.setCaseData((Collection) Arrays.asList(94,25,68,4,27,51,9,10,91,61,61,37,39,44,36,27,86,33,3,38,5,6,28,93,30,56,81,8,40,44));
|
||||
test3.setControlData((Collection) Arrays.asList(6,64,96,85,20,74,93,18,31,20,88,38,80,50,33,81,35,8,2,69,49,6,26,74,79,63,63,96,45,18));
|
||||
Assert.assertEquals(AssociationTestRunner.testStudentT(test3).second,0.8229,1e-4);
|
||||
Assert.assertEquals(AssociationTestRunner.getTestValues(test3).second.first,0.8229,1e-4);
|
||||
TTest test4 = new TTest();
|
||||
test4.setCaseData((Collection) Arrays.asList(14,8,8,17,8,12,10,10,13,9,13,9,9,12,12,11,16,12,13,16,10,13,11,16,13,16,11,13,9,16,16,14,9,14,17,10,15,15,9,15,17,15,17,12,10,13,11,14,8,14));
|
||||
test4.setControlData((Collection) Arrays.asList(7,1,4,2,3,7,8,5,5,4,10,6,4,9,2,9,9,3,3,10,1,8,9,5,3,7,2,7,10,9,4,9,2,10,10,3,2,3,4,4,5,10,9,4,3,5,6,10,5,10));
|
||||
Assert.assertEquals(AssociationTestRunner.testStudentT(test4).second,0.1006,1e-4);
|
||||
Assert.assertEquals(AssociationTestRunner.testStudentT(test4).first,1.657989,1e-6);
|
||||
Assert.assertEquals(AssociationTestRunner.getTestValues(test4).second.first,0.1006,1e-4);
|
||||
Assert.assertEquals(AssociationTestRunner.getTestValues(test4).first,1.657989,1e-6);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -58,17 +55,17 @@ public class RegionalAssociationUnitTest extends BaseTest {
|
|||
ZTest test1 = new ZTest();
|
||||
test1.setCaseData(new Pair<Number,Number>(100,500));
|
||||
test1.setControlData(new Pair<Number,Number>(55,300));
|
||||
Assert.assertEquals(AssociationTestRunner.testZ(test1).first,0.57742362050306,2e-6);
|
||||
Assert.assertEquals(AssociationTestRunner.testZ(test1).second,0.56367,2e-5);
|
||||
Assert.assertEquals(AssociationTestRunner.getTestValues(test1).first,0.57742362050306,2e-6);
|
||||
Assert.assertEquals(AssociationTestRunner.getTestValues(test1).second.first,0.56367,2e-5);
|
||||
ZTest test2 = new ZTest();
|
||||
test1.setCaseData(new Pair<Number, Number>(1020, 1800));
|
||||
test1.setControlData(new Pair<Number, Number>(680, 1670));
|
||||
Assert.assertEquals(AssociationTestRunner.testZ(test1).first,9.3898178216531,2e-6);
|
||||
Assert.assertEquals(AssociationTestRunner.getTestValues(test1).first,9.3898178216531,2e-6);
|
||||
ZTest test3 = new ZTest();
|
||||
test3.setCaseData(new Pair<Number,Number>(20,60));
|
||||
test3.setControlData(new Pair<Number,Number>(30,80));
|
||||
Assert.assertEquals(AssociationTestRunner.testZ(test3).first,-0.50917511840392,2e-6);
|
||||
Assert.assertEquals(AssociationTestRunner.testZ(test3).second,0.610643593,2e-4);
|
||||
Assert.assertEquals(AssociationTestRunner.getTestValues(test3).first,-0.50917511840392,2e-6);
|
||||
Assert.assertEquals(AssociationTestRunner.getTestValues(test3).second.first,0.610643593,2e-4);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -160,7 +157,7 @@ public class RegionalAssociationUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
|
||||
private class TTest extends TStatistic {
|
||||
private class TTest extends ValueTest {
|
||||
Map<Cohort,Collection<Number>> toTest = new HashMap<Cohort,Collection<Number>>(2);
|
||||
|
||||
@Override
|
||||
|
|
@ -178,11 +175,14 @@ public class RegionalAssociationUnitTest extends BaseTest {
|
|||
|
||||
public Collection<Number> map(ReadBackedPileup rbp) { return null; }
|
||||
public int getWindowSize() { return 1; }
|
||||
public int slideByValue() { return 1; }
|
||||
|
||||
@Override
|
||||
public boolean useTStatistic() { return true; }
|
||||
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
}
|
||||
|
||||
private class ZTest extends ZStatistic {
|
||||
private class ZTest extends ProportionTest {
|
||||
Map<Cohort,Pair<Number,Number>> toTest = new HashMap<Cohort,Pair<Number,Number>>(2);
|
||||
|
||||
@Override
|
||||
|
|
@ -204,7 +204,7 @@ public class RegionalAssociationUnitTest extends BaseTest {
|
|||
public boolean usePreviouslySeenReads() { return true; }
|
||||
}
|
||||
|
||||
private class UTest extends UStatistic {
|
||||
private class UTest extends ValueTest {
|
||||
TTest test = new TTest();
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
public int getWindowSize() { return 1; }
|
||||
|
|
|
|||
Loading…
Reference in New Issue