Adding a delta to try to better measure effect size -- equivalent to looking at the lower end of the N^th percentile confidence interval. This is a somewhat hacky way to add it; the infrastructure is about due for a streamlining rewrite.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5676 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2011-04-22 03:53:33 +00:00
parent 7428ae338a
commit 88735a8c9b
7 changed files with 56 additions and 9 deletions

View File

@ -17,6 +17,7 @@ public abstract class AssociationContext<X,Y> {
protected List<Map<Sample,Y>> window;
private int size;
private int slide;
protected double zVal;
// No-argument constructor; it performs no initialization of its own (the body is empty).
public AssociationContext() {
}
@ -42,6 +43,7 @@ public abstract class AssociationContext<X,Y> {
size = walker.windowSize;
slide = walker.slideBy;
window = new ArrayList<Map<Sample,Y>>(size);
zVal = walker.zVal;
}
public Map<Sample,Object> mapLocus(MapExtender extender) {

View File

@ -42,10 +42,12 @@ public class AssociationTestRunner {
return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE);
}
public static Pair<Double,Pair<Double,Integer>> getTestValues(AssociationContext context) {
public static Pair<Pair<Double,Double>,Pair<Double,Integer>> getTestValues(AssociationContext context) {
if ( context instanceof StatisticalTest ) {
Pair<Double,Double> statAndP = ((StatisticalTest) context).getStatisticAndPValue();
return new Pair<Double,Pair<Double,Integer>>(statAndP.first,
Double delta = ((StatisticalTest) context).getZConfDelta();
return new Pair<Pair<Double,Double>,Pair<Double,Integer>>(
new Pair<Double,Double>(statAndP.first,delta),
new Pair<Double,Integer>(statAndP.second,pToQ(statAndP.second)));
}
@ -54,10 +56,10 @@ public class AssociationTestRunner {
public static String runTests(AssociationContext context) {
if ( context instanceof StatisticalTest ) {
Pair<Double,Pair<Double,Integer>> results = getTestValues(context);
return String.format("%s: %.2f\tP: %.2e\tQ: %d",
Pair<Pair<Double,Double>,Pair<Double,Integer>> results = getTestValues(context);
return String.format("%s: %.2f\tD: %.2f\tP: %.2e\tQ: %d",
((StatisticalTest) context).getStatisticName() ,
results.first,results.second.first,results.second.second);
results.first.first,results.first.second,results.second.first,results.second.second);
}
return null;

View File

@ -62,14 +62,14 @@ public class RegionalAssociationHandler {
// todo -- maybe the tdf should be the whole window rather than just the most recent loc?
String outVal;
if ( bedGraphFormat ) {
Pair<Double,Pair<Double,Integer>> statVals = AssociationTestRunner.getTestValues(context);
Pair<Pair<Double,Double>,Pair<Double,Integer>> statVals = AssociationTestRunner.getTestValues(context);
Pair<Double,Double> simpleDichotVals = AssociationTestRunner.getDichotomizedValues(context);
outVal = String.format("%.2f\t%.2e\t%d\t%.2f\t%.2f",statVals.first,statVals.second.first,statVals.second.second,
simpleDichotVals.first,simpleDichotVals.second);
outVal = String.format("%.2f\t%.2f\t%.2e\t%d\t%.2f\t%.2f",statVals.first.first,statVals.first.second,
statVals.second.first,statVals.second.second,simpleDichotVals.first,simpleDichotVals.second);
} else {
outVal = AssociationTestRunner.runTests(context);
Pair<Double,Double> simpleDichotVals = AssociationTestRunner.getDichotomizedValues(context);
outVal += String.format("\tD: %.2f\tLogD: %.2f",simpleDichotVals.first,simpleDichotVals.second);
outVal += String.format("\tDi: %.2f\tLogDi: %.2f",simpleDichotVals.first,simpleDichotVals.second);
}
return String.format("%s\t%d\t%d\t%s",maps.getReferenceContext().getLocus().getContig(),
maps.getReferenceContext().getLocus().getStart()-context.getWindowSize()-1,maps.getReferenceContext().getLocus().getStart()+1, outVal);

View File

@ -41,6 +41,10 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
public int windowSize = 50;
@Argument(doc="Set the window sliding value for associations to this value",shortName="s",fullName="slide",required=false)
public int slideBy = 10;
@Argument(doc="Set the exercise-wide constant Z-value for delta-measure",shortName="z",fullName="zValue",required=false)
public double zVal = 6.0;
// For now this Z-value is applied to t-tests too: because of the degrees of freedom (df) the
// corresponding percentile is not constant, but most dfs are large, so it does not vary much.
@Output
@Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse","bedGraph"})

View File

@ -66,6 +66,26 @@ public abstract class ProportionTest extends CaseControl<Pair<Number,Number>> im
return new Pair<Double,Double>(z,p);
}
/**
 * Conservative effect-size estimate for the case/control proportion difference: the magnitude
 * of the observed difference shrunk towards zero by {@code zVal} pooled standard errors, i.e.
 * {@code se * (|z| - zVal)} (the lower end of the confidence interval on the magnitude).
 * A negative result means the interval of that width includes zero.
 *
 * The value is symmetric in the sign of z: swapping the case and control cohorts yields the
 * same delta. (Previously a negative z produced {@code se * (zVal - z)}, which ADDED the
 * margin instead of subtracting it.)
 *
 * todo -- this is a temporary method, it needs to be merged in with others if it proves useful
 *
 * @return the delta described above; NaN when the case/control counts are unavailable, and
 *         NaN as well when the floating-point arithmetic degenerates (e.g. a zero standard error)
 */
public Double getZConfDelta() {
    Map<CaseControl.Cohort,Pair<Number,Number>> caseControlCounts = getCaseControl();
    if ( caseControlCounts == null ) {
        return Double.NaN;
    }
    Pair<Number,Number> caseCounts = caseControlCounts.get(CaseControl.Cohort.CASE);
    Pair<Number,Number> controlCounts = caseControlCounts.get(CaseControl.Cohort.CONTROL);
    if ( caseCounts == null || controlCounts == null ) {
        return Double.NaN;
    }
    // each pair is read as (numerator, denominator) of the cohort's proportion
    double xCase = caseCounts.first.doubleValue();
    double nCase = caseCounts.second.doubleValue();
    double xControl = controlCounts.first.doubleValue();
    double nControl = controlCounts.second.doubleValue();
    double pCase = xCase/nCase;
    double pControl = xControl/nControl;
    // pooled proportion across both cohorts, used for the standard error of the difference
    double p2 = (xCase+xControl)/(nCase+nControl);
    double se = Math.sqrt(p2*(1-p2)*(1/nCase + 1/nControl));
    double z = (pCase-pControl)/se;
    // subtract the zVal margin from the magnitude of z regardless of its sign
    return se*(Math.abs(z)-zVal);
}
public String getStatisticName() { return "Z"; }
}

View File

@ -100,4 +100,22 @@ public abstract class ValueTest extends CaseControl<Collection<Number>> implemen
return new Pair<Double,Double>(t,p);
}
/**
 * Conservative effect-size estimate for the case/control mean difference: the magnitude of
 * the observed difference shrunk towards zero by {@code zVal} standard errors, i.e.
 * {@code dnom * (|t| - zVal)} (the lower end of the confidence interval on the magnitude).
 * A negative result means the interval of that width includes zero.
 *
 * The value is symmetric in the sign of t: swapping the case and control cohorts yields the
 * same delta. (Previously a negative t produced {@code dnom * (zVal - t)}, which ADDED the
 * margin instead of subtracting it.)
 *
 * @return the delta described above; NaN when the case/control values are unavailable, and
 *         NaN as well when the floating-point arithmetic degenerates (e.g. a zero standard error)
 */
public Double getZConfDelta() {
    Map<CaseControl.Cohort,Collection<Number>> caseControlVectors = getCaseControl();
    if ( caseControlVectors == null ) {
        return Double.NaN;
    }
    Collection<Number> caseValues = caseControlVectors.get(CaseControl.Cohort.CASE);
    Collection<Number> controlValues = caseControlVectors.get(CaseControl.Cohort.CONTROL);
    if ( caseValues == null || controlValues == null ) {
        return Double.NaN;
    }
    double meanCase = MathUtils.average(caseValues,true);
    double varCase = MathUtils.variance(caseValues,meanCase,true);
    double nCase = caseValues.size();
    double meanControl = MathUtils.average(controlValues,true);
    double varControl = MathUtils.variance(controlValues,meanControl,true);
    double nControl = controlValues.size();
    // standard error of the difference of means (unpooled variances)
    double dnom = Math.sqrt(varCase/nCase+varControl/nControl);
    double t = (meanCase-meanControl)/dnom;
    // subtract the zVal margin from the magnitude of t regardless of its sign
    return dnom*(Math.abs(t)-zVal);
}
}

View File

@ -12,4 +12,5 @@ import org.broadinstitute.sting.utils.collections.Pair;
/**
 * Implemented by association contexts that can report a test statistic; the test runner
 * checks for this interface before extracting any values.
 */
public interface StatisticalTest {

    /**
     * @return the test statistic (first) paired with its p-value (second)
     */
    Pair<Double,Double> getStatisticAndPValue();

    /**
     * @return a short label for the statistic (for example "Z"), used to prefix it in text output
     */
    String getStatisticName();

    /**
     * @return the Z-confidence delta, an effect-size measure reported alongside the statistic;
     *         implementations may return NaN when it cannot be computed
     */
    Double getZConfDelta();
}