Switch from togglable wiggle output to togglable bedgraph format. Can be pulled directly into IGV to show the statistics values. I'll need to bug jim to allow value-toggling in a bedgraph, currently 2nd and 3rd columns are just ignored.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5492 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a6d873b268
commit
687b2e51b4
|
|
@ -22,7 +22,7 @@ import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public class AssociationTestRunner {
|
public class AssociationTestRunner {
|
||||||
final static int MAX_Q_VALUE = 2000;
|
final static int MAX_Q_VALUE = Integer.MAX_VALUE;
|
||||||
// todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance
|
// todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance
|
||||||
static Normal standardNormal = new Normal(0.0,1.0,null);
|
static Normal standardNormal = new Normal(0.0,1.0,null);
|
||||||
|
|
||||||
|
|
@ -30,6 +30,25 @@ public class AssociationTestRunner {
|
||||||
return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE);
|
return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Pair<Double,Pair<Double,Integer>> getTestValues(AssociationContext context) {
|
||||||
|
if ( context instanceof TStatistic) {
|
||||||
|
Pair<Double,Double> t = testStudentT((TStatistic) context);
|
||||||
|
return new Pair<Double,Pair<Double,Integer>> (t.first,new Pair<Double,Integer>(t.second,pToQ(t.second)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( context instanceof ZStatistic) {
|
||||||
|
Pair<Double,Double> z = testZ((ZStatistic) context);
|
||||||
|
return new Pair<Double,Pair<Double,Integer>> (z.first,new Pair<Double,Integer>(z.second,pToQ(z.second)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( context instanceof UStatistic ) {
|
||||||
|
Pair<Double,Double> u = mannWhitneyUTest((UStatistic) context);
|
||||||
|
return new Pair<Double,Pair<Double,Integer>> ( u.first, new Pair<Double,Integer>(u.second,pToQ(u.second)));
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
public static String runTests(AssociationContext context) {
|
public static String runTests(AssociationContext context) {
|
||||||
List<String> results = new ArrayList<String>();
|
List<String> results = new ArrayList<String>();
|
||||||
if ( context instanceof TStatistic) {
|
if ( context instanceof TStatistic) {
|
||||||
|
|
@ -56,32 +75,12 @@ public class AssociationTestRunner {
|
||||||
return buf.toString();
|
return buf.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Just gets the Q value associated with a particular association context.
|
|
||||||
* @param context
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public static int getQValue(AssociationContext context) {
|
|
||||||
if ( context instanceof TStatistic ) {
|
|
||||||
return pToQ(testStudentT((TStatistic) context).second);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( context instanceof ZStatistic ) {
|
|
||||||
return pToQ(testZ((ZStatistic) context).second);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( context instanceof UStatistic ) {
|
|
||||||
return pToQ(mannWhitneyUTest((UStatistic) context).second);
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String runStudentT(TStatistic context) {
|
public static String runStudentT(TStatistic context) {
|
||||||
Pair<Double,Double> stats = testStudentT(context);
|
Pair<Double,Double> stats = testStudentT(context);
|
||||||
double t = stats.first;
|
double t = stats.first;
|
||||||
double p = stats.second;
|
double p = stats.second;
|
||||||
return String.format("T: %.2f\tP: %.2e\tQ: %d",t,p,(int)Math.floor(QualityUtils.phredScaleErrorRate(p)));
|
return String.format("T: %.2f\tP: %.2e\tQ: %d",t,p,pToQ(p));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Pair<Double,Double> testStudentT(TStatistic context) {
|
public static Pair<Double,Double> testStudentT(TStatistic context) {
|
||||||
|
|
@ -110,7 +109,7 @@ public class AssociationTestRunner {
|
||||||
Pair<Double,Double> stats = testZ(context);
|
Pair<Double,Double> stats = testZ(context);
|
||||||
double z = stats.first;
|
double z = stats.first;
|
||||||
double p = stats.second;
|
double p = stats.second;
|
||||||
return String.format("Z: %.2f\tP: %.2e\tQ: %d",z,p,(int)Math.floor(QualityUtils.phredScaleErrorRate(p)));
|
return String.format("Z: %.2f\tP: %.2e\tQ: %d",z,p,pToQ(p));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Pair<Double,Double> testZ(ZStatistic context) {
|
public static Pair<Double,Double> testZ(ZStatistic context) {
|
||||||
|
|
@ -137,7 +136,7 @@ public class AssociationTestRunner {
|
||||||
// note: u statistic (U) is relatively useless for recalibrating outside of the context of m and n
|
// note: u statistic (U) is relatively useless for recalibrating outside of the context of m and n
|
||||||
// thus we report V = (U - (m*n+1)/2)/(n*m*(n+m+1)/12)
|
// thus we report V = (U - (m*n+1)/2)/(n*m*(n+m+1)/12)
|
||||||
Pair<Double,Double> results = mannWhitneyUTest(context);
|
Pair<Double,Double> results = mannWhitneyUTest(context);
|
||||||
return String.format("V: %.2f\tP: %.2e\tQ: %d",results.first,results.second,(int)Math.floor(QualityUtils.phredScaleErrorRate(results.second)));
|
return String.format("V: %.2f\tP: %.2e\tQ: %d",results.first,results.second,pToQ(results.second));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Pair<Double,Double> mannWhitneyUTest(UStatistic context) {
|
public static Pair<Double,Double> mannWhitneyUTest(UStatistic context) {
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.oneoffprojects.walkers.association;
|
package org.broadinstitute.sting.oneoffprojects.walkers.association;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -40,41 +41,23 @@ public class RegionalAssociationHandler {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Switches what formatting to use based on wiggle or standard
|
* Switches what formatting to use based on wiggle or standard
|
||||||
* @param wiggleFormat - use wiggle format (just Q) or standard (S: P: Q:)
|
* @param bedGraphFormat - use bedgraph format (s p q) or standard (S: s P: p Q: q)
|
||||||
* @return - test results in proper format
|
* @return - test results in proper format
|
||||||
*/
|
*/
|
||||||
public Map<AssociationContext,String> runTests(boolean wiggleFormat) {
|
public Map<AssociationContext,String> runTests(boolean bedGraphFormat) {
|
||||||
if ( wiggleFormat ) {
|
// todo -- maybe the tdf should be the whole window rather than just the most recent loc?
|
||||||
return runWiggleTests();
|
Map<AssociationContext,String> testResults = new HashMap<AssociationContext,String>(associations.size());
|
||||||
|
for ( AssociationContext w : associations ) {
|
||||||
|
if ( w.isFull() ) {
|
||||||
|
String outVal;
|
||||||
|
if ( bedGraphFormat ) {
|
||||||
|
Pair<Double,Pair<Double,Integer>> vals = AssociationTestRunner.getTestValues(w);
|
||||||
|
outVal = String.format("%.2f\t%.2e\t%d",vals.first,vals.second.first,vals.second.second);
|
||||||
} else {
|
} else {
|
||||||
return runTests();
|
outVal = AssociationTestRunner.runTests(w);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
public Map<AssociationContext,String> runWiggleTests() {
|
|
||||||
// todo -- maybe the tdf should be the whole window rather than just the most recent loc?
|
|
||||||
Map<AssociationContext,String> testResults = new HashMap<AssociationContext,String>(associations.size());
|
|
||||||
for ( AssociationContext w : associations ) {
|
|
||||||
if ( w.isFull() ) {
|
|
||||||
testResults.put(w,String.format("%d",AssociationTestRunner.getQValue(w)));
|
|
||||||
w.slide();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return testResults;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* For those AssociationContexts with full windows:
|
|
||||||
* 1) Run their associated test(s)
|
|
||||||
* 2) Slide the windows
|
|
||||||
*/
|
|
||||||
public Map<AssociationContext,String> runTests() {
|
|
||||||
// todo -- maybe the tdf should be the whole window rather than just the most recent loc?
|
|
||||||
Map<AssociationContext,String> testResults = new HashMap<AssociationContext,String>(associations.size());
|
|
||||||
for ( AssociationContext w : associations ) {
|
|
||||||
if ( w.isFull() ) {
|
|
||||||
testResults.put(w,String.format("%s\t%d\t%d\t%s",maps.getReferenceContext().getLocus().getContig(),
|
testResults.put(w,String.format("%s\t%d\t%d\t%s",maps.getReferenceContext().getLocus().getContig(),
|
||||||
maps.getReferenceContext().getLocus().getStart(),maps.getReferenceContext().getLocus().getStart()+1,AssociationTestRunner.runTests(w)));
|
maps.getReferenceContext().getLocus().getStart()-w.getWindowSize()-1,maps.getReferenceContext().getLocus().getStart()+1, outVal));
|
||||||
w.slide();
|
w.slide();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,12 +16,12 @@ import java.util.*;
|
||||||
public class RegionalAssociationMultiplexer implements Multiplexer<Class<? extends AssociationContext>> {
|
public class RegionalAssociationMultiplexer implements Multiplexer<Class<? extends AssociationContext>> {
|
||||||
|
|
||||||
Set<Class<? extends AssociationContext>> contexts = null;
|
Set<Class<? extends AssociationContext>> contexts = null;
|
||||||
boolean wigOut = false;
|
boolean bedGraphOut = false;
|
||||||
|
|
||||||
public RegionalAssociationMultiplexer(String[] toUse, boolean wiggleOutput) {
|
public RegionalAssociationMultiplexer(String[] toUse, boolean wiggleOutput) {
|
||||||
super();
|
super();
|
||||||
contexts = getAssociations(toUse);
|
contexts = getAssociations(toUse);
|
||||||
wigOut = wiggleOutput;
|
bedGraphOut = wiggleOutput;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Collection<Class<? extends AssociationContext>> multiplex() {
|
public Collection<Class<? extends AssociationContext>> multiplex() {
|
||||||
|
|
@ -29,8 +29,8 @@ public class RegionalAssociationMultiplexer implements Multiplexer<Class<? exten
|
||||||
}
|
}
|
||||||
|
|
||||||
public String transformArgument(final Class<? extends AssociationContext> context, String arg) {
|
public String transformArgument(final Class<? extends AssociationContext> context, String arg) {
|
||||||
if ( wigOut ) {
|
if ( bedGraphOut ) {
|
||||||
return String.format("%s.%s.wig",arg,context.getSimpleName());
|
return String.format("%s.%s.bedgraph",arg,context.getSimpleName());
|
||||||
}
|
}
|
||||||
return String.format("%s.%s.tdf", arg, context.getSimpleName());
|
return String.format("%s.%s.tdf", arg, context.getSimpleName());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -31,22 +31,19 @@ import java.util.*;
|
||||||
public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAssociationHandler> implements TreeReducible<RegionalAssociationHandler> {
|
public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAssociationHandler> implements TreeReducible<RegionalAssociationHandler> {
|
||||||
@Argument(doc="Association type(s) to use. Supports multiple arguments (-AT thing1 -AT thing2).",shortName="AT",fullName="associationType",required=false)
|
@Argument(doc="Association type(s) to use. Supports multiple arguments (-AT thing1 -AT thing2).",shortName="AT",fullName="associationType",required=false)
|
||||||
public String[] associationsToUse = null;
|
public String[] associationsToUse = null;
|
||||||
@Argument(doc="Change output file to wiggle format (will only see phred-scaled Q values, not STAT: x P: y Q: z",shortName="w",fullName="wiggle",required=false)
|
@Argument(doc="Change output file to bedgraph format (s p q, not STAT: s P: p Q: q",shortName="bg",fullName="bedgraph",required=false)
|
||||||
public boolean wiggleFormat = false;
|
public boolean bedGraph = false;
|
||||||
|
|
||||||
@Output
|
@Output
|
||||||
@Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse","wiggleFormat"})
|
@Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse","bedGraph"})
|
||||||
Map<AssociationContext,PrintStream> out;
|
Map<AssociationContext,PrintStream> out;
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
if ( wiggleFormat && getToolkit().getIntervals().size() > 1 ) {
|
|
||||||
throw new UserException("Wiggle format (fixedStep) is only consistent with a contiguous region (one interval) on one contig. Otherwise use the default output.");
|
|
||||||
}
|
|
||||||
|
|
||||||
Set<AssociationContext> validAssociations = getAssociations();
|
Set<AssociationContext> validAssociations = getAssociations();
|
||||||
|
|
||||||
if ( wiggleFormat ) {
|
if ( bedGraph ) {
|
||||||
writeWiggleHeaders(validAssociations);
|
writeBedGraphHeaders(validAssociations);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -68,7 +65,7 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new StingException("Error in map reduce",e);
|
throw new StingException("Error in map reduce",e);
|
||||||
}
|
}
|
||||||
Map<AssociationContext,String> testsHere = rac.runTests(wiggleFormat);
|
Map<AssociationContext,String> testsHere = rac.runTests(bedGraph);
|
||||||
// todo -- really awful shitty formatting
|
// todo -- really awful shitty formatting
|
||||||
if ( testsHere.size() > 0 ) {
|
if ( testsHere.size() > 0 ) {
|
||||||
for ( Map.Entry<AssociationContext,String> result : testsHere.entrySet() ) {
|
for ( Map.Entry<AssociationContext,String> result : testsHere.entrySet() ) {
|
||||||
|
|
@ -132,10 +129,10 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
|
||||||
return getToolkit().getSAMFileSamples();
|
return getToolkit().getSAMFileSamples();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void writeWiggleHeaders(Set<AssociationContext> cons) {
|
public void writeBedGraphHeaders(Set<AssociationContext> cons) {
|
||||||
for ( AssociationContext con : cons ) {
|
for ( AssociationContext con : cons ) {
|
||||||
GenomeLoc first = getToolkit().getIntervals().iterator().next();
|
GenomeLoc first = getToolkit().getIntervals().iterator().next();
|
||||||
String header = String.format("fixedStep chrom=%s start=%d step=%d span=%d",first.getContig(),first.getStart(),con.slideByValue(),con.getWindowSize());
|
String header = String.format("track type=bedGraph name=%s",con.getClass().getSimpleName());
|
||||||
out.get(con.getClass()).printf("%s%n",header);
|
out.get(con.getClass()).printf("%s%n",header);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue