Switch from togglable wiggle output to togglable bedgraph format. Can be pulled directly into IGV to show the statistics values. I'll need to bug jim to allow value-toggling in a bedgraph, currently 2nd and 3rd columns are just ignored.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5492 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2011-03-22 17:58:53 +00:00
parent a6d873b268
commit 687b2e51b4
4 changed files with 46 additions and 67 deletions

View File

@ -22,7 +22,7 @@ import org.broadinstitute.sting.utils.collections.Pair;
* To change this template use File | Settings | File Templates.
*/
public class AssociationTestRunner {
final static int MAX_Q_VALUE = 2000;
final static int MAX_Q_VALUE = Integer.MAX_VALUE;
// todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance
static Normal standardNormal = new Normal(0.0,1.0,null);
@ -30,6 +30,25 @@ public class AssociationTestRunner {
return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE);
}
public static Pair<Double,Pair<Double,Integer>> getTestValues(AssociationContext context) {
if ( context instanceof TStatistic) {
Pair<Double,Double> t = testStudentT((TStatistic) context);
return new Pair<Double,Pair<Double,Integer>> (t.first,new Pair<Double,Integer>(t.second,pToQ(t.second)));
}
if ( context instanceof ZStatistic) {
Pair<Double,Double> z = testZ((ZStatistic) context);
return new Pair<Double,Pair<Double,Integer>> (z.first,new Pair<Double,Integer>(z.second,pToQ(z.second)));
}
if ( context instanceof UStatistic ) {
Pair<Double,Double> u = mannWhitneyUTest((UStatistic) context);
return new Pair<Double,Pair<Double,Integer>> ( u.first, new Pair<Double,Integer>(u.second,pToQ(u.second)));
}
return null;
}
public static String runTests(AssociationContext context) {
List<String> results = new ArrayList<String>();
if ( context instanceof TStatistic) {
@ -56,32 +75,12 @@ public class AssociationTestRunner {
return buf.toString();
}
/**
* Just gets the Q value associated with a particular association context.
* @param context
* @return
*/
public static int getQValue(AssociationContext context) {
if ( context instanceof TStatistic ) {
return pToQ(testStudentT((TStatistic) context).second);
}
if ( context instanceof ZStatistic ) {
return pToQ(testZ((ZStatistic) context).second);
}
if ( context instanceof UStatistic ) {
return pToQ(mannWhitneyUTest((UStatistic) context).second);
}
return -1;
}
public static String runStudentT(TStatistic context) {
Pair<Double,Double> stats = testStudentT(context);
double t = stats.first;
double p = stats.second;
return String.format("T: %.2f\tP: %.2e\tQ: %d",t,p,(int)Math.floor(QualityUtils.phredScaleErrorRate(p)));
return String.format("T: %.2f\tP: %.2e\tQ: %d",t,p,pToQ(p));
}
public static Pair<Double,Double> testStudentT(TStatistic context) {
@ -110,7 +109,7 @@ public class AssociationTestRunner {
Pair<Double,Double> stats = testZ(context);
double z = stats.first;
double p = stats.second;
return String.format("Z: %.2f\tP: %.2e\tQ: %d",z,p,(int)Math.floor(QualityUtils.phredScaleErrorRate(p)));
return String.format("Z: %.2f\tP: %.2e\tQ: %d",z,p,pToQ(p));
}
public static Pair<Double,Double> testZ(ZStatistic context) {
@ -137,7 +136,7 @@ public class AssociationTestRunner {
// note: u statistic (U) is relatively useless for recalibrating outside of the context of m and n
// thus we report V = (U - (m*n+1)/2)/(n*m*(n+m+1)/12)
Pair<Double,Double> results = mannWhitneyUTest(context);
return String.format("V: %.2f\tP: %.2e\tQ: %d",results.first,results.second,(int)Math.floor(QualityUtils.phredScaleErrorRate(results.second)));
return String.format("V: %.2f\tP: %.2e\tQ: %d",results.first,results.second,pToQ(results.second));
}
public static Pair<Double,Double> mannWhitneyUTest(UStatistic context) {

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.oneoffprojects.walkers.association;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*;
@ -40,41 +41,23 @@ public class RegionalAssociationHandler {
/**
* Switches what formatting to use based on wiggle or standard
* @param wiggleFormat - use wiggle format (just Q) or standard (S: P: Q:)
* @param bedGraphFormat - use bedgraph format (s p q) or standard (S: s P: p Q: q)
* @return - test results in proper format
*/
public Map<AssociationContext,String> runTests(boolean wiggleFormat) {
if ( wiggleFormat ) {
return runWiggleTests();
} else {
return runTests();
}
}
public Map<AssociationContext,String> runWiggleTests() {
// todo -- maybe the tdf should be the whole window rather than just the most recent loc?
Map<AssociationContext,String> testResults = new HashMap<AssociationContext,String>(associations.size());
for ( AssociationContext w : associations ) {
if ( w.isFull() ) {
testResults.put(w,String.format("%d",AssociationTestRunner.getQValue(w)));
w.slide();
}
}
return testResults;
}
/**
* For those AssociationContexts with full windows:
* 1) Run their associated test(s)
* 2) Slide the windows
*/
public Map<AssociationContext,String> runTests() {
public Map<AssociationContext,String> runTests(boolean bedGraphFormat) {
// todo -- maybe the tdf should be the whole window rather than just the most recent loc?
Map<AssociationContext,String> testResults = new HashMap<AssociationContext,String>(associations.size());
for ( AssociationContext w : associations ) {
if ( w.isFull() ) {
String outVal;
if ( bedGraphFormat ) {
Pair<Double,Pair<Double,Integer>> vals = AssociationTestRunner.getTestValues(w);
outVal = String.format("%.2f\t%.2e\t%d",vals.first,vals.second.first,vals.second.second);
} else {
outVal = AssociationTestRunner.runTests(w);
}
testResults.put(w,String.format("%s\t%d\t%d\t%s",maps.getReferenceContext().getLocus().getContig(),
maps.getReferenceContext().getLocus().getStart(),maps.getReferenceContext().getLocus().getStart()+1,AssociationTestRunner.runTests(w)));
maps.getReferenceContext().getLocus().getStart()-w.getWindowSize()-1,maps.getReferenceContext().getLocus().getStart()+1, outVal));
w.slide();
}
}

View File

@ -16,12 +16,12 @@ import java.util.*;
public class RegionalAssociationMultiplexer implements Multiplexer<Class<? extends AssociationContext>> {
Set<Class<? extends AssociationContext>> contexts = null;
boolean wigOut = false;
boolean bedGraphOut = false;
public RegionalAssociationMultiplexer(String[] toUse, boolean wiggleOutput) {
super();
contexts = getAssociations(toUse);
wigOut = wiggleOutput;
bedGraphOut = wiggleOutput;
}
public Collection<Class<? extends AssociationContext>> multiplex() {
@ -29,8 +29,8 @@ public class RegionalAssociationMultiplexer implements Multiplexer<Class<? exten
}
public String transformArgument(final Class<? extends AssociationContext> context, String arg) {
if ( wigOut ) {
return String.format("%s.%s.wig",arg,context.getSimpleName());
if ( bedGraphOut ) {
return String.format("%s.%s.bedgraph",arg,context.getSimpleName());
}
return String.format("%s.%s.tdf", arg, context.getSimpleName());
}

View File

@ -31,22 +31,19 @@ import java.util.*;
public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAssociationHandler> implements TreeReducible<RegionalAssociationHandler> {
@Argument(doc="Association type(s) to use. Supports multiple arguments (-AT thing1 -AT thing2).",shortName="AT",fullName="associationType",required=false)
public String[] associationsToUse = null;
@Argument(doc="Change output file to wiggle format (will only see phred-scaled Q values, not STAT: x P: y Q: z",shortName="w",fullName="wiggle",required=false)
public boolean wiggleFormat = false;
@Argument(doc="Change output file to bedgraph format (s p q, not STAT: s P: p Q: q",shortName="bg",fullName="bedgraph",required=false)
public boolean bedGraph = false;
@Output
@Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse","wiggleFormat"})
@Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse","bedGraph"})
Map<AssociationContext,PrintStream> out;
public void initialize() {
if ( wiggleFormat && getToolkit().getIntervals().size() > 1 ) {
throw new UserException("Wiggle format (fixedStep) is only consistent with a contiguous region (one interval) on one contig. Otherwise use the default output.");
}
Set<AssociationContext> validAssociations = getAssociations();
if ( wiggleFormat ) {
writeWiggleHeaders(validAssociations);
if ( bedGraph ) {
writeBedGraphHeaders(validAssociations);
}
}
@ -68,7 +65,7 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
} catch (Exception e) {
throw new StingException("Error in map reduce",e);
}
Map<AssociationContext,String> testsHere = rac.runTests(wiggleFormat);
Map<AssociationContext,String> testsHere = rac.runTests(bedGraph);
// todo -- really awful shitty formatting
if ( testsHere.size() > 0 ) {
for ( Map.Entry<AssociationContext,String> result : testsHere.entrySet() ) {
@ -132,10 +129,10 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
return getToolkit().getSAMFileSamples();
}
public void writeWiggleHeaders(Set<AssociationContext> cons) {
public void writeBedGraphHeaders(Set<AssociationContext> cons) {
for ( AssociationContext con : cons ) {
GenomeLoc first = getToolkit().getIntervals().iterator().next();
String header = String.format("fixedStep chrom=%s start=%d step=%d span=%d",first.getContig(),first.getStart(),con.slideByValue(),con.getWindowSize());
String header = String.format("track type=bedGraph name=%s",con.getClass().getSimpleName());
out.get(con.getClass()).printf("%s%n",header);
}
}