Switch from togglable wiggle output to togglable bedgraph format. Can be pulled directly into IGV to show the statistics values. I'll need to bug jim to allow value-toggling in a bedgraph, currently 2nd and 3rd columns are just ignored.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5492 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2011-03-22 17:58:53 +00:00
parent a6d873b268
commit 687b2e51b4
4 changed files with 46 additions and 67 deletions

View File

@ -22,7 +22,7 @@ import org.broadinstitute.sting.utils.collections.Pair;
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
public class AssociationTestRunner { public class AssociationTestRunner {
final static int MAX_Q_VALUE = 2000; final static int MAX_Q_VALUE = Integer.MAX_VALUE;
// todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance // todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance
static Normal standardNormal = new Normal(0.0,1.0,null); static Normal standardNormal = new Normal(0.0,1.0,null);
@ -30,6 +30,25 @@ public class AssociationTestRunner {
return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE); return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE);
} }
public static Pair<Double,Pair<Double,Integer>> getTestValues(AssociationContext context) {
if ( context instanceof TStatistic) {
Pair<Double,Double> t = testStudentT((TStatistic) context);
return new Pair<Double,Pair<Double,Integer>> (t.first,new Pair<Double,Integer>(t.second,pToQ(t.second)));
}
if ( context instanceof ZStatistic) {
Pair<Double,Double> z = testZ((ZStatistic) context);
return new Pair<Double,Pair<Double,Integer>> (z.first,new Pair<Double,Integer>(z.second,pToQ(z.second)));
}
if ( context instanceof UStatistic ) {
Pair<Double,Double> u = mannWhitneyUTest((UStatistic) context);
return new Pair<Double,Pair<Double,Integer>> ( u.first, new Pair<Double,Integer>(u.second,pToQ(u.second)));
}
return null;
}
public static String runTests(AssociationContext context) { public static String runTests(AssociationContext context) {
List<String> results = new ArrayList<String>(); List<String> results = new ArrayList<String>();
if ( context instanceof TStatistic) { if ( context instanceof TStatistic) {
@ -56,32 +75,12 @@ public class AssociationTestRunner {
return buf.toString(); return buf.toString();
} }
/**
* Just gets the Q value associated with a particular association context.
* @param context
* @return
*/
public static int getQValue(AssociationContext context) {
if ( context instanceof TStatistic ) {
return pToQ(testStudentT((TStatistic) context).second);
}
if ( context instanceof ZStatistic ) {
return pToQ(testZ((ZStatistic) context).second);
}
if ( context instanceof UStatistic ) {
return pToQ(mannWhitneyUTest((UStatistic) context).second);
}
return -1;
}
public static String runStudentT(TStatistic context) { public static String runStudentT(TStatistic context) {
Pair<Double,Double> stats = testStudentT(context); Pair<Double,Double> stats = testStudentT(context);
double t = stats.first; double t = stats.first;
double p = stats.second; double p = stats.second;
return String.format("T: %.2f\tP: %.2e\tQ: %d",t,p,(int)Math.floor(QualityUtils.phredScaleErrorRate(p))); return String.format("T: %.2f\tP: %.2e\tQ: %d",t,p,pToQ(p));
} }
public static Pair<Double,Double> testStudentT(TStatistic context) { public static Pair<Double,Double> testStudentT(TStatistic context) {
@ -110,7 +109,7 @@ public class AssociationTestRunner {
Pair<Double,Double> stats = testZ(context); Pair<Double,Double> stats = testZ(context);
double z = stats.first; double z = stats.first;
double p = stats.second; double p = stats.second;
return String.format("Z: %.2f\tP: %.2e\tQ: %d",z,p,(int)Math.floor(QualityUtils.phredScaleErrorRate(p))); return String.format("Z: %.2f\tP: %.2e\tQ: %d",z,p,pToQ(p));
} }
public static Pair<Double,Double> testZ(ZStatistic context) { public static Pair<Double,Double> testZ(ZStatistic context) {
@ -137,7 +136,7 @@ public class AssociationTestRunner {
// note: u statistic (U) is relatively useless for recalibrating outside of the context of m and n // note: u statistic (U) is relatively useless for recalibrating outside of the context of m and n
// thus we report V = (U - (m*n+1)/2)/(n*m*(n+m+1)/12) // thus we report V = (U - (m*n+1)/2)/(n*m*(n+m+1)/12)
Pair<Double,Double> results = mannWhitneyUTest(context); Pair<Double,Double> results = mannWhitneyUTest(context);
return String.format("V: %.2f\tP: %.2e\tQ: %d",results.first,results.second,(int)Math.floor(QualityUtils.phredScaleErrorRate(results.second))); return String.format("V: %.2f\tP: %.2e\tQ: %d",results.first,results.second,pToQ(results.second));
} }
public static Pair<Double,Double> mannWhitneyUTest(UStatistic context) { public static Pair<Double,Double> mannWhitneyUTest(UStatistic context) {

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.oneoffprojects.walkers.association; package org.broadinstitute.sting.oneoffprojects.walkers.association;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*; import java.util.*;
@ -40,41 +41,23 @@ public class RegionalAssociationHandler {
/** /**
* Switches what formatting to use based on wiggle or standard * Switches what formatting to use based on wiggle or standard
* @param wiggleFormat - use wiggle format (just Q) or standard (S: P: Q:) * @param bedGraphFormat - use bedgraph format (s p q) or standard (S: s P: p Q: q)
* @return - test results in proper format * @return - test results in proper format
*/ */
public Map<AssociationContext,String> runTests(boolean wiggleFormat) { public Map<AssociationContext,String> runTests(boolean bedGraphFormat) {
if ( wiggleFormat ) {
return runWiggleTests();
} else {
return runTests();
}
}
public Map<AssociationContext,String> runWiggleTests() {
// todo -- maybe the tdf should be the whole window rather than just the most recent loc?
Map<AssociationContext,String> testResults = new HashMap<AssociationContext,String>(associations.size());
for ( AssociationContext w : associations ) {
if ( w.isFull() ) {
testResults.put(w,String.format("%d",AssociationTestRunner.getQValue(w)));
w.slide();
}
}
return testResults;
}
/**
* For those AssociationContexts with full windows:
* 1) Run their associated test(s)
* 2) Slide the windows
*/
public Map<AssociationContext,String> runTests() {
// todo -- maybe the tdf should be the whole window rather than just the most recent loc? // todo -- maybe the tdf should be the whole window rather than just the most recent loc?
Map<AssociationContext,String> testResults = new HashMap<AssociationContext,String>(associations.size()); Map<AssociationContext,String> testResults = new HashMap<AssociationContext,String>(associations.size());
for ( AssociationContext w : associations ) { for ( AssociationContext w : associations ) {
if ( w.isFull() ) { if ( w.isFull() ) {
String outVal;
if ( bedGraphFormat ) {
Pair<Double,Pair<Double,Integer>> vals = AssociationTestRunner.getTestValues(w);
outVal = String.format("%.2f\t%.2e\t%d",vals.first,vals.second.first,vals.second.second);
} else {
outVal = AssociationTestRunner.runTests(w);
}
testResults.put(w,String.format("%s\t%d\t%d\t%s",maps.getReferenceContext().getLocus().getContig(), testResults.put(w,String.format("%s\t%d\t%d\t%s",maps.getReferenceContext().getLocus().getContig(),
maps.getReferenceContext().getLocus().getStart(),maps.getReferenceContext().getLocus().getStart()+1,AssociationTestRunner.runTests(w))); maps.getReferenceContext().getLocus().getStart()-w.getWindowSize()-1,maps.getReferenceContext().getLocus().getStart()+1, outVal));
w.slide(); w.slide();
} }
} }

View File

@ -16,12 +16,12 @@ import java.util.*;
public class RegionalAssociationMultiplexer implements Multiplexer<Class<? extends AssociationContext>> { public class RegionalAssociationMultiplexer implements Multiplexer<Class<? extends AssociationContext>> {
Set<Class<? extends AssociationContext>> contexts = null; Set<Class<? extends AssociationContext>> contexts = null;
boolean wigOut = false; boolean bedGraphOut = false;
public RegionalAssociationMultiplexer(String[] toUse, boolean wiggleOutput) { public RegionalAssociationMultiplexer(String[] toUse, boolean wiggleOutput) {
super(); super();
contexts = getAssociations(toUse); contexts = getAssociations(toUse);
wigOut = wiggleOutput; bedGraphOut = wiggleOutput;
} }
public Collection<Class<? extends AssociationContext>> multiplex() { public Collection<Class<? extends AssociationContext>> multiplex() {
@ -29,8 +29,8 @@ public class RegionalAssociationMultiplexer implements Multiplexer<Class<? exten
} }
public String transformArgument(final Class<? extends AssociationContext> context, String arg) { public String transformArgument(final Class<? extends AssociationContext> context, String arg) {
if ( wigOut ) { if ( bedGraphOut ) {
return String.format("%s.%s.wig",arg,context.getSimpleName()); return String.format("%s.%s.bedgraph",arg,context.getSimpleName());
} }
return String.format("%s.%s.tdf", arg, context.getSimpleName()); return String.format("%s.%s.tdf", arg, context.getSimpleName());
} }

View File

@ -31,22 +31,19 @@ import java.util.*;
public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAssociationHandler> implements TreeReducible<RegionalAssociationHandler> { public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAssociationHandler> implements TreeReducible<RegionalAssociationHandler> {
@Argument(doc="Association type(s) to use. Supports multiple arguments (-AT thing1 -AT thing2).",shortName="AT",fullName="associationType",required=false) @Argument(doc="Association type(s) to use. Supports multiple arguments (-AT thing1 -AT thing2).",shortName="AT",fullName="associationType",required=false)
public String[] associationsToUse = null; public String[] associationsToUse = null;
@Argument(doc="Change output file to wiggle format (will only see phred-scaled Q values, not STAT: x P: y Q: z",shortName="w",fullName="wiggle",required=false) @Argument(doc="Change output file to bedgraph format (s p q, not STAT: s P: p Q: q",shortName="bg",fullName="bedgraph",required=false)
public boolean wiggleFormat = false; public boolean bedGraph = false;
@Output @Output
@Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse","wiggleFormat"}) @Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse","bedGraph"})
Map<AssociationContext,PrintStream> out; Map<AssociationContext,PrintStream> out;
public void initialize() { public void initialize() {
if ( wiggleFormat && getToolkit().getIntervals().size() > 1 ) {
throw new UserException("Wiggle format (fixedStep) is only consistent with a contiguous region (one interval) on one contig. Otherwise use the default output.");
}
Set<AssociationContext> validAssociations = getAssociations(); Set<AssociationContext> validAssociations = getAssociations();
if ( wiggleFormat ) { if ( bedGraph ) {
writeWiggleHeaders(validAssociations); writeBedGraphHeaders(validAssociations);
} }
} }
@ -68,7 +65,7 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
} catch (Exception e) { } catch (Exception e) {
throw new StingException("Error in map reduce",e); throw new StingException("Error in map reduce",e);
} }
Map<AssociationContext,String> testsHere = rac.runTests(wiggleFormat); Map<AssociationContext,String> testsHere = rac.runTests(bedGraph);
// todo -- really awful shitty formatting // todo -- really awful shitty formatting
if ( testsHere.size() > 0 ) { if ( testsHere.size() > 0 ) {
for ( Map.Entry<AssociationContext,String> result : testsHere.entrySet() ) { for ( Map.Entry<AssociationContext,String> result : testsHere.entrySet() ) {
@ -132,10 +129,10 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
return getToolkit().getSAMFileSamples(); return getToolkit().getSAMFileSamples();
} }
public void writeWiggleHeaders(Set<AssociationContext> cons) { public void writeBedGraphHeaders(Set<AssociationContext> cons) {
for ( AssociationContext con : cons ) { for ( AssociationContext con : cons ) {
GenomeLoc first = getToolkit().getIntervals().iterator().next(); GenomeLoc first = getToolkit().getIntervals().iterator().next();
String header = String.format("fixedStep chrom=%s start=%d step=%d span=%d",first.getContig(),first.getStart(),con.slideByValue(),con.getWindowSize()); String header = String.format("track type=bedGraph name=%s",con.getClass().getSimpleName());
out.get(con.getClass()).printf("%s%n",header); out.get(con.getClass()).printf("%s%n",header);
} }
} }