Switch from togglable wiggle output to togglable bedgraph format. Can be pulled directly into IGV to show the statistics values. I'll need to bug jim to allow value-toggling in a bedgraph, currently 2nd and 3rd columns are just ignored.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5492 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a6d873b268
commit
687b2e51b4
|
|
@ -22,7 +22,7 @@ import org.broadinstitute.sting.utils.collections.Pair;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class AssociationTestRunner {
|
||||
final static int MAX_Q_VALUE = 2000;
|
||||
final static int MAX_Q_VALUE = Integer.MAX_VALUE;
|
||||
// todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance
|
||||
static Normal standardNormal = new Normal(0.0,1.0,null);
|
||||
|
||||
|
|
@ -30,6 +30,25 @@ public class AssociationTestRunner {
|
|||
return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE);
|
||||
}
|
||||
|
||||
public static Pair<Double,Pair<Double,Integer>> getTestValues(AssociationContext context) {
|
||||
if ( context instanceof TStatistic) {
|
||||
Pair<Double,Double> t = testStudentT((TStatistic) context);
|
||||
return new Pair<Double,Pair<Double,Integer>> (t.first,new Pair<Double,Integer>(t.second,pToQ(t.second)));
|
||||
}
|
||||
|
||||
if ( context instanceof ZStatistic) {
|
||||
Pair<Double,Double> z = testZ((ZStatistic) context);
|
||||
return new Pair<Double,Pair<Double,Integer>> (z.first,new Pair<Double,Integer>(z.second,pToQ(z.second)));
|
||||
}
|
||||
|
||||
if ( context instanceof UStatistic ) {
|
||||
Pair<Double,Double> u = mannWhitneyUTest((UStatistic) context);
|
||||
return new Pair<Double,Pair<Double,Integer>> ( u.first, new Pair<Double,Integer>(u.second,pToQ(u.second)));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public static String runTests(AssociationContext context) {
|
||||
List<String> results = new ArrayList<String>();
|
||||
if ( context instanceof TStatistic) {
|
||||
|
|
@ -56,32 +75,12 @@ public class AssociationTestRunner {
|
|||
return buf.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Just gets the Q value associated with a particular association context.
|
||||
* @param context
|
||||
* @return
|
||||
*/
|
||||
public static int getQValue(AssociationContext context) {
|
||||
if ( context instanceof TStatistic ) {
|
||||
return pToQ(testStudentT((TStatistic) context).second);
|
||||
}
|
||||
|
||||
if ( context instanceof ZStatistic ) {
|
||||
return pToQ(testZ((ZStatistic) context).second);
|
||||
}
|
||||
|
||||
if ( context instanceof UStatistic ) {
|
||||
return pToQ(mannWhitneyUTest((UStatistic) context).second);
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
public static String runStudentT(TStatistic context) {
|
||||
Pair<Double,Double> stats = testStudentT(context);
|
||||
double t = stats.first;
|
||||
double p = stats.second;
|
||||
return String.format("T: %.2f\tP: %.2e\tQ: %d",t,p,(int)Math.floor(QualityUtils.phredScaleErrorRate(p)));
|
||||
return String.format("T: %.2f\tP: %.2e\tQ: %d",t,p,pToQ(p));
|
||||
}
|
||||
|
||||
public static Pair<Double,Double> testStudentT(TStatistic context) {
|
||||
|
|
@ -110,7 +109,7 @@ public class AssociationTestRunner {
|
|||
Pair<Double,Double> stats = testZ(context);
|
||||
double z = stats.first;
|
||||
double p = stats.second;
|
||||
return String.format("Z: %.2f\tP: %.2e\tQ: %d",z,p,(int)Math.floor(QualityUtils.phredScaleErrorRate(p)));
|
||||
return String.format("Z: %.2f\tP: %.2e\tQ: %d",z,p,pToQ(p));
|
||||
}
|
||||
|
||||
public static Pair<Double,Double> testZ(ZStatistic context) {
|
||||
|
|
@ -137,7 +136,7 @@ public class AssociationTestRunner {
|
|||
// note: u statistic (U) is relatively useless for recalibrating outside of the context of m and n
|
||||
// thus we report V = (U - (m*n+1)/2)/(n*m*(n+m+1)/12)
|
||||
Pair<Double,Double> results = mannWhitneyUTest(context);
|
||||
return String.format("V: %.2f\tP: %.2e\tQ: %d",results.first,results.second,(int)Math.floor(QualityUtils.phredScaleErrorRate(results.second)));
|
||||
return String.format("V: %.2f\tP: %.2e\tQ: %d",results.first,results.second,pToQ(results.second));
|
||||
}
|
||||
|
||||
public static Pair<Double,Double> mannWhitneyUTest(UStatistic context) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -40,41 +41,23 @@ public class RegionalAssociationHandler {
|
|||
|
||||
/**
|
||||
* Switches what formatting to use based on wiggle or standard
|
||||
* @param wiggleFormat - use wiggle format (just Q) or standard (S: P: Q:)
|
||||
* @param bedGraphFormat - use bedgraph format (s p q) or standard (S: s P: p Q: q)
|
||||
* @return - test results in proper format
|
||||
*/
|
||||
public Map<AssociationContext,String> runTests(boolean wiggleFormat) {
|
||||
if ( wiggleFormat ) {
|
||||
return runWiggleTests();
|
||||
} else {
|
||||
return runTests();
|
||||
}
|
||||
}
|
||||
|
||||
public Map<AssociationContext,String> runWiggleTests() {
|
||||
// todo -- maybe the tdf should be the whole window rather than just the most recent loc?
|
||||
Map<AssociationContext,String> testResults = new HashMap<AssociationContext,String>(associations.size());
|
||||
for ( AssociationContext w : associations ) {
|
||||
if ( w.isFull() ) {
|
||||
testResults.put(w,String.format("%d",AssociationTestRunner.getQValue(w)));
|
||||
w.slide();
|
||||
}
|
||||
}
|
||||
return testResults;
|
||||
}
|
||||
|
||||
/**
|
||||
* For those AssociationContexts with full windows:
|
||||
* 1) Run their associated test(s)
|
||||
* 2) Slide the windows
|
||||
*/
|
||||
public Map<AssociationContext,String> runTests() {
|
||||
public Map<AssociationContext,String> runTests(boolean bedGraphFormat) {
|
||||
// todo -- maybe the tdf should be the whole window rather than just the most recent loc?
|
||||
Map<AssociationContext,String> testResults = new HashMap<AssociationContext,String>(associations.size());
|
||||
for ( AssociationContext w : associations ) {
|
||||
if ( w.isFull() ) {
|
||||
String outVal;
|
||||
if ( bedGraphFormat ) {
|
||||
Pair<Double,Pair<Double,Integer>> vals = AssociationTestRunner.getTestValues(w);
|
||||
outVal = String.format("%.2f\t%.2e\t%d",vals.first,vals.second.first,vals.second.second);
|
||||
} else {
|
||||
outVal = AssociationTestRunner.runTests(w);
|
||||
}
|
||||
testResults.put(w,String.format("%s\t%d\t%d\t%s",maps.getReferenceContext().getLocus().getContig(),
|
||||
maps.getReferenceContext().getLocus().getStart(),maps.getReferenceContext().getLocus().getStart()+1,AssociationTestRunner.runTests(w)));
|
||||
maps.getReferenceContext().getLocus().getStart()-w.getWindowSize()-1,maps.getReferenceContext().getLocus().getStart()+1, outVal));
|
||||
w.slide();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,12 +16,12 @@ import java.util.*;
|
|||
public class RegionalAssociationMultiplexer implements Multiplexer<Class<? extends AssociationContext>> {
|
||||
|
||||
Set<Class<? extends AssociationContext>> contexts = null;
|
||||
boolean wigOut = false;
|
||||
boolean bedGraphOut = false;
|
||||
|
||||
public RegionalAssociationMultiplexer(String[] toUse, boolean wiggleOutput) {
|
||||
super();
|
||||
contexts = getAssociations(toUse);
|
||||
wigOut = wiggleOutput;
|
||||
bedGraphOut = wiggleOutput;
|
||||
}
|
||||
|
||||
public Collection<Class<? extends AssociationContext>> multiplex() {
|
||||
|
|
@ -29,8 +29,8 @@ public class RegionalAssociationMultiplexer implements Multiplexer<Class<? exten
|
|||
}
|
||||
|
||||
public String transformArgument(final Class<? extends AssociationContext> context, String arg) {
|
||||
if ( wigOut ) {
|
||||
return String.format("%s.%s.wig",arg,context.getSimpleName());
|
||||
if ( bedGraphOut ) {
|
||||
return String.format("%s.%s.bedgraph",arg,context.getSimpleName());
|
||||
}
|
||||
return String.format("%s.%s.tdf", arg, context.getSimpleName());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,22 +31,19 @@ import java.util.*;
|
|||
public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAssociationHandler> implements TreeReducible<RegionalAssociationHandler> {
|
||||
@Argument(doc="Association type(s) to use. Supports multiple arguments (-AT thing1 -AT thing2).",shortName="AT",fullName="associationType",required=false)
|
||||
public String[] associationsToUse = null;
|
||||
@Argument(doc="Change output file to wiggle format (will only see phred-scaled Q values, not STAT: x P: y Q: z",shortName="w",fullName="wiggle",required=false)
|
||||
public boolean wiggleFormat = false;
|
||||
@Argument(doc="Change output file to bedgraph format (s p q, not STAT: s P: p Q: q",shortName="bg",fullName="bedgraph",required=false)
|
||||
public boolean bedGraph = false;
|
||||
|
||||
@Output
|
||||
@Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse","wiggleFormat"})
|
||||
@Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse","bedGraph"})
|
||||
Map<AssociationContext,PrintStream> out;
|
||||
|
||||
public void initialize() {
|
||||
if ( wiggleFormat && getToolkit().getIntervals().size() > 1 ) {
|
||||
throw new UserException("Wiggle format (fixedStep) is only consistent with a contiguous region (one interval) on one contig. Otherwise use the default output.");
|
||||
}
|
||||
|
||||
Set<AssociationContext> validAssociations = getAssociations();
|
||||
|
||||
if ( wiggleFormat ) {
|
||||
writeWiggleHeaders(validAssociations);
|
||||
if ( bedGraph ) {
|
||||
writeBedGraphHeaders(validAssociations);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -68,7 +65,7 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
|
|||
} catch (Exception e) {
|
||||
throw new StingException("Error in map reduce",e);
|
||||
}
|
||||
Map<AssociationContext,String> testsHere = rac.runTests(wiggleFormat);
|
||||
Map<AssociationContext,String> testsHere = rac.runTests(bedGraph);
|
||||
// todo -- really awful shitty formatting
|
||||
if ( testsHere.size() > 0 ) {
|
||||
for ( Map.Entry<AssociationContext,String> result : testsHere.entrySet() ) {
|
||||
|
|
@ -132,10 +129,10 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
|
|||
return getToolkit().getSAMFileSamples();
|
||||
}
|
||||
|
||||
public void writeWiggleHeaders(Set<AssociationContext> cons) {
|
||||
public void writeBedGraphHeaders(Set<AssociationContext> cons) {
|
||||
for ( AssociationContext con : cons ) {
|
||||
GenomeLoc first = getToolkit().getIntervals().iterator().next();
|
||||
String header = String.format("fixedStep chrom=%s start=%d step=%d span=%d",first.getContig(),first.getStart(),con.slideByValue(),con.getWindowSize());
|
||||
String header = String.format("track type=bedGraph name=%s",con.getClass().getSimpleName());
|
||||
out.get(con.getClass()).printf("%s%n",header);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue