Testing modules now use wider windows and heftier shift, hopefully this will remove some of the noisiness of the results. Some UStatistics were changed to TStatistics to try and limit noisiness as well. Walker will also additionally write out wiggle files directly (which can be converted into "proper" tdf files via igvtools tile [args] [in].wig [out].tdf [ref]) subject to some restrictions. MWU could get stuck in a long-running recursive regime, it'd be nice to have a table lookup or a good small-n large-m approximation, for now the uniform should work just fine.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5403 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b733cba7c7
commit
77fe902dbd
|
|
@ -22,6 +22,7 @@ import org.broadinstitute.sting.utils.collections.Pair;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class AssociationTestRunner {
|
||||
// todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance
|
||||
static Normal standardNormal = new Normal(0.0,1.0,null);
|
||||
|
||||
public static String runTests(AssociationContext context) {
|
||||
|
|
@ -50,6 +51,27 @@ public class AssociationTestRunner {
|
|||
return buf.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Just gets the Q value associated with a particular association context.
|
||||
* @param context
|
||||
* @return
|
||||
*/
|
||||
public static int getQValue(AssociationContext context) {
|
||||
if ( context instanceof TStatistic ) {
|
||||
return (int) Math.floor(QualityUtils.phredScaleErrorRate(testStudentT((TStatistic) context).second));
|
||||
}
|
||||
|
||||
if ( context instanceof ZStatistic ) {
|
||||
return (int) Math.floor(QualityUtils.phredScaleErrorRate(testZ((ZStatistic) context).second));
|
||||
}
|
||||
|
||||
if ( context instanceof UStatistic ) {
|
||||
return (int) Math.floor(QualityUtils.phredScaleErrorRate(mannWhitneyUTest((UStatistic) context).second));
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
public static String runStudentT(TStatistic context) {
|
||||
Pair<Double,Double> stats = testStudentT(context);
|
||||
double t = stats.first;
|
||||
|
|
|
|||
|
|
@ -38,6 +38,31 @@ public class RegionalAssociationHandler {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Switches what formatting to use based on wiggle or standard
|
||||
* @param wiggleFormat - use wiggle format (just Q) or standard (S: P: Q:)
|
||||
* @return - test results in proper format
|
||||
*/
|
||||
public Map<AssociationContext,String> runTests(boolean wiggleFormat) {
|
||||
if ( wiggleFormat ) {
|
||||
return runWiggleTests();
|
||||
} else {
|
||||
return runTests();
|
||||
}
|
||||
}
|
||||
|
||||
public Map<AssociationContext,String> runWiggleTests() {
|
||||
// todo -- maybe the tdf should be the whole window rather than just the most recent loc?
|
||||
Map<AssociationContext,String> testResults = new HashMap<AssociationContext,String>(associations.size());
|
||||
for ( AssociationContext w : associations ) {
|
||||
if ( w.isFull() ) {
|
||||
testResults.put(w,String.format("%d",AssociationTestRunner.getQValue(w)));
|
||||
w.slide();
|
||||
}
|
||||
}
|
||||
return testResults;
|
||||
}
|
||||
|
||||
/**
|
||||
* For those AssociationContexts with full windows:
|
||||
* 1) Run their associated test(s)
|
||||
|
|
|
|||
|
|
@ -16,11 +16,12 @@ import java.util.*;
|
|||
public class RegionalAssociationMultiplexer implements Multiplexer<Class<? extends AssociationContext>> {
|
||||
|
||||
Set<Class<? extends AssociationContext>> contexts = null;
|
||||
boolean wigOut = false;
|
||||
|
||||
public RegionalAssociationMultiplexer(String[] toUse) {
|
||||
public RegionalAssociationMultiplexer(String[] toUse, boolean wiggleOutput) {
|
||||
super();
|
||||
contexts = getAssociations(toUse);
|
||||
|
||||
wigOut = wiggleOutput;
|
||||
}
|
||||
|
||||
public Collection<Class<? extends AssociationContext>> multiplex() {
|
||||
|
|
@ -28,6 +29,9 @@ public class RegionalAssociationMultiplexer implements Multiplexer<Class<? exten
|
|||
}
|
||||
|
||||
public String transformArgument(final Class<? extends AssociationContext> context, String arg) {
|
||||
if ( wigOut ) {
|
||||
return String.format("%s.%s.wig",arg,context.getSimpleName());
|
||||
}
|
||||
return String.format("%s.%s.tdf", arg, context.getSimpleName());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,13 +2,16 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association;
|
|||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Downsample;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Multiplex;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
|
@ -24,14 +27,29 @@ import java.util.*;
|
|||
* @Date 2011-02-23
|
||||
* Generalized framework for regional (windowed) associations
|
||||
*/
|
||||
@Downsample(by= DownsampleType.NONE)
|
||||
public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAssociationHandler> implements TreeReducible<RegionalAssociationHandler> {
|
||||
@Argument(doc="Association type(s) to use. Supports multiple arguments (-AT thing1 -AT thing2).",shortName="AT",fullName="associationType",required=false)
|
||||
public String[] associationsToUse = null;
|
||||
@Argument(doc="Change output file to wiggle format (will only see phred-scaled Q values, not STAT: x P: y Q: z",shortName="w",fullName="wiggle",required=false)
|
||||
public boolean wiggleFormat = false;
|
||||
|
||||
@Output
|
||||
@Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse"})
|
||||
@Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse","wiggleFormat"})
|
||||
Map<AssociationContext,PrintStream> out;
|
||||
|
||||
public void initialize() {
|
||||
if ( wiggleFormat && getToolkit().getIntervals().size() > 1 ) {
|
||||
throw new UserException("Wiggle format (fixedStep) is only consistent with a contiguous region (one interval) on one contig. Otherwise use the default output.");
|
||||
}
|
||||
|
||||
Set<AssociationContext> validAssociations = getAssociations();
|
||||
|
||||
if ( wiggleFormat ) {
|
||||
writeWiggleHeaders(validAssociations);
|
||||
}
|
||||
}
|
||||
|
||||
public RegionalAssociationHandler reduceInit() {
|
||||
Set<AssociationContext> validAssociations = getAssociations();
|
||||
RegionalAssociationHandler wac = new RegionalAssociationHandler(validAssociations);
|
||||
|
|
@ -50,7 +68,7 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
|
|||
} catch (Exception e) {
|
||||
throw new StingException("Error in map reduce",e);
|
||||
}
|
||||
Map<AssociationContext,String> testsHere = rac.runTests();
|
||||
Map<AssociationContext,String> testsHere = rac.runTests(wiggleFormat);
|
||||
// todo -- really awful shitty formatting
|
||||
if ( testsHere.size() > 0 ) {
|
||||
for ( Map.Entry<AssociationContext,String> result : testsHere.entrySet() ) {
|
||||
|
|
@ -113,4 +131,12 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
|
|||
public Set<Sample> getSamples() {
|
||||
return getToolkit().getSAMFileSamples();
|
||||
}
|
||||
|
||||
public void writeWiggleHeaders(Set<AssociationContext> cons) {
|
||||
for ( AssociationContext con : cons ) {
|
||||
GenomeLoc first = getToolkit().getIntervals().iterator().next();
|
||||
String header = String.format("fixedStep\tchrom=%s,start=%d,step=%d,span=%d",first.getContig(),first.getStart(),con.slideByValue(),con.getWindowSize());
|
||||
out.get(con.getClass()).printf("%s%n",header);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
|
|
@ -25,7 +26,7 @@ public class BaseQualityScore extends TStatistic {
|
|||
return (Collection) baseQuals;
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 5; }
|
||||
public int slideByValue() { return 1; }
|
||||
public int getWindowSize() { return 200; }
|
||||
public int slideByValue() { return 25; }
|
||||
public boolean usePreviouslySeenReads() { return true; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,9 +12,9 @@ import java.util.List;
|
|||
/**
|
||||
* @author chartl
|
||||
*/
|
||||
public class InsertSizeDistribution extends UStatistic {
|
||||
public int getWindowSize() { return 100; }
|
||||
public int slideByValue() { return 10; }
|
||||
public class InsertSizeDistribution extends TStatistic {
|
||||
public int getWindowSize() { return 200; }
|
||||
public int slideByValue() { return 25; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
|
||||
public Collection<Number> map(ReadBackedPileup pileup) {
|
||||
|
|
|
|||
|
|
@ -26,8 +26,8 @@ public class MappingQuality0 extends ZStatistic {
|
|||
return new Pair<Number,Number>(mq0,total);
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 50; }
|
||||
public int slideByValue() { return 10; }
|
||||
public int getWindowSize() { return 200; }
|
||||
public int slideByValue() { return 25; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ public class MateMappingQuality extends UStatistic {
|
|||
return (Collection) mateMapQ;
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 40; }
|
||||
public int slideByValue() { return 20; }
|
||||
public int getWindowSize() { return 200; }
|
||||
public int slideByValue() { return 25; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
*/
|
||||
public class MateOtherContig extends ZStatistic {
|
||||
|
||||
public int getWindowSize() { return 50; }
|
||||
public int getWindowSize() { return 200; }
|
||||
public int slideByValue() { return 25; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class MateSameStrand extends ZStatistic {
|
|||
return new Pair<Number,Number>(mateSameStrand,numPairs);
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 40; }
|
||||
public int slideByValue() { return 5; }
|
||||
public int getWindowSize() { return 200; }
|
||||
public int slideByValue() { return 25; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class MateUnmapped extends ZStatistic {
|
|||
return new Pair<Number,Number>(numPairUnmapped,numMatedReads);
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 100; }
|
||||
public int getWindowSize() { return 200; }
|
||||
public int slideByValue() { return 25; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ public class ProperPairs extends ZStatistic {
|
|||
return new Pair<Number,Number>(numPropPair,numReads);
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 30; }
|
||||
public int slideByValue() { return 10; }
|
||||
public int getWindowSize() { return 200; }
|
||||
public int slideByValue() { return 25; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ public class ReadClipping extends TStatistic {
|
|||
return (Collection) clipping;
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 30; }
|
||||
public int slideByValue() { return 5; }
|
||||
public int getWindowSize() { return 200; }
|
||||
public int slideByValue() { return 25; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
|||
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -16,7 +17,7 @@ import java.util.Collection;
|
|||
* Time: 1:54 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadIndels extends UStatistic {
|
||||
public class ReadIndels extends TStatistic {
|
||||
|
||||
public Collection<Number> map(ReadBackedPileup rbp) {
|
||||
ArrayList<Integer> indelElements = new ArrayList<Integer>(rbp.size());
|
||||
|
|
@ -32,7 +33,7 @@ public class ReadIndels extends UStatistic {
|
|||
return (Collection) indelElements;
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 50; }
|
||||
public int slideByValue() { return 10; }
|
||||
public int getWindowSize() { return 200; }
|
||||
public int slideByValue() { return 25; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -14,7 +15,7 @@ import java.util.Collection;
|
|||
* Time: 1:41 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadMappingQuality extends UStatistic {
|
||||
public class ReadMappingQuality extends TStatistic {
|
||||
|
||||
public Collection<Number> map(ReadBackedPileup rbp) {
|
||||
ArrayList<Integer> mapQuals = new ArrayList<Integer>(rbp.size());
|
||||
|
|
@ -25,7 +26,7 @@ public class ReadMappingQuality extends UStatistic {
|
|||
return (Collection) mapQuals;
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 40; }
|
||||
public int slideByValue() { return 5; }
|
||||
public int getWindowSize() { return 200; }
|
||||
public int slideByValue() { return 25; }
|
||||
public boolean usePreviouslySeenReads() { return false; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ import java.util.*;
|
|||
/**
|
||||
* @author chartl
|
||||
*/
|
||||
public class SampleDepth extends TStatistic {
|
||||
public class SampleDepth extends UStatistic {
|
||||
|
||||
public Map<Sample,Object> sampleStats = null;
|
||||
|
||||
|
|
@ -32,8 +32,8 @@ public class SampleDepth extends TStatistic {
|
|||
Set<Sample> samples = walker.getSamples();
|
||||
for ( Sample s : samples ) {
|
||||
if ( s.hasProperty("doc.mean") && s.hasProperty("doc.std") ) {
|
||||
double mn = Double.parseDouble((String) s.getProperty("doc.mean"));
|
||||
double std = Double.parseDouble((String) s.getProperty("doc.std"));
|
||||
double mn = (Double) s.getProperty("doc.mean");
|
||||
double std = (Double) s.getProperty("doc.std");
|
||||
sampleStats.put(s,new Pair<Double,Double>(mn,std));
|
||||
} else {
|
||||
sampleStats.put(s,new MathUtils.RunningAverage());
|
||||
|
|
@ -75,8 +75,8 @@ public class SampleDepth extends TStatistic {
|
|||
// note: this is to satisfy the interface, and is never called due to override
|
||||
public Collection<Number> map(ReadBackedPileup pileup) { return null; }
|
||||
|
||||
public int getWindowSize() { return 25; }
|
||||
public int slideByValue() { return 5; }
|
||||
public int getWindowSize() { return 200; }
|
||||
public int slideByValue() { return 25; }
|
||||
public boolean usePreviouslySeenReads() { return true; }
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -71,6 +71,8 @@ public class MannWhitneyU {
|
|||
} else if ( n > 4 && m > 7 ) {
|
||||
// large m, small n - sum uniform approx
|
||||
pval = calculatePUniformApproximation(n,m,u);
|
||||
} else if ( n > 5 || m > 5 ) {
|
||||
pval = calculatePFromTable(n,m,u);
|
||||
} else {
|
||||
// small m [possibly small n] - full approx
|
||||
pval = calculatePRecursively(n,m,u);
|
||||
|
|
@ -79,6 +81,13 @@ public class MannWhitneyU {
|
|||
return twoSided ? 2*pval : pval;
|
||||
}
|
||||
|
||||
public static double calculatePFromTable(int n, int m, long u) {
|
||||
// todo -- actually use a table for:
|
||||
// todo - n small, m large
|
||||
// todo - n large, m small
|
||||
return calculatePUniformApproximation(n,m,u);
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses a normal approximation to the U statistic in order to return a cdf p-value. See Mann, Whitney [1947]
|
||||
* @param n - The number of entries in the DOMINATED set
|
||||
|
|
|
|||
Loading…
Reference in New Issue