From 77fe902dbd1182bd6a9686c14a661dbb58ff95b0 Mon Sep 17 00:00:00 2001 From: chartl Date: Tue, 8 Mar 2011 15:26:13 +0000 Subject: [PATCH] Testing modules now use wider windows and heftier shift, hopefully this will remove some of the noisiness of the results. Some UStatistics were changed to TStatistics to try and limit noisiness as well. Walker will also additionally write out wiggle files directly (which can be converted into "proper" tdf files via igvtools tile [args] [in].wig [out].tdf [ref]) subject to some restrictions. MWU could get stuck in a long-running recursive regime, it'd be nice to have a table lookup or a good small-n large-m approximation, for now the uniform should work just fine. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5403 348d0f76-0448-11de-a6fe-93d51630548a --- .../association/AssociationTestRunner.java | 22 ++++++++++++++ .../RegionalAssociationHandler.java | 25 ++++++++++++++++ .../RegionalAssociationMultiplexer.java | 8 +++-- .../RegionalAssociationWalker.java | 30 +++++++++++++++++-- .../association/modules/BaseQualityScore.java | 5 ++-- .../modules/InsertSizeDistribution.java | 6 ++-- .../association/modules/MappingQuality0.java | 4 +-- .../modules/MateMappingQuality.java | 4 +-- .../association/modules/MateOtherContig.java | 2 +- .../association/modules/MateSameStrand.java | 4 +-- .../association/modules/MateUnmapped.java | 2 +- .../association/modules/ProperPairs.java | 4 +-- .../association/modules/ReadClipping.java | 4 +-- .../association/modules/ReadIndels.java | 7 +++-- .../modules/ReadMappingQuality.java | 7 +++-- .../association/modules/SampleDepth.java | 10 +++---- .../sting/utils/MannWhitneyU.java | 9 ++++++ 17 files changed, 121 insertions(+), 32 deletions(-) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java index 13f4e6c09..19a5dd8ef 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java @@ -22,6 +22,7 @@ import org.broadinstitute.sting.utils.collections.Pair; * To change this template use File | Settings | File Templates. */ public class AssociationTestRunner { + // todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance static Normal standardNormal = new Normal(0.0,1.0,null); public static String runTests(AssociationContext context) { @@ -50,6 +51,27 @@ public class AssociationTestRunner { return buf.toString(); } + /** + * Just gets the Q value associated with a particular association context. + * @param context + * @return + */ + public static int getQValue(AssociationContext context) { + if ( context instanceof TStatistic ) { + return (int) Math.floor(QualityUtils.phredScaleErrorRate(testStudentT((TStatistic) context).second)); + } + + if ( context instanceof ZStatistic ) { + return (int) Math.floor(QualityUtils.phredScaleErrorRate(testZ((ZStatistic) context).second)); + } + + if ( context instanceof UStatistic ) { + return (int) Math.floor(QualityUtils.phredScaleErrorRate(mannWhitneyUTest((UStatistic) context).second)); + } + + return -1; + } + public static String runStudentT(TStatistic context) { Pair stats = testStudentT(context); double t = stats.first; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java index 4bbc1c1aa..5ed95febb 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java @@ -38,6 +38,31 @@ public class RegionalAssociationHandler { } } + /** + * Switches what formatting to use based on wiggle or standard + * @param wiggleFormat - use wiggle format (just Q) or standard (S: P: Q:) + * @return - test results in proper format + */ + public Map runTests(boolean wiggleFormat) { + if ( wiggleFormat ) { + return runWiggleTests(); + } else { + return runTests(); + } + } + + public Map runWiggleTests() { + // todo -- maybe the tdf should be the whole window rather than just the most recent loc? + Map testResults = new HashMap(associations.size()); + for ( AssociationContext w : associations ) { + if ( w.isFull() ) { + testResults.put(w,String.format("%d",AssociationTestRunner.getQValue(w))); + w.slide(); + } + } + return testResults; + } + /** * For those AssociationContexts with full windows: * 1) Run their associated test(s) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java index 5969754b2..5daf9fc36 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java @@ -16,11 +16,12 @@ import java.util.*; public class RegionalAssociationMultiplexer implements Multiplexer> { Set> contexts = null; + boolean wigOut = false; - public RegionalAssociationMultiplexer(String[] toUse) { + public RegionalAssociationMultiplexer(String[] toUse, boolean wiggleOutput) { super(); contexts = getAssociations(toUse); - + wigOut = wiggleOutput; } public Collection> multiplex() { @@ -28,6 +29,9 @@ public class RegionalAssociationMultiplexer implements Multiplexer context, String arg) { + if ( wigOut ) { + return String.format("%s.%s.wig",arg,context.getSimpleName()); + } return String.format("%s.%s.tdf", arg, context.getSimpleName()); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java index dcff890b6..dab581751 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java @@ -2,13 +2,16 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.Downsample; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.Multiplex; import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.StingException; @@ -24,14 +27,29 @@ import java.util.*; * @Date 2011-02-23 * Generalized framework for regional (windowed) associations */ +@Downsample(by= DownsampleType.NONE) public class RegionalAssociationWalker extends LocusWalker implements TreeReducible { @Argument(doc="Association type(s) to use. Supports multiple arguments (-AT thing1 -AT thing2).",shortName="AT",fullName="associationType",required=false) public String[] associationsToUse = null; + @Argument(doc="Change output file to wiggle format (will only see phred-scaled Q values, not STAT: x P: y Q: z",shortName="w",fullName="wiggle",required=false) + public boolean wiggleFormat = false; @Output - @Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse"}) + @Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse","wiggleFormat"}) Map out; + public void initialize() { + if ( wiggleFormat && getToolkit().getIntervals().size() > 1 ) { + throw new UserException("Wiggle format (fixedStep) is only consistent with a contiguous region (one interval) on one contig. Otherwise use the default output."); + } + + Set validAssociations = getAssociations(); + + if ( wiggleFormat ) { + writeWiggleHeaders(validAssociations); + } + } + public RegionalAssociationHandler reduceInit() { Set validAssociations = getAssociations(); RegionalAssociationHandler wac = new RegionalAssociationHandler(validAssociations); @@ -50,7 +68,7 @@ public class RegionalAssociationWalker extends LocusWalker testsHere = rac.runTests(); + Map testsHere = rac.runTests(wiggleFormat); // todo -- really awful shitty formatting if ( testsHere.size() > 0 ) { for ( Map.Entry result : testsHere.entrySet() ) { @@ -113,4 +131,12 @@ public class RegionalAssociationWalker extends LocusWalker getSamples() { return getToolkit().getSAMFileSamples(); } + + public void writeWiggleHeaders(Set cons) { + for ( AssociationContext con : cons ) { + GenomeLoc first = getToolkit().getIntervals().iterator().next(); + String header = String.format("fixedStep\tchrom=%s,start=%d,step=%d,span=%d",first.getContig(),first.getStart(),con.slideByValue(),con.getWindowSize()); + out.get(con.getClass()).printf("%s%n",header); + } + } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/BaseQualityScore.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/BaseQualityScore.java index a15600322..e83f68cb9 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/BaseQualityScore.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/BaseQualityScore.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -25,7 +26,7 @@ public class BaseQualityScore extends TStatistic { return (Collection) baseQuals; } - public int getWindowSize() { return 5; } - public int slideByValue() { return 1; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return true; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/InsertSizeDistribution.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/InsertSizeDistribution.java index 449a61c99..0c6226c41 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/InsertSizeDistribution.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/InsertSizeDistribution.java @@ -12,9 +12,9 @@ import java.util.List; /** * @author chartl */ -public class InsertSizeDistribution extends UStatistic { - public int getWindowSize() { return 100; } - public int slideByValue() { return 10; } +public class InsertSizeDistribution extends TStatistic { + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } public Collection map(ReadBackedPileup pileup) { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MappingQuality0.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MappingQuality0.java index 531cba002..4717d9732 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MappingQuality0.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MappingQuality0.java @@ -26,8 +26,8 @@ public class MappingQuality0 extends ZStatistic { return new Pair(mq0,total); } - public int getWindowSize() { return 50; } - public int slideByValue() { return 10; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateMappingQuality.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateMappingQuality.java index f3718bce3..579cd3605 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateMappingQuality.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateMappingQuality.java @@ -27,7 +27,7 @@ public class MateMappingQuality extends UStatistic { return (Collection) mateMapQ; } - public int getWindowSize() { return 40; } - public int slideByValue() { return 20; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateOtherContig.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateOtherContig.java index b2af3d6a7..152f1f505 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateOtherContig.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateOtherContig.java @@ -14,7 +14,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; */ public class MateOtherContig extends ZStatistic { - public int getWindowSize() { return 50; } + public int getWindowSize() { return 200; } public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateSameStrand.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateSameStrand.java index 143c71c71..1129313f7 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateSameStrand.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateSameStrand.java @@ -29,7 +29,7 @@ public class MateSameStrand extends ZStatistic { return new Pair(mateSameStrand,numPairs); } - public int getWindowSize() { return 40; } - public int slideByValue() { return 5; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateUnmapped.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateUnmapped.java index f0c0c7df6..5ee868714 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateUnmapped.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateUnmapped.java @@ -29,7 +29,7 @@ public class MateUnmapped extends ZStatistic { return new Pair(numPairUnmapped,numMatedReads); } - public int getWindowSize() { return 100; } + public int getWindowSize() { return 200; } public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ProperPairs.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ProperPairs.java index 6045fcf4b..8cef17753 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ProperPairs.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ProperPairs.java @@ -27,7 +27,7 @@ public class ProperPairs extends ZStatistic { return new Pair(numPropPair,numReads); } - public int getWindowSize() { return 30; } - public int slideByValue() { return 10; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadClipping.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadClipping.java index 8692da8db..e84cb9768 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadClipping.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadClipping.java @@ -33,7 +33,7 @@ public class ReadClipping extends TStatistic { return (Collection) clipping; } - public int getWindowSize() { return 30; } - public int slideByValue() { return 5; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadIndels.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadIndels.java index f5ffd6fe0..1990ccd46 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadIndels.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadIndels.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic; import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -16,7 +17,7 @@ import java.util.Collection; * Time: 1:54 PM * To change this template use File | Settings | File Templates. */ -public class ReadIndels extends UStatistic { +public class ReadIndels extends TStatistic { public Collection map(ReadBackedPileup rbp) { ArrayList indelElements = new ArrayList(rbp.size()); @@ -32,7 +33,7 @@ public class ReadIndels extends UStatistic { return (Collection) indelElements; } - public int getWindowSize() { return 50; } - public int slideByValue() { return 10; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadMappingQuality.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadMappingQuality.java index 9d6daab09..a4ad51f55 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadMappingQuality.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadMappingQuality.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic; import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -14,7 +15,7 @@ import java.util.Collection; * Time: 1:41 PM * To change this template use File | Settings | File Templates. */ -public class ReadMappingQuality extends UStatistic { +public class ReadMappingQuality extends TStatistic { public Collection map(ReadBackedPileup rbp) { ArrayList mapQuals = new ArrayList(rbp.size()); @@ -25,7 +26,7 @@ public class ReadMappingQuality extends UStatistic { return (Collection) mapQuals; } - public int getWindowSize() { return 40; } - public int slideByValue() { return 5; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/SampleDepth.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/SampleDepth.java index f1001115e..a6ead5952 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/SampleDepth.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/SampleDepth.java @@ -17,7 +17,7 @@ import java.util.*; /** * @author chartl */ -public class SampleDepth extends TStatistic { +public class SampleDepth extends UStatistic { public Map sampleStats = null; @@ -32,8 +32,8 @@ public class SampleDepth extends TStatistic { Set samples = walker.getSamples(); for ( Sample s : samples ) { if ( s.hasProperty("doc.mean") && s.hasProperty("doc.std") ) { - double mn = Double.parseDouble((String) s.getProperty("doc.mean")); - double std = Double.parseDouble((String) s.getProperty("doc.std")); + double mn = (Double) s.getProperty("doc.mean"); + double std = (Double) s.getProperty("doc.std"); sampleStats.put(s,new Pair(mn,std)); } else { sampleStats.put(s,new MathUtils.RunningAverage()); @@ -75,8 +75,8 @@ public class SampleDepth extends TStatistic { // note: this is to satisfy the interface, and is never called due to override public Collection map(ReadBackedPileup pileup) { return null; } - public int getWindowSize() { return 25; } - public int slideByValue() { return 5; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return true; } diff --git a/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java b/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java index 4d89d4894..f784fadf2 100755 --- a/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java +++ b/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java @@ -71,6 +71,8 @@ public class MannWhitneyU { } else if ( n > 4 && m > 7 ) { // large m, small n - sum uniform approx pval = calculatePUniformApproximation(n,m,u); + } else if ( n > 5 || m > 5 ) { + pval = calculatePFromTable(n,m,u); } else { // small m [possibly small n] - full approx pval = calculatePRecursively(n,m,u); @@ -79,6 +81,13 @@ public class MannWhitneyU { return twoSided ? 2*pval : pval; } + public static double calculatePFromTable(int n, int m, long u) { + // todo -- actually use a table for: + // todo - n small, m large + // todo - n large, m small + return calculatePUniformApproximation(n,m,u); + } + /** * Uses a normal approximation to the U statistic in order to return a cdf p-value. See Mann, Whitney [1947] * @param n - The number of entries in the DOMINATED set