diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java index 13f4e6c09..19a5dd8ef 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java @@ -22,6 +22,7 @@ import org.broadinstitute.sting.utils.collections.Pair; * To change this template use File | Settings | File Templates. */ public class AssociationTestRunner { + // todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance static Normal standardNormal = new Normal(0.0,1.0,null); public static String runTests(AssociationContext context) { @@ -50,6 +51,27 @@ public class AssociationTestRunner { return buf.toString(); } + /** + * Just gets the Q value associated with a particular association context. + * @param context + * @return + */ + public static int getQValue(AssociationContext context) { + if ( context instanceof TStatistic ) { + return (int) Math.floor(QualityUtils.phredScaleErrorRate(testStudentT((TStatistic) context).second)); + } + + if ( context instanceof ZStatistic ) { + return (int) Math.floor(QualityUtils.phredScaleErrorRate(testZ((ZStatistic) context).second)); + } + + if ( context instanceof UStatistic ) { + return (int) Math.floor(QualityUtils.phredScaleErrorRate(mannWhitneyUTest((UStatistic) context).second)); + } + + return -1; + } + public static String runStudentT(TStatistic context) { Pair stats = testStudentT(context); double t = stats.first; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java index 4bbc1c1aa..5ed95febb 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java @@ -38,6 +38,31 @@ public class RegionalAssociationHandler { } } + /** + * Switches what formatting to use based on wiggle or standard + * @param wiggleFormat - use wiggle format (just Q) or standard (S: P: Q:) + * @return - test results in proper format + */ + public Map runTests(boolean wiggleFormat) { + if ( wiggleFormat ) { + return runWiggleTests(); + } else { + return runTests(); + } + } + + public Map runWiggleTests() { + // todo -- maybe the tdf should be the whole window rather than just the most recent loc? + Map testResults = new HashMap(associations.size()); + for ( AssociationContext w : associations ) { + if ( w.isFull() ) { + testResults.put(w,String.format("%d",AssociationTestRunner.getQValue(w))); + w.slide(); + } + } + return testResults; + } + /** * For those AssociationContexts with full windows: * 1) Run their associated test(s) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java index 5969754b2..5daf9fc36 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java @@ -16,11 +16,12 @@ import java.util.*; public class RegionalAssociationMultiplexer implements Multiplexer> { Set> contexts = null; + boolean wigOut = false; - public RegionalAssociationMultiplexer(String[] toUse) { + public RegionalAssociationMultiplexer(String[] toUse, boolean wiggleOutput) { super(); contexts = getAssociations(toUse); - + wigOut = wiggleOutput; } public Collection> multiplex() { @@ -28,6 +29,9 @@ public class RegionalAssociationMultiplexer implements Multiplexer context, String arg) { + if ( wigOut ) { + return String.format("%s.%s.wig",arg,context.getSimpleName()); + } return String.format("%s.%s.tdf", arg, context.getSimpleName()); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java index dcff890b6..dab581751 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java @@ -2,13 +2,16 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.Downsample; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.Multiplex; import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.StingException; @@ -24,14 +27,29 @@ import java.util.*; * @Date 2011-02-23 * Generalized framework for regional (windowed) associations */ +@Downsample(by= DownsampleType.NONE) public class RegionalAssociationWalker extends LocusWalker implements TreeReducible { @Argument(doc="Association type(s) to use. Supports multiple arguments (-AT thing1 -AT thing2).",shortName="AT",fullName="associationType",required=false) public String[] associationsToUse = null; + @Argument(doc="Change output file to wiggle format (will only see phred-scaled Q values, not STAT: x P: y Q: z",shortName="w",fullName="wiggle",required=false) + public boolean wiggleFormat = false; @Output - @Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse"}) + @Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse","wiggleFormat"}) Map out; + public void initialize() { + if ( wiggleFormat && getToolkit().getIntervals().size() > 1 ) { + throw new UserException("Wiggle format (fixedStep) is only consistent with a contiguous region (one interval) on one contig. Otherwise use the default output."); + } + + Set validAssociations = getAssociations(); + + if ( wiggleFormat ) { + writeWiggleHeaders(validAssociations); + } + } + public RegionalAssociationHandler reduceInit() { Set validAssociations = getAssociations(); RegionalAssociationHandler wac = new RegionalAssociationHandler(validAssociations); @@ -50,7 +68,7 @@ public class RegionalAssociationWalker extends LocusWalker testsHere = rac.runTests(); + Map testsHere = rac.runTests(wiggleFormat); // todo -- really awful shitty formatting if ( testsHere.size() > 0 ) { for ( Map.Entry result : testsHere.entrySet() ) { @@ -113,4 +131,12 @@ public class RegionalAssociationWalker extends LocusWalker getSamples() { return getToolkit().getSAMFileSamples(); } + + public void writeWiggleHeaders(Set cons) { + for ( AssociationContext con : cons ) { + GenomeLoc first = getToolkit().getIntervals().iterator().next(); + String header = String.format("fixedStep\tchrom=%s,start=%d,step=%d,span=%d",first.getContig(),first.getStart(),con.slideByValue(),con.getWindowSize()); + out.get(con.getClass()).printf("%s%n",header); + } + } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/BaseQualityScore.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/BaseQualityScore.java index a15600322..e83f68cb9 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/BaseQualityScore.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/BaseQualityScore.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -25,7 +26,7 @@ public class BaseQualityScore extends TStatistic { return (Collection) baseQuals; } - public int getWindowSize() { return 5; } - public int slideByValue() { return 1; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return true; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/InsertSizeDistribution.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/InsertSizeDistribution.java index 449a61c99..0c6226c41 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/InsertSizeDistribution.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/InsertSizeDistribution.java @@ -12,9 +12,9 @@ import java.util.List; /** * @author chartl */ -public class InsertSizeDistribution extends UStatistic { - public int getWindowSize() { return 100; } - public int slideByValue() { return 10; } +public class InsertSizeDistribution extends TStatistic { + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } public Collection map(ReadBackedPileup pileup) { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MappingQuality0.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MappingQuality0.java index 531cba002..4717d9732 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MappingQuality0.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MappingQuality0.java @@ -26,8 +26,8 @@ public class MappingQuality0 extends ZStatistic { return new Pair(mq0,total); } - public int getWindowSize() { return 50; } - public int slideByValue() { return 10; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateMappingQuality.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateMappingQuality.java index f3718bce3..579cd3605 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateMappingQuality.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateMappingQuality.java @@ -27,7 +27,7 @@ public class MateMappingQuality extends UStatistic { return (Collection) mateMapQ; } - public int getWindowSize() { return 40; } - public int slideByValue() { return 20; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateOtherContig.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateOtherContig.java index b2af3d6a7..152f1f505 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateOtherContig.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateOtherContig.java @@ -14,7 +14,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; */ public class MateOtherContig extends ZStatistic { - public int getWindowSize() { return 50; } + public int getWindowSize() { return 200; } public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateSameStrand.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateSameStrand.java index 143c71c71..1129313f7 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateSameStrand.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateSameStrand.java @@ -29,7 +29,7 @@ public class MateSameStrand extends ZStatistic { return new Pair(mateSameStrand,numPairs); } - public int getWindowSize() { return 40; } - public int slideByValue() { return 5; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateUnmapped.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateUnmapped.java index f0c0c7df6..5ee868714 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateUnmapped.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/MateUnmapped.java @@ -29,7 +29,7 @@ public class MateUnmapped extends ZStatistic { return new Pair(numPairUnmapped,numMatedReads); } - public int getWindowSize() { return 100; } + public int getWindowSize() { return 200; } public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ProperPairs.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ProperPairs.java index 6045fcf4b..8cef17753 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ProperPairs.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ProperPairs.java @@ -27,7 +27,7 @@ public class ProperPairs extends ZStatistic { return new Pair(numPropPair,numReads); } - public int getWindowSize() { return 30; } - public int slideByValue() { return 10; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadClipping.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadClipping.java index 8692da8db..e84cb9768 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadClipping.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadClipping.java @@ -33,7 +33,7 @@ public class ReadClipping extends TStatistic { return (Collection) clipping; } - public int getWindowSize() { return 30; } - public int slideByValue() { return 5; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadIndels.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadIndels.java index f5ffd6fe0..1990ccd46 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadIndels.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadIndels.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic; import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -16,7 +17,7 @@ import java.util.Collection; * Time: 1:54 PM * To change this template use File | Settings | File Templates. */ -public class ReadIndels extends UStatistic { +public class ReadIndels extends TStatistic { public Collection map(ReadBackedPileup rbp) { ArrayList indelElements = new ArrayList(rbp.size()); @@ -32,7 +33,7 @@ public class ReadIndels extends UStatistic { return (Collection) indelElements; } - public int getWindowSize() { return 50; } - public int slideByValue() { return 10; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadMappingQuality.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadMappingQuality.java index 9d6daab09..a4ad51f55 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadMappingQuality.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/ReadMappingQuality.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic; import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.UStatistic; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -14,7 +15,7 @@ import java.util.Collection; * Time: 1:41 PM * To change this template use File | Settings | File Templates. */ -public class ReadMappingQuality extends UStatistic { +public class ReadMappingQuality extends TStatistic { public Collection map(ReadBackedPileup rbp) { ArrayList mapQuals = new ArrayList(rbp.size()); @@ -25,7 +26,7 @@ public class ReadMappingQuality extends UStatistic { return (Collection) mapQuals; } - public int getWindowSize() { return 40; } - public int slideByValue() { return 5; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/SampleDepth.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/SampleDepth.java index f1001115e..a6ead5952 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/SampleDepth.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/SampleDepth.java @@ -17,7 +17,7 @@ import java.util.*; /** * @author chartl */ -public class SampleDepth extends TStatistic { +public class SampleDepth extends UStatistic { public Map sampleStats = null; @@ -32,8 +32,8 @@ public class SampleDepth extends TStatistic { Set samples = walker.getSamples(); for ( Sample s : samples ) { if ( s.hasProperty("doc.mean") && s.hasProperty("doc.std") ) { - double mn = Double.parseDouble((String) s.getProperty("doc.mean")); - double std = Double.parseDouble((String) s.getProperty("doc.std")); + double mn = (Double) s.getProperty("doc.mean"); + double std = (Double) s.getProperty("doc.std"); sampleStats.put(s,new Pair(mn,std)); } else { sampleStats.put(s,new MathUtils.RunningAverage()); @@ -75,8 +75,8 @@ public class SampleDepth extends TStatistic { // note: this is to satisfy the interface, and is never called due to override public Collection map(ReadBackedPileup pileup) { return null; } - public int getWindowSize() { return 25; } - public int slideByValue() { return 5; } + public int getWindowSize() { return 200; } + public int slideByValue() { return 25; } public boolean usePreviouslySeenReads() { return true; } diff --git a/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java b/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java index 4d89d4894..f784fadf2 100755 --- a/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java +++ b/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java @@ -71,6 +71,8 @@ public class MannWhitneyU { } else if ( n > 4 && m > 7 ) { // large m, small n - sum uniform approx pval = calculatePUniformApproximation(n,m,u); + } else if ( n > 5 || m > 5 ) { + pval = calculatePFromTable(n,m,u); } else { // small m [possibly small n] - full approx pval = calculatePRecursively(n,m,u); @@ -79,6 +81,13 @@ public class MannWhitneyU { return twoSided ? 2*pval : pval; } + public static double calculatePFromTable(int n, int m, long u) { + // todo -- actually use a table for: + // todo - n small, m large + // todo - n large, m small + return calculatePUniformApproximation(n,m,u); + } + /** * Uses a normal approximation to the U statistic in order to return a cdf p-value. See Mann, Whitney [1947] * @param n - The number of entries in the DOMINATED set