From 0723b0f44c56fa868727a3e229dc7378e8f79871 Mon Sep 17 00:00:00 2001 From: chartl Date: Tue, 1 Mar 2011 01:23:37 +0000 Subject: [PATCH] Generalized association is now working. Output is in a horrific format. Implementation of T-testing. Improvements are to look for classes dynamically (a la VariantEval/VariantAnnotator), beautify output, and do optimizations where they exist. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5341 348d0f76-0448-11de-a6fe-93d51630548a --- .../association/AssociationContext.java | 6 +- .../association/AssociationTestRunner.java | 18 ++-- .../walkers/association/MapExtender.java | 12 +-- .../RegionalAssociationHandler.java | 2 +- .../RegionalAssociationWalker.java | 17 +++- .../association/interfaces/TStatistic.java | 4 +- .../modules/LowMappingQuality.java | 49 +++++++++- .../modules/LowMappingQualityAtom.java | 98 ++++++++++++++----- .../broadinstitute/sting/utils/MathUtils.java | 7 ++ scala/qscript/oneoffs/QTools.q | 3 +- .../broadinstitute/sting/queue/QScript.scala | 5 + 11 files changed, 173 insertions(+), 48 deletions(-) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationContext.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationContext.java index 95f177fef..44d3aa2ba 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationContext.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationContext.java @@ -18,7 +18,7 @@ public abstract class AssociationContext { } public X map(MapExtender e) throws NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException, ClassCastException { - return (X) clazz.getConstructor().newInstance(e); + return (X) clazz.getConstructor(new Class[] {MapExtender.class}).newInstance(new Object[] {e}); } public boolean filter(MapExtender m) { return true; } @@ -35,6 +35,8 @@ public abstract class AssociationContext { } public void slide() { - window = window.subList(slideByValue(),window.size()); + ArrayList newWindow = new ArrayList((window.subList(slideByValue(),window.size()))); + newWindow.ensureCapacity(getWindowSize()); + window = newWindow; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java index 351ce3cc3..39b6bfd3a 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java @@ -4,6 +4,7 @@ import java.util.ArrayList; import java.util.List; import cern.jet.random.Normal; +import cern.jet.random.StudentT; import org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces.*; /** @@ -18,29 +19,32 @@ public class AssociationTestRunner { public static List runTests(AssociationContext context) { List results = new ArrayList(); - if ( context.getClass().isInstance(TStatistic.class)) { - results.add(runStudentT(context)); + if ( context instanceof TStatistic) { + results.add(runStudentT((TStatistic) context)); } - if ( context.getClass().isInstance(ZStatistic.class)) { + if ( context instanceof ZStatistic) { results.add(runZ((ZStatistic) context)); } - if ( context.getClass().isInstance(FisherExact.class) ) { + if ( context instanceof FisherExact ) { results.add(runFisherExact(context)); } return results; } - public static String runStudentT(AssociationContext context) { - return "Test not yet implemented"; + public static String runStudentT(TStatistic context) { + double t = context.getTStatistic(); + StudentT studentT = new StudentT(context.getDegreesOfFreedom(),null); + double p = t < 0 ? 2*studentT.cdf(t) : 2*(1-studentT.cdf(t)); + return String.format("T: %.2f\tP: %.2e",t,p); } public static String runZ(ZStatistic context) { double z = context.getZStatistic(); double p = z < 0 ? 2*standardNormal.cdf(z) : 2*(1-standardNormal.cdf(z)); - return String.format("Z: %.2f\tP: %.2f",z,p); + return String.format("Z: %.2f\tP: %.2e",z,p); } public static String runFisherExact(AssociationContext context) { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapExtender.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapExtender.java index a46e012ed..a9153aad3 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapExtender.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapExtender.java @@ -27,26 +27,26 @@ public class MapExtender { } public Map getPreviousContext() { - return previous.getContext(); + return previous != null ? previous.getContext() : null; } public ReferenceContext getPreviousRef() { - return previous.getRef(); + return previous != null ? previous.getRef() : null; } public RefMetaDataTracker getPreviousTracker() { - return previous.getTracker(); + return previous != null ? previous.getTracker() : null; } public Map getContext() { - return current.getContext(); + return current != null ? current.getContext() : null; } public ReferenceContext getReferenceContext() { - return current.getRef(); + return current != null ? current.getRef() : null; } public RefMetaDataTracker getTracker() { - return current.getTracker(); + return current != null ? current.getTracker() : null; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java index 1af6d06a2..e9cbf5c47 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java @@ -16,7 +16,7 @@ public class RegionalAssociationHandler { private Set associations; public RegionalAssociationHandler(Set contexts) { - maps = null; + maps = new MapExtender(); associations = contexts; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java index 72511a1b6..ad469539f 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java @@ -7,7 +7,10 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.LowMappingQuality; +import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.LowMappingQualityAtom; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.PrintStream; import java.util.HashSet; @@ -46,15 +49,21 @@ public class RegionalAssociationWalker extends LocusWalker testsHere = rac.runTests(); // todo -- really awful shitty formatting - out.printf("%s%n",rac.getLocation().toString()); - for ( String s : testsHere ) { - out.printf("%s%n",s); + if ( testsHere.size() > 0 ) { + out.printf("%s%n",rac.getLocation().toString()); + for ( String s : testsHere ) { + out.printf("%s%n",s); + } } return rac; } private AssociationContext stringToAssociationContext(String s) { - return null; + if ( s.equals("LowMappingQuality") ) { + return new LowMappingQuality(); + } + + throw new UserException(String.format("AssociationContext type %s not found.",s)); } private Set getAssociations() { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/interfaces/TStatistic.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/interfaces/TStatistic.java index a2125004b..ba0230f7e 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/interfaces/TStatistic.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/interfaces/TStatistic.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces; +import org.broadinstitute.sting.utils.collections.Pair; + /** * Created by IntelliJ IDEA. * User: chartl @@ -9,5 +11,5 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces; */ public interface TStatistic { public abstract double getTStatistic(); - public abstract int getDegreesOfFreedom(); + public abstract double getDegreesOfFreedom(); } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/LowMappingQuality.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/LowMappingQuality.java index 195475a73..86234be8a 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/LowMappingQuality.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/LowMappingQuality.java @@ -1,11 +1,14 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.modules; +import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContext; import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContextAtom; +import org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces.TStatistic; import org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces.ZStatistic; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.Pair; -import java.util.Map; +import java.util.*; /** * Created by IntelliJ IDEA. @@ -14,12 +17,17 @@ import java.util.Map; * Time: 1:49:37 PM * To change this template use File | Settings | File Templates. */ -public class LowMappingQuality extends AssociationContext implements ZStatistic { +public class LowMappingQuality extends AssociationContext implements ZStatistic, TStatistic { + private double df = -1; public LowMappingQuality(Class atomClass ) { super(atomClass); } + public LowMappingQuality() { + this(LowMappingQualityAtom.class); + } + public int getWindowSize() { return 20; } public int slideByValue() { return 5; } @@ -41,4 +49,41 @@ public class LowMappingQuality extends AssociationContext double SE = Math.sqrt( p_pool*(1-p_pool)*( 1/((double)ctrl_tot) + 1/((double)case_tot) )); return (p_case - p_ctrl)/SE; } + + public double getTStatistic() { + // todo -- running average is unnecessary and inefficient here, but was quick to write + MathUtils.RunningAverage cases = new MathUtils.RunningAverage(); + MathUtils.RunningAverage controls = new MathUtils.RunningAverage(); + Set caseSamples = new HashSet(); + Set controlSamples = new HashSet(); + for ( LowMappingQualityAtom atom : window ) { + Map> caseControlQuals = atom.getCaseControlMappingQuals(); + cases.addAll((Collection) caseControlQuals.get("case")); + controls.addAll((Collection) caseControlQuals.get("control")); + for ( Sample s : atom.getSamples() ) { + if ( s.getProperty("cohort").equals("case") ) { + caseSamples.add(s); + } + if ( s.getProperty("cohort").equals("control")) { + controlSamples.add(s); + } + } + } + double a = cases.var()/caseSamples.size(); + double b = controls.var()/controlSamples.size(); + + df = ( a + b )/( a*a/(caseSamples.size()-1) + b*b/(controlSamples.size()-1)); + + return ( cases.mean() - controls.mean() )/Math.sqrt( (cases.var()/caseSamples.size()) + (controls.var()/controlSamples.size())); + } + + // todo -- this is super hacky + public double getDegreesOfFreedom() { + if ( df < 0 ) { + getTStatistic(); + } + double holder = df; + df = -1; + return holder; + } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/LowMappingQualityAtom.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/LowMappingQualityAtom.java index 4b1c6db43..200ee9ec6 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/LowMappingQualityAtom.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/LowMappingQualityAtom.java @@ -8,9 +8,7 @@ import org.broadinstitute.sting.oneoffprojects.walkers.association.MapHolder; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.PileupElement; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; +import java.util.*; /** * Created by IntelliJ IDEA. @@ -24,10 +22,12 @@ public class LowMappingQualityAtom extends AssociationContextAtom { private static StratifiedAlignmentContext.StratifiedContextType TYPE = StratifiedAlignmentContext.StratifiedContextType.COMPLETE; protected Map> stratifiedCounts; + protected Map> mappingQualities; public LowMappingQualityAtom(MapExtender e) { super(e); stratifiedCounts = new HashMap>(); + mappingQualities = new HashMap>(); for ( Sample s : e.getContext().keySet() ) { makeCounts(s,e.getPreviousContext(),e.getContext()); } @@ -36,31 +36,50 @@ public class LowMappingQualityAtom extends AssociationContextAtom { public void makeCounts( Sample sample, Map prev, Map cur ) { int low = 0; int tot = 0; - HashSet rnames = new HashSet(prev.get(sample).getContext(TYPE).size()); - for ( PileupElement e : prev.get(sample).getContext(TYPE).getBasePileup() ) { - rnames.add(e.getRead().getReadName()); - } - for ( PileupElement e : prev.get(sample).getContext(TYPE).getExtendedEventPileup() ) { - rnames.add(e.getRead().getReadName()); - } - for ( PileupElement e : cur.get(sample).getContext(TYPE).getBasePileup() ) { - if ( ! rnames.contains(e.getRead().getReadName()) ) { - ++tot; - if ( e.getMappingQual() < LOW_THRESH ) { - ++low; + if ( cur != null && cur.get(sample) != null && cur.get(sample).getContext(TYPE) != null) { + HashSet rnames; + List mapQuals = new ArrayList(cur.size()); + if ( prev != null && prev.get(sample) != null && prev.get(sample).getContext(TYPE) != null) { + rnames = new HashSet(prev.get(sample).getContext(TYPE).size()); + if ( prev.get(sample).getContext(TYPE).hasBasePileup()) { + for ( PileupElement e : prev.get(sample).getContext(TYPE).getBasePileup() ) { + rnames.add(e.getRead().getReadName()); + } + } + if ( prev.get(sample).getContext(TYPE).hasExtendedEventPileup()) { + for ( PileupElement e : prev.get(sample).getContext(TYPE).getExtendedEventPileup() ) { + rnames.add(e.getRead().getReadName()); + } + } + } else { + rnames = new HashSet(0); + } + if ( cur.get(sample).getContext(TYPE).hasBasePileup()) { + for ( PileupElement e : cur.get(sample).getContext(TYPE).getBasePileup() ) { + if ( ! rnames.contains(e.getRead().getReadName()) ) { + ++tot; + if ( e.getMappingQual() < LOW_THRESH ) { + ++low; + } + mapQuals.add(e.getMappingQual()); + } } } - } - for ( PileupElement e : cur.get(sample).getContext(TYPE).getExtendedEventPileup() ) { - if ( ! rnames.contains(e.getRead().getReadName()) ) { - ++tot; - if ( e.getMappingQual() < LOW_THRESH ) { - ++low; + if ( cur.get(sample).getContext(TYPE).hasExtendedEventPileup()) { + for ( PileupElement e : cur.get(sample).getContext(TYPE).getExtendedEventPileup() ) { + if ( ! rnames.contains(e.getRead().getReadName()) ) { + ++tot; + if ( e.getMappingQual() < LOW_THRESH ) { + ++low; + } + mapQuals.add(e.getMappingQual()); + } } } - } - stratifiedCounts.put(sample,new Pair(low,tot)); + stratifiedCounts.put(sample,new Pair(low,tot)); + mappingQualities.put(sample,mapQuals); + } } @@ -88,4 +107,37 @@ public class LowMappingQualityAtom extends AssociationContextAtom { return caseControlCounts; } + public Map> getCaseControlMappingQuals() { + int caseSize = 0; + int controlSize = 0; + for ( Map.Entry> v : mappingQualities.entrySet() ) { + if ( v.getKey().getProperty("cohort").equals("case") ) { + caseSize += v.getValue().size(); + } + if ( v.getKey().getProperty("cohort").equals("control") ) { + controlSize += v.getValue().size(); + } + } + List caseQuals = new ArrayList(caseSize); + List controlQuals = new ArrayList(controlSize); + for ( Map.Entry> v : mappingQualities.entrySet() ) { + if ( v.getKey().getProperty("cohort").equals("case") ) { + caseQuals.addAll(v.getValue()); + } + + if ( v.getKey().getProperty("cohort").equals("control")) { + controlQuals.addAll(v.getValue()); + } + } + + Map> caseControlQuals = new HashMap>(2); + caseControlQuals.put("case",caseQuals); + caseControlQuals.put("control",controlQuals); + return caseControlQuals; + } + + public Set getSamples() { + return stratifiedCounts.keySet(); + } + } diff --git a/java/src/org/broadinstitute/sting/utils/MathUtils.java b/java/src/org/broadinstitute/sting/utils/MathUtils.java index c6ab656d7..582ea4360 100755 --- a/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -804,8 +804,15 @@ public class MathUtils { s += ( obs - oldMean ) * ( obs - mean ); } + public void addAll(Collection col) { + for ( Number o : col ) { + add(o.doubleValue()); + } + } + public double mean() { return mean; } public double stddev() { return Math.sqrt(s/(obs_count - 1)); } + public double var() { return s/(obs_count - 1); } public long observationCount() { return obs_count; } } diff --git a/scala/qscript/oneoffs/QTools.q b/scala/qscript/oneoffs/QTools.q index 92a4482b3..84d4dbbea 100755 --- a/scala/qscript/oneoffs/QTools.q +++ b/scala/qscript/oneoffs/QTools.q @@ -15,7 +15,6 @@ class QTools extends QScript { @Argument(doc="The samples to extract",shortName="sm",required=false) var samples : String = _ @Argument(doc="Keep filtered sites when merging or extracting?",shortName="kf",required=false) var keepFilters : Boolean = false @Argument(doc="Input interval list (not used with VCF tools)",shortName="il",required=false) var intervalList : File = _ - @Argument(doc="Reference fai file",shortName="fai",required=false) var fai : File = _ @Argument(doc="interval list expand start",shortName="il_start",required=false) var ilStart : Int = 1 @Argument(doc="interval list expand size",shortName="il_size",required=false) var ilSize : Int = 50 // todo -- additional arguments or argument collection @@ -76,7 +75,7 @@ class QTools extends QScript { } def runExpandTargets = { - var ets : ExpandIntervals = new ExpandIntervals(intervalList,ilStart,ilSize,output,fai,"INTERVALS","INTERVALS") + var ets : ExpandIntervals = new ExpandIntervals(intervalList,ilStart,ilSize,output,ref,"INTERVALS","INTERVALS") add(ets) } } \ No newline at end of file diff --git a/scala/src/org/broadinstitute/sting/queue/QScript.scala b/scala/src/org/broadinstitute/sting/queue/QScript.scala index 8cb9221f2..f7224a4d6 100755 --- a/scala/src/org/broadinstitute/sting/queue/QScript.scala +++ b/scala/src/org/broadinstitute/sting/queue/QScript.scala @@ -2,6 +2,7 @@ package org.broadinstitute.sting.queue import org.broadinstitute.sting.queue.util.Logging import org.broadinstitute.sting.queue.function.QFunction +import org.broadinstitute.sting.utils.text.XReadLines /** * Defines a Queue pipeline as a collection of CommandLineFunctions. @@ -73,6 +74,10 @@ trait QScript extends Logging { def addAll(functions: List[QFunction] ) = { functions.foreach( f => add(f) ) } + + def extractFileEntries(in: File): List[File] = { + return collection.JavaConversions.asScalaIterator((new XReadLines(in))).toList.map( new File(_) ) + } } object QScript {