From de4eaa455e234627cae96930eb08cb83ca5f1c13 Mon Sep 17 00:00:00 2001 From: chartl Date: Sat, 9 Apr 2011 21:07:52 +0000 Subject: [PATCH] Squashing some bugs. Current implementation of AlignmentContextUtils.splitContextBySample() eliminates all sample meta data. Per Mark's request I'm working around this rather than fixing it -- the extender now maintains a mapping from sample id to sample object. Addition of a proportion test for large-insert-size reads, and slight refactoring of code to deal with bad window initialization of subclasses (e.g. chris forgot that constructors aren't inherited) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5608 348d0f76-0448-11de-a6fe-93d51630548a --- .../association/AssociationContext.java | 18 ++++---- .../association/AssociationTestRunner.java | 18 ++++++++ .../walkers/association/MapExtender.java | 14 ++++--- .../walkers/association/MapHolder.java | 13 ++++-- .../RegionalAssociationHandler.java | 13 ++++-- .../RegionalAssociationWalker.java | 12 +++--- .../modules/casecontrol/CaseControl.java | 7 ++++ .../modules/casecontrol/MismatchRate.java | 9 ++-- .../casecontrol/ReadsLargeInsertSize.java | 42 +++++++++++++++++++ .../modules/casecontrol/SampleDepth.java | 15 +++---- .../modules/casecontrol/ValueTest.java | 4 +- .../statistics/Dichotomizer1D.java | 14 ++++++- 12 files changed, 134 insertions(+), 45 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsLargeInsertSize.java diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationContext.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationContext.java index 06bf3738a..3c241ca37 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationContext.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationContext.java @@ -15,21 +15,13 @@ import java.util.*; public abstract class AssociationContext { protected List> window; - private final int size; - private final int slide; + private int size; + private int slide; public AssociationContext() { - this(0,0); - } - - public AssociationContext(final int winSize, final int winSlide) { - window = new ArrayList>(winSize); - size = winSize; - slide = winSlide; } public AssociationContext(final RegionalAssociationWalker parent) { - this(parent.windowSize,parent.slideBy); this.init(parent); } @@ -46,7 +38,11 @@ public abstract class AssociationContext { public boolean filter(MapExtender m) { return true; } // a basic initialization of the context (give the walker for access to object?) - public void init(RegionalAssociationWalker walker) { } + public void init(RegionalAssociationWalker walker) { + size = walker.windowSize; + slide = walker.slideBy; + window = new ArrayList>(size); + } public Map mapLocus(MapExtender extender) { Map pileups; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java index fb0b6d6c2..4dfa8873e 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java @@ -6,6 +6,8 @@ import java.util.Map; import cern.jet.random.Normal; import cern.jet.random.StudentT; import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.*; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.Dichotomizable; +import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.Dichotomizer1D; import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.StatisticalTest; import org.broadinstitute.sting.utils.MannWhitneyU; import org.broadinstitute.sting.utils.QualityUtils; @@ -21,6 +23,12 @@ import org.broadinstitute.sting.utils.collections.Pair; public class AssociationTestRunner { final static int MAX_Q_VALUE = Integer.MAX_VALUE; // todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance + final static Dichotomizer1D.Transform LOG_TRANSFORM = (new Dichotomizer1D()).new Transform() { + @Override + public double transform(double d) { + return Math.log(d); + } + }; public static int pToQ(double p) { return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE); @@ -62,4 +70,14 @@ public class AssociationTestRunner { } return mwu.runTwoSidedTest(); } + + public static Pair getDichotomizedValues(AssociationContext context) { + if ( context instanceof Dichotomizable ) { + double raw = Dichotomizer1D.simpleGaussianDichotomy(((Dichotomizable)context)); + double log = Dichotomizer1D.simpleGaussianDichotomy(((Dichotomizable)context),LOG_TRANSFORM); + return new Pair(raw,log); + } + + return new Pair(Double.NaN,Double.NaN); + } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapExtender.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapExtender.java index 268bd07c6..ff414f536 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapExtender.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapExtender.java @@ -24,9 +24,13 @@ public class MapExtender { private MapHolder current = null; private Map fullPileup = null; private Map readFilteredPileup = null; + private Map idToSampleObject = null; - public MapExtender() { - // no need to do anything + public MapExtender(Set samples) { + idToSampleObject = new HashMap(samples.size()); + for ( Sample s : samples ) { + idToSampleObject.put(s.getId(),s); + } } public void set(MapHolder holder) { @@ -38,7 +42,7 @@ public class MapExtender { readFilteredPileup = new HashMap(); if ( current != null ) { - for ( Map.Entry sac : current.getContext().entrySet() ) { + for ( Map.Entry sac : current.getContext(idToSampleObject).entrySet() ) { AlignmentContext context = AlignmentContextUtils.stratify(sac.getValue(), TYPE); if ( context.hasBasePileup() ) { fullPileup.put(sac.getKey(),context.getBasePileup()); @@ -67,7 +71,7 @@ public class MapExtender { public Map getReadFilteredPileup(){ return readFilteredPileup; } public Map getPreviousContext() { - return previous != null ? previous.getContext() : null; + return previous != null ? previous.getContext(idToSampleObject) : null; } public ReferenceContext getPreviousRef() { @@ -79,7 +83,7 @@ public class MapExtender { } public Map getContext() { - return current != null ? current.getContext() : null; + return current != null ? current.getContext(idToSampleObject) : null; } public ReferenceContext getReferenceContext() { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapHolder.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapHolder.java index 589c6247b..0a4e624e0 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapHolder.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapHolder.java @@ -6,21 +6,26 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import java.util.HashMap; import java.util.Map; public class MapHolder { private RefMetaDataTracker tracker; private ReferenceContext ref; - private Map alignments; + private Map alignments; public MapHolder(RefMetaDataTracker t, ReferenceContext r, AlignmentContext a) { tracker = t; ref = r; - alignments = AlignmentContextUtils.splitContextBySample(a); + alignments = AlignmentContextUtils.splitContextBySampleName(a); } - public Map getContext() { - return alignments; + public Map getContext(Map stringSampleMap) { + Map mappedContexts = new HashMap(alignments.size()); + for ( Map.Entry entry : alignments.entrySet() ) { + mappedContexts.put(stringSampleMap.get(entry.getKey()),entry.getValue()); + } + return mappedContexts; } public ReferenceContext getRef() { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java index e016da0bd..c8ef6b7fd 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association; +import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; @@ -15,8 +16,8 @@ public class RegionalAssociationHandler { // todo -- the correct way to do this is via the PluginManager (a la VariantEval) but this is a QND implementation private Set associations; - public RegionalAssociationHandler(Set contexts) { - maps = new MapExtender(); + public RegionalAssociationHandler(Set contexts, Set samples) { + maps = new MapExtender(samples); associations = contexts; } @@ -51,10 +52,14 @@ public class RegionalAssociationHandler { if ( w.isFull() ) { String outVal; if ( bedGraphFormat ) { - Pair> vals = AssociationTestRunner.getTestValues(w); - outVal = String.format("%.2f\t%.2e\t%d",vals.first,vals.second.first,vals.second.second); + Pair> statVals = AssociationTestRunner.getTestValues(w); + Pair simpleDichotVals = AssociationTestRunner.getDichotomizedValues(w); + outVal = String.format("%.2f\t%.2e\t%d\t%.2f\t%.2f",statVals.first,statVals.second.first,statVals.second.second, + simpleDichotVals.first,simpleDichotVals.second); } else { outVal = AssociationTestRunner.runTests(w); + Pair simpleDichotVals = AssociationTestRunner.getDichotomizedValues(w); + outVal += String.format("\tD: %.2f\tLogD: %.2f",simpleDichotVals.first,simpleDichotVals.second); } testResults.put(w,String.format("%s\t%d\t%d\t%s",maps.getReferenceContext().getLocus().getContig(), maps.getReferenceContext().getLocus().getStart()-w.getWindowSize()-1,maps.getReferenceContext().getLocus().getStart()+1, outVal)); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java index bcd62a3b8..8495b54a2 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association; +import org.apache.log4j.Logger; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.DownsampleType; @@ -64,7 +65,7 @@ public class RegionalAssociationWalker extends LocusWalker validAssociations = getAssociations(); - RegionalAssociationHandler wac = new RegionalAssociationHandler(validAssociations); + RegionalAssociationHandler wac = new RegionalAssociationHandler(validAssociations,getSamples()); return wac; } @@ -97,7 +98,8 @@ public class RegionalAssociationWalker extends LocusWalker clazz : contexts ) { AssociationContext context; try { - context = clazz.getConstructor(new Class[] {RegionalAssociationWalker.class}).newInstance(new Object[] {this}); + context = clazz.getConstructor(new Class[] {}).newInstance(new Object[] {}); + context.init(this); } catch (Exception e ) { throw new StingException("The class "+clazz.getSimpleName()+" could not be instantiated",e); } @@ -116,7 +118,8 @@ public class RegionalAssociationWalker extends LocusWalker cons) { for ( AssociationContext con : cons ) { - GenomeLoc first = getToolkit().getIntervals().iterator().next(); - String header = String.format("track type=bedGraph name=%s",con.getClass().getSimpleName()); + String header = String.format("track type=bedGraph name=%s description=stat,p,q,dichot,logdichot",con.getClass().getSimpleName()); out.get(con.getClass()).printf("%s%n",header); } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/CaseControl.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/CaseControl.java index 695c9cdcb..bd57d8488 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/CaseControl.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/CaseControl.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.case import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContext; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.HashMap; @@ -24,6 +25,12 @@ public abstract class CaseControl extends AssociationContext { X accumControl = null; for ( Map sampleXMap : window ) { for ( Map.Entry entry : sampleXMap.entrySet() ) { + boolean isCase; + try { + isCase = entry.getKey().getProperty("cohort").equals("case"); + } catch (NullPointerException e) { + throw new UserException("Sample "+entry.getKey().getId()+" does not have a cohort assigned"); + } if ( entry.getKey().getProperty("cohort").equals("case") ) { accumCase = accum(accumCase, entry.getValue()); } else if ( entry.getKey().getProperty("cohort").equals("control") ) { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MismatchRate.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MismatchRate.java index c05bd0bd2..8d76f06bc 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MismatchRate.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MismatchRate.java @@ -19,18 +19,19 @@ import java.util.*; */ public class MismatchRate extends ValueTest { - private Map sampleStats = new HashMap(); + private Map sampleStats = new HashMap(); private int currentRefBase = 0; public void init(RegionalAssociationWalker walker) { + super.init(walker); Set samples = walker.getSamples(); for ( Sample s : samples ) { if ( s.hasProperty("mismatch_rate.mean") && s.hasProperty("mismatch_rate.std") ) { double mn = (Double) s.getProperty("mismatch_rate.mean"); double std = (Double) s.getProperty("mismatch_rate.std"); - sampleStats.put(s,new Pair(mn,std)); + sampleStats.put(s.getId(),new Pair(mn,std)); } else { - sampleStats.put(s,new MathUtils.RunningAverage()); + sampleStats.put(s.getId(),new MathUtils.RunningAverage()); } } } @@ -48,7 +49,7 @@ public class MismatchRate extends ValueTest { } public Collection map(Sample sample, ReadBackedPileup pileup) { - Object stats = sampleStats.get(sample); + Object stats = sampleStats.get(sample.getId()); double mn; double std; if ( stats instanceof Pair ) { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsLargeInsertSize.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsLargeInsertSize.java new file mode 100755 index 000000000..bcd281d6b --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsLargeInsertSize.java @@ -0,0 +1,42 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; + +import ch.qos.logback.classic.filter.ThresholdFilter; +import org.broadinstitute.sting.oneoffprojects.walkers.association.RegionalAssociationWalker; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +/** + * Created by IntelliJ IDEA. + * User: Ghost + * Date: 4/9/11 + * Time: 3:31 PM + * To change this template use File | Settings | File Templates. + */ +public class ReadsLargeInsertSize extends ProportionTest { + + private int THRESHOLD; + + @Override + public Pair map(ReadBackedPileup rbp) { + int total = 0; + int wonky = 0; + for (PileupElement e : rbp ) { + if ( e.getRead().getReadPairedFlag() ) { + ++total; + if ( e.getRead().getInferredInsertSize() > THRESHOLD ) { + ++wonky; + } + } + } + + return new Pair(wonky,total); + } + + public void init(RegionalAssociationWalker parent) { + super.init(parent); + THRESHOLD = 150; + } + + public boolean usePreviouslySeenReads() { return false; } +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/SampleDepth.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/SampleDepth.java index 99c5e5ddb..9110ce739 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/SampleDepth.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/SampleDepth.java @@ -14,24 +14,21 @@ import java.util.*; */ public class SampleDepth extends ValueTest { - public Map sampleStats = null; - - public SampleDepth() { - super(); - sampleStats = new HashMap(); - } + public Map sampleStats = null; // either: the user associates data with the sample (doc.mean, doc.std) // >OR we calculate the values on-the-fly (e.g. running mean/stdev) public void init(RegionalAssociationWalker walker) { + super.init(walker); + sampleStats = new HashMap(); Set samples = walker.getSamples(); for ( Sample s : samples ) { if ( s.hasProperty("doc.mean") && s.hasProperty("doc.std") ) { double mn = (Double) s.getProperty("doc.mean"); double std = (Double) s.getProperty("doc.std"); - sampleStats.put(s,new Pair(mn,std)); + sampleStats.put(s.getId(),new Pair(mn,std)); } else { - sampleStats.put(s,new MathUtils.RunningAverage()); + sampleStats.put(s.getId(),new MathUtils.RunningAverage()); } } } @@ -48,7 +45,7 @@ public class SampleDepth extends ValueTest { } public Collection map(Sample sample, ReadBackedPileup pileup) { - Object stats = sampleStats.get(sample); + Object stats = sampleStats.get(sample.getId()); double mn; double std; if ( stats instanceof Pair ) { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ValueTest.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ValueTest.java index 6a21ecb7f..47690c1f4 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ValueTest.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ValueTest.java @@ -60,11 +60,11 @@ public abstract class ValueTest extends CaseControl> implemen MannWhitneyU mwu = new MannWhitneyU(); for ( Map.Entry> cohortEntry : getCaseControl().entrySet() ) { - if ( cohortEntry.getKey().equals(Cohort.CASE) ) { + if ( cohortEntry.getKey().equals(Cohort.CASE) && cohortEntry.getValue() != null ) { for ( Number n : cohortEntry.getValue() ) { mwu.add(n,MannWhitneyU.USet.SET1); } - } else if ( cohortEntry.getKey().equals(Cohort.CONTROL)) { + } else if ( cohortEntry.getKey().equals(Cohort.CONTROL) && cohortEntry.getValue() != null) { for ( Number n : cohortEntry.getValue() ) { mwu.add(n,MannWhitneyU.USet.SET2); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizer1D.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizer1D.java index c6b977f9c..5f7df6146 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizer1D.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizer1D.java @@ -15,7 +15,7 @@ import java.util.Collection; */ public class Dichotomizer1D { - private Dichotomizer1D() { } // no instantiation + public Dichotomizer1D() { } // instantiation required for access to transform public abstract class Transform { public abstract double transform(double n); @@ -28,8 +28,18 @@ public class Dichotomizer1D { return tData; } + + public Transform() { + + } } + /** + * Tries to measure effect size by the separation of setOne and setTwo clusters, with a window spread factor of 3 + * @param setOne - collection of data from set one + * @param setTwo - colleciton of data from set two + * @return - the so-called "Z"-factor (effect size/spread) + */ public static double simpleGaussianDichotomy(Collection setOne, Collection setTwo) { double meanOne = MathUtils.sum(setOne)/setOne.size(); double meanTwo = MathUtils.sum(setTwo)/setTwo.size(); @@ -53,4 +63,6 @@ public class Dichotomizer1D { return simpleGaussianDichotomy(dichotomizedData.first,dichotomizedData.second, transform); } + public static Pair twoMeansDichotomy() { return null; } + }