Squashing some bugs. Current implementation of AlignmentContextUtils.splitContextBySample() eliminates all sample meta data. Per Mark's request I'm working around this rather than fixing it -- the extender now maintains a mapping from sample id to sample object. Addition of a proportion test for large-insert-size reads, and slight refactoring of code to deal with bad window initialization of subclasses (e.g. chris forgot that constructors aren't inherited)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5608 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2011-04-09 21:07:52 +00:00
parent b4b52cc0fe
commit de4eaa455e
12 changed files with 134 additions and 45 deletions

View File

@ -15,21 +15,13 @@ import java.util.*;
public abstract class AssociationContext<X,Y> { public abstract class AssociationContext<X,Y> {
protected List<Map<Sample,Y>> window; protected List<Map<Sample,Y>> window;
private final int size; private int size;
private final int slide; private int slide;
public AssociationContext() { public AssociationContext() {
this(0,0);
}
public AssociationContext(final int winSize, final int winSlide) {
window = new ArrayList<Map<Sample, Y>>(winSize);
size = winSize;
slide = winSlide;
} }
public AssociationContext(final RegionalAssociationWalker parent) { public AssociationContext(final RegionalAssociationWalker parent) {
this(parent.windowSize,parent.slideBy);
this.init(parent); this.init(parent);
} }
@ -46,7 +38,11 @@ public abstract class AssociationContext<X,Y> {
public boolean filter(MapExtender m) { return true; } public boolean filter(MapExtender m) { return true; }
// a basic initialization of the context (give the walker for access to object?) // a basic initialization of the context (give the walker for access to object?)
public void init(RegionalAssociationWalker walker) { } public void init(RegionalAssociationWalker walker) {
size = walker.windowSize;
slide = walker.slideBy;
window = new ArrayList<Map<Sample,Y>>(size);
}
public Map<Sample,Object> mapLocus(MapExtender extender) { public Map<Sample,Object> mapLocus(MapExtender extender) {
Map<Sample,ReadBackedPileup> pileups; Map<Sample,ReadBackedPileup> pileups;

View File

@ -6,6 +6,8 @@ import java.util.Map;
import cern.jet.random.Normal; import cern.jet.random.Normal;
import cern.jet.random.StudentT; import cern.jet.random.StudentT;
import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.*; import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.*;
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.Dichotomizable;
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.Dichotomizer1D;
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.StatisticalTest; import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.StatisticalTest;
import org.broadinstitute.sting.utils.MannWhitneyU; import org.broadinstitute.sting.utils.MannWhitneyU;
import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.QualityUtils;
@ -21,6 +23,12 @@ import org.broadinstitute.sting.utils.collections.Pair;
public class AssociationTestRunner { public class AssociationTestRunner {
final static int MAX_Q_VALUE = Integer.MAX_VALUE; final static int MAX_Q_VALUE = Integer.MAX_VALUE;
// todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance // todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance
final static Dichotomizer1D.Transform LOG_TRANSFORM = (new Dichotomizer1D()).new Transform() {
@Override
public double transform(double d) {
return Math.log(d);
}
};
public static int pToQ(double p) { public static int pToQ(double p) {
return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE); return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE);
@ -62,4 +70,14 @@ public class AssociationTestRunner {
} }
return mwu.runTwoSidedTest(); return mwu.runTwoSidedTest();
} }
public static Pair<Double,Double> getDichotomizedValues(AssociationContext context) {
if ( context instanceof Dichotomizable ) {
double raw = Dichotomizer1D.simpleGaussianDichotomy(((Dichotomizable)context));
double log = Dichotomizer1D.simpleGaussianDichotomy(((Dichotomizable)context),LOG_TRANSFORM);
return new Pair<Double,Double>(raw,log);
}
return new Pair<Double,Double>(Double.NaN,Double.NaN);
}
} }

View File

@ -24,9 +24,13 @@ public class MapExtender {
private MapHolder current = null; private MapHolder current = null;
private Map<Sample,ReadBackedPileup> fullPileup = null; private Map<Sample,ReadBackedPileup> fullPileup = null;
private Map<Sample,ReadBackedPileup> readFilteredPileup = null; private Map<Sample,ReadBackedPileup> readFilteredPileup = null;
private Map<String,Sample> idToSampleObject = null;
public MapExtender() { public MapExtender(Set<Sample> samples) {
// no need to do anything idToSampleObject = new HashMap<String,Sample>(samples.size());
for ( Sample s : samples ) {
idToSampleObject.put(s.getId(),s);
}
} }
public void set(MapHolder holder) { public void set(MapHolder holder) {
@ -38,7 +42,7 @@ public class MapExtender {
readFilteredPileup = new HashMap<Sample,ReadBackedPileup>(); readFilteredPileup = new HashMap<Sample,ReadBackedPileup>();
if ( current != null ) { if ( current != null ) {
for ( Map.Entry<Sample,AlignmentContext> sac : current.getContext().entrySet() ) { for ( Map.Entry<Sample,AlignmentContext> sac : current.getContext(idToSampleObject).entrySet() ) {
AlignmentContext context = AlignmentContextUtils.stratify(sac.getValue(), TYPE); AlignmentContext context = AlignmentContextUtils.stratify(sac.getValue(), TYPE);
if ( context.hasBasePileup() ) { if ( context.hasBasePileup() ) {
fullPileup.put(sac.getKey(),context.getBasePileup()); fullPileup.put(sac.getKey(),context.getBasePileup());
@ -67,7 +71,7 @@ public class MapExtender {
public Map<Sample,ReadBackedPileup> getReadFilteredPileup(){ return readFilteredPileup; } public Map<Sample,ReadBackedPileup> getReadFilteredPileup(){ return readFilteredPileup; }
public Map<Sample,AlignmentContext> getPreviousContext() { public Map<Sample,AlignmentContext> getPreviousContext() {
return previous != null ? previous.getContext() : null; return previous != null ? previous.getContext(idToSampleObject) : null;
} }
public ReferenceContext getPreviousRef() { public ReferenceContext getPreviousRef() {
@ -79,7 +83,7 @@ public class MapExtender {
} }
public Map<Sample,AlignmentContext> getContext() { public Map<Sample,AlignmentContext> getContext() {
return current != null ? current.getContext() : null; return current != null ? current.getContext(idToSampleObject) : null;
} }
public ReferenceContext getReferenceContext() { public ReferenceContext getReferenceContext() {

View File

@ -6,21 +6,26 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import java.util.HashMap;
import java.util.Map; import java.util.Map;
public class MapHolder { public class MapHolder {
private RefMetaDataTracker tracker; private RefMetaDataTracker tracker;
private ReferenceContext ref; private ReferenceContext ref;
private Map<Sample, AlignmentContext> alignments; private Map<String, AlignmentContext> alignments;
public MapHolder(RefMetaDataTracker t, ReferenceContext r, AlignmentContext a) { public MapHolder(RefMetaDataTracker t, ReferenceContext r, AlignmentContext a) {
tracker = t; tracker = t;
ref = r; ref = r;
alignments = AlignmentContextUtils.splitContextBySample(a); alignments = AlignmentContextUtils.splitContextBySampleName(a);
} }
public Map<Sample, AlignmentContext> getContext() { public Map<Sample, AlignmentContext> getContext(Map<String,Sample> stringSampleMap) {
return alignments; Map<Sample,AlignmentContext> mappedContexts = new HashMap<Sample,AlignmentContext>(alignments.size());
for ( Map.Entry<String,AlignmentContext> entry : alignments.entrySet() ) {
mappedContexts.put(stringSampleMap.get(entry.getKey()),entry.getValue());
}
return mappedContexts;
} }
public ReferenceContext getRef() { public ReferenceContext getRef() {

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.oneoffprojects.walkers.association; package org.broadinstitute.sting.oneoffprojects.walkers.association;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
@ -15,8 +16,8 @@ public class RegionalAssociationHandler {
// todo -- the correct way to do this is via the PluginManager (a la VariantEval) but this is a QND implementation // todo -- the correct way to do this is via the PluginManager (a la VariantEval) but this is a QND implementation
private Set<AssociationContext> associations; private Set<AssociationContext> associations;
public RegionalAssociationHandler(Set<AssociationContext> contexts) { public RegionalAssociationHandler(Set<AssociationContext> contexts, Set<Sample> samples) {
maps = new MapExtender(); maps = new MapExtender(samples);
associations = contexts; associations = contexts;
} }
@ -51,10 +52,14 @@ public class RegionalAssociationHandler {
if ( w.isFull() ) { if ( w.isFull() ) {
String outVal; String outVal;
if ( bedGraphFormat ) { if ( bedGraphFormat ) {
Pair<Double,Pair<Double,Integer>> vals = AssociationTestRunner.getTestValues(w); Pair<Double,Pair<Double,Integer>> statVals = AssociationTestRunner.getTestValues(w);
outVal = String.format("%.2f\t%.2e\t%d",vals.first,vals.second.first,vals.second.second); Pair<Double,Double> simpleDichotVals = AssociationTestRunner.getDichotomizedValues(w);
outVal = String.format("%.2f\t%.2e\t%d\t%.2f\t%.2f",statVals.first,statVals.second.first,statVals.second.second,
simpleDichotVals.first,simpleDichotVals.second);
} else { } else {
outVal = AssociationTestRunner.runTests(w); outVal = AssociationTestRunner.runTests(w);
Pair<Double,Double> simpleDichotVals = AssociationTestRunner.getDichotomizedValues(w);
outVal += String.format("\tD: %.2f\tLogD: %.2f",simpleDichotVals.first,simpleDichotVals.second);
} }
testResults.put(w,String.format("%s\t%d\t%d\t%s",maps.getReferenceContext().getLocus().getContig(), testResults.put(w,String.format("%s\t%d\t%d\t%s",maps.getReferenceContext().getLocus().getContig(),
maps.getReferenceContext().getLocus().getStart()-w.getWindowSize()-1,maps.getReferenceContext().getLocus().getStart()+1, outVal)); maps.getReferenceContext().getLocus().getStart()-w.getWindowSize()-1,maps.getReferenceContext().getLocus().getStart()+1, outVal));

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.oneoffprojects.walkers.association; package org.broadinstitute.sting.oneoffprojects.walkers.association;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.DownsampleType;
@ -64,7 +65,7 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
public RegionalAssociationHandler reduceInit() { public RegionalAssociationHandler reduceInit() {
Set<AssociationContext> validAssociations = getAssociations(); Set<AssociationContext> validAssociations = getAssociations();
RegionalAssociationHandler wac = new RegionalAssociationHandler(validAssociations); RegionalAssociationHandler wac = new RegionalAssociationHandler(validAssociations,getSamples());
return wac; return wac;
} }
@ -97,7 +98,8 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
for ( Class<? extends AssociationContext> clazz : contexts ) { for ( Class<? extends AssociationContext> clazz : contexts ) {
AssociationContext context; AssociationContext context;
try { try {
context = clazz.getConstructor(new Class[] {RegionalAssociationWalker.class}).newInstance(new Object[] {this}); context = clazz.getConstructor(new Class[] {}).newInstance(new Object[] {});
context.init(this);
} catch (Exception e ) { } catch (Exception e ) {
throw new StingException("The class "+clazz.getSimpleName()+" could not be instantiated",e); throw new StingException("The class "+clazz.getSimpleName()+" could not be instantiated",e);
} }
@ -116,7 +118,8 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
for ( String s : associationsToUse ) { for ( String s : associationsToUse ) {
AssociationContext context; AssociationContext context;
try { try {
context = classNameToClass.get(s).getConstructor(new Class[]{RegionalAssociationWalker.class}).newInstance(new Object[] {this}); context = classNameToClass.get(s).getConstructor(new Class[]{}).newInstance(new Object[] {});
context.init(this);
} catch ( Exception e ) { } catch ( Exception e ) {
throw new StingException("The class "+s+" could not be instantiated.",e); throw new StingException("The class "+s+" could not be instantiated.",e);
} }
@ -141,8 +144,7 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
public void writeBedGraphHeaders(Set<AssociationContext> cons) { public void writeBedGraphHeaders(Set<AssociationContext> cons) {
for ( AssociationContext con : cons ) { for ( AssociationContext con : cons ) {
GenomeLoc first = getToolkit().getIntervals().iterator().next(); String header = String.format("track type=bedGraph name=%s description=stat,p,q,dichot,logdichot",con.getClass().getSimpleName());
String header = String.format("track type=bedGraph name=%s",con.getClass().getSimpleName());
out.get(con.getClass()).printf("%s%n",header); out.get(con.getClass()).printf("%s%n",header);
} }
} }

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.case
import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContext; import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContext;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.util.HashMap; import java.util.HashMap;
@ -24,6 +25,12 @@ public abstract class CaseControl<X> extends AssociationContext<X,X> {
X accumControl = null; X accumControl = null;
for ( Map<Sample,X> sampleXMap : window ) { for ( Map<Sample,X> sampleXMap : window ) {
for ( Map.Entry<Sample,X> entry : sampleXMap.entrySet() ) { for ( Map.Entry<Sample,X> entry : sampleXMap.entrySet() ) {
boolean isCase;
try {
isCase = entry.getKey().getProperty("cohort").equals("case");
} catch (NullPointerException e) {
throw new UserException("Sample "+entry.getKey().getId()+" does not have a cohort assigned");
}
if ( entry.getKey().getProperty("cohort").equals("case") ) { if ( entry.getKey().getProperty("cohort").equals("case") ) {
accumCase = accum(accumCase, entry.getValue()); accumCase = accum(accumCase, entry.getValue());
} else if ( entry.getKey().getProperty("cohort").equals("control") ) { } else if ( entry.getKey().getProperty("cohort").equals("control") ) {

View File

@ -19,18 +19,19 @@ import java.util.*;
*/ */
public class MismatchRate extends ValueTest { public class MismatchRate extends ValueTest {
private Map<Sample,Object> sampleStats = new HashMap<Sample,Object>(); private Map<String,Object> sampleStats = new HashMap<String,Object>();
private int currentRefBase = 0; private int currentRefBase = 0;
public void init(RegionalAssociationWalker walker) { public void init(RegionalAssociationWalker walker) {
super.init(walker);
Set<Sample> samples = walker.getSamples(); Set<Sample> samples = walker.getSamples();
for ( Sample s : samples ) { for ( Sample s : samples ) {
if ( s.hasProperty("mismatch_rate.mean") && s.hasProperty("mismatch_rate.std") ) { if ( s.hasProperty("mismatch_rate.mean") && s.hasProperty("mismatch_rate.std") ) {
double mn = (Double) s.getProperty("mismatch_rate.mean"); double mn = (Double) s.getProperty("mismatch_rate.mean");
double std = (Double) s.getProperty("mismatch_rate.std"); double std = (Double) s.getProperty("mismatch_rate.std");
sampleStats.put(s,new Pair<Double,Double>(mn,std)); sampleStats.put(s.getId(),new Pair<Double,Double>(mn,std));
} else { } else {
sampleStats.put(s,new MathUtils.RunningAverage()); sampleStats.put(s.getId(),new MathUtils.RunningAverage());
} }
} }
} }
@ -48,7 +49,7 @@ public class MismatchRate extends ValueTest {
} }
public Collection<Number> map(Sample sample, ReadBackedPileup pileup) { public Collection<Number> map(Sample sample, ReadBackedPileup pileup) {
Object stats = sampleStats.get(sample); Object stats = sampleStats.get(sample.getId());
double mn; double mn;
double std; double std;
if ( stats instanceof Pair ) { if ( stats instanceof Pair ) {

View File

@ -0,0 +1,42 @@
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol;
import ch.qos.logback.classic.filter.ThresholdFilter;
import org.broadinstitute.sting.oneoffprojects.walkers.association.RegionalAssociationWalker;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
/**
* Created by IntelliJ IDEA.
* User: Ghost
* Date: 4/9/11
* Time: 3:31 PM
* To change this template use File | Settings | File Templates.
*/
public class ReadsLargeInsertSize extends ProportionTest {
private int THRESHOLD;
@Override
public Pair<Number,Number> map(ReadBackedPileup rbp) {
int total = 0;
int wonky = 0;
for (PileupElement e : rbp ) {
if ( e.getRead().getReadPairedFlag() ) {
++total;
if ( e.getRead().getInferredInsertSize() > THRESHOLD ) {
++wonky;
}
}
}
return new Pair<Number,Number>(wonky,total);
}
public void init(RegionalAssociationWalker parent) {
super.init(parent);
THRESHOLD = 150;
}
public boolean usePreviouslySeenReads() { return false; }
}

View File

@ -14,24 +14,21 @@ import java.util.*;
*/ */
public class SampleDepth extends ValueTest { public class SampleDepth extends ValueTest {
public Map<Sample,Object> sampleStats = null; public Map<String,Object> sampleStats = null;
public SampleDepth() {
super();
sampleStats = new HashMap<Sample,Object>();
}
// either: the user associates data with the sample (doc.mean, doc.std) // either: the user associates data with the sample (doc.mean, doc.std)
// >OR we calculate the values on-the-fly (e.g. running mean/stdev) // >OR we calculate the values on-the-fly (e.g. running mean/stdev)
public void init(RegionalAssociationWalker walker) { public void init(RegionalAssociationWalker walker) {
super.init(walker);
sampleStats = new HashMap<String,Object>();
Set<Sample> samples = walker.getSamples(); Set<Sample> samples = walker.getSamples();
for ( Sample s : samples ) { for ( Sample s : samples ) {
if ( s.hasProperty("doc.mean") && s.hasProperty("doc.std") ) { if ( s.hasProperty("doc.mean") && s.hasProperty("doc.std") ) {
double mn = (Double) s.getProperty("doc.mean"); double mn = (Double) s.getProperty("doc.mean");
double std = (Double) s.getProperty("doc.std"); double std = (Double) s.getProperty("doc.std");
sampleStats.put(s,new Pair<Double,Double>(mn,std)); sampleStats.put(s.getId(),new Pair<Double,Double>(mn,std));
} else { } else {
sampleStats.put(s,new MathUtils.RunningAverage()); sampleStats.put(s.getId(),new MathUtils.RunningAverage());
} }
} }
} }
@ -48,7 +45,7 @@ public class SampleDepth extends ValueTest {
} }
public Collection<Number> map(Sample sample, ReadBackedPileup pileup) { public Collection<Number> map(Sample sample, ReadBackedPileup pileup) {
Object stats = sampleStats.get(sample); Object stats = sampleStats.get(sample.getId());
double mn; double mn;
double std; double std;
if ( stats instanceof Pair ) { if ( stats instanceof Pair ) {

View File

@ -60,11 +60,11 @@ public abstract class ValueTest extends CaseControl<Collection<Number>> implemen
MannWhitneyU mwu = new MannWhitneyU(); MannWhitneyU mwu = new MannWhitneyU();
for ( Map.Entry<Cohort,Collection<Number>> cohortEntry : getCaseControl().entrySet() ) { for ( Map.Entry<Cohort,Collection<Number>> cohortEntry : getCaseControl().entrySet() ) {
if ( cohortEntry.getKey().equals(Cohort.CASE) ) { if ( cohortEntry.getKey().equals(Cohort.CASE) && cohortEntry.getValue() != null ) {
for ( Number n : cohortEntry.getValue() ) { for ( Number n : cohortEntry.getValue() ) {
mwu.add(n,MannWhitneyU.USet.SET1); mwu.add(n,MannWhitneyU.USet.SET1);
} }
} else if ( cohortEntry.getKey().equals(Cohort.CONTROL)) { } else if ( cohortEntry.getKey().equals(Cohort.CONTROL) && cohortEntry.getValue() != null) {
for ( Number n : cohortEntry.getValue() ) { for ( Number n : cohortEntry.getValue() ) {
mwu.add(n,MannWhitneyU.USet.SET2); mwu.add(n,MannWhitneyU.USet.SET2);
} }

View File

@ -15,7 +15,7 @@ import java.util.Collection;
*/ */
public class Dichotomizer1D { public class Dichotomizer1D {
private Dichotomizer1D() { } // no instantiation public Dichotomizer1D() { } // instantiation required for access to transform
public abstract class Transform { public abstract class Transform {
public abstract double transform(double n); public abstract double transform(double n);
@ -28,8 +28,18 @@ public class Dichotomizer1D {
return tData; return tData;
} }
public Transform() {
}
} }
/**
* Tries to measure effect size by the separation of setOne and setTwo clusters, with a window spread factor of 3
* @param setOne - collection of data from set one
* @param setTwo - colleciton of data from set two
* @return - the so-called "Z"-factor (effect size/spread)
*/
public static double simpleGaussianDichotomy(Collection<Number> setOne, Collection<Number> setTwo) { public static double simpleGaussianDichotomy(Collection<Number> setOne, Collection<Number> setTwo) {
double meanOne = MathUtils.sum(setOne)/setOne.size(); double meanOne = MathUtils.sum(setOne)/setOne.size();
double meanTwo = MathUtils.sum(setTwo)/setTwo.size(); double meanTwo = MathUtils.sum(setTwo)/setTwo.size();
@ -53,4 +63,6 @@ public class Dichotomizer1D {
return simpleGaussianDichotomy(dichotomizedData.first,dichotomizedData.second, transform); return simpleGaussianDichotomy(dichotomizedData.first,dichotomizedData.second, transform);
} }
public static Pair<Double,Double> twoMeansDichotomy() { return null; }
} }