Squashing some bugs. Current implementation of AlignmentContextUtils.splitContextBySample() eliminates all sample meta data. Per Mark's request I'm working around this rather than fixing it -- the extender now maintains a mapping from sample id to sample object. Addition of a proportion test for large-insert-size reads, and slight refactoring of code to deal with bad window initialization of subclasses (e.g. chris forgot that constructors aren't inherited)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5608 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2011-04-09 21:07:52 +00:00
parent b4b52cc0fe
commit de4eaa455e
12 changed files with 134 additions and 45 deletions

View File

@ -15,21 +15,13 @@ import java.util.*;
public abstract class AssociationContext<X,Y> {
protected List<Map<Sample,Y>> window;
private final int size;
private final int slide;
private int size;
private int slide;
public AssociationContext() {
this(0,0);
}
public AssociationContext(final int winSize, final int winSlide) {
window = new ArrayList<Map<Sample, Y>>(winSize);
size = winSize;
slide = winSlide;
}
public AssociationContext(final RegionalAssociationWalker parent) {
this(parent.windowSize,parent.slideBy);
this.init(parent);
}
@ -46,7 +38,11 @@ public abstract class AssociationContext<X,Y> {
public boolean filter(MapExtender m) { return true; }
// a basic initialization of the context (give the walker for access to object?)
public void init(RegionalAssociationWalker walker) { }
public void init(RegionalAssociationWalker walker) {
size = walker.windowSize;
slide = walker.slideBy;
window = new ArrayList<Map<Sample,Y>>(size);
}
public Map<Sample,Object> mapLocus(MapExtender extender) {
Map<Sample,ReadBackedPileup> pileups;

View File

@ -6,6 +6,8 @@ import java.util.Map;
import cern.jet.random.Normal;
import cern.jet.random.StudentT;
import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.*;
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.Dichotomizable;
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.Dichotomizer1D;
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.StatisticalTest;
import org.broadinstitute.sting.utils.MannWhitneyU;
import org.broadinstitute.sting.utils.QualityUtils;
@ -21,6 +23,12 @@ import org.broadinstitute.sting.utils.collections.Pair;
public class AssociationTestRunner {
final static int MAX_Q_VALUE = Integer.MAX_VALUE;
// todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance
final static Dichotomizer1D.Transform LOG_TRANSFORM = (new Dichotomizer1D()).new Transform() {
@Override
public double transform(double d) {
return Math.log(d);
}
};
public static int pToQ(double p) {
return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE);
@ -62,4 +70,14 @@ public class AssociationTestRunner {
}
return mwu.runTwoSidedTest();
}
public static Pair<Double,Double> getDichotomizedValues(AssociationContext context) {
if ( context instanceof Dichotomizable ) {
double raw = Dichotomizer1D.simpleGaussianDichotomy(((Dichotomizable)context));
double log = Dichotomizer1D.simpleGaussianDichotomy(((Dichotomizable)context),LOG_TRANSFORM);
return new Pair<Double,Double>(raw,log);
}
return new Pair<Double,Double>(Double.NaN,Double.NaN);
}
}

View File

@ -24,9 +24,13 @@ public class MapExtender {
private MapHolder current = null;
private Map<Sample,ReadBackedPileup> fullPileup = null;
private Map<Sample,ReadBackedPileup> readFilteredPileup = null;
private Map<String,Sample> idToSampleObject = null;
public MapExtender() {
// no need to do anything
public MapExtender(Set<Sample> samples) {
idToSampleObject = new HashMap<String,Sample>(samples.size());
for ( Sample s : samples ) {
idToSampleObject.put(s.getId(),s);
}
}
public void set(MapHolder holder) {
@ -38,7 +42,7 @@ public class MapExtender {
readFilteredPileup = new HashMap<Sample,ReadBackedPileup>();
if ( current != null ) {
for ( Map.Entry<Sample,AlignmentContext> sac : current.getContext().entrySet() ) {
for ( Map.Entry<Sample,AlignmentContext> sac : current.getContext(idToSampleObject).entrySet() ) {
AlignmentContext context = AlignmentContextUtils.stratify(sac.getValue(), TYPE);
if ( context.hasBasePileup() ) {
fullPileup.put(sac.getKey(),context.getBasePileup());
@ -67,7 +71,7 @@ public class MapExtender {
public Map<Sample,ReadBackedPileup> getReadFilteredPileup(){ return readFilteredPileup; }
public Map<Sample,AlignmentContext> getPreviousContext() {
return previous != null ? previous.getContext() : null;
return previous != null ? previous.getContext(idToSampleObject) : null;
}
public ReferenceContext getPreviousRef() {
@ -79,7 +83,7 @@ public class MapExtender {
}
public Map<Sample,AlignmentContext> getContext() {
return current != null ? current.getContext() : null;
return current != null ? current.getContext(idToSampleObject) : null;
}
public ReferenceContext getReferenceContext() {

View File

@ -6,21 +6,26 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import java.util.HashMap;
import java.util.Map;
public class MapHolder {
private RefMetaDataTracker tracker;
private ReferenceContext ref;
private Map<Sample, AlignmentContext> alignments;
private Map<String, AlignmentContext> alignments;
public MapHolder(RefMetaDataTracker t, ReferenceContext r, AlignmentContext a) {
tracker = t;
ref = r;
alignments = AlignmentContextUtils.splitContextBySample(a);
alignments = AlignmentContextUtils.splitContextBySampleName(a);
}
public Map<Sample, AlignmentContext> getContext() {
return alignments;
public Map<Sample, AlignmentContext> getContext(Map<String,Sample> stringSampleMap) {
Map<Sample,AlignmentContext> mappedContexts = new HashMap<Sample,AlignmentContext>(alignments.size());
for ( Map.Entry<String,AlignmentContext> entry : alignments.entrySet() ) {
mappedContexts.put(stringSampleMap.get(entry.getKey()),entry.getValue());
}
return mappedContexts;
}
public ReferenceContext getRef() {

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.oneoffprojects.walkers.association;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
@ -15,8 +16,8 @@ public class RegionalAssociationHandler {
// todo -- the correct way to do this is via the PluginManager (a la VariantEval) but this is a QND implementation
private Set<AssociationContext> associations;
public RegionalAssociationHandler(Set<AssociationContext> contexts) {
maps = new MapExtender();
public RegionalAssociationHandler(Set<AssociationContext> contexts, Set<Sample> samples) {
maps = new MapExtender(samples);
associations = contexts;
}
@ -51,10 +52,14 @@ public class RegionalAssociationHandler {
if ( w.isFull() ) {
String outVal;
if ( bedGraphFormat ) {
Pair<Double,Pair<Double,Integer>> vals = AssociationTestRunner.getTestValues(w);
outVal = String.format("%.2f\t%.2e\t%d",vals.first,vals.second.first,vals.second.second);
Pair<Double,Pair<Double,Integer>> statVals = AssociationTestRunner.getTestValues(w);
Pair<Double,Double> simpleDichotVals = AssociationTestRunner.getDichotomizedValues(w);
outVal = String.format("%.2f\t%.2e\t%d\t%.2f\t%.2f",statVals.first,statVals.second.first,statVals.second.second,
simpleDichotVals.first,simpleDichotVals.second);
} else {
outVal = AssociationTestRunner.runTests(w);
Pair<Double,Double> simpleDichotVals = AssociationTestRunner.getDichotomizedValues(w);
outVal += String.format("\tD: %.2f\tLogD: %.2f",simpleDichotVals.first,simpleDichotVals.second);
}
testResults.put(w,String.format("%s\t%d\t%d\t%s",maps.getReferenceContext().getLocus().getContig(),
maps.getReferenceContext().getLocus().getStart()-w.getWindowSize()-1,maps.getReferenceContext().getLocus().getStart()+1, outVal));

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.oneoffprojects.walkers.association;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.DownsampleType;
@ -64,7 +65,7 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
public RegionalAssociationHandler reduceInit() {
Set<AssociationContext> validAssociations = getAssociations();
RegionalAssociationHandler wac = new RegionalAssociationHandler(validAssociations);
RegionalAssociationHandler wac = new RegionalAssociationHandler(validAssociations,getSamples());
return wac;
}
@ -97,7 +98,8 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
for ( Class<? extends AssociationContext> clazz : contexts ) {
AssociationContext context;
try {
context = clazz.getConstructor(new Class[] {RegionalAssociationWalker.class}).newInstance(new Object[] {this});
context = clazz.getConstructor(new Class[] {}).newInstance(new Object[] {});
context.init(this);
} catch (Exception e ) {
throw new StingException("The class "+clazz.getSimpleName()+" could not be instantiated",e);
}
@ -116,7 +118,8 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
for ( String s : associationsToUse ) {
AssociationContext context;
try {
context = classNameToClass.get(s).getConstructor(new Class[]{RegionalAssociationWalker.class}).newInstance(new Object[] {this});
context = classNameToClass.get(s).getConstructor(new Class[]{}).newInstance(new Object[] {});
context.init(this);
} catch ( Exception e ) {
throw new StingException("The class "+s+" could not be instantiated.",e);
}
@ -141,8 +144,7 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
public void writeBedGraphHeaders(Set<AssociationContext> cons) {
for ( AssociationContext con : cons ) {
GenomeLoc first = getToolkit().getIntervals().iterator().next();
String header = String.format("track type=bedGraph name=%s",con.getClass().getSimpleName());
String header = String.format("track type=bedGraph name=%s description=stat,p,q,dichot,logdichot",con.getClass().getSimpleName());
out.get(con.getClass()).printf("%s%n",header);
}
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.case
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContext;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.util.HashMap;
@ -24,6 +25,12 @@ public abstract class CaseControl<X> extends AssociationContext<X,X> {
X accumControl = null;
for ( Map<Sample,X> sampleXMap : window ) {
for ( Map.Entry<Sample,X> entry : sampleXMap.entrySet() ) {
boolean isCase;
try {
isCase = entry.getKey().getProperty("cohort").equals("case");
} catch (NullPointerException e) {
throw new UserException("Sample "+entry.getKey().getId()+" does not have a cohort assigned");
}
if ( entry.getKey().getProperty("cohort").equals("case") ) {
accumCase = accum(accumCase, entry.getValue());
} else if ( entry.getKey().getProperty("cohort").equals("control") ) {

View File

@ -19,18 +19,19 @@ import java.util.*;
*/
public class MismatchRate extends ValueTest {
private Map<Sample,Object> sampleStats = new HashMap<Sample,Object>();
private Map<String,Object> sampleStats = new HashMap<String,Object>();
private int currentRefBase = 0;
public void init(RegionalAssociationWalker walker) {
super.init(walker);
Set<Sample> samples = walker.getSamples();
for ( Sample s : samples ) {
if ( s.hasProperty("mismatch_rate.mean") && s.hasProperty("mismatch_rate.std") ) {
double mn = (Double) s.getProperty("mismatch_rate.mean");
double std = (Double) s.getProperty("mismatch_rate.std");
sampleStats.put(s,new Pair<Double,Double>(mn,std));
sampleStats.put(s.getId(),new Pair<Double,Double>(mn,std));
} else {
sampleStats.put(s,new MathUtils.RunningAverage());
sampleStats.put(s.getId(),new MathUtils.RunningAverage());
}
}
}
@ -48,7 +49,7 @@ public class MismatchRate extends ValueTest {
}
public Collection<Number> map(Sample sample, ReadBackedPileup pileup) {
Object stats = sampleStats.get(sample);
Object stats = sampleStats.get(sample.getId());
double mn;
double std;
if ( stats instanceof Pair ) {

View File

@ -0,0 +1,42 @@
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol;
import ch.qos.logback.classic.filter.ThresholdFilter;
import org.broadinstitute.sting.oneoffprojects.walkers.association.RegionalAssociationWalker;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
/**
* Created by IntelliJ IDEA.
* User: Ghost
* Date: 4/9/11
* Time: 3:31 PM
* To change this template use File | Settings | File Templates.
*/
public class ReadsLargeInsertSize extends ProportionTest {
private int THRESHOLD;
@Override
public Pair<Number,Number> map(ReadBackedPileup rbp) {
int total = 0;
int wonky = 0;
for (PileupElement e : rbp ) {
if ( e.getRead().getReadPairedFlag() ) {
++total;
if ( e.getRead().getInferredInsertSize() > THRESHOLD ) {
++wonky;
}
}
}
return new Pair<Number,Number>(wonky,total);
}
public void init(RegionalAssociationWalker parent) {
super.init(parent);
THRESHOLD = 150;
}
public boolean usePreviouslySeenReads() { return false; }
}

View File

@ -14,24 +14,21 @@ import java.util.*;
*/
public class SampleDepth extends ValueTest {
public Map<Sample,Object> sampleStats = null;
public SampleDepth() {
super();
sampleStats = new HashMap<Sample,Object>();
}
public Map<String,Object> sampleStats = null;
// either: the user associates data with the sample (doc.mean, doc.std)
// >OR we calculate the values on-the-fly (e.g. running mean/stdev)
public void init(RegionalAssociationWalker walker) {
super.init(walker);
sampleStats = new HashMap<String,Object>();
Set<Sample> samples = walker.getSamples();
for ( Sample s : samples ) {
if ( s.hasProperty("doc.mean") && s.hasProperty("doc.std") ) {
double mn = (Double) s.getProperty("doc.mean");
double std = (Double) s.getProperty("doc.std");
sampleStats.put(s,new Pair<Double,Double>(mn,std));
sampleStats.put(s.getId(),new Pair<Double,Double>(mn,std));
} else {
sampleStats.put(s,new MathUtils.RunningAverage());
sampleStats.put(s.getId(),new MathUtils.RunningAverage());
}
}
}
@ -48,7 +45,7 @@ public class SampleDepth extends ValueTest {
}
public Collection<Number> map(Sample sample, ReadBackedPileup pileup) {
Object stats = sampleStats.get(sample);
Object stats = sampleStats.get(sample.getId());
double mn;
double std;
if ( stats instanceof Pair ) {

View File

@ -60,11 +60,11 @@ public abstract class ValueTest extends CaseControl<Collection<Number>> implemen
MannWhitneyU mwu = new MannWhitneyU();
for ( Map.Entry<Cohort,Collection<Number>> cohortEntry : getCaseControl().entrySet() ) {
if ( cohortEntry.getKey().equals(Cohort.CASE) ) {
if ( cohortEntry.getKey().equals(Cohort.CASE) && cohortEntry.getValue() != null ) {
for ( Number n : cohortEntry.getValue() ) {
mwu.add(n,MannWhitneyU.USet.SET1);
}
} else if ( cohortEntry.getKey().equals(Cohort.CONTROL)) {
} else if ( cohortEntry.getKey().equals(Cohort.CONTROL) && cohortEntry.getValue() != null) {
for ( Number n : cohortEntry.getValue() ) {
mwu.add(n,MannWhitneyU.USet.SET2);
}

View File

@ -15,7 +15,7 @@ import java.util.Collection;
*/
public class Dichotomizer1D {
private Dichotomizer1D() { } // no instantiation
public Dichotomizer1D() { } // instantiation required for access to transform
public abstract class Transform {
public abstract double transform(double n);
@ -28,8 +28,18 @@ public class Dichotomizer1D {
return tData;
}
public Transform() {
}
}
/**
* Tries to measure effect size by the separation of setOne and setTwo clusters, with a window spread factor of 3
* @param setOne - collection of data from set one
* @param setTwo - colleciton of data from set two
* @return - the so-called "Z"-factor (effect size/spread)
*/
public static double simpleGaussianDichotomy(Collection<Number> setOne, Collection<Number> setTwo) {
double meanOne = MathUtils.sum(setOne)/setOne.size();
double meanTwo = MathUtils.sum(setTwo)/setTwo.size();
@ -53,4 +63,6 @@ public class Dichotomizer1D {
return simpleGaussianDichotomy(dichotomizedData.first,dichotomizedData.second, transform);
}
public static Pair<Double,Double> twoMeansDichotomy() { return null; }
}