Major refactoring of association testing framework. New modules are now beyond trivial to implement. One hurdle remains which is how to deal with statistics that ought to be sample-normalized (e.g. depth, insert-size [when multiple libraries are used], and possibly others).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5366 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5d4bbf41fb
commit
ef38fd1e0e
|
|
@ -1,42 +1,74 @@
|
||||||
package org.broadinstitute.sting.oneoffprojects.walkers.association;
|
package org.broadinstitute.sting.oneoffprojects.walkers.association;
|
||||||
|
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||||
import java.util.*;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @Author chartl
|
* Created by IntelliJ IDEA.
|
||||||
* @Date 2011-02-23
|
* User: chartl
|
||||||
* A general context for windowed association
|
* Date: 3/2/11
|
||||||
|
* Time: 11:58 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public abstract class AssociationContext<X extends AssociationContextAtom> {
|
public abstract class AssociationContext<X> {
|
||||||
protected Class<? extends AssociationContextAtom> clazz;
|
|
||||||
protected List<X> window;
|
|
||||||
|
|
||||||
public AssociationContext( Class<X> zclaz ) {
|
protected List<Map<Sample,Object>> window;
|
||||||
window = new ArrayList<X>(getWindowSize());
|
|
||||||
clazz = zclaz;
|
public AssociationContext() {
|
||||||
|
window = new ArrayList<Map<Sample,Object>>(getWindowSize());
|
||||||
}
|
}
|
||||||
|
|
||||||
public X map(MapExtender e) throws NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException, ClassCastException {
|
// specifies size of window
|
||||||
return (X) clazz.getConstructor(new Class[] {MapExtender.class}).newInstance(new Object[] {e});
|
public abstract int getWindowSize();
|
||||||
}
|
|
||||||
|
|
||||||
|
// specifies how many bases to wait until next test
|
||||||
|
public abstract int slideByValue();
|
||||||
|
|
||||||
|
// specifies whether to use previously seen reads
|
||||||
|
public abstract boolean usePreviouslySeenReads();
|
||||||
|
|
||||||
|
// specifies the map from a sample's pileup to the data we want to test
|
||||||
|
public abstract Object map(ReadBackedPileup rbp);
|
||||||
|
|
||||||
|
// specifies how to take the per-sample data and reduce them into testable pairs
|
||||||
|
public abstract Map<?,X> reduce(List<Map<Sample,X>> win);
|
||||||
|
|
||||||
|
// do we filter the current location (e.g. omit from window)
|
||||||
public boolean filter(MapExtender m) { return true; }
|
public boolean filter(MapExtender m) { return true; }
|
||||||
|
|
||||||
public void reduce(X context) {
|
public Map<Sample,Object> mapLocus(MapExtender extender) {
|
||||||
window.add(context);
|
Map<Sample,ReadBackedPileup> pileups;
|
||||||
|
if ( ! usePreviouslySeenReads() ) {
|
||||||
|
pileups = extender.getReadFilteredPileup();
|
||||||
|
} else {
|
||||||
|
pileups = extender.getFullPileup();
|
||||||
|
}
|
||||||
|
Map<Sample,Object> maps = new HashMap<Sample,Object>(pileups.size());
|
||||||
|
for ( Map.Entry<Sample,ReadBackedPileup> samPileup : pileups.entrySet() ) {
|
||||||
|
maps.put(samPileup.getKey(),map(samPileup.getValue()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return maps;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void addData(Map<Sample,Object> sampleData) {
|
||||||
|
window.add(sampleData);
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract int getWindowSize();
|
|
||||||
public abstract int slideByValue();
|
|
||||||
|
|
||||||
public boolean isFull() {
|
public boolean isFull() {
|
||||||
return window.size() >= getWindowSize();
|
return window.size() >= getWindowSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void slide() {
|
public void slide() {
|
||||||
ArrayList<X> newWindow = new ArrayList<X>((window.subList(slideByValue(),window.size())));
|
ArrayList<Map<Sample,Object>> newWindow = new ArrayList<Map<Sample,Object>>((window.subList(slideByValue(),window.size())));
|
||||||
newWindow.ensureCapacity(getWindowSize());
|
newWindow.ensureCapacity(getWindowSize());
|
||||||
window = newWindow;
|
window = newWindow;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,13 +0,0 @@
|
||||||
package org.broadinstitute.sting.oneoffprojects.walkers.association;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @Author chartl
|
|
||||||
* @Date 2011-02-23
|
|
||||||
* general class for calculating base data for generalized association tests
|
|
||||||
*/
|
|
||||||
public abstract class AssociationContextAtom {
|
|
||||||
|
|
||||||
public AssociationContextAtom(MapExtender e) {
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -5,7 +5,8 @@ import java.util.List;
|
||||||
|
|
||||||
import cern.jet.random.Normal;
|
import cern.jet.random.Normal;
|
||||||
import cern.jet.random.StudentT;
|
import cern.jet.random.StudentT;
|
||||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces.*;
|
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.*;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -34,17 +35,19 @@ public class AssociationTestRunner {
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String runStudentT(TStatistic context) {
|
public static String runStudentT(TStatistic context) { /*
|
||||||
double t = context.getTStatistic();
|
StudentT studentT = new StudentT(dfnum/dfdenom,null);
|
||||||
StudentT studentT = new StudentT(context.getDegreesOfFreedom(),null);
|
|
||||||
double p = t < 0 ? 2*studentT.cdf(t) : 2*(1-studentT.cdf(t));
|
double p = t < 0 ? 2*studentT.cdf(t) : 2*(1-studentT.cdf(t));
|
||||||
return String.format("T: %.2f\tP: %.2e",t,p);
|
return String.format("T: %.2f\tP: %.2e",t,p);*/
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String runZ(ZStatistic context) {
|
public static String runZ(ZStatistic context) { /*
|
||||||
double z = context.getZStatistic();
|
double z = num/Math.sqrt(se);
|
||||||
double p = z < 0 ? 2*standardNormal.cdf(z) : 2*(1-standardNormal.cdf(z));
|
double p = z < 0 ? 2*standardNormal.cdf(z) : 2*(1-standardNormal.cdf(z));
|
||||||
return String.format("Z: %.2f\tP: %.2e",z,p);
|
return String.format("Z: %.2f\tP: %.2e",z,p);*/
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String runFisherExact(AssociationContext context) {
|
public static String runFisherExact(AssociationContext context) {
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,16 @@
|
||||||
package org.broadinstitute.sting.oneoffprojects.walkers.association;
|
package org.broadinstitute.sting.oneoffprojects.walkers.association;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @Author chartl
|
* @Author chartl
|
||||||
|
|
@ -13,9 +18,12 @@ import java.util.Map;
|
||||||
* Holds multiple map contexts for use in the regional association walker
|
* Holds multiple map contexts for use in the regional association walker
|
||||||
*/
|
*/
|
||||||
public class MapExtender {
|
public class MapExtender {
|
||||||
|
static StratifiedAlignmentContext.StratifiedContextType TYPE = StratifiedAlignmentContext.StratifiedContextType.COMPLETE;
|
||||||
|
// hold on to these -- atoms may want access to the tracker or other context types
|
||||||
private MapHolder previous = null;
|
private MapHolder previous = null;
|
||||||
private MapHolder current = null;
|
private MapHolder current = null;
|
||||||
|
private Map<Sample,ReadBackedPileup> fullPileup = null;
|
||||||
|
private Map<Sample,ReadBackedPileup> readFilteredPileup = null;
|
||||||
|
|
||||||
public MapExtender() {
|
public MapExtender() {
|
||||||
// no need to do anything
|
// no need to do anything
|
||||||
|
|
@ -24,8 +32,41 @@ public class MapExtender {
|
||||||
public void set(MapHolder holder) {
|
public void set(MapHolder holder) {
|
||||||
previous = current;
|
previous = current;
|
||||||
current = holder;
|
current = holder;
|
||||||
|
|
||||||
|
Map<Sample,ReadBackedPileup> prevPileup = fullPileup;
|
||||||
|
fullPileup = new HashMap<Sample,ReadBackedPileup>();
|
||||||
|
readFilteredPileup = new HashMap<Sample,ReadBackedPileup>();
|
||||||
|
|
||||||
|
if ( current != null ) {
|
||||||
|
for ( Map.Entry<Sample,StratifiedAlignmentContext> sac : current.getContext().entrySet() ) {
|
||||||
|
AlignmentContext context = sac.getValue().getContext(TYPE);
|
||||||
|
if ( context.hasBasePileup() ) {
|
||||||
|
fullPileup.put(sac.getKey(),context.getBasePileup());
|
||||||
|
} else if ( context.hasExtendedEventPileup() ) {
|
||||||
|
fullPileup.put(sac.getKey(),context.getExtendedEventPileup());
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( prevPileup != null ) {
|
||||||
|
|
||||||
|
List<PileupElement> filtElems = new ArrayList<PileupElement>(fullPileup.get(sac.getKey()).size());
|
||||||
|
Set<SAMRecord> seenReads = prevPileup.containsKey(sac.getKey()) ? new HashSet<SAMRecord>(prevPileup.get(sac.getKey()).getReads()) : new HashSet<SAMRecord>(0);
|
||||||
|
for ( PileupElement e : fullPileup.get(sac.getKey()) ) {
|
||||||
|
if ( ! seenReads.contains(e.getRead()) ) {
|
||||||
|
filtElems.add(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
readFilteredPileup.put(sac.getKey(),new ReadBackedPileupImpl(current.getRef().getLocus(),filtElems));
|
||||||
|
} else {
|
||||||
|
readFilteredPileup = fullPileup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Map<Sample,ReadBackedPileup> getFullPileup() { return fullPileup; }
|
||||||
|
public Map<Sample,ReadBackedPileup> getReadFilteredPileup(){ return readFilteredPileup; }
|
||||||
|
|
||||||
public Map<Sample,StratifiedAlignmentContext> getPreviousContext() {
|
public Map<Sample,StratifiedAlignmentContext> getPreviousContext() {
|
||||||
return previous != null ? previous.getContext() : null;
|
return previous != null ? previous.getContext() : null;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -31,10 +30,10 @@ public class RegionalAssociationHandler {
|
||||||
* @reduce: (List<AssociationContext>,AssociationContext) --> List<AssociationContextAtom>
|
* @reduce: (List<AssociationContext>,AssociationContext) --> List<AssociationContextAtom>
|
||||||
* @implementation: just append
|
* @implementation: just append
|
||||||
*/
|
*/
|
||||||
public void runMapReduce() throws NoSuchMethodException, IllegalAccessException, InstantiationException, InvocationTargetException {
|
public void runMapReduce() {
|
||||||
for ( AssociationContext w : associations ) {
|
for ( AssociationContext w : associations ) {
|
||||||
if ( w.filter(maps) ) {
|
if ( w.filter(maps) ) {
|
||||||
w.reduce(w.map(maps));
|
w.addData(w.mapLocus(maps));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,8 +7,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.LowMappingQuality;
|
|
||||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.LowMappingQualityAtom;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
|
|
@ -59,11 +57,8 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
|
||||||
}
|
}
|
||||||
|
|
||||||
private AssociationContext stringToAssociationContext(String s) {
|
private AssociationContext stringToAssociationContext(String s) {
|
||||||
if ( s.equals("LowMappingQuality") ) {
|
|
||||||
return new LowMappingQuality();
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new UserException(String.format("AssociationContext type %s not found.",s));
|
throw new UserException(String.format("AssociationContextOld type %s not found.",s));
|
||||||
}
|
}
|
||||||
|
|
||||||
private Set<AssociationContext> getAssociations() {
|
private Set<AssociationContext> getAssociations() {
|
||||||
|
|
|
||||||
|
|
@ -1,15 +0,0 @@
|
||||||
package org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: chartl
|
|
||||||
* Date: Feb 24, 2011
|
|
||||||
* Time: 12:58:16 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public interface TStatistic {
|
|
||||||
public abstract double getTStatistic();
|
|
||||||
public abstract double getDegreesOfFreedom();
|
|
||||||
}
|
|
||||||
|
|
@ -1,12 +0,0 @@
|
||||||
package org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: chartl
|
|
||||||
* Date: Feb 24, 2011
|
|
||||||
* Time: 12:58:38 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public interface ZStatistic {
|
|
||||||
public abstract double getZStatistic();
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,28 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.TStatistic;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author chartl
|
||||||
|
*/
|
||||||
|
public class InsertSizeDistribution extends TStatistic {
|
||||||
|
public int getWindowSize() { return 100; }
|
||||||
|
public int slideByValue() { return 10; }
|
||||||
|
public boolean usePreviouslySeenReads() { return false; }
|
||||||
|
|
||||||
|
public Collection<Number> map(ReadBackedPileup pileup) {
|
||||||
|
List<Integer> insertSizes = new ArrayList<Integer>(pileup.size());
|
||||||
|
for ( PileupElement e : pileup ) {
|
||||||
|
insertSizes.add(e.getRead().getInferredInsertSize());
|
||||||
|
}
|
||||||
|
|
||||||
|
return (Collection) insertSizes;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -1,89 +0,0 @@
|
||||||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
|
||||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContext;
|
|
||||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContextAtom;
|
|
||||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces.TStatistic;
|
|
||||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces.ZStatistic;
|
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: chartl
|
|
||||||
* Date: Feb 24, 2011
|
|
||||||
* Time: 1:49:37 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class LowMappingQuality extends AssociationContext<LowMappingQualityAtom> implements ZStatistic, TStatistic {
|
|
||||||
private double df = -1;
|
|
||||||
|
|
||||||
public LowMappingQuality(Class<LowMappingQualityAtom> atomClass ) {
|
|
||||||
super(atomClass);
|
|
||||||
}
|
|
||||||
|
|
||||||
public LowMappingQuality() {
|
|
||||||
this(LowMappingQualityAtom.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getWindowSize() { return 20; }
|
|
||||||
public int slideByValue() { return 5; }
|
|
||||||
|
|
||||||
public double getZStatistic() {
|
|
||||||
int case_low= 0;
|
|
||||||
int case_tot = 0;
|
|
||||||
int ctrl_low = 0;
|
|
||||||
int ctrl_tot = 0;
|
|
||||||
for ( LowMappingQualityAtom atom : window ) {
|
|
||||||
Map<String, Pair<Integer,Integer>> ccc = atom.getCaseControlCounts();
|
|
||||||
case_low += ccc.get("case").first;
|
|
||||||
case_tot += ccc.get("case").second;
|
|
||||||
ctrl_low += ccc.get("control").first;
|
|
||||||
ctrl_tot += ccc.get("control").second;
|
|
||||||
}
|
|
||||||
double p_case = ( (double) case_low)/case_tot;
|
|
||||||
double p_ctrl = ((double) ctrl_low)/ctrl_tot;
|
|
||||||
double p_pool = ( (double) case_low + (double) ctrl_low )/(case_tot+ctrl_tot);
|
|
||||||
double SE = Math.sqrt( p_pool*(1-p_pool)*( 1/((double)ctrl_tot) + 1/((double)case_tot) ));
|
|
||||||
return (p_case - p_ctrl)/SE;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getTStatistic() {
|
|
||||||
// todo -- running average is unnecessary and inefficient here, but was quick to write
|
|
||||||
MathUtils.RunningAverage cases = new MathUtils.RunningAverage();
|
|
||||||
MathUtils.RunningAverage controls = new MathUtils.RunningAverage();
|
|
||||||
Set<Sample> caseSamples = new HashSet<Sample>();
|
|
||||||
Set<Sample> controlSamples = new HashSet<Sample>();
|
|
||||||
for ( LowMappingQualityAtom atom : window ) {
|
|
||||||
Map<String,List<Integer>> caseControlQuals = atom.getCaseControlMappingQuals();
|
|
||||||
cases.addAll((Collection) caseControlQuals.get("case"));
|
|
||||||
controls.addAll((Collection) caseControlQuals.get("control"));
|
|
||||||
for ( Sample s : atom.getSamples() ) {
|
|
||||||
if ( s.getProperty("cohort").equals("case") ) {
|
|
||||||
caseSamples.add(s);
|
|
||||||
}
|
|
||||||
if ( s.getProperty("cohort").equals("control")) {
|
|
||||||
controlSamples.add(s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
double a = cases.var()/caseSamples.size();
|
|
||||||
double b = controls.var()/controlSamples.size();
|
|
||||||
|
|
||||||
df = ( a + b )/( a*a/(caseSamples.size()-1) + b*b/(controlSamples.size()-1));
|
|
||||||
|
|
||||||
return ( cases.mean() - controls.mean() )/Math.sqrt( (cases.var()/caseSamples.size()) + (controls.var()/controlSamples.size()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// todo -- this is super hacky
|
|
||||||
public double getDegreesOfFreedom() {
|
|
||||||
if ( df < 0 ) {
|
|
||||||
getTStatistic();
|
|
||||||
}
|
|
||||||
double holder = df;
|
|
||||||
df = -1;
|
|
||||||
return holder;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,143 +0,0 @@
|
||||||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
|
||||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContextAtom;
|
|
||||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.MapExtender;
|
|
||||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.MapHolder;
|
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: chartl
|
|
||||||
* Date: Feb 24, 2011
|
|
||||||
* Time: 1:49:55 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class LowMappingQualityAtom extends AssociationContextAtom {
|
|
||||||
private static int LOW_THRESH = 5;
|
|
||||||
private static StratifiedAlignmentContext.StratifiedContextType TYPE = StratifiedAlignmentContext.StratifiedContextType.COMPLETE;
|
|
||||||
|
|
||||||
protected Map<Sample, Pair<Integer,Integer>> stratifiedCounts;
|
|
||||||
protected Map<Sample,List<Integer>> mappingQualities;
|
|
||||||
|
|
||||||
public LowMappingQualityAtom(MapExtender e) {
|
|
||||||
super(e);
|
|
||||||
stratifiedCounts = new HashMap<Sample,Pair<Integer,Integer>>();
|
|
||||||
mappingQualities = new HashMap<Sample,List<Integer>>();
|
|
||||||
for ( Sample s : e.getContext().keySet() ) {
|
|
||||||
makeCounts(s,e.getPreviousContext(),e.getContext());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void makeCounts( Sample sample, Map<Sample, StratifiedAlignmentContext> prev, Map<Sample,StratifiedAlignmentContext> cur ) {
|
|
||||||
int low = 0;
|
|
||||||
int tot = 0;
|
|
||||||
if ( cur != null && cur.get(sample) != null && cur.get(sample).getContext(TYPE) != null) {
|
|
||||||
HashSet<String> rnames;
|
|
||||||
List<Integer> mapQuals = new ArrayList<Integer>(cur.size());
|
|
||||||
if ( prev != null && prev.get(sample) != null && prev.get(sample).getContext(TYPE) != null) {
|
|
||||||
rnames = new HashSet<String>(prev.get(sample).getContext(TYPE).size());
|
|
||||||
if ( prev.get(sample).getContext(TYPE).hasBasePileup()) {
|
|
||||||
for ( PileupElement e : prev.get(sample).getContext(TYPE).getBasePileup() ) {
|
|
||||||
rnames.add(e.getRead().getReadName());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ( prev.get(sample).getContext(TYPE).hasExtendedEventPileup()) {
|
|
||||||
for ( PileupElement e : prev.get(sample).getContext(TYPE).getExtendedEventPileup() ) {
|
|
||||||
rnames.add(e.getRead().getReadName());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
rnames = new HashSet<String>(0);
|
|
||||||
}
|
|
||||||
if ( cur.get(sample).getContext(TYPE).hasBasePileup()) {
|
|
||||||
for ( PileupElement e : cur.get(sample).getContext(TYPE).getBasePileup() ) {
|
|
||||||
if ( ! rnames.contains(e.getRead().getReadName()) ) {
|
|
||||||
++tot;
|
|
||||||
if ( e.getMappingQual() < LOW_THRESH ) {
|
|
||||||
++low;
|
|
||||||
}
|
|
||||||
mapQuals.add(e.getMappingQual());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ( cur.get(sample).getContext(TYPE).hasExtendedEventPileup()) {
|
|
||||||
for ( PileupElement e : cur.get(sample).getContext(TYPE).getExtendedEventPileup() ) {
|
|
||||||
if ( ! rnames.contains(e.getRead().getReadName()) ) {
|
|
||||||
++tot;
|
|
||||||
if ( e.getMappingQual() < LOW_THRESH ) {
|
|
||||||
++low;
|
|
||||||
}
|
|
||||||
mapQuals.add(e.getMappingQual());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
stratifiedCounts.put(sample,new Pair<Integer,Integer>(low,tot));
|
|
||||||
mappingQualities.put(sample,mapQuals);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public Map<Sample,Pair<Integer,Integer>> getCounts() {
|
|
||||||
return stratifiedCounts;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Map<String,Pair<Integer,Integer>> getCaseControlCounts() {
|
|
||||||
Pair<Integer,Integer> cases = new Pair<Integer,Integer>(0,0);
|
|
||||||
Pair<Integer,Integer> controls = new Pair<Integer,Integer>(0,0);
|
|
||||||
for ( Map.Entry<Sample,Pair<Integer,Integer>> e : stratifiedCounts.entrySet() ) {
|
|
||||||
if ( e.getKey().getProperty("cohort").equals("case") ) {
|
|
||||||
cases.first += e.getValue().first;
|
|
||||||
cases.second += e.getValue().second;
|
|
||||||
}
|
|
||||||
if ( e.getKey().getProperty("cohort").equals("control") ) {
|
|
||||||
controls.first += e.getValue().first;
|
|
||||||
controls.second += e.getValue().second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Map<String,Pair<Integer,Integer>> caseControlCounts= new HashMap<String,Pair<Integer,Integer>>(2);
|
|
||||||
caseControlCounts.put("case",cases);
|
|
||||||
caseControlCounts.put("control",controls);
|
|
||||||
return caseControlCounts;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Map<String,List<Integer>> getCaseControlMappingQuals() {
|
|
||||||
int caseSize = 0;
|
|
||||||
int controlSize = 0;
|
|
||||||
for ( Map.Entry<Sample,List<Integer>> v : mappingQualities.entrySet() ) {
|
|
||||||
if ( v.getKey().getProperty("cohort").equals("case") ) {
|
|
||||||
caseSize += v.getValue().size();
|
|
||||||
}
|
|
||||||
if ( v.getKey().getProperty("cohort").equals("control") ) {
|
|
||||||
controlSize += v.getValue().size();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
List<Integer> caseQuals = new ArrayList<Integer>(caseSize);
|
|
||||||
List<Integer> controlQuals = new ArrayList<Integer>(controlSize);
|
|
||||||
for ( Map.Entry<Sample,List<Integer>> v : mappingQualities.entrySet() ) {
|
|
||||||
if ( v.getKey().getProperty("cohort").equals("case") ) {
|
|
||||||
caseQuals.addAll(v.getValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( v.getKey().getProperty("cohort").equals("control")) {
|
|
||||||
controlQuals.addAll(v.getValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Map<String,List<Integer>> caseControlQuals = new HashMap<String,List<Integer>>(2);
|
|
||||||
caseControlQuals.put("case",caseQuals);
|
|
||||||
caseControlQuals.put("control",controlQuals);
|
|
||||||
return caseControlQuals;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Set<Sample> getSamples() {
|
|
||||||
return stratifiedCounts.keySet();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.ZStatistic;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: chartl
|
||||||
|
* Date: 3/1/11
|
||||||
|
* Time: 3:00 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class MateOtherContig extends ZStatistic {
|
||||||
|
|
||||||
|
public int getWindowSize() { return 50; }
|
||||||
|
public int slideByValue() { return 25; }
|
||||||
|
public boolean usePreviouslySeenReads() { return false; }
|
||||||
|
|
||||||
|
public Pair<Number,Number> map(ReadBackedPileup pileup) {
|
||||||
|
int tot = 0;
|
||||||
|
int otherCon = 0;
|
||||||
|
for ( PileupElement e : pileup ) {
|
||||||
|
if ( e.getRead().getReadPairedFlag() ) {
|
||||||
|
++tot;
|
||||||
|
if ( ! e.getRead().getMateReferenceIndex().equals(e.getRead().getReferenceIndex()) ) {
|
||||||
|
++otherCon;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Pair<Number,Number>(tot,otherCon);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,35 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol.ZStatistic;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: chartl
|
||||||
|
* Date: 3/2/11
|
||||||
|
* Time: 2:09 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class MateUnmapped extends ZStatistic {
|
||||||
|
|
||||||
|
public Pair<Number,Number> map(ReadBackedPileup pileup) {
|
||||||
|
int numMatedReads = 0;
|
||||||
|
int numPairUnmapped = 0;
|
||||||
|
for (PileupElement e : pileup ) {
|
||||||
|
if ( e.getRead().getProperPairFlag() ) {
|
||||||
|
++numMatedReads;
|
||||||
|
if ( e.getRead().getMateUnmappedFlag() ) {
|
||||||
|
++numPairUnmapped;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Pair<Number,Number>(numMatedReads,numPairUnmapped);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getWindowSize() { return 100; }
|
||||||
|
public int slideByValue() { return 25; }
|
||||||
|
public boolean usePreviouslySeenReads() { return false; }
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,50 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||||
|
import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContext;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* @author chartl
|
||||||
|
*/
|
||||||
|
public abstract class CaseControl<X> extends AssociationContext<X> {
|
||||||
|
|
||||||
|
public Map<Cohort,X> reduce(List<Map<Sample,X>> window) {
|
||||||
|
X accumCase = null;
|
||||||
|
X accumControl = null;
|
||||||
|
for ( Map<Sample,X> sampleXMap : window ) {
|
||||||
|
for ( Map.Entry<Sample,X> entry : sampleXMap.entrySet() ) {
|
||||||
|
if ( entry.getKey().getProperty("cohort").equals("case") ) {
|
||||||
|
accum(accumCase, entry.getValue());
|
||||||
|
} else if ( entry.getKey().getProperty("cohort").equals("control") ) {
|
||||||
|
accum(accumControl,entry.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<Cohort,X> cohortMap = new HashMap<Cohort,X>(2);
|
||||||
|
cohortMap.put(Cohort.CASE,accumCase);
|
||||||
|
cohortMap.put(Cohort.CONTROL,accumControl);
|
||||||
|
return cohortMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected X accum(X left, X right) {
|
||||||
|
if ( left == null ) {
|
||||||
|
return right;
|
||||||
|
}
|
||||||
|
if ( right == null ) {
|
||||||
|
return left;
|
||||||
|
}
|
||||||
|
return add(left,right);
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract X map(ReadBackedPileup rbp);
|
||||||
|
public abstract X add(X left, X right);
|
||||||
|
|
||||||
|
public enum Cohort { CASE,CONTROL }
|
||||||
|
}
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces;
|
package org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -0,0 +1,37 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: chartl
|
||||||
|
* Date: Feb 24, 2011
|
||||||
|
* Time: 12:58:16 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public abstract class TStatistic extends CaseControl<Collection<Number>> {
|
||||||
|
|
||||||
|
public abstract Collection<Number> map(ReadBackedPileup rbp );
|
||||||
|
|
||||||
|
public Collection<Number> add(Collection<Number> left, Collection<Number> right) {
|
||||||
|
if ( left instanceof List) {
|
||||||
|
((List) left).addAll(right);
|
||||||
|
return left;
|
||||||
|
} else if ( left instanceof Set ) {
|
||||||
|
((Set) left).addAll(right);
|
||||||
|
return left;
|
||||||
|
} else {
|
||||||
|
List<Number> newList = new ArrayList<Number>(left.size()+right.size());
|
||||||
|
newList.addAll(left);
|
||||||
|
newList.addAll(right);
|
||||||
|
return newList;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: chartl
|
||||||
|
* Date: 3/2/11
|
||||||
|
* Time: 1:53 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public abstract class UStatistic extends CaseControl<Collection<Number>> {
|
||||||
|
|
||||||
|
public abstract Collection<Number> map(ReadBackedPileup rbp );
|
||||||
|
|
||||||
|
public Collection<Number> add(Collection<Number> left, Collection<Number> right) {
|
||||||
|
if ( left instanceof List) {
|
||||||
|
((List) left).addAll(right);
|
||||||
|
return left;
|
||||||
|
} else if ( left instanceof Set) {
|
||||||
|
((Set) left).addAll(right);
|
||||||
|
return left;
|
||||||
|
} else {
|
||||||
|
List<Number> newList = new ArrayList<Number>(left.size()+right.size());
|
||||||
|
newList.addAll(left);
|
||||||
|
newList.addAll(right);
|
||||||
|
return newList;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,26 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.casecontrol;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: chartl
|
||||||
|
* Date: Feb 24, 2011
|
||||||
|
* Time: 12:58:38 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public abstract class ZStatistic extends CaseControl<Pair<Number,Number>> {
|
||||||
|
public abstract Pair<Number,Number> map(ReadBackedPileup rbp );
|
||||||
|
|
||||||
|
public Pair<Number,Number> add(Pair<Number,Number> left, Pair<Number,Number> right) {
|
||||||
|
return new Pair<Number,Number>(left.first.doubleValue()+right.first.doubleValue(),
|
||||||
|
left.second.doubleValue()+right.second.doubleValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue