Generalized association is now working. Output is in a horrific format. Implementation of T-testing. Improvements are to look for classes dynamically (a la VariantEval/VariantAnnotator), beautify output, and do optimizations where they exist.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5341 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ce34a8a918
commit
0723b0f44c
|
|
@ -18,7 +18,7 @@ public abstract class AssociationContext<X extends AssociationContextAtom> {
|
|||
}
|
||||
|
||||
public X map(MapExtender e) throws NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException, ClassCastException {
|
||||
return (X) clazz.getConstructor().newInstance(e);
|
||||
return (X) clazz.getConstructor(new Class[] {MapExtender.class}).newInstance(new Object[] {e});
|
||||
}
|
||||
|
||||
public boolean filter(MapExtender m) { return true; }
|
||||
|
|
@ -35,6 +35,8 @@ public abstract class AssociationContext<X extends AssociationContextAtom> {
|
|||
}
|
||||
|
||||
public void slide() {
|
||||
window = window.subList(slideByValue(),window.size());
|
||||
ArrayList<X> newWindow = new ArrayList<X>((window.subList(slideByValue(),window.size())));
|
||||
newWindow.ensureCapacity(getWindowSize());
|
||||
window = newWindow;
|
||||
}
|
||||
}
|
||||
|
|
@ -4,6 +4,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
import cern.jet.random.Normal;
|
||||
import cern.jet.random.StudentT;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces.*;
|
||||
|
||||
/**
|
||||
|
|
@ -18,29 +19,32 @@ public class AssociationTestRunner {
|
|||
|
||||
public static List<String> runTests(AssociationContext context) {
|
||||
List<String> results = new ArrayList<String>();
|
||||
if ( context.getClass().isInstance(TStatistic.class)) {
|
||||
results.add(runStudentT(context));
|
||||
if ( context instanceof TStatistic) {
|
||||
results.add(runStudentT((TStatistic) context));
|
||||
}
|
||||
|
||||
if ( context.getClass().isInstance(ZStatistic.class)) {
|
||||
if ( context instanceof ZStatistic) {
|
||||
results.add(runZ((ZStatistic) context));
|
||||
}
|
||||
|
||||
if ( context.getClass().isInstance(FisherExact.class) ) {
|
||||
if ( context instanceof FisherExact ) {
|
||||
results.add(runFisherExact(context));
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
public static String runStudentT(AssociationContext context) {
|
||||
return "Test not yet implemented";
|
||||
public static String runStudentT(TStatistic context) {
|
||||
double t = context.getTStatistic();
|
||||
StudentT studentT = new StudentT(context.getDegreesOfFreedom(),null);
|
||||
double p = t < 0 ? 2*studentT.cdf(t) : 2*(1-studentT.cdf(t));
|
||||
return String.format("T: %.2f\tP: %.2e",t,p);
|
||||
}
|
||||
|
||||
public static String runZ(ZStatistic context) {
|
||||
double z = context.getZStatistic();
|
||||
double p = z < 0 ? 2*standardNormal.cdf(z) : 2*(1-standardNormal.cdf(z));
|
||||
return String.format("Z: %.2f\tP: %.2f",z,p);
|
||||
return String.format("Z: %.2f\tP: %.2e",z,p);
|
||||
}
|
||||
|
||||
public static String runFisherExact(AssociationContext context) {
|
||||
|
|
|
|||
|
|
@ -27,26 +27,26 @@ public class MapExtender {
|
|||
}
|
||||
|
||||
public Map<Sample,StratifiedAlignmentContext> getPreviousContext() {
|
||||
return previous.getContext();
|
||||
return previous != null ? previous.getContext() : null;
|
||||
}
|
||||
|
||||
public ReferenceContext getPreviousRef() {
|
||||
return previous.getRef();
|
||||
return previous != null ? previous.getRef() : null;
|
||||
}
|
||||
|
||||
public RefMetaDataTracker getPreviousTracker() {
|
||||
return previous.getTracker();
|
||||
return previous != null ? previous.getTracker() : null;
|
||||
}
|
||||
|
||||
public Map<Sample,StratifiedAlignmentContext> getContext() {
|
||||
return current.getContext();
|
||||
return current != null ? current.getContext() : null;
|
||||
}
|
||||
|
||||
public ReferenceContext getReferenceContext() {
|
||||
return current.getRef();
|
||||
return current != null ? current.getRef() : null;
|
||||
}
|
||||
|
||||
public RefMetaDataTracker getTracker() {
|
||||
return current.getTracker();
|
||||
return current != null ? current.getTracker() : null;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ public class RegionalAssociationHandler {
|
|||
private Set<AssociationContext> associations;
|
||||
|
||||
public RegionalAssociationHandler(Set<AssociationContext> contexts) {
|
||||
maps = null;
|
||||
maps = new MapExtender();
|
||||
associations = contexts;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,10 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.LowMappingQuality;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.LowMappingQualityAtom;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.HashSet;
|
||||
|
|
@ -46,15 +49,21 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
|
|||
}
|
||||
List<String> testsHere = rac.runTests();
|
||||
// todo -- really awful shitty formatting
|
||||
out.printf("%s%n",rac.getLocation().toString());
|
||||
for ( String s : testsHere ) {
|
||||
out.printf("%s%n",s);
|
||||
if ( testsHere.size() > 0 ) {
|
||||
out.printf("%s%n",rac.getLocation().toString());
|
||||
for ( String s : testsHere ) {
|
||||
out.printf("%s%n",s);
|
||||
}
|
||||
}
|
||||
return rac;
|
||||
}
|
||||
|
||||
private AssociationContext stringToAssociationContext(String s) {
|
||||
return null;
|
||||
if ( s.equals("LowMappingQuality") ) {
|
||||
return new LowMappingQuality();
|
||||
}
|
||||
|
||||
throw new UserException(String.format("AssociationContext type %s not found.",s));
|
||||
}
|
||||
|
||||
private Set<AssociationContext> getAssociations() {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
|
|
@ -9,5 +11,5 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces;
|
|||
*/
|
||||
public interface TStatistic {
|
||||
public abstract double getTStatistic();
|
||||
public abstract int getDegreesOfFreedom();
|
||||
public abstract double getDegreesOfFreedom();
|
||||
}
|
||||
|
|
@ -1,11 +1,14 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContext;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContextAtom;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces.TStatistic;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces.ZStatistic;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -14,12 +17,17 @@ import java.util.Map;
|
|||
* Time: 1:49:37 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class LowMappingQuality extends AssociationContext<LowMappingQualityAtom> implements ZStatistic {
|
||||
public class LowMappingQuality extends AssociationContext<LowMappingQualityAtom> implements ZStatistic, TStatistic {
|
||||
private double df = -1;
|
||||
|
||||
public LowMappingQuality(Class<LowMappingQualityAtom> atomClass ) {
|
||||
super(atomClass);
|
||||
}
|
||||
|
||||
public LowMappingQuality() {
|
||||
this(LowMappingQualityAtom.class);
|
||||
}
|
||||
|
||||
public int getWindowSize() { return 20; }
|
||||
public int slideByValue() { return 5; }
|
||||
|
||||
|
|
@ -41,4 +49,41 @@ public class LowMappingQuality extends AssociationContext<LowMappingQualityAtom>
|
|||
double SE = Math.sqrt( p_pool*(1-p_pool)*( 1/((double)ctrl_tot) + 1/((double)case_tot) ));
|
||||
return (p_case - p_ctrl)/SE;
|
||||
}
|
||||
|
||||
public double getTStatistic() {
|
||||
// todo -- running average is unnecessary and inefficient here, but was quick to write
|
||||
MathUtils.RunningAverage cases = new MathUtils.RunningAverage();
|
||||
MathUtils.RunningAverage controls = new MathUtils.RunningAverage();
|
||||
Set<Sample> caseSamples = new HashSet<Sample>();
|
||||
Set<Sample> controlSamples = new HashSet<Sample>();
|
||||
for ( LowMappingQualityAtom atom : window ) {
|
||||
Map<String,List<Integer>> caseControlQuals = atom.getCaseControlMappingQuals();
|
||||
cases.addAll((Collection) caseControlQuals.get("case"));
|
||||
controls.addAll((Collection) caseControlQuals.get("control"));
|
||||
for ( Sample s : atom.getSamples() ) {
|
||||
if ( s.getProperty("cohort").equals("case") ) {
|
||||
caseSamples.add(s);
|
||||
}
|
||||
if ( s.getProperty("cohort").equals("control")) {
|
||||
controlSamples.add(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
double a = cases.var()/caseSamples.size();
|
||||
double b = controls.var()/controlSamples.size();
|
||||
|
||||
df = ( a + b )/( a*a/(caseSamples.size()-1) + b*b/(controlSamples.size()-1));
|
||||
|
||||
return ( cases.mean() - controls.mean() )/Math.sqrt( (cases.var()/caseSamples.size()) + (controls.var()/controlSamples.size()));
|
||||
}
|
||||
|
||||
// todo -- this is super hacky
|
||||
public double getDegreesOfFreedom() {
|
||||
if ( df < 0 ) {
|
||||
getTStatistic();
|
||||
}
|
||||
double holder = df;
|
||||
df = -1;
|
||||
return holder;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,9 +8,7 @@ import org.broadinstitute.sting.oneoffprojects.walkers.association.MapHolder;
|
|||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -24,10 +22,12 @@ public class LowMappingQualityAtom extends AssociationContextAtom {
|
|||
private static StratifiedAlignmentContext.StratifiedContextType TYPE = StratifiedAlignmentContext.StratifiedContextType.COMPLETE;
|
||||
|
||||
protected Map<Sample, Pair<Integer,Integer>> stratifiedCounts;
|
||||
protected Map<Sample,List<Integer>> mappingQualities;
|
||||
|
||||
public LowMappingQualityAtom(MapExtender e) {
|
||||
super(e);
|
||||
stratifiedCounts = new HashMap<Sample,Pair<Integer,Integer>>();
|
||||
mappingQualities = new HashMap<Sample,List<Integer>>();
|
||||
for ( Sample s : e.getContext().keySet() ) {
|
||||
makeCounts(s,e.getPreviousContext(),e.getContext());
|
||||
}
|
||||
|
|
@ -36,31 +36,50 @@ public class LowMappingQualityAtom extends AssociationContextAtom {
|
|||
public void makeCounts( Sample sample, Map<Sample, StratifiedAlignmentContext> prev, Map<Sample,StratifiedAlignmentContext> cur ) {
|
||||
int low = 0;
|
||||
int tot = 0;
|
||||
HashSet<String> rnames = new HashSet<String>(prev.get(sample).getContext(TYPE).size());
|
||||
for ( PileupElement e : prev.get(sample).getContext(TYPE).getBasePileup() ) {
|
||||
rnames.add(e.getRead().getReadName());
|
||||
}
|
||||
for ( PileupElement e : prev.get(sample).getContext(TYPE).getExtendedEventPileup() ) {
|
||||
rnames.add(e.getRead().getReadName());
|
||||
}
|
||||
for ( PileupElement e : cur.get(sample).getContext(TYPE).getBasePileup() ) {
|
||||
if ( ! rnames.contains(e.getRead().getReadName()) ) {
|
||||
++tot;
|
||||
if ( e.getMappingQual() < LOW_THRESH ) {
|
||||
++low;
|
||||
if ( cur != null && cur.get(sample) != null && cur.get(sample).getContext(TYPE) != null) {
|
||||
HashSet<String> rnames;
|
||||
List<Integer> mapQuals = new ArrayList<Integer>(cur.size());
|
||||
if ( prev != null && prev.get(sample) != null && prev.get(sample).getContext(TYPE) != null) {
|
||||
rnames = new HashSet<String>(prev.get(sample).getContext(TYPE).size());
|
||||
if ( prev.get(sample).getContext(TYPE).hasBasePileup()) {
|
||||
for ( PileupElement e : prev.get(sample).getContext(TYPE).getBasePileup() ) {
|
||||
rnames.add(e.getRead().getReadName());
|
||||
}
|
||||
}
|
||||
if ( prev.get(sample).getContext(TYPE).hasExtendedEventPileup()) {
|
||||
for ( PileupElement e : prev.get(sample).getContext(TYPE).getExtendedEventPileup() ) {
|
||||
rnames.add(e.getRead().getReadName());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
rnames = new HashSet<String>(0);
|
||||
}
|
||||
if ( cur.get(sample).getContext(TYPE).hasBasePileup()) {
|
||||
for ( PileupElement e : cur.get(sample).getContext(TYPE).getBasePileup() ) {
|
||||
if ( ! rnames.contains(e.getRead().getReadName()) ) {
|
||||
++tot;
|
||||
if ( e.getMappingQual() < LOW_THRESH ) {
|
||||
++low;
|
||||
}
|
||||
mapQuals.add(e.getMappingQual());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for ( PileupElement e : cur.get(sample).getContext(TYPE).getExtendedEventPileup() ) {
|
||||
if ( ! rnames.contains(e.getRead().getReadName()) ) {
|
||||
++tot;
|
||||
if ( e.getMappingQual() < LOW_THRESH ) {
|
||||
++low;
|
||||
if ( cur.get(sample).getContext(TYPE).hasExtendedEventPileup()) {
|
||||
for ( PileupElement e : cur.get(sample).getContext(TYPE).getExtendedEventPileup() ) {
|
||||
if ( ! rnames.contains(e.getRead().getReadName()) ) {
|
||||
++tot;
|
||||
if ( e.getMappingQual() < LOW_THRESH ) {
|
||||
++low;
|
||||
}
|
||||
mapQuals.add(e.getMappingQual());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stratifiedCounts.put(sample,new Pair<Integer,Integer>(low,tot));
|
||||
stratifiedCounts.put(sample,new Pair<Integer,Integer>(low,tot));
|
||||
mappingQualities.put(sample,mapQuals);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -88,4 +107,37 @@ public class LowMappingQualityAtom extends AssociationContextAtom {
|
|||
return caseControlCounts;
|
||||
}
|
||||
|
||||
public Map<String,List<Integer>> getCaseControlMappingQuals() {
|
||||
int caseSize = 0;
|
||||
int controlSize = 0;
|
||||
for ( Map.Entry<Sample,List<Integer>> v : mappingQualities.entrySet() ) {
|
||||
if ( v.getKey().getProperty("cohort").equals("case") ) {
|
||||
caseSize += v.getValue().size();
|
||||
}
|
||||
if ( v.getKey().getProperty("cohort").equals("control") ) {
|
||||
controlSize += v.getValue().size();
|
||||
}
|
||||
}
|
||||
List<Integer> caseQuals = new ArrayList<Integer>(caseSize);
|
||||
List<Integer> controlQuals = new ArrayList<Integer>(controlSize);
|
||||
for ( Map.Entry<Sample,List<Integer>> v : mappingQualities.entrySet() ) {
|
||||
if ( v.getKey().getProperty("cohort").equals("case") ) {
|
||||
caseQuals.addAll(v.getValue());
|
||||
}
|
||||
|
||||
if ( v.getKey().getProperty("cohort").equals("control")) {
|
||||
controlQuals.addAll(v.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
Map<String,List<Integer>> caseControlQuals = new HashMap<String,List<Integer>>(2);
|
||||
caseControlQuals.put("case",caseQuals);
|
||||
caseControlQuals.put("control",controlQuals);
|
||||
return caseControlQuals;
|
||||
}
|
||||
|
||||
public Set<Sample> getSamples() {
|
||||
return stratifiedCounts.keySet();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -804,8 +804,15 @@ public class MathUtils {
|
|||
s += ( obs - oldMean ) * ( obs - mean );
|
||||
}
|
||||
|
||||
public void addAll(Collection<Number> col) {
|
||||
for ( Number o : col ) {
|
||||
add(o.doubleValue());
|
||||
}
|
||||
}
|
||||
|
||||
public double mean() { return mean; }
|
||||
public double stddev() { return Math.sqrt(s/(obs_count - 1)); }
|
||||
public double var() { return s/(obs_count - 1); }
|
||||
public long observationCount() { return obs_count; }
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ class QTools extends QScript {
|
|||
@Argument(doc="The samples to extract",shortName="sm",required=false) var samples : String = _
|
||||
@Argument(doc="Keep filtered sites when merging or extracting?",shortName="kf",required=false) var keepFilters : Boolean = false
|
||||
@Argument(doc="Input interval list (not used with VCF tools)",shortName="il",required=false) var intervalList : File = _
|
||||
@Argument(doc="Reference fai file",shortName="fai",required=false) var fai : File = _
|
||||
@Argument(doc="interval list expand start",shortName="il_start",required=false) var ilStart : Int = 1
|
||||
@Argument(doc="interval list expand size",shortName="il_size",required=false) var ilSize : Int = 50
|
||||
// todo -- additional arguments or argument collection
|
||||
|
|
@ -76,7 +75,7 @@ class QTools extends QScript {
|
|||
}
|
||||
|
||||
def runExpandTargets = {
|
||||
var ets : ExpandIntervals = new ExpandIntervals(intervalList,ilStart,ilSize,output,fai,"INTERVALS","INTERVALS")
|
||||
var ets : ExpandIntervals = new ExpandIntervals(intervalList,ilStart,ilSize,output,ref,"INTERVALS","INTERVALS")
|
||||
add(ets)
|
||||
}
|
||||
}
|
||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.queue
|
|||
|
||||
import org.broadinstitute.sting.queue.util.Logging
|
||||
import org.broadinstitute.sting.queue.function.QFunction
|
||||
import org.broadinstitute.sting.utils.text.XReadLines
|
||||
|
||||
/**
|
||||
* Defines a Queue pipeline as a collection of CommandLineFunctions.
|
||||
|
|
@ -73,6 +74,10 @@ trait QScript extends Logging {
|
|||
def addAll(functions: List[QFunction] ) = {
|
||||
functions.foreach( f => add(f) )
|
||||
}
|
||||
|
||||
def extractFileEntries(in: File): List[File] = {
|
||||
return collection.JavaConversions.asScalaIterator((new XReadLines(in))).toList.map( new File(_) )
|
||||
}
|
||||
}
|
||||
|
||||
object QScript {
|
||||
|
|
|
|||
Loading…
Reference in New Issue