Generalized association is now working. Output is in a horrific format. Implementation of T-testing. Improvements are to look for classes dynamically (a la VariantEval/VariantAnnotator), beautify output, and do optimizations where they exist.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5341 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2011-03-01 01:23:37 +00:00
parent ce34a8a918
commit 0723b0f44c
11 changed files with 173 additions and 48 deletions

View File

@ -18,7 +18,7 @@ public abstract class AssociationContext<X extends AssociationContextAtom> {
}
public X map(MapExtender e) throws NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException, ClassCastException {
return (X) clazz.getConstructor().newInstance(e);
return (X) clazz.getConstructor(new Class[] {MapExtender.class}).newInstance(new Object[] {e});
}
public boolean filter(MapExtender m) { return true; }
@ -35,6 +35,8 @@ public abstract class AssociationContext<X extends AssociationContextAtom> {
}
public void slide() {
window = window.subList(slideByValue(),window.size());
ArrayList<X> newWindow = new ArrayList<X>((window.subList(slideByValue(),window.size())));
newWindow.ensureCapacity(getWindowSize());
window = newWindow;
}
}

View File

@ -4,6 +4,7 @@ import java.util.ArrayList;
import java.util.List;
import cern.jet.random.Normal;
import cern.jet.random.StudentT;
import org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces.*;
/**
@ -18,29 +19,32 @@ public class AssociationTestRunner {
public static List<String> runTests(AssociationContext context) {
List<String> results = new ArrayList<String>();
if ( context.getClass().isInstance(TStatistic.class)) {
results.add(runStudentT(context));
if ( context instanceof TStatistic) {
results.add(runStudentT((TStatistic) context));
}
if ( context.getClass().isInstance(ZStatistic.class)) {
if ( context instanceof ZStatistic) {
results.add(runZ((ZStatistic) context));
}
if ( context.getClass().isInstance(FisherExact.class) ) {
if ( context instanceof FisherExact ) {
results.add(runFisherExact(context));
}
return results;
}
public static String runStudentT(AssociationContext context) {
return "Test not yet implemented";
public static String runStudentT(TStatistic context) {
double t = context.getTStatistic();
StudentT studentT = new StudentT(context.getDegreesOfFreedom(),null);
double p = t < 0 ? 2*studentT.cdf(t) : 2*(1-studentT.cdf(t));
return String.format("T: %.2f\tP: %.2e",t,p);
}
public static String runZ(ZStatistic context) {
double z = context.getZStatistic();
double p = z < 0 ? 2*standardNormal.cdf(z) : 2*(1-standardNormal.cdf(z));
return String.format("Z: %.2f\tP: %.2f",z,p);
return String.format("Z: %.2f\tP: %.2e",z,p);
}
public static String runFisherExact(AssociationContext context) {

View File

@ -27,26 +27,26 @@ public class MapExtender {
}
public Map<Sample,StratifiedAlignmentContext> getPreviousContext() {
return previous.getContext();
return previous != null ? previous.getContext() : null;
}
public ReferenceContext getPreviousRef() {
return previous.getRef();
return previous != null ? previous.getRef() : null;
}
public RefMetaDataTracker getPreviousTracker() {
return previous.getTracker();
return previous != null ? previous.getTracker() : null;
}
public Map<Sample,StratifiedAlignmentContext> getContext() {
return current.getContext();
return current != null ? current.getContext() : null;
}
public ReferenceContext getReferenceContext() {
return current.getRef();
return current != null ? current.getRef() : null;
}
public RefMetaDataTracker getTracker() {
return current.getTracker();
return current != null ? current.getTracker() : null;
}
}

View File

@ -16,7 +16,7 @@ public class RegionalAssociationHandler {
private Set<AssociationContext> associations;
public RegionalAssociationHandler(Set<AssociationContext> contexts) {
maps = null;
maps = new MapExtender();
associations = contexts;
}

View File

@ -7,7 +7,10 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.LowMappingQuality;
import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.LowMappingQualityAtom;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.PrintStream;
import java.util.HashSet;
@ -46,15 +49,21 @@ public class RegionalAssociationWalker extends LocusWalker<MapHolder, RegionalAs
}
List<String> testsHere = rac.runTests();
// todo -- really awful shitty formatting
out.printf("%s%n",rac.getLocation().toString());
for ( String s : testsHere ) {
out.printf("%s%n",s);
if ( testsHere.size() > 0 ) {
out.printf("%s%n",rac.getLocation().toString());
for ( String s : testsHere ) {
out.printf("%s%n",s);
}
}
return rac;
}
private AssociationContext stringToAssociationContext(String s) {
return null;
if ( s.equals("LowMappingQuality") ) {
return new LowMappingQuality();
}
throw new UserException(String.format("AssociationContext type %s not found.",s));
}
private Set<AssociationContext> getAssociations() {

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces;
import org.broadinstitute.sting.utils.collections.Pair;
/**
* Created by IntelliJ IDEA.
* User: chartl
@ -9,5 +11,5 @@ package org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces;
*/
public interface TStatistic {
public abstract double getTStatistic();
public abstract int getDegreesOfFreedom();
public abstract double getDegreesOfFreedom();
}

View File

@ -1,11 +1,14 @@
package org.broadinstitute.sting.oneoffprojects.walkers.association.modules;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContext;
import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContextAtom;
import org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces.TStatistic;
import org.broadinstitute.sting.oneoffprojects.walkers.association.interfaces.ZStatistic;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.Map;
import java.util.*;
/**
* Created by IntelliJ IDEA.
@ -14,12 +17,17 @@ import java.util.Map;
* Time: 1:49:37 PM
* To change this template use File | Settings | File Templates.
*/
public class LowMappingQuality extends AssociationContext<LowMappingQualityAtom> implements ZStatistic {
public class LowMappingQuality extends AssociationContext<LowMappingQualityAtom> implements ZStatistic, TStatistic {
private double df = -1;
public LowMappingQuality(Class<LowMappingQualityAtom> atomClass ) {
super(atomClass);
}
public LowMappingQuality() {
this(LowMappingQualityAtom.class);
}
public int getWindowSize() { return 20; }
public int slideByValue() { return 5; }
@ -41,4 +49,41 @@ public class LowMappingQuality extends AssociationContext<LowMappingQualityAtom>
double SE = Math.sqrt( p_pool*(1-p_pool)*( 1/((double)ctrl_tot) + 1/((double)case_tot) ));
return (p_case - p_ctrl)/SE;
}
public double getTStatistic() {
// todo -- running average is unnecessary and inefficient here, but was quick to write
MathUtils.RunningAverage cases = new MathUtils.RunningAverage();
MathUtils.RunningAverage controls = new MathUtils.RunningAverage();
Set<Sample> caseSamples = new HashSet<Sample>();
Set<Sample> controlSamples = new HashSet<Sample>();
for ( LowMappingQualityAtom atom : window ) {
Map<String,List<Integer>> caseControlQuals = atom.getCaseControlMappingQuals();
cases.addAll((Collection) caseControlQuals.get("case"));
controls.addAll((Collection) caseControlQuals.get("control"));
for ( Sample s : atom.getSamples() ) {
if ( s.getProperty("cohort").equals("case") ) {
caseSamples.add(s);
}
if ( s.getProperty("cohort").equals("control")) {
controlSamples.add(s);
}
}
}
double a = cases.var()/caseSamples.size();
double b = controls.var()/controlSamples.size();
df = ( a + b )/( a*a/(caseSamples.size()-1) + b*b/(controlSamples.size()-1));
return ( cases.mean() - controls.mean() )/Math.sqrt( (cases.var()/caseSamples.size()) + (controls.var()/controlSamples.size()));
}
// todo -- this is super hacky
public double getDegreesOfFreedom() {
if ( df < 0 ) {
getTStatistic();
}
double holder = df;
df = -1;
return holder;
}
}

View File

@ -8,9 +8,7 @@ import org.broadinstitute.sting.oneoffprojects.walkers.association.MapHolder;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.*;
/**
* Created by IntelliJ IDEA.
@ -24,10 +22,12 @@ public class LowMappingQualityAtom extends AssociationContextAtom {
private static StratifiedAlignmentContext.StratifiedContextType TYPE = StratifiedAlignmentContext.StratifiedContextType.COMPLETE;
protected Map<Sample, Pair<Integer,Integer>> stratifiedCounts;
protected Map<Sample,List<Integer>> mappingQualities;
public LowMappingQualityAtom(MapExtender e) {
super(e);
stratifiedCounts = new HashMap<Sample,Pair<Integer,Integer>>();
mappingQualities = new HashMap<Sample,List<Integer>>();
for ( Sample s : e.getContext().keySet() ) {
makeCounts(s,e.getPreviousContext(),e.getContext());
}
@ -36,31 +36,50 @@ public class LowMappingQualityAtom extends AssociationContextAtom {
public void makeCounts( Sample sample, Map<Sample, StratifiedAlignmentContext> prev, Map<Sample,StratifiedAlignmentContext> cur ) {
int low = 0;
int tot = 0;
HashSet<String> rnames = new HashSet<String>(prev.get(sample).getContext(TYPE).size());
for ( PileupElement e : prev.get(sample).getContext(TYPE).getBasePileup() ) {
rnames.add(e.getRead().getReadName());
}
for ( PileupElement e : prev.get(sample).getContext(TYPE).getExtendedEventPileup() ) {
rnames.add(e.getRead().getReadName());
}
for ( PileupElement e : cur.get(sample).getContext(TYPE).getBasePileup() ) {
if ( ! rnames.contains(e.getRead().getReadName()) ) {
++tot;
if ( e.getMappingQual() < LOW_THRESH ) {
++low;
if ( cur != null && cur.get(sample) != null && cur.get(sample).getContext(TYPE) != null) {
HashSet<String> rnames;
List<Integer> mapQuals = new ArrayList<Integer>(cur.size());
if ( prev != null && prev.get(sample) != null && prev.get(sample).getContext(TYPE) != null) {
rnames = new HashSet<String>(prev.get(sample).getContext(TYPE).size());
if ( prev.get(sample).getContext(TYPE).hasBasePileup()) {
for ( PileupElement e : prev.get(sample).getContext(TYPE).getBasePileup() ) {
rnames.add(e.getRead().getReadName());
}
}
if ( prev.get(sample).getContext(TYPE).hasExtendedEventPileup()) {
for ( PileupElement e : prev.get(sample).getContext(TYPE).getExtendedEventPileup() ) {
rnames.add(e.getRead().getReadName());
}
}
} else {
rnames = new HashSet<String>(0);
}
if ( cur.get(sample).getContext(TYPE).hasBasePileup()) {
for ( PileupElement e : cur.get(sample).getContext(TYPE).getBasePileup() ) {
if ( ! rnames.contains(e.getRead().getReadName()) ) {
++tot;
if ( e.getMappingQual() < LOW_THRESH ) {
++low;
}
mapQuals.add(e.getMappingQual());
}
}
}
}
for ( PileupElement e : cur.get(sample).getContext(TYPE).getExtendedEventPileup() ) {
if ( ! rnames.contains(e.getRead().getReadName()) ) {
++tot;
if ( e.getMappingQual() < LOW_THRESH ) {
++low;
if ( cur.get(sample).getContext(TYPE).hasExtendedEventPileup()) {
for ( PileupElement e : cur.get(sample).getContext(TYPE).getExtendedEventPileup() ) {
if ( ! rnames.contains(e.getRead().getReadName()) ) {
++tot;
if ( e.getMappingQual() < LOW_THRESH ) {
++low;
}
mapQuals.add(e.getMappingQual());
}
}
}
}
stratifiedCounts.put(sample,new Pair<Integer,Integer>(low,tot));
stratifiedCounts.put(sample,new Pair<Integer,Integer>(low,tot));
mappingQualities.put(sample,mapQuals);
}
}
@ -88,4 +107,37 @@ public class LowMappingQualityAtom extends AssociationContextAtom {
return caseControlCounts;
}
public Map<String,List<Integer>> getCaseControlMappingQuals() {
int caseSize = 0;
int controlSize = 0;
for ( Map.Entry<Sample,List<Integer>> v : mappingQualities.entrySet() ) {
if ( v.getKey().getProperty("cohort").equals("case") ) {
caseSize += v.getValue().size();
}
if ( v.getKey().getProperty("cohort").equals("control") ) {
controlSize += v.getValue().size();
}
}
List<Integer> caseQuals = new ArrayList<Integer>(caseSize);
List<Integer> controlQuals = new ArrayList<Integer>(controlSize);
for ( Map.Entry<Sample,List<Integer>> v : mappingQualities.entrySet() ) {
if ( v.getKey().getProperty("cohort").equals("case") ) {
caseQuals.addAll(v.getValue());
}
if ( v.getKey().getProperty("cohort").equals("control")) {
controlQuals.addAll(v.getValue());
}
}
Map<String,List<Integer>> caseControlQuals = new HashMap<String,List<Integer>>(2);
caseControlQuals.put("case",caseQuals);
caseControlQuals.put("control",controlQuals);
return caseControlQuals;
}
public Set<Sample> getSamples() {
return stratifiedCounts.keySet();
}
}

View File

@ -804,8 +804,15 @@ public class MathUtils {
s += ( obs - oldMean ) * ( obs - mean );
}
public void addAll(Collection<Number> col) {
for ( Number o : col ) {
add(o.doubleValue());
}
}
public double mean() { return mean; }
public double stddev() { return Math.sqrt(s/(obs_count - 1)); }
public double var() { return s/(obs_count - 1); }
public long observationCount() { return obs_count; }
}

View File

@ -15,7 +15,6 @@ class QTools extends QScript {
@Argument(doc="The samples to extract",shortName="sm",required=false) var samples : String = _
@Argument(doc="Keep filtered sites when merging or extracting?",shortName="kf",required=false) var keepFilters : Boolean = false
@Argument(doc="Input interval list (not used with VCF tools)",shortName="il",required=false) var intervalList : File = _
@Argument(doc="Reference fai file",shortName="fai",required=false) var fai : File = _
@Argument(doc="interval list expand start",shortName="il_start",required=false) var ilStart : Int = 1
@Argument(doc="interval list expand size",shortName="il_size",required=false) var ilSize : Int = 50
// todo -- additional arguments or argument collection
@ -76,7 +75,7 @@ class QTools extends QScript {
}
def runExpandTargets = {
var ets : ExpandIntervals = new ExpandIntervals(intervalList,ilStart,ilSize,output,fai,"INTERVALS","INTERVALS")
var ets : ExpandIntervals = new ExpandIntervals(intervalList,ilStart,ilSize,output,ref,"INTERVALS","INTERVALS")
add(ets)
}
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.queue
import org.broadinstitute.sting.queue.util.Logging
import org.broadinstitute.sting.queue.function.QFunction
import org.broadinstitute.sting.utils.text.XReadLines
/**
* Defines a Queue pipeline as a collection of CommandLineFunctions.
@ -73,6 +74,10 @@ trait QScript extends Logging {
def addAll(functions: List[QFunction] ) = {
functions.foreach( f => add(f) )
}
def extractFileEntries(in: File): List[File] = {
return collection.JavaConversions.asScalaIterator((new XReadLines(in))).toList.map( new File(_) )
}
}
object QScript {