diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationContext.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationContext.java deleted file mode 100755 index 7d28013aa..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationContext.java +++ /dev/null @@ -1,89 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association; - -import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.*; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/2/11 - * Time: 11:58 AM - * To change this template use File | Settings | File Templates. - */ -public abstract class AssociationContext { - - protected List> window; - private int size; - private int slide; - protected double zVal; - - public AssociationContext() { - } - - public AssociationContext(final RegionalAssociationWalker parent) { - this.init(parent); - } - - // specifies whether to use previously seen reads - public abstract boolean usePreviouslySeenReads(); - - // specifies the map from a sample's pileup to the data we want to test - public abstract Object map(ReadBackedPileup rbp); - - // specifies how to take the per-sample data and reduce them into testable pairs - public abstract Map reduce(List> win); - - // do we filter the current location (e.g. omit from window) - public boolean filter(MapExtender m) { return true; } - - // a basic initialization of the context (give the walker for access to object?) - public void init(RegionalAssociationWalker walker) { - size = walker.windowSize; - slide = walker.slideBy; - window = new ArrayList>(size); - zVal = walker.zVal; - } - - public Map mapLocus(MapExtender extender) { - Map pileups; - if ( ! usePreviouslySeenReads() ) { - pileups = extender.getReadFilteredPileup(); - } else { - pileups = extender.getFullPileup(); - } - Map maps = new HashMap(pileups.size()); - for ( Map.Entry samPileup : pileups.entrySet() ) { - maps.put(samPileup.getKey(),map(samPileup.getValue())); - } - - return maps; - } - - - public void addData(Map sampleData) { - window.add(sampleData); - } - - public void flush() { - window = new ArrayList>(size); - } - - public boolean isFull() { - return window.size() >= size; - } - - public void slide() { - ArrayList> newWindow = new ArrayList>((window.subList(slide,window.size()))); - newWindow.ensureCapacity(size); - window = newWindow; - } - - public int getWindowSize() { - return window.size(); - } - - public boolean testOnFlush() { return true; } - public boolean canFlush() { return true; } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java deleted file mode 100755 index b353e145c..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java +++ /dev/null @@ -1,93 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association; - -import java.util.Collection; -import java.util.Map; - -import cern.jet.random.Normal; -import cern.jet.random.StudentT; -import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.*; -import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.Dichotomizable; -import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.Dichotomizer1D; -import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.StatisticalTest; -import org.broadinstitute.sting.utils.MannWhitneyU; -import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.collections.Pair; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: Feb 24, 2011 - * Time: 11:48:26 AM - * To change this template use File | Settings | File Templates. - */ -public class AssociationTestRunner { - final static int MAX_Q_VALUE = Integer.MAX_VALUE; - // todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance - final static Dichotomizer1D.Transform LOG_TRANSFORM = (new Dichotomizer1D()).new Transform() { - private double EPSILON = 1e-20; - @Override - public double transform(double d) { - return Math.log(d+EPSILON); - } - }; - - final static Dichotomizer1D.Transform STD_ASIN = (new Dichotomizer1D()).new Transform() { - @Override - public double transform(double d) { - return Math.asin(Math.sqrt(d)); - } - }; - - public static int pToQ(double p) { - return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE); - } - - public static Pair,Pair> getTestValues(AssociationContext context) { - if ( context instanceof StatisticalTest ) { - Pair statAndP = ((StatisticalTest) context).getStatisticAndPValue(); - Double delta = ((StatisticalTest) context).getZConfDelta(); - return new Pair,Pair>( - new Pair(statAndP.first,delta), - new Pair(statAndP.second,pToQ(statAndP.second))); - } - - return null; - } - - public static String runTests(AssociationContext context) { - if ( context instanceof StatisticalTest ) { - Pair,Pair> results = getTestValues(context); - return String.format("%s: %.2f\tD: %.2f\tP: %.2e\tQ: %d", - ((StatisticalTest) context).getStatisticName() , - results.first.first,results.first.second,results.second.first,results.second.second); - } - - return null; - } - - @Deprecated // this is used for testing only at the moment - public static Pair mannWhitneyUTest(ValueTest context) { - Map> caseControlVectors = context.getCaseControl(); - if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) { - return new Pair(Double.NaN,Double.NaN); - } - MannWhitneyU mwu = new MannWhitneyU(); - for ( Number n : caseControlVectors.get(CaseControl.Cohort.CASE) ) { - mwu.add(n, MannWhitneyU.USet.SET1); - } - for ( Number n : caseControlVectors.get(CaseControl.Cohort.CONTROL) ) { - mwu.add(n,MannWhitneyU.USet.SET2); - } - return mwu.runTwoSidedTest(); - } - - public static Pair getDichotomizedValues(AssociationContext context) { - if ( context instanceof Dichotomizable ) { - double raw = Dichotomizer1D.simpleGaussianDichotomy(((Dichotomizable)context)); - double log = Dichotomizer1D.simpleGaussianDichotomy(((Dichotomizable)context),LOG_TRANSFORM); - return new Pair(raw,log); - } - - return new Pair(Double.NaN,Double.NaN); - } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapExtender.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapExtender.java deleted file mode 100755 index ff414f536..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapExtender.java +++ /dev/null @@ -1,96 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; -import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; - -import java.util.*; - -/** - * @Author chartl - * @Date 2011-02-23 - * Holds multiple map contexts for use in the regional association walker - */ -public class MapExtender { - static AlignmentContextUtils.ReadOrientation TYPE = AlignmentContextUtils.ReadOrientation.COMPLETE; - // hold on to these -- atoms may want access to the tracker or other context types - private MapHolder previous = null; - private MapHolder current = null; - private Map fullPileup = null; - private Map readFilteredPileup = null; - private Map idToSampleObject = null; - - public MapExtender(Set samples) { - idToSampleObject = new HashMap(samples.size()); - for ( Sample s : samples ) { - idToSampleObject.put(s.getId(),s); - } - } - - public void set(MapHolder holder) { - previous = current; - current = holder; - - Map prevPileup = fullPileup; - fullPileup = new HashMap(); - readFilteredPileup = new HashMap(); - - if ( current != null ) { - for ( Map.Entry sac : current.getContext(idToSampleObject).entrySet() ) { - AlignmentContext context = AlignmentContextUtils.stratify(sac.getValue(), TYPE); - if ( context.hasBasePileup() ) { - fullPileup.put(sac.getKey(),context.getBasePileup()); - } else if ( context.hasExtendedEventPileup() ) { - fullPileup.put(sac.getKey(),context.getExtendedEventPileup()); - } - - if ( prevPileup != null ) { - - List filtElems = new ArrayList(fullPileup.get(sac.getKey()).size()); - Set seenReads = prevPileup.containsKey(sac.getKey()) ? new HashSet(prevPileup.get(sac.getKey()).getReads()) : new HashSet(0); - for ( PileupElement e : fullPileup.get(sac.getKey()) ) { - if ( ! seenReads.contains(e.getRead()) ) { - filtElems.add(e); - } - } - readFilteredPileup.put(sac.getKey(),new ReadBackedPileupImpl(current.getRef().getLocus(),filtElems)); - } else { - readFilteredPileup = fullPileup; - } - } - } - } - - public Map getFullPileup() { return fullPileup; } - public Map getReadFilteredPileup(){ return readFilteredPileup; } - - public Map getPreviousContext() { - return previous != null ? previous.getContext(idToSampleObject) : null; - } - - public ReferenceContext getPreviousRef() { - return previous != null ? previous.getRef() : null; - } - - public RefMetaDataTracker getPreviousTracker() { - return previous != null ? previous.getTracker() : null; - } - - public Map getContext() { - return current != null ? current.getContext(idToSampleObject) : null; - } - - public ReferenceContext getReferenceContext() { - return current != null ? current.getRef() : null; - } - - public RefMetaDataTracker getTracker() { - return current != null ? current.getTracker() : null; - } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapHolder.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapHolder.java deleted file mode 100755 index 0a4e624e0..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/MapHolder.java +++ /dev/null @@ -1,38 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; - -import java.util.HashMap; -import java.util.Map; - -public class MapHolder { - private RefMetaDataTracker tracker; - private ReferenceContext ref; - private Map alignments; - - public MapHolder(RefMetaDataTracker t, ReferenceContext r, AlignmentContext a) { - tracker = t; - ref = r; - alignments = AlignmentContextUtils.splitContextBySampleName(a); - } - - public Map getContext(Map stringSampleMap) { - Map mappedContexts = new HashMap(alignments.size()); - for ( Map.Entry entry : alignments.entrySet() ) { - mappedContexts.put(stringSampleMap.get(entry.getKey()),entry.getValue()); - } - return mappedContexts; - } - - public ReferenceContext getRef() { - return ref; - } - - public RefMetaDataTracker getTracker() { - return tracker; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RAWSampleStats.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RAWSampleStats.java deleted file mode 100755 index 3a4047c5a..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RAWSampleStats.java +++ /dev/null @@ -1,128 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association; - -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContext; -import org.broadinstitute.sting.oneoffprojects.walkers.association.MapHolder; -import org.broadinstitute.sting.oneoffprojects.walkers.association.RegionalAssociationHandler; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.exceptions.StingException; - -import java.io.PrintStream; -import java.util.*; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 4/12/11 - * Time: 5:21 PM - * To change this template use File | Settings | File Templates. - */ -public class RAWSampleStats extends LocusWalker { - @Argument(doc="Association type(s) to use. Supports multiple arguments (-AT thing1 -AT thing2).",shortName="AT",fullName="associationType",required=false) - public String[] associationsToUse = null; - @Argument(doc="Set the window size for associations to this value",shortName="w",fullName="window",required=false) - public int windowSize = 50; - @Argument(doc="Set the window sliding value for associations to this value",shortName="s",fullName="slide",required=false) - public int slideBy = 10; - - @Output - PrintStream out; - - public RegionalAssociationHandler reduceInit() { - return new RegionalAssociationHandler(getAssociations(),getToolkit().getSAMFileSamples(),false); - } - - public MapHolder map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - return new MapHolder(tracker,ref,context); - } - - public RegionalAssociationHandler reduce(MapHolder holder, RegionalAssociationHandler handler) { - handler.updateExtender(holder); - StringBuffer contextBuf = new StringBuffer(); - contextBuf.append(holder.getRef().getLocus().toString()); - for ( AssociationContext context : handler.getAssociations() ) { - contextBuf.append("\t"); - contextBuf.append(context.getClass().getSimpleName()); - StringBuffer caseBuf = new StringBuffer(); - StringBuffer controlBuf = new StringBuffer(); - boolean first = true; - for ( Map.Entry entry : handler.getExtender().getContext().entrySet() ) { - StringBuffer toAppend = entry.getKey().getProperty("cohort").equals("case") ? caseBuf : controlBuf; - toAppend.append(entry.getKey().getId()); - toAppend.append("="); - toAppend.append(handleMap(context.map(entry.getValue().getBasePileup()))); - toAppend.append(";"); - } - caseBuf.deleteCharAt(caseBuf.length()-1); - controlBuf.deleteCharAt(controlBuf.length()-1); - contextBuf.append("\t"); - contextBuf.append("Case:\t"); - contextBuf.append(caseBuf); - contextBuf.append("\tControl:\t"); - contextBuf.append(controlBuf); - } - - out.printf("%s%n",contextBuf.toString()); - - return handler; - } - - public String handleMap(Object o) { - if ( o instanceof Pair) { - Pair k = (Pair) o; - return String.format("%.2f",k.first.doubleValue()/k.second.doubleValue()); - } - - if ( o instanceof Collection ) { - Collection k = (Collection) o; - return String.format("%.2f", MathUtils.average(k,true)); - } - - return "NA"; - } - - - public Set getAssociations() { - List> contexts = new PluginManager(AssociationContext.class).getPlugins(); - - if ( associationsToUse.length > 0 && associationsToUse[0].equals("ALL") ) { - HashSet allAssoc = new HashSet(contexts.size()); - for ( Class clazz : contexts ) { - AssociationContext context; - try { - context = clazz.getConstructor(new Class[] {}).newInstance(new Object[] {}); - } catch (Exception e ) { - throw new StingException("The class "+clazz.getSimpleName()+" could not be instantiated",e); - } - allAssoc.add(context); - } - return allAssoc; - } - - - Map> classNameToClass = new HashMap>(contexts.size()); - for ( Class clazz : contexts ) { - classNameToClass.put(clazz.getSimpleName(),clazz); - } - - Set validAssociations = new HashSet(); - for ( String s : associationsToUse ) { - AssociationContext context; - try { - context = classNameToClass.get(s).getConstructor(new Class[]{}).newInstance(new Object[] {}); - } catch ( Exception e ) { - throw new StingException("The class "+s+" could not be instantiated.",e); - } - validAssociations.add(context); - } - return validAssociations; - } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java deleted file mode 100755 index 21a26388d..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationHandler.java +++ /dev/null @@ -1,108 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association; - -import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.collections.Pair; - -import java.util.*; - -/** - * @Author chartl - * @Date 2011-02-23 - * A general context for windowed association - */ -public class RegionalAssociationHandler { - private MapExtender maps; - // todo -- the correct way to do this is via the PluginManager (a la VariantEval) but this is a QND implementation - private Set associations; - private boolean bedGraphFormat; - - public RegionalAssociationHandler(Set contexts, Set samples, boolean bedGraph) { - maps = new MapExtender(samples); - associations = contexts; - bedGraphFormat = bedGraph; - } - - public void updateExtender(MapHolder mapHolder) { - maps.set(mapHolder); - } - - /** - * Once the instances are collected; run map-reduce - * @map: MapExtender --> A child of AssociationContextAtom - * @implementation: Indirect construction via newinstance - * @reduce: (List,AssociationContext) --> List - * @implementation: just append - */ - public Map runMapReduce() { - Map testResults = new HashMap(); - if ( maps.getPreviousRef() != null && maps.getPreviousRef().getLocus().compareTo(getLocation()) != -1 ) { - testResults.putAll(flush()); - } - for ( AssociationContext w : associations ) { - if ( w.filter(maps) ) { - w.addData(w.mapLocus(maps)); - } - - if ( w.isFull() ) { - testResults.put(w,runTest(w)); - w.slide(); - } - } - - return testResults; - } - - /** - * Switches what formatting to use based on wiggle or standard - * implicit param bedGraphFormat - use bedgraph format (s p q) or standard (S: s P: p Q: q) - * @return - test results in proper format - */ - public String runTest(AssociationContext context) { - // todo -- maybe the tdf should be the whole window rather than just the most recent loc? - String outVal; - if ( bedGraphFormat ) { - Pair,Pair> statVals = AssociationTestRunner.getTestValues(context); - Pair simpleDichotVals = AssociationTestRunner.getDichotomizedValues(context); - outVal = String.format("%.2f\t%.2f\t%.2e\t%d\t%.2f\t%.2f",statVals.first.first,statVals.first.second, - statVals.second.first,statVals.second.second,simpleDichotVals.first,simpleDichotVals.second); - } else { - outVal = AssociationTestRunner.runTests(context); - Pair simpleDichotVals = AssociationTestRunner.getDichotomizedValues(context); - outVal += String.format("\tDi: %.2f\tLogDi: %.2f",simpleDichotVals.first,simpleDichotVals.second); - } - return String.format("%s\t%d\t%d\t%s",maps.getReferenceContext().getLocus().getContig(), - maps.getReferenceContext().getLocus().getStart()-context.getWindowSize()-1,maps.getReferenceContext().getLocus().getStart()+1, outVal); - - } - - public GenomeLoc getLocation() { - return maps.getReferenceContext().getLocus(); - } - - /** - * Flushes context on a jump between intervals. Can not return null. - * @return on-flush tests - */ - public Map flush() { - Map flushTests = new HashMap(); - for ( AssociationContext ac : associations ) { - if ( ac.canFlush() ) { - if ( ac.testOnFlush() ) { - flushTests.put(ac,runTest(ac)); - } - ac.flush(); - } - } - - return flushTests; - } - - public Set getAssociations() { - return associations; - } - - public MapExtender getExtender() { - return maps; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java deleted file mode 100755 index c088c62d0..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationMultiplexer.java +++ /dev/null @@ -1,56 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association; - -import org.broadinstitute.sting.gatk.walkers.Multiplexer; -import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.exceptions.StingException; - -import java.util.*; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/6/11 - * Time: 12:09 PM - * To change this template use File | Settings | File Templates. - */ -public class RegionalAssociationMultiplexer implements Multiplexer> { - - Set> contexts = null; - boolean bedGraphOut = false; - - public RegionalAssociationMultiplexer(String[] toUse, boolean wiggleOutput) { - super(); - contexts = getAssociations(toUse); - bedGraphOut = wiggleOutput; - } - - public Collection> multiplex() { - return contexts; - } - - public String transformArgument(final Class context, String arg) { - if ( bedGraphOut ) { - return String.format("%s.%s.bedgraph",arg,context.getSimpleName()); - } - return String.format("%s.%s.tdf", arg, context.getSimpleName()); - } - - private Set> getAssociations(String[] associationsToUse) { - List> contexts = new PluginManager(AssociationContext.class).getPlugins(); - - if ( associationsToUse.length > 0 && associationsToUse[0].equals("ALL") ) { - return new HashSet>((Collection)contexts); - } - - Map> classNameToClass = new HashMap>(contexts.size()); - for ( Class clazz : contexts ) { - classNameToClass.put(clazz.getSimpleName(),clazz); - } - - Set> validAssociations = new HashSet>(); - for ( String s : associationsToUse ) { - validAssociations.add(classNameToClass.get(s)); - } - return validAssociations; - } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationRecalibrator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationRecalibrator.java deleted file mode 100755 index 56acdbe77..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationRecalibrator.java +++ /dev/null @@ -1,270 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association; - -import cern.colt.function.DoubleDoubleFunction; -import cern.colt.matrix.DoubleMatrix1D; -import cern.colt.matrix.DoubleMatrix2D; -import cern.colt.matrix.impl.DenseDoubleMatrix1D; -import cern.colt.matrix.impl.DenseDoubleMatrix2D; -import cern.colt.matrix.linalg.Algebra; -import cern.colt.matrix.linalg.EigenvalueDecomposition; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.features.table.BedTableCodec; -import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.gatk.walkers.TreeReducible; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.text.XReadLines; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.PrintStream; -import java.util.*; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/25/11 - * Time: 11:05 AM - * To change this template use File | Settings | File Templates. - */ -public class RegionalAssociationRecalibrator extends RodWalker> implements TreeReducible> { - - @Argument(shortName="w",fullName="whiteningFile",doc="File containing the mean, max vectors and covariance matrix",required=false) - File whitenFile = null; - @Argument(shortName="p",fullName="normParameter",doc="Exponent for vector norm; p > 4 will induce a p=infinity approximation",required=false) - double pNorm = 1.0; - - @Output - public PrintStream out; - - private DataWhitener whitener; - private double[] dataHolder; - private int boundTables = 0; - - public void initialize() { - - for ( ReferenceOrderedDataSource source : getToolkit().getRodDataSources() ) { - logger.debug(source.getType().getSimpleName()); - if ( source.getType().equals(BedTableCodec.class)) { - ++boundTables; - if ( ! source.getFile().getName().endsWith(".bedgraph") ) { - throw new UserException("Regional association requires bedgraph files. The file "+source.getFile().getAbsolutePath()+" does not have the proper extension."); - } - } - } - - if ( whitenFile != null ) { - whitener = getWhitenerFromFile(whitenFile); - } - } - - public Set reduceInit() { return new TreeSet(); } - - public LocHashingPair map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext ali ) { - if ( tracker == null ) { return null; } - - ArrayList features = new ArrayList(boundTables); - - for ( GATKFeature feature : tracker.getAllRods()) { - Object uo = feature.getUnderlyingObject(); - if ( uo instanceof TableFeature && feature.getLocation().getStart() == ref.getLocus().getStart()) { - features.add((TableFeature)uo); - } - } - - if ( features.size() == 0 ) { return null; } - if ( features.size() != boundTables ) { - throw new UserException("Features do not line up at position "+ref.getLocus().toString()); - } - - if ( dataHolder == null ) { - dataHolder = new double[features.size()]; - } - - int idx = 0; - for ( TableFeature f : features ) { - dataHolder[idx] = Double.parseDouble(f.getValue(3)); - idx++; - } - - double normVal; - - if ( whitener != null ) { - normVal = calculateNorm(whitener.whiten(dataHolder),pNorm); - } else { - normVal = calculateNorm(dataHolder,pNorm); - } - - return new LocHashingPair(features.get(0).getLocation(),normVal); - } - - public Set reduce(LocHashingPair map, Set red) { - if ( map != null ) { red.add(map); } - return red; - } - - public Set treeReduce(Set left, Set right) { - if ( left == null ) { - return right; - } else if ( right == null ) { - return left; - } else { - left.addAll(right); - return left; - } - } - - public void onTraversalDone(Set normByLoc) { - double[] normRanks = new double[(normByLoc.size())]; - int offset = 0; - for ( LocHashingPair lhp : normByLoc ) { - normRanks[offset]=(lhp.second); - offset++; - } - Arrays.sort(normRanks); - for ( LocHashingPair lhp : normByLoc ) { - int rank = Arrays.binarySearch(normRanks,lhp.second); - // note - equal values will always be assigned the same rank -- however - // it is proper, no need for random assignment. - double prob = (1.0 + normRanks.length - rank)/(1.0+normRanks.length); - int qual = AssociationTestRunner.pToQ(prob); - out.printf("%s\t%d\t%d\t%d\t%.2e\t%d\t%.2e%n",lhp.getFirst().getContig(),lhp.getFirst().getStart(),lhp.getFirst().getStop(), - qual,prob,rank,lhp.getSecond()); - } - } - - private DataWhitener getWhitenerFromFile(File file){ - XReadLines lineReader; - try { - lineReader = new XReadLines(file); - } catch (FileNotFoundException e) { - throw new UserException("The provided whiten file could not be found",e); - } - - // first line is the mean vector - String[] meanLine = lineReader.next().split("\t"); - if ( meanLine.length != boundTables ) { - String msg = String.format("The number of bound bedgraphs does not match the size of the mean value in the whitening file. bound: %d mean : %d",boundTables,meanLine.length); - throw new UserException(msg); - } - double[] mean = new double[meanLine.length]; - for ( int i = 0 ; i < meanLine.length; i ++ ) { - mean[i] = Double.parseDouble(meanLine[i]); - } - // skip - lineReader.next(); - // now the max - String[] maxLine = lineReader.next().split("\t"); - double[] max = new double[maxLine.length]; - for ( int i = 0; i < maxLine.length ; i++ ) { - max[i] = Double.parseDouble(maxLine[i]); - } - // skip - lineReader.next(); - // now the covariance matrix - double[][] cov = new double[max.length][max.length]; - for ( int i = 0; i < max.length; i ++ ) { - String[] row = lineReader.next().split("\t"); - for ( int j = 0; j < max.length; j ++ ) { - cov[i][j] = Double.parseDouble(row[j]); - } - } - - DataWhitener dataWhitener = new DataWhitener(); - dataWhitener.setCov(cov); - dataWhitener.setMaxs(max); - dataWhitener.setMeans(mean); - - return dataWhitener; - } - - private static double calculateNorm(double[] vec, double p) { - double sumExp = 0.0; - for ( double v : vec ) { - sumExp += Math.pow(Math.abs(v),p); - } - - return Math.pow(sumExp,1.0/p); - } - - class DataWhitener { - - private final Algebra ALGEBRA = new Algebra(); - private final DoubleDoubleFunction SUBTRACT = new DoubleDoubleFunction() { - // note: want vectors that have NaN to have norm calculated within the - // n-k dimensional subspace. This is equivalent to setting the NaNs to - // a post-whitening value of zero. - @Override - public double apply(double v, double v1) { - if ( Double.compare(v,Double.NaN) == 0 || Double.compare(v1,Double.NaN) == 0 ) { - return 0.0; - } - - if ( Double.compare(v,Double.POSITIVE_INFINITY) == 0 ) { return 50.0; } // todo -- fixme - if ( Double.compare(v,Double.NEGATIVE_INFINITY) == 0 ) { return -50.0; } // todo -- fixme, really should be an exception - return v - v1; - } - }; - - private DenseDoubleMatrix1D means; - private DenseDoubleMatrix1D maxAbs; - private DoubleMatrix2D transform; - - public DataWhitener() { - } - - public void setMeans(double[] inMeans) { - means = new DenseDoubleMatrix1D(inMeans); - } - - public void setMaxs(double[] maxs) { - maxAbs = new DenseDoubleMatrix1D(maxs); - } - - public void setCov(double[][] cov) { - EigenvalueDecomposition decomp = new EigenvalueDecomposition(new DenseDoubleMatrix2D(cov)); - DoubleMatrix2D diag = decomp.getD(); - for ( int i = 0; i < diag.rows(); i ++ ) { - // do not artificially inflate signal that is not there - if ( diag.get(i,i) < 1.0 ) { - diag.set(i,i, 1.0); - } else { - diag.set(i,i, Math.pow(diag.get(i,i),-0.5)); - } - } - transform = ALGEBRA.mult(diag,ALGEBRA.transpose(decomp.getV())); - logger.debug("TRANSFORM:"); - logger.debug(transform); - } - - public double[] whiten(double[] value) { - DenseDoubleMatrix1D vec = new DenseDoubleMatrix1D(value); - return ALGEBRA.mult(transform, vec.assign(means, SUBTRACT) ).toArray(); - } - } - - class LocHashingPair extends Pair implements Comparable { - - public LocHashingPair(GenomeLoc g, Double d) { - super(g,d); - } - - public int hashCode() { return first.hashCode(); } - - public int compareTo(Object o) { - if ( o instanceof LocHashingPair ) { - return this.first.compareTo(((LocHashingPair) o).first); - } else { - return Integer.MIN_VALUE; - } - } - } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java deleted file mode 100755 index dec7d2808..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java +++ /dev/null @@ -1,156 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.DownsampleType; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.Downsample; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.Multiplex; -import org.broadinstitute.sting.gatk.walkers.TreeReducible; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.wiggle.WiggleWriter; - -import java.io.PrintStream; -import java.lang.reflect.Modifier; -import java.util.*; - -/** - * @Author chartl - * @Date 2011-02-23 - * Generalized framework for regional (windowed) associations - * -- todos -- - * : todo -- - * Cap statistics output (sometimes see Infinity or -Infinity) [fixed cap, or by k*max_seen_so_far] - */ -@Downsample(by= DownsampleType.NONE) -public class RegionalAssociationWalker extends LocusWalker implements TreeReducible { - @Argument(doc="Association type(s) to use. Supports multiple arguments (-AT thing1 -AT thing2).",shortName="AT",fullName="associationType",required=false) - public String[] associationsToUse = null; - @Argument(doc="Change output file to bedgraph format (s p q, not STAT: s P: p Q: q",shortName="bg",fullName="bedgraph",required=false) - public boolean bedGraph = false; - @Argument(doc="Set the window size for associations to this value",shortName="w",fullName="window",required=false) - public int windowSize = 50; - @Argument(doc="Set the window sliding value for associations to this value",shortName="s",fullName="slide",required=false) - public int slideBy = 10; - @Argument(doc="Set the exercise-wide constant Z-value for delta-measure",shortName="z",fullName="zValue",required=false) - public double zVal = 6.0; - // for now apply this to t-tests too -- though df means the percentile is not constant, most - // dfs are large, so it doesn't really vary all that much - - @Output - @Multiplex(value=RegionalAssociationMultiplexer.class,arguments={"associationsToUse","bedGraph"}) - Map out; - - public void initialize() { - if ( windowSize < 1 ) { throw new UserException("Window size cannot be less than one."); } - - for ( Sample s : getSamples() ) { - if ( s.getProperty("cohort") == null ) { - throw new UserException("Sample "+s.getId()+" does not have a cohort property associated with it. "+ - "Please ensure that metadata is bound with -SM and that sample "+s.getId()+" has the cohort property assigned."); - } - } - - Set validAssociations = getAssociations(); - - if ( bedGraph ) { - writeBedGraphHeaders(validAssociations); - } - } - - public RegionalAssociationHandler reduceInit() { - Set validAssociations = getAssociations(); - RegionalAssociationHandler wac = new RegionalAssociationHandler(validAssociations,getSamples(),bedGraph); - - return wac; - } - - public MapHolder map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - return new MapHolder(tracker,ref,context); - } - - public RegionalAssociationHandler reduce(MapHolder map, RegionalAssociationHandler rac) { - rac.updateExtender(map); - Map testResults; - try { - testResults = rac.runMapReduce(); - } catch (Exception e) { - throw new StingException("Error in map reduce",e); - } - - if ( testResults.size() > 0 ) { - for ( Map.Entry result : testResults.entrySet() ) { - out.get(result.getKey().getClass()).printf("%s%n",result.getValue()); - } - } - return rac; - } - - public Set getAssociations() { - List> contexts = new PluginManager(AssociationContext.class).getPlugins(); - - if ( associationsToUse.length > 0 && associationsToUse[0].equals("ALL") ) { - HashSet allAssoc = new HashSet(contexts.size()); - for ( Class clazz : contexts ) { - AssociationContext context; - try { - context = clazz.getConstructor(new Class[] {}).newInstance(new Object[] {}); - context.init(this); - } catch (Exception e ) { - throw new StingException("The class "+clazz.getSimpleName()+" could not be instantiated",e); - } - allAssoc.add(context); - } - return allAssoc; - } - - - Map> classNameToClass = new HashMap>(contexts.size()); - for ( Class clazz : contexts ) { - classNameToClass.put(clazz.getSimpleName(),clazz); - } - - Set validAssociations = new HashSet(); - for ( String s : associationsToUse ) { - AssociationContext context; - try { - context = classNameToClass.get(s).getConstructor(new Class[]{}).newInstance(new Object[] {}); - context.init(this); - } catch ( Exception e ) { - throw new StingException("The class "+s+" could not be instantiated.",e); - } - validAssociations.add(context); - } - return validAssociations; - } - - public RegionalAssociationHandler treeReduce(RegionalAssociationHandler left, RegionalAssociationHandler right) { - // for now be dumb; in future fix the fact that left-most intervals of a 16kb shard won't see the context from - // the right-most locus of the previous shard - return right; - } - - public void onTraversalDone(RegionalAssociationHandler rac) { - // do nothing - } - - public Set getSamples() { - return getToolkit().getSAMFileSamples(); - } - - public void writeBedGraphHeaders(Set cons) { - for ( AssociationContext con : cons ) { - String header = String.format("track type=bedGraph name=%s description=stat,p,q,dichot,logdichot",con.getClass().getSimpleName()); - out.get(con.getClass()).printf("%s%n",header); - } - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/BaseQualityScore.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/BaseQualityScore.java deleted file mode 100755 index e3ca95418..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/BaseQualityScore.java +++ /dev/null @@ -1,29 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.ArrayList; -import java.util.Collection; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/6/11 - * Time: 1:52 PM - * To change this template use File | Settings | File Templates. - */ -public class BaseQualityScore extends ValueTest { - - public Collection map(ReadBackedPileup rbp) { - ArrayList baseQuals = new ArrayList(rbp.size()); - for (PileupElement e : rbp ) { - baseQuals.add((int)e.getQual()); - } - - return (Collection) baseQuals; - } - - public boolean usePreviouslySeenReads() { return true; } - public boolean useTStatistic() { return true; } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/CaseControl.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/CaseControl.java deleted file mode 100755 index bd57d8488..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/CaseControl.java +++ /dev/null @@ -1,62 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - - -import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationContext; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Created by IntelliJ IDEA. - * @author chartl - */ -public abstract class CaseControl extends AssociationContext { - - public Map getCaseControl() { - return reduce(window); - } - - public Map reduce(List> window) { - X accumCase = null; - X accumControl = null; - for ( Map sampleXMap : window ) { - for ( Map.Entry entry : sampleXMap.entrySet() ) { - boolean isCase; - try { - isCase = entry.getKey().getProperty("cohort").equals("case"); - } catch (NullPointerException e) { - throw new UserException("Sample "+entry.getKey().getId()+" does not have a cohort assigned"); - } - if ( entry.getKey().getProperty("cohort").equals("case") ) { - accumCase = accum(accumCase, entry.getValue()); - } else if ( entry.getKey().getProperty("cohort").equals("control") ) { - accumControl = accum(accumControl,entry.getValue()); - } - } - } - - Map cohortMap = new HashMap(2); - cohortMap.put(Cohort.CASE,accumCase); - cohortMap.put(Cohort.CONTROL,accumControl); - return cohortMap; - } - - protected X accum(X left, X right) { - if ( left == null ) { - return right; - } - if ( right == null ) { - return left; - } - return add(left,right); - } - - public abstract X map(ReadBackedPileup rbp); - public abstract X add(X left, X right); - - public enum Cohort { CASE,CONTROL } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/FisherExact.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/FisherExact.java deleted file mode 100755 index f7a0587e3..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/FisherExact.java +++ /dev/null @@ -1,14 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: Feb 24, 2011 - * Time: 12:58:56 PM - * To change this template use File | Settings | File Templates. - */ -public interface FisherExact { - public abstract int[][] getCounts(); - public abstract String[] getRowNames(); - public abstract String[] getColNames(); -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/InsertSizeDistribution.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/InsertSizeDistribution.java deleted file mode 100755 index af6d77099..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/InsertSizeDistribution.java +++ /dev/null @@ -1,29 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - -/** - * @author chartl - */ -public class InsertSizeDistribution extends ValueTest { - private final int MAPQ_THRESHOLD = 5; - - public boolean usePreviouslySeenReads() { return false; } - - public Collection map(ReadBackedPileup pileup) { - List insertSizes = new ArrayList(pileup.size()); - for ( PileupElement e : pileup ) { - if ( e.getMappingQual() >= MAPQ_THRESHOLD ) { - insertSizes.add(Math.abs(e.getRead().getInferredInsertSize())); - } - } - - return (Collection) insertSizes; - } - -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MappingQuality0.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MappingQuality0.java deleted file mode 100755 index 83081aaef..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MappingQuality0.java +++ /dev/null @@ -1,30 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/6/11 - * Time: 1:29 PM - * To change this template use File | Settings | File Templates. - */ -public class MappingQuality0 extends ProportionTest { - - public Pair map(ReadBackedPileup rbp) { - int total = 0; - int mq0 = 0; - for ( PileupElement e : rbp ) { - ++total; - if (e.getMappingQual() == 0) - ++mq0; - } - - return new Pair(mq0,total); - } - - public boolean usePreviouslySeenReads() { return false; } - -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateMappingQuality.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateMappingQuality.java deleted file mode 100755 index 2e27a69e2..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateMappingQuality.java +++ /dev/null @@ -1,30 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.ArrayList; -import java.util.Collection; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/6/11 - * Time: 1:43 PM - * To change this template use File | Settings | File Templates. - */ -public class MateMappingQuality extends ValueTest { - - public Collection map(ReadBackedPileup rbp) { - ArrayList mateMapQ = new ArrayList(rbp.size()); - for ( PileupElement e : rbp ) { - if ( e.getRead().getReadPairedFlag() && e.getRead().getAttribute("MQ") != null) { - mateMapQ.add((Integer) e.getRead().getAttribute("MQ")); - } - } - - return (Collection) mateMapQ; - } - - public boolean usePreviouslySeenReads() { return false; } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateOtherContig.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateOtherContig.java deleted file mode 100755 index f13b7dd66..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateOtherContig.java +++ /dev/null @@ -1,35 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/1/11 - * Time: 3:00 PM - * To change this template use File | Settings | File Templates. - */ -public class MateOtherContig extends ProportionTest { - - private final int MAPQ_THRESHOLD = 5; - - public boolean usePreviouslySeenReads() { return false; } - - public Pair map(ReadBackedPileup pileup) { - int tot = 0; - int otherCon = 0; - for ( PileupElement e : pileup ) { - if ( e.getRead().getReadPairedFlag() && e.getRead().getMappingQuality() >= MAPQ_THRESHOLD ) { - ++tot; - if ( ! e.getRead().getMateReferenceIndex().equals(e.getRead().getReferenceIndex()) ) { - ++otherCon; - } - } - } - - return new Pair(otherCon,tot); - } - -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateSameStrand.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateSameStrand.java deleted file mode 100755 index 8e0968e51..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateSameStrand.java +++ /dev/null @@ -1,33 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/6/11 - * Time: 1:47 PM - * To change this template use File | Settings | File Templates. - */ -public class MateSameStrand extends ProportionTest { - private final int MAPQ_THRESHOLD = 5; - - public Pair map(ReadBackedPileup rbp) { - int numPairs = 0; - int mateSameStrand = 0; - for (PileupElement e : rbp ) { - if ( e.getRead().getReadPairedFlag() && e.getMappingQual() >= MAPQ_THRESHOLD) { - ++numPairs; - if ( e.getRead().getMateNegativeStrandFlag() == e.getRead().getReadNegativeStrandFlag() ) { - ++mateSameStrand; - } - } - } - - return new Pair(mateSameStrand,numPairs); - } - - public boolean usePreviouslySeenReads() { return false; } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateUnmapped.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateUnmapped.java deleted file mode 100755 index 9b0e195d0..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MateUnmapped.java +++ /dev/null @@ -1,33 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/2/11 - * Time: 2:09 PM - * To change this template use File | Settings | File Templates. - */ -public class MateUnmapped extends ProportionTest { - private final int MAPQ_THRESHOLD = 5; - - public Pair map(ReadBackedPileup pileup) { - int numMatedReads = 0; - int numPairUnmapped = 0; - for (PileupElement e : pileup ) { - if (e.getRead().getReadPairedFlag() && e.getMappingQual() >= MAPQ_THRESHOLD ) { - ++numMatedReads; - if ( e.getRead().getMateUnmappedFlag() ) { - ++numPairUnmapped; - } - } - } - - return new Pair(numPairUnmapped,numMatedReads); - } - - public boolean usePreviouslySeenReads() { return false; } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MismatchRate.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MismatchRate.java deleted file mode 100755 index 18137e71a..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/MismatchRate.java +++ /dev/null @@ -1,90 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.oneoffprojects.walkers.association.MapExtender; -import org.broadinstitute.sting.oneoffprojects.walkers.association.RegionalAssociationWalker; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.*; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/8/11 - * Time: 2:32 PM - * To change this template use File | Settings | File Templates. - */ -public class MismatchRate extends ValueTest { - - private Map sampleStats = new HashMap(); - private int currentRefBase = 0; - - public void init(RegionalAssociationWalker walker) { - super.init(walker); - Set samples = walker.getSamples(); - for ( Sample s : samples ) { - if ( s.hasProperty("mismatch_rate.mean") && s.hasProperty("mismatch_rate.std") ) { - double mn = (Double) s.getProperty("mismatch_rate.mean"); - double std = (Double) s.getProperty("mismatch_rate.std"); - sampleStats.put(s.getId(),new Pair(mn,std)); - } else { - sampleStats.put(s.getId(),new MathUtils.RunningAverage()); - } - } - } - - @Override - public Map mapLocus(MapExtender extender) { - currentRefBase = BaseUtils.simpleBaseToBaseIndex(extender.getReferenceContext().getBase()); - Map pileups = extender.getReadFilteredPileup(); - Map maps = new HashMap(pileups.size()); - for ( Map.Entry samPileup : pileups.entrySet() ) { - maps.put(samPileup.getKey(),map(samPileup.getKey(),samPileup.getValue())); - } - - return maps; - } - - public Collection map(Sample sample, ReadBackedPileup pileup) { - Object stats = sampleStats.get(sample.getId()); - double mn; - double std; - double mmr = calcMMR(pileup); - if ( stats instanceof Pair ) { - mn = ((Pair)stats).first; - std = ((Pair)stats).second; - } else { - MathUtils.RunningAverage ra = (MathUtils.RunningAverage) stats; - mn = ra.mean(); - std = ra.stddev(); - if ( std <= 0.0 ) { - std = 1.0; - } - ra.add(mmr); - } - - return Arrays.asList((Number) ((mmr - mn) / std)); - } - - public double calcMMR(ReadBackedPileup rbp) { - int[] counts = rbp.getBaseCounts(); - int total = 0; - int nonref = 0; - for ( int base : new int[]{0,1,2,3} ) { - total += counts[base]; - if ( base != currentRefBase ) { - nonref += counts[base]; - } - } - - return ((double)nonref)/total; - } - - // note: this is to satisfy the interface, and is never called due to override - public Collection map(ReadBackedPileup pileup) { return null; } - - public boolean usePreviouslySeenReads() { return true; } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ProperPairs.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ProperPairs.java deleted file mode 100755 index 0226841e6..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ProperPairs.java +++ /dev/null @@ -1,33 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/6/11 - * Time: 1:50 PM - * To change this template use File | Settings | File Templates. - */ -public class ProperPairs extends ProportionTest { - private final int MAPQ_THRESHOLD = 5; - - public Pair map(ReadBackedPileup rbp) { - int numReads = 0; - int numPropPair = 0; - for (PileupElement e : rbp ) { - if ( e.getRead().getReadPairedFlag() && e.getMappingQual() >= MAPQ_THRESHOLD ) { - ++numReads; - if ( e.getRead().getReadPairedFlag() && e.getRead().getProperPairFlag() ) { - ++numPropPair; - } - } - } - - return new Pair(numPropPair,numReads); - } - - public boolean usePreviouslySeenReads() { return false; } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ProportionTest.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ProportionTest.java deleted file mode 100755 index b39dd5174..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ProportionTest.java +++ /dev/null @@ -1,91 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import cern.jet.random.Normal; -import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.Dichotomizable; -import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.StatisticalTest; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Map; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: Feb 24, 2011 - * Time: 12:58:38 PM - * To change this template use File | Settings | File Templates. - */ -public abstract class ProportionTest extends CaseControl> implements Dichotomizable, StatisticalTest { - public static final Normal standardNormal = new Normal(0.0,1.0,null); - - public abstract Pair map(ReadBackedPileup rbp ); - - public Pair add(Pair left, Pair right) { - return new Pair(left.first.doubleValue()+right.first.doubleValue(), - left.second.doubleValue()+right.second.doubleValue()); - } - - public Pair,Collection> getDichotomizedData() { - Collection caseProps = new ArrayList(); - Collection controlProps = new ArrayList(); - - for (Map> sampleNumberMap : window ) { - for ( Map.Entry> samplePairEntry : sampleNumberMap.entrySet() ) { - Pair val = samplePairEntry.getValue(); - if ( samplePairEntry.getKey().getProperty("cohort").equals("case")) { - caseProps.add(val.first.doubleValue()/val.second.doubleValue()); - } else if ( samplePairEntry.getKey().getProperty("cohort").equals("control") ) { - controlProps.add(val.first.doubleValue()/val.second.doubleValue()); - } - } - } - - return new Pair,Collection>(caseProps,controlProps); - } - - public Pair getStatisticAndPValue() { - Map> caseControlCounts = getCaseControl(); - if ( caseControlCounts == null || caseControlCounts.get(CaseControl.Cohort.CASE) == null || caseControlCounts.get(CaseControl.Cohort.CONTROL) == null ) { - return new Pair(Double.NaN,Double.NaN); - } - double pCase = caseControlCounts.get(CaseControl.Cohort.CASE).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue(); - double pControl = caseControlCounts.get(CaseControl.Cohort.CONTROL).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue(); - double nCase = caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue(); - double nControl = caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue(); - - double p2 = (caseControlCounts.get(CaseControl.Cohort.CASE).first.doubleValue()+caseControlCounts.get(CaseControl.Cohort.CONTROL).first.doubleValue())/ - (caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue()+caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue()); - double se = Math.sqrt(p2*(1-p2)*(1/nCase + 1/nControl)); - - double z = (pCase-pControl)/se; - double p = z < 0 ? 2*standardNormal.cdf(z) : 2*(1-standardNormal.cdf(z)); - - return new Pair(z,p); - } - - // todo -- this is a temporary method, it needs to be merged in with others if it proves useful - public Double getZConfDelta() { - Map> caseControlCounts = getCaseControl(); - if ( caseControlCounts == null || caseControlCounts.get(CaseControl.Cohort.CASE) == null || caseControlCounts.get(CaseControl.Cohort.CONTROL) == null ) { - return Double.NaN; - } - double pCase = caseControlCounts.get(CaseControl.Cohort.CASE).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue(); - double pControl = caseControlCounts.get(CaseControl.Cohort.CONTROL).first.doubleValue()/caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue(); - double nCase = caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue(); - double nControl = caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue(); - - double p2 = (caseControlCounts.get(CaseControl.Cohort.CASE).first.doubleValue()+caseControlCounts.get(CaseControl.Cohort.CONTROL).first.doubleValue())/ - (caseControlCounts.get(CaseControl.Cohort.CASE).second.doubleValue()+caseControlCounts.get(CaseControl.Cohort.CONTROL).second.doubleValue()); - double se = Math.sqrt(p2*(1-p2)*(1/nCase + 1/nControl)); - - double z = (pCase-pControl)/se; - - return ( z < 0 ? -1.0*se*(z-zVal) : se*(z-zVal)); - } - - public String getStatisticName() { return "Z"; } - -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadClipping.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadClipping.java deleted file mode 100755 index 6d29f92ef..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadClipping.java +++ /dev/null @@ -1,36 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.ArrayList; -import java.util.Collection; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/6/11 - * Time: 1:31 PM - * To change this template use File | Settings | File Templates. - */ -public class ReadClipping extends ValueTest { - - public Collection map(ReadBackedPileup rbp) { - ArrayList clipping = new ArrayList(rbp.size()); - for ( PileupElement e : rbp ) { - int clips = 0; - for( CigarElement c : e.getRead().getCigar().getCigarElements() ) { - if ( c.getOperator() == CigarOperator.SOFT_CLIP || c.getOperator() == CigarOperator.HARD_CLIP ) { - clips += c.getLength(); - } - } - clipping.add(clips); - } - - return (Collection) clipping; - } - - public boolean usePreviouslySeenReads() { return false; } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadIndels.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadIndels.java deleted file mode 100755 index 15704ac23..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadIndels.java +++ /dev/null @@ -1,36 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.ArrayList; -import java.util.Collection; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/6/11 - * Time: 1:54 PM - * To change this template use File | Settings | File Templates. - */ -public class ReadIndels extends ValueTest { - - public Collection map(ReadBackedPileup rbp) { - ArrayList indelElements = new ArrayList(rbp.size()); - for (PileupElement e : rbp ) { - int indelOps = 0; - for ( CigarElement c : e.getRead().getCigar().getCigarElements()) { - if ( c.getOperator() == CigarOperator.DELETION || c.getOperator() == CigarOperator.INSERTION ) - ++indelOps; - } - indelElements.add(indelOps); - } - - return (Collection) indelElements; - } - - public boolean usePreviouslySeenReads() { return false; } - public boolean useTStatistic() { return true; } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadMappingQuality.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadMappingQuality.java deleted file mode 100755 index e5e2ba727..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadMappingQuality.java +++ /dev/null @@ -1,28 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.ArrayList; -import java.util.Collection; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/6/11 - * Time: 1:41 PM - * To change this template use File | Settings | File Templates. - */ -public class ReadMappingQuality extends ValueTest { - - public Collection map(ReadBackedPileup rbp) { - ArrayList mapQuals = new ArrayList(rbp.size()); - for ( PileupElement e : rbp ) { - mapQuals.add(e.getMappingQual()); - } - - return (Collection) mapQuals; - } - - public boolean usePreviouslySeenReads() { return false; } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsAberrantInsertSize.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsAberrantInsertSize.java deleted file mode 100755 index 9095e93d6..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsAberrantInsertSize.java +++ /dev/null @@ -1,44 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import ch.qos.logback.classic.filter.ThresholdFilter; -import org.broadinstitute.sting.oneoffprojects.walkers.association.RegionalAssociationWalker; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -/** - * Created by IntelliJ IDEA. - * User: Ghost - * Date: 4/9/11 - * Time: 3:31 PM - * To change this template use File | Settings | File Templates. - */ -public class ReadsAberrantInsertSize extends ProportionTest { - - private int HI_THRESHOLD; - private int LO_THRESHOLD; - - @Override - public Pair map(ReadBackedPileup rbp) { - int total = 0; - int wonky = 0; - for (PileupElement e : rbp ) { - if ( e.getRead().getReadPairedFlag() ) { - ++total; - if ( e.getRead().getInferredInsertSize() > HI_THRESHOLD || e.getRead().getInferredInsertSize() < LO_THRESHOLD ) { - ++wonky; - } - } - } - - return new Pair(wonky,total); - } - - public void init(RegionalAssociationWalker parent) { - super.init(parent); - HI_THRESHOLD = 200; - LO_THRESHOLD = 50; - } - - public boolean usePreviouslySeenReads() { return false; } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsWithIndels.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsWithIndels.java deleted file mode 100755 index eb32b4af0..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReadsWithIndels.java +++ /dev/null @@ -1,35 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 4/15/11 - * Time: 12:54 PM - * To change this template use File | Settings | File Templates. - */ -public class ReadsWithIndels extends ProportionTest { - - public Pair map(ReadBackedPileup pileup) { - int numReads = 0; - int numWithIndels = 0; - for ( PileupElement e : pileup ) { - ++numReads; - for ( CigarElement element : e.getRead().getCigar().getCigarElements() ) { - if ( element.getOperator().equals(CigarOperator.DELETION) || element.getOperator().equals(CigarOperator.INSERTION) ) { - ++numWithIndels; - break; - } - } - } - - return new Pair(numWithIndels,numReads); - } - - public boolean usePreviouslySeenReads() { return false; } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReferenceMismatches.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReferenceMismatches.java deleted file mode 100755 index 0abc9930f..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ReferenceMismatches.java +++ /dev/null @@ -1,45 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.oneoffprojects.walkers.association.MapExtender; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.Map; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/8/11 - * Time: 1:17 PM - * To change this template use File | Settings | File Templates. - */ -public class ReferenceMismatches extends ProportionTest { - - final static int[] BASE_INDEX = {0,1,2,3}; - - int currentRefBase = 0; - - @Override - public Map mapLocus(MapExtender extender) { - currentRefBase = BaseUtils.simpleBaseToBaseIndex(extender.getReferenceContext().getBase()); - return super.mapLocus(extender); - } - - public Pair map(ReadBackedPileup rbp) { - int[] counts = rbp.getBaseCounts(); - int total = 0; - int nonref = 0; - for ( int base : BASE_INDEX ) { - total += counts[base]; - if ( base != currentRefBase ) { - nonref += counts[base]; - } - } - - return new Pair(nonref,total); - } - - public boolean usePreviouslySeenReads() { return true; } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/SampleDepth.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/SampleDepth.java deleted file mode 100755 index 9110ce739..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/SampleDepth.java +++ /dev/null @@ -1,73 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.oneoffprojects.walkers.association.MapExtender; -import org.broadinstitute.sting.oneoffprojects.walkers.association.RegionalAssociationWalker; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.*; - -/** - * @author chartl - */ -public class SampleDepth extends ValueTest { - - public Map sampleStats = null; - - // either: the user associates data with the sample (doc.mean, doc.std) - // >OR we calculate the values on-the-fly (e.g. running mean/stdev) - public void init(RegionalAssociationWalker walker) { - super.init(walker); - sampleStats = new HashMap(); - Set samples = walker.getSamples(); - for ( Sample s : samples ) { - if ( s.hasProperty("doc.mean") && s.hasProperty("doc.std") ) { - double mn = (Double) s.getProperty("doc.mean"); - double std = (Double) s.getProperty("doc.std"); - sampleStats.put(s.getId(),new Pair(mn,std)); - } else { - sampleStats.put(s.getId(),new MathUtils.RunningAverage()); - } - } - } - - @Override - public Map mapLocus(MapExtender extender) { - Map pileups = extender.getReadFilteredPileup(); - Map maps = new HashMap(pileups.size()); - for ( Map.Entry samPileup : pileups.entrySet() ) { - maps.put(samPileup.getKey(),map(samPileup.getKey(),samPileup.getValue())); - } - - return maps; - } - - public Collection map(Sample sample, ReadBackedPileup pileup) { - Object stats = sampleStats.get(sample.getId()); - double mn; - double std; - if ( stats instanceof Pair ) { - mn = ((Pair)stats).first; - std = ((Pair)stats).second; - } else { - MathUtils.RunningAverage ra = (MathUtils.RunningAverage) stats; - mn = ra.mean(); - std = ra.stddev(); - if ( std <= 0.0 ) { - std = 1.0; - } - ra.add(pileup.size()); - } - - return Arrays.asList((Number)((pileup.size()-mn)/std)); - } - - // note: this is to satisfy the interface, and is never called due to override - public Collection map(ReadBackedPileup pileup) { return null; } - - public boolean usePreviouslySeenReads() { return true; } - - -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ValueTest.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ValueTest.java deleted file mode 100755 index 0c344bb63..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/modules/casecontrol/ValueTest.java +++ /dev/null @@ -1,121 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol; - -import cern.jet.random.StudentT; -import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.Dichotomizable; -import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.StatisticalTest; -import org.broadinstitute.sting.utils.MannWhitneyU; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.*; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/2/11 - * Time: 1:53 PM - * To change this template use File | Settings | File Templates. - */ -public abstract class ValueTest extends CaseControl> implements StatisticalTest, Dichotomizable { - - public abstract Collection map(ReadBackedPileup rbp ); - - public boolean useTStatistic() { return false; } // default to using the U statistic - - public Collection add(Collection left, Collection right) { - if ( left instanceof ArrayList ) { - ((ArrayList) left).addAll(right); - return left; - } else if ( left instanceof Set) { - ((Set) left).addAll(right); - return left; - } else { - List newList = new ArrayList(left.size()+right.size()); - newList.addAll(left); - newList.addAll(right); - return newList; - } - } - - public Pair,Collection> getDichotomizedData() { - Collection caseMeans = new ArrayList(); - Collection controlMeans = new ArrayList(); - - for ( Map> sampleMap : window ) { - for ( Map.Entry> sampleEntry : sampleMap.entrySet() ) { - if ( sampleEntry.getKey().getProperty("cohort").equals("case") ) { - caseMeans.add(MathUtils.average(sampleEntry.getValue(),true)); - } else if ( sampleEntry.getKey().getProperty("cohort").equals("control") ) { - controlMeans.add(MathUtils.average(sampleEntry.getValue(),true)); - } - } - } - - return new Pair,Collection>(caseMeans,controlMeans); - } - - public Pair getUStatisticAndPValue() { - MannWhitneyU mwu = new MannWhitneyU(); - - for ( Map.Entry> cohortEntry : getCaseControl().entrySet() ) { - if ( cohortEntry.getKey().equals(Cohort.CASE) && cohortEntry.getValue() != null ) { - for ( Number n : cohortEntry.getValue() ) { - mwu.add(n,MannWhitneyU.USet.SET1); - } - } else if ( cohortEntry.getKey().equals(Cohort.CONTROL) && cohortEntry.getValue() != null) { - for ( Number n : cohortEntry.getValue() ) { - mwu.add(n,MannWhitneyU.USet.SET2); - } - } - } - - return mwu.runTwoSidedTest(); - } - - public Pair getStatisticAndPValue() { return useTStatistic() ? getTStatisticAndPValue() : getUStatisticAndPValue(); } - - public String getStatisticName() { return useTStatistic() ? "T" : "V"; } - - public Pair getTStatisticAndPValue() { - Map> caseControlVectors = getCaseControl(); - if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) { - return new Pair(Double.NaN,Double.NaN); - } - double meanCase = MathUtils.average(caseControlVectors.get(CaseControl.Cohort.CASE),true); - double varCase = MathUtils.variance(caseControlVectors.get(CaseControl.Cohort.CASE),meanCase,true); - double nCase = caseControlVectors.get(CaseControl.Cohort.CASE).size(); - double meanControl = MathUtils.average(caseControlVectors.get(CaseControl.Cohort.CONTROL),true); - double varControl = MathUtils.variance(caseControlVectors.get(CaseControl.Cohort.CONTROL),meanControl,true); - double nControl = caseControlVectors.get(CaseControl.Cohort.CONTROL).size(); - - double df_num = Math.pow(varCase/nCase + varControl/nControl,2); - double df_denom = Math.pow(varCase/nCase,2)/(nCase-1) + Math.pow(varControl/nControl,2)/(nControl-1); - double t = (meanCase-meanControl)/Math.sqrt(varCase/nCase+varControl/nControl); - - StudentT studentT = new StudentT(df_num/df_denom,null); - double p = t < 0 ? 2*studentT.cdf(t) : 2*(1-studentT.cdf(t)); - - return new Pair(t,p); - } - - public Double getZConfDelta() { - Map> caseControlVectors = getCaseControl(); - if ( caseControlVectors == null || caseControlVectors.get(CaseControl.Cohort.CASE) == null || caseControlVectors.get(CaseControl.Cohort.CONTROL) == null ) { - return Double.NaN; - } - double meanCase = MathUtils.average(caseControlVectors.get(CaseControl.Cohort.CASE),true); - double varCase = MathUtils.variance(caseControlVectors.get(CaseControl.Cohort.CASE),meanCase,true); - double nCase = caseControlVectors.get(CaseControl.Cohort.CASE).size(); - double meanControl = MathUtils.average(caseControlVectors.get(CaseControl.Cohort.CONTROL),true); - double varControl = MathUtils.variance(caseControlVectors.get(CaseControl.Cohort.CONTROL),meanControl,true); - double nControl = caseControlVectors.get(CaseControl.Cohort.CONTROL).size(); - double dnom = Math.sqrt(varCase/nCase+varControl/nControl); - - double t = (meanCase-meanControl)/dnom; - - return ( t < 0 ? -1.0*dnom*(t-zVal) : dnom*(t-zVal)); - } - -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizable.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizable.java deleted file mode 100755 index 29e985295..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizable.java +++ /dev/null @@ -1,16 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.statistics; - -import org.broadinstitute.sting.utils.collections.Pair; - -import java.util.Collection; - -/** - * Created by IntelliJ IDEA. - * User: Ghost - * Date: 4/5/11 - * Time: 11:35 AM - * To change this template use File | Settings | File Templates. - */ -public interface Dichotomizable { - public abstract Pair,Collection> getDichotomizedData(); -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizer1D.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizer1D.java deleted file mode 100755 index 82922d316..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/Dichotomizer1D.java +++ /dev/null @@ -1,79 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.statistics; - -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.collections.Pair; - -import java.util.ArrayList; -import java.util.Collection; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 4/4/11 - * Time: 7:18 PM - * To change this template use File | Settings | File Templates. - */ -public class Dichotomizer1D { - - public Dichotomizer1D() { } // instantiation required for access to transform - - public abstract class Transform { - public abstract double transform(double n); - - public Collection apply(Collection collection) { - ArrayList tData = new ArrayList(collection.size()); - for ( Number n : collection ) { - tData.add(transform(n.doubleValue())); - } - - return tData; - } - - public Transform() { - - } - } - - /** - * Tries to measure effect size by the separation of setOne and setTwo clusters, with a window spread factor of 3 - * @param setOne - collection of data from set one - * @param setTwo - colleciton of data from set two - * @return - the so-called "Z"-factor (effect size/spread) - */ - public static double simpleGaussianDichotomy(Collection setOne, Collection setTwo) { - double meanOne = MathUtils.average(setOne,true); - double meanTwo = MathUtils.average(setTwo,true); - double stdOne = Math.sqrt(MathUtils.variance(setOne, meanOne,true)); - double stdTwo = Math.sqrt(MathUtils.variance(setTwo, meanTwo,true)); - /* - System.out.print("setOne: "); - for ( Number n : setOne ) { - System.out.printf(",%.2f",n.doubleValue()); - } - System.out.print("\tsetTwo: "); - for ( Number n : setTwo ) { - System.out.printf(",%.2f",n.doubleValue()); - } - - System.out.printf("\tmn1: %.2f mn2: %.2f var1: %.2f var2: %.2f%n",meanOne,meanTwo,stdOne,stdTwo); - */ - return 1.0 - (3.0*(stdOne+stdTwo))/Math.abs(meanOne-meanTwo); - } - - public static double simpleGaussianDichotomy(Collection setOne, Collection setTwo, Transform transform) { - return simpleGaussianDichotomy(transform.apply(setOne),transform.apply(setTwo)); - } - - public static double simpleGaussianDichotomy(Dichotomizable dichotomizable) { - Pair,Collection> dichotomizedData = dichotomizable.getDichotomizedData(); - return simpleGaussianDichotomy(dichotomizedData.first,dichotomizedData.second); - } - - public static double simpleGaussianDichotomy(Dichotomizable dichotomizable, Transform transform) { - Pair,Collection> dichotomizedData = dichotomizable.getDichotomizedData(); - return simpleGaussianDichotomy(dichotomizedData.first,dichotomizedData.second, transform); - } - - public static Pair twoMeansDichotomy() { return null; } - -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/StatisticalTest.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/StatisticalTest.java deleted file mode 100755 index f3b66ca3f..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/statistics/StatisticalTest.java +++ /dev/null @@ -1,16 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers.association.statistics; - -import org.broadinstitute.sting.utils.collections.Pair; - -/** - * Created by IntelliJ IDEA. - * User: Ghost - * Date: 4/5/11 - * Time: 12:26 PM - * To change this template use File | Settings | File Templates. - */ -public interface StatisticalTest { - public abstract Pair getStatisticAndPValue(); - public abstract String getStatisticName(); - public abstract Double getZConfDelta(); -} diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/RFAUnitTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/RFAUnitTest.java new file mode 100755 index 000000000..bf8b8194c --- /dev/null +++ b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/RFAUnitTest.java @@ -0,0 +1,91 @@ +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.ProportionTest; +import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.ValueTest; +import org.broadinstitute.sting.utils.MannWhitneyU; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationTestRunner; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; +import org.testng.Assert; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +/** + * Created by IntelliJ IDEA. + * User: Ghost + * Date: 3/5/11 + * Time: 2:06 PM + * To change this template use File | Settings | File Templates. + */ +public class RFAUnitTest extends BaseTest { + @BeforeClass + public void init() { } + + @Test + private void testMWU() { + logger.warn("Testing MWU"); + MannWhitneyU mwu = new MannWhitneyU(); + mwu.add(0, MannWhitneyU.USet.SET1); + mwu.add(1,MannWhitneyU.USet.SET2); + mwu.add(2,MannWhitneyU.USet.SET2); + mwu.add(3,MannWhitneyU.USet.SET2); + mwu.add(4,MannWhitneyU.USet.SET2); + mwu.add(5,MannWhitneyU.USet.SET2); + mwu.add(6,MannWhitneyU.USet.SET1); + mwu.add(7,MannWhitneyU.USet.SET1); + mwu.add(8,MannWhitneyU.USet.SET1); + mwu.add(9,MannWhitneyU.USet.SET1); + mwu.add(10,MannWhitneyU.USet.SET1); + mwu.add(11,MannWhitneyU.USet.SET2); + Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu.getObservations(), MannWhitneyU.USet.SET1),25l); + Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu.getObservations(),MannWhitneyU.USet.SET2),11l); + + MannWhitneyU mwu2 = new MannWhitneyU(); + for ( int dp : new int[]{2,4,5,6,8} ) { + mwu2.add(dp,MannWhitneyU.USet.SET1); + } + + for ( int dp : new int[]{1,3,7,9,10,11,12,13} ) { + mwu2.add(dp,MannWhitneyU.USet.SET2); + } + + // tests using the hypothesis that set 2 dominates set 1 (U value = 10) + Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu2.getObservations(),MannWhitneyU.USet.SET1),10l); + Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu2.getObservations(),MannWhitneyU.USet.SET2),30l); + Pair sizes = mwu2.getSetSizes(); + Assert.assertEquals(MannWhitneyU.calculatePUniformApproximation(sizes.first,sizes.second,10l),0.4180519701814064,1e-14); + Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.first,sizes.second,10l),0.021756021756021756,1e-14); + Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(sizes.first,sizes.second,10l),0.06214143703127617,1e-14); + Assert.assertEquals((double)mwu2.runTwoSidedTest().second,2*0.021756021756021756,1e-8); + + // tests using the hypothesis that set 1 dominates set 2 (U value = 30) -- empirical should be identical, normall approx close, uniform way off + Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(sizes.second,sizes.first,30l),0.08216463976903321,1e-14); + Assert.assertEquals(MannWhitneyU.calculatePUniformApproximation(sizes.second,sizes.first,30l),0.0023473625009328147,1e-14); + Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,30l),0.021756021756021756,1e-14); // note -- exactly same value as above + + logger.warn("Set 1"); + Assert.assertEquals((double)mwu2.runOneSidedTest(MannWhitneyU.USet.SET1).second,0.021756021756021756,1e-8); + logger.warn("Set 2"); + Assert.assertEquals((double)mwu2.runOneSidedTest(MannWhitneyU.USet.SET2).second,0.021756021756021756,1e-8); + + MannWhitneyU mwu3 = new MannWhitneyU(); + for ( int dp : new int[]{0,2,4} ) { + mwu3.add(dp,MannWhitneyU.USet.SET1); + } + for ( int dp : new int[]{1,5,6,7,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34} ) { + mwu3.add(dp,MannWhitneyU.USet.SET2); + } + long u = MannWhitneyU.calculateOneSidedU(mwu3.getObservations(),MannWhitneyU.USet.SET1); + Pair nums = mwu3.getSetSizes(); + Assert.assertEquals(MannWhitneyU.calculatePRecursivelyDoNotCheckValuesEvenThoughItIsSlow(nums.first,nums.second,u),3.665689149560116E-4,1e-14); + Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(nums.first,nums.second,u),0.0032240865760884696,1e-14); + Assert.assertEquals(MannWhitneyU.calculatePUniformApproximation(nums.first,nums.second,u),0.0026195003025784036,1e-14); + + } +} diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/RegionalAssociationUnitTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/RegionalAssociationUnitTest.java deleted file mode 100755 index c046270b7..000000000 --- a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/RegionalAssociationUnitTest.java +++ /dev/null @@ -1,222 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.walkers; - -import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.ProportionTest; -import org.broadinstitute.sting.oneoffprojects.walkers.association.modules.casecontrol.ValueTest; -import org.broadinstitute.sting.utils.MannWhitneyU; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.oneoffprojects.walkers.association.AssociationTestRunner; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; -import org.testng.Assert; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; - -/** - * Created by IntelliJ IDEA. - * User: Ghost - * Date: 3/5/11 - * Time: 2:06 PM - * To change this template use File | Settings | File Templates. - */ -public class RegionalAssociationUnitTest extends BaseTest { - @BeforeClass - public void init() { } - - @Test - private void testTStatistics() { - logger.warn("Testing T statistics"); - TTest test1 = new TTest(); - test1.setCaseData((Collection) Arrays.asList(1,1,2,3,4)); - test1.setControlData((Collection) Arrays.asList(10, 10, 20, 30, 40)); - Assert.assertEquals(AssociationTestRunner.getTestValues(test1).second.first,0.02697891,1e-8); - TTest test2 = new TTest(); - test2.setCaseData((Collection) Arrays.asList(5, 6, 5, 2, 3, 8, 7, 12, 10, 6, 4, 2, 8, 7, 3)); - test2.setControlData((Collection) Arrays.asList(1, 6, 7, 2, 3, 3, 4, 1, 2, 5, 7, 3, 10, 3, 3, 2, 3)); - Assert.assertEquals(AssociationTestRunner.getTestValues(test2).second.first, 0.04100948, 1e-8); - TTest test3 = new TTest(); - test3.setCaseData((Collection) Arrays.asList(94,25,68,4,27,51,9,10,91,61,61,37,39,44,36,27,86,33,3,38,5,6,28,93,30,56,81,8,40,44)); - test3.setControlData((Collection) Arrays.asList(6,64,96,85,20,74,93,18,31,20,88,38,80,50,33,81,35,8,2,69,49,6,26,74,79,63,63,96,45,18)); - Assert.assertEquals(AssociationTestRunner.getTestValues(test3).second.first,0.2309763,1e-7); - TTest test4 = new TTest(); - test4.setCaseData((Collection) Arrays.asList(14,8,8,17,8,12,10,10,13,9,13,9,9,12,12,11,16,12,13,16,10,13,11,16,13,16,11,13,9,16,16,14,9,14,17,10,15,15,9,15,17,15,17,12,10,13,11,14,8,14)); - test4.setControlData((Collection) Arrays.asList(7,1,4,2,3,7,8,5,5,4,10,6,4,9,2,9,9,3,3,10,1,8,9,5,3,7,2,7,10,9,4,9,2,10,10,3,2,3,4,4,5,10,9,4,3,5,6,10,5,10)); - Assert.assertEquals(AssociationTestRunner.getTestValues(test4).second.first,4.291725e-20,1e-19); - Assert.assertEquals(AssociationTestRunner.getTestValues(test4).first.first,11.60592,1e-5); - } - - @Test - private void testZStatistics() { - logger.warn("Testing Z statistics"); - ZTest test1 = new ZTest(); - test1.setCaseData(new Pair(100,500)); - test1.setControlData(new Pair(55,300)); - Assert.assertEquals(AssociationTestRunner.getTestValues(test1).first.first,0.57742362050306,2e-6); - Assert.assertEquals(AssociationTestRunner.getTestValues(test1).second.first,0.56367,2e-5); - ZTest test2 = new ZTest(); - test1.setCaseData(new Pair(1020, 1800)); - test1.setControlData(new Pair(680, 1670)); - Assert.assertEquals(AssociationTestRunner.getTestValues(test1).first.first,9.3898178216531,2e-6); - ZTest test3 = new ZTest(); - test3.setCaseData(new Pair(20,60)); - test3.setControlData(new Pair(30,80)); - Assert.assertEquals(AssociationTestRunner.getTestValues(test3).first.first,-0.50917511840392,2e-6); - Assert.assertEquals(AssociationTestRunner.getTestValues(test3).second.first,0.610643593,2e-4); - } - - @Test (enabled = false) - private void testUStatistic() { - logger.warn("Testing U statistics"); - UTest test1 = new UTest(); - test1.setCaseData((Collection) Arrays.asList(2,4,5,6,8)); - test1.setControlData((Collection) Arrays.asList(1,3,7,9,10,11,12,13)); - Assert.assertEquals((double) AssociationTestRunner.mannWhitneyUTest(test1).first,-1.537,1e-4); - Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test1).second,0.092292,5e-2); // z-approximation, off by about 0.05 - Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test1).second,0.044444,1e-3); // recursive calculation - UTest test2 = new UTest(); - test2.setCaseData((Collection) Arrays.asList(1,7,8,9,10,11,15,18)); - test2.setControlData((Collection) Arrays.asList(2,3,4,5,6,12,13,14,16,17)); - Assert.assertEquals((double) AssociationTestRunner.mannWhitneyUTest(test2).first,-0.3109831608,1e-10); - UTest test3 = new UTest(); - test3.setCaseData((Collection)Arrays.asList(13,14,7,18,5,2,9,17,8,10,3,15,19,6,20,16,11,4,12,1)); - test3.setControlData((Collection) Arrays.asList(29,21,14,10,12,11,28,19,18,13,7,27,20,5,17,16,9,23,22,26)); - Assert.assertEquals((double) AssociationTestRunner.mannWhitneyUTest(test3).first,-2.907884571802469,1e-14); - Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test3).second,2*0.00302,1e-3); - UTest test4 = new UTest(); - test4.setCaseData((Collection) Arrays.asList(1,2,4,5,6,9)); - test4.setControlData((Collection) Arrays.asList(3,8,11,12,13)); - Assert.assertEquals((double) AssociationTestRunner.mannWhitneyUTest(test4).first,-1.9170289512680814,1e-14); - Assert.assertEquals(AssociationTestRunner.mannWhitneyUTest(test4).second,0.0303,1e-4); - - } - - @Test - private void testMWU() { - logger.warn("Testing MWU"); - MannWhitneyU mwu = new MannWhitneyU(); - mwu.add(0, MannWhitneyU.USet.SET1); - mwu.add(1,MannWhitneyU.USet.SET2); - mwu.add(2,MannWhitneyU.USet.SET2); - mwu.add(3,MannWhitneyU.USet.SET2); - mwu.add(4,MannWhitneyU.USet.SET2); - mwu.add(5,MannWhitneyU.USet.SET2); - mwu.add(6,MannWhitneyU.USet.SET1); - mwu.add(7,MannWhitneyU.USet.SET1); - mwu.add(8,MannWhitneyU.USet.SET1); - mwu.add(9,MannWhitneyU.USet.SET1); - mwu.add(10,MannWhitneyU.USet.SET1); - mwu.add(11,MannWhitneyU.USet.SET2); - Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu.getObservations(), MannWhitneyU.USet.SET1),25l); - Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu.getObservations(),MannWhitneyU.USet.SET2),11l); - - MannWhitneyU mwu2 = new MannWhitneyU(); - for ( int dp : new int[]{2,4,5,6,8} ) { - mwu2.add(dp,MannWhitneyU.USet.SET1); - } - - for ( int dp : new int[]{1,3,7,9,10,11,12,13} ) { - mwu2.add(dp,MannWhitneyU.USet.SET2); - } - - // tests using the hypothesis that set 2 dominates set 1 (U value = 10) - Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu2.getObservations(),MannWhitneyU.USet.SET1),10l); - Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu2.getObservations(),MannWhitneyU.USet.SET2),30l); - Pair sizes = mwu2.getSetSizes(); - Assert.assertEquals(MannWhitneyU.calculatePUniformApproximation(sizes.first,sizes.second,10l),0.4180519701814064,1e-14); - Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.first,sizes.second,10l),0.021756021756021756,1e-14); - Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(sizes.first,sizes.second,10l),0.06214143703127617,1e-14); - Assert.assertEquals((double)mwu2.runTwoSidedTest().second,2*0.021756021756021756,1e-8); - - // tests using the hypothesis that set 1 dominates set 2 (U value = 30) -- empirical should be identical, normall approx close, uniform way off - Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(sizes.second,sizes.first,30l),0.08216463976903321,1e-14); - Assert.assertEquals(MannWhitneyU.calculatePUniformApproximation(sizes.second,sizes.first,30l),0.0023473625009328147,1e-14); - Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,30l),0.021756021756021756,1e-14); // note -- exactly same value as above - - logger.warn("Set 1"); - Assert.assertEquals((double)mwu2.runOneSidedTest(MannWhitneyU.USet.SET1).second,0.021756021756021756,1e-8); - logger.warn("Set 2"); - Assert.assertEquals((double)mwu2.runOneSidedTest(MannWhitneyU.USet.SET2).second,0.021756021756021756,1e-8); - - MannWhitneyU mwu3 = new MannWhitneyU(); - for ( int dp : new int[]{0,2,4} ) { - mwu3.add(dp,MannWhitneyU.USet.SET1); - } - for ( int dp : new int[]{1,5,6,7,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34} ) { - mwu3.add(dp,MannWhitneyU.USet.SET2); - } - long u = MannWhitneyU.calculateOneSidedU(mwu3.getObservations(),MannWhitneyU.USet.SET1); - Pair nums = mwu3.getSetSizes(); - Assert.assertEquals(MannWhitneyU.calculatePRecursivelyDoNotCheckValuesEvenThoughItIsSlow(nums.first,nums.second,u),3.665689149560116E-4,1e-14); - Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(nums.first,nums.second,u),0.0032240865760884696,1e-14); - Assert.assertEquals(MannWhitneyU.calculatePUniformApproximation(nums.first,nums.second,u),0.0026195003025784036,1e-14); - - } - - - private class TTest extends ValueTest { - Map> toTest = new HashMap>(2); - - @Override - public Map> getCaseControl() { - return toTest; - } - - public void setCaseData(Collection data) { - toTest.put(Cohort.CASE,data); - } - - public void setControlData(Collection data) { - toTest.put(Cohort.CONTROL,data); - } - - public Collection map(ReadBackedPileup rbp) { return null; } - public int getWindowSize() { return 1; } - - @Override - public boolean useTStatistic() { return true; } - - public boolean usePreviouslySeenReads() { return false; } - } - - private class ZTest extends ProportionTest { - Map> toTest = new HashMap>(2); - - @Override - public Map> getCaseControl() { - return toTest; - } - - public void setCaseData(Pair data) { - toTest.put(Cohort.CASE,data); - } - - public void setControlData(Pair data) { - toTest.put(Cohort.CONTROL,data); - } - - public Pair map(ReadBackedPileup p) { return null; } - public int getWindowSize() { return 1; } - public int slideByValue() { return 1; } - public boolean usePreviouslySeenReads() { return true; } - } - - private class UTest extends ValueTest { - TTest test = new TTest(); - public boolean usePreviouslySeenReads() { return false; } - public int getWindowSize() { return 1; } - public int slideByValue() { return 1; } - public Collection map(ReadBackedPileup p ){ return null; } - - @Override - public Map> getCaseControl() { - return test.getCaseControl(); - } - - public void setCaseData(Collection data) { test.setCaseData(data);} - public void setControlData(Collection data) { test.setControlData(data); } - } -}