bye bye
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6035 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9b90e9385d
commit
df71d5b965
|
|
@ -1,54 +0,0 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.newassociation;

import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;

import java.io.File;
import java.util.List;

/**
 * Argument collection for the read feature associator and related walkers.
 *
 * Pure data holder: every field is a public @Argument-annotated value that the
 * GATK command-line engine populates before the walker's initialize() runs.
 * Defaults below are the values used when the corresponding flag is omitted.
 */
public class RFAArgumentCollection {

    // Simple class names of the ReadFeatureAggregator subclasses to test;
    // null (the default) means "test every available feature" (see
    // RFAWalker.getFeatureAggregators).
    @Argument(doc="ReadFeatures you want to test. None specified = all will be tested.",required=false,shortName="f",fullName="Feature")
    public List<String> inputFeatures = null;

    // Width, in bases, of each association-test window.
    @Argument(doc="Size of window on which to perform tests",required=false,fullName="windowSize")
    public int windowSize = 50;

    // Step, in bases, between consecutive window starts. Must evenly divide
    // windowSize (enforced by RFAWalker.initialize()).
    @Argument(doc="Size of the jump between tested windows",required=false,fullName="windowJump")
    public int windowJump = 10;

    // One sample ID per line; samples listed here are the "case" cohort.
    @Argument(doc="File containing a list of case samples",required=false,shortName="case",fullName="case")
    public File caseFile = null;

    // One sample ID per line; samples listed here are the "control" cohort.
    @Argument(doc="File containing a list of control samples",required=false,shortName="control",fullName="control")
    public File controlFile = null;

    // Fixed significance threshold expressed as a Z-score (two cohorts'
    // means must differ by at least this many standard errors).
    @Argument(doc="Fixed significance level, as a Z-score",required=false,shortName="z",fullName="fixedZ")
    public double fixedZ = 6.0;

    // Reads with insert size strictly below this are flagged aberrant.
    @Argument(doc="Insert size below which to flag a read as aberrant",required=false,shortName="LIS",fullName="LowInsertSize")
    public int lowInsertSize = 50;

    // Reads with insert size strictly above this are flagged aberrant.
    @Argument(doc="Insert size above which to flag a read as aberrant",required=false,shortName="HIS",fullName="HighInsertSize")
    public int highInsertSize = 450;

    // Per-sample Z-score threshold for calling a sample's proportion of
    // affected reads significant.
    @Argument(doc="Significance level for determining whether a sample contains a significant proportion of affected reads",required=false,shortName="sz",fullName="perSampleZ")
    public double sampleZThresh = 3.0;

    // Minimum proportion of affected reads considered meaningful.
    // NOTE(review): upper-case name on a non-final public field is
    // unconventional, but it is part of the public interface and is kept.
    @Argument(doc="Lower bound for significant proportion of affected reads",shortName="se",fullName="sampleEpsilon",required=false)
    public double EPSILON = 0.05;

    // Number of soft/hard-clipped bases at which a read counts as "split".
    @Argument(doc="Number of clipped bases for a read to be considered \"split\"",required=false,shortName="cb",fullName="clippedBases")
    public short clippedBases = 5;

    /* todo -- these for a possible "split read" binarification of clipped bases -- if there's a fast way to traverse clipped bases
    @Argument(doc="Minimum base quality for a clipped base to be indicative of a split read",required=false,shortName="CLBQ",fullName="CLBQ")
    public short clbq = 10;

    @Argument(doc="Minimum base quality sum for clipped bases (as a string) to be indicative of a split read",required=false,shortName="CLBS",fullName="CLBS")
    public short clbs=50;
    */
}
|
||||
|
|
@ -1,264 +0,0 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.newassociation;

import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.oneoffprojects.walkers.newassociation.features.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;

import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.*;

/**
 * Read feature association walker -- associates read features between dichotomized, or multi-group cohorts.
 *
 * Emits, per tested window, a BED-style line with two statistics per feature:
 * a fixed-significance Welch-style shift statistic and a confidence-interval
 * sum ("weighted delta").
 *
 * todo -- need a heuristic to stop doing tests where there is certainly no signal
 * todo -- for most features there's a nuisance variable which is the proportion of *paired* reads, perhaps a pair-only setting for read features
 */
@ReadFilters({MaxInsertSizeFilter.class,MappingQualityReadFilter.class})
public class RFAWalker extends ReadWalker<SAMRecord,ReadFeatureWindow> {
    // todo -- this needs to be an argument collection that can get passed around to initialize read features etc
    @ArgumentCollection
    private RFAArgumentCollection collection = new RFAArgumentCollection();

    @Output
    public PrintStream out;

    // sample ID -> true for case, false for control; populated in initialize()
    // and guaranteed to contain every sample in the input BAM(s).
    Map<String,Boolean> caseStatus;

    protected List<ReadFeatureAggregator> aggregators; // no re-instantiation, use a list to ensure ordering

    // Scratch state written by map() and read by reduce() for the same read.
    protected GenomeLoc loc;
    protected String sample;

    /**
     * Validates arguments, reads the case/control sample lists, discovers and
     * instantiates the requested feature aggregators (sorted by class name for
     * stable output column order), and writes the output header.
     *
     * @throws UserException on bad arguments, missing/unreadable cohort files,
     *         or a BAM sample with no case/control assignment
     * @throws StingException if a feature aggregator cannot be instantiated
     */
    public void initialize() {
        if ( collection.windowSize % collection.windowJump != 0 ) {
            throw new UserException("Window size is not divisible by window jump.");
        }

        if ( collection.caseFile == null || collection.controlFile == null ) {
            throw new UserException("You must provide both a case file (-case) and a control file (-control) each listing those samples belonging to the cohort");
        }

        caseStatus = new HashMap<String,Boolean>(getToolkit().getSAMFileSamples().size());
        try {
            for ( String sample : new XReadLines(collection.caseFile) ) {
                caseStatus.put(sample,true);
            }
            // note: a sample listed in both files ends up "control" (last write wins)
            for ( String sample : new XReadLines(collection.controlFile)) {
                caseStatus.put(sample,false);
            }

            // every sample in the input must have an assignment, so later
            // unboxing of caseStatus.get(...) cannot NPE
            for ( Sample sample : getToolkit().getSAMFileSamples() ) {
                if ( ! caseStatus.containsKey(sample.getId())) {
                    throw new UserException("No case/control status for sample "+sample.getId());
                }
            }

        } catch ( FileNotFoundException e ) {
            throw new UserException("Unable to open a case/control file",e);
        }

        Set<Class<? extends ReadFeatureAggregator>> aggregatorSet = getFeatureAggregators(collection.inputFeatures);
        Set<ReadFeatureAggregator> rfHolder1 = new HashSet<ReadFeatureAggregator>(aggregatorSet.size());
        try {
            // each aggregator class must expose a (RFAArgumentCollection) constructor
            for ( Class<? extends ReadFeatureAggregator> featureClass : aggregatorSet ) {
                ReadFeatureAggregator readFeature = featureClass.getConstructor(RFAArgumentCollection.class).newInstance(collection);
                rfHolder1.add(readFeature);
            }
        } catch ( Exception e ) {
            throw new StingException("A read feature instantiation error occurred during initialization",e);
        }

        // sort by simple class name so output columns are deterministic
        ReadFeatureAggregator[] rfHolder2 = new ReadFeatureAggregator[rfHolder1.size()];
        int idx = 0;
        for ( ReadFeatureAggregator f : rfHolder1 ) {
            rfHolder2[idx++] = f;
        }
        Arrays.sort(rfHolder2, new Comparator<ReadFeatureAggregator>() {
            @Override
            public int compare(ReadFeatureAggregator a, ReadFeatureAggregator b) {
                return a.getClass().getSimpleName().compareTo(b.getClass().getSimpleName());
            }
        });
        aggregators = Arrays.asList(rfHolder2);

        writeHeader();
    }

    /**
     * @return a fresh window accumulator over the configured aggregators and cohorts
     */
    public ReadFeatureWindow reduceInit() {
        // (a local Set of sample IDs built here in the original was never
        // used by the constructor below; it has been removed)
        return new ReadFeatureWindow(aggregators,caseStatus,collection);
    }

    /**
     * Records the read's start locus and sample for reduce(), passing the read through.
     *
     * @return the read, or null for unmapped reads (null reference context)
     */
    public SAMRecord map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
        if ( ref == null ) { return null; } // unmapped reads have null ref contexts
        loc = getToolkit().getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),read.getAlignmentStart());
        sample = read.getReadGroup().getSample();
        return read;
    }

    /**
     * Feeds the read into the sliding windows; runs the association tests on
     * any windows that the read's position has completed.
     */
    public ReadFeatureWindow reduce(SAMRecord read, ReadFeatureWindow prevReduce) {
        if ( read != null ) {
            List<Map<String,List<ReadFeatureAggregator>>> completed = prevReduce.inc(read,loc,sample);
            // todo -- run tests here; for now just log that a window/multiple windows are complete
            if ( completed.size() > 0 ) {
                // bed format
                int locShift = 0;
                for ( Map<String,List<ReadFeatureAggregator>> samWindow : completed ) {
                    // NOTE(review): the hard-coded 52 presumably derives from
                    // windowSize (50) plus an off-by-style correction -- it is
                    // NOT tied to collection.windowSize, so non-default window
                    // sizes would mislabel output coordinates. TODO confirm.
                    GenomeLoc window = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(),loc.getStart()-52+locShift,loc.getStop()-1);
                    runWindowTests(samWindow, window);
                    locShift += collection.windowJump;
                }
            }
        }
        return prevReduce;
    }

    /**
     * Resolves feature names to aggregator classes discovered via the plugin manager.
     *
     * @param requestedFeatures simple class names requested on the command
     *        line, or null to select every available aggregator
     * @return the selected aggregator classes
     * @throws UserException if a requested name matches no available class
     */
    public Set<Class<? extends ReadFeatureAggregator>> getFeatureAggregators(List<String> requestedFeatures) {
        HashSet<Class<? extends ReadFeatureAggregator>> newFeatureSet = new HashSet<Class<? extends ReadFeatureAggregator>>();
        List<Class<? extends ReadFeatureAggregator>> availableFeatures = new PluginManager<ReadFeatureAggregator>(ReadFeatureAggregator.class).getPlugins();

        // fixed: test the method's own parameter rather than reaching back to
        // collection.inputFeatures (identical for the current caller, but the
        // old form silently ignored the argument)
        if ( requestedFeatures == null ) {
            newFeatureSet.addAll(availableFeatures);
            return newFeatureSet;
        }

        Map<String,Class<? extends ReadFeatureAggregator>> classNameToClass = new HashMap<String,Class<? extends ReadFeatureAggregator>>(collection.inputFeatures.size());
        for ( Class<? extends ReadFeatureAggregator> clazz : availableFeatures ) {
            classNameToClass.put(clazz.getSimpleName(),clazz);
        }

        for ( String s : requestedFeatures) {
            if ( classNameToClass.containsKey(s) ) {
                newFeatureSet.add(classNameToClass.get(s));
            } else {
                throw new UserException("The name "+s+" does not correspond to an available read feature class.");
            }
        }

        return newFeatureSet;
    }

    /**
     * Writes one BED-style output line for the window: contig, start, stop,
     * then (fixed delta, weighted delta) per aggregator.
     */
    public void runWindowTests(Map<String,List<ReadFeatureAggregator>> window, GenomeLoc loc) {
        // two main tests: fixed-significance shift, and confidence-interval sum
        out.printf("%s\t%d\t%d",loc.getContig(),loc.getStart(),loc.getStop());
        for ( int agIdx = 0; agIdx < aggregators.size(); agIdx ++ ) {
            double fixedDelta = fixedSignificance(window.get("case").get(agIdx),window.get("control").get(agIdx));
            double weightedDelta = confidenceIntervalSum(window,agIdx);
            out.printf("\t%.2e\t%.2e",Double.isNaN(fixedDelta) ? 0.0 : fixedDelta,weightedDelta);
        }
        out.printf("%n");
    }

    /**
     * Welch-style two-sample statistic between pooled case and control
     * aggregates for one feature.
     *
     * @return 0.0 when either cohort saw no reads or |Z| falls below the
     *         fixedZ threshold; otherwise the raw statistic
     */
    public double fixedSignificance(ReadFeatureAggregator caseAg, ReadFeatureAggregator controlAg) {
        if ( caseAg.getnReads() == 0 || controlAg.getnReads() == 0 ) {
            return 0.0;
        }
        double stat_num = caseAg.getMean() - controlAg.getMean();
        // Welch denominator: sqrt(s1^2/n1 + s2^2/n2)
        double stat_denom = Math.sqrt(caseAg.getUnbiasedVar()/caseAg.getnReads() + controlAg.getUnbiasedVar()/controlAg.getnReads());
        double stat = stat_num/stat_denom;
        // compare squares to avoid an abs() call: |stat| < fixedZ
        if ( stat*stat < collection.fixedZ*collection.fixedZ ) {
            return 0.0;
        } else {
            //return (stat > 0) ? stat_denom*(stat-collection.fixedZ) : stat_denom*(stat+collection.fixedZ);
            return stat;
        }
    }

    /**
     * Combines per-sample estimates (only samples whose mean is "shifted"
     * past a feature-specific threshold) into precision-weighted cohort
     * Gaussians, and returns the gap between the two cohorts' fixedZ/4
     * confidence bounds.
     *
     * NOTE(review): returns NaN when no sample in a cohort qualifies
     * (0/0 weight division) -- presumably tolerated downstream; confirm.
     *
     * @param window sample id (plus pooled "case"/"control" keys) -> per-feature aggregators
     * @param offset index of the feature under test within each aggregator list
     */
    public double confidenceIntervalSum(Map<String,List<ReadFeatureAggregator>> window, int offset) {
        // this comment serves as an explicit normality assumption (e.g. that the DF will be large)
        double caseWeightMean = 0.0;
        double caseWeightVar = 0.0;
        double caseSumWeight = 0.0;
        double controlWeightMean = 0.0;
        double controlWeightVar = 0.0;
        double controlSumWeight = 0.0;
        for ( Map.Entry<String,List<ReadFeatureAggregator>> sampleEntry : window.entrySet() ) {
            // skip the pooled cohort entries; only per-sample aggregates contribute
            if ( ! sampleEntry.getKey().equals("case") && ! sampleEntry.getKey().equals("control") ) {
                // first check if the sample is shifted from zero (CI does not include zero)
                // todo -- fixme. This will always be true for insert sizes, clipped reads, mapping quality...should have an avg value

                ReadFeatureAggregator aggregator = sampleEntry.getValue().get(offset);
                if ( aggregator.getnReads() == 0 ) {
                    continue;
                }

                boolean shifted;
                int shiftThresh = 0;
                // todo -- feature-specific thresholds hard-coded here; awful, generalize
                if ( aggregator instanceof InsertSize ) {
                    shiftThresh = 150;
                }
                if ( aggregator instanceof ClippedBases ) {
                    shiftThresh = 6;
                }
                // NOTE(review): sqrt(var)/n looks like it was meant to be the
                // standard error sqrt(var/n) -- TODO confirm before changing
                if ( aggregator.getMean() < shiftThresh ) {
                    // use fixedZ/4 to be a little bit less strenuous -- todo -- make an input?
                    shifted = aggregator.getMean() + collection.fixedZ/4*Math.sqrt(aggregator.getUnbiasedVar())/aggregator.getnReads() < shiftThresh;
                } else {
                    shifted = aggregator.getMean() - collection.fixedZ/4*Math.sqrt(aggregator.getUnbiasedVar())/aggregator.getnReads() > shiftThresh;
                }
                if ( shifted ) {
                    double twoS2 = 2*aggregator.getUnbiasedVar()*aggregator.getUnbiasedVar();
                    // precision weight: (n-1)/(2*s^4)
                    if ( caseStatus.get(sampleEntry.getKey())) {
                        caseWeightMean += (aggregator.getnReads()-1.0)*aggregator.getMean()/(twoS2);
                        caseWeightVar += aggregator.getUnbiasedVar()*Math.pow((aggregator.getnReads()-1.0)/(twoS2),2);
                        caseSumWeight += (aggregator.getnReads()-1.0)/(twoS2);
                    } else {
                        controlWeightMean += (aggregator.getnReads()-1.0)*aggregator.getMean()/(twoS2);
                        controlWeightVar += aggregator.getUnbiasedVar()*Math.pow((aggregator.getnReads()-1.0)/(twoS2),2);
                        controlSumWeight += (aggregator.getnReads()-1.0)/(twoS2);
                    }
                }
            }
        }

        double caseGaussianMean = caseWeightMean/caseSumWeight;
        double controlGaussianMean = controlWeightMean/controlSumWeight;
        double caseGaussianVar = caseWeightVar/(caseSumWeight*caseSumWeight);
        double controlGaussianVar = controlWeightVar/(controlSumWeight*controlSumWeight);
        // todo -- is the z-factor an appropriate statistic?
        //return 1.0 - 3*(caseGaussianVar+controlGaussianVar)/Math.abs(caseGaussianMean-controlGaussianMean);
        if ( caseGaussianMean > controlGaussianMean ) {
            // want to examine the case lower fixedZ*stdev vs the control upper fixedZ*stdev
            return (caseGaussianMean-collection.fixedZ/4*Math.sqrt(caseGaussianVar)) - (controlGaussianMean + collection.fixedZ/4*Math.sqrt(controlGaussianVar));
        } else {
            // want to examine the case upper fixedZ*stdev vs the control lower fixedZ*stdev
            return (controlGaussianMean-collection.fixedZ/4*Math.sqrt(controlGaussianVar)) - ( caseGaussianMean + collection.fixedZ/4*Math.sqrt(caseGaussianVar));
        }
    }

    /**
     * Writes the "track type=bedTable" header naming the two output columns
     * (delta "-d", confidence "-c") produced per aggregator, in the same
     * sorted order used by runWindowTests.
     */
    public void writeHeader() {
        // StringBuilder: single-threaded use, no need for StringBuffer locking
        StringBuilder buf = new StringBuilder();
        buf.append("description=chr,start,stop");
        for ( ReadFeatureAggregator f : aggregators ) {
            buf.append(",");
            buf.append(f.getClass().getSimpleName());
            buf.append("-d,");
            buf.append(f.getClass().getSimpleName());
            buf.append("-c");
        }
        out.printf("track type=bedTable %s%n",buf);
    }
}
|
||||
|
|
@ -1,227 +0,0 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.newassociation;

import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.oneoffprojects.walkers.newassociation.features.ReadFeatureAggregator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.StingException;

import java.util.*;

/**
 * Maintains a set of tiled, overlapping genomic windows (width windowSize,
 * spaced windowJump apart), each mapping sample IDs -- plus pooled
 * "case"/"control" keys -- to per-feature ReadFeatureAggregators. Reads are
 * folded into every live window via inc(); windows that a new read has moved
 * past are "expired" and returned to the caller for testing.
 *
 * User: chartl
 */
public class RFWindow {

    private RFAArgumentCollection argumentCollection; // the RF Argument Collection from the RF Walker
    private List<GenomeLoc> windowStarts; // holds the starting positions of the windows
    private Map<String,Boolean> sampleCohortMap; // identifies case samples ("true") and control samples ("false")
    List<ReadFeatureAggregator> aggregators; // read feature aggregators to be used (maintained for fast cloning, and ordering)
    // feature ---> sample ---> count
    List<Map<String,List<ReadFeatureAggregator>>> aggregatorWindows; // holds the map between samples and features in the current windows
    private GenomeLocParser parser; // the active parser, for creating GenomeLocs for empty windows
    private GenomeLoc previousLoc; // the previous genome loc within the user interval given to the RFWindow

    /**
     * Defines a new window which maps samples to read feature aggregators.
     * Keys are every sample in sampleCohortMap plus the pooled "case" and
     * "control" entries; each value is a fresh aggregator list.
     * @return - a new element for aggregatorWindows
     */
    private Map<String,List<ReadFeatureAggregator>> newWindow() {
        Map<String,List<ReadFeatureAggregator>> win = new HashMap<String,List<ReadFeatureAggregator>>(sampleCohortMap.size());
        for ( String s : sampleCohortMap.keySet() ) {
            win.put(s,getAggregators());
        }
        // todo -- generalize me
        win.put("case",getAggregators());
        win.put("control",getAggregators());

        return win;
    }

    /**
     * Generates a list of new aggregators to collect data. Each element is a
     * freshly-constructed instance of the same class as the corresponding
     * template in this.aggregators, preserving order.
     * @return list of ReadFeatureAggregators to be the value of a new window
     */
    private List<ReadFeatureAggregator> getAggregators() {
        ArrayList<ReadFeatureAggregator> newEmptyAgs = new ArrayList<ReadFeatureAggregator>(aggregators.size());
        try {
            // reflective clone via the (RFAArgumentCollection) constructor
            for ( ReadFeatureAggregator ag : aggregators ) {
                newEmptyAgs.add(ag.getClass().getConstructor(RFAArgumentCollection.class).newInstance(argumentCollection));
            }
        } catch (Exception e) {
            throw new StingException("Error instantiating read feature aggregator",e);
        }

        return newEmptyAgs;
    }

    /**
     * A constructor for the RFWindow object
     * @param aggregators - ordered template aggregators to clone for each new window
     * @param collection - the argument collection, from the walker
     * @param cohortMap - the map between sample IDs and whether or not it is a case sample, from the walker
     * @param parser - the Genome Loc Parser, from the walker
     */
    public RFWindow(List<ReadFeatureAggregator> aggregators, RFAArgumentCollection collection, Map<String,Boolean> cohortMap, GenomeLocParser parser) {
        this.argumentCollection = collection;
        this.sampleCohortMap = cohortMap;
        this.parser = parser;
        this.aggregators = aggregators;
    }

    /**
     * Instantiate the tiled windows of sample -> List<aggregator> maps, filling in empty windows up to the one which contains the
     * provided genomeloc if necessary.
     * @param loc - the location to fill up to, usually the starting position of a read or an interval
     * @param currentUserInterval - the current user-provided interval being processed by the walker
     */
    @Requires({"! currentUserInterval.isBefore(loc)"})
    public void instantiate(GenomeLoc loc, GenomeLoc currentUserInterval) {
        // initial capacity = number of windows overlapping one full window span
        aggregatorWindows = new ArrayList<Map<String,List<ReadFeatureAggregator>>>(argumentCollection.windowSize/argumentCollection.windowJump);
        windowStarts = new ArrayList<GenomeLoc>(argumentCollection.windowSize/argumentCollection.windowJump);
        this.fill(loc, currentUserInterval);
    }

    /**
     * Fills the tiled window lists with empty windows up to the one which includes the locus
     * todo -- this can take a lot of memory for large intervals instantiated far into them, e.g.
     * |---------------------------------------------------------------------| interval
     * . <=== locus
     * todo -- perhaps a reduced representation for empty windows that were not created but already have expired?
     * @param loc - the location to fill up to, usually the starting position of a read or an interval
     * @param currentUserInterval - the current user-provided interval being processed by the walker
     */
    @Requires({"! currentUserInterval.isBefore(loc)"})
    public void fill(GenomeLoc loc, GenomeLoc currentUserInterval) {
        // case 0: if the windows are empty add in the beginning of the interval
        if ( windowStarts.size() == 0 ) {
            windowStarts.add(currentUserInterval.getStartLocation());
            aggregatorWindows.add(newWindow());
        }

        // case 1: loc is before or within windowJump bases of the current user interval; we need only instantiate the first window
        if ( loc.isBefore(currentUserInterval) || loc.distance(currentUserInterval) <= argumentCollection.windowJump ) {
            // do nothing at all
        } else {
            // case 2: loc is somewhere in the middle or at the end of current user interval, need to fill in windows up until then
            // NOTE(review): nextLoc computed here is recomputed identically on
            // the first loop pass before being stored -- redundant but harmless;
            // kept as-is to preserve exact loop-termination behavior.
            GenomeLoc nextLoc = shiftLocByJump(windowStarts.get(windowStarts.size()-1),argumentCollection.windowJump);
            while ( loc.distance(nextLoc) > argumentCollection.windowJump ) {
                nextLoc = shiftLocByJump(windowStarts.get(windowStarts.size()-1),argumentCollection.windowJump);
                windowStarts.add(nextLoc);
                aggregatorWindows.add(newWindow());
            }
        }

    }

    /**
     * Shifts a location from chrZ:X to chrZ:X+jump
     * @param loc - location to shift
     * @param jump - amount to shift by
     * @return loc with start position shifted by jump
     */
    @Requires("loc.getStart()+jump < parser.getContigInfo(loc.getContig()).getSequenceLength()") // e.g. that the new loc is not off the end of the contig
    private GenomeLoc shiftLocByJump(GenomeLoc loc, int jump) {
        return parser.createGenomeLoc(loc.getContig(),loc.getStart()+jump);
    }

    /**
     * Fills in missing windows between the previously-seen one and the ending interval, then expires all windows, returning
     * them as complete, resetting previousLocation to null so that the next read re-instantiates internal window data
     * @param userIntervalEnding - the user interval which is ending
     * @return those currently active windows, plus empty interpolating windows between the last active window and the end of the interval
     */
    public List<Pair<GenomeLoc,Map<String,List<ReadFeatureAggregator>>>> flush(GenomeLoc userIntervalEnding) {
        // jump in locations -- flush the windows
        List<Pair<GenomeLoc,Map<String,List<ReadFeatureAggregator>>>> complete = new ArrayList<Pair<GenomeLoc,Map<String,List<ReadFeatureAggregator>>>>(aggregators.size());
        // fill in any uncovered windows
        GenomeLoc iStop = userIntervalEnding.getStopLocation();
        fill(iStop,userIntervalEnding);
        // now expire all windows, terminating them either at the proper window size, or at the endpoint of the interval if it comes sooner
        while ( windowStarts.size() > 0 ) {
            // windows expire oldest-first (index 0 of both parallel lists)
            Map<String,List<ReadFeatureAggregator>> cMap = aggregatorWindows.remove(0);
            GenomeLoc wsLoc = windowStarts.remove(0);

            GenomeLoc cLoc;
            if ( wsLoc.distance(iStop) > argumentCollection.windowSize ) {
                cLoc = parser.createGenomeLoc(wsLoc.getContig(),wsLoc.getStart(),wsLoc.getStart()+argumentCollection.windowSize);
            } else {
                cLoc = wsLoc.endpointSpan(iStop);
            }

            complete.add(new Pair<GenomeLoc,Map<String,List<ReadFeatureAggregator>>>(cLoc,cMap));
        }

        previousLoc = null; // will re-instantiate data upon next loc

        return complete;
    }

    /**
     * Workhorse method:
     * - determines if new windows need be made
     * - determines if old windows need be tested & trashed
     * - determines which windows need to be updated & updates them
     *
     * @param record - the read to fold into the live windows
     * @param loc - starting alignment position of the read
     * @param sample - the sample ID from whom the read was sequenced
     * @param userInterval - the current user-provided interval being processed
     * @return - those feature window(s) that need be tested (and then discarded)
     */
    public List<Pair<GenomeLoc,Map<String,List<ReadFeatureAggregator>>>> inc(SAMRecord record, GenomeLoc loc, String sample, GenomeLoc userInterval) {
        List<Pair<GenomeLoc,Map<String,List<ReadFeatureAggregator>>>> complete = new ArrayList<Pair<GenomeLoc,Map<String,List<ReadFeatureAggregator>>>>(aggregators.size());
        if ( previousLoc == null ) {
            // first time, gotta instantiate stuff
            // NOTE(review): only the first window receives this read here,
            // whereas the other branches update every live window -- presumably
            // intentional since only window 0 can overlap the first read; confirm.
            instantiate(loc,userInterval);
            windowInc(sample,record,aggregatorWindows.get(0));
        } else if ( loc.distance(previousLoc) == 0 ) {
            // best and most common case: just update the living windows
            for ( Map<String,List<ReadFeatureAggregator>> win : aggregatorWindows ) {
                windowInc(sample,record,win);
            }
        } else {
            // another standard case: we've gone to some further base in the same interval
            while ( loc.distance(windowStarts.get(windowStarts.size()-1)) > argumentCollection.windowJump ) {
                // careful, don't use the location itself, but add in windows every winJump bases until this condition is not met
                windowStarts.add(shiftLocByJump(windowStarts.get(windowStarts.size()-1),argumentCollection.windowJump));
                aggregatorWindows.add(newWindow());
            }

            // expire windows the new position has moved completely past;
            // their span ends at the previous read position
            while ( windowStarts.size() > 0 && loc.distance(windowStarts.get(0)) > argumentCollection.windowSize ) {
                Map<String,List<ReadFeatureAggregator>> cMap = aggregatorWindows.remove(0);
                GenomeLoc cLoc = windowStarts.remove(0).endpointSpan(previousLoc);
                complete.add(new Pair<GenomeLoc,Map<String,List<ReadFeatureAggregator>>>(cLoc,cMap));
            }

            for ( Map<String,List<ReadFeatureAggregator>> win : aggregatorWindows ) {
                windowInc(sample,record,win);
            }

        }

        previousLoc = loc;

        return complete;
    }

    /**
     * Incorporate new features into the window: the read is aggregated both
     * under its own sample's entry and under the pooled cohort entry
     * ("case" or "control") chosen by sampleCohortMap.
     * @param sample - id of sample from which the features come
     * @param record - the read
     * @param window - the particular window to be updated
     */
    private void windowInc(String sample, SAMRecord record, Map<String,List<ReadFeatureAggregator>> window) {
        if ( sample == null || record == null ) { return; }

        for (ReadFeatureAggregator aggregator : window.get(sample) ) {
            aggregator.aggregate(record);
        }

        for ( ReadFeatureAggregator aggregator : window.get( sampleCohortMap.get(sample) ? "case" : "control") ) {
            aggregator.aggregate(record);
        }
    }

}
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.newassociation.features;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.newassociation.RFAArgumentCollection;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 5/4/11
|
||||
* Time: 12:52 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public abstract class BinaryFeatureAggregator extends ReadFeatureAggregator<Boolean> {
|
||||
|
||||
public BinaryFeatureAggregator(RFAArgumentCollection col) {
|
||||
super(col);
|
||||
}
|
||||
|
||||
public void aggregate(Boolean hasFeature) {
|
||||
// now robustified
|
||||
mean = ( 1+(hasFeature ? 1 : 0)+nReads*mean)/(++nReads+1);
|
||||
var = mean*(1-mean);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue