Oneoff project, totally unrelated to anything
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2776 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
334da80e8b
commit
f7e7bcd2ef
|
|
@ -0,0 +1,167 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Dec 3, 2009
|
||||
* Time: 11:54:35 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
|
||||
public class DSBWalker extends LocusWalker<Integer,Integer> {
|
||||
@Argument(fullName="coverage",shortName="C",doc="Regions with coverage above specified threshold will be reported",required=true)
|
||||
int COV_CUTOFF = 0;
|
||||
@Argument(fullName="minLength",shortName="ml",doc="Only regions longer than the specified value will be reported",required=false)
|
||||
int MINLENGTH_CUTOFF = 0;
|
||||
|
||||
private int MERGE_DIST = 300; // merge intervals that are closer than this distance from one another
|
||||
|
||||
private long maxcov = 0;
|
||||
private long maxz = 0;
|
||||
private long mergedmaxcov = 0;
|
||||
private long mergedmaxz = 0;
|
||||
GenomeLoc mergedInterval = null;
|
||||
GenomeLoc currentInterval = null;
|
||||
|
||||
private long nIntervals = 0;
|
||||
|
||||
private void emit(GenomeLoc l) {
|
||||
if ( mergedInterval == null ) {
|
||||
mergedInterval = l.clone();
|
||||
mergedmaxcov = maxcov;
|
||||
mergedmaxz = maxz;
|
||||
return;
|
||||
}
|
||||
|
||||
if ( mergedInterval.getContigIndex() != l.getContigIndex() ) {
|
||||
long length = mergedInterval.getStop()-mergedInterval.getStart()+1;
|
||||
if ( length >= MINLENGTH_CUTOFF ) {
|
||||
out.println(mergedInterval+"\t"+length+"\t"+mergedmaxcov+"\t"+mergedmaxz); // eject old interval
|
||||
nIntervals++;
|
||||
}
|
||||
mergedInterval = l.clone();
|
||||
mergedmaxcov = maxcov;
|
||||
mergedmaxz = maxz;
|
||||
return;
|
||||
}
|
||||
|
||||
// merged interval exists and new interval is on the same contig. Check if the new interval
|
||||
// is close enough so we got to merge and keep waiting:
|
||||
|
||||
if ( l.getStart() - mergedInterval.getStop() < MERGE_DIST ) {
|
||||
mergedInterval = GenomeLocParser.setStop(mergedInterval,l.getStop());
|
||||
if ( maxcov > mergedmaxcov) mergedmaxcov = maxcov;
|
||||
if ( maxz > mergedmaxz ) mergedmaxz = maxz;
|
||||
return;
|
||||
}
|
||||
|
||||
// nope, new interval is far enough. Print old one and keep current one.
|
||||
|
||||
long length = mergedInterval.getStop()-mergedInterval.getStart()+1;
|
||||
if ( length >= MINLENGTH_CUTOFF ) {
|
||||
out.println(mergedInterval+"\t"+length+"\t"+mergedmaxcov+"\t"+mergedmaxz); // eject old interval
|
||||
nIntervals++;
|
||||
}
|
||||
mergedInterval = l.clone();
|
||||
mergedmaxcov = maxcov;
|
||||
mergedmaxz = maxz;
|
||||
|
||||
}
|
||||
|
||||
public void onTraversalDone() {
|
||||
if ( mergedInterval != null ) {
|
||||
long length = mergedInterval.getStop()-mergedInterval.getStart()+1;
|
||||
if ( length >= MINLENGTH_CUTOFF ) {
|
||||
out.println(mergedInterval+"\t"+length+"\t"+mergedmaxcov+"\t"+mergedmaxz); // eject old interval
|
||||
nIntervals++;
|
||||
}
|
||||
}
|
||||
System.out.println(nIntervals+" intervals detected.");
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
List<SAMRecord> reads = pileup.getReads();
|
||||
|
||||
int nZero = pileup.getNumberOfMappingQualityZeroReads();
|
||||
|
||||
int nonZCoverage = reads.size() - nZero;
|
||||
|
||||
if ( nonZCoverage >= COV_CUTOFF ) {
|
||||
|
||||
// if we were not inside an interval, start one:
|
||||
if ( currentInterval == null ) {
|
||||
maxcov = nonZCoverage;
|
||||
maxz = nZero;
|
||||
currentInterval = context.getLocation().clone();
|
||||
// System.out.println("Setting current to "+currentInterval);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// if we were inside an interval and we just jumped onto a new contig, get rid of the old interval
|
||||
if ( currentInterval.compareContigs(context.getLocation()) != 0 ) {
|
||||
// we just moved to a new contig
|
||||
System.out.println("On contig "+context.getLocation().getContig());
|
||||
emit(currentInterval);
|
||||
maxcov = nonZCoverage;
|
||||
maxz = nZero;
|
||||
currentInterval = context.getLocation().clone();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// we are on the same contig, we are within the interval, so we need to extend the current interval:
|
||||
currentInterval = GenomeLocParser.setStop(currentInterval,context.getLocation().getStop()); // still within the interval, adjust stop
|
||||
//System.out.println("Extending current to "+currentInterval +" ("+context.getLocation()+", "+context.getLocation().getStop()+")");
|
||||
if ( nonZCoverage > maxcov ) maxcov = nonZCoverage; // adjust maxcov
|
||||
if ( nZero > maxz ) maxz = nZero; // adjust maxz
|
||||
} else {
|
||||
// low coverage, if we were inside an interval, it stops now:
|
||||
if ( currentInterval != null ) {
|
||||
// System.out.println("Emitting current as "+currentInterval);
|
||||
emit(currentInterval);
|
||||
currentInterval = null;
|
||||
maxcov = 0;
|
||||
maxz = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
*
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
public Integer reduceInit() {
|
||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||
*
|
||||
* @param value result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return sum+value; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,360 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.CircularArray;
|
||||
import org.broadinstitute.sting.utils.PrimitivePair;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Dec 12, 2009
|
||||
* Time: 2:25:44 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
|
||||
public class DSBWalkerV2 extends LocusWalker<Integer,Integer> {
|
||||
// @Argument(fullName="coverage",shortName="C",doc="Regions with coverage above specified threshold will be reported",required=true)
|
||||
// int COV_CUTOFF = 0;
|
||||
// @Argument(fullName="minLength",shortName="ml",doc="Only regions longer than the specified value will be reported",required=false)
|
||||
// int MINLENGTH_CUTOFF = 0;
|
||||
@Argument(fullName="windowSize",shortName="W",doc="Size of the sliding window",required=true)
|
||||
int WINDOW_SIZE = 100;
|
||||
@Argument(fullName="enrichmentCutoff",shortName="E",doc="Report windows with enrichment (signal/control) above this cutoff",required=true)
|
||||
double ENRICHMENT_CUTOFF = 5.0;
|
||||
@Argument(fullName="minSignal",shortName="ms",doc="Do not report windows with signal lower than this value "+
|
||||
"(this cutoff is secondary to enrichmentCutoff and guards against windows where control signal is 0 or too low,"+
|
||||
"so that control*enrichmentCutoff is too low to be convincing)",required=true)
|
||||
int MIN_SIGNAL = 10;
|
||||
|
||||
private CircularArray<PrimitivePair.Int> signalWindow = null;
|
||||
private CircularArray<PrimitivePair.Int> controlWindow = null;
|
||||
private CircularArray<PrimitivePair.Int> signalStrandsWindow = null;
|
||||
private CircularArray<PrimitivePair.Int> controlStrandsWindow = null;
|
||||
|
||||
private PrimitivePair.Long totalSignalCoverage = new PrimitivePair.Long();
|
||||
private PrimitivePair.Long totalControlCoverage = new PrimitivePair.Long();
|
||||
private PrimitivePair.Long totalSignalFwdStrands = new PrimitivePair.Long();
|
||||
private PrimitivePair.Long totalControlFwdStrands = new PrimitivePair.Long();
|
||||
|
||||
private Set<String> signalReadGroups; // we are going to remember which read groups are stimulated tagged and which are unstimulated untagged in order to be able
|
||||
private Set<String> controlReadGroups ; // to properly assign the reads coming from a merged stream
|
||||
|
||||
private long windowStart = -1;
|
||||
private long windowStop = -1;
|
||||
private int curContig = -1;
|
||||
private String curContigName = "";
|
||||
|
||||
// the following variables are for buffering and merging windows :
|
||||
private long regionStart = -1;
|
||||
private long lastWindowStart = -1;
|
||||
private PrimitivePair.Int maxSignalReads = new PrimitivePair.Int();
|
||||
private PrimitivePair.Int minSignalReads = new PrimitivePair.Int();
|
||||
private PrimitivePair.Int maxControlReads = new PrimitivePair.Int();
|
||||
private PrimitivePair.Int minControlReads = new PrimitivePair.Int();
|
||||
private double minEnrichmentUnique;
|
||||
private double maxEnrichmentUnique;
|
||||
private double minEnrichmentNonUnique;
|
||||
private double maxEnrichmentNonUnique;
|
||||
private double minEnrichmentTotal;
|
||||
private double maxEnrichmentTotal;
|
||||
private double minUniqueSignalStrandBalance = 0.0;
|
||||
private double maxUniqueSignalStrandBalance = 0.0;
|
||||
private double minNonUniqueSignalStrandBalance = 0.0;
|
||||
private double maxNonUniqueSignalStrandBalance = 0.0;
|
||||
private double minUniqueControlStrandBalance = 0.0;
|
||||
private double maxUniqueControlStrandBalance = 0.0;
|
||||
private double minNonUniqueControlStrandBalance = 0.0;
|
||||
private double maxNonUniqueControlStrandBalance = 0.0;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
int nSams = getToolkit().getArguments().samFiles.size();
|
||||
|
||||
if ( nSams != 2 ) {
|
||||
out.println("ERROR: two input bam files (signal and backround control) must be specified");
|
||||
System.exit(1);
|
||||
}
|
||||
List<Set<String>> readGroupSets = getToolkit().getMergedReadGroupsByReaders();
|
||||
signalReadGroups = readGroupSets.get(0);
|
||||
// System.out.println(signalReadGroups.size()+" read groups in signal");
|
||||
controlReadGroups = readGroupSets.get(1);
|
||||
// System.out.println(controlReadGroups.size()+" read groups in control");
|
||||
signalWindow = new CircularArray<PrimitivePair.Int>(WINDOW_SIZE);
|
||||
controlWindow = new CircularArray<PrimitivePair.Int>(WINDOW_SIZE);
|
||||
signalStrandsWindow = new CircularArray<PrimitivePair.Int>(WINDOW_SIZE);
|
||||
controlStrandsWindow = new CircularArray<PrimitivePair.Int>(WINDOW_SIZE);
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
List<SAMRecord> reads = pileup.getReads();
|
||||
|
||||
// compute coverages at the current site:
|
||||
PrimitivePair.Int signalCov = new PrimitivePair.Int();
|
||||
PrimitivePair.Int controlCov = new PrimitivePair.Int();
|
||||
PrimitivePair.Int signalFwdStrands = new PrimitivePair.Int();
|
||||
PrimitivePair.Int controlFwdStrands = new PrimitivePair.Int();
|
||||
|
||||
for ( SAMRecord r : reads ) {
|
||||
if ( signalReadGroups.contains( r.getReadGroup().getReadGroupId() ) ) {
|
||||
if ( r.getMappingQuality() == 0 ) {
|
||||
signalCov.second++;
|
||||
if ( ! r.getReadNegativeStrandFlag() ) signalFwdStrands.second++;
|
||||
}
|
||||
else {
|
||||
signalCov.first++;
|
||||
if ( ! r.getReadNegativeStrandFlag() ) signalFwdStrands.first++;
|
||||
}
|
||||
} else {
|
||||
if ( controlReadGroups.contains( r.getReadGroup().getReadGroupId() ) ) {
|
||||
if ( r.getMappingQuality() == 0 ) {
|
||||
controlCov.second++;
|
||||
if ( ! r.getReadNegativeStrandFlag() ) controlFwdStrands.second++;
|
||||
}
|
||||
else {
|
||||
controlCov.first++;
|
||||
if ( ! r.getReadNegativeStrandFlag() ) controlFwdStrands.first++;
|
||||
}
|
||||
} else {
|
||||
throw new StingException("Read "+r+" belongs to unknown read group ("+r.getReadGroup()+")");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GenomeLoc loc = context.getLocation();
|
||||
|
||||
// if ( curContig != 0 ) System.out.println(loc+" "+signalCov.first+" "+signalCov.second+" "+controlCov.first+" "+controlCov.second);
|
||||
|
||||
if ( loc.getContigIndex() != curContig || loc.getStart() >= windowStop+WINDOW_SIZE ) {
|
||||
// we jumped to the next contig, or we are on the same contig but the current position is
|
||||
// more than WINDOW_SIZE away from the current window's end (i.e. there's nothing to shift)
|
||||
checkCurrentWindow(true);
|
||||
|
||||
if ( loc.getContigIndex() != curContig ) {
|
||||
System.out.println("on contig "+loc.getContig());
|
||||
}
|
||||
curContig = loc.getContigIndex();
|
||||
curContigName = loc.getContig();
|
||||
// prevPos = loc.getStart();
|
||||
windowStart = loc.getStart();
|
||||
windowStop = windowStart + WINDOW_SIZE - 1;
|
||||
signalWindow.clear();
|
||||
controlWindow.clear();
|
||||
totalSignalCoverage.assignFrom( signalCov );
|
||||
totalControlCoverage.assignFrom( controlCov );
|
||||
totalSignalFwdStrands.assignFrom( signalFwdStrands );
|
||||
totalControlFwdStrands.assignFrom( controlFwdStrands );
|
||||
signalWindow.set(0,signalCov);
|
||||
controlWindow.set(0,controlCov);
|
||||
signalStrandsWindow.set(0,signalFwdStrands);
|
||||
controlStrandsWindow.set(0,controlFwdStrands);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// offset of the current position w.r.t. the start of the window:
|
||||
int offset = (int)(loc.getStart() - windowStart);
|
||||
|
||||
if ( offset >= WINDOW_SIZE ) {
|
||||
// if we are here, the current position is outside of the current window, but not
|
||||
// far enough so that we'd need to reinitialize the window from scratch (that was already checked above).
|
||||
// Now we need to shift.
|
||||
|
||||
// We are receiving covered positions in order, so we are guaranteed that everything prior to
|
||||
// the current position was already counted; if some elements of the windows are still nulls, it means
|
||||
// there was no coverage there
|
||||
|
||||
int shift = offset - WINDOW_SIZE + 1;
|
||||
|
||||
// scroll the window(s) base by base until the current position is inside the window. At each step
|
||||
// we will check if the window meets the requirements and should be printed out.
|
||||
for ( int i = 0 ; i < shift ; i++ ) {
|
||||
|
||||
// we are going to shift; check if the window as it is now is worth printing
|
||||
checkCurrentWindow(false);
|
||||
|
||||
// discard coverage from the first element of the window (this element is about to be shifted out of scope)
|
||||
if ( signalWindow.get(0) != null ) totalSignalCoverage.subtract(signalWindow.get(0));
|
||||
if ( signalStrandsWindow.get(0) != null ) totalSignalFwdStrands.subtract(signalStrandsWindow.get(0));
|
||||
|
||||
if ( controlWindow.get(0) != null ) totalControlCoverage.subtract(controlWindow.get(0));
|
||||
if ( controlStrandsWindow.get(0) != null ) totalControlFwdStrands.subtract(controlStrandsWindow.get(0));
|
||||
|
||||
// advnace window coordinates on the ref
|
||||
windowStart++;
|
||||
windowStop++;
|
||||
|
||||
// shift the data in the window(s):
|
||||
signalWindow.shiftData(1);
|
||||
controlWindow.shiftData(1);
|
||||
signalStrandsWindow.shiftData(1);
|
||||
controlStrandsWindow.shiftData(1);
|
||||
|
||||
offset--; // this is the new offset w.r.t. to the shifted window
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// at this point, either the current position was inside the current window, or it was outside,
|
||||
// but the window was already shifted
|
||||
totalSignalCoverage.add(signalCov);
|
||||
totalControlCoverage.add(controlCov);
|
||||
totalSignalFwdStrands.add(signalFwdStrands);
|
||||
totalControlFwdStrands.add(controlFwdStrands);
|
||||
signalWindow.set(offset,signalCov);
|
||||
controlWindow.set(offset,controlCov);
|
||||
signalStrandsWindow.set(offset,signalFwdStrands);
|
||||
controlStrandsWindow.set(offset,controlFwdStrands);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
*
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
public Integer reduceInit() {
|
||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||
*
|
||||
* @param value result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return sum+value; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Integer result) {
|
||||
printRegion();
|
||||
super.onTraversalDone(result);
|
||||
}
|
||||
|
||||
/** Checks if the currently held window satisfies the conditions set up for significance, and invokes buffered printout if so.
|
||||
* If the parameter is set to true, printout of previously held region is forced, and the buffer is reinitialized with
|
||||
* the new window if it passes the cutoffs, or left empty.
|
||||
*
|
||||
*/
|
||||
private void checkCurrentWindow(boolean force) {
|
||||
if ( force ) printRegion();
|
||||
if ( signalWindow.get(0) == null && controlWindow.get(0) == null ) return; // do not emit windows that start from empty cell; we will get them later
|
||||
if ( totalControlCoverage.first * ENRICHMENT_CUTOFF / 36.0 < MIN_SIGNAL ) { // control coverage zero or too low
|
||||
if ( totalSignalCoverage.first /28.0 > MIN_SIGNAL ) emitWindow(false); // require at least MIN_SIGNAL coverage for signal
|
||||
return;
|
||||
}
|
||||
|
||||
// if we have decent coverage in control, just check for required enrichment in the signal
|
||||
if ( ((double)totalSignalCoverage.first/28.0) / (totalControlCoverage.first/36.0) > ENRICHMENT_CUTOFF ) emitWindow(false);
|
||||
}
|
||||
|
||||
/** This is actually a delayed print command: it buffers the successive windows set for printout, merges the windows that
|
||||
* are close enough and prints only when a train of close-by windows has ended and next window received is far enough
|
||||
*/
|
||||
private void emitWindow(boolean force) {
|
||||
|
||||
if ( regionStart == -1 ) {
|
||||
resetBuffer();
|
||||
return;
|
||||
}
|
||||
|
||||
if ( force || windowStart > lastWindowStart + WINDOW_SIZE ) {
|
||||
// new window is far enough from the region we were buffering: emit old region
|
||||
|
||||
printRegion();
|
||||
resetBuffer();
|
||||
return;
|
||||
}
|
||||
|
||||
// current window is too close (overlapping) with a previous one: we need to merge
|
||||
|
||||
lastWindowStart = windowStart;
|
||||
maxSignalReads.first = Math.max(maxSignalReads.first, (int)Math.round(totalSignalCoverage.first/28.0));
|
||||
maxSignalReads.second = Math.max(maxSignalReads.second,(int)Math.round(totalSignalCoverage.second/28.0));
|
||||
minSignalReads.first = Math.min(minSignalReads.first, (int)Math.round(totalSignalCoverage.first/28.0));
|
||||
minSignalReads.second = Math.min(minSignalReads.second,(int)Math.round(totalSignalCoverage.second/28.0));
|
||||
maxControlReads.first = Math.max(maxControlReads.first,(int)Math.round(totalControlCoverage.first/36.0));
|
||||
maxControlReads.second = Math.max(maxControlReads.second,(int)Math.round(totalControlCoverage.second/36.0));
|
||||
minControlReads.first = Math.min(minControlReads.first,(int)Math.round(totalControlCoverage.first/36.0));
|
||||
minControlReads.second = Math.min(minControlReads.second,(int)Math.round(totalControlCoverage.second/36.0));
|
||||
maxEnrichmentUnique = Math.max(maxEnrichmentUnique,((double)totalSignalCoverage.first/28.0)/(totalControlCoverage.first/36.0));
|
||||
minEnrichmentUnique = Math.min(minEnrichmentUnique, ((double)totalSignalCoverage.first/28.0)/(totalControlCoverage.first/36.0));
|
||||
maxEnrichmentNonUnique = Math.max(maxEnrichmentNonUnique,((double)totalSignalCoverage.second/28.0)/(totalControlCoverage.second/36.0));
|
||||
minEnrichmentNonUnique = Math.min( minEnrichmentNonUnique, ((double)totalSignalCoverage.second/28.0)/(totalControlCoverage.second/36.0) );
|
||||
maxEnrichmentTotal = Math.max( maxEnrichmentTotal, ((double)(totalSignalCoverage.first+totalSignalCoverage.second)/28.0)/
|
||||
((totalControlCoverage.first+ totalControlCoverage.second)/36.0) );
|
||||
minEnrichmentTotal = Math.min( minEnrichmentTotal, ((double)(totalSignalCoverage.first+totalSignalCoverage.second)/28.0)/
|
||||
((totalControlCoverage.first+ totalControlCoverage.second)/36.0) );
|
||||
|
||||
|
||||
maxUniqueSignalStrandBalance = Math.max(maxUniqueSignalStrandBalance,((double)totalSignalFwdStrands.first)/totalSignalCoverage.first);
|
||||
minUniqueSignalStrandBalance = Math.min(minUniqueSignalStrandBalance,((double)totalSignalFwdStrands.first)/totalSignalCoverage.first);
|
||||
maxNonUniqueSignalStrandBalance = Math.max(maxNonUniqueSignalStrandBalance,((double)totalSignalFwdStrands.second)/totalSignalCoverage.second);
|
||||
minNonUniqueSignalStrandBalance = Math.min(minNonUniqueSignalStrandBalance,((double)totalSignalFwdStrands.second)/totalSignalCoverage.second);
|
||||
maxUniqueControlStrandBalance = Math.max(maxUniqueControlStrandBalance,((double)totalControlFwdStrands.first)/totalControlCoverage.first);
|
||||
minUniqueControlStrandBalance = Math.min(minUniqueControlStrandBalance,((double)totalControlFwdStrands.first)/totalControlCoverage.first);
|
||||
maxNonUniqueControlStrandBalance = Math.max(maxNonUniqueControlStrandBalance,((double)totalControlFwdStrands.second)/totalControlCoverage.second);
|
||||
minNonUniqueControlStrandBalance = Math.min(minNonUniqueControlStrandBalance,((double)totalControlFwdStrands.second)/totalControlCoverage.second);
|
||||
|
||||
|
||||
}
|
||||
|
||||
private void resetBuffer() {
|
||||
regionStart = windowStart;
|
||||
lastWindowStart = windowStart;
|
||||
maxSignalReads.first = (int)Math.round(totalSignalCoverage.first/28.0);
|
||||
maxSignalReads.second = (int)Math.round(totalSignalCoverage.second/28.0);
|
||||
minSignalReads.assignFrom(maxSignalReads);
|
||||
maxControlReads.first = (int)Math.round(totalControlCoverage.first/36.0);
|
||||
maxControlReads.second = (int)Math.round(totalControlCoverage.second/36.0);
|
||||
minControlReads.assignFrom(maxControlReads);
|
||||
minEnrichmentUnique = maxEnrichmentUnique = ((double)totalSignalCoverage.first/28.0)/(totalControlCoverage.first/36.0);
|
||||
minEnrichmentNonUnique = maxEnrichmentNonUnique = ((double)totalSignalCoverage.second/28.0)/(totalControlCoverage.second/36.0);
|
||||
minEnrichmentTotal = maxEnrichmentTotal = ((double)(totalSignalCoverage.first+totalSignalCoverage.second)/28.0)/
|
||||
((totalControlCoverage.first+ totalControlCoverage.second)/36.0);
|
||||
|
||||
minUniqueSignalStrandBalance = maxUniqueSignalStrandBalance = ((double)totalSignalFwdStrands.first)/totalSignalCoverage.first;
|
||||
minNonUniqueSignalStrandBalance = maxNonUniqueSignalStrandBalance = ((double)totalSignalFwdStrands.second)/totalSignalCoverage.second;
|
||||
minUniqueControlStrandBalance = maxUniqueControlStrandBalance = ((double)totalControlFwdStrands.first)/totalControlCoverage.first;
|
||||
minNonUniqueControlStrandBalance = maxNonUniqueControlStrandBalance = ((double)totalControlFwdStrands.second)/totalControlCoverage.second;
|
||||
}
|
||||
|
||||
private void printRegion() {
|
||||
if ( regionStart == -1 ) return;
|
||||
out.print(curContigName+":"+regionStart+"-"+windowStop+"\t"+(windowStop-regionStart+1) +"\t"+
|
||||
minSignalReads.first+"-"+maxSignalReads.first+"\t"+
|
||||
minSignalReads.second+"-"+maxSignalReads.second+"\t"+
|
||||
minControlReads.first+"-"+maxControlReads.first+"\t"+
|
||||
minControlReads.second+"-"+maxControlReads.second+"\t");
|
||||
out.printf("%.2f-%.2f\t",minEnrichmentUnique,maxEnrichmentUnique);
|
||||
out.printf("%.2f-%.2f\t",minEnrichmentNonUnique,maxEnrichmentNonUnique);
|
||||
out.printf("%.2f-%.2f\t",minEnrichmentTotal,maxEnrichmentTotal);
|
||||
out.printf("%.2f-%.2f\t",minUniqueSignalStrandBalance,maxUniqueSignalStrandBalance);
|
||||
out.printf("%.2f-%.2f\t",minNonUniqueSignalStrandBalance,maxNonUniqueSignalStrandBalance);
|
||||
out.printf("%.2f-%.2f\t",minUniqueControlStrandBalance,maxUniqueControlStrandBalance);
|
||||
out.printf("%.2f-%.2f",minNonUniqueControlStrandBalance,maxNonUniqueControlStrandBalance);
|
||||
|
||||
if ( minUniqueSignalStrandBalance > 0.75 || minUniqueSignalStrandBalance < 0.25 ) out.print("\tS_U_STRAND_FILTER");
|
||||
out.println();
|
||||
|
||||
regionStart = -1; // to indicate that there is nothing left to print, the buffer is empty
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,441 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Jan 3, 2010
|
||||
* Time: 1:58:38 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class DSBWalkerV3 extends ReadWalker<Integer,Integer> {
|
||||
|
||||
@Argument(fullName="windowSize",shortName="W",doc="Size of the sliding window",required=true)
|
||||
int WINDOW_SIZE = 100;
|
||||
@Argument(fullName="enrichmentCutoff",shortName="E",doc="Report windows with enrichment (signal/control) above this cutoff",required=true)
|
||||
double ENRICHMENT_CUTOFF = 5.0;
|
||||
@Argument(fullName="minSignal",shortName="ms",doc="Do not report windows with signal lower than this value "+
|
||||
"(this cutoff is secondary to enrichmentCutoff and guards against windows where control signal is 0 or too low,"+
|
||||
"so that control*enrichmentCutoff is too low to be convincing)",required=true)
|
||||
int MIN_SIGNAL = 10;
|
||||
@Argument(fullName="coverageFactor",shortName="cf",doc="Total number of uniquely mapped signal reads/total number of uniquely mapped control reads",required=false)
|
||||
double COVERAGE_FACTOR=1.0;
|
||||
@Argument(fullName="coverageFactorNU",shortName="cfnu",doc="Total number of non-uniquely mapped signal reads/total number of non-uniquely mapped control reads",required=false)
|
||||
double COVERAGE_FACTOR_NU=1.0;
|
||||
|
||||
private Set<String> signalReadGroups; // we are going to remember which read groups are stimulated tagged and which are unstimulated untagged in order to be able
|
||||
private Set<String> controlReadGroups ; // to properly assign the reads coming from a merged stream
|
||||
|
||||
private GenomeLoc currentWindow = null;
|
||||
private String currentContig = "chrM";
|
||||
|
||||
|
||||
private LinkedList<SAMRecord> readsInSignalWindow = null;
|
||||
private LinkedList<SAMRecord> readsInControlWindow = null;
|
||||
|
||||
private WindowStats signalCountsInCurrWindow = new WindowStats();
|
||||
private WindowStats controlCountsInCurrWindow = new WindowStats();
|
||||
|
||||
|
||||
// following variables are used by emitWindow to buffer adjacent windows
|
||||
private int MERGE_CUTOFF = -1;
|
||||
|
||||
private long regionStart = -1;
|
||||
private long lastWindowStart = -1;
|
||||
private int addedSinceLastEmit = 0; // how many sliding window steps where buffered since the last emit (i.e. since the last window that really passed significance criteria)
|
||||
// buffered read count stats for the windows inside the currently held merged print region:
|
||||
private List<WindowStats> signalReadCountsBuffer = new ArrayList<WindowStats>(1000);
|
||||
private List<WindowStats> controlReadCountsBuffer = new ArrayList<WindowStats>(1000);
|
||||
|
||||
|
||||
/** Clears buffered reads and all counts. DOES NOT clear buffered print region */
|
||||
private void resetWindows() {
|
||||
readsInSignalWindow.clear();
|
||||
readsInControlWindow.clear();
|
||||
signalCountsInCurrWindow.clear();
|
||||
controlCountsInCurrWindow.clear();
|
||||
}
|
||||
|
||||
private void addSignal(SAMRecord read) {
|
||||
readsInSignalWindow.add(read);
|
||||
signalCountsInCurrWindow.addRead(read);
|
||||
}
|
||||
|
||||
private void addControl(SAMRecord read) {
|
||||
readsInControlWindow.add(read);
|
||||
controlCountsInCurrWindow.addRead(read);
|
||||
}
|
||||
|
||||
/** Discard signal reads that start strictly before the specified position and
|
||||
* update associated counts
|
||||
* @param pos
|
||||
*/
|
||||
private void purgeSignal(long pos) {
|
||||
Iterator<SAMRecord> it = readsInSignalWindow.iterator();
|
||||
while ( it.hasNext() ) {
|
||||
SAMRecord r = it.next();
|
||||
if ( r.getAlignmentStart() >= pos ) return; // we are done
|
||||
|
||||
// read starts before pos: discard it and update the counts:
|
||||
signalCountsInCurrWindow.removeRead(r);
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
|
||||
/** Discard signal reads that start strictly before the specified position and
|
||||
* update associated counts
|
||||
* @param pos
|
||||
*/
|
||||
private void purgeControl(long pos) {
|
||||
Iterator<SAMRecord> it = readsInControlWindow.iterator();
|
||||
while ( it.hasNext() ) {
|
||||
SAMRecord r = it.next();
|
||||
if ( r.getAlignmentStart() >= pos ) return; // we are done
|
||||
|
||||
// read starts before pos: discard it and update the counts:
|
||||
controlCountsInCurrWindow.removeRead(r);
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
|
||||
private void resetWindowMergingBuffer(long start) {
|
||||
regionStart = start;
|
||||
lastWindowStart = start;
|
||||
signalReadCountsBuffer.clear();
|
||||
controlReadCountsBuffer.clear();
|
||||
signalReadCountsBuffer.add(signalCountsInCurrWindow.clone());
|
||||
controlReadCountsBuffer.add(controlCountsInCurrWindow.clone());
|
||||
}
|
||||
|
||||
/** Delayed print: the window starting at 'start' will be added to the print buffer; if the window is close enough
|
||||
* to the current contents if the buffer, the addition will result in merging the window with the buffer;
|
||||
* otherwise, the old contents of the buffer will be printed and the buffer will be re-initialized with new window.
|
||||
* It is assumed that counters are in synch with the start position passed to this method.
|
||||
* @param start
|
||||
*/
|
||||
private void emitWindow(long start) {
|
||||
// System.out.println("Emitting at "+start);
|
||||
|
||||
if ( regionStart == -1 ) { // we did not keep any region so far; initialize the buffer and return, will print later
|
||||
resetWindowMergingBuffer(start);
|
||||
addedSinceLastEmit = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if ( start > lastWindowStart + MERGE_CUTOFF ) {
|
||||
// this loop is a dummy: we have already cleared those unneeded
|
||||
// counts in shiftWindows(); stays here to avoid generating bugs later
|
||||
// if we change something in shiftWindows()
|
||||
for ( ; addedSinceLastEmit > 0 ; addedSinceLastEmit-- ) {
|
||||
signalReadCountsBuffer.remove(signalReadCountsBuffer.size()-1);
|
||||
controlReadCountsBuffer.remove(controlReadCountsBuffer.size()-1);
|
||||
}
|
||||
printRegion();
|
||||
resetWindowMergingBuffer(start);
|
||||
return;
|
||||
}
|
||||
|
||||
// the current window is too close to the previous one: we have to merge;
|
||||
// NOTE: if window is too close, bufferAccepts() returned true, so the counts are already
|
||||
// added.
|
||||
lastWindowStart = start;
|
||||
addedSinceLastEmit = 0;
|
||||
// signalReadCountsBuffer.add(uniqueSignalReads);
|
||||
// controlReadCountsBuffer.add(uniqueControlReads);
|
||||
|
||||
}
|
||||
|
||||
private boolean bufferAccepts(long pos) {
|
||||
return ( regionStart != -1 && pos <= lastWindowStart+MERGE_CUTOFF);
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void printRegion() {
|
||||
if ( regionStart == -1 ) return;
|
||||
|
||||
long regionStop = lastWindowStart+WINDOW_SIZE-1;
|
||||
|
||||
double[] tmpEnrU = new double[signalReadCountsBuffer.size()];
|
||||
int[] tmpSignalU = new int[signalReadCountsBuffer.size()];
|
||||
int[] tmpControlU = new int[signalReadCountsBuffer.size()];
|
||||
double[] tmpEnrNU = new double[signalReadCountsBuffer.size()];
|
||||
int[] tmpSignalNU = new int[signalReadCountsBuffer.size()];
|
||||
int[] tmpControlNU = new int[signalReadCountsBuffer.size()];
|
||||
|
||||
double[] tmpFWDSignalFracU = new double[signalReadCountsBuffer.size()];
|
||||
double[] tmpFWDControlFracU = new double[signalReadCountsBuffer.size()];
|
||||
double[] tmpFWDSignalFracNU = new double[signalReadCountsBuffer.size()];
|
||||
double[] tmpFWDControlFracNU = new double[signalReadCountsBuffer.size()];
|
||||
|
||||
int lastInd = signalReadCountsBuffer.size() - 1;
|
||||
|
||||
// out.println("Size="+signalReadCountsBuffer.size()+":");
|
||||
|
||||
for ( int i = 0 ; i <= lastInd ; i++ ) {
|
||||
tmpEnrU[i]= ( ((double) signalReadCountsBuffer.get(i).uniqueReads) / (controlReadCountsBuffer.get(i).uniqueReads+1.0 ) ) / COVERAGE_FACTOR ;
|
||||
|
||||
tmpSignalU[i] = signalReadCountsBuffer.get(i).uniqueReads;
|
||||
tmpControlU[i] = controlReadCountsBuffer.get(i).uniqueReads;
|
||||
|
||||
tmpEnrNU[i]= ( ((double) signalReadCountsBuffer.get(i).nonUniqueReads) / (controlReadCountsBuffer.get(i).nonUniqueReads+1.0 ) ) / COVERAGE_FACTOR_NU ;
|
||||
|
||||
tmpSignalNU[i] = signalReadCountsBuffer.get(i).nonUniqueReads;
|
||||
tmpControlNU[i] = controlReadCountsBuffer.get(i).nonUniqueReads;
|
||||
|
||||
tmpFWDSignalFracU[i] = signalReadCountsBuffer.get(i).uniqueReads > 0 ? ( ((double)signalReadCountsBuffer.get(i).uniqueFWDReads) / signalReadCountsBuffer.get(i).uniqueReads ) : 0.5;
|
||||
tmpFWDControlFracU[i] = controlReadCountsBuffer.get(i).uniqueReads > 0 ? ( ((double)controlReadCountsBuffer.get(i).uniqueFWDReads) / controlReadCountsBuffer.get(i).uniqueReads ) : 0.5;
|
||||
tmpFWDSignalFracNU[i] = signalReadCountsBuffer.get(i).nonUniqueReads > 0 ? ( ((double)signalReadCountsBuffer.get(i).nonUniqueFWDReads) / signalReadCountsBuffer.get(i).nonUniqueReads ) : 0.5;
|
||||
tmpFWDControlFracNU[i] = controlReadCountsBuffer.get(i).nonUniqueReads > 0 ? ( ((double)controlReadCountsBuffer.get(i).nonUniqueFWDReads) / controlReadCountsBuffer.get(i).nonUniqueReads ) : 0.5;
|
||||
}
|
||||
|
||||
Arrays.sort(tmpEnrU);
|
||||
Arrays.sort(tmpSignalU);
|
||||
Arrays.sort(tmpControlU);
|
||||
|
||||
Arrays.sort(tmpEnrNU);
|
||||
Arrays.sort(tmpSignalNU);
|
||||
Arrays.sort(tmpControlNU);
|
||||
|
||||
Arrays.sort(tmpFWDSignalFracU);
|
||||
Arrays.sort(tmpFWDControlFracU);
|
||||
Arrays.sort(tmpFWDSignalFracNU);
|
||||
Arrays.sort(tmpFWDControlFracNU);
|
||||
|
||||
|
||||
out.print(currentContig+":"+regionStart+"-"+regionStop+"\t"+
|
||||
(regionStop-regionStart+1) +"\t"+
|
||||
"signal_unique:"+ tmpSignalU[0]+"-"+ tmpSignalU[lastInd/2]+"-"+ tmpSignalU[lastInd]+"\t"+
|
||||
"control_unique:"+ tmpControlU[0]+"-"+ tmpControlU[lastInd/2]+"-"+ tmpControlU[lastInd]);
|
||||
|
||||
out.printf("\tsignal_fwd_frac_unique:%.1f-%.1f-%.1f",tmpFWDSignalFracU[0],tmpFWDSignalFracU[lastInd/2],tmpFWDSignalFracU[lastInd]);
|
||||
out.printf("\tcontrol_fwd_frac_unique:%.1f-%.1f-%.1f",tmpFWDControlFracU[0],tmpFWDControlFracU[lastInd/2],tmpFWDControlFracU[lastInd]);
|
||||
|
||||
out.print("\tsignal_nonnunique:"+ tmpSignalNU[0]+"-"+ tmpSignalNU[lastInd/2]+"-"+ tmpSignalNU[lastInd]+"\t"+
|
||||
"control_nonunique:"+ tmpControlNU[0]+"-"+ tmpControlNU[lastInd/2]+"-"+ tmpControlNU[lastInd]);
|
||||
|
||||
out.printf("\tsignal_fwd_frac_nonunique:%.1f-%.1f-%.1f",tmpFWDSignalFracNU[0],tmpFWDSignalFracNU[lastInd/2],tmpFWDSignalFracNU[lastInd]);
|
||||
out.printf("\tcontrol_fwd_frac_nonunique:%.1f-%.1f-%.1f",tmpFWDControlFracNU[0],tmpFWDControlFracNU[lastInd/2],tmpFWDControlFracNU[lastInd]);
|
||||
|
||||
out.printf("\tnorm_enrichment_unique:%.2f-%.2f-%.2f",tmpEnrU[0],tmpEnrU[lastInd/2],tmpEnrU[lastInd]);
|
||||
out.printf("\tnorm_enrichment_nonunique:%.2f-%.2f-%.2f",tmpEnrNU[0],tmpEnrNU[lastInd/2],tmpEnrNU[lastInd]);
|
||||
|
||||
// if ( minUniqueSignalStrandBalance > 0.75 || minUniqueSignalStrandBalance < 0.25 ) out.print("\tS_U_STRAND_FILTER");
|
||||
out.println();
|
||||
|
||||
regionStart = -1; // to indicate that there is nothing left to print, the buffer is empty
|
||||
// System.exit(1);
|
||||
}
|
||||
|
||||
private void updateWindowMergingBuffer(long i) {
|
||||
if ( bufferAccepts(i) ) {
|
||||
// we are not too far away from last window added to the buffer that actually passed significance criteria;
|
||||
// in this case we have to keep buffering since another significant window may be encountered soon
|
||||
// System.out.println("Updating buffer at "+i+" with "+ uniqueSignalReads);
|
||||
signalReadCountsBuffer.add(signalCountsInCurrWindow.clone());
|
||||
controlReadCountsBuffer.add(controlCountsInCurrWindow.clone());
|
||||
addedSinceLastEmit++;
|
||||
} else {
|
||||
// we are too far from the last significant window; if another significant window comes later, it will not
|
||||
// be merged into this region but will start a new one. In this case we have to erase all the counts we have been
|
||||
// saving since the last significant window (the latter is where the current region is going to end!)
|
||||
for ( ; addedSinceLastEmit > 0 ; addedSinceLastEmit-- ) {
|
||||
signalReadCountsBuffer.remove(signalReadCountsBuffer.size()-1);
|
||||
controlReadCountsBuffer.remove(controlReadCountsBuffer.size()-1);
|
||||
}
|
||||
printRegion(); // print current region right away, why not? next significant window will start new region for sure.
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void shiftWindows(long pos) {
|
||||
// we shift windows when there is a read that does not fit into the current window.
|
||||
// the position, to which the shift is performed, is the first position such that the new read
|
||||
// can be accomodated. Hence we can safely slide up to pos, only discarding reads that go out of scope -
|
||||
// we are guaranteed that there will be no new reads to add until we reach pos.
|
||||
|
||||
|
||||
for ( long i = currentWindow.getStart() ; i < pos ; i++ ) {
|
||||
// if ( readsInSignalWindow.size() == 0 ) {
|
||||
// i = pos-1;
|
||||
// continue;
|
||||
// };
|
||||
|
||||
// if ( readsInSignalWindow.getFirst().getAlignmentStart() > i ) {
|
||||
// i = readsInSignalWindow.getFirst().getAlignmentStart() - 1; // jump directly to next read position
|
||||
// continue;
|
||||
// }
|
||||
|
||||
purgeSignal(i); // remove all the reads that start before current position i (and update all the counters)
|
||||
purgeControl(i);
|
||||
|
||||
updateWindowMergingBuffer(i);
|
||||
|
||||
if ( ( controlCountsInCurrWindow.uniqueReads + 1 ) * ENRICHMENT_CUTOFF < MIN_SIGNAL ) {
|
||||
// too few control reads
|
||||
if ( signalCountsInCurrWindow.uniqueReads >= MIN_SIGNAL ) {
|
||||
// emit signal only if it is higher that hard cut-off:
|
||||
emitWindow(i); // print current window (print can be buffered and delayed!)
|
||||
}
|
||||
} else {
|
||||
// enough control reads;
|
||||
// check for actual enrichment:
|
||||
if ( ((double) signalCountsInCurrWindow.uniqueReads) / (controlCountsInCurrWindow.uniqueReads+1.0) > ENRICHMENT_CUTOFF ) {
|
||||
emitWindow(i); // print current window (print can be buffered and delayed!)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// we emitted intermediate windows up to pos-1 as/if needed and purged everything that starts before pos-1
|
||||
// now we have to purge everything that starts before pos and return (no emitting yet, as we are about to add a read upon return):
|
||||
|
||||
purgeSignal(pos);
|
||||
purgeControl(pos);
|
||||
|
||||
currentWindow = GenomeLocParser.createGenomeLoc(currentWindow.getContigIndex(),pos,pos+WINDOW_SIZE-1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
int nSams = getToolkit().getArguments().samFiles.size();
|
||||
|
||||
if ( nSams != 2 ) {
|
||||
out.println("ERROR: two input bam files (signal and backround control) must be specified");
|
||||
System.exit(1);
|
||||
}
|
||||
List<Set<String>> readGroupSets = getToolkit().getMergedReadGroupsByReaders();
|
||||
signalReadGroups = readGroupSets.get(0);
|
||||
// System.out.println(signalReadGroups.size()+" read groups in signal");
|
||||
controlReadGroups = readGroupSets.get(1);
|
||||
// System.out.println(controlReadGroups.size()+" read groups in control");
|
||||
|
||||
currentWindow = GenomeLocParser.createGenomeLoc(0,1,WINDOW_SIZE);
|
||||
readsInSignalWindow = new LinkedList<SAMRecord>();
|
||||
readsInControlWindow = new LinkedList<SAMRecord>();
|
||||
|
||||
MERGE_CUTOFF = WINDOW_SIZE;
|
||||
ENRICHMENT_CUTOFF *= COVERAGE_FACTOR;
|
||||
currentContig = getToolkit().getSAMFileHeader().getSequenceDictionary().getSequence(0).getSequenceName();
|
||||
}
|
||||
|
||||
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
|
||||
if ( AlignmentUtils.isReadUnmapped(read) ) return 0;
|
||||
|
||||
if ( read.getReferenceIndex() > currentWindow.getContigIndex() ) {
|
||||
printRegion(); // print all we had on the previous contig
|
||||
|
||||
currentWindow = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),
|
||||
read.getAlignmentStart(),
|
||||
read.getAlignmentStart()+WINDOW_SIZE-1);
|
||||
currentContig = read.getReferenceName();
|
||||
resetWindows();
|
||||
} else {
|
||||
// we are on the same contig
|
||||
if ( read.getAlignmentEnd() > currentWindow.getStop() ) {
|
||||
// can not accomodate the read inside the current window - shift!
|
||||
// System.out.println("read ends at "+read.getAlignmentEnd()+" window ends at "+currentWindow.getStop()+ " shifting to "+ (currentWindow.getStart() + ( read.getAlignmentEnd() - currentWindow.getStop() )) +" ("+uniqueSignalReads+"/"+uniqueControlReads+")");
|
||||
|
||||
// while shifting the window, the following method will issue (delayed) print commands for
|
||||
// all intermediate windows that pass significance criteria:
|
||||
shiftWindows(currentWindow.getStart() + ( read.getAlignmentEnd() - currentWindow.getStop() ));
|
||||
}
|
||||
// now the read will fit into the window
|
||||
}
|
||||
|
||||
// at this point we are guaranteed that the read will fit into the window
|
||||
|
||||
if ( signalReadGroups.contains( read.getReadGroup().getReadGroupId() ) ) {
|
||||
addSignal(read);
|
||||
} else if ( controlReadGroups.contains( read.getReadGroup().getReadGroupId() )) {
|
||||
addControl(read);
|
||||
} else {
|
||||
throw new StingException("Read "+read + " belongs to unrecognized read group");
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
*
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
public Integer reduceInit() {
|
||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||
*
|
||||
* @param value result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value+sum; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
|
||||
/** Auxiliary class that encapsulates the task of monitoring counts of various read traits in some set of reads
|
||||
* (for instance, reads in the current window). Counted traits include uniquely/non-uniquely mapped reads,
|
||||
* forward-strand aligned reads etc.
|
||||
*/
|
||||
class WindowStats implements Cloneable {
|
||||
public int uniqueReads = 0;
|
||||
public int nonUniqueReads = 0;
|
||||
public int uniqueFWDReads = 0;
|
||||
public int nonUniqueFWDReads = 0;
|
||||
|
||||
/** Reset all counts to 0 */
|
||||
public void clear() {
|
||||
uniqueReads = nonUniqueReads = uniqueFWDReads = nonUniqueFWDReads = 0;
|
||||
}
|
||||
|
||||
/** Examines the read and increments the counts for all the monitored traits observed in this read. */
|
||||
public void addRead(SAMRecord r) {
|
||||
if ( r.getMappingQuality() == 0 ) {
|
||||
// nonunique
|
||||
nonUniqueReads++;
|
||||
if ( ! r.getReadNegativeStrandFlag() ) nonUniqueFWDReads++;
|
||||
} else {
|
||||
// unique
|
||||
uniqueReads++;
|
||||
if ( ! r.getReadNegativeStrandFlag() ) uniqueFWDReads++;
|
||||
}
|
||||
}
|
||||
|
||||
/** Examines the read and decrements the counts for all the monitored traits observed in this read. */
|
||||
public void removeRead(SAMRecord r) {
|
||||
if ( r.getMappingQuality() == 0 ) {
|
||||
// nonunique
|
||||
nonUniqueReads--;
|
||||
if ( ! r.getReadNegativeStrandFlag() ) nonUniqueFWDReads--;
|
||||
}
|
||||
else {
|
||||
// unique
|
||||
uniqueReads--;
|
||||
if ( ! r.getReadNegativeStrandFlag() ) uniqueFWDReads--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** allocates new object, copies this object into it, and returns the copy */
|
||||
public WindowStats clone() {
|
||||
WindowStats ret = new WindowStats();
|
||||
ret.uniqueReads = this.uniqueReads;
|
||||
ret.nonUniqueReads = this.nonUniqueReads;
|
||||
ret.uniqueFWDReads = this.uniqueFWDReads;
|
||||
ret.nonUniqueFWDReads = this.nonUniqueFWDReads;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue