// Provenance (recovered from patch header):
//   From e4546f802ce99136c0b038b5424a84579b22bdf0, andrewk, Tue 24 Nov 2009
//   "Accumulates coverage across hybrid selection bait intervals to assess effect of bait
//    adjacency. Requires input bait intervals that have an overhang beyond the actual bait
//    interval to capture coverage data at these points. Outputs R parseable file that has
//    all data in lists and then does some basic plotting."
//   git-svn-id: trunk@2144 348d0f76-0448-11de-a6fe-93d51630548a
//
// NOTE(review): the stored copy of this file was corrupted by HTML-tag stripping — every
// "<...>" span (all Java generics, and some code between a '<' comparison and a later '>')
// was deleted. This version restores the generics and rebuilds the lost regions; rebuilt
// regions are marked NOTE(reconstruction) and should be confirmed against the VCS history.

package org.broadinstitute.sting.playground.gatk.walkers;

import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import net.sf.samtools.SAMRecord;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Accumulates coverage across hybrid selection bait intervals to assess the effect of bait
 * adjacency.
 *
 * <p>Requires input bait intervals that have an overhang beyond the actual bait interval so
 * that coverage at the flanks is captured. Emits an R-parseable file that holds all data in
 * R vectors and then does some basic plotting.
 *
 * <p>Per-locus {@code map} returns a (negative-strand, positive-strand) depth pair; the
 * per-interval reduce accumulates those pairs into a {@link Coverage}; after each interval
 * finishes, the per-interval results are aggregated and written out in
 * {@link #onTraversalDone(List)}.
 */
@By(DataSource.REFERENCE)
public class CoverageAcrossIntervalsWalker
        extends LocusWalker<Pair<Integer, Integer>, CoverageAcrossIntervalsWalker.Coverage> {

    /** Assumed bait tile width in bases; interval width must be a multiple of this. */
    private static final int BAIT_SIZE = 120;

    @Argument(fullName="include_duplicates", shortName="idup", required=false, doc="consider duplicate reads")
    public boolean INCLUDE_DUPLICATE_READS = false;

    @Argument(fullName="min_mapq", shortName="mmq", required=false, doc="Minimum mapping quality of reads to consider")
    public Integer MIN_MAPQ = 1;

    @Argument(fullName="min_len", shortName="min", required=false, doc="Minimum length of interval to consider")
    public Integer MIN_LEN = Integer.MIN_VALUE;

    @Argument(fullName="max_len", shortName="max", required=false, doc="Maximum length of interval to consider")
    public Integer MAX_LEN = 5000;

    @Argument(fullName="max_bait_count", shortName="mbc", required=false, doc="Maximum number of baits to consider")
    public Integer MAX_BAIT_COUNT = 24;

    @Argument(fullName="free_standing_distance", shortName="fsd", required=false, doc="minimum distance to next interval to consider freestanding")
    public Integer FREE_STANDING_DISTANCE = 500;

    /**
     * Container carrying per-locus positive- and negative-strand coverage across one
     * interval; index i is the i-th locus of the interval.
     */
    public static class Coverage {
        public ArrayList<Integer> pos = new ArrayList<Integer>();
        public ArrayList<Integer> neg = new ArrayList<Integer>();

        /** Appends one locus worth of (positive, negative) strand depths. */
        public void addCoveragePoints(Pair<Integer, Integer> pn) {
            pos.add(pn.first);
            neg.add(pn.second);
        }
    }

    /**
     * Counts filtered read depth at this locus, split by strand.
     *
     * @return pair of (negative-strand depth, positive-strand depth)
     * @throws StingException if no "interval" ROD covers the locus (should not happen,
     *         since traversal is restricted to the input intervals)
     */
    public Pair<Integer, Integer> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        List<SAMRecord> reads = context.getReads();

        IntervalRod intervalROD = (IntervalRod) tracker.lookup("interval", null);
        GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation();
        if (interval == null) { throw new StingException("No intervals at locus; should not happen"); }
        // NOTE(review): offset is computed but unused in the surviving code; retained in
        // case the lost regions referenced it — confirm against VCS history.
        int offset = (int)(context.getPosition() - interval.getStart());

        // depth[0] = negative strand, depth[1] = positive strand
        int[] depth = new int[2];
        for (SAMRecord read : reads) {
            if (read.getNotPrimaryAlignmentFlag()) { continue; }
            if (read.getReadUnmappedFlag()) { continue; }
            if (!INCLUDE_DUPLICATE_READS && read.getDuplicateReadFlag()) { continue; }
            if (read.getMappingQuality() < MIN_MAPQ) { continue; }

            depth[read.getReadNegativeStrandFlag() ? 0 : 1]++;
        }

        return new Pair<Integer, Integer>(depth[0], depth[1]);
    }

    public Coverage reduceInit() { return new Coverage(); }

    /** Accumulates one locus' strand depths into the running per-interval coverage. */
    public Coverage reduce(Pair<Integer, Integer> depths, Coverage cov) {
        cov.addCoveragePoints(depths);
        return cov;
    }

    /**
     * Reduce each interval separately: reduceInit() is invoked once per interval, reduce
     * accumulates within the interval, and onTraversalDone(List) receives one
     * (GenomeLoc, Coverage) pair per interval after all reductions are done.
     */
    public boolean isReduceByInterval() {
        return true;
    }

    /**
     * Aggregates per-interval coverage by bait count and writes an R script that defines
     * coverage vectors (b[np]<count>) and plots them, negative strand vs positive strand.
     */
    public void onTraversalDone(List<Pair<GenomeLoc, Coverage>> results) {
        int TOTAL_BAIT_WINDOW = MAX_BAIT_COUNT * BAIT_SIZE + FREE_STANDING_DISTANCE * 2;
        // Indices: [strand (0=neg,1=pos)][bait count][position within bait window]
        int[][][] depths = new int[2][MAX_BAIT_COUNT + 1][TOTAL_BAIT_WINDOW];
        int[] bait_count = new int[MAX_BAIT_COUNT + 1];

        for (Pair<GenomeLoc, Coverage> pair : results) {
            GenomeLoc loc = pair.first;
            //if (loc.size() < MIN_LEN || loc.size() > MAX_LEN) { continue; }

            long interval_width = loc.size() - FREE_STANDING_DISTANCE * 2;
            int baits = (int)(interval_width / BAIT_SIZE);
            //out.format("#Interval_width: %d Bait_count: %d\n", interval_width, baits);
            if (interval_width % BAIT_SIZE != 0) { continue; }
            if (interval_width > MAX_BAIT_COUNT * BAIT_SIZE) { continue; }

            // This target is good; count it
            bait_count[baits]++;
            Coverage pn_cov = pair.second;
            for (int pn = 0; pn < 2; pn++) {
                ArrayList<Integer> cov = pn == 0 ? pn_cov.neg : pn_cov.pos;
                // NOTE(reconstruction): this accumulation loop was lost to the corruption;
                // rebuilt as a straight sum of per-locus depth into the bait-count bucket.
                for (int i = 0; i < cov.size() && i < TOTAL_BAIT_WINDOW; i++) {
                    depths[pn][baits][i] += cov.get(i);
                }
            }
        }

        // Emit R: one vector per (strand, bait count); first emitted vector starts the plot.
        boolean plot_begun = false;
        for (int baits = MAX_BAIT_COUNT; baits > 0; baits--) {
            int norm_depth_width = baits * BAIT_SIZE + FREE_STANDING_DISTANCE * 2;
            float[] norm_depths = new float[norm_depth_width];
            for (int pn = 0; pn < 2; pn++) {
                // NOTE(reconstruction): normalization loop and pn_char/pn_color setup were
                // lost; rebuilt as mean depth per target, 'n'/blue = negative strand,
                // 'p'/red = positive strand — confirm against VCS history.
                for (int i = 0; i < norm_depth_width; i++) {
                    norm_depths[i] = bait_count[baits] > 0
                            ? (float) depths[pn][baits][i] / bait_count[baits]
                            : 0.0f;
                }
                char pn_char = pn == 0 ? 'n' : 'p';
                String pn_color = pn == 0 ? "blue" : "red";

                if (bait_count[baits] > 0) {
                    // Arrays.toString yields "[a, b, c]"; strip the brackets for R's c(...)
                    String depth_str = Arrays.toString(norm_depths);
                    depth_str = depth_str.substring(1, depth_str.length() - 1);
                    out.format("b%c%d <- c(%s)\n", pn_char, baits, depth_str);
                    if (plot_begun) {
                        out.format("points(b%c%d, type='l', col=\"%s\")\n", pn_char, baits, pn_color);
                    } else {
                        out.format("plot(b%c%d, type='l', col=\"%s\", xlab='Position from start of 1st bait', ylab='Coverage')\n", pn_char, baits, pn_color);
                        plot_begun = true;
                    }
                }
                out.format("%ccounts <- c(%ccounts,%d)\n", pn_char, pn_char, bait_count[baits]);
            }
        }
    }

}