From 2a704e83dffc1667e4ddf9a2c6b05728d26056c9 Mon Sep 17 00:00:00 2001 From: asivache Date: Mon, 28 Dec 2009 19:52:44 +0000 Subject: [PATCH] Reads now have new traversal flag: generateExtendedEvents(). Support added to GenomeAnalysisEngine and Walker. This is a silent and transparent framework change that no existing code is going to see. The actual code that makes use of the new flag (which is false by default) will be committed separately... git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2450 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/GenomeAnalysisEngine.java | 3 +- .../org/broadinstitute/sting/gatk/Reads.java | 24 ++++++++++++++- .../sting/gatk/walkers/Walker.java | 29 +++++++++++++++++++ 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index dce280eb8..78a37a1ad 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -421,7 +421,8 @@ public class GenomeAnalysisEngine { !argCollection.unsafe, filters, argCollection.readMaxPileup, - walker.includeReadsWithDeletionAtLoci()); + walker.includeReadsWithDeletionAtLoci(), + walker.generateExtendedEvents()); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/Reads.java b/java/src/org/broadinstitute/sting/gatk/Reads.java index 31b42d77a..8e40f3495 100755 --- a/java/src/org/broadinstitute/sting/gatk/Reads.java +++ b/java/src/org/broadinstitute/sting/gatk/Reads.java @@ -33,6 +33,8 @@ public class Reads { private Collection supplementalFilters = null; private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT private boolean includeReadsWithDeletionAtLoci = false; + private boolean generateExtendedEvents = false; // do we want to generate additional piles of "extended" events (indels) + // 
immediately after the reference base such event is associated with? /** @@ -44,6 +46,18 @@ public class Reads { return includeReadsWithDeletionAtLoci; } + /** + * Return true if the walker wants to see additional piles of "extended" events (indels). An indel is associated, + * by convention, with the reference base immediately preceding the insertion/deletion, and if this flag is set + * to 'true', any locus with an indel associated with it will cause exactly two consecutive calls to walker's map(): first call + * will be made with a "conventional" base pileup, the next call will be made with a pileup of extended (indel/noevent) + * events. + * @return true if extended event pileups should be generated in addition to base pileups, false otherwise + */ + public boolean generateExtendedEvents() { + return generateExtendedEvents; + } + /** * Gets a list of the files acting as sources of reads. * @return A list of files storing reads data. @@ -115,6 +129,12 @@ public class Reads { * @param downsampleCoverage downsampling per-locus. * @param beSafe Whether to enable safety checking. * @param supplementalFilters additional filters to dynamically apply. + * @param generateExtendedEvents if true, the engine will issue an extra call to walker's map() with + * a pile of indel/noevent extended events at every locus with at least one indel associated with it + * (in addition to a "regular" call to map() at this locus performed with base pileup) + * @param includeReadsWithDeletionAtLoci if 'true', the base pileups sent to the walker's map() method + * will explicitly list reads with deletion over the current reference base; otherwise, only observed + * bases will be seen in the pileups, and the deletions will be skipped silently. 
*/ Reads( List samFiles, SAMFileReader.ValidationStringency strictness, @@ -123,7 +143,8 @@ public class Reads { Boolean beSafe, Collection supplementalFilters, int maximumReadsAtLocus, - boolean includeReadsWithDeletionAtLoci) { + boolean includeReadsWithDeletionAtLoci, + boolean generateExtendedEvents) { this.readsFiles = samFiles; this.validationStringency = strictness; this.downsamplingFraction = downsampleFraction; @@ -132,5 +153,6 @@ public class Reads { this.supplementalFilters = supplementalFilters; this.maximumReadsAtLocus = maximumReadsAtLocus; this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci; + this.generateExtendedEvents = generateExtendedEvents; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index db4de887b..e33421488 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -61,6 +61,35 @@ public abstract class Walker { return false; } + /** + * This method states whether you want to see pileups of "extended events" (currently, indels only) + * at every locus that has at least one indel associated with it. Consider the following situation: + * + * ref: AT--CTGA (note that we expanded the ref here with -- to accommodate insertion in read3) + * read1: AT--CTGA (perfectly matches the ref) + * read2: AT----GA (deletion -CT w.r.t. the ref) + * read3: ATGGCTGA (insertion +GG w.r.t. the ref) + * + * Normally, the locus iterator only returns read base pileups over reference bases, optionally with deleted bases + * included (see #includeReadsWithDeletionAtLoci()). 
In other words, the pileup over the second reference base (T) + * will be [T,T,T] (all reads count); for the next reference base (C) the pileup will be [C,C] (or [C,-,C] if + * #includeReadsWithDeletionAtLoci() is true); the next pileup generated over the next reference + * base (T) will be either [T,T], or [T,-,T], etc. In this default mode, a) insertions are not seen by a walker at all, and + * b) deletions are (optionally) seen only on a base-by-base basis (as the step-by-step traversal over the reference + * bases is performed). In the extended event mode, however, if there is at least one indel associated with a reference + * locus, the engine will generate an additional call to the walker's map() method, with a pileup of + * full-length extended indel/noevent calls. This call will be made after the conventional base pileup call + * at that locus. Thus, in the example above, a conventional call will be first made at the second reference base (T), + * with the [T,T,T] pileup of read bases, then an extended event call will be made at the same locus with + * pileup [no_event, -CT, +GG] (i.e. extended events associated with that reference base). After that, the traversal + * engine will move to the next reference base. + * + * @return false if you do not want to receive extra pileups with extended events, or true if you do. + */ + public boolean generateExtendedEvents() { + return false; + } + public void initialize() { } /**