diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index dce280eb8..78a37a1ad 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -421,7 +421,8 @@ public class GenomeAnalysisEngine { !argCollection.unsafe, filters, argCollection.readMaxPileup, - walker.includeReadsWithDeletionAtLoci()); + walker.includeReadsWithDeletionAtLoci(), + walker.generateExtendedEvents()); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/Reads.java b/java/src/org/broadinstitute/sting/gatk/Reads.java index 31b42d77a..8e40f3495 100755 --- a/java/src/org/broadinstitute/sting/gatk/Reads.java +++ b/java/src/org/broadinstitute/sting/gatk/Reads.java @@ -33,6 +33,8 @@ public class Reads { private Collection supplementalFilters = null; private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT private boolean includeReadsWithDeletionAtLoci = false; + private boolean generateExtendedEvents = false; // do we want to generate additional piles of "extended" events (indels) + // immediately after the reference base such event is associated with? /** @@ -44,6 +46,18 @@ public class Reads { return includeReadsWithDeletionAtLoci; } + /** + * Return true if the walker wants to see additional piles of "extended" events (indels). An indel is associated, + * by convention, with the reference base immediately preceding the insertion/deletion, and if this flag is set + * to 'true', any locus with an indel associated with it will cause exactly two subsequent calls to walker's map(): first call + * will be made with a "conventional" base pileup, the next call will be made with a pileup of extended (indel/noevent) + * events. 
+ * @return true if extended event (indel/noevent) pileups should be generated, false otherwise + */ + public boolean generateExtendedEvents() { + return generateExtendedEvents; + } + /** * Gets a list of the files acting as sources of reads. * @return A list of files storing reads data. @@ -115,6 +129,12 @@ public class Reads { * @param downsampleCoverage downsampling per-locus. * @param beSafe Whether to enable safety checking. * @param supplementalFilters additional filters to dynamically apply. + * @param generateExtendedEvents if true, the engine will issue an extra call to walker's map() with + * a pile of indel/noevent extended events at every locus with at least one indel associated with it + * (in addition to a "regular" call to map() at this locus performed with base pileup) + * @param includeReadsWithDeletionAtLoci if 'true', the base pileups sent to the walker's map() method + * will explicitly list reads with deletion over the current reference base; otherwise, only observed + * bases will be seen in the pileups, and the deletions will be skipped silently. 
*/ Reads( List samFiles, SAMFileReader.ValidationStringency strictness, @@ -123,7 +143,8 @@ public class Reads { Boolean beSafe, Collection supplementalFilters, int maximumReadsAtLocus, - boolean includeReadsWithDeletionAtLoci) { + boolean includeReadsWithDeletionAtLoci, + boolean generateExtendedEvents) { this.readsFiles = samFiles; this.validationStringency = strictness; this.downsamplingFraction = downsampleFraction; @@ -132,5 +153,6 @@ public class Reads { this.supplementalFilters = supplementalFilters; this.maximumReadsAtLocus = maximumReadsAtLocus; this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci; + this.generateExtendedEvents = generateExtendedEvents; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index db4de887b..e33421488 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -61,6 +61,35 @@ public abstract class Walker { return false; } + /** + * This method states whether you want to see pileups of "extended events" (currently, indels only) + * at every locus that has at least one indel associated with it. Consider the following situation: + * + * ref: AT--CTGA (note that we expanded the ref here with -- to accommodate insertion in read3) + * read1: AT--CTGA (perfectly matches the ref) + * read2: AT----GA (deletion -CT w.r.t. the ref) + * read3: ATGGCTGA (insertion +GG w.r.t. the ref) + * + * Normally, the locus iterator only returns read base pileups over reference bases, optionally with deleted bases + * included (see #includeReadsWithDeletionAtLoci()). 
In other words, the pileup over the second reference base (T) + * will be [T,T,T] (all reads count), for the next reference base (C) the pileup will be [C,C] (or [C,-,C] if + * #includeReadsWithDeletionAtLoci() is true), next pileup generated over the next reference + * base (T) will be either [T,T], or [T,-,T], etc. In this default mode, a) insertions are not seen by a walker at all, and + * b) deletions are (optionally) seen only on a base-by-base basis (as the step-by-step traversal over the reference + * bases is performed). In the extended event mode, however, if there is at least one indel associated with a reference + * locus, the engine will generate an additional call to the walker's map() method, with a pileup of + * full-length extended indel/noevent calls. This call will be made after the conventional base pileup call + * at that locus. Thus, in the example above, a conventional call will be first made at the second reference base (T), + * with the [T,T,T] pileup of read bases, then an extended event call will be made at the same locus with + * pileup [no_event, -CT, +GG] (i.e. extended events associated with that reference base). After that, the traversal + * engine will move to the next reference base. + * + * @return false if you do not want to receive extra pileups with extended events, or true if you do. + */ + public boolean generateExtendedEvents() { + return false; + } + public void initialize() { } /**