Reads now has a new traversal flag, generateExtendedEvents(), with support added to GenomeAnalysisEngine and Walker. This is a silent, transparent framework change that no existing code will see. The code that actually makes use of the new flag (which is false by default) will be committed separately.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2450 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2009-12-28 19:52:44 +00:00
parent c8d0e6e004
commit 2a704e83df
3 changed files with 54 additions and 2 deletions

View File

@ -421,7 +421,8 @@ public class GenomeAnalysisEngine {
!argCollection.unsafe,
filters,
argCollection.readMaxPileup,
walker.includeReadsWithDeletionAtLoci());
walker.includeReadsWithDeletionAtLoci(),
walker.generateExtendedEvents());
}
/**

View File

@ -33,6 +33,8 @@ public class Reads {
// Additional filters to apply dynamically to the reads; initially null.
private Collection<SamRecordFilter> supplementalFilters = null;
// Expected to always be set explicitly; defaults to Integer.MAX_VALUE until then.
private int maximumReadsAtLocus = Integer.MAX_VALUE;
// If true, base pileups explicitly list reads that have a deletion over the current reference base.
private boolean includeReadsWithDeletionAtLoci = false;
// If true, additional piles of "extended" events (indels) are generated immediately after
// the reference base each such event is associated with.
private boolean generateExtendedEvents = false;
/**
@ -44,6 +46,18 @@ public class Reads {
return includeReadsWithDeletionAtLoci;
}
/**
 * Reports whether the walker asked for additional pileups of "extended" events (indels).
 * By convention, an indel is associated with the reference base immediately preceding the
 * insertion/deletion. When this flag is {@code true}, any locus that has an indel associated
 * with it results in exactly two consecutive calls to the walker's map(): the first with the
 * "conventional" base pileup, the second with a pileup of extended (indel/noevent) events.
 *
 * @return {@code true} if extended event pileups should be generated, {@code false} otherwise
 */
public boolean generateExtendedEvents() {
    return this.generateExtendedEvents;
}
/**
* Gets a list of the files acting as sources of reads.
* @return A list of files storing reads data.
@ -115,6 +129,12 @@ public class Reads {
* @param downsampleCoverage downsampling per-locus.
* @param beSafe Whether to enable safety checking.
* @param supplementalFilters additional filters to dynamically apply.
* @param generateExtendedEvents if true, the engine will issue an extra call to walker's map() with
* a pile of indel/noevent extended events at every locus with at least one indel associated with it
* (in addition to a "regular" call to map() at this locus performed with base pileup)
* @param includeReadsWithDeletionAtLoci if 'true', the base pileups sent to the walker's map() method
* will explicitly list reads with deletion over the current reference base; otherwise, only observed
* bases will be seen in the pileups, and the deletions will be skipped silently.
*/
Reads( List<File> samFiles,
SAMFileReader.ValidationStringency strictness,
@ -123,7 +143,8 @@ public class Reads {
Boolean beSafe,
Collection<SamRecordFilter> supplementalFilters,
int maximumReadsAtLocus,
boolean includeReadsWithDeletionAtLoci) {
boolean includeReadsWithDeletionAtLoci,
boolean generateExtendedEvents) {
this.readsFiles = samFiles;
this.validationStringency = strictness;
this.downsamplingFraction = downsampleFraction;
@ -132,5 +153,6 @@ public class Reads {
this.supplementalFilters = supplementalFilters;
this.maximumReadsAtLocus = maximumReadsAtLocus;
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
this.generateExtendedEvents = generateExtendedEvents;
}
}

View File

@ -61,6 +61,35 @@ public abstract class Walker<MapType, ReduceType> {
return false;
}
/**
 * States whether this walker wants to see pileups of "extended events" (currently, indels only)
 * at every locus that has at least one indel associated with it. Consider the following situation:
 *
 * <pre>
 *   ref:   AT--CTGA  (the ref is expanded here with -- to accommodate the insertion in read3)
 *   read1: AT--CTGA  (perfectly matches the ref)
 *   read2: AT----GA  (deletion -CT w.r.t. the ref)
 *   read3: ATGGCTGA  (insertion +GG w.r.t. the ref)
 * </pre>
 *
 * Normally, the locus iterator only returns read base pileups over reference bases, optionally with
 * deleted bases included (see {@link #includeReadsWithDeletionAtLoci()}). In other words, the pileup
 * over the second reference base (T) will be [T,T,T] (all reads count); for the next reference base (C)
 * the pileup will be [C,C] (or [C,-,C] if {@link #includeReadsWithDeletionAtLoci()} is true); the pileup
 * generated over the next reference base (T) will be either [T,T] or [T,-,T]; and so on. In this default
 * mode, a) insertions are not seen by a walker at all, and b) deletions are (optionally) seen only on a
 * base-by-base basis (as the step-by-step traversal over the reference bases is performed).
 * <p>
 * In the extended event mode, however, if there is at least one indel associated with a reference
 * locus, the engine will generate an <i>additional</i> call to the walker's map() method, with a pileup of
 * full-length extended indel/noevent calls. This call will be made <i>after</i> the conventional base
 * pileup call at that locus. Thus, in the example above, a conventional call will first be made at the
 * second reference base (T), with the [T,T,T] pileup of read bases; then an extended event call will be
 * made at the <i>same</i> locus with pileup [no_event, -CT, +GG] (i.e. the extended events associated with
 * that reference base). After that, the traversal engine will move on to the next reference base.
 *
 * @return false if you do not want to receive extra pileups with extended events, or true if you do.
 *         This default implementation returns false.
 */
public boolean generateExtendedEvents() {
return false;
}
/**
 * Initialization hook; this default implementation is intentionally empty.
 * NOTE(review): presumably subclasses override this to perform setup before traversal starts -- confirm against the engine.
 */
public void initialize() { }
/**