From 073fdd8ec703d9b47dd2c5911d15198f60e552a5 Mon Sep 17 00:00:00 2001 From: asivache Date: Wed, 3 Mar 2010 17:34:30 +0000 Subject: [PATCH] Let's try not to die suffocating when a bad region with humongous coverage is encountered. New option: -maxNumberOfReads (--mnr), with default of 10,000. If count of reads cached in the current window reaches the specified limit, the whole window is immediately shifted by the whole window length and all currently cached reads are dropped. NOTE: this also means that we are not going to call ANY indels from the current window, even though we could try using just the reads cached so far. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2921 348d0f76-0448-11de-a6fe-93d51630548a --- .../indels/IndelGenotyperV2Walker.java | 39 +++++++++++++++---- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java index f985c8d93..020d97deb 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java @@ -70,6 +70,8 @@ public class IndelGenotyperV2Walker extends ReadWalker { @Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",required=false) Boolean DEBUG = false; @Argument(fullName="window_size", shortName="ws", doc="Size (bp) of the sliding window used for accumulating the coverage. "+ "May need to be increased to accomodate longer reads or longer deletions.",required=false) int WINDOW_SIZE = 200; + @Argument(fullName="maxNumberOfReads",shortName="mnr",doc="Maximum number of reads to cache in the window; if number of reads exceeds this number,"+ + " the window will be skipped and no calls will be made from it",required=false) int MAX_READ_NUMBER = 10000; private WindowContext tumor_context; private WindowContext normal_context; @@ -201,6 +203,7 @@ public class IndelGenotyperV2Walker extends ReadWalker { +lastRead.getAlignmentEnd()+" cigar="+lastRead.getCigarString()); currentPosition = read.getAlignmentStart(); + lastRead = read; if ( read.getAlignmentEnd() > contigLength ) { if ( ! outOfContigUserWarned ) { @@ -210,14 +213,13 @@ public class IndelGenotyperV2Walker extends ReadWalker { return 0; } - if ( read.getAlignmentStart() < normal_context.getStart() ) { - // should never happen - throw new StingException("Read "+read.getReadName()+": out of order on the contig\n"+ - "Read starts at "+read.getReferenceName()+":"+read.getAlignmentStart()+ " (cigar="+read.getCigarString()+ - "); window starts at "+normal_context.getStart()); - } - - lastRead = read; +// if ( read.getAlignmentStart() < normal_context.getStart() ) { +// if ( skippingReads ) return 0; // return if we are legitimately skipping reads over some bad region. +// // this should never happen +// throw new StingException("Read "+read.getReadName()+": out of order on the contig\n"+ +// "Read starts at "+read.getReferenceName()+":"+read.getAlignmentStart()+ " (cigar="+read.getCigarString()+ +// "); window starts at "+normal_context.getStart()); +// } // a little trick here: we want to make sure that current read completely fits into the current // window so that we can accumulate indel observations over the whole length of the read. @@ -268,8 +270,27 @@ public class IndelGenotyperV2Walker extends ReadWalker { } else { throw new StingException("Unrecognized read group in merged stream: "+rg); } + if ( tumor_context.getReads().size() > MAX_READ_NUMBER ) { + System.out.println("WARNING: a count of "+MAX_READ_NUMBER+" reads reached in a window "+ + refName+':'+tumor_context.getStart()+'-'+tumor_context.getStop()+" in tumor sample. The whole window will be dropped."); + tumor_context.shift(WINDOW_SIZE); + normal_context.shift(WINDOW_SIZE); + } + if ( normal_context.getReads().size() > MAX_READ_NUMBER ) { + System.out.println("WARNING: a count of "+MAX_READ_NUMBER+" reads reached in a window "+ + refName+':'+normal_context.getStart()+'-'+normal_context.getStop()+" in normal sample. The whole window will be dropped"); + tumor_context.shift(WINDOW_SIZE); + normal_context.shift(WINDOW_SIZE); + } + + } else { normal_context.add(read, ref); + if ( normal_context.getReads().size() > MAX_READ_NUMBER ) { + System.out.println("WARNING: a count of "+MAX_READ_NUMBER+" reads reached in a window "+ + refName+':'+normal_context.getStart()+'-'+normal_context.getStop()+". The whole window will be dropped"); + normal_context.shift(WINDOW_SIZE); + } } return 1; @@ -1163,6 +1184,8 @@ public class IndelGenotyperV2Walker extends ReadWalker { public void add(SAMRecord read, char [] ref) { + if ( read.getAlignmentStart() < start ) return; // silently ignore reads starting before the window start + ExpandedSAMRecord er = new ExpandedSAMRecord(read,ref,read.getAlignmentStart()-start,this); //TODO duplicate records may actually indicate a problem with input bam file; throw an exception when the bug in CleanedReadInjector is fixed if ( reads.contains(er)) return; // ignore duplicate records