Let's try not to die suffocating when a bad region with humongous coverage is encountered. New option: -maxNumberOfReads (--mnr), with default of 10,000. If count of reads cached in the current window reaches the specified limit, the whole window is immediately shifted by the whole window length and all currently cached reads are dropped. NOTE: this also means that we are not going to call ANY indels from the current window, even though we could try using just the reads cached so far.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2921 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6ca6c98980
commit
073fdd8ec7
|
|
@ -70,6 +70,8 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
@Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",required=false) Boolean DEBUG = false;
|
@Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",required=false) Boolean DEBUG = false;
|
||||||
@Argument(fullName="window_size", shortName="ws", doc="Size (bp) of the sliding window used for accumulating the coverage. "+
|
@Argument(fullName="window_size", shortName="ws", doc="Size (bp) of the sliding window used for accumulating the coverage. "+
|
||||||
"May need to be increased to accomodate longer reads or longer deletions.",required=false) int WINDOW_SIZE = 200;
|
"May need to be increased to accomodate longer reads or longer deletions.",required=false) int WINDOW_SIZE = 200;
|
||||||
|
@Argument(fullName="maxNumberOfReads",shortName="mnr",doc="Maximum number of reads to cache in the window; if number of reads exceeds this number,"+
|
||||||
|
" the window will be skipped and no calls will be made from it",required=false) int MAX_READ_NUMBER = 10000;
|
||||||
|
|
||||||
private WindowContext tumor_context;
|
private WindowContext tumor_context;
|
||||||
private WindowContext normal_context;
|
private WindowContext normal_context;
|
||||||
|
|
@ -201,6 +203,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
+lastRead.getAlignmentEnd()+" cigar="+lastRead.getCigarString());
|
+lastRead.getAlignmentEnd()+" cigar="+lastRead.getCigarString());
|
||||||
|
|
||||||
currentPosition = read.getAlignmentStart();
|
currentPosition = read.getAlignmentStart();
|
||||||
|
lastRead = read;
|
||||||
|
|
||||||
if ( read.getAlignmentEnd() > contigLength ) {
|
if ( read.getAlignmentEnd() > contigLength ) {
|
||||||
if ( ! outOfContigUserWarned ) {
|
if ( ! outOfContigUserWarned ) {
|
||||||
|
|
@ -210,14 +213,13 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( read.getAlignmentStart() < normal_context.getStart() ) {
|
// if ( read.getAlignmentStart() < normal_context.getStart() ) {
|
||||||
// should never happen
|
// if ( skippingReads ) return 0; // return if we are legitimately skipping reads over some bad region.
|
||||||
throw new StingException("Read "+read.getReadName()+": out of order on the contig\n"+
|
// // this should never happen
|
||||||
"Read starts at "+read.getReferenceName()+":"+read.getAlignmentStart()+ " (cigar="+read.getCigarString()+
|
// throw new StingException("Read "+read.getReadName()+": out of order on the contig\n"+
|
||||||
"); window starts at "+normal_context.getStart());
|
// "Read starts at "+read.getReferenceName()+":"+read.getAlignmentStart()+ " (cigar="+read.getCigarString()+
|
||||||
}
|
// "); window starts at "+normal_context.getStart());
|
||||||
|
// }
|
||||||
lastRead = read;
|
|
||||||
|
|
||||||
// a little trick here: we want to make sure that current read completely fits into the current
|
// a little trick here: we want to make sure that current read completely fits into the current
|
||||||
// window so that we can accumulate indel observations over the whole length of the read.
|
// window so that we can accumulate indel observations over the whole length of the read.
|
||||||
|
|
@ -268,8 +270,27 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
} else {
|
} else {
|
||||||
throw new StingException("Unrecognized read group in merged stream: "+rg);
|
throw new StingException("Unrecognized read group in merged stream: "+rg);
|
||||||
}
|
}
|
||||||
|
if ( tumor_context.getReads().size() > MAX_READ_NUMBER ) {
|
||||||
|
System.out.println("WARNING: a count of "+MAX_READ_NUMBER+" reads reached in a window "+
|
||||||
|
refName+':'+tumor_context.getStart()+'-'+tumor_context.getStop()+" in tumor sample. The whole window will be dropped.");
|
||||||
|
tumor_context.shift(WINDOW_SIZE);
|
||||||
|
normal_context.shift(WINDOW_SIZE);
|
||||||
|
}
|
||||||
|
if ( normal_context.getReads().size() > MAX_READ_NUMBER ) {
|
||||||
|
System.out.println("WARNING: a count of "+MAX_READ_NUMBER+" reads reached in a window "+
|
||||||
|
refName+':'+normal_context.getStart()+'-'+normal_context.getStop()+" in normal sample. The whole window will be dropped");
|
||||||
|
tumor_context.shift(WINDOW_SIZE);
|
||||||
|
normal_context.shift(WINDOW_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
normal_context.add(read, ref);
|
normal_context.add(read, ref);
|
||||||
|
if ( normal_context.getReads().size() > MAX_READ_NUMBER ) {
|
||||||
|
System.out.println("WARNING: a count of "+MAX_READ_NUMBER+" reads reached in a window "+
|
||||||
|
refName+':'+normal_context.getStart()+'-'+normal_context.getStop()+". The whole window will be dropped");
|
||||||
|
normal_context.shift(WINDOW_SIZE);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
|
|
@ -1163,6 +1184,8 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
public void add(SAMRecord read, char [] ref) {
|
public void add(SAMRecord read, char [] ref) {
|
||||||
|
|
||||||
|
if ( read.getAlignmentStart() < start ) return; // silently ignore reads starting before the window start
|
||||||
|
|
||||||
ExpandedSAMRecord er = new ExpandedSAMRecord(read,ref,read.getAlignmentStart()-start,this);
|
ExpandedSAMRecord er = new ExpandedSAMRecord(read,ref,read.getAlignmentStart()-start,this);
|
||||||
//TODO duplicate records may actually indicate a problem with input bam file; throw an exception when the bug in CleanedReadInjector is fixed
|
//TODO duplicate records may actually indicate a problem with input bam file; throw an exception when the bug in CleanedReadInjector is fixed
|
||||||
if ( reads.contains(er)) return; // ignore duplicate records
|
if ( reads.contains(er)) return; // ignore duplicate records
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue