A most stupid bug. In cases when more than one indel variant was present in cleaned bam file, the "consensus" (max. # of occurences) call was computed incorrectly, and most of the times the call itself was not made at all. Fortunately, the locations where we see multiple indels are a minority, and many of them are suspicious anyway (manifestation of alignment problems?). Could change results of POOLED calls though.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1448 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2009-08-20 22:31:44 +00:00
parent 6313c465fb
commit 0bdecd8651
1 changed files with 40 additions and 17 deletions

View File

@ -59,6 +59,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
@Argument(fullName="refseq", shortName="refseq", @Argument(fullName="refseq", shortName="refseq",
doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated as GENOMIC/UTR/INTRON/CODING", required=false) doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated as GENOMIC/UTR/INTRON/CODING", required=false)
String RefseqFileName = null; String RefseqFileName = null;
@Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging",required=false) Boolean DEBUG = false;
private static int WINDOW_SIZE = 200; private static int WINDOW_SIZE = 200;
private RunningCoverage coverage; private RunningCoverage coverage;
@ -167,6 +168,11 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
@Override @Override
public Integer map(char[] ref, SAMRecord read) { public Integer map(char[] ref, SAMRecord read) {
if ( DEBUG ) {
// System.out.println("DEBUG>> read at "+ read.getAlignmentStart()+"-"+read.getAlignmentEnd()+
// "("+read.getCigarString()+")");
if ( read.getDuplicateReadFlag() ) System.out.println("DEBUG>> Duplicated read (IGNORED)");
}
if ( read.getReadUnmappedFlag() || if ( read.getReadUnmappedFlag() ||
read.getDuplicateReadFlag() || read.getDuplicateReadFlag() ||
@ -272,10 +278,15 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
IndelVariant v = null; IndelVariant v = null;
for ( IndelVariant var : variants ) { for ( IndelVariant var : variants ) {
if ( DEBUG ) System.out.println("DEBUG>> Variant "+var.getBases()+" (cnt="+var.getCount()+")");
int cnt = var.getCount(); int cnt = var.getCount();
total_variant_count +=cnt; total_variant_count +=cnt;
if ( cnt > max_variant_count ) v = var; if ( cnt > max_variant_count ) {
v = var;
max_variant_count = cnt;
} }
}
if ( DEBUG ) System.out.println("DEBUG>> Returning: "+v.getBases()+" (cnt="+v.getCount()+") with total count of "+total_variant_count);
return new Pair<IndelVariant,Integer>(v,total_variant_count); return new Pair<IndelVariant,Integer>(v,total_variant_count);
} }
@ -287,9 +298,12 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
* @return * @return
*/ */
private boolean isCall(Pair<IndelVariant,Integer> p, int coverage) { private boolean isCall(Pair<IndelVariant,Integer> p, int coverage) {
return ( p.first.getCount() >= minIndelCount && boolean ret = ( p.first.getCount() >= minIndelCount &&
(double)p.first.getCount() > minFraction * coverage && (double)p.first.getCount() > minFraction * coverage &&
(double) p.first.getCount() > minConsensusFraction*p.second ); (double) p.first.getCount() > minConsensusFraction*p.second );
if ( DEBUG && ! ret ) System.out.println("DEBUG>> NOT a call: count="+p.first.count+" total_count="+p.second+" cov="+coverage+
" minConsensusF="+((double)p.first.count)/p.second+" minF="+((double)p.first.count)/coverage);
return ret;
} }
/** Build output line for bed file and write it to the specified output writer if the latter is not null; /** Build output line for bed file and write it to the specified output writer if the latter is not null;
@ -345,10 +359,10 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
// but it may end up being smaller (delayed shift), if we have not // but it may end up being smaller (delayed shift), if we have not
// covered MISMATCH_WIDTH bases to the right of the last indel yet. // covered MISMATCH_WIDTH bases to the right of the last indel yet.
boolean debug = false; // boolean debug = false;
// if ( coverage.getStart() <= 19661504 && coverage.getStop() >= 19661504 ) debug = true; // if ( coverage.getStart() <= 19661504 && coverage.getStop() >= 19661504 ) debug = true;
if ( debug ) System.out.println("Window: ["+coverage.getStart()+", "+coverage.getStop()+"]; shifting to: "+position); if ( DEBUG ) System.out.println("DEBUG>> Window: ["+coverage.getStart()+", "+coverage.getStop()+"]; shift requested: to "+position);
// walk along the coverage window and emit indels up to the position we are trying ot shift the window to // walk along the coverage window and emit indels up to the position we are trying ot shift the window to
for ( long pos = coverage.getStart() ; pos < Math.min(position,coverage.getStop()+1) ; pos++ ) { for ( long pos = coverage.getStart() ; pos < Math.min(position,coverage.getStop()+1) ; pos++ ) {
@ -366,7 +380,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
long left = Math.max( pos-MISMATCH_WIDTH, coverage.getStart() ); long left = Math.max( pos-MISMATCH_WIDTH, coverage.getStart() );
long right = pos+MISMATCH_WIDTH; long right = pos+MISMATCH_WIDTH;
if ( debug) System.out.println(" Indel at "+pos); if ( DEBUG ) System.out.println("DEBUG>> Indel at "+pos);
// if right < position we are shifting to, we already have all the coverage within MISMATCH_WINDOW bases around the indel, // if right < position we are shifting to, we already have all the coverage within MISMATCH_WINDOW bases around the indel,
// so we can proceed with counting mismatches and emitting the indel // so we can proceed with counting mismatches and emitting the indel
@ -377,7 +391,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
// we are not asked to force-shift, and there's still additional coverage to the right of current indel, so its too early to emit it; // we are not asked to force-shift, and there's still additional coverage to the right of current indel, so its too early to emit it;
// instead we shift only up to current indel pos - MISMATCH_WIDTH, so that we could keep collecting that coverage // instead we shift only up to current indel pos - MISMATCH_WIDTH, so that we could keep collecting that coverage
move_to = left; move_to = left;
if ( debug ) System.out.println(" waiting for coverage; shifting to "+ left); if ( DEBUG ) System.out.println("DEBUG>> waiting for coverage; actual shift performed to "+ left);
break; break;
} }
@ -402,16 +416,11 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
Pair<IndelVariant,Integer> p = findConsensus(variants); Pair<IndelVariant,Integer> p = findConsensus(variants);
if ( isCall(p,cov) ) { if ( isCall(p,cov) ) {
if ( debug ) System.out.println(" is a CALL (printed)");
String message = makeBedLine(p,cov,pos,output); String message = makeBedLine(p,cov,pos,output);
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotation)); String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotation));
if ( verbose ) out.println(message + "\t"+ annotationString); if ( verbose ) out.println(message + "\t"+ annotationString);
} else {
if ( debug ) System.out.println(" NOT a call: count="+p.first.count+" total_count="+p.second+" cov="+cov+
" minConsensusF="+((double)p.first.count)/p.second+" minF="+((double)p.first.count)/cov);
} }
coverage.indelsAt(pos).clear(); // we dealt with this indel; don't want to see it again coverage.indelsAt(pos).clear(); // we dealt with this indel; don't want to see it again
// (we might otherwise in the case when 1) there will be another indel that follows // (we might otherwise in the case when 1) there will be another indel that follows
// within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel) // within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
@ -420,7 +429,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount()); // System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
// } // }
} }
if ( debug ) System.out.println(" --> Actual shift to " + move_to+" ("+position+")"); if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to+" ("+position+")");
coverage.shift((int)(move_to - coverage.getStart() ) ); coverage.shift((int)(move_to - coverage.getStart() ) );
} }
@ -445,8 +454,20 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
int tumor_cov = coverage.coverageAt(pos); int tumor_cov = coverage.coverageAt(pos);
int normal_cov = normal_coverage.coverageAt(pos); int normal_cov = normal_coverage.coverageAt(pos);
if ( tumor_cov < minCoverage ) continue; // low coverage if ( tumor_cov < minCoverage ) {
if ( normal_cov < minNormalCoverage ) continue; // low coverage if ( DEBUG ) {
System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumor_cov+" (SKIPPED)");
}
continue; // low coverage
}
if ( normal_cov < minNormalCoverage ) {
if ( DEBUG ) {
System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normal_cov+" (SKIPPED)");
}
continue; // low coverage
}
if ( DEBUG ) System.out.println("DEBUG>> Indel in tumor at "+pos);
long left = Math.max( pos-MISMATCH_WIDTH, coverage.getStart() ); long left = Math.max( pos-MISMATCH_WIDTH, coverage.getStart() );
long right = pos+MISMATCH_WIDTH; long right = pos+MISMATCH_WIDTH;
@ -457,6 +478,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
// we are not asked to force-shift, and there's still additional coverage to the right of current indel, so its too early to emit it; // we are not asked to force-shift, and there's still additional coverage to the right of current indel, so its too early to emit it;
// instead we shift only up to current indel pos - MISMATCH_WIDTH, so that we could keep collecting that coverage // instead we shift only up to current indel pos - MISMATCH_WIDTH, so that we could keep collecting that coverage
move_to = left; move_to = left;
if ( DEBUG ) System.out.println("DEBUG>> waiting for coverage; actual shift performed to "+ left);
break; break;
} }
@ -528,6 +550,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
// } // }
} }
if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to+" ("+position+")");
coverage.shift((int)(move_to - coverage.getStart() ) ); coverage.shift((int)(move_to - coverage.getStart() ) );
normal_coverage.shift((int)(move_to - normal_coverage.getStart() ) ); normal_coverage.shift((int)(move_to - normal_coverage.getStart() ) );
} }