Now calls single-sample indels too, with all the V2 level stats and bells. This officialy obsoletes IndelGenotyperWalker (V1). In addition, the alignments spanning beyond the contig end are now completely ignored (with a user warning), this applies to both single-sample and paired (somatic) calls. You just wait, Eric, I'll get you the docs with the next commit!
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2728 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0fb032a436
commit
40262e2070
|
|
@ -69,11 +69,14 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
private WindowContext tumor_context;
|
private WindowContext tumor_context;
|
||||||
private WindowContext normal_context;
|
private WindowContext normal_context;
|
||||||
private int currentContigIndex = -1;
|
private int currentContigIndex = -1;
|
||||||
|
private int contigLength = -1; // we see to much messy data with reads hanging out of contig ends...
|
||||||
private int currentPosition = -1; // position of the last read we've seen on the current contig
|
private int currentPosition = -1; // position of the last read we've seen on the current contig
|
||||||
private String refName = null;
|
private String refName = null;
|
||||||
private java.io.Writer output = null;
|
private java.io.Writer output = null;
|
||||||
private GenomeLoc location = null;
|
private GenomeLoc location = null;
|
||||||
|
|
||||||
|
boolean outOfContigUserWarned = false;
|
||||||
|
|
||||||
private SeekableRODIterator<rodRefSeq> refseqIterator=null;
|
private SeekableRODIterator<rodRefSeq> refseqIterator=null;
|
||||||
|
|
||||||
private Set<String> normalReadGroups; // we are going to remember which read groups are normals and which are tumors in order to be able
|
private Set<String> normalReadGroups; // we are going to remember which read groups are normals and which are tumors in order to be able
|
||||||
|
|
@ -172,6 +175,8 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
refName = new String(read.getReferenceName());
|
refName = new String(read.getReferenceName());
|
||||||
|
|
||||||
location = GenomeLocParser.setContig(location,refName);
|
location = GenomeLocParser.setContig(location,refName);
|
||||||
|
contigLength = GenomeLocParser.getContigInfo(refName).getSequenceLength();
|
||||||
|
outOfContigUserWarned = false;
|
||||||
|
|
||||||
normal_context.clear(); // reset coverage window; this will also set reference position to 0
|
normal_context.clear(); // reset coverage window; this will also set reference position to 0
|
||||||
if ( call_somatic) tumor_context.clear();
|
if ( call_somatic) tumor_context.clear();
|
||||||
|
|
@ -192,6 +197,14 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
currentPosition = read.getAlignmentStart();
|
currentPosition = read.getAlignmentStart();
|
||||||
|
|
||||||
|
if ( read.getAlignmentEnd() > contigLength ) {
|
||||||
|
if ( ! outOfContigUserWarned ) {
|
||||||
|
System.out.println("WARNING: Reads aligned past contig length on "+ location.getContig()+"; all such reads will be skipped");
|
||||||
|
outOfContigUserWarned = true;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if ( read.getAlignmentStart() < normal_context.getStart() ) {
|
if ( read.getAlignmentStart() < normal_context.getStart() ) {
|
||||||
// should never happen
|
// should never happen
|
||||||
throw new StingException("Read "+read.getReadName()+": out of order on the contig\n"+
|
throw new StingException("Read "+read.getReadName()+": out of order on the contig\n"+
|
||||||
|
|
@ -264,10 +277,94 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
*
|
*
|
||||||
* @param position
|
* @param position
|
||||||
*/
|
*/
|
||||||
private void emit(long position, boolean force) {
|
private void emit(long position, boolean force) {
|
||||||
|
|
||||||
}
|
position = adjustPosition(position);
|
||||||
|
long move_to = position;
|
||||||
|
|
||||||
|
for ( long pos = normal_context.getStart() ; pos < Math.min(position,normal_context.getStop()+1) ; pos++ ) {
|
||||||
|
|
||||||
|
if ( normal_context.indelsAt(pos).size() == 0 ) continue; // no indels
|
||||||
|
|
||||||
|
IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH);
|
||||||
|
|
||||||
|
if ( normalCall.getCoverage() < minCoverage ) {
|
||||||
|
if ( DEBUG ) {
|
||||||
|
System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)");
|
||||||
|
}
|
||||||
|
continue; // low coverage
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( DEBUG ) System.out.println("DEBUG>> Indel at "+pos);
|
||||||
|
|
||||||
|
long left = Math.max( pos-NQS_WIDTH, normal_context.getStart() );
|
||||||
|
long right = pos+normalCall.getVariant().lengthOnRef()+NQS_WIDTH-1;
|
||||||
|
|
||||||
|
if ( right >= position && ! force) {
|
||||||
|
// we are not asked to force-shift, and there is more coverage around the current indel that we still need to collect
|
||||||
|
|
||||||
|
// we are not asked to force-shift, and there's still additional coverage to the right of current indel, so its too early to emit it;
|
||||||
|
// instead we shift only up to current indel pos - MISMATCH_WIDTH, so that we could keep collecting that coverage
|
||||||
|
move_to = adjustPosition(left);
|
||||||
|
if ( DEBUG ) System.out.println("DEBUG>> waiting for coverage; actual shift performed to "+ move_to);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right:
|
||||||
|
if ( right > normal_context.getStop() ) right = normal_context.getStop();
|
||||||
|
|
||||||
|
location = GenomeLocParser.setStart(location,pos);
|
||||||
|
location = GenomeLocParser.setStop(location,pos); // retrieve annotation data
|
||||||
|
RODRecordList<rodRefSeq> annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location));
|
||||||
|
|
||||||
|
if ( normalCall.failsNQSMismatch() ) {
|
||||||
|
String fullRecord = makeFullRecord(normalCall);
|
||||||
|
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
||||||
|
out.println(fullRecord+
|
||||||
|
"SAMPLE_TOO_DIRTY\t"+annotationString);
|
||||||
|
normal_context.indelsAt(pos).clear();
|
||||||
|
// we dealt with this indel; don't want to see it again
|
||||||
|
// (we might otherwise in the case when 1) there is another indel that follows
|
||||||
|
// within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
|
||||||
|
continue; // too dirty
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( normalCall.isCall() ) {
|
||||||
|
String message = normalCall.makeBedLine(output);
|
||||||
|
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
||||||
|
|
||||||
|
StringBuilder fullRecord = new StringBuilder();
|
||||||
|
fullRecord.append(makeFullRecord(normalCall));
|
||||||
|
|
||||||
|
if ( verbose ) {
|
||||||
|
if ( refseqIterator == null ) out.println(fullRecord + "\t");
|
||||||
|
else out.println(fullRecord + "\t"+ annotationString);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
normal_context.indelsAt(pos).clear();
|
||||||
|
// we dealt with this indel; don't want to see it again
|
||||||
|
// (we might otherwise in the case when 1) there is another indel that follows
|
||||||
|
// within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel)
|
||||||
|
|
||||||
|
// for ( IndelVariant var : variants ) {
|
||||||
|
// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+position+")");
|
||||||
|
normal_context.shift((int)(move_to - normal_context.getStart() ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
/** A shortcut. Returns true if we got indels within the specified interval in single and only window context
|
||||||
|
* (for single-sample calls) or in either of the two window contexts (for two-sample/somatic calls)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
private boolean indelsPresentInInterval(long start, long stop) {
|
||||||
|
if ( tumor_context == null ) return normal_context.hasIndelsInInterval(start,stop);
|
||||||
|
return tumor_context.hasIndelsInInterval(start,stop) ||
|
||||||
|
normal_context.hasIndelsInInterval(start,stop);
|
||||||
|
}
|
||||||
/** Takes the position, to which window shift is requested, and tries to adjust it in such a way that no NQS window is broken.
|
/** Takes the position, to which window shift is requested, and tries to adjust it in such a way that no NQS window is broken.
|
||||||
* Namely, this method checks, iteratively, if there is an indel within NQS_WIDTH bases ahead of initially requested or adjusted
|
* Namely, this method checks, iteratively, if there is an indel within NQS_WIDTH bases ahead of initially requested or adjusted
|
||||||
* shift position. If there is such an indel,
|
* shift position. If there is such an indel,
|
||||||
|
|
@ -282,8 +379,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
long initial_request = request;
|
long initial_request = request;
|
||||||
int attempts = 0;
|
int attempts = 0;
|
||||||
boolean failure = false;
|
boolean failure = false;
|
||||||
while ( tumor_context.hasIndelsInInterval(request,request+NQS_WIDTH) ||
|
while ( indelsPresentInInterval(request,request+NQS_WIDTH) ) {
|
||||||
normal_context.hasIndelsInInterval(request,request+NQS_WIDTH) ) {
|
|
||||||
request -= NQS_WIDTH;
|
request -= NQS_WIDTH;
|
||||||
if ( DEBUG ) System.out.println("DEBUG>> indel observations present within "+NQS_WIDTH+" bases ahead. Resetting shift to "+request);
|
if ( DEBUG ) System.out.println("DEBUG>> indel observations present within "+NQS_WIDTH+" bases ahead. Resetting shift to "+request);
|
||||||
attempts++;
|
attempts++;
|
||||||
|
|
@ -300,8 +396,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
// first position after the shift (this is bad for other reasons); if it breaks a nqs window, so be it
|
// first position after the shift (this is bad for other reasons); if it breaks a nqs window, so be it
|
||||||
request = initial_request;
|
request = initial_request;
|
||||||
attempts = 0;
|
attempts = 0;
|
||||||
while ( tumor_context.hasIndelsInInterval(request,request+1) ||
|
while ( indelsPresentInInterval(request,request+1) ) {
|
||||||
normal_context.hasIndelsInInterval(request,request+1) ) {
|
|
||||||
request--;
|
request--;
|
||||||
if ( DEBUG ) System.out.println("DEBUG>> indel observations present within "+NQS_WIDTH+" bases ahead. Resetting shift to "+request);
|
if ( DEBUG ) System.out.println("DEBUG>> indel observations present within "+NQS_WIDTH+" bases ahead. Resetting shift to "+request);
|
||||||
attempts++;
|
attempts++;
|
||||||
|
|
@ -434,6 +529,15 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
||||||
return fullRecord.toString();
|
return fullRecord.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String makeFullRecord(IndelPrecall normalCall) {
|
||||||
|
StringBuilder fullRecord = new StringBuilder();
|
||||||
|
fullRecord.append(normalCall.makeEventString());
|
||||||
|
fullRecord.append('\t');
|
||||||
|
fullRecord.append(normalCall.makeStatsString(""));
|
||||||
|
fullRecord.append('\t');
|
||||||
|
return fullRecord.toString();
|
||||||
|
}
|
||||||
|
|
||||||
private String getAnnotationString(RODRecordList<rodRefSeq> ann) {
|
private String getAnnotationString(RODRecordList<rodRefSeq> ann) {
|
||||||
if ( ann == null ) return annGenomic;
|
if ( ann == null ) return annGenomic;
|
||||||
else {
|
else {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue