bug fixed when a read with alignment end exactly at the window boundary and with last cigar element being an indel would cause index-out-of-bounds exception
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@992 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a12009e9e7
commit
d5cd883b99
|
|
@ -10,6 +10,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import net.sf.samtools.Cigar;
|
import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
|
|
@ -27,6 +28,9 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
@Argument(fullName="somatic", shortName="somatic",
|
@Argument(fullName="somatic", shortName="somatic",
|
||||||
doc="Perform somatic calls; two input alignment files must be specified", required=false)
|
doc="Perform somatic calls; two input alignment files must be specified", required=false)
|
||||||
public boolean call_somatic = false;
|
public boolean call_somatic = false;
|
||||||
|
@Argument(fullName="verbose", shortName="verbose",
|
||||||
|
doc="Tell us what you are calling now (printed to stdout)", required=false)
|
||||||
|
public boolean verbose = false;
|
||||||
|
|
||||||
private static int WINDOW_SIZE = 200;
|
private static int WINDOW_SIZE = 200;
|
||||||
private RunningCoverage coverage;
|
private RunningCoverage coverage;
|
||||||
|
|
@ -141,12 +145,25 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
throw new StingException("Read "+read.getReadName()+": out of order on the contig");
|
throw new StingException("Read "+read.getReadName()+": out of order on the contig");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// a little trick here: we want to make sure that current read completely fits into the current
|
||||||
|
// window so that we can accumulate the coverage/indel counts over the whole length of the read.
|
||||||
|
// The ::getAlignmentEnd() method returns the last position on the reference where bases from the
|
||||||
|
// read actually match (M or D cigar elements). After our cleaning procedure, we can have reads that end
|
||||||
|
// with I element, which is not gonna be counted into alignment length on the reference. On the other hand,
|
||||||
|
// in this program we assign insertions, internally, to the first base *after* the insertion position.
|
||||||
|
// Hence, we have to make sure that that extra base is already in the window or we will get IndexOutOfBounds.
|
||||||
|
|
||||||
if ( read.getAlignmentEnd() > coverage.getStop()) {
|
long alignmentEnd = read.getAlignmentEnd();
|
||||||
|
Cigar c = read.getCigar();
|
||||||
|
if ( c.getCigarElement(c.numCigarElements()-1).getOperator() == CigarOperator.I) alignmentEnd++;
|
||||||
|
|
||||||
|
if ( alignmentEnd > coverage.getStop()) {
|
||||||
|
|
||||||
// we don't emit anything until we reach a read that does not fit into the current window.
|
// we don't emit anything until we reach a read that does not fit into the current window.
|
||||||
// At that point we shift the window to the start of that read and emit everything prior to
|
// At that point we shift the window to the start of that read and emit everything prior to
|
||||||
// that position (reads are sorted, so we are not gonna see any more coverage at those lower positions)
|
// that position (reads are sorted, so we are not gonna see any more coverage at those lower positions).
|
||||||
|
// Clearly, we assume here that window is large enough to accomodate any single read, so simply shifting
|
||||||
|
// the window to the read's start will ensure that the read fits...
|
||||||
|
|
||||||
if ( call_somatic ) emit_somatic( read.getAlignmentStart() );
|
if ( call_somatic ) emit_somatic( read.getAlignmentStart() );
|
||||||
else emit( read.getAlignmentStart() );
|
else emit( read.getAlignmentStart() );
|
||||||
|
|
@ -287,7 +304,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
} else {
|
} else {
|
||||||
message += "\tGERMLINE";
|
message += "\tGERMLINE";
|
||||||
}
|
}
|
||||||
logger.info(message);
|
if ( verbose ) System.out.println(message);
|
||||||
}
|
}
|
||||||
// for ( IndelVariant var : variants ) {
|
// for ( IndelVariant var : variants ) {
|
||||||
// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
|
// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount());
|
||||||
|
|
@ -467,7 +484,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
// will increment position on the read below, there's no 'break' statement yet...
|
// will increment position on the read below, there's no 'break' statement yet...
|
||||||
case H:
|
case H:
|
||||||
case S:
|
case S:
|
||||||
// here we also skip hard and soft-clipped bases on th eread; according to SAM format specification,
|
// here we also skip hard and soft-clipped bases on the read; according to SAM format specification,
|
||||||
// alignment start position on the reference points to where the actually aligned
|
// alignment start position on the reference points to where the actually aligned
|
||||||
// (not clipped) bases go, so we do not need to increment reference position here
|
// (not clipped) bases go, so we do not need to increment reference position here
|
||||||
posOnRead += ce.getLength();
|
posOnRead += ce.getLength();
|
||||||
|
|
@ -492,7 +509,15 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
if ( i == 0 ) logger.warn("Indel at the start of the read "+r.getReadName());
|
if ( i == 0 ) logger.warn("Indel at the start of the read "+r.getReadName());
|
||||||
if ( i == nCigarElems - 1) logger.warn("Indel at the end of the read "+r.getReadName());
|
if ( i == nCigarElems - 1) logger.warn("Indel at the end of the read "+r.getReadName());
|
||||||
|
|
||||||
|
try {
|
||||||
updateCount(indelPosition, type, bases);
|
updateCount(indelPosition, type, bases);
|
||||||
|
} catch (IndexOutOfBoundsException e) {
|
||||||
|
System.out.println("Read "+r.getReadName()+": out of coverage window bounds.Probably window is too small.\n"+
|
||||||
|
"Read length="+r.getReadLength()+"; cigar="+r.getCigarString()+"; start="+
|
||||||
|
r.getAlignmentStart()+"; end="+r.getAlignmentEnd()+"; window start="+getStart()+
|
||||||
|
"; window end="+getStop());
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue