Adding support of X and = CIGAR operators to the GATK
This commit is contained in:
parent
2a113977a9
commit
eca9613356
|
|
@ -568,6 +568,8 @@ public class GenotypingEngine {
|
|||
refPos += elementLength;
|
||||
break;
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
int numSinceMismatch = -1;
|
||||
int stopOfMismatch = -1;
|
||||
int startOfMismatch = -1;
|
||||
|
|
|
|||
|
|
@ -195,6 +195,8 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
done = true;
|
||||
break;
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
readOffset++;
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
|
|
|
|||
|
|
@ -1025,7 +1025,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
elements.add(ce);
|
||||
break;
|
||||
case M:
|
||||
altIdx += elementLength;
|
||||
case EQ:
|
||||
case X:
|
||||
altIdx += elementLength;
|
||||
case N:
|
||||
if ( reference.length < refIdx + elementLength )
|
||||
ok_flag = false;
|
||||
|
|
@ -1287,6 +1289,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
int elementLength = ce.getLength();
|
||||
switch ( ce.getOperator() ) {
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
for (int k = 0 ; k < elementLength ; k++, refIdx++, altIdx++ ) {
|
||||
if ( refIdx >= reference.length )
|
||||
break;
|
||||
|
|
@ -1432,6 +1436,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
fromIndex += elementLength;
|
||||
break;
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
case I:
|
||||
System.arraycopy(actualReadBases, fromIndex, readBases, toIndex, elementLength);
|
||||
System.arraycopy(actualBaseQuals, fromIndex, baseQuals, toIndex, elementLength);
|
||||
|
|
|
|||
|
|
@ -2057,7 +2057,9 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
break; // do not count gaps or clipped bases
|
||||
case I:
|
||||
case M:
|
||||
readLength += cel.getLength();
|
||||
case EQ:
|
||||
case X:
|
||||
readLength += cel.getLength();
|
||||
break; // advance along the gapless block in the alignment
|
||||
default :
|
||||
throw new IllegalArgumentException("Unexpected operator in cigar string: "+cel.getOperator());
|
||||
|
|
@ -2094,7 +2096,9 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
|
||||
break;
|
||||
case M:
|
||||
for ( int k = 0; k < ce.getLength(); k++, posOnRef++, posOnRead++ ) {
|
||||
case EQ:
|
||||
case X:
|
||||
for ( int k = 0; k < ce.getLength(); k++, posOnRef++, posOnRead++ ) {
|
||||
if ( readBases[posOnRead] != ref[posOnRef] ) { // mismatch!
|
||||
mms++;
|
||||
mismatch_flags[posOnRef] = 1;
|
||||
|
|
|
|||
|
|
@ -551,7 +551,7 @@ public class BAQ {
|
|||
switch (elt.getOperator()) {
|
||||
case N: return null; // cannot handle these
|
||||
case H : case P : case D: break; // ignore pads, hard clips, and deletions
|
||||
case I : case S: case M:
|
||||
case I : case S: case M: case EQ: case X:
|
||||
int prev = readI;
|
||||
readI += elt.getLength();
|
||||
if ( includeClippedBases || elt.getOperator() != CigarOperator.S) {
|
||||
|
|
|
|||
|
|
@ -39,7 +39,6 @@ import org.broadinstitute.sting.utils.recalibration.EventType;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
|
||||
|
||||
public class AlignmentUtils {
|
||||
|
|
@ -71,9 +70,18 @@ public class AlignmentUtils {
|
|||
if (readIdx > endOnRead) break;
|
||||
|
||||
CigarElement ce = c.getCigarElement(i);
|
||||
final int elementLength = ce.getLength();
|
||||
switch (ce.getOperator()) {
|
||||
case X:
|
||||
mc.numMismatches += elementLength;
|
||||
for (int j = 0; j < elementLength; j++)
|
||||
mc.mismatchQualities += r.getBaseQualities()[readIdx+j];
|
||||
case EQ:
|
||||
refIndex += elementLength;
|
||||
readIdx += elementLength;
|
||||
break;
|
||||
case M:
|
||||
for (int j = 0; j < ce.getLength(); j++, refIndex++, readIdx++) {
|
||||
for (int j = 0; j < elementLength; j++, refIndex++, readIdx++) {
|
||||
if (refIndex >= refSeq.length)
|
||||
continue;
|
||||
if (readIdx < startOnRead) continue;
|
||||
|
|
@ -92,11 +100,11 @@ public class AlignmentUtils {
|
|||
break;
|
||||
case I:
|
||||
case S:
|
||||
readIdx += ce.getLength();
|
||||
readIdx += elementLength;
|
||||
break;
|
||||
case D:
|
||||
case N:
|
||||
refIndex += ce.getLength();
|
||||
refIndex += elementLength;
|
||||
break;
|
||||
case H:
|
||||
case P:
|
||||
|
|
@ -164,6 +172,8 @@ public class AlignmentUtils {
|
|||
CigarElement ce = c.getCigarElement(i);
|
||||
int cigarElementLength = ce.getLength();
|
||||
switch (ce.getOperator()) {
|
||||
case EQ:
|
||||
case X:
|
||||
case M:
|
||||
for (int j = 0; j < cigarElementLength; j++, readIndex++, currentPos++) {
|
||||
// are we past the ref window?
|
||||
|
|
@ -204,111 +214,6 @@ public class AlignmentUtils {
|
|||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of mismatches in the pileup element within the given reference context.
|
||||
*
|
||||
* @param read the SAMRecord
|
||||
* @param ref the reference context
|
||||
* @param maxMismatches the maximum number of surrounding mismatches we tolerate to consider a base good
|
||||
* @param windowSize window size (on each side) to test
|
||||
* @return a bitset representing which bases are good
|
||||
*/
|
||||
public static BitSet mismatchesInRefWindow(SAMRecord read, ReferenceContext ref, int maxMismatches, int windowSize) {
|
||||
// first determine the positions with mismatches
|
||||
int readLength = read.getReadLength();
|
||||
BitSet mismatches = new BitSet(readLength);
|
||||
|
||||
// it's possible we aren't starting at the beginning of a read,
|
||||
// and we don't need to look at any of the previous context outside our window
|
||||
// (although we do need future context)
|
||||
int readStartPos = Math.max(read.getAlignmentStart(), ref.getLocus().getStart() - windowSize);
|
||||
int currentReadPos = read.getAlignmentStart();
|
||||
|
||||
byte[] refBases = ref.getBases();
|
||||
int refIndex = readStartPos - ref.getWindow().getStart();
|
||||
if (refIndex < 0) {
|
||||
throw new IllegalStateException("When calculating mismatches, we somehow don't have enough previous reference context for read " + read.getReadName() + " at position " + ref.getLocus());
|
||||
}
|
||||
|
||||
byte[] readBases = read.getReadBases();
|
||||
int readIndex = 0;
|
||||
|
||||
Cigar c = read.getCigar();
|
||||
|
||||
for (int i = 0; i < c.numCigarElements(); i++) {
|
||||
CigarElement ce = c.getCigarElement(i);
|
||||
int cigarElementLength = ce.getLength();
|
||||
switch (ce.getOperator()) {
|
||||
case M:
|
||||
for (int j = 0; j < cigarElementLength; j++, readIndex++) {
|
||||
// skip over unwanted bases
|
||||
if (currentReadPos++ < readStartPos)
|
||||
continue;
|
||||
|
||||
// this is possible if reads extend beyond the contig end
|
||||
if (refIndex >= refBases.length)
|
||||
break;
|
||||
|
||||
byte refChr = refBases[refIndex];
|
||||
byte readChr = readBases[readIndex];
|
||||
if (readChr != refChr)
|
||||
mismatches.set(readIndex);
|
||||
|
||||
refIndex++;
|
||||
}
|
||||
break;
|
||||
case I:
|
||||
case S:
|
||||
readIndex += cigarElementLength;
|
||||
break;
|
||||
case D:
|
||||
case N:
|
||||
if (currentReadPos >= readStartPos)
|
||||
refIndex += cigarElementLength;
|
||||
currentReadPos += cigarElementLength;
|
||||
break;
|
||||
case H:
|
||||
case P:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// all bits are set to false by default
|
||||
BitSet result = new BitSet(readLength);
|
||||
|
||||
int currentPos = 0, leftPos = 0, rightPos;
|
||||
int mismatchCount = 0;
|
||||
|
||||
// calculate how many mismatches exist in the windows to the left/right
|
||||
for (rightPos = 1; rightPos <= windowSize && rightPos < readLength; rightPos++) {
|
||||
if (mismatches.get(rightPos))
|
||||
mismatchCount++;
|
||||
}
|
||||
if (mismatchCount <= maxMismatches)
|
||||
result.set(currentPos);
|
||||
|
||||
// now, traverse over the read positions
|
||||
while (currentPos < readLength) {
|
||||
// add a new rightmost position
|
||||
if (rightPos < readLength && mismatches.get(rightPos++))
|
||||
mismatchCount++;
|
||||
// re-penalize the previous position
|
||||
if (mismatches.get(currentPos++))
|
||||
mismatchCount++;
|
||||
// don't penalize the current position
|
||||
if (mismatches.get(currentPos))
|
||||
mismatchCount--;
|
||||
// subtract the leftmost position
|
||||
if (leftPos < currentPos - windowSize && mismatches.get(leftPos++))
|
||||
mismatchCount--;
|
||||
|
||||
if (mismatchCount <= maxMismatches)
|
||||
result.set(currentPos);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns number of alignment blocks (continuous stretches of aligned bases) in the specified alignment.
|
||||
* This method follows closely the SAMRecord::getAlignmentBlocks() implemented in samtools library, but
|
||||
|
|
@ -367,45 +272,6 @@ public class AlignmentUtils {
|
|||
return n;
|
||||
}
|
||||
|
||||
public static byte[] alignmentToByteArray(final Cigar cigar, final byte[] read, final byte[] ref) {
|
||||
|
||||
final byte[] alignment = new byte[read.length];
|
||||
int refPos = 0;
|
||||
int alignPos = 0;
|
||||
|
||||
for (int iii = 0; iii < cigar.numCigarElements(); iii++) {
|
||||
|
||||
final CigarElement ce = cigar.getCigarElement(iii);
|
||||
final int elementLength = ce.getLength();
|
||||
|
||||
switch (ce.getOperator()) {
|
||||
case I:
|
||||
case S:
|
||||
for (int jjj = 0; jjj < elementLength; jjj++) {
|
||||
alignment[alignPos++] = '+';
|
||||
}
|
||||
break;
|
||||
case D:
|
||||
case N:
|
||||
refPos += elementLength;
|
||||
break;
|
||||
case M:
|
||||
for (int jjj = 0; jjj < elementLength; jjj++) {
|
||||
alignment[alignPos] = ref[refPos];
|
||||
alignPos++;
|
||||
refPos++;
|
||||
}
|
||||
break;
|
||||
case H:
|
||||
case P:
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
|
||||
}
|
||||
}
|
||||
return alignment;
|
||||
}
|
||||
|
||||
public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) {
|
||||
|
||||
int numHQSoftClips = 0;
|
||||
|
|
@ -426,6 +292,8 @@ public class AlignmentUtils {
|
|||
break;
|
||||
case M:
|
||||
case I:
|
||||
case EQ:
|
||||
case X:
|
||||
alignPos += elementLength;
|
||||
break;
|
||||
case H:
|
||||
|
|
@ -488,6 +356,8 @@ public class AlignmentUtils {
|
|||
}
|
||||
break;
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
if (pos + elementLength - 1 >= pileupOffset) {
|
||||
return alignmentPos + (pileupOffset - pos);
|
||||
} else {
|
||||
|
|
@ -519,6 +389,8 @@ public class AlignmentUtils {
|
|||
case D:
|
||||
case N:
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
alignmentLength += elementLength;
|
||||
break;
|
||||
case I:
|
||||
|
|
@ -565,6 +437,8 @@ public class AlignmentUtils {
|
|||
}
|
||||
break;
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
for (int jjj = 0; jjj < elementLength; jjj++) {
|
||||
alignment[alignPos] = read[readPos];
|
||||
alignPos++;
|
||||
|
|
@ -798,6 +672,8 @@ public class AlignmentUtils {
|
|||
|
||||
switch (ce.getOperator()) {
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
readIndex += length;
|
||||
refIndex += length;
|
||||
totalRefBases += length;
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ public class TheoreticalMinimaBenchmark extends ReadProcessingBenchmark {
|
|||
int elementSize = cigarElement.getLength();
|
||||
while(elementSize > 0) {
|
||||
switch(cigarElement.getOperator()) {
|
||||
case M: matchMismatches++; break;
|
||||
case M: case EQ: case X: matchMismatches++; break;
|
||||
case I: insertions++; break;
|
||||
case D: deletions++; break;
|
||||
default: others++; break;
|
||||
|
|
|
|||
|
|
@ -41,6 +41,46 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testXandEQOperators() {
|
||||
final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||
final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'};
|
||||
|
||||
// create a test version of the Reads object
|
||||
ReadProperties readAttributes = createTestReadProperties();
|
||||
|
||||
SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header,"r1",0,1,10);
|
||||
r1.setReadBases(bases1);
|
||||
r1.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
r1.setCigarString("10M");
|
||||
|
||||
SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header,"r2",0,1,10);
|
||||
r2.setReadBases(bases2);
|
||||
r2.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||
r2.setCigarString("3=1X5=1X");
|
||||
|
||||
SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header,"r3",0,1,10);
|
||||
r3.setReadBases(bases2);
|
||||
r3.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||
r3.setCigarString("3=1X5M1X");
|
||||
|
||||
SAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header,"r4",0,1,10);
|
||||
r4.setReadBases(bases2);
|
||||
r4.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
r4.setCigarString("10M");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(r1, r2, r3, r4);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads,readAttributes);
|
||||
|
||||
while (li.hasNext()) {
|
||||
AlignmentContext context = li.next();
|
||||
ReadBackedPileup pileup = context.getBasePileup();
|
||||
Assert.assertEquals(pileup.depthOfCoverage(), 4);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIndelsInRegularPileup() {
|
||||
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||
|
|
|
|||
Loading…
Reference in New Issue