"update integration tests in CombineVariantsIntegrationTest"
Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
6fefdaf428
|
|
@ -568,6 +568,8 @@ public class GenotypingEngine {
|
||||||
refPos += elementLength;
|
refPos += elementLength;
|
||||||
break;
|
break;
|
||||||
case M:
|
case M:
|
||||||
|
case EQ:
|
||||||
|
case X:
|
||||||
int numSinceMismatch = -1;
|
int numSinceMismatch = -1;
|
||||||
int stopOfMismatch = -1;
|
int stopOfMismatch = -1;
|
||||||
int startOfMismatch = -1;
|
int startOfMismatch = -1;
|
||||||
|
|
|
||||||
|
|
@ -195,6 +195,8 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
done = true;
|
done = true;
|
||||||
break;
|
break;
|
||||||
case M:
|
case M:
|
||||||
|
case EQ:
|
||||||
|
case X:
|
||||||
readOffset++;
|
readOffset++;
|
||||||
genomeOffset++;
|
genomeOffset++;
|
||||||
done = true;
|
done = true;
|
||||||
|
|
|
||||||
|
|
@ -1025,7 +1025,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
elements.add(ce);
|
elements.add(ce);
|
||||||
break;
|
break;
|
||||||
case M:
|
case M:
|
||||||
altIdx += elementLength;
|
case EQ:
|
||||||
|
case X:
|
||||||
|
altIdx += elementLength;
|
||||||
case N:
|
case N:
|
||||||
if ( reference.length < refIdx + elementLength )
|
if ( reference.length < refIdx + elementLength )
|
||||||
ok_flag = false;
|
ok_flag = false;
|
||||||
|
|
@ -1287,6 +1289,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
int elementLength = ce.getLength();
|
int elementLength = ce.getLength();
|
||||||
switch ( ce.getOperator() ) {
|
switch ( ce.getOperator() ) {
|
||||||
case M:
|
case M:
|
||||||
|
case EQ:
|
||||||
|
case X:
|
||||||
for (int k = 0 ; k < elementLength ; k++, refIdx++, altIdx++ ) {
|
for (int k = 0 ; k < elementLength ; k++, refIdx++, altIdx++ ) {
|
||||||
if ( refIdx >= reference.length )
|
if ( refIdx >= reference.length )
|
||||||
break;
|
break;
|
||||||
|
|
@ -1432,6 +1436,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
fromIndex += elementLength;
|
fromIndex += elementLength;
|
||||||
break;
|
break;
|
||||||
case M:
|
case M:
|
||||||
|
case EQ:
|
||||||
|
case X:
|
||||||
case I:
|
case I:
|
||||||
System.arraycopy(actualReadBases, fromIndex, readBases, toIndex, elementLength);
|
System.arraycopy(actualReadBases, fromIndex, readBases, toIndex, elementLength);
|
||||||
System.arraycopy(actualBaseQuals, fromIndex, baseQuals, toIndex, elementLength);
|
System.arraycopy(actualBaseQuals, fromIndex, baseQuals, toIndex, elementLength);
|
||||||
|
|
|
||||||
|
|
@ -2057,7 +2057,9 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
||||||
break; // do not count gaps or clipped bases
|
break; // do not count gaps or clipped bases
|
||||||
case I:
|
case I:
|
||||||
case M:
|
case M:
|
||||||
readLength += cel.getLength();
|
case EQ:
|
||||||
|
case X:
|
||||||
|
readLength += cel.getLength();
|
||||||
break; // advance along the gapless block in the alignment
|
break; // advance along the gapless block in the alignment
|
||||||
default :
|
default :
|
||||||
throw new IllegalArgumentException("Unexpected operator in cigar string: "+cel.getOperator());
|
throw new IllegalArgumentException("Unexpected operator in cigar string: "+cel.getOperator());
|
||||||
|
|
@ -2094,7 +2096,9 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case M:
|
case M:
|
||||||
for ( int k = 0; k < ce.getLength(); k++, posOnRef++, posOnRead++ ) {
|
case EQ:
|
||||||
|
case X:
|
||||||
|
for ( int k = 0; k < ce.getLength(); k++, posOnRef++, posOnRead++ ) {
|
||||||
if ( readBases[posOnRead] != ref[posOnRef] ) { // mismatch!
|
if ( readBases[posOnRead] != ref[posOnRef] ) { // mismatch!
|
||||||
mms++;
|
mms++;
|
||||||
mismatch_flags[posOnRef] = 1;
|
mismatch_flags[posOnRef] = 1;
|
||||||
|
|
|
||||||
|
|
@ -551,7 +551,7 @@ public class BAQ {
|
||||||
switch (elt.getOperator()) {
|
switch (elt.getOperator()) {
|
||||||
case N: return null; // cannot handle these
|
case N: return null; // cannot handle these
|
||||||
case H : case P : case D: break; // ignore pads, hard clips, and deletions
|
case H : case P : case D: break; // ignore pads, hard clips, and deletions
|
||||||
case I : case S: case M:
|
case I : case S: case M: case EQ: case X:
|
||||||
int prev = readI;
|
int prev = readI;
|
||||||
readI += elt.getLength();
|
readI += elt.getLength();
|
||||||
if ( includeClippedBases || elt.getOperator() != CigarOperator.S) {
|
if ( includeClippedBases || elt.getOperator() != CigarOperator.S) {
|
||||||
|
|
|
||||||
|
|
@ -155,7 +155,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
||||||
|
|
||||||
protected void addPileupToCumulativeStats(AbstractReadBackedPileup<RBP, PE> pileup) {
|
protected void addPileupToCumulativeStats(AbstractReadBackedPileup<RBP, PE> pileup) {
|
||||||
size += pileup.getNumberOfElements();
|
size += pileup.getNumberOfElements();
|
||||||
abstractSize += pileup.depthOfCoverage();
|
abstractSize = pileup.depthOfCoverage() + (abstractSize == -1 ? 0 : abstractSize);
|
||||||
nDeletions += pileup.getNumberOfDeletions();
|
nDeletions += pileup.getNumberOfDeletions();
|
||||||
nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
|
nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,6 @@ import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.BitSet;
|
|
||||||
|
|
||||||
|
|
||||||
public class AlignmentUtils {
|
public class AlignmentUtils {
|
||||||
|
|
@ -71,9 +70,18 @@ public class AlignmentUtils {
|
||||||
if (readIdx > endOnRead) break;
|
if (readIdx > endOnRead) break;
|
||||||
|
|
||||||
CigarElement ce = c.getCigarElement(i);
|
CigarElement ce = c.getCigarElement(i);
|
||||||
|
final int elementLength = ce.getLength();
|
||||||
switch (ce.getOperator()) {
|
switch (ce.getOperator()) {
|
||||||
|
case X:
|
||||||
|
mc.numMismatches += elementLength;
|
||||||
|
for (int j = 0; j < elementLength; j++)
|
||||||
|
mc.mismatchQualities += r.getBaseQualities()[readIdx+j];
|
||||||
|
case EQ:
|
||||||
|
refIndex += elementLength;
|
||||||
|
readIdx += elementLength;
|
||||||
|
break;
|
||||||
case M:
|
case M:
|
||||||
for (int j = 0; j < ce.getLength(); j++, refIndex++, readIdx++) {
|
for (int j = 0; j < elementLength; j++, refIndex++, readIdx++) {
|
||||||
if (refIndex >= refSeq.length)
|
if (refIndex >= refSeq.length)
|
||||||
continue;
|
continue;
|
||||||
if (readIdx < startOnRead) continue;
|
if (readIdx < startOnRead) continue;
|
||||||
|
|
@ -92,11 +100,11 @@ public class AlignmentUtils {
|
||||||
break;
|
break;
|
||||||
case I:
|
case I:
|
||||||
case S:
|
case S:
|
||||||
readIdx += ce.getLength();
|
readIdx += elementLength;
|
||||||
break;
|
break;
|
||||||
case D:
|
case D:
|
||||||
case N:
|
case N:
|
||||||
refIndex += ce.getLength();
|
refIndex += elementLength;
|
||||||
break;
|
break;
|
||||||
case H:
|
case H:
|
||||||
case P:
|
case P:
|
||||||
|
|
@ -164,6 +172,8 @@ public class AlignmentUtils {
|
||||||
CigarElement ce = c.getCigarElement(i);
|
CigarElement ce = c.getCigarElement(i);
|
||||||
int cigarElementLength = ce.getLength();
|
int cigarElementLength = ce.getLength();
|
||||||
switch (ce.getOperator()) {
|
switch (ce.getOperator()) {
|
||||||
|
case EQ:
|
||||||
|
case X:
|
||||||
case M:
|
case M:
|
||||||
for (int j = 0; j < cigarElementLength; j++, readIndex++, currentPos++) {
|
for (int j = 0; j < cigarElementLength; j++, readIndex++, currentPos++) {
|
||||||
// are we past the ref window?
|
// are we past the ref window?
|
||||||
|
|
@ -204,111 +214,6 @@ public class AlignmentUtils {
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the number of mismatches in the pileup element within the given reference context.
|
|
||||||
*
|
|
||||||
* @param read the SAMRecord
|
|
||||||
* @param ref the reference context
|
|
||||||
* @param maxMismatches the maximum number of surrounding mismatches we tolerate to consider a base good
|
|
||||||
* @param windowSize window size (on each side) to test
|
|
||||||
* @return a bitset representing which bases are good
|
|
||||||
*/
|
|
||||||
public static BitSet mismatchesInRefWindow(SAMRecord read, ReferenceContext ref, int maxMismatches, int windowSize) {
|
|
||||||
// first determine the positions with mismatches
|
|
||||||
int readLength = read.getReadLength();
|
|
||||||
BitSet mismatches = new BitSet(readLength);
|
|
||||||
|
|
||||||
// it's possible we aren't starting at the beginning of a read,
|
|
||||||
// and we don't need to look at any of the previous context outside our window
|
|
||||||
// (although we do need future context)
|
|
||||||
int readStartPos = Math.max(read.getAlignmentStart(), ref.getLocus().getStart() - windowSize);
|
|
||||||
int currentReadPos = read.getAlignmentStart();
|
|
||||||
|
|
||||||
byte[] refBases = ref.getBases();
|
|
||||||
int refIndex = readStartPos - ref.getWindow().getStart();
|
|
||||||
if (refIndex < 0) {
|
|
||||||
throw new IllegalStateException("When calculating mismatches, we somehow don't have enough previous reference context for read " + read.getReadName() + " at position " + ref.getLocus());
|
|
||||||
}
|
|
||||||
|
|
||||||
byte[] readBases = read.getReadBases();
|
|
||||||
int readIndex = 0;
|
|
||||||
|
|
||||||
Cigar c = read.getCigar();
|
|
||||||
|
|
||||||
for (int i = 0; i < c.numCigarElements(); i++) {
|
|
||||||
CigarElement ce = c.getCigarElement(i);
|
|
||||||
int cigarElementLength = ce.getLength();
|
|
||||||
switch (ce.getOperator()) {
|
|
||||||
case M:
|
|
||||||
for (int j = 0; j < cigarElementLength; j++, readIndex++) {
|
|
||||||
// skip over unwanted bases
|
|
||||||
if (currentReadPos++ < readStartPos)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// this is possible if reads extend beyond the contig end
|
|
||||||
if (refIndex >= refBases.length)
|
|
||||||
break;
|
|
||||||
|
|
||||||
byte refChr = refBases[refIndex];
|
|
||||||
byte readChr = readBases[readIndex];
|
|
||||||
if (readChr != refChr)
|
|
||||||
mismatches.set(readIndex);
|
|
||||||
|
|
||||||
refIndex++;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case I:
|
|
||||||
case S:
|
|
||||||
readIndex += cigarElementLength;
|
|
||||||
break;
|
|
||||||
case D:
|
|
||||||
case N:
|
|
||||||
if (currentReadPos >= readStartPos)
|
|
||||||
refIndex += cigarElementLength;
|
|
||||||
currentReadPos += cigarElementLength;
|
|
||||||
break;
|
|
||||||
case H:
|
|
||||||
case P:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// all bits are set to false by default
|
|
||||||
BitSet result = new BitSet(readLength);
|
|
||||||
|
|
||||||
int currentPos = 0, leftPos = 0, rightPos;
|
|
||||||
int mismatchCount = 0;
|
|
||||||
|
|
||||||
// calculate how many mismatches exist in the windows to the left/right
|
|
||||||
for (rightPos = 1; rightPos <= windowSize && rightPos < readLength; rightPos++) {
|
|
||||||
if (mismatches.get(rightPos))
|
|
||||||
mismatchCount++;
|
|
||||||
}
|
|
||||||
if (mismatchCount <= maxMismatches)
|
|
||||||
result.set(currentPos);
|
|
||||||
|
|
||||||
// now, traverse over the read positions
|
|
||||||
while (currentPos < readLength) {
|
|
||||||
// add a new rightmost position
|
|
||||||
if (rightPos < readLength && mismatches.get(rightPos++))
|
|
||||||
mismatchCount++;
|
|
||||||
// re-penalize the previous position
|
|
||||||
if (mismatches.get(currentPos++))
|
|
||||||
mismatchCount++;
|
|
||||||
// don't penalize the current position
|
|
||||||
if (mismatches.get(currentPos))
|
|
||||||
mismatchCount--;
|
|
||||||
// subtract the leftmost position
|
|
||||||
if (leftPos < currentPos - windowSize && mismatches.get(leftPos++))
|
|
||||||
mismatchCount--;
|
|
||||||
|
|
||||||
if (mismatchCount <= maxMismatches)
|
|
||||||
result.set(currentPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns number of alignment blocks (continuous stretches of aligned bases) in the specified alignment.
|
* Returns number of alignment blocks (continuous stretches of aligned bases) in the specified alignment.
|
||||||
* This method follows closely the SAMRecord::getAlignmentBlocks() implemented in samtools library, but
|
* This method follows closely the SAMRecord::getAlignmentBlocks() implemented in samtools library, but
|
||||||
|
|
@ -367,45 +272,6 @@ public class AlignmentUtils {
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static byte[] alignmentToByteArray(final Cigar cigar, final byte[] read, final byte[] ref) {
|
|
||||||
|
|
||||||
final byte[] alignment = new byte[read.length];
|
|
||||||
int refPos = 0;
|
|
||||||
int alignPos = 0;
|
|
||||||
|
|
||||||
for (int iii = 0; iii < cigar.numCigarElements(); iii++) {
|
|
||||||
|
|
||||||
final CigarElement ce = cigar.getCigarElement(iii);
|
|
||||||
final int elementLength = ce.getLength();
|
|
||||||
|
|
||||||
switch (ce.getOperator()) {
|
|
||||||
case I:
|
|
||||||
case S:
|
|
||||||
for (int jjj = 0; jjj < elementLength; jjj++) {
|
|
||||||
alignment[alignPos++] = '+';
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case D:
|
|
||||||
case N:
|
|
||||||
refPos += elementLength;
|
|
||||||
break;
|
|
||||||
case M:
|
|
||||||
for (int jjj = 0; jjj < elementLength; jjj++) {
|
|
||||||
alignment[alignPos] = ref[refPos];
|
|
||||||
alignPos++;
|
|
||||||
refPos++;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case H:
|
|
||||||
case P:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return alignment;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) {
|
public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) {
|
||||||
|
|
||||||
int numHQSoftClips = 0;
|
int numHQSoftClips = 0;
|
||||||
|
|
@ -426,6 +292,8 @@ public class AlignmentUtils {
|
||||||
break;
|
break;
|
||||||
case M:
|
case M:
|
||||||
case I:
|
case I:
|
||||||
|
case EQ:
|
||||||
|
case X:
|
||||||
alignPos += elementLength;
|
alignPos += elementLength;
|
||||||
break;
|
break;
|
||||||
case H:
|
case H:
|
||||||
|
|
@ -488,6 +356,8 @@ public class AlignmentUtils {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case M:
|
case M:
|
||||||
|
case EQ:
|
||||||
|
case X:
|
||||||
if (pos + elementLength - 1 >= pileupOffset) {
|
if (pos + elementLength - 1 >= pileupOffset) {
|
||||||
return alignmentPos + (pileupOffset - pos);
|
return alignmentPos + (pileupOffset - pos);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -519,6 +389,8 @@ public class AlignmentUtils {
|
||||||
case D:
|
case D:
|
||||||
case N:
|
case N:
|
||||||
case M:
|
case M:
|
||||||
|
case EQ:
|
||||||
|
case X:
|
||||||
alignmentLength += elementLength;
|
alignmentLength += elementLength;
|
||||||
break;
|
break;
|
||||||
case I:
|
case I:
|
||||||
|
|
@ -565,6 +437,8 @@ public class AlignmentUtils {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case M:
|
case M:
|
||||||
|
case EQ:
|
||||||
|
case X:
|
||||||
for (int jjj = 0; jjj < elementLength; jjj++) {
|
for (int jjj = 0; jjj < elementLength; jjj++) {
|
||||||
alignment[alignPos] = read[readPos];
|
alignment[alignPos] = read[readPos];
|
||||||
alignPos++;
|
alignPos++;
|
||||||
|
|
@ -798,6 +672,8 @@ public class AlignmentUtils {
|
||||||
|
|
||||||
switch (ce.getOperator()) {
|
switch (ce.getOperator()) {
|
||||||
case M:
|
case M:
|
||||||
|
case EQ:
|
||||||
|
case X:
|
||||||
readIndex += length;
|
readIndex += length;
|
||||||
refIndex += length;
|
refIndex += length;
|
||||||
totalRefBases += length;
|
totalRefBases += length;
|
||||||
|
|
|
||||||
|
|
@ -94,7 +94,7 @@ public class TheoreticalMinimaBenchmark extends ReadProcessingBenchmark {
|
||||||
int elementSize = cigarElement.getLength();
|
int elementSize = cigarElement.getLength();
|
||||||
while(elementSize > 0) {
|
while(elementSize > 0) {
|
||||||
switch(cigarElement.getOperator()) {
|
switch(cigarElement.getOperator()) {
|
||||||
case M: matchMismatches++; break;
|
case M: case EQ: case X: matchMismatches++; break;
|
||||||
case I: insertions++; break;
|
case I: insertions++; break;
|
||||||
case D: deletions++; break;
|
case D: deletions++; break;
|
||||||
default: others++; break;
|
default: others++; break;
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,46 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
||||||
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testXandEQOperators() {
|
||||||
|
final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||||
|
final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'};
|
||||||
|
|
||||||
|
// create a test version of the Reads object
|
||||||
|
ReadProperties readAttributes = createTestReadProperties();
|
||||||
|
|
||||||
|
SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header,"r1",0,1,10);
|
||||||
|
r1.setReadBases(bases1);
|
||||||
|
r1.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||||
|
r1.setCigarString("10M");
|
||||||
|
|
||||||
|
SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header,"r2",0,1,10);
|
||||||
|
r2.setReadBases(bases2);
|
||||||
|
r2.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||||
|
r2.setCigarString("3=1X5=1X");
|
||||||
|
|
||||||
|
SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header,"r3",0,1,10);
|
||||||
|
r3.setReadBases(bases2);
|
||||||
|
r3.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||||
|
r3.setCigarString("3=1X5M1X");
|
||||||
|
|
||||||
|
SAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header,"r4",0,1,10);
|
||||||
|
r4.setReadBases(bases2);
|
||||||
|
r4.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||||
|
r4.setCigarString("10M");
|
||||||
|
|
||||||
|
List<SAMRecord> reads = Arrays.asList(r1, r2, r3, r4);
|
||||||
|
|
||||||
|
// create the iterator by state with the fake reads and fake records
|
||||||
|
li = makeLTBS(reads,readAttributes);
|
||||||
|
|
||||||
|
while (li.hasNext()) {
|
||||||
|
AlignmentContext context = li.next();
|
||||||
|
ReadBackedPileup pileup = context.getBasePileup();
|
||||||
|
Assert.assertEquals(pileup.depthOfCoverage(), 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIndelsInRegularPileup() {
|
public void testIndelsInRegularPileup() {
|
||||||
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue