AlignmentUtils.calcNumDifferentBases computes the number of bases that differ between a reference and read sequence given a cigar between the two.

This commit is contained in:
Mark DePristo 2013-03-06 13:52:53 -05:00
parent a783f19ab1
commit 752440707d
2 changed files with 68 additions and 1 deletions

View File

@ -48,6 +48,45 @@ public final class AlignmentUtils {
// cannot be instantiated
private AlignmentUtils() { }
/**
* Get the number of bases at which refSeq and readSeq differ, given their alignment
*
* @param cigar the alignment of readSeq to refSeq
* @param refSeq the bases of the reference sequence
* @param readSeq the bases of the read sequence
* @return the number of bases that differ between refSeq and readSeq
*/
public static int calcNumDifferentBases(final Cigar cigar, final byte[] refSeq, final byte[] readSeq) {
int refIndex = 0, readIdx = 0, delta = 0;
for (final CigarElement ce : cigar.getCigarElements()) {
final int elementLength = ce.getLength();
switch (ce.getOperator()) {
case X:case EQ:case M:
for (int j = 0; j < elementLength; j++, refIndex++, readIdx++)
delta += refSeq[refIndex] != readSeq[readIdx] ? 1 : 0;
break;
case I:
delta += elementLength;
case S:
readIdx += elementLength;
break;
case D:
delta += elementLength;
case N:
refIndex += elementLength;
break;
case H:
case P:
break;
default:
throw new ReviewedStingException("The " + ce.getOperator() + " cigar element is not currently supported");
}
}
return delta;
}
public static class MismatchCount {
public int numMismatches = 0;
public long mismatchQualities = 0;

View File

@ -37,7 +37,7 @@ import org.testng.annotations.Test;
import java.util.*;
public class AlignmentUtilsUnitTest {
private final static boolean DEBUG = false;
private final static boolean DEBUG = true;
private SAMFileHeader header;
/** Basic aligned and mapped read. */
@ -145,6 +145,34 @@ public class AlignmentUtilsUnitTest {
}
@DataProvider(name = "CalcNumDifferentBasesData")
public Object[][] makeCalcNumDifferentBasesData() {
List<Object[]> tests = new ArrayList<Object[]>();
tests.add(new Object[]{"5M", "ACGTA", "ACGTA", 0});
tests.add(new Object[]{"5M", "ACGTA", "ACGTT", 1});
tests.add(new Object[]{"5M", "ACGTA", "TCGTT", 2});
tests.add(new Object[]{"5M", "ACGTA", "TTGTT", 3});
tests.add(new Object[]{"5M", "ACGTA", "TTTTT", 4});
tests.add(new Object[]{"5M", "ACGTA", "TTTCT", 5});
tests.add(new Object[]{"2M3I3M", "ACGTA", "ACNNNGTA", 3});
tests.add(new Object[]{"2M3I3M", "ACGTA", "ACNNNGTT", 4});
tests.add(new Object[]{"2M3I3M", "ACGTA", "TCNNNGTT", 5});
tests.add(new Object[]{"2M2D1M", "ACGTA", "ACA", 2});
tests.add(new Object[]{"2M2D1M", "ACGTA", "ACT", 3});
tests.add(new Object[]{"2M2D1M", "ACGTA", "TCT", 4});
tests.add(new Object[]{"2M2D1M", "ACGTA", "TGT", 5});
return tests.toArray(new Object[][]{});
}
@Test(enabled = true, dataProvider = "CalcNumDifferentBasesData")
public void testCalcNumDifferentBases(final String cigarString, final String ref, final String read, final int expectedDifferences) {
final Cigar cigar = TextCigarCodec.getSingleton().decode(cigarString);
Assert.assertEquals(AlignmentUtils.calcNumDifferentBases(cigar, ref.getBytes(), read.getBytes()), expectedDifferences);
}
@DataProvider(name = "NumAlignedBasesCountingSoftClips")
public Object[][] makeNumAlignedBasesCountingSoftClips() {
List<Object[]> tests = new ArrayList<Object[]>();