diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index d59d0ef63..58f70d4b6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -48,6 +48,45 @@ public final class AlignmentUtils { // cannot be instantiated private AlignmentUtils() { } + /** + * Get the number of bases at which refSeq and readSeq differ, given their alignment + * + * @param cigar the alignment of readSeq to refSeq + * @param refSeq the bases of the reference sequence + * @param readSeq the bases of the read sequence + * @return the number of bases that differ between refSeq and readSeq + */ + public static int calcNumDifferentBases(final Cigar cigar, final byte[] refSeq, final byte[] readSeq) { + int refIndex = 0, readIdx = 0, delta = 0; + + for (final CigarElement ce : cigar.getCigarElements()) { + final int elementLength = ce.getLength(); + switch (ce.getOperator()) { + case X:case EQ:case M: + for (int j = 0; j < elementLength; j++, refIndex++, readIdx++) + delta += refSeq[refIndex] != readSeq[readIdx] ? 1 : 0; + break; + case I: + delta += elementLength; + case S: + readIdx += elementLength; + break; + case D: + delta += elementLength; + case N: + refIndex += elementLength; + break; + case H: + case P: + break; + default: + throw new ReviewedStingException("The " + ce.getOperator() + " cigar element is not currently supported"); + } + } + + return delta; + } + public static class MismatchCount { public int numMismatches = 0; public long mismatchQualities = 0; diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java index ae01c6c63..660dadc00 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java @@ -37,7 +37,7 @@ import org.testng.annotations.Test; import java.util.*; public class AlignmentUtilsUnitTest { - private final static boolean DEBUG = false; + private final static boolean DEBUG = true; private SAMFileHeader header; /** Basic aligned and mapped read. */ @@ -145,6 +145,34 @@ public class AlignmentUtilsUnitTest { } + @DataProvider(name = "CalcNumDifferentBasesData") + public Object[][] makeCalcNumDifferentBasesData() { + List tests = new ArrayList(); + + tests.add(new Object[]{"5M", "ACGTA", "ACGTA", 0}); + tests.add(new Object[]{"5M", "ACGTA", "ACGTT", 1}); + tests.add(new Object[]{"5M", "ACGTA", "TCGTT", 2}); + tests.add(new Object[]{"5M", "ACGTA", "TTGTT", 3}); + tests.add(new Object[]{"5M", "ACGTA", "TTTTT", 4}); + tests.add(new Object[]{"5M", "ACGTA", "TTTCT", 5}); + tests.add(new Object[]{"2M3I3M", "ACGTA", "ACNNNGTA", 3}); + tests.add(new Object[]{"2M3I3M", "ACGTA", "ACNNNGTT", 4}); + tests.add(new Object[]{"2M3I3M", "ACGTA", "TCNNNGTT", 5}); + tests.add(new Object[]{"2M2D1M", "ACGTA", "ACA", 2}); + tests.add(new Object[]{"2M2D1M", "ACGTA", "ACT", 3}); + tests.add(new Object[]{"2M2D1M", "ACGTA", "TCT", 4}); + tests.add(new Object[]{"2M2D1M", "ACGTA", "TGT", 5}); + + return tests.toArray(new Object[][]{}); + } + + @Test(enabled = true, dataProvider = "CalcNumDifferentBasesData") + public void testCalcNumDifferentBases(final String cigarString, final String ref, final String read, final int expectedDifferences) { + final Cigar cigar = TextCigarCodec.getSingleton().decode(cigarString); + Assert.assertEquals(AlignmentUtils.calcNumDifferentBases(cigar, ref.getBytes(), read.getBytes()), expectedDifferences); + } + + @DataProvider(name = "NumAlignedBasesCountingSoftClips") public Object[][] makeNumAlignedBasesCountingSoftClips() { List tests = new ArrayList();