AlignmentUtils.calcNumDifferentBases computes the number of bases that differ between a reference and read sequence given a cigar between the two.
This commit is contained in:
parent
a783f19ab1
commit
752440707d
|
|
@ -48,6 +48,45 @@ public final class AlignmentUtils {
|
||||||
// cannot be instantiated
|
// cannot be instantiated
|
||||||
private AlignmentUtils() { }
|
private AlignmentUtils() { }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the number of bases at which refSeq and readSeq differ, given their alignment
|
||||||
|
*
|
||||||
|
* @param cigar the alignment of readSeq to refSeq
|
||||||
|
* @param refSeq the bases of the reference sequence
|
||||||
|
* @param readSeq the bases of the read sequence
|
||||||
|
* @return the number of bases that differ between refSeq and readSeq
|
||||||
|
*/
|
||||||
|
public static int calcNumDifferentBases(final Cigar cigar, final byte[] refSeq, final byte[] readSeq) {
|
||||||
|
int refIndex = 0, readIdx = 0, delta = 0;
|
||||||
|
|
||||||
|
for (final CigarElement ce : cigar.getCigarElements()) {
|
||||||
|
final int elementLength = ce.getLength();
|
||||||
|
switch (ce.getOperator()) {
|
||||||
|
case X:case EQ:case M:
|
||||||
|
for (int j = 0; j < elementLength; j++, refIndex++, readIdx++)
|
||||||
|
delta += refSeq[refIndex] != readSeq[readIdx] ? 1 : 0;
|
||||||
|
break;
|
||||||
|
case I:
|
||||||
|
delta += elementLength;
|
||||||
|
case S:
|
||||||
|
readIdx += elementLength;
|
||||||
|
break;
|
||||||
|
case D:
|
||||||
|
delta += elementLength;
|
||||||
|
case N:
|
||||||
|
refIndex += elementLength;
|
||||||
|
break;
|
||||||
|
case H:
|
||||||
|
case P:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new ReviewedStingException("The " + ce.getOperator() + " cigar element is not currently supported");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return delta;
|
||||||
|
}
|
||||||
|
|
||||||
public static class MismatchCount {
|
public static class MismatchCount {
|
||||||
public int numMismatches = 0;
|
public int numMismatches = 0;
|
||||||
public long mismatchQualities = 0;
|
public long mismatchQualities = 0;
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ import org.testng.annotations.Test;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class AlignmentUtilsUnitTest {
|
public class AlignmentUtilsUnitTest {
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = true;
|
||||||
private SAMFileHeader header;
|
private SAMFileHeader header;
|
||||||
|
|
||||||
/** Basic aligned and mapped read. */
|
/** Basic aligned and mapped read. */
|
||||||
|
|
@ -145,6 +145,34 @@ public class AlignmentUtilsUnitTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@DataProvider(name = "CalcNumDifferentBasesData")
|
||||||
|
public Object[][] makeCalcNumDifferentBasesData() {
|
||||||
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
|
tests.add(new Object[]{"5M", "ACGTA", "ACGTA", 0});
|
||||||
|
tests.add(new Object[]{"5M", "ACGTA", "ACGTT", 1});
|
||||||
|
tests.add(new Object[]{"5M", "ACGTA", "TCGTT", 2});
|
||||||
|
tests.add(new Object[]{"5M", "ACGTA", "TTGTT", 3});
|
||||||
|
tests.add(new Object[]{"5M", "ACGTA", "TTTTT", 4});
|
||||||
|
tests.add(new Object[]{"5M", "ACGTA", "TTTCT", 5});
|
||||||
|
tests.add(new Object[]{"2M3I3M", "ACGTA", "ACNNNGTA", 3});
|
||||||
|
tests.add(new Object[]{"2M3I3M", "ACGTA", "ACNNNGTT", 4});
|
||||||
|
tests.add(new Object[]{"2M3I3M", "ACGTA", "TCNNNGTT", 5});
|
||||||
|
tests.add(new Object[]{"2M2D1M", "ACGTA", "ACA", 2});
|
||||||
|
tests.add(new Object[]{"2M2D1M", "ACGTA", "ACT", 3});
|
||||||
|
tests.add(new Object[]{"2M2D1M", "ACGTA", "TCT", 4});
|
||||||
|
tests.add(new Object[]{"2M2D1M", "ACGTA", "TGT", 5});
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true, dataProvider = "CalcNumDifferentBasesData")
|
||||||
|
public void testCalcNumDifferentBases(final String cigarString, final String ref, final String read, final int expectedDifferences) {
|
||||||
|
final Cigar cigar = TextCigarCodec.getSingleton().decode(cigarString);
|
||||||
|
Assert.assertEquals(AlignmentUtils.calcNumDifferentBases(cigar, ref.getBytes(), read.getBytes()), expectedDifferences);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@DataProvider(name = "NumAlignedBasesCountingSoftClips")
|
@DataProvider(name = "NumAlignedBasesCountingSoftClips")
|
||||||
public Object[][] makeNumAlignedBasesCountingSoftClips() {
|
public Object[][] makeNumAlignedBasesCountingSoftClips() {
|
||||||
List<Object[]> tests = new ArrayList<Object[]>();
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue