AlignmentUtils.calcNumDifferentBases computes the number of bases that differ between a reference and read sequence given a cigar between the two.
This commit is contained in:
parent
a783f19ab1
commit
752440707d
|
|
@ -48,6 +48,45 @@ public final class AlignmentUtils {
|
|||
// cannot be instantiated
|
||||
private AlignmentUtils() { }
|
||||
|
||||
/**
|
||||
* Get the number of bases at which refSeq and readSeq differ, given their alignment
|
||||
*
|
||||
* @param cigar the alignment of readSeq to refSeq
|
||||
* @param refSeq the bases of the reference sequence
|
||||
* @param readSeq the bases of the read sequence
|
||||
* @return the number of bases that differ between refSeq and readSeq
|
||||
*/
|
||||
public static int calcNumDifferentBases(final Cigar cigar, final byte[] refSeq, final byte[] readSeq) {
|
||||
int refIndex = 0, readIdx = 0, delta = 0;
|
||||
|
||||
for (final CigarElement ce : cigar.getCigarElements()) {
|
||||
final int elementLength = ce.getLength();
|
||||
switch (ce.getOperator()) {
|
||||
case X:case EQ:case M:
|
||||
for (int j = 0; j < elementLength; j++, refIndex++, readIdx++)
|
||||
delta += refSeq[refIndex] != readSeq[readIdx] ? 1 : 0;
|
||||
break;
|
||||
case I:
|
||||
delta += elementLength;
|
||||
case S:
|
||||
readIdx += elementLength;
|
||||
break;
|
||||
case D:
|
||||
delta += elementLength;
|
||||
case N:
|
||||
refIndex += elementLength;
|
||||
break;
|
||||
case H:
|
||||
case P:
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("The " + ce.getOperator() + " cigar element is not currently supported");
|
||||
}
|
||||
}
|
||||
|
||||
return delta;
|
||||
}
|
||||
|
||||
public static class MismatchCount {
|
||||
public int numMismatches = 0;
|
||||
public long mismatchQualities = 0;
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ import org.testng.annotations.Test;
|
|||
import java.util.*;
|
||||
|
||||
public class AlignmentUtilsUnitTest {
|
||||
private final static boolean DEBUG = false;
|
||||
private final static boolean DEBUG = true;
|
||||
private SAMFileHeader header;
|
||||
|
||||
/** Basic aligned and mapped read. */
|
||||
|
|
@ -145,6 +145,34 @@ public class AlignmentUtilsUnitTest {
|
|||
}
|
||||
|
||||
|
||||
@DataProvider(name = "CalcNumDifferentBasesData")
|
||||
public Object[][] makeCalcNumDifferentBasesData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
tests.add(new Object[]{"5M", "ACGTA", "ACGTA", 0});
|
||||
tests.add(new Object[]{"5M", "ACGTA", "ACGTT", 1});
|
||||
tests.add(new Object[]{"5M", "ACGTA", "TCGTT", 2});
|
||||
tests.add(new Object[]{"5M", "ACGTA", "TTGTT", 3});
|
||||
tests.add(new Object[]{"5M", "ACGTA", "TTTTT", 4});
|
||||
tests.add(new Object[]{"5M", "ACGTA", "TTTCT", 5});
|
||||
tests.add(new Object[]{"2M3I3M", "ACGTA", "ACNNNGTA", 3});
|
||||
tests.add(new Object[]{"2M3I3M", "ACGTA", "ACNNNGTT", 4});
|
||||
tests.add(new Object[]{"2M3I3M", "ACGTA", "TCNNNGTT", 5});
|
||||
tests.add(new Object[]{"2M2D1M", "ACGTA", "ACA", 2});
|
||||
tests.add(new Object[]{"2M2D1M", "ACGTA", "ACT", 3});
|
||||
tests.add(new Object[]{"2M2D1M", "ACGTA", "TCT", 4});
|
||||
tests.add(new Object[]{"2M2D1M", "ACGTA", "TGT", 5});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "CalcNumDifferentBasesData")
|
||||
public void testCalcNumDifferentBases(final String cigarString, final String ref, final String read, final int expectedDifferences) {
|
||||
final Cigar cigar = TextCigarCodec.getSingleton().decode(cigarString);
|
||||
Assert.assertEquals(AlignmentUtils.calcNumDifferentBases(cigar, ref.getBytes(), read.getBytes()), expectedDifferences);
|
||||
}
|
||||
|
||||
|
||||
@DataProvider(name = "NumAlignedBasesCountingSoftClips")
|
||||
public Object[][] makeNumAlignedBasesCountingSoftClips() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
|
|
|||
Loading…
Reference in New Issue