UnitTest for calcNumHighQualityBases in AlignmentUtils

This commit is contained in:
Mark DePristo 2013-01-31 13:57:23 -05:00
parent 6ec1e613a2
commit c3c4e2785b
2 changed files with 76 additions and 16 deletions

View File

@ -261,16 +261,23 @@ public class AlignmentUtils {
return n;
}
/**
* Calculate the number of bases that are soft clipped in read with quality score greater than threshold
*
* @param read a non-null GATKSAMRecord
* @param qualThreshold consider bases with quals > this value as high quality. Must be >= 0
* @return positive integer
*/
@Ensures("result >= 0")
public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) {
if ( read == null ) throw new IllegalArgumentException("Read cannot be null");
if ( qualThreshold < 0 ) throw new IllegalArgumentException("Expected qualThreshold to be a positive byte but saw " + qualThreshold);
final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION );
int numHQSoftClips = 0;
int alignPos = 0;
final Cigar cigar = read.getCigar();
final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION );
for( int iii = 0; iii < cigar.numCigarElements(); iii++ ) {
final CigarElement ce = cigar.getCigarElement(iii);
for ( final CigarElement ce : read.getCigar().getCigarElements() ) {
final int elementLength = ce.getLength();
switch( ce.getOperator() ) {
@ -279,21 +286,16 @@ public class AlignmentUtils {
if( qual[alignPos++] > qualThreshold ) { numHQSoftClips++; }
}
break;
case M:
case I:
case EQ:
case X:
case M: case I: case EQ: case X:
alignPos += elementLength;
break;
case H:
case P:
case D:
case N:
case H: case P: case D: case N:
break;
default:
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
throw new IllegalStateException("Unsupported cigar operator: " + ce.getOperator());
}
}
return numHQSoftClips;
}

View File

@ -25,13 +25,16 @@
package org.broadinstitute.sting.utils.sam;
import junit.framework.Assert;
import net.sf.samtools.*;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.Utils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class AlignmentUtilsUnitTest {
@ -161,4 +164,59 @@ public class AlignmentUtilsUnitTest {
Assert.assertEquals(consolidatedCigar.toString(), AlignmentUtils.consolidateCigar(unconsolidatedCigar).toString());
}
}
@DataProvider(name = "SoftClipsDataProvider")
public Object[][] makeSoftClipsDataProvider() {
List<Object[]> tests = new ArrayList<Object[]>();
// this functionality can be adapted to provide input data for whatever you might want in your data
for ( final int lengthOfLeftClip : Arrays.asList(0, 1, 10) ) {
for ( final int lengthOfRightClip : Arrays.asList(0, 1, 10) ) {
for ( final int qualThres : Arrays.asList(10, 20, 30) ) {
for ( final String middleOp : Arrays.asList("M", "D") ) {
for ( final int matchSize : Arrays.asList(0, 1, 10) ) {
final byte[] left = makeQualArray(lengthOfLeftClip, qualThres);
final byte[] right = makeQualArray(lengthOfRightClip, qualThres);
int n = 0;
for ( int i = 0; i < left.length; i++ ) n += left[i] > qualThres ? 1 : 0;
for ( int i = 0; i < right.length; i++ ) n += right[i] > qualThres ? 1 : 0;
tests.add(new Object[]{left, matchSize, middleOp, right, qualThres, n});
}
}
}
}
}
return tests.toArray(new Object[][]{});
}
private byte[] makeQualArray(final int length, final int qualThreshold) {
final byte[] array = new byte[length];
for ( int i = 0; i < array.length; i++ )
array[i] = (byte)(qualThreshold + ( i % 2 == 0 ? 1 : - 1 ));
return array;
}
@Test(dataProvider = "SoftClipsDataProvider")
public void testSoftClipsData(final byte[] qualsOfSoftClipsOnLeft, final int middleSize, final String middleOp, final byte[] qualOfSoftClipsOnRight, final int qualThreshold, final int numExpected) {
final int readLength = (middleOp.equals("D") ? 0 : middleSize) + qualOfSoftClipsOnRight.length + qualsOfSoftClipsOnLeft.length;
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, readLength);
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
final byte[] matchBytes = middleOp.equals("D") ? new byte[]{} : Utils.dupBytes((byte)30, middleSize);
final byte[] quals = ArrayUtils.addAll(ArrayUtils.addAll(qualsOfSoftClipsOnLeft, matchBytes), qualOfSoftClipsOnRight);
// set the read's bases and quals
read.setReadBases(bases);
read.setBaseQualities(quals);
final StringBuilder cigar = new StringBuilder();
if (qualsOfSoftClipsOnLeft.length > 0 ) cigar.append(qualsOfSoftClipsOnLeft.length + "S");
if (middleSize > 0 ) cigar.append(middleSize + middleOp);
if (qualOfSoftClipsOnRight.length > 0 ) cigar.append(qualOfSoftClipsOnRight.length + "S");
read.setCigarString(cigar.toString());
final int actual = AlignmentUtils.calcNumHighQualitySoftClips(read, (byte) qualThreshold);
Assert.assertEquals(actual, numExpected, "Wrong number of soft clips detected for read " + read.getSAMString());
}
}