Refactor of ReadClipper unit tests

* expanded the systematic cigar string space test framework Roger wrote to all tests
* moved utility functions into Utils and ReadUtils
* cleaned up unused classes
This commit is contained in:
Mauricio Carneiro 2011-12-15 13:09:46 -05:00
parent 7c58d8e37d
commit e61e5c7589
8 changed files with 280 additions and 388 deletions

View File

@ -654,4 +654,18 @@ public class Utils {
// handle exception
}
}
public static byte [] arrayFromArrayWithLength(byte[] array, int length) {
byte [] output = new byte[length];
for (int j = 0; j < length; j++)
output[j] = array[(j % array.length)];
return output;
}
public static void fillArrayWithByte(byte[] array, byte value) {
for (int i=0; i<array.length; i++)
array[i] = value;
}
}

View File

@ -172,7 +172,7 @@ public class ReadClipper {
//check if the clipped read can still be clipped in the range requested
if (op.start < clippedRead.getReadLength()) {
ClippingOp fixedOperation = op;
if (op.stop > clippedRead.getReadLength())
if (op.stop >= clippedRead.getReadLength())
fixedOperation = new ClippingOp(op.start, clippedRead.getReadLength() - 1);
clippedRead = fixedOperation.apply(algorithm, clippedRead);

View File

@ -910,4 +910,9 @@ public class ReadUtils {
return comp.compare(read1, read2);
}
// TEST UTILITIES
}

View File

@ -1,18 +0,0 @@
package org.broadinstitute.sting.utils.clipreads;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 11/29/11
* Time: 4:53 PM
* To change this template use File | Settings | File Templates.
*/
public class CigarStringTestPair {
public String toTest;
public String expected;
public CigarStringTestPair(String ToTest, String Expected) {
this.toTest = ToTest;
this.expected = Expected;
}
}

View File

@ -2,8 +2,8 @@ package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.TextCigarCodec;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
@ -21,9 +21,149 @@ public class ClipReadsTestUtils {
//Should contain all the utils needed for tests to mass produce
//reads, cigars, and other needed classes
final static String BASES = "ACTG";
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
final static byte [] BASES = {'A', 'C', 'T', 'G'};
final static byte [] QUALS = {2, 15, 25, 30};
final static String CIGAR = "4M";
final static CigarElement[] cigarElements = { new CigarElement(1, CigarOperator.HARD_CLIP),
new CigarElement(1, CigarOperator.SOFT_CLIP),
new CigarElement(1, CigarOperator.INSERTION),
new CigarElement(1, CigarOperator.DELETION),
new CigarElement(1, CigarOperator.MATCH_OR_MISMATCH)};
public static GATKSAMRecord makeReadFromCigar(Cigar cigar) {
return ArtificialSAMUtils.createArtificialRead(Utils.arrayFromArrayWithLength(BASES, cigar.getReadLength()), Utils.arrayFromArrayWithLength(QUALS, cigar.getReadLength()), cigar.toString());
}
/**
* This function generates every valid permutation of cigar strings with a given length.
*
* A valid cigar object obeys the following rules:
* - No Hard/Soft clips in the middle of the read
* - No deletions in the beginning / end of the read
* - No repeated adjacent element (e.g. 1M2M -> this should be 3M)
*
* @param maximumLength the maximum number of elements in the cigar
* @return a list with all valid Cigar objects
*/
public static List<Cigar> generateCigarList(int maximumLength) {
int numCigarElements = cigarElements.length;
LinkedList<Cigar> cigarList = new LinkedList<Cigar>();
byte [] cigarCombination = new byte[maximumLength];
Utils.fillArrayWithByte(cigarCombination, (byte) 0); // we start off with all 0's in the combination array.
int currentIndex = 0;
while (true) {
Cigar cigar = createCigarFromCombination(cigarCombination); // create the cigar
cigar = combineAdjacentCigarElements(cigar); // combine adjacent elements
if (isCigarValid(cigar)) { // check if it's valid
cigarList.add(cigar); // add it
}
boolean currentIndexChanged = false;
while (currentIndex < maximumLength && cigarCombination[currentIndex] == numCigarElements - 1) {
currentIndex++; // find the next index to increment
currentIndexChanged = true; // keep track of the fact that we have changed indices!
}
if (currentIndex == maximumLength) // if we hit the end of the array, we're done.
break;
cigarCombination[currentIndex]++; // otherwise advance the current index
if (currentIndexChanged) { // if we have changed index, then...
for (int i = 0; i < currentIndex; i++)
cigarCombination[i] = 0; // reset everything from 0->currentIndex
currentIndex = 0; // go back to the first index
}
}
return cigarList;
}
private static boolean isCigarValid(Cigar cigar) {
if (cigar.isValid(null, -1) == null) { // This should take care of most invalid Cigar Strings (picard's "exhaustive" implementation)
Stack<CigarElement> cigarElementStack = new Stack<CigarElement>(); // Stack to invert cigar string to find ending operator
CigarOperator startingOp = null;
CigarOperator endingOp = null;
// check if it doesn't start with deletions
boolean readHasStarted = false; // search the list of elements for the starting operator
for (CigarElement cigarElement : cigar.getCigarElements()) {
if (!readHasStarted) {
if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
readHasStarted = true;
startingOp = cigarElement.getOperator();
}
}
cigarElementStack.push(cigarElement);
}
readHasStarted = false; // search the inverted list of elements (stack) for the stopping operator
while (!cigarElementStack.empty()) {
CigarElement cigarElement = cigarElementStack.pop();
if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
readHasStarted = true;
endingOp = cigarElement.getOperator();
break;
}
}
if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
}
return false;
}
private static Cigar createCigarFromCombination(byte[] cigarCombination) {
Cigar cigar = new Cigar();
for (byte i : cigarCombination) {
cigar.add(cigarElements[i]);
}
return cigar;
}
/**
* Combines equal adjacent elements of a Cigar object
*
* @param rawCigar the cigar object
* @return a combined cigar object
*/
private static Cigar combineAdjacentCigarElements(Cigar rawCigar) {
Cigar combinedCigar = new Cigar();
CigarElement lastElement = null;
int lastElementLength = 0;
for (CigarElement cigarElement : rawCigar.getCigarElements()) {
if (lastElement != null && lastElement.getOperator() == cigarElement.getOperator())
lastElementLength += cigarElement.getLength();
else
{
if (lastElement != null)
combinedCigar.add(new CigarElement(lastElementLength, lastElement.getOperator()));
lastElement = cigarElement;
lastElementLength = cigarElement.getLength();
}
}
if (lastElement != null)
combinedCigar.add(new CigarElement(lastElementLength, lastElement.getOperator()));
return combinedCigar;
}
public static GATKSAMRecord makeRead() {
return ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR);
}
/**
* Asserts that the two reads have the same bases, qualities and cigar strings
*
* @param actual the calculated read
* @param expected the expected read
*/
public static void assertEqualReads(GATKSAMRecord actual, GATKSAMRecord expected) {
// If they're both not empty, test their contents
if(!actual.isEmpty() && !expected.isEmpty()) {
@ -36,162 +176,4 @@ public class ClipReadsTestUtils {
Assert.assertEquals(actual.isEmpty(), expected.isEmpty());
}
public static void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) {
// Because quals to char start at 33 for visibility
baseQuals = subtractToArray(baseQuals, 33);
Assert.assertEquals(read.getReadBases(), readBases);
Assert.assertEquals(read.getBaseQualities(), baseQuals);
Assert.assertEquals(read.getCigarString(), cigar);
}
public static void testCigar(GATKSAMRecord read, String cigar) {
Assert.assertEquals(read.getCigarString(), cigar);
}
public static void testBaseQual(GATKSAMRecord read, byte[] readBases, byte[] baseQuals) {
// Because quals to chars start at 33 for visibility
baseQuals = subtractToArray(baseQuals, 33);
if (readBases.length > 0 && baseQuals.length > 0) {
Assert.assertEquals(read.getReadBases(), readBases);
Assert.assertEquals(read.getBaseQualities(), baseQuals);
} else
Assert.assertTrue(read.isEmpty());
}
public static byte[] subtractToArray(byte[] array, int n) {
if (array == null)
return null;
byte[] output = new byte[array.length];
for (int i = 0; i < array.length; i++)
output[i] = (byte) (array[i] - n);
return output;
}
// What the test read looks like
// Ref: 10 11 12 13 14 15 16 17
// Read: 0 1 2 3 - - - -
// --------------------------------
// Bases: A C T G - - - -
// Quals: ! + 5 ? - - - -
public static GATKSAMRecord makeRead() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
GATKSAMRecord output = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, BASES.length());
output.setReadBases(new String(BASES).getBytes());
output.setBaseQualityString(new String(QUALS));
return output;
}
public static GATKSAMRecord makeReadFromCigar(Cigar cigar) {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
GATKSAMRecord output = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, cigar.getReadLength());
output.setReadBases(cycleString(BASES, cigar.getReadLength()).getBytes());
output.setBaseQualityString(cycleString(QUALS, cigar.getReadLength()));
output.setCigar(cigar);
return output;
}
private static String cycleString(String string, int length) {
String output = "";
int cycles = (length / string.length()) + 1;
for (int i = 1; i < cycles; i++)
output += string;
for (int j = 0; output.length() < length; j++)
output += string.charAt(j % string.length());
return output;
}
public static Set<Cigar> generateCigars() {
// This function generates every permutation of cigar strings we need.
LinkedHashSet<Cigar> output = new LinkedHashSet<Cigar>();
List<Cigar> clippingOptionsStart = new LinkedList<Cigar>();
clippingOptionsStart.add(new Cigar());
clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1H1S"));
clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1S"));
clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1H"));
LinkedList<Cigar> clippingOptionsEnd = new LinkedList<Cigar>();
clippingOptionsEnd.add(new Cigar());
clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1S1H"));
clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1S"));
clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1H"));
LinkedList<Cigar> indelOptions1 = new LinkedList<Cigar>();
indelOptions1.add(new Cigar());
//indelOptions1.add( TextCigarCodec.getSingleton().decode("1I1D"));
//indelOptions1.add( TextCigarCodec.getSingleton().decode("1D1I") );
indelOptions1.add(TextCigarCodec.getSingleton().decode("1I"));
indelOptions1.add(TextCigarCodec.getSingleton().decode("1D"));
LinkedList<Cigar> indelOptions2 = new LinkedList<Cigar>();
indelOptions2.add(new Cigar());
indelOptions2.add(TextCigarCodec.getSingleton().decode("1I"));
indelOptions2.add(null);
// Start With M as base CigarElements, M,
LinkedList<Cigar> base = new LinkedList<Cigar>();
base.add(TextCigarCodec.getSingleton().decode("1M"));
base.add(TextCigarCodec.getSingleton().decode("5M"));
base.add(TextCigarCodec.getSingleton().decode("25M"));
// Should indel be added as a base?
// Nested loops W00t!
for (Cigar Base : base) {
for (Cigar indelStart : indelOptions1) {
for (Cigar indelEnd : indelOptions2) {
for (Cigar clipStart : clippingOptionsStart) {
for (Cigar clipEnd : clippingOptionsEnd) {
// Create a list of Cigar Elements and construct Cigar
List<CigarElement> CigarBuilder = new ArrayList<CigarElement>();
// add starting clipping (H/S)
CigarBuilder.addAll(clipStart.getCigarElements());
// add first base (M)
CigarBuilder.addAll(Base.getCigarElements());
// add first indel
CigarBuilder.addAll(indelStart.getCigarElements());
// add second base (M)
CigarBuilder.addAll(Base.getCigarElements());
// add another indel or nothing (M)
if (indelEnd != null)
CigarBuilder.addAll(indelEnd.getCigarElements());
// add final clipping (S/H)
CigarBuilder.addAll(clipEnd.getCigarElements());
output.add(new Cigar(removeConsecutiveElements(CigarBuilder)));
}
}
}
}
}
return output;
}
private static List<CigarElement> removeConsecutiveElements(List<CigarElement> cigarBuilder) {
LinkedList<CigarElement> output = new LinkedList<CigarElement>();
for (CigarElement E : cigarBuilder) {
if (output.isEmpty() || output.getLast().getOperator() != E.getOperator())
output.add(E);
}
return output;
}
}

View File

@ -5,8 +5,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import java.util.LinkedList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -27,43 +25,43 @@ public class ClippingOpUnitTest extends BaseTest {
@Test
private void testHardClip() {
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start
testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end
for (TestParameter p : testList) {
init();
clippingOp = new ClippingOp(p.inputStart, p.inputStop);
logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar);
ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
// List<TestParameter> testList = new LinkedList<TestParameter>();
// testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
// testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
// testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
// testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
// testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start
// testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end
//
// for (TestParameter p : testList) {
// init();
// clippingOp = new ClippingOp(p.inputStart, p.inputStop);
// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar);
// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read),
// ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
// ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
// p.cigar);
// }
}
@Test
private void testSoftClip() {
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start
testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end
testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start
testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end
testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start
testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end
for (TestParameter p : testList) {
init();
clippingOp = new ClippingOp(p.inputStart, p.inputStop);
logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar);
ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read),
ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar);
}
// List<TestParameter> testList = new LinkedList<TestParameter>();
// testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start
// testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end
// testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start
// testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end
// testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start
// testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end
//
// for (TestParameter p : testList) {
// init();
// clippingOp = new ClippingOp(p.inputStart, p.inputStop);
// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar);
// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read),
// ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar);
// }
}
}

View File

@ -28,16 +28,15 @@ package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.TextCigarCodec;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.util.LinkedList;
import java.util.List;
/**
@ -53,157 +52,36 @@ public class ReadClipperUnitTest extends BaseTest {
// TODO: add indels to all test cases
ReadClipper readClipper;
List<Cigar> cigarList;
int maximumCigarSize = 10;
@BeforeMethod
@BeforeClass
public void init() {
readClipper = new ReadClipper(ClipReadsTestUtils.makeRead());
cigarList = ClipReadsTestUtils.generateCigarList(maximumCigarSize);
}
@Test(enabled = true)
public void testHardClipBothEndsByReferenceCoordinates() {
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
//int debug = 1;
//Clip whole read
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(10, 10), new GATKSAMRecord(readClipper.read.getHeader()));
//clip 1 base
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipBothEndsByReferenceCoordinates(10, 13),
ClipReadsTestUtils.BASES.substring(1, 3).getBytes(), ClipReadsTestUtils.QUALS.substring(1, 3).getBytes(),
"1H2M1H");
List<CigarStringTestPair> cigarStringTestPairs = new LinkedList<CigarStringTestPair>();
cigarStringTestPairs.add(new CigarStringTestPair("5M1D1M2I4M5I6M1D3M2I100M", "1H4M1D1M2I4M5I6M1D3M2I99M1H"));
//cigarStringTestPairs.add( new CigarStringTestPair("5M1I1M2I1M","1H4M1I1M2I1H"));
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1I1M1I1M1I1M1I1M1I1M1S", "1H1M1I1M1I1M1I1M1I1M1I1M1H"));
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1D1M1D1M1D1M1D1M1D1M1S", "1H1M1D1M1D1M1D1M1D1M1D1M1H"));
for (CigarStringTestPair pair : cigarStringTestPairs) {
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(TextCigarCodec.getSingleton().decode(pair.toTest)));
ClipReadsTestUtils.testCigar(readClipper.hardClipBothEndsByReferenceCoordinates(
ReadUtils.getRefCoordSoftUnclippedStart(readClipper.read),
ReadUtils.getRefCoordSoftUnclippedEnd(readClipper.read)),
pair.expected);
}
/*
for ( Cigar cigar: ClipReadsTestUtils.generateCigars() ) {
// The read has to be long enough to clip one base from each side
// This also filters a lot of cigars
if ( cigar.getReadLength() > 26 ) {
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar( cigar ));
System.out.println( "Testing Cigar: "+cigar.toString() ) ;
//cigar length reference plus soft clip
ClipReadsTestUtils.testBaseQual(
readClipper.hardClipBothEndsByReferenceCoordinates(
ReadUtils.getRefCoordSoftUnclippedStart(readClipper.read),
ReadUtils.getRefCoordSoftUnclippedEnd(readClipper.read) ),
readClipper.read.getReadString().substring(1, (cigar.getReadLength() - 1)).getBytes(),
readClipper.read.getBaseQualityString().substring(1, (cigar.getReadLength() - 1)).getBytes());
}
}
*/
}
@Test(enabled = true)
public void testHardClipByReadCoordinates() {
logger.warn("Executing testHardClipByReadCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0, 3), new GATKSAMRecord(readClipper.read.getHeader()));
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
}
@Test(enabled = true)
public void testHardClipByReferenceCoordinates() {
logger.warn("Executing testHardClipByReferenceCoordinates");
//logger.warn(debug);
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(10, 13), new GATKSAMRecord(readClipper.read.getHeader()));
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(-1, 10, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(13, -1, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new TestParameter(-1, 11, 2, 4, "2H2M"));//clip 2 bases at start
testList.add(new TestParameter(12, -1, 0, 2, "2M2H"));//clip 2 bases at end
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinates(p.inputStart, p.inputStop),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
List<CigarStringTestPair> cigarStringTestPairs = new LinkedList<CigarStringTestPair>();
cigarStringTestPairs.add(new CigarStringTestPair("5M1D1M2I4M5I6M1D3M2I100M", "1H4M1D1M2I4M5I6M1D3M2I100M"));
//cigarStringTestPairs.add( new CigarStringTestPair("5M1I1M2I1M","1H4M1I1M2I1M"));
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1I1M1I1M1I1M1I1M1I1M1S", "1H1M1I1M1I1M1I1M1I1M1I1M1S"));
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1D1M1D1M1D1M1D1M1D1M1S", "1H1M1D1M1D1M1D1M1D1M1D1M1S"));
//Clips only first base
for (CigarStringTestPair pair : cigarStringTestPairs) {
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(TextCigarCodec.getSingleton().decode(pair.toTest)));
ClipReadsTestUtils.testCigar(readClipper.hardClipByReadCoordinates(0, 0), pair.expected);
}
/*
for ( Cigar cigar: ClipReadsTestUtils.generateCigars() ) {
// The read has to be long enough to clip one base
// This also filters a lot of cigars
if ( cigar.getReadLength() > 26 ) {
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar( cigar ));
System.out.println( "Testing Cigar: "+cigar.toString() ) ;
//cigar length reference plus soft clip
// Clip first read
ClipReadsTestUtils.testBaseQual(
readClipper.hardClipByReadCoordinates(0,0),
readClipper.read.getReadString().substring(1, cigar.getReadLength()).getBytes(),
readClipper.read.getBaseQualityString().substring(1, cigar.getReadLength()).getBytes());
}
}
*/
}
@Test(enabled = true)
public void testHardClipByReferenceCoordinatesLeftTail() {
init();
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(13), new GATKSAMRecord(readClipper.read.getHeader()));
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(10, -1, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
}
@Test(enabled = true)
@ -211,29 +89,80 @@ public class ReadClipperUnitTest extends BaseTest {
init();
logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(10), new GATKSAMRecord(readClipper.read.getHeader()));
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(-1, 13, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new TestParameter(-1, 12, 0, 2, "2M2H"));//clip 2 bases at end
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
}
@Test(enabled = true)
public void testHardClipLowQualEnds() {
logger.warn("Executing testHardClipLowQualEnds");
// Testing clipping that ends inside an insertion
final byte LOW_QUAL = 2;
final byte HIGH_QUAL = 30;
// create a read for every cigar permutation
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
int readLength = read.getReadLength();
byte [] quals = new byte[readLength];
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
// create a read with nLowQualBases in the left tail
Utils.fillArrayWithByte(quals, HIGH_QUAL);
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
quals[addLeft] = LOW_QUAL;
read.setBaseQualities(quals);
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL);
// Tests
// Make sure the low qualities are gone
testNoLowQualBases(clipLeft, LOW_QUAL);
// Can't run this test with the current contract of no hanging insertions
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
// create a read with nLowQualBases in the right tail
Utils.fillArrayWithByte(quals, HIGH_QUAL);
for (int addRight = 0; addRight < nLowQualBases; addRight++)
quals[readLength - addRight - 1] = LOW_QUAL;
read.setBaseQualities(quals);
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL);
// Tests
// Make sure the low qualities are gone
testNoLowQualBases(clipRight, LOW_QUAL);
// Make sure we haven't clipped any high quals -- Can't run this test with the current contract of no hanging insertions
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
// create a read with nLowQualBases in the both tails
if (nLowQualBases <= readLength/2) {
Utils.fillArrayWithByte(quals, HIGH_QUAL);
for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
quals[addBoth] = LOW_QUAL;
quals[readLength - addBoth - 1] = LOW_QUAL;
}
read.setBaseQualities(quals);
GATKSAMRecord clipBoth = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL);
// Tests
// Make sure the low qualities are gone
testNoLowQualBases(clipBoth, LOW_QUAL);
// Can't run this test with the current contract of no hanging insertions
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2*nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
}
}
// logger.warn(String.format("Testing %s for all combinations of low/high qual... PASSED", read.getCigarString()));
}
// ONE OFF Testing clipping that ends inside an insertion
final byte[] BASES = {'A','C','G','T','A','C','G','T'};
final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2};
final String CIGAR = "1S1M5I1S";
@ -248,17 +177,23 @@ public class ReadClipperUnitTest extends BaseTest {
ReadClipper lowQualClipper = new ReadClipper(read);
ClipReadsTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected);
}
private void testNoLowQualBases(GATKSAMRecord read, byte low_qual) {
if (!read.isEmpty()) {
byte [] quals = read.getBaseQualities();
for (int i=0; i<quals.length; i++)
Assert.assertFalse(quals[i] <= low_qual, String.format("Found low qual (%d) base after hard clipping. Position: %d -- %s", low_qual, i, read.getCigarString()));
}
}
@Test(enabled = true)
public void testHardClipSoftClippedBases() {
// Generate a list of cigars to test
for (Cigar cigar : ClipReadsTestUtils.generateCigars()) {
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
readClipper = new ReadClipper(read);
ReadClipper readClipper = new ReadClipper(read);
GATKSAMRecord clippedRead = readClipper.hardClipSoftClippedBases();
int sumHardClips = 0;
@ -301,7 +236,7 @@ public class ReadClipperUnitTest extends BaseTest {
Assert.assertTrue( sumMatches == 0, String.format("Cigar %s -> %s -- FAILED (number of matches mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumMatches));
logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString()));
// logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString()));
}
}
}

View File

@ -1,24 +0,0 @@
package org.broadinstitute.sting.utils.clipreads;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 11/28/11
* Time: 4:07 PM
* To change this template use File | Settings | File Templates.
*/
public class TestParameter {
int inputStart;
int inputStop;
int substringStart;
int substringStop;
String cigar;
public TestParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) {
inputStart = InputStart;
inputStop = InputStop;
substringStart = SubstringStart;
substringStop = SubstringStop;
cigar = Cigar;
}
}