Reorganized the testing code and created ClipReadsTestUtils

Tests are more rigorous and includes many more test cases.
We can tests custom cigars and the generated cigars.
     *Still needs debugging because code is not working.
Created test classes to be used across several tests.

Some cases are still commented out.

Signed-off-by: Mauricio Carneiro <carneiro@broadinstitute.org>
This commit is contained in:
Roger Zurawicki 2011-11-29 18:30:23 -05:00 committed by Mauricio Carneiro
parent 0e9c2cefa2
commit 4cbd1f0dec
5 changed files with 442 additions and 267 deletions

View File

@ -0,0 +1,18 @@
package org.broadinstitute.sting.utils.clipreads;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 11/29/11
* Time: 4:53 PM
* To change this template use File | Settings | File Templates.
*/
public class CigarStringTestPair {
public String toTest;
public String expected;
public CigarStringTestPair(String ToTest, String Expected) {
this.toTest = ToTest;
this.expected = Expected;
}
}

View File

@ -0,0 +1,185 @@
package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.TextCigarCodec;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 11/27/11
* Time: 6:45 AM
* To change this template use File | Settings | File Templates.
*/
public class ClipReadsTestUtils {
//Should contain all the utils needed for tests to mass produce
//reads, cigars, and other needed classes
final static String BASES = "ACTG";
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
public static void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) {
// Because quals to char start at 33 for visibility
baseQuals = subtractToArray(baseQuals, 33);
Assert.assertEquals(read.getReadBases(), readBases);
Assert.assertEquals(read.getBaseQualities(), baseQuals);
Assert.assertEquals(read.getCigarString(), cigar);
}
public static void testCigar(GATKSAMRecord read, String cigar) {
Assert.assertEquals(read.getCigarString(), cigar);
}
public static void testBaseQual(GATKSAMRecord read, byte[] readBases, byte[] baseQuals) {
// Because quals to chars start at 33 for visibility
baseQuals = subtractToArray(baseQuals, 33);
if (readBases.length > 0 && baseQuals.length > 0) {
Assert.assertEquals(read.getReadBases(), readBases);
Assert.assertEquals(read.getBaseQualities(), baseQuals);
} else
Assert.assertTrue(read.isEmpty());
}
private static byte[] subtractToArray(byte[] array, int n) {
if (array == null)
return null;
byte[] output = new byte[array.length];
for (int i = 0; i < array.length; i++)
output[i] = (byte) (array[i] - n);
return output;
}
// What the test read looks like
// Ref: 10 11 12 13 14 15 16 17
// Read: 0 1 2 3 - - - -
// --------------------------------
// Bases: A C T G - - - -
// Quals: ! + 5 ? - - - -
public static GATKSAMRecord makeRead() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
GATKSAMRecord output = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, BASES.length());
output.setReadBases(new String(BASES).getBytes());
output.setBaseQualityString(new String(QUALS));
return output;
}
public static GATKSAMRecord makeReadFromCigar(Cigar cigar) {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
GATKSAMRecord output = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, cigar.getReadLength());
output.setReadBases(cycleString(BASES, cigar.getReadLength()).getBytes());
output.setBaseQualityString(cycleString(QUALS, cigar.getReadLength()));
output.setCigar(cigar);
return output;
}
private static String cycleString(String string, int length) {
String output = "";
int cycles = (length / string.length()) + 1;
for (int i = 1; i < cycles; i++)
output += string;
for (int j = 0; output.length() < length; j++)
output += string.charAt(j % string.length());
return output;
}
public static Set<Cigar> generateCigars() {
// This function generates every permutation of cigar strings we need.
LinkedHashSet<Cigar> output = new LinkedHashSet<Cigar>();
List<Cigar> clippingOptionsStart = new LinkedList<Cigar>();
clippingOptionsStart.add(new Cigar());
clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1H1S"));
clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1S"));
clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1H"));
LinkedList<Cigar> clippingOptionsEnd = new LinkedList<Cigar>();
clippingOptionsEnd.add(new Cigar());
clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1S1H"));
clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1S"));
clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1H"));
LinkedList<Cigar> indelOptions1 = new LinkedList<Cigar>();
indelOptions1.add(new Cigar());
//indelOptions1.add( TextCigarCodec.getSingleton().decode("1I1D"));
//indelOptions1.add( TextCigarCodec.getSingleton().decode("1D1I") );
indelOptions1.add(TextCigarCodec.getSingleton().decode("1I"));
indelOptions1.add(TextCigarCodec.getSingleton().decode("1D"));
LinkedList<Cigar> indelOptions2 = new LinkedList<Cigar>();
indelOptions2.add(new Cigar());
indelOptions2.add(TextCigarCodec.getSingleton().decode("1I"));
indelOptions2.add(null);
// Start With M as base CigarElements, M,
LinkedList<Cigar> base = new LinkedList<Cigar>();
base.add(TextCigarCodec.getSingleton().decode("1M"));
base.add(TextCigarCodec.getSingleton().decode("5M"));
base.add(TextCigarCodec.getSingleton().decode("25M"));
// Should indel be added as a base?
// Nested loops W00t!
for (Cigar Base : base) {
for (Cigar indelStart : indelOptions1) {
for (Cigar indelEnd : indelOptions2) {
for (Cigar clipStart : clippingOptionsStart) {
for (Cigar clipEnd : clippingOptionsEnd) {
// Create a list of Cigar Elements and construct Cigar
List<CigarElement> CigarBuilder = new ArrayList<CigarElement>();
// add starting clipping (H/S)
CigarBuilder.addAll(clipStart.getCigarElements());
// add first base (M)
CigarBuilder.addAll(Base.getCigarElements());
// add first indel
CigarBuilder.addAll(indelStart.getCigarElements());
// add second base (M)
CigarBuilder.addAll(Base.getCigarElements());
// add another indel or nothing (M)
if (indelEnd != null)
CigarBuilder.addAll(indelEnd.getCigarElements());
// add final clipping (S/H)
CigarBuilder.addAll(clipEnd.getCigarElements());
output.add(new Cigar(removeConsecutiveElements(CigarBuilder)));
}
}
}
}
}
return output;
}
private static List<CigarElement> removeConsecutiveElements(List<CigarElement> cigarBuilder) {
LinkedList<CigarElement> output = new LinkedList<CigarElement>();
for (CigarElement E : cigarBuilder) {
if (output.isEmpty() || output.getLast().getOperator() != E.getOperator())
output.add(E);
}
return output;
}
}

View File

@ -0,0 +1,69 @@
package org.broadinstitute.sting.utils.clipreads;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import java.util.LinkedList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 11/27/11
* Time: 5:17 AM
* To change this template use File | Settings | File Templates.
*/
public class ClippingOpUnitTest extends BaseTest {
ClippingOp clippingOp;
GATKSAMRecord read;
@BeforeTest
public void init() {
read = ClipReadsTestUtils.makeRead();
}
@Test
private void testHardClip() {
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start
testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end
for (TestParameter p : testList) {
init();
clippingOp = new ClippingOp(p.inputStart, p.inputStop);
logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar);
ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
}
@Test
private void testSoftClip() {
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start
testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end
testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start
testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end
testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start
testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end
for (TestParameter p : testList) {
init();
clippingOp = new ClippingOp(p.inputStart, p.inputStop);
logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar);
ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read),
ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar);
}
}
}

View File

@ -25,16 +25,19 @@
package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.*;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.TextCigarCodec;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.util.*;
import java.util.LinkedList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -47,212 +50,205 @@ public class ReadClipperUnitTest extends BaseTest {
// TODO: exception testing, make cases that should fail will fail
//int debug = 0;
// TODO: add indels to all test cases
GATKSAMRecord read, expected;
ReadClipper readClipper;
final static String BASES = "ACTG";
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
private void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) {
// Because quals to char start at 33 for visibility
baseQuals = subtractToArray(baseQuals, 33);
Assert.assertEquals(read.getReadBases(), readBases);
Assert.assertEquals(read.getBaseQualities(), baseQuals);
Assert.assertEquals(read.getCigarString(), cigar);
}
private void testBaseQual(GATKSAMRecord read, byte[] readBases, byte[] baseQuals) {
// Because quals to chars start at 33 for visibility
baseQuals = subtractToArray(baseQuals, 33);
if ( readBases.length > 0 && baseQuals.length > 0 ) {
Assert.assertEquals(read.getReadBases(), readBases);
Assert.assertEquals(read.getBaseQualities(), baseQuals);
}
else
Assert.assertTrue(read.isEmpty());
}
public class testParameter {
int inputStart;
int inputStop;
int substringStart;
int substringStop;
String cigar;
public testParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) {
inputStart = InputStart;
inputStop = InputStop;
substringStart = SubstringStart;
substringStop = SubstringStop;
cigar = Cigar;
}
}
private byte[] subtractToArray (byte[] array, int n ) {
if ( array == null)
return null;
byte[] output = new byte[array.length];
for ( int i = 0; i < array.length; i++)
output[i] = (byte) ( array[i] - n );
return output;
}
// What the test read looks like
// Ref: 1 2 3 4 5 6 7 8
// Read: 0 1 2 3 - - - -
// -----------------------------
// Bases: A C T G - - - -
// Quals: ! + 5 ? - - - -
@BeforeMethod
public void init() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
read.setReadBases(new String(BASES).getBytes());
read.setBaseQualityString(new String(QUALS));
readClipper = new ReadClipper(read);
//logger.warn(read.getCigarString());
readClipper = new ReadClipper(ClipReadsTestUtils.makeRead());
}
@Test ( enabled = true )
@Test(enabled = true)
public void testHardClipBothEndsByReferenceCoordinates() {
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
//int debug = 1;
//Clip whole read
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(1,1), new GATKSAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(10, 10), new GATKSAMRecord(readClipper.read.getHeader()));
//clip 1 base
expected = readClipper.hardClipBothEndsByReferenceCoordinates(1,4);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,3));
Assert.assertEquals(expected.getCigarString(), "1H2M1H");
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipBothEndsByReferenceCoordinates(10, 13),
ClipReadsTestUtils.BASES.substring(1, 3).getBytes(), ClipReadsTestUtils.QUALS.substring(1, 3).getBytes(),
"1H2M1H");
List<CigarStringTestPair> cigarStringTestPairs = new LinkedList<CigarStringTestPair>();
cigarStringTestPairs.add(new CigarStringTestPair("5M1D1M2I4M5I6M1D3M2I100M", "1H4M1D1M2I4M5I6M1D3M2I99M1H"));
//cigarStringTestPairs.add( new CigarStringTestPair("5M1I1M2I1M","1H4M1I1M2I1H"));
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1I1M1I1M1I1M1I1M1I1M1S", "1H1M1I1M1I1M1I1M1I1M1I1M1H"));
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1D1M1D1M1D1M1D1M1D1M1S", "1H1M1D1M1D1M1D1M1D1M1D1M1H"));
for (CigarStringTestPair pair : cigarStringTestPairs) {
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(TextCigarCodec.getSingleton().decode(pair.toTest)));
ClipReadsTestUtils.testCigar(readClipper.hardClipBothEndsByReferenceCoordinates(
ReadUtils.getRefCoordSoftUnclippedStart(readClipper.read),
ReadUtils.getRefCoordSoftUnclippedEnd(readClipper.read)),
pair.expected);
}
/*
for ( Cigar cigar: ClipReadsTestUtils.generateCigars() ) {
// The read has to be long enough to clip one base from each side
// This also filters a lot of cigars
if ( cigar.getReadLength() > 26 ) {
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar( cigar ));
System.out.println( "Testing Cigar: "+cigar.toString() ) ;
//cigar length reference plus soft clip
ClipReadsTestUtils.testBaseQual(
readClipper.hardClipBothEndsByReferenceCoordinates(
ReadUtils.getRefCoordSoftUnclippedStart(readClipper.read),
ReadUtils.getRefCoordSoftUnclippedEnd(readClipper.read) ),
readClipper.read.getReadString().substring(1, (cigar.getReadLength() - 1)).getBytes(),
readClipper.read.getBaseQualityString().substring(1, (cigar.getReadLength() - 1)).getBytes());
}
}
*/
}
@Test ( enabled = true )
@Test(enabled = true)
public void testHardClipByReadCoordinates() {
logger.warn("Executing testHardClipByReadCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new GATKSAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0, 3), new GATKSAMRecord(readClipper.read.getHeader()));
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(0,0,1,4,"1H3M"));//clip 1 base at start
testList.add(new testParameter(3,3,0,3,"3M1H"));//clip 1 base at end
testList.add(new testParameter(0,1,2,4,"2H2M"));//clip 2 bases at start
testList.add(new testParameter(2,3,0,2,"2M2H"));//clip 2 bases at end
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
for ( testParameter p : testList ) {
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
testBaseQualCigar(readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop),
BASES.substring(p.substringStart, p.substringStop).getBytes(),
QUALS.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
}
@Test ( enabled = true )
@Test(enabled = true)
public void testHardClipByReferenceCoordinates() {
logger.warn("Executing testHardClipByReferenceCoordinates");
//logger.warn(debug);
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new GATKSAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(10, 13), new GATKSAMRecord(readClipper.read.getHeader()));
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(-1,1,1,4,"1H3M"));//clip 1 base at start
testList.add(new testParameter(4,-1,0,3,"3M1H"));//clip 1 base at end
testList.add(new testParameter(-1,2,2,4,"2H2M"));//clip 2 bases at start
testList.add(new testParameter(3,-1,0,2,"2M2H"));//clip 2 bases at end
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(-1, 10, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(13, -1, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new TestParameter(-1, 11, 2, 4, "2H2M"));//clip 2 bases at start
testList.add(new TestParameter(12, -1, 0, 2, "2M2H"));//clip 2 bases at end
for ( testParameter p : testList ) {
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
testBaseQualCigar(readClipper.hardClipByReferenceCoordinates(p.inputStart, p.inputStop),
BASES.substring(p.substringStart, p.substringStop).getBytes(),
QUALS.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinates(p.inputStart, p.inputStop),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
List<CigarStringTestPair> cigarStringTestPairs = new LinkedList<CigarStringTestPair>();
cigarStringTestPairs.add(new CigarStringTestPair("5M1D1M2I4M5I6M1D3M2I100M", "1H4M1D1M2I4M5I6M1D3M2I100M"));
//cigarStringTestPairs.add( new CigarStringTestPair("5M1I1M2I1M","1H4M1I1M2I1M"));
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1I1M1I1M1I1M1I1M1I1M1S", "1H1M1I1M1I1M1I1M1I1M1I1M1S"));
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1D1M1D1M1D1M1D1M1D1M1S", "1H1M1D1M1D1M1D1M1D1M1D1M1S"));
//Clips only first base
for (CigarStringTestPair pair : cigarStringTestPairs) {
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(TextCigarCodec.getSingleton().decode(pair.toTest)));
ClipReadsTestUtils.testCigar(readClipper.hardClipByReadCoordinates(0, 0), pair.expected);
}
/*
for ( Cigar cigar: ClipReadsTestUtils.generateCigars() ) {
// The read has to be long enough to clip one base
// This also filters a lot of cigars
if ( cigar.getReadLength() > 26 ) {
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar( cigar ));
System.out.println( "Testing Cigar: "+cigar.toString() ) ;
//cigar length reference plus soft clip
// Clip first read
ClipReadsTestUtils.testBaseQual(
readClipper.hardClipByReadCoordinates(0,0),
readClipper.read.getReadString().substring(1, cigar.getReadLength()).getBytes(),
readClipper.read.getBaseQualityString().substring(1, cigar.getReadLength()).getBytes());
}
}
*/
}
@Test ( enabled = true )
@Test(enabled = true)
public void testHardClipByReferenceCoordinatesLeftTail() {
init();
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new GATKSAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(13), new GATKSAMRecord(readClipper.read.getHeader()));
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new testParameter(2, -1, 2, 4, "2H2M"));//clip 2 bases at start
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(10, -1, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start
for ( testParameter p : testList ) {
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart),
BASES.substring(p.substringStart, p.substringStop).getBytes(),
QUALS.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
}
@Test ( enabled = true )
@Test(enabled = true)
public void testHardClipByReferenceCoordinatesRightTail() {
init();
logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new GATKSAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(10), new GATKSAMRecord(readClipper.read.getHeader()));
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(-1, 4, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new testParameter(-1, 3, 0, 2, "2M2H"));//clip 2 bases at end
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(-1, 13, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new TestParameter(-1, 12, 0, 2, "2M2H"));//clip 2 bases at end
for ( testParameter p : testList ) {
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop),
BASES.substring(p.substringStart, p.substringStop).getBytes(),
QUALS.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
}
@Test ( enabled = true )
@Test(enabled = true)
public void testHardClipLowQualEnds() {
// Needs a thorough redesign
logger.warn("Executing testHardClipByReferenceCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new GATKSAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte) 64), new GATKSAMRecord(readClipper.read.getHeader()));
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(1,-1,1,4,"1H3M"));//clip 1 base at start
testList.add(new testParameter(11,-1,2,4,"2H2M"));//clip 2 bases at start
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start
for ( testParameter p : testList ) {
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
testBaseQualCigar(readClipper.hardClipLowQualEnds((byte) p.inputStart),
BASES.substring(p.substringStart, p.substringStop).getBytes(),
QUALS.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipLowQualEnds((byte) p.inputStart),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
/* todo find a better way to test lowqual tail clipping on both sides
/* todo find a better way to test lowqual tail clipping on both sides
// Reverse Quals sequence
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
@ -272,127 +268,13 @@ public class ReadClipperUnitTest extends BaseTest {
*/
}
// public class ReadMaker {
//Should have functions that can
// make basic read
// make reads by cigar,
// and by quals,
// package in a readclipper
// This has been already done in the current class
// GATKSAMRecord read;
// ReadClipper readClipper;
public String cycleString ( String string, int length ) {
String output = "";
int cycles = ( length / string.length() ) + 1;
for ( int i = 1; i < cycles; i++ )
output += string;
for ( int j = 0; output.length() < length; j++ )
output += string.charAt( j % string.length() );
return output;
}
private void makeFromCigar( Cigar cigar ) {
readClipper = null;
read = null;
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, cigar.getReadLength());
read.setReadBases(cycleString(BASES, cigar.getReadLength()).getBytes());
read.setBaseQualityString( cycleString(QUALS, cigar.getReadLength() ));
read.setCigar(cigar);
readClipper = new ReadClipper(read);
}
// }
private Set<Cigar> generateCigars() {
// This function generates every permutation of cigar strings we need.
LinkedHashSet<Cigar> output = new LinkedHashSet<Cigar>();
List<Cigar> clippingOptionsStart = new LinkedList<Cigar>();
clippingOptionsStart.add( new Cigar() );
clippingOptionsStart.add( TextCigarCodec.getSingleton().decode("1H1S") );
clippingOptionsStart.add( TextCigarCodec.getSingleton().decode("1S") );
clippingOptionsStart.add( TextCigarCodec.getSingleton().decode("1H") );
LinkedList<Cigar> clippingOptionsEnd = new LinkedList<Cigar>();
clippingOptionsEnd.add( new Cigar() );
clippingOptionsEnd.add( TextCigarCodec.getSingleton().decode("1S1H") );
clippingOptionsEnd.add( TextCigarCodec.getSingleton().decode("1S"));
clippingOptionsEnd.add( TextCigarCodec.getSingleton().decode("1H"));
LinkedList<Cigar> indelOptions = new LinkedList<Cigar>();
indelOptions.add( new Cigar() );
//indelOptions.add( TextCigarCodec.getSingleton().decode("1I1D"));
//indelOptions.add( TextCigarCodec.getSingleton().decode("1D1I") );
indelOptions.add( TextCigarCodec.getSingleton().decode("1I") );
indelOptions.add( TextCigarCodec.getSingleton().decode("1D") );
// Start With M as base CigarElements, M,
LinkedList<Cigar> base = new LinkedList<Cigar>();
base.add( TextCigarCodec.getSingleton().decode("1M"));
base.add( TextCigarCodec.getSingleton().decode("5M"));
base.add( TextCigarCodec.getSingleton().decode("25M"));
// Should indel be added as a base?
// Nested loops W00t!
for ( Cigar Base : base) {
for ( Cigar indelStart: indelOptions) {
for ( Cigar indelEnd: indelOptions) {
for ( Cigar clipStart: clippingOptionsStart) {
for ( Cigar clipEnd: clippingOptionsEnd) {
// Create a list of Cigar Elements and construct Cigar
List<CigarElement> CigarBuilder = new ArrayList<CigarElement>();
CigarBuilder.addAll(Base.getCigarElements());
CigarBuilder.addAll(indelStart.getCigarElements());
CigarBuilder.addAll(Base.getCigarElements());
CigarBuilder.addAll(indelEnd.getCigarElements());
CigarBuilder.addAll(clipEnd.getCigarElements());
//CigarBuilder.addAll(0, indelStart.getCigarElements());
CigarBuilder.addAll(0, clipStart.getCigarElements());
//System.out.println( new Cigar( removeConsecutiveElements(CigarBuilder) ).toString() );
output.add( new Cigar( removeConsecutiveElements(CigarBuilder) ) );
}
}
}
}
}
return output;
}
private List<CigarElement> removeConsecutiveElements(List<CigarElement> cigarBuilder) {
LinkedList<CigarElement> output = new LinkedList<CigarElement>();
for ( CigarElement E: cigarBuilder ) {
if ( output.isEmpty() || output.getLast().getOperator() != E.getOperator())
output.add(E);
}
return output;
}
@Test ( enabled = true )
@Test(enabled = true)
public void testHardClipSoftClippedBases() {
// Generate a list of cigars to test
for ( Cigar cigar: generateCigars() ) {
for (Cigar cigar : ClipReadsTestUtils.generateCigars()) {
//logger.warn("Testing Cigar: "+cigar.toString());
makeFromCigar( cigar );
try {
readClipper.hardClipLeadingInsertions();
}
catch ( ReviewedStingException e ) {}
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(cigar));
int clipStart = 0;
int clipEnd = 0;
@ -401,41 +283,38 @@ public class ReadClipperUnitTest extends BaseTest {
List<CigarElement> cigarElements = cigar.getCigarElements();
int CigarListLength = cigarElements.size();
// It will know what needs to be clipped based on the start and end of the string, hardclips and softclips
// are added to the amount to clip
if ( cigarElements.get(0).getOperator() == CigarOperator.HARD_CLIP ) {
if (cigarElements.get(0).getOperator() == CigarOperator.HARD_CLIP) {
//clipStart += cigarElements.get(0).getLength();
if ( cigarElements.get(1).getOperator() == CigarOperator.SOFT_CLIP ) {
if (cigarElements.get(1).getOperator() == CigarOperator.SOFT_CLIP) {
clipStart += cigarElements.get(1).getLength();
// Check for leading indel
if ( cigarElements.get(2).getOperator() == CigarOperator.INSERTION ) {
if (cigarElements.get(2).getOperator() == CigarOperator.INSERTION) {
expectEmptyRead = true;
}
}
// Check for leading indel
else if ( cigarElements.get(1).getOperator() == CigarOperator.INSERTION ) {
else if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) {
expectEmptyRead = true;
}
}
else if ( cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP ) {
} else if (cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP) {
clipStart += cigarElements.get(0).getLength();
// Check for leading indel
if ( cigarElements.get(1).getOperator() == CigarOperator.INSERTION ) {
if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) {
expectEmptyRead = true;
}
}
//Check for leading indel
else if ( cigarElements.get(0).getOperator() == CigarOperator.INSERTION ) {
expectEmptyRead = true;
}
if ( cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.HARD_CLIP ) {
//clipEnd += cigarElements.get(CigarListLength - 1).getLength();
if ( cigarElements.get(CigarListLength - 2).getOperator() == CigarOperator.SOFT_CLIP )
clipEnd += cigarElements.get(CigarListLength - 2).getLength();
else if (cigarElements.get(0).getOperator() == CigarOperator.INSERTION) {
expectEmptyRead = true;
}
else if ( cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.SOFT_CLIP )
if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.HARD_CLIP) {
//clipEnd += cigarElements.get(CigarListLength - 1).getLength();
if (cigarElements.get(CigarListLength - 2).getOperator() == CigarOperator.SOFT_CLIP)
clipEnd += cigarElements.get(CigarListLength - 2).getLength();
} else if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.SOFT_CLIP)
clipEnd += cigarElements.get(CigarListLength - 1).getLength();
String readBases = readClipper.read.getReadString();
@ -448,15 +327,15 @@ public class ReadClipperUnitTest extends BaseTest {
baseQuals = "";
logger.warn(String.format("Testing cigar %s, expecting Base: %s and Qual: %s",
cigar.toString(), readBases.substring( clipStart, readBases.length() - clipEnd ),
baseQuals.substring( clipStart, baseQuals.length() - clipEnd ) ) );
cigar.toString(), readBases.substring(clipStart, readBases.length() - clipEnd),
baseQuals.substring(clipStart, baseQuals.length() - clipEnd)));
//if (expectEmptyRead)
// testBaseQual( readClipper.hardClipSoftClippedBases(), new byte[0], new byte[0] );
//else
testBaseQual(readClipper.hardClipSoftClippedBases(),
readBases.substring( clipStart, readBases.length() - clipEnd ).getBytes(),
baseQuals.substring( clipStart, baseQuals.length() - clipEnd ).getBytes());
logger.warn("Cigar: "+cigar.toString()+" PASSED!");
ClipReadsTestUtils.testBaseQual(readClipper.hardClipSoftClippedBases(),
readBases.substring(clipStart, readBases.length() - clipEnd).getBytes(),
baseQuals.substring(clipStart, baseQuals.length() - clipEnd).getBytes());
logger.warn("Cigar: " + cigar.toString() + " PASSED!");
}
// We will use testParameter in the following way
// Right tail, left tail,

View File

@ -0,0 +1,24 @@
package org.broadinstitute.sting.utils.clipreads;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 11/28/11
* Time: 4:07 PM
* To change this template use File | Settings | File Templates.
*/
public class TestParameter {
int inputStart;
int inputStop;
int substringStart;
int substringStop;
String cigar;
public TestParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) {
inputStart = InputStart;
inputStop = InputStop;
substringStart = SubstringStart;
substringStop = SubstringStop;
cigar = Cigar;
}
}