From 4cbd1f0dec66ec26204dcc4956a830d23527ce47 Mon Sep 17 00:00:00 2001 From: Roger Zurawicki Date: Tue, 29 Nov 2011 18:30:23 -0500 Subject: [PATCH] Reorganized the testing code and created ClipReadsTestUtils Tests are more rigorous and includes many more test cases. We can tests custom cigars and the generated cigars. *Still needs debugging because code is not working. Created test classes to be used across several tests. Some cases are still commented out. Signed-off-by: Mauricio Carneiro --- .../utils/clipreads/CigarStringTestPair.java | 18 + .../utils/clipreads/ClipReadsTestUtils.java | 185 ++++++++ .../utils/clipreads/ClippingOpUnitTest.java | 69 +++ .../utils/clipreads/ReadClipperUnitTest.java | 413 +++++++----------- .../sting/utils/clipreads/TestParameter.java | 24 + 5 files changed, 442 insertions(+), 267 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/utils/clipreads/CigarStringTestPair.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/clipreads/TestParameter.java diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/CigarStringTestPair.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/CigarStringTestPair.java new file mode 100644 index 000000000..cc9021fae --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/CigarStringTestPair.java @@ -0,0 +1,18 @@ +package org.broadinstitute.sting.utils.clipreads; + +/** + * Created by IntelliJ IDEA. + * User: roger + * Date: 11/29/11 + * Time: 4:53 PM + * To change this template use File | Settings | File Templates. + */ +public class CigarStringTestPair { + public String toTest; + public String expected; + + public CigarStringTestPair(String ToTest, String Expected) { + this.toTest = ToTest; + this.expected = Expected; + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java new file mode 100644 index 000000000..a5524e6f1 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java @@ -0,0 +1,185 @@ +package org.broadinstitute.sting.utils.clipreads; + +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.TextCigarCodec; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.testng.Assert; + +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: roger + * Date: 11/27/11 + * Time: 6:45 AM + * To change this template use File | Settings | File Templates. + */ +public class ClipReadsTestUtils { + //Should contain all the utils needed for tests to mass produce + //reads, cigars, and other needed classes + + final static String BASES = "ACTG"; + final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63 + + public static void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) { + // Because quals to char start at 33 for visibility + baseQuals = subtractToArray(baseQuals, 33); + + Assert.assertEquals(read.getReadBases(), readBases); + Assert.assertEquals(read.getBaseQualities(), baseQuals); + Assert.assertEquals(read.getCigarString(), cigar); + } + + public static void testCigar(GATKSAMRecord read, String cigar) { + Assert.assertEquals(read.getCigarString(), cigar); + } + + public static void testBaseQual(GATKSAMRecord read, byte[] readBases, byte[] baseQuals) { + // Because quals to chars start at 33 for visibility + baseQuals = subtractToArray(baseQuals, 33); + + if (readBases.length > 0 && baseQuals.length > 0) { + Assert.assertEquals(read.getReadBases(), readBases); + Assert.assertEquals(read.getBaseQualities(), baseQuals); + } else + Assert.assertTrue(read.isEmpty()); + } + + private static byte[] subtractToArray(byte[] array, int n) { + if (array == null) + return null; + + byte[] output = new byte[array.length]; + + for (int i = 0; i < array.length; i++) + output[i] = (byte) (array[i] - n); + + return output; + } + + // What the test read looks like + // Ref: 10 11 12 13 14 15 16 17 + // Read: 0 1 2 3 - - - - + // -------------------------------- + // Bases: A C T G - - - - + // Quals: ! + 5 ? - - - - + + public static GATKSAMRecord makeRead() { + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); + GATKSAMRecord output = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, BASES.length()); + output.setReadBases(new String(BASES).getBytes()); + output.setBaseQualityString(new String(QUALS)); + + return output; + } + + public static GATKSAMRecord makeReadFromCigar(Cigar cigar) { + + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); + GATKSAMRecord output = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, cigar.getReadLength()); + output.setReadBases(cycleString(BASES, cigar.getReadLength()).getBytes()); + output.setBaseQualityString(cycleString(QUALS, cigar.getReadLength())); + output.setCigar(cigar); + + return output; + } + + private static String cycleString(String string, int length) { + String output = ""; + int cycles = (length / string.length()) + 1; + + for (int i = 1; i < cycles; i++) + output += string; + + for (int j = 0; output.length() < length; j++) + output += string.charAt(j % string.length()); + + return output; + } + + public static Set generateCigars() { + + // This function generates every permutation of cigar strings we need. + + LinkedHashSet output = new LinkedHashSet(); + + List clippingOptionsStart = new LinkedList(); + clippingOptionsStart.add(new Cigar()); + clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1H1S")); + clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1S")); + clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1H")); + + LinkedList clippingOptionsEnd = new LinkedList(); + clippingOptionsEnd.add(new Cigar()); + clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1S1H")); + clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1S")); + clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1H")); + + + LinkedList indelOptions1 = new LinkedList(); + indelOptions1.add(new Cigar()); + //indelOptions1.add( TextCigarCodec.getSingleton().decode("1I1D")); + //indelOptions1.add( TextCigarCodec.getSingleton().decode("1D1I") ); + indelOptions1.add(TextCigarCodec.getSingleton().decode("1I")); + indelOptions1.add(TextCigarCodec.getSingleton().decode("1D")); + + LinkedList indelOptions2 = new LinkedList(); + indelOptions2.add(new Cigar()); + indelOptions2.add(TextCigarCodec.getSingleton().decode("1I")); + indelOptions2.add(null); + + + // Start With M as base CigarElements, M, + + LinkedList base = new LinkedList(); + base.add(TextCigarCodec.getSingleton().decode("1M")); + base.add(TextCigarCodec.getSingleton().decode("5M")); + base.add(TextCigarCodec.getSingleton().decode("25M")); + // Should indel be added as a base? + + // Nested loops W00t! + for (Cigar Base : base) { + for (Cigar indelStart : indelOptions1) { + for (Cigar indelEnd : indelOptions2) { + for (Cigar clipStart : clippingOptionsStart) { + for (Cigar clipEnd : clippingOptionsEnd) { + // Create a list of Cigar Elements and construct Cigar + List CigarBuilder = new ArrayList(); + // add starting clipping (H/S) + CigarBuilder.addAll(clipStart.getCigarElements()); + // add first base (M) + CigarBuilder.addAll(Base.getCigarElements()); + // add first indel + CigarBuilder.addAll(indelStart.getCigarElements()); + // add second base (M) + CigarBuilder.addAll(Base.getCigarElements()); + // add another indel or nothing (M) + if (indelEnd != null) + CigarBuilder.addAll(indelEnd.getCigarElements()); + // add final clipping (S/H) + CigarBuilder.addAll(clipEnd.getCigarElements()); + + + output.add(new Cigar(removeConsecutiveElements(CigarBuilder))); + + } + } + } + } + } + + return output; + } + + private static List removeConsecutiveElements(List cigarBuilder) { + LinkedList output = new LinkedList(); + for (CigarElement E : cigarBuilder) { + if (output.isEmpty() || output.getLast().getOperator() != E.getOperator()) + output.add(E); + } + return output; + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java new file mode 100644 index 000000000..719d04287 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java @@ -0,0 +1,69 @@ +package org.broadinstitute.sting.utils.clipreads; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import java.util.LinkedList; +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: roger + * Date: 11/27/11 + * Time: 5:17 AM + * To change this template use File | Settings | File Templates. + */ +public class ClippingOpUnitTest extends BaseTest { + + ClippingOp clippingOp; + GATKSAMRecord read; + + @BeforeTest + public void init() { + read = ClipReadsTestUtils.makeRead(); + } + + @Test + private void testHardClip() { + List testList = new LinkedList(); + testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start + testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end + testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start + testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end + testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start + testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end + + for (TestParameter p : testList) { + init(); + clippingOp = new ClippingOp(p.inputStart, p.inputStop); + logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar); + ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read), + ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), + p.cigar); + } + + } + + @Test + private void testSoftClip() { + List testList = new LinkedList(); + testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start + testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end + testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start + testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end + testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start + testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end + + for (TestParameter p : testList) { + init(); + clippingOp = new ClippingOp(p.inputStart, p.inputStop); + logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar); + ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read), + ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar); + } + + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java index a7944c9ad..73ba5c12d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java @@ -25,16 +25,19 @@ package org.broadinstitute.sting.utils.clipreads; -import net.sf.samtools.*; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.TextCigarCodec; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; -import java.util.*; +import java.util.LinkedList; +import java.util.List; /** * Created by IntelliJ IDEA. @@ -47,212 +50,205 @@ public class ReadClipperUnitTest extends BaseTest { // TODO: exception testing, make cases that should fail will fail - //int debug = 0; + // TODO: add indels to all test cases - GATKSAMRecord read, expected; ReadClipper readClipper; - final static String BASES = "ACTG"; - final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63 - - private void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) { - // Because quals to char start at 33 for visibility - baseQuals = subtractToArray(baseQuals, 33); - - Assert.assertEquals(read.getReadBases(), readBases); - Assert.assertEquals(read.getBaseQualities(), baseQuals); - Assert.assertEquals(read.getCigarString(), cigar); - } - - private void testBaseQual(GATKSAMRecord read, byte[] readBases, byte[] baseQuals) { - // Because quals to chars start at 33 for visibility - baseQuals = subtractToArray(baseQuals, 33); - - if ( readBases.length > 0 && baseQuals.length > 0 ) { - Assert.assertEquals(read.getReadBases(), readBases); - Assert.assertEquals(read.getBaseQualities(), baseQuals); - } - else - Assert.assertTrue(read.isEmpty()); - } - - public class testParameter { - int inputStart; - int inputStop; - int substringStart; - int substringStop; - String cigar; - - public testParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) { - inputStart = InputStart; - inputStop = InputStop; - substringStart = SubstringStart; - substringStop = SubstringStop; - cigar = Cigar; - } - } - - private byte[] subtractToArray (byte[] array, int n ) { - if ( array == null) - return null; - - byte[] output = new byte[array.length]; - - for ( int i = 0; i < array.length; i++) - output[i] = (byte) ( array[i] - n ); - - return output; - } - - // What the test read looks like - // Ref: 1 2 3 4 5 6 7 8 - // Read: 0 1 2 3 - - - - - // ----------------------------- - // Bases: A C T G - - - - - // Quals: ! + 5 ? - - - - @BeforeMethod public void init() { - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); - read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length()); - read.setReadBases(new String(BASES).getBytes()); - read.setBaseQualityString(new String(QUALS)); - - readClipper = new ReadClipper(read); - //logger.warn(read.getCigarString()); + readClipper = new ReadClipper(ClipReadsTestUtils.makeRead()); } - @Test ( enabled = true ) + @Test(enabled = true) public void testHardClipBothEndsByReferenceCoordinates() { logger.warn("Executing testHardClipBothEndsByReferenceCoordinates"); //int debug = 1; //Clip whole read - Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(1,1), new GATKSAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(10, 10), new GATKSAMRecord(readClipper.read.getHeader())); //clip 1 base - expected = readClipper.hardClipBothEndsByReferenceCoordinates(1,4); - Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes()); - Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,3)); - Assert.assertEquals(expected.getCigarString(), "1H2M1H"); + ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipBothEndsByReferenceCoordinates(10, 13), + ClipReadsTestUtils.BASES.substring(1, 3).getBytes(), ClipReadsTestUtils.QUALS.substring(1, 3).getBytes(), + "1H2M1H"); + List cigarStringTestPairs = new LinkedList(); + cigarStringTestPairs.add(new CigarStringTestPair("5M1D1M2I4M5I6M1D3M2I100M", "1H4M1D1M2I4M5I6M1D3M2I99M1H")); + //cigarStringTestPairs.add( new CigarStringTestPair("5M1I1M2I1M","1H4M1I1M2I1H")); + cigarStringTestPairs.add(new CigarStringTestPair("1S1M1I1M1I1M1I1M1I1M1I1M1S", "1H1M1I1M1I1M1I1M1I1M1I1M1H")); + cigarStringTestPairs.add(new CigarStringTestPair("1S1M1D1M1D1M1D1M1D1M1D1M1S", "1H1M1D1M1D1M1D1M1D1M1D1M1H")); + + for (CigarStringTestPair pair : cigarStringTestPairs) { + readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(TextCigarCodec.getSingleton().decode(pair.toTest))); + ClipReadsTestUtils.testCigar(readClipper.hardClipBothEndsByReferenceCoordinates( + ReadUtils.getRefCoordSoftUnclippedStart(readClipper.read), + ReadUtils.getRefCoordSoftUnclippedEnd(readClipper.read)), + pair.expected); + } + /* + for ( Cigar cigar: ClipReadsTestUtils.generateCigars() ) { + // The read has to be long enough to clip one base from each side + // This also filters a lot of cigars + if ( cigar.getReadLength() > 26 ) { + readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar( cigar )); + System.out.println( "Testing Cigar: "+cigar.toString() ) ; + //cigar length reference plus soft clip + + ClipReadsTestUtils.testBaseQual( + readClipper.hardClipBothEndsByReferenceCoordinates( + ReadUtils.getRefCoordSoftUnclippedStart(readClipper.read), + ReadUtils.getRefCoordSoftUnclippedEnd(readClipper.read) ), + readClipper.read.getReadString().substring(1, (cigar.getReadLength() - 1)).getBytes(), + readClipper.read.getBaseQualityString().substring(1, (cigar.getReadLength() - 1)).getBytes()); + } + } + */ } - @Test ( enabled = true ) + @Test(enabled = true) public void testHardClipByReadCoordinates() { logger.warn("Executing testHardClipByReadCoordinates"); //Clip whole read - Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new GATKSAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipByReadCoordinates(0, 3), new GATKSAMRecord(readClipper.read.getHeader())); - List testList = new LinkedList(); - testList.add(new testParameter(0,0,1,4,"1H3M"));//clip 1 base at start - testList.add(new testParameter(3,3,0,3,"3M1H"));//clip 1 base at end - testList.add(new testParameter(0,1,2,4,"2H2M"));//clip 2 bases at start - testList.add(new testParameter(2,3,0,2,"2M2H"));//clip 2 bases at end + List testList = new LinkedList(); + testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start + testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end + testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start + testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end - for ( testParameter p : testList ) { + for (TestParameter p : testList) { init(); //logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar); - testBaseQualCigar(readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop), - BASES.substring(p.substringStart, p.substringStop).getBytes(), - QUALS.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop), + ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), p.cigar); } } - @Test ( enabled = true ) + @Test(enabled = true) public void testHardClipByReferenceCoordinates() { logger.warn("Executing testHardClipByReferenceCoordinates"); //logger.warn(debug); //Clip whole read - Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new GATKSAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(10, 13), new GATKSAMRecord(readClipper.read.getHeader())); - List testList = new LinkedList(); - testList.add(new testParameter(-1,1,1,4,"1H3M"));//clip 1 base at start - testList.add(new testParameter(4,-1,0,3,"3M1H"));//clip 1 base at end - testList.add(new testParameter(-1,2,2,4,"2H2M"));//clip 2 bases at start - testList.add(new testParameter(3,-1,0,2,"2M2H"));//clip 2 bases at end + List testList = new LinkedList(); + testList.add(new TestParameter(-1, 10, 1, 4, "1H3M"));//clip 1 base at start + testList.add(new TestParameter(13, -1, 0, 3, "3M1H"));//clip 1 base at end + testList.add(new TestParameter(-1, 11, 2, 4, "2H2M"));//clip 2 bases at start + testList.add(new TestParameter(12, -1, 0, 2, "2M2H"));//clip 2 bases at end - for ( testParameter p : testList ) { + for (TestParameter p : testList) { init(); //logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar); - testBaseQualCigar(readClipper.hardClipByReferenceCoordinates(p.inputStart, p.inputStop), - BASES.substring(p.substringStart, p.substringStop).getBytes(), - QUALS.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinates(p.inputStart, p.inputStop), + ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), p.cigar); } + List cigarStringTestPairs = new LinkedList(); + cigarStringTestPairs.add(new CigarStringTestPair("5M1D1M2I4M5I6M1D3M2I100M", "1H4M1D1M2I4M5I6M1D3M2I100M")); + //cigarStringTestPairs.add( new CigarStringTestPair("5M1I1M2I1M","1H4M1I1M2I1M")); + cigarStringTestPairs.add(new CigarStringTestPair("1S1M1I1M1I1M1I1M1I1M1I1M1S", "1H1M1I1M1I1M1I1M1I1M1I1M1S")); + cigarStringTestPairs.add(new CigarStringTestPair("1S1M1D1M1D1M1D1M1D1M1D1M1S", "1H1M1D1M1D1M1D1M1D1M1D1M1S")); + + //Clips only first base + for (CigarStringTestPair pair : cigarStringTestPairs) { + readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(TextCigarCodec.getSingleton().decode(pair.toTest))); + ClipReadsTestUtils.testCigar(readClipper.hardClipByReadCoordinates(0, 0), pair.expected); + } + /* + for ( Cigar cigar: ClipReadsTestUtils.generateCigars() ) { + // The read has to be long enough to clip one base + // This also filters a lot of cigars + if ( cigar.getReadLength() > 26 ) { + readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar( cigar )); + System.out.println( "Testing Cigar: "+cigar.toString() ) ; + //cigar length reference plus soft clip + + // Clip first read + ClipReadsTestUtils.testBaseQual( + readClipper.hardClipByReadCoordinates(0,0), + readClipper.read.getReadString().substring(1, cigar.getReadLength()).getBytes(), + readClipper.read.getBaseQualityString().substring(1, cigar.getReadLength()).getBytes()); + } + } + */ } - @Test ( enabled = true ) + @Test(enabled = true) public void testHardClipByReferenceCoordinatesLeftTail() { init(); logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail"); //Clip whole read - Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new GATKSAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(13), new GATKSAMRecord(readClipper.read.getHeader())); - List testList = new LinkedList(); - testList.add(new testParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start - testList.add(new testParameter(2, -1, 2, 4, "2H2M"));//clip 2 bases at start + List testList = new LinkedList(); + testList.add(new TestParameter(10, -1, 1, 4, "1H3M"));//clip 1 base at start + testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start - for ( testParameter p : testList ) { + for (TestParameter p : testList) { init(); //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar); - testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart), - BASES.substring(p.substringStart, p.substringStop).getBytes(), - QUALS.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart), + ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), p.cigar); } } - @Test ( enabled = true ) + @Test(enabled = true) public void testHardClipByReferenceCoordinatesRightTail() { init(); logger.warn("Executing testHardClipByReferenceCoordinatesRightTail"); //Clip whole read - Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new GATKSAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(10), new GATKSAMRecord(readClipper.read.getHeader())); - List testList = new LinkedList(); - testList.add(new testParameter(-1, 4, 0, 3, "3M1H"));//clip 1 base at end - testList.add(new testParameter(-1, 3, 0, 2, "2M2H"));//clip 2 bases at end + List testList = new LinkedList(); + testList.add(new TestParameter(-1, 13, 0, 3, "3M1H"));//clip 1 base at end + testList.add(new TestParameter(-1, 12, 0, 2, "2M2H"));//clip 2 bases at end - for ( testParameter p : testList ) { + for (TestParameter p : testList) { init(); //logger.warn("Testing Parameters: " + p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar); - testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop), - BASES.substring(p.substringStart, p.substringStop).getBytes(), - QUALS.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop), + ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), p.cigar); } } - @Test ( enabled = true ) + @Test(enabled = true) public void testHardClipLowQualEnds() { // Needs a thorough redesign logger.warn("Executing testHardClipByReferenceCoordinates"); //Clip whole read - Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new GATKSAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipLowQualEnds((byte) 64), new GATKSAMRecord(readClipper.read.getHeader())); - List testList = new LinkedList(); - testList.add(new testParameter(1,-1,1,4,"1H3M"));//clip 1 base at start - testList.add(new testParameter(11,-1,2,4,"2H2M"));//clip 2 bases at start + List testList = new LinkedList(); + testList.add(new TestParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start + testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start - for ( testParameter p : testList ) { + for (TestParameter p : testList) { init(); //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar); - testBaseQualCigar(readClipper.hardClipLowQualEnds((byte) p.inputStart), - BASES.substring(p.substringStart, p.substringStop).getBytes(), - QUALS.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipLowQualEnds((byte) p.inputStart), + ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), p.cigar); } - /* todo find a better way to test lowqual tail clipping on both sides + /* todo find a better way to test lowqual tail clipping on both sides // Reverse Quals sequence readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33 @@ -272,127 +268,13 @@ public class ReadClipperUnitTest extends BaseTest { */ } - // public class ReadMaker { - //Should have functions that can - // make basic read - // make reads by cigar, - // and by quals, - // package in a readclipper - // This has been already done in the current class - // GATKSAMRecord read; - // ReadClipper readClipper; - - public String cycleString ( String string, int length ) { - String output = ""; - int cycles = ( length / string.length() ) + 1; - - for ( int i = 1; i < cycles; i++ ) - output += string; - - for ( int j = 0; output.length() < length; j++ ) - output += string.charAt( j % string.length() ); - - return output; - - } - - private void makeFromCigar( Cigar cigar ) { - readClipper = null; - read = null; - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); - read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, cigar.getReadLength()); - read.setReadBases(cycleString(BASES, cigar.getReadLength()).getBytes()); - read.setBaseQualityString( cycleString(QUALS, cigar.getReadLength() )); - read.setCigar(cigar); - readClipper = new ReadClipper(read); - } - - // } - - private Set generateCigars() { - - // This function generates every permutation of cigar strings we need. - - LinkedHashSet output = new LinkedHashSet(); - - List clippingOptionsStart = new LinkedList(); - clippingOptionsStart.add( new Cigar() ); - clippingOptionsStart.add( TextCigarCodec.getSingleton().decode("1H1S") ); - clippingOptionsStart.add( TextCigarCodec.getSingleton().decode("1S") ); - clippingOptionsStart.add( TextCigarCodec.getSingleton().decode("1H") ); - - LinkedList clippingOptionsEnd = new LinkedList(); - clippingOptionsEnd.add( new Cigar() ); - clippingOptionsEnd.add( TextCigarCodec.getSingleton().decode("1S1H") ); - clippingOptionsEnd.add( TextCigarCodec.getSingleton().decode("1S")); - clippingOptionsEnd.add( TextCigarCodec.getSingleton().decode("1H")); - - - LinkedList indelOptions = new LinkedList(); - indelOptions.add( new Cigar() ); - //indelOptions.add( TextCigarCodec.getSingleton().decode("1I1D")); - //indelOptions.add( TextCigarCodec.getSingleton().decode("1D1I") ); - indelOptions.add( TextCigarCodec.getSingleton().decode("1I") ); - indelOptions.add( TextCigarCodec.getSingleton().decode("1D") ); - - // Start With M as base CigarElements, M, - - LinkedList base = new LinkedList(); - base.add( TextCigarCodec.getSingleton().decode("1M")); - base.add( TextCigarCodec.getSingleton().decode("5M")); - base.add( TextCigarCodec.getSingleton().decode("25M")); - // Should indel be added as a base? - - // Nested loops W00t! - for ( Cigar Base : base) { - for ( Cigar indelStart: indelOptions) { - for ( Cigar indelEnd: indelOptions) { - for ( Cigar clipStart: clippingOptionsStart) { - for ( Cigar clipEnd: clippingOptionsEnd) { - // Create a list of Cigar Elements and construct Cigar - List CigarBuilder = new ArrayList(); - CigarBuilder.addAll(Base.getCigarElements()); - CigarBuilder.addAll(indelStart.getCigarElements()); - CigarBuilder.addAll(Base.getCigarElements()); - CigarBuilder.addAll(indelEnd.getCigarElements()); - CigarBuilder.addAll(clipEnd.getCigarElements()); - //CigarBuilder.addAll(0, indelStart.getCigarElements()); - CigarBuilder.addAll(0, clipStart.getCigarElements()); - //System.out.println( new Cigar( removeConsecutiveElements(CigarBuilder) ).toString() ); - output.add( new Cigar( removeConsecutiveElements(CigarBuilder) ) ); - - } - } - } - } - } - - return output; - } - - private List removeConsecutiveElements(List cigarBuilder) { - LinkedList output = new LinkedList(); - for ( CigarElement E: cigarBuilder ) { - if ( output.isEmpty() || output.getLast().getOperator() != E.getOperator()) - output.add(E); - } - return output; - } - - @Test ( enabled = true ) + @Test(enabled = true) public void testHardClipSoftClippedBases() { // Generate a list of cigars to test - for ( Cigar cigar: generateCigars() ) { + for (Cigar cigar : ClipReadsTestUtils.generateCigars()) { //logger.warn("Testing Cigar: "+cigar.toString()); - makeFromCigar( cigar ); - - - try { - readClipper.hardClipLeadingInsertions(); - } - catch ( ReviewedStingException e ) {} - + readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(cigar)); int clipStart = 0; int clipEnd = 0; @@ -401,41 +283,38 @@ public class ReadClipperUnitTest extends BaseTest { List cigarElements = cigar.getCigarElements(); int CigarListLength = cigarElements.size(); - // It will know what needs to be clipped based on the start and end of the string, hardclips and softclips // are added to the amount to clip - if ( cigarElements.get(0).getOperator() == CigarOperator.HARD_CLIP ) { + if (cigarElements.get(0).getOperator() == CigarOperator.HARD_CLIP) { //clipStart += cigarElements.get(0).getLength(); - if ( cigarElements.get(1).getOperator() == CigarOperator.SOFT_CLIP ) { + if (cigarElements.get(1).getOperator() == CigarOperator.SOFT_CLIP) { clipStart += cigarElements.get(1).getLength(); // Check for leading indel - if ( cigarElements.get(2).getOperator() == CigarOperator.INSERTION ) { + if (cigarElements.get(2).getOperator() == CigarOperator.INSERTION) { expectEmptyRead = true; } } // Check for leading indel - else if ( cigarElements.get(1).getOperator() == CigarOperator.INSERTION ) { + else if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) { expectEmptyRead = true; } - } - else if ( cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP ) { + } else if (cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP) { clipStart += cigarElements.get(0).getLength(); // Check for leading indel - if ( cigarElements.get(1).getOperator() == CigarOperator.INSERTION ) { + if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) { expectEmptyRead = true; } } //Check for leading indel - else if ( cigarElements.get(0).getOperator() == CigarOperator.INSERTION ) { - expectEmptyRead = true; - } - - if ( cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.HARD_CLIP ) { - //clipEnd += cigarElements.get(CigarListLength - 1).getLength(); - if ( cigarElements.get(CigarListLength - 2).getOperator() == CigarOperator.SOFT_CLIP ) - clipEnd += cigarElements.get(CigarListLength - 2).getLength(); + else if (cigarElements.get(0).getOperator() == CigarOperator.INSERTION) { + expectEmptyRead = true; } - else if ( cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.SOFT_CLIP ) + + if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.HARD_CLIP) { + //clipEnd += cigarElements.get(CigarListLength - 1).getLength(); + if (cigarElements.get(CigarListLength - 2).getOperator() == CigarOperator.SOFT_CLIP) + clipEnd += cigarElements.get(CigarListLength - 2).getLength(); + } else if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.SOFT_CLIP) clipEnd += cigarElements.get(CigarListLength - 1).getLength(); String readBases = readClipper.read.getReadString(); @@ -448,15 +327,15 @@ public class ReadClipperUnitTest extends BaseTest { baseQuals = ""; logger.warn(String.format("Testing cigar %s, expecting Base: %s and Qual: %s", - cigar.toString(), readBases.substring( clipStart, readBases.length() - clipEnd ), - baseQuals.substring( clipStart, baseQuals.length() - clipEnd ) ) ); + cigar.toString(), readBases.substring(clipStart, readBases.length() - clipEnd), + baseQuals.substring(clipStart, baseQuals.length() - clipEnd))); //if (expectEmptyRead) // testBaseQual( readClipper.hardClipSoftClippedBases(), new byte[0], new byte[0] ); //else - testBaseQual(readClipper.hardClipSoftClippedBases(), - readBases.substring( clipStart, readBases.length() - clipEnd ).getBytes(), - baseQuals.substring( clipStart, baseQuals.length() - clipEnd ).getBytes()); - logger.warn("Cigar: "+cigar.toString()+" PASSED!"); + ClipReadsTestUtils.testBaseQual(readClipper.hardClipSoftClippedBases(), + readBases.substring(clipStart, readBases.length() - clipEnd).getBytes(), + baseQuals.substring(clipStart, baseQuals.length() - clipEnd).getBytes()); + logger.warn("Cigar: " + cigar.toString() + " PASSED!"); } // We will use testParameter in the following way // Right tail, left tail, diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/TestParameter.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/TestParameter.java new file mode 100644 index 000000000..155fe094e --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/TestParameter.java @@ -0,0 +1,24 @@ +package org.broadinstitute.sting.utils.clipreads; + +/** + * Created by IntelliJ IDEA. + * User: roger + * Date: 11/28/11 + * Time: 4:07 PM + * To change this template use File | Settings | File Templates. + */ +public class TestParameter { + int inputStart; + int inputStop; + int substringStart; + int substringStop; + String cigar; + + public TestParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) { + inputStart = InputStart; + inputStop = InputStop; + substringStart = SubstringStart; + substringStop = SubstringStop; + cigar = Cigar; + } +}