From 0e9c2cefa2ce88f578bd7108b5586bcfe977b0ab Mon Sep 17 00:00:00 2001 From: Roger Zurawicki Date: Wed, 23 Nov 2011 06:42:03 -0500 Subject: [PATCH] testHardClipSoftClippedBases works with Matches and Deletions Insertions are a problem so cigar cases with "I" are commented out. The test works with multiple deletions and matches. This is still not a complete test. A lot of cigar test cases are commented out. Added insertions to ReadClipperUnitTest ReadClipper now tests for all indels. Signed-off-by: Mauricio Carneiro --- .../utils/clipreads/ReadClipperUnitTest.java | 270 +++++++++++++++--- 1 file changed, 238 insertions(+), 32 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java index ecb5a6d33..a7944c9ad 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java @@ -25,15 +25,16 @@ package org.broadinstitute.sting.utils.clipreads; -import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.*; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; -import org.testng.annotations.*; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; -import java.util.LinkedList; -import java.util.List; +import java.util.*; /** * Created by IntelliJ IDEA. @@ -44,7 +45,7 @@ import java.util.List; */ public class ReadClipperUnitTest extends BaseTest { - // TODO: Add error messages on failed tests + // TODO: exception testing, make cases that should fail will fail //int debug = 0; @@ -53,13 +54,27 @@ public class ReadClipperUnitTest extends BaseTest { final static String BASES = "ACTG"; final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63 + private void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) { + // Because quals to char start at 33 for visibility + baseQuals = subtractToArray(baseQuals, 33); - public void testIfEqual( GATKSAMRecord read, byte[] readBases, String baseQuals, String cigar) { Assert.assertEquals(read.getReadBases(), readBases); - Assert.assertEquals(read.getBaseQualityString(), baseQuals); + Assert.assertEquals(read.getBaseQualities(), baseQuals); Assert.assertEquals(read.getCigarString(), cigar); } + private void testBaseQual(GATKSAMRecord read, byte[] readBases, byte[] baseQuals) { + // Because quals to chars start at 33 for visibility + baseQuals = subtractToArray(baseQuals, 33); + + if ( readBases.length > 0 && baseQuals.length > 0 ) { + Assert.assertEquals(read.getReadBases(), readBases); + Assert.assertEquals(read.getBaseQualities(), baseQuals); + } + else + Assert.assertTrue(read.isEmpty()); + } + public class testParameter { int inputStart; int inputStop; @@ -76,6 +91,18 @@ public class ReadClipperUnitTest extends BaseTest { } } + private byte[] subtractToArray (byte[] array, int n ) { + if ( array == null) + return null; + + byte[] output = new byte[array.length]; + + for ( int i = 0; i < array.length; i++) + output[i] = (byte) ( array[i] - n ); + + return output; + } + // What the test read looks like // Ref: 1 2 3 4 5 6 7 8 // Read: 0 1 2 3 - - - - @@ -127,10 +154,10 @@ public class ReadClipperUnitTest extends BaseTest { for ( testParameter p : testList ) { init(); //logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar); - testIfEqual( readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop), - BASES.substring(p.substringStart,p.substringStop).getBytes(), - QUALS.substring(p.substringStart,p.substringStop), - p.cigar ); + testBaseQualCigar(readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop), + BASES.substring(p.substringStart, p.substringStop).getBytes(), + QUALS.substring(p.substringStart, p.substringStop).getBytes(), + p.cigar); } } @@ -151,10 +178,10 @@ public class ReadClipperUnitTest extends BaseTest { for ( testParameter p : testList ) { init(); //logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar); - testIfEqual( readClipper.hardClipByReferenceCoordinates(p.inputStart,p.inputStop), - BASES.substring(p.substringStart,p.substringStop).getBytes(), - QUALS.substring(p.substringStart,p.substringStop), - p.cigar ); + testBaseQualCigar(readClipper.hardClipByReferenceCoordinates(p.inputStart, p.inputStop), + BASES.substring(p.substringStart, p.substringStop).getBytes(), + QUALS.substring(p.substringStart, p.substringStop).getBytes(), + p.cigar); } } @@ -174,10 +201,10 @@ public class ReadClipperUnitTest extends BaseTest { for ( testParameter p : testList ) { init(); //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar); - testIfEqual( readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart), - BASES.substring(p.substringStart,p.substringStop).getBytes(), - QUALS.substring(p.substringStart,p.substringStop), - p.cigar ); + testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart), + BASES.substring(p.substringStart, p.substringStop).getBytes(), + QUALS.substring(p.substringStart, p.substringStop).getBytes(), + p.cigar); } } @@ -197,17 +224,17 @@ public class ReadClipperUnitTest extends BaseTest { for ( testParameter p : testList ) { init(); //logger.warn("Testing Parameters: " + p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar); - testIfEqual( readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop), - BASES.substring(p.substringStart,p.substringStop).getBytes(), - QUALS.substring(p.substringStart,p.substringStop), - p.cigar ); + testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop), + BASES.substring(p.substringStart, p.substringStop).getBytes(), + QUALS.substring(p.substringStart, p.substringStop).getBytes(), + p.cigar); } } - @Test ( enabled = true ) // TODO This function is returning null reads + @Test ( enabled = true ) public void testHardClipLowQualEnds() { - + // Needs a thorough redesign logger.warn("Executing testHardClipByReferenceCoordinates"); //Clip whole read @@ -220,10 +247,10 @@ public class ReadClipperUnitTest extends BaseTest { for ( testParameter p : testList ) { init(); //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar); - testIfEqual( readClipper.hardClipLowQualEnds( (byte)p.inputStart ), - BASES.substring(p.substringStart,p.substringStop).getBytes(), - QUALS.substring(p.substringStart,p.substringStop), - p.cigar ); + testBaseQualCigar(readClipper.hardClipLowQualEnds((byte) p.inputStart), + BASES.substring(p.substringStart, p.substringStop).getBytes(), + QUALS.substring(p.substringStart, p.substringStop).getBytes(), + p.cigar); } /* todo find a better way to test lowqual tail clipping on both sides // Reverse Quals sequence @@ -237,7 +264,7 @@ public class ReadClipperUnitTest extends BaseTest { init(); readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33 //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar); - testIfEqual( readClipper.hardClipLowQualEnds( (byte)p.inputStart ), + testBaseQualCigar( readClipper.hardClipLowQualEnds( (byte)p.inputStart ), BASES.substring(p.substringStart,p.substringStop).getBytes(), QUALS.substring(p.substringStart,p.substringStop), p.cigar ); @@ -245,15 +272,194 @@ public class ReadClipperUnitTest extends BaseTest { */ } - public class CigarReadMaker { + // public class ReadMaker { + //Should have functions that can + // make basic read + // make reads by cigar, + // and by quals, + // package in a readclipper + // This has been already done in the current class + // GATKSAMRecord read; + // ReadClipper readClipper; + + public String cycleString ( String string, int length ) { + String output = ""; + int cycles = ( length / string.length() ) + 1; + + for ( int i = 1; i < cycles; i++ ) + output += string; + + for ( int j = 0; output.length() < length; j++ ) + output += string.charAt( j % string.length() ); + + return output; } - @Test ( enabled = false ) + private void makeFromCigar( Cigar cigar ) { + readClipper = null; + read = null; + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); + read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, cigar.getReadLength()); + read.setReadBases(cycleString(BASES, cigar.getReadLength()).getBytes()); + read.setBaseQualityString( cycleString(QUALS, cigar.getReadLength() )); + read.setCigar(cigar); + readClipper = new ReadClipper(read); + } + + // } + + private Set generateCigars() { + + // This function generates every permutation of cigar strings we need. + + LinkedHashSet output = new LinkedHashSet(); + + List clippingOptionsStart = new LinkedList(); + clippingOptionsStart.add( new Cigar() ); + clippingOptionsStart.add( TextCigarCodec.getSingleton().decode("1H1S") ); + clippingOptionsStart.add( TextCigarCodec.getSingleton().decode("1S") ); + clippingOptionsStart.add( TextCigarCodec.getSingleton().decode("1H") ); + + LinkedList clippingOptionsEnd = new LinkedList(); + clippingOptionsEnd.add( new Cigar() ); + clippingOptionsEnd.add( TextCigarCodec.getSingleton().decode("1S1H") ); + clippingOptionsEnd.add( TextCigarCodec.getSingleton().decode("1S")); + clippingOptionsEnd.add( TextCigarCodec.getSingleton().decode("1H")); + + + LinkedList indelOptions = new LinkedList(); + indelOptions.add( new Cigar() ); + //indelOptions.add( TextCigarCodec.getSingleton().decode("1I1D")); + //indelOptions.add( TextCigarCodec.getSingleton().decode("1D1I") ); + indelOptions.add( TextCigarCodec.getSingleton().decode("1I") ); + indelOptions.add( TextCigarCodec.getSingleton().decode("1D") ); + + // Start With M as base CigarElements, M, + + LinkedList base = new LinkedList(); + base.add( TextCigarCodec.getSingleton().decode("1M")); + base.add( TextCigarCodec.getSingleton().decode("5M")); + base.add( TextCigarCodec.getSingleton().decode("25M")); + // Should indel be added as a base? + + // Nested loops W00t! + for ( Cigar Base : base) { + for ( Cigar indelStart: indelOptions) { + for ( Cigar indelEnd: indelOptions) { + for ( Cigar clipStart: clippingOptionsStart) { + for ( Cigar clipEnd: clippingOptionsEnd) { + // Create a list of Cigar Elements and construct Cigar + List CigarBuilder = new ArrayList(); + CigarBuilder.addAll(Base.getCigarElements()); + CigarBuilder.addAll(indelStart.getCigarElements()); + CigarBuilder.addAll(Base.getCigarElements()); + CigarBuilder.addAll(indelEnd.getCigarElements()); + CigarBuilder.addAll(clipEnd.getCigarElements()); + //CigarBuilder.addAll(0, indelStart.getCigarElements()); + CigarBuilder.addAll(0, clipStart.getCigarElements()); + //System.out.println( new Cigar( removeConsecutiveElements(CigarBuilder) ).toString() ); + output.add( new Cigar( removeConsecutiveElements(CigarBuilder) ) ); + + } + } + } + } + } + + return output; + } + + private List removeConsecutiveElements(List cigarBuilder) { + LinkedList output = new LinkedList(); + for ( CigarElement E: cigarBuilder ) { + if ( output.isEmpty() || output.getLast().getOperator() != E.getOperator()) + output.add(E); + } + return output; + } + + @Test ( enabled = true ) public void testHardClipSoftClippedBases() { // Generate a list of cigars to test + for ( Cigar cigar: generateCigars() ) { + //logger.warn("Testing Cigar: "+cigar.toString()); + makeFromCigar( cigar ); + + + try { + readClipper.hardClipLeadingInsertions(); + } + catch ( ReviewedStingException e ) {} + + + int clipStart = 0; + int clipEnd = 0; + boolean expectEmptyRead = false; + + List cigarElements = cigar.getCigarElements(); + int CigarListLength = cigarElements.size(); + + + // It will know what needs to be clipped based on the start and end of the string, hardclips and softclips + // are added to the amount to clip + if ( cigarElements.get(0).getOperator() == CigarOperator.HARD_CLIP ) { + //clipStart += cigarElements.get(0).getLength(); + if ( cigarElements.get(1).getOperator() == CigarOperator.SOFT_CLIP ) { + clipStart += cigarElements.get(1).getLength(); + // Check for leading indel + if ( cigarElements.get(2).getOperator() == CigarOperator.INSERTION ) { + expectEmptyRead = true; + } + } + // Check for leading indel + else if ( cigarElements.get(1).getOperator() == CigarOperator.INSERTION ) { + expectEmptyRead = true; + } + } + else if ( cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP ) { + clipStart += cigarElements.get(0).getLength(); + // Check for leading indel + if ( cigarElements.get(1).getOperator() == CigarOperator.INSERTION ) { + expectEmptyRead = true; + } + } + //Check for leading indel + else if ( cigarElements.get(0).getOperator() == CigarOperator.INSERTION ) { + expectEmptyRead = true; + } + + if ( cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.HARD_CLIP ) { + //clipEnd += cigarElements.get(CigarListLength - 1).getLength(); + if ( cigarElements.get(CigarListLength - 2).getOperator() == CigarOperator.SOFT_CLIP ) + clipEnd += cigarElements.get(CigarListLength - 2).getLength(); + } + else if ( cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.SOFT_CLIP ) + clipEnd += cigarElements.get(CigarListLength - 1).getLength(); + + String readBases = readClipper.read.getReadString(); + String baseQuals = readClipper.read.getBaseQualityString(); + + // "*" is the default empty-sequence-string and for our test it needs to be changed to "" + if (readBases.equals("*")) + readBases = ""; + if (baseQuals.equals("*")) + baseQuals = ""; + + logger.warn(String.format("Testing cigar %s, expecting Base: %s and Qual: %s", + cigar.toString(), readBases.substring( clipStart, readBases.length() - clipEnd ), + baseQuals.substring( clipStart, baseQuals.length() - clipEnd ) ) ); + //if (expectEmptyRead) + // testBaseQual( readClipper.hardClipSoftClippedBases(), new byte[0], new byte[0] ); + //else + testBaseQual(readClipper.hardClipSoftClippedBases(), + readBases.substring( clipStart, readBases.length() - clipEnd ).getBytes(), + baseQuals.substring( clipStart, baseQuals.length() - clipEnd ).getBytes()); + logger.warn("Cigar: "+cigar.toString()+" PASSED!"); + } // We will use testParameter in the following way // Right tail, left tail, + } } \ No newline at end of file