diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java
index 8c1061494..c3684034c 100644
--- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java
@@ -168,7 +168,15 @@ public class ReadClipper {
         try {
             GATKSAMRecord clippedRead = (GATKSAMRecord) read.clone();
             for (ClippingOp op : getOps()) {
-                clippedRead = op.apply(algorithm, clippedRead);
+                // Skip ops that start past the end of the current (possibly already shortened) read.
+                if (op.start < clippedRead.getReadLength()) {
+                    ClippingOp fixedOperation = op;
+                    // The last valid index is readLength - 1, so a stop equal to readLength must be clamped as well.
+                    if (op.stop >= clippedRead.getReadLength())
+                        fixedOperation = new ClippingOp(op.start, clippedRead.getReadLength() - 1);
+
+                    clippedRead = fixedOperation.apply(algorithm, clippedRead);
+                }
             }
             wasClipped = true;
             ops.clear();
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
index 26fabade2..cedd56bdf 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
@@ -200,6 +200,48 @@ public class ArtificialSAMUtils {
         return rec;
     }
 
+    /**
+     * Create an artificial read based on the parameters
+     *
+     * @param header the SAM header to associate the read with
+     * @param name the name of the read
+     * @param refIndex the reference index, i.e. what chromosome to associate it with
+     * @param alignmentStart where to start the alignment
+     * @param bases the sequence of the read
+     * @param qual the qualities of the read
+     * @param cigar the cigar string of the read
+     *
+     * @return the artificial read
+     */
+    public static GATKSAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual, String cigar ) {
+        GATKSAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases, qual);
+        rec.setCigarString(cigar);
+        return rec;
+    }
+
+    /**
+     * Create an artificial read with the following default parameters :
+     * header:
+     *   numberOfChromosomes = 1
+     *   startingChromosome = 1
+     *   chromosomeSize = 1000000
+     * read:
+     *   name = "default_read"
+     *   refIndex = 0
+     *   alignmentStart = 1
+     *
+     * @param bases the sequence of the read
+     * @param qual the qualities of the read
+     * @param cigar the cigar string of the read
+     *
+     * @return the artificial read
+     */
+    public static GATKSAMRecord createArtificialRead( byte[] bases, byte[] qual, String cigar ) {
+        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
+        return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 1, bases, qual, cigar);
+    }
+
+
     public final static List createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
         GATKSAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen);
         GATKSAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen);
diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java
index a5524e6f1..de9d8fb50 100644
--- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java
+++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java
@@ -24,6 +24,18 @@ public class ClipReadsTestUtils {
     final static String BASES = "ACTG";
     final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
 
+    public static void assertEqualReads(GATKSAMRecord actual, GATKSAMRecord expected) {
+        // If they're both not empty, test their contents
+        if(!actual.isEmpty() && !expected.isEmpty()) {
+            Assert.assertEquals(actual.getReadBases(), expected.getReadBases());
+            Assert.assertEquals(actual.getBaseQualities(), expected.getBaseQualities());
+            Assert.assertEquals(actual.getCigarString(), expected.getCigarString());
+        }
+        // Otherwise test if they're both empty
+        else
+            Assert.assertEquals(actual.isEmpty(), expected.isEmpty());
+    }
+
     public static void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) {
         // Because quals to char start at 33 for visibility
         baseQuals = subtractToArray(baseQuals, 33);
@@ -48,7 +60,7 @@ public class ClipReadsTestUtils {
         Assert.assertTrue(read.isEmpty());
     }
 
-    private static byte[] subtractToArray(byte[] array, int n) {
+    public static byte[] subtractToArray(byte[] array, int n) {
         if (array == null)
             return null;
 
diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java
index ff33e3184..650d3f26e 100644
--- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java
@@ -30,6 +30,7 @@ import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.TextCigarCodec;
 import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
 import org.testng.Assert;
@@ -230,42 +231,25 @@ public class ReadClipperUnitTest extends BaseTest {
 
     @Test(enabled = true)
     public void testHardClipLowQualEnds() {
-        // Needs a thorough redesign
-        logger.warn("Executing testHardClipByReferenceCoordinates");
+        logger.warn("Executing testHardClipLowQualEnds");
 
-        //Clip whole read
-        Assert.assertEquals(readClipper.hardClipLowQualEnds((byte) 64), new GATKSAMRecord(readClipper.read.getHeader()));
+        // Testing clipping that ends inside an insertion
+        final byte[] BASES = {'A','C','G','T','A','C','G','T'};
+        final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2};
+        final String CIGAR = "1S1M5I1S";
 
-        List testList = new LinkedList();
-        testList.add(new TestParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start
-        testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start
+        final byte[] CLIPPED_BASES = {};
+        final byte[] CLIPPED_QUALS = {};
+        final String CLIPPED_CIGAR = "";
 
-        for (TestParameter p : testList) {
-            init();
-            //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
-            ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipLowQualEnds((byte) p.inputStart),
-                    ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
-                    ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
-                    p.cigar);
-        }
 
-        /* todo find a better way to test lowqual tail clipping on both sides
-        // Reverse Quals sequence
-        readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
-        testList = new LinkedList();
-        testList.add(new testParameter(1,-1,0,3,"3M1H"));//clip 1 base at end
-        testList.add(new testParameter(11,-1,0,2,"2M2H"));//clip 2 bases at end
+        GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR);
+        GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR);
+
+        ReadClipper lowQualClipper = new ReadClipper(read);
+        ClipReadsTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected);
+
 
-        for ( testParameter p : testList ) {
-            init();
-            readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
-            //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
-            testBaseQualCigar( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
-                    BASES.substring(p.substringStart,p.substringStop).getBytes(),
-                    QUALS.substring(p.substringStart,p.substringStop),
-                    p.cigar );
-        }
-        */
     }
 
     @Test(enabled = false)