diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java index e7b75f1f2..435f9901a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java @@ -328,7 +328,7 @@ public final class LocusIteratorByState extends LocusIterator { * @return true if the read should be excluded from the pileup, false otherwise */ @Requires({"rec != null", "pos > 0"}) - private boolean dontIncludeReadInPileup(GATKSAMRecord rec, long pos) { + private boolean dontIncludeReadInPileup(final GATKSAMRecord rec, final long pos) { return ReadUtils.isBaseInsideAdaptor(rec, pos); } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 9cd584d1b..504e718d0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.utils.sam; +import com.google.java.contract.Ensures; import net.sf.samtools.*; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -46,6 +47,10 @@ import java.util.Map; * if they are ever modified externally then one must also invoke the * setReadGroup() method here to ensure that the cache is kept up-to-date. * + * WARNING -- GATKSAMRecords cache several values (that are expensive to compute) + * that depending on the inferred insert size and alignment starts and stops of this read and its mate. + * Changing these values in any way will invalidate the cached value. However, we do not monitor those setter + * functions, so modifying a GATKSAMRecord in any way may result in stale cached values. */ public class GATKSAMRecord extends BAMRecord { // ReduceReads specific attribute tags @@ -70,6 +75,7 @@ public class GATKSAMRecord extends BAMRecord { private final static int UNINITIALIZED = -1; private int softStart = UNINITIALIZED; private int softEnd = UNINITIALIZED; + private Integer adapterBoundary = null; // because some values can be null, we don't want to duplicate effort private boolean retrievedReadGroup = false; @@ -561,4 +567,23 @@ public class GATKSAMRecord extends BAMRecord { } return clone; } + + /** + * A caching version of ReadUtils.getAdaptorBoundary() + * + * @see ReadUtils.getAdaptorBoundary(SAMRecord) for more information about the meaning of this function + * + * WARNING -- this function caches a value depending on the inferred insert size and alignment starts + * and stops of this read and its mate. Changing these values in any way will invalidate the cached value. + * However, we do not monitor those setter functions, so modifying a GATKSAMRecord in any way may + * result in stale cached values. + * + * @return the result of calling ReadUtils.getAdaptorBoundary on this read + */ + @Ensures("result == ReadUtils.getAdaptorBoundary(this)") + public int getAdaptorBoundary() { + if ( adapterBoundary == null ) + adapterBoundary = ReadUtils.getAdaptorBoundary(this); + return adapterBoundary; + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 1488f7269..c17e81b9c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -169,7 +169,7 @@ public class ReadUtils { * @return whether or not the base is in the adaptor */ public static boolean isBaseInsideAdaptor(final GATKSAMRecord read, long basePos) { - final int adaptorBoundary = getAdaptorBoundary(read); + final int adaptorBoundary = read.getAdaptorBoundary(); if (adaptorBoundary == CANNOT_COMPUTE_ADAPTOR_BOUNDARY || read.getInferredInsertSize() > DEFAULT_ADAPTOR_SIZE) return false; diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index f6246b137..792361ac3 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -467,19 +467,6 @@ public class ActivityProfileUnitTest extends BaseTest { return -1; } - -// private int findCutSite(final List probs) { -// for ( int i = probs.size() - 2; i > 0; i-- ) { -// double prev = probs.get(i + 1); -// double next = probs.get(i-1); -// double cur = probs.get(i); -// if ( cur < next && cur < prev ) -// return i + 1; -// } -// -// return -1; -// } - @Test(dataProvider = "ActiveRegionCutTests") public void testActiveRegionCutTests(final int minRegionSize, final int maxRegionSize, final int expectedRegionSize, final List probs) { final ActivityProfile profile = new ActivityProfile(genomeLocParser); diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java index e5e28e1f6..4e7a783e5 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java @@ -51,7 +51,7 @@ import java.util.*; * testing of the new (non-legacy) version of LocusIteratorByState */ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { - private static final boolean DEBUG = false; + private static final boolean DEBUG = true; protected LocusIteratorByState li; @Test(enabled = true) @@ -686,4 +686,84 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { return read; } } + + // --------------------------------------------------------------------------- + // + // make sure that adapter clipping is working properly in LIBS + // + // --------------------------------------------------------------------------- + @DataProvider(name = "AdapterClippingTest") + public Object[][] makeAdapterClippingTest() { + final List tests = new LinkedList(); + + final int start = 10; +// for ( final int goodBases : Arrays.asList(10) ) { +// for ( final int nClipsOnTheRight : Arrays.asList(0)) { + for ( final int goodBases : Arrays.asList(10, 20, 30) ) { + for ( final int nClips : Arrays.asList(0, 1, 2, 10)) { + for ( final boolean onLeft : Arrays.asList(true, false) ) { + final int readLength = nClips + goodBases; + GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1" , 0, start, readLength); + read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); + read.setBaseQualities(Utils.dupBytes((byte) '@', readLength)); + read.setCigarString(readLength + "M"); + + if ( onLeft ) { + read.setReadNegativeStrandFlag(true); + read.setMateAlignmentStart(start + nClips); + read.setInferredInsertSize(readLength); + tests.add(new Object[]{nClips, goodBases, 0, read}); + } else { + read.setReadNegativeStrandFlag(false); + read.setMateAlignmentStart(start - 1); + read.setInferredInsertSize(goodBases - 1); + tests.add(new Object[]{0, goodBases, nClips, read}); + } + } + } + } + +// for ( final int nClipsOnTheLeft : Arrays.asList(0, 1, 2, 10)) { +// final int readLength = nClipsOnTheLeft + goodBases; +// GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1" , 0, start, readLength); +// read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); +// read.setBaseQualities(Utils.dupBytes((byte) '@', readLength)); +// read.setCigarString(readLength + "M"); +// read.setReadNegativeStrandFlag(true); +// +// read.setMateAlignmentStart(start + nClipsOnTheLeft); +// read.setInferredInsertSize(readLength); +// +// tests.add(new Object[]{nClipsOnTheLeft, goodBases, 0, read}); +// } +// } + + return tests.toArray(new Object[][]{}); + } + + @Test(enabled = true, dataProvider = "AdapterClippingTest") +// @Test(enabled = true, dataProvider = "LIBS_NotHoldingTooManyReads", timeOut = 100000) + public void testAdapterClipping(final int nClipsOnLeft, final int nReadContainingPileups, final int nClipsOnRight, final GATKSAMRecord read) { + + li = new LocusIteratorByState(new FakeCloseableIterator(Collections.singletonList(read).iterator()), + createTestReadProperties(DownsamplingMethod.NONE, false), + genomeLocParser, + LocusIteratorByState.sampleListForSAMWithoutReadGroups()); + + int expectedPos = read.getAlignmentStart() + nClipsOnLeft; + int nPileups = 0; + while ( li.hasNext() ) { + final AlignmentContext next = li.next(); + Assert.assertEquals(next.getLocation().getStart(), expectedPos); +// if ( nPileups < nClipsOnLeft || nPileups > (nClipsOnLeft + nReadContainingPileups) ) +// Assert.assertEquals(next.getBasePileup().getNumberOfElements(), 0, "Expected empty pileups when the read is in the adapter clipping zone at " + nPileups); +// else +// Assert.assertEquals(next.getBasePileup().getNumberOfElements(), 1, "Expected a pileups with 1 element when the read is within the good part of the read at " + nPileups); + nPileups++; + expectedPos++; + } + + final int nExpectedPileups = nReadContainingPileups; + Assert.assertEquals(nPileups, nExpectedPileups, "Wrong number of pileups seen"); + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java index 4194aa6d5..7b48b1b69 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java @@ -27,66 +27,99 @@ package org.broadinstitute.sting.utils.sam; import org.broadinstitute.sting.BaseTest; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.LinkedList; +import java.util.List; + public class ReadUtilsUnitTest extends BaseTest { - @Test - public void testGetAdaptorBoundary() { + private interface GetAdaptorFunc { + public int getAdaptor(final GATKSAMRecord record); + } + + @DataProvider(name = "AdaptorGetter") + public Object[][] makeActiveRegionCutTests() { + final List tests = new LinkedList(); + + tests.add( new Object[]{ new GetAdaptorFunc() { + @Override public int getAdaptor(final GATKSAMRecord record) { return ReadUtils.getAdaptorBoundary(record); } + }}); + + tests.add( new Object[]{ new GetAdaptorFunc() { + @Override public int getAdaptor(final GATKSAMRecord record) { return record.getAdaptorBoundary(); } + }}); + + return tests.toArray(new Object[][]{}); + } + + private GATKSAMRecord makeRead(final int fragmentSize, final int mateStart) { final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'}; final byte[] quals = {30, 30, 30, 30, 30, 30, 30, 30}; final String cigar = "8M"; + GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, cigar); + read.setMateAlignmentStart(mateStart); + read.setInferredInsertSize(fragmentSize); + return read; + } + + @Test(dataProvider = "AdaptorGetter") + public void testGetAdaptorBoundary(final GetAdaptorFunc get) { final int fragmentSize = 10; final int mateStart = 1000; final int BEFORE = mateStart - 2; final int AFTER = mateStart + 2; int myStart, boundary; - - GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, cigar); - read.setMateAlignmentStart(mateStart); - read.setInferredInsertSize(fragmentSize); + GATKSAMRecord read; // Test case 1: positive strand, first read + read = makeRead(fragmentSize, mateStart); myStart = BEFORE; read.setAlignmentStart(myStart); read.setReadNegativeStrandFlag(false); - boundary = ReadUtils.getAdaptorBoundary(read); + boundary = get.getAdaptor(read); Assert.assertEquals(boundary, myStart + fragmentSize + 1); // Test case 2: positive strand, second read + read = makeRead(fragmentSize, mateStart); myStart = AFTER; read.setAlignmentStart(myStart); read.setReadNegativeStrandFlag(false); - boundary = ReadUtils.getAdaptorBoundary(read); + boundary = get.getAdaptor(read); Assert.assertEquals(boundary, myStart + fragmentSize + 1); // Test case 3: negative strand, second read + read = makeRead(fragmentSize, mateStart); myStart = AFTER; read.setAlignmentStart(myStart); read.setReadNegativeStrandFlag(true); - boundary = ReadUtils.getAdaptorBoundary(read); + boundary = get.getAdaptor(read); Assert.assertEquals(boundary, mateStart - 1); // Test case 4: negative strand, first read + read = makeRead(fragmentSize, mateStart); myStart = BEFORE; read.setAlignmentStart(myStart); read.setReadNegativeStrandFlag(true); - boundary = ReadUtils.getAdaptorBoundary(read); + boundary = get.getAdaptor(read); Assert.assertEquals(boundary, mateStart - 1); // Test case 5: mate is mapped to another chromosome (test both strands) + read = makeRead(fragmentSize, mateStart); read.setInferredInsertSize(0); read.setReadNegativeStrandFlag(true); - boundary = ReadUtils.getAdaptorBoundary(read); + boundary = get.getAdaptor(read); Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY); read.setReadNegativeStrandFlag(false); - boundary = ReadUtils.getAdaptorBoundary(read); + boundary = get.getAdaptor(read); Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY); read.setInferredInsertSize(10); // Test case 6: read is unmapped + read = makeRead(fragmentSize, mateStart); read.setReadUnmappedFlag(true); - boundary = ReadUtils.getAdaptorBoundary(read); + boundary = get.getAdaptor(read); Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY); read.setReadUnmappedFlag(false); @@ -94,19 +127,22 @@ public class ReadUtilsUnitTest extends BaseTest { // <--------| // |------> // first read: + read = makeRead(fragmentSize, mateStart); myStart = 980; read.setAlignmentStart(myStart); read.setInferredInsertSize(20); read.setReadNegativeStrandFlag(true); - boundary = ReadUtils.getAdaptorBoundary(read); + boundary = get.getAdaptor(read); Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY); // second read: + read = makeRead(fragmentSize, mateStart); myStart = 1000; read.setAlignmentStart(myStart); + read.setInferredInsertSize(20); read.setMateAlignmentStart(980); read.setReadNegativeStrandFlag(false); - boundary = ReadUtils.getAdaptorBoundary(read); + boundary = get.getAdaptor(read); Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY); } }