Merge branch 'master' of github.com:broadinstitute/gsa-unstable
This commit is contained in:
commit
99cb8d68e9
|
|
@ -328,7 +328,7 @@ public final class LocusIteratorByState extends LocusIterator {
|
|||
* @return true if the read should be excluded from the pileup, false otherwise
|
||||
*/
|
||||
@Requires({"rec != null", "pos > 0"})
|
||||
private boolean dontIncludeReadInPileup(GATKSAMRecord rec, long pos) {
|
||||
private boolean dontIncludeReadInPileup(final GATKSAMRecord rec, final long pos) {
|
||||
return ReadUtils.isBaseInsideAdaptor(rec, pos);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -46,6 +47,10 @@ import java.util.Map;
|
|||
* if they are ever modified externally then one must also invoke the
|
||||
* setReadGroup() method here to ensure that the cache is kept up-to-date.
|
||||
*
|
||||
* WARNING -- GATKSAMRecords cache several values (that are expensive to compute)
|
||||
* that depending on the inferred insert size and alignment starts and stops of this read and its mate.
|
||||
* Changing these values in any way will invalidate the cached value. However, we do not monitor those setter
|
||||
* functions, so modifying a GATKSAMRecord in any way may result in stale cached values.
|
||||
*/
|
||||
public class GATKSAMRecord extends BAMRecord {
|
||||
// ReduceReads specific attribute tags
|
||||
|
|
@ -70,6 +75,7 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
private final static int UNINITIALIZED = -1;
|
||||
private int softStart = UNINITIALIZED;
|
||||
private int softEnd = UNINITIALIZED;
|
||||
private Integer adapterBoundary = null;
|
||||
|
||||
// because some values can be null, we don't want to duplicate effort
|
||||
private boolean retrievedReadGroup = false;
|
||||
|
|
@ -561,4 +567,23 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
}
|
||||
return clone;
|
||||
}
|
||||
|
||||
/**
|
||||
* A caching version of ReadUtils.getAdaptorBoundary()
|
||||
*
|
||||
* @see ReadUtils.getAdaptorBoundary(SAMRecord) for more information about the meaning of this function
|
||||
*
|
||||
* WARNING -- this function caches a value depending on the inferred insert size and alignment starts
|
||||
* and stops of this read and its mate. Changing these values in any way will invalidate the cached value.
|
||||
* However, we do not monitor those setter functions, so modifying a GATKSAMRecord in any way may
|
||||
* result in stale cached values.
|
||||
*
|
||||
* @return the result of calling ReadUtils.getAdaptorBoundary on this read
|
||||
*/
|
||||
@Ensures("result == ReadUtils.getAdaptorBoundary(this)")
|
||||
public int getAdaptorBoundary() {
|
||||
if ( adapterBoundary == null )
|
||||
adapterBoundary = ReadUtils.getAdaptorBoundary(this);
|
||||
return adapterBoundary;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -169,7 +169,7 @@ public class ReadUtils {
|
|||
* @return whether or not the base is in the adaptor
|
||||
*/
|
||||
public static boolean isBaseInsideAdaptor(final GATKSAMRecord read, long basePos) {
|
||||
final int adaptorBoundary = getAdaptorBoundary(read);
|
||||
final int adaptorBoundary = read.getAdaptorBoundary();
|
||||
if (adaptorBoundary == CANNOT_COMPUTE_ADAPTOR_BOUNDARY || read.getInferredInsertSize() > DEFAULT_ADAPTOR_SIZE)
|
||||
return false;
|
||||
|
||||
|
|
|
|||
|
|
@ -467,19 +467,6 @@ public class ActivityProfileUnitTest extends BaseTest {
|
|||
return -1;
|
||||
}
|
||||
|
||||
|
||||
// private int findCutSite(final List<Double> probs) {
|
||||
// for ( int i = probs.size() - 2; i > 0; i-- ) {
|
||||
// double prev = probs.get(i + 1);
|
||||
// double next = probs.get(i-1);
|
||||
// double cur = probs.get(i);
|
||||
// if ( cur < next && cur < prev )
|
||||
// return i + 1;
|
||||
// }
|
||||
//
|
||||
// return -1;
|
||||
// }
|
||||
|
||||
@Test(dataProvider = "ActiveRegionCutTests")
|
||||
public void testActiveRegionCutTests(final int minRegionSize, final int maxRegionSize, final int expectedRegionSize, final List<Double> probs) {
|
||||
final ActivityProfile profile = new ActivityProfile(genomeLocParser);
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ import java.util.*;
|
|||
* testing of the new (non-legacy) version of LocusIteratorByState
|
||||
*/
|
||||
public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||
private static final boolean DEBUG = false;
|
||||
private static final boolean DEBUG = true;
|
||||
protected LocusIteratorByState li;
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
@ -686,4 +686,84 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
return read;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// make sure that adapter clipping is working properly in LIBS
|
||||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
@DataProvider(name = "AdapterClippingTest")
|
||||
public Object[][] makeAdapterClippingTest() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
final int start = 10;
|
||||
// for ( final int goodBases : Arrays.asList(10) ) {
|
||||
// for ( final int nClipsOnTheRight : Arrays.asList(0)) {
|
||||
for ( final int goodBases : Arrays.asList(10, 20, 30) ) {
|
||||
for ( final int nClips : Arrays.asList(0, 1, 2, 10)) {
|
||||
for ( final boolean onLeft : Arrays.asList(true, false) ) {
|
||||
final int readLength = nClips + goodBases;
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1" , 0, start, readLength);
|
||||
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||
read.setBaseQualities(Utils.dupBytes((byte) '@', readLength));
|
||||
read.setCigarString(readLength + "M");
|
||||
|
||||
if ( onLeft ) {
|
||||
read.setReadNegativeStrandFlag(true);
|
||||
read.setMateAlignmentStart(start + nClips);
|
||||
read.setInferredInsertSize(readLength);
|
||||
tests.add(new Object[]{nClips, goodBases, 0, read});
|
||||
} else {
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
read.setMateAlignmentStart(start - 1);
|
||||
read.setInferredInsertSize(goodBases - 1);
|
||||
tests.add(new Object[]{0, goodBases, nClips, read});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// for ( final int nClipsOnTheLeft : Arrays.asList(0, 1, 2, 10)) {
|
||||
// final int readLength = nClipsOnTheLeft + goodBases;
|
||||
// GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1" , 0, start, readLength);
|
||||
// read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||
// read.setBaseQualities(Utils.dupBytes((byte) '@', readLength));
|
||||
// read.setCigarString(readLength + "M");
|
||||
// read.setReadNegativeStrandFlag(true);
|
||||
//
|
||||
// read.setMateAlignmentStart(start + nClipsOnTheLeft);
|
||||
// read.setInferredInsertSize(readLength);
|
||||
//
|
||||
// tests.add(new Object[]{nClipsOnTheLeft, goodBases, 0, read});
|
||||
// }
|
||||
// }
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "AdapterClippingTest")
|
||||
// @Test(enabled = true, dataProvider = "LIBS_NotHoldingTooManyReads", timeOut = 100000)
|
||||
public void testAdapterClipping(final int nClipsOnLeft, final int nReadContainingPileups, final int nClipsOnRight, final GATKSAMRecord read) {
|
||||
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<GATKSAMRecord>(Collections.singletonList(read).iterator()),
|
||||
createTestReadProperties(DownsamplingMethod.NONE, false),
|
||||
genomeLocParser,
|
||||
LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
|
||||
int expectedPos = read.getAlignmentStart() + nClipsOnLeft;
|
||||
int nPileups = 0;
|
||||
while ( li.hasNext() ) {
|
||||
final AlignmentContext next = li.next();
|
||||
Assert.assertEquals(next.getLocation().getStart(), expectedPos);
|
||||
// if ( nPileups < nClipsOnLeft || nPileups > (nClipsOnLeft + nReadContainingPileups) )
|
||||
// Assert.assertEquals(next.getBasePileup().getNumberOfElements(), 0, "Expected empty pileups when the read is in the adapter clipping zone at " + nPileups);
|
||||
// else
|
||||
// Assert.assertEquals(next.getBasePileup().getNumberOfElements(), 1, "Expected a pileups with 1 element when the read is within the good part of the read at " + nPileups);
|
||||
nPileups++;
|
||||
expectedPos++;
|
||||
}
|
||||
|
||||
final int nExpectedPileups = nReadContainingPileups;
|
||||
Assert.assertEquals(nPileups, nExpectedPileups, "Wrong number of pileups seen");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,66 +27,99 @@ package org.broadinstitute.sting.utils.sam;
|
|||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class ReadUtilsUnitTest extends BaseTest {
|
||||
@Test
|
||||
public void testGetAdaptorBoundary() {
|
||||
private interface GetAdaptorFunc {
|
||||
public int getAdaptor(final GATKSAMRecord record);
|
||||
}
|
||||
|
||||
@DataProvider(name = "AdaptorGetter")
|
||||
public Object[][] makeActiveRegionCutTests() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
tests.add( new Object[]{ new GetAdaptorFunc() {
|
||||
@Override public int getAdaptor(final GATKSAMRecord record) { return ReadUtils.getAdaptorBoundary(record); }
|
||||
}});
|
||||
|
||||
tests.add( new Object[]{ new GetAdaptorFunc() {
|
||||
@Override public int getAdaptor(final GATKSAMRecord record) { return record.getAdaptorBoundary(); }
|
||||
}});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
private GATKSAMRecord makeRead(final int fragmentSize, final int mateStart) {
|
||||
final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};
|
||||
final byte[] quals = {30, 30, 30, 30, 30, 30, 30, 30};
|
||||
final String cigar = "8M";
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, cigar);
|
||||
read.setMateAlignmentStart(mateStart);
|
||||
read.setInferredInsertSize(fragmentSize);
|
||||
return read;
|
||||
}
|
||||
|
||||
@Test(dataProvider = "AdaptorGetter")
|
||||
public void testGetAdaptorBoundary(final GetAdaptorFunc get) {
|
||||
final int fragmentSize = 10;
|
||||
final int mateStart = 1000;
|
||||
final int BEFORE = mateStart - 2;
|
||||
final int AFTER = mateStart + 2;
|
||||
int myStart, boundary;
|
||||
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, cigar);
|
||||
read.setMateAlignmentStart(mateStart);
|
||||
read.setInferredInsertSize(fragmentSize);
|
||||
GATKSAMRecord read;
|
||||
|
||||
// Test case 1: positive strand, first read
|
||||
read = makeRead(fragmentSize, mateStart);
|
||||
myStart = BEFORE;
|
||||
read.setAlignmentStart(myStart);
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
||||
boundary = get.getAdaptor(read);
|
||||
Assert.assertEquals(boundary, myStart + fragmentSize + 1);
|
||||
|
||||
// Test case 2: positive strand, second read
|
||||
read = makeRead(fragmentSize, mateStart);
|
||||
myStart = AFTER;
|
||||
read.setAlignmentStart(myStart);
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
||||
boundary = get.getAdaptor(read);
|
||||
Assert.assertEquals(boundary, myStart + fragmentSize + 1);
|
||||
|
||||
// Test case 3: negative strand, second read
|
||||
read = makeRead(fragmentSize, mateStart);
|
||||
myStart = AFTER;
|
||||
read.setAlignmentStart(myStart);
|
||||
read.setReadNegativeStrandFlag(true);
|
||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
||||
boundary = get.getAdaptor(read);
|
||||
Assert.assertEquals(boundary, mateStart - 1);
|
||||
|
||||
// Test case 4: negative strand, first read
|
||||
read = makeRead(fragmentSize, mateStart);
|
||||
myStart = BEFORE;
|
||||
read.setAlignmentStart(myStart);
|
||||
read.setReadNegativeStrandFlag(true);
|
||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
||||
boundary = get.getAdaptor(read);
|
||||
Assert.assertEquals(boundary, mateStart - 1);
|
||||
|
||||
// Test case 5: mate is mapped to another chromosome (test both strands)
|
||||
read = makeRead(fragmentSize, mateStart);
|
||||
read.setInferredInsertSize(0);
|
||||
read.setReadNegativeStrandFlag(true);
|
||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
||||
boundary = get.getAdaptor(read);
|
||||
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
||||
boundary = get.getAdaptor(read);
|
||||
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
||||
read.setInferredInsertSize(10);
|
||||
|
||||
// Test case 6: read is unmapped
|
||||
read = makeRead(fragmentSize, mateStart);
|
||||
read.setReadUnmappedFlag(true);
|
||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
||||
boundary = get.getAdaptor(read);
|
||||
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
||||
read.setReadUnmappedFlag(false);
|
||||
|
||||
|
|
@ -94,19 +127,22 @@ public class ReadUtilsUnitTest extends BaseTest {
|
|||
// <--------|
|
||||
// |------>
|
||||
// first read:
|
||||
read = makeRead(fragmentSize, mateStart);
|
||||
myStart = 980;
|
||||
read.setAlignmentStart(myStart);
|
||||
read.setInferredInsertSize(20);
|
||||
read.setReadNegativeStrandFlag(true);
|
||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
||||
boundary = get.getAdaptor(read);
|
||||
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
||||
|
||||
// second read:
|
||||
read = makeRead(fragmentSize, mateStart);
|
||||
myStart = 1000;
|
||||
read.setAlignmentStart(myStart);
|
||||
read.setInferredInsertSize(20);
|
||||
read.setMateAlignmentStart(980);
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
||||
boundary = get.getAdaptor(read);
|
||||
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue