Merge branch 'master' of github.com:broadinstitute/gsa-unstable
This commit is contained in:
commit
99cb8d68e9
|
|
@ -328,7 +328,7 @@ public final class LocusIteratorByState extends LocusIterator {
|
||||||
* @return true if the read should be excluded from the pileup, false otherwise
|
* @return true if the read should be excluded from the pileup, false otherwise
|
||||||
*/
|
*/
|
||||||
@Requires({"rec != null", "pos > 0"})
|
@Requires({"rec != null", "pos > 0"})
|
||||||
private boolean dontIncludeReadInPileup(GATKSAMRecord rec, long pos) {
|
private boolean dontIncludeReadInPileup(final GATKSAMRecord rec, final long pos) {
|
||||||
return ReadUtils.isBaseInsideAdaptor(rec, pos);
|
return ReadUtils.isBaseInsideAdaptor(rec, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.sam;
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
@ -46,6 +47,10 @@ import java.util.Map;
|
||||||
* if they are ever modified externally then one must also invoke the
|
* if they are ever modified externally then one must also invoke the
|
||||||
* setReadGroup() method here to ensure that the cache is kept up-to-date.
|
* setReadGroup() method here to ensure that the cache is kept up-to-date.
|
||||||
*
|
*
|
||||||
|
* WARNING -- GATKSAMRecords cache several values (that are expensive to compute)
|
||||||
|
* that depending on the inferred insert size and alignment starts and stops of this read and its mate.
|
||||||
|
* Changing these values in any way will invalidate the cached value. However, we do not monitor those setter
|
||||||
|
* functions, so modifying a GATKSAMRecord in any way may result in stale cached values.
|
||||||
*/
|
*/
|
||||||
public class GATKSAMRecord extends BAMRecord {
|
public class GATKSAMRecord extends BAMRecord {
|
||||||
// ReduceReads specific attribute tags
|
// ReduceReads specific attribute tags
|
||||||
|
|
@ -70,6 +75,7 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
private final static int UNINITIALIZED = -1;
|
private final static int UNINITIALIZED = -1;
|
||||||
private int softStart = UNINITIALIZED;
|
private int softStart = UNINITIALIZED;
|
||||||
private int softEnd = UNINITIALIZED;
|
private int softEnd = UNINITIALIZED;
|
||||||
|
private Integer adapterBoundary = null;
|
||||||
|
|
||||||
// because some values can be null, we don't want to duplicate effort
|
// because some values can be null, we don't want to duplicate effort
|
||||||
private boolean retrievedReadGroup = false;
|
private boolean retrievedReadGroup = false;
|
||||||
|
|
@ -561,4 +567,23 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
}
|
}
|
||||||
return clone;
|
return clone;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A caching version of ReadUtils.getAdaptorBoundary()
|
||||||
|
*
|
||||||
|
* @see ReadUtils.getAdaptorBoundary(SAMRecord) for more information about the meaning of this function
|
||||||
|
*
|
||||||
|
* WARNING -- this function caches a value depending on the inferred insert size and alignment starts
|
||||||
|
* and stops of this read and its mate. Changing these values in any way will invalidate the cached value.
|
||||||
|
* However, we do not monitor those setter functions, so modifying a GATKSAMRecord in any way may
|
||||||
|
* result in stale cached values.
|
||||||
|
*
|
||||||
|
* @return the result of calling ReadUtils.getAdaptorBoundary on this read
|
||||||
|
*/
|
||||||
|
@Ensures("result == ReadUtils.getAdaptorBoundary(this)")
|
||||||
|
public int getAdaptorBoundary() {
|
||||||
|
if ( adapterBoundary == null )
|
||||||
|
adapterBoundary = ReadUtils.getAdaptorBoundary(this);
|
||||||
|
return adapterBoundary;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -169,7 +169,7 @@ public class ReadUtils {
|
||||||
* @return whether or not the base is in the adaptor
|
* @return whether or not the base is in the adaptor
|
||||||
*/
|
*/
|
||||||
public static boolean isBaseInsideAdaptor(final GATKSAMRecord read, long basePos) {
|
public static boolean isBaseInsideAdaptor(final GATKSAMRecord read, long basePos) {
|
||||||
final int adaptorBoundary = getAdaptorBoundary(read);
|
final int adaptorBoundary = read.getAdaptorBoundary();
|
||||||
if (adaptorBoundary == CANNOT_COMPUTE_ADAPTOR_BOUNDARY || read.getInferredInsertSize() > DEFAULT_ADAPTOR_SIZE)
|
if (adaptorBoundary == CANNOT_COMPUTE_ADAPTOR_BOUNDARY || read.getInferredInsertSize() > DEFAULT_ADAPTOR_SIZE)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -467,19 +467,6 @@ public class ActivityProfileUnitTest extends BaseTest {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// private int findCutSite(final List<Double> probs) {
|
|
||||||
// for ( int i = probs.size() - 2; i > 0; i-- ) {
|
|
||||||
// double prev = probs.get(i + 1);
|
|
||||||
// double next = probs.get(i-1);
|
|
||||||
// double cur = probs.get(i);
|
|
||||||
// if ( cur < next && cur < prev )
|
|
||||||
// return i + 1;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return -1;
|
|
||||||
// }
|
|
||||||
|
|
||||||
@Test(dataProvider = "ActiveRegionCutTests")
|
@Test(dataProvider = "ActiveRegionCutTests")
|
||||||
public void testActiveRegionCutTests(final int minRegionSize, final int maxRegionSize, final int expectedRegionSize, final List<Double> probs) {
|
public void testActiveRegionCutTests(final int minRegionSize, final int maxRegionSize, final int expectedRegionSize, final List<Double> probs) {
|
||||||
final ActivityProfile profile = new ActivityProfile(genomeLocParser);
|
final ActivityProfile profile = new ActivityProfile(genomeLocParser);
|
||||||
|
|
|
||||||
|
|
@ -51,7 +51,7 @@ import java.util.*;
|
||||||
* testing of the new (non-legacy) version of LocusIteratorByState
|
* testing of the new (non-legacy) version of LocusIteratorByState
|
||||||
*/
|
*/
|
||||||
public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
private static final boolean DEBUG = false;
|
private static final boolean DEBUG = true;
|
||||||
protected LocusIteratorByState li;
|
protected LocusIteratorByState li;
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
|
|
@ -686,4 +686,84 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
return read;
|
return read;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// make sure that adapter clipping is working properly in LIBS
|
||||||
|
//
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
@DataProvider(name = "AdapterClippingTest")
|
||||||
|
public Object[][] makeAdapterClippingTest() {
|
||||||
|
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||||
|
|
||||||
|
final int start = 10;
|
||||||
|
// for ( final int goodBases : Arrays.asList(10) ) {
|
||||||
|
// for ( final int nClipsOnTheRight : Arrays.asList(0)) {
|
||||||
|
for ( final int goodBases : Arrays.asList(10, 20, 30) ) {
|
||||||
|
for ( final int nClips : Arrays.asList(0, 1, 2, 10)) {
|
||||||
|
for ( final boolean onLeft : Arrays.asList(true, false) ) {
|
||||||
|
final int readLength = nClips + goodBases;
|
||||||
|
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1" , 0, start, readLength);
|
||||||
|
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||||
|
read.setBaseQualities(Utils.dupBytes((byte) '@', readLength));
|
||||||
|
read.setCigarString(readLength + "M");
|
||||||
|
|
||||||
|
if ( onLeft ) {
|
||||||
|
read.setReadNegativeStrandFlag(true);
|
||||||
|
read.setMateAlignmentStart(start + nClips);
|
||||||
|
read.setInferredInsertSize(readLength);
|
||||||
|
tests.add(new Object[]{nClips, goodBases, 0, read});
|
||||||
|
} else {
|
||||||
|
read.setReadNegativeStrandFlag(false);
|
||||||
|
read.setMateAlignmentStart(start - 1);
|
||||||
|
read.setInferredInsertSize(goodBases - 1);
|
||||||
|
tests.add(new Object[]{0, goodBases, nClips, read});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// for ( final int nClipsOnTheLeft : Arrays.asList(0, 1, 2, 10)) {
|
||||||
|
// final int readLength = nClipsOnTheLeft + goodBases;
|
||||||
|
// GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1" , 0, start, readLength);
|
||||||
|
// read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||||
|
// read.setBaseQualities(Utils.dupBytes((byte) '@', readLength));
|
||||||
|
// read.setCigarString(readLength + "M");
|
||||||
|
// read.setReadNegativeStrandFlag(true);
|
||||||
|
//
|
||||||
|
// read.setMateAlignmentStart(start + nClipsOnTheLeft);
|
||||||
|
// read.setInferredInsertSize(readLength);
|
||||||
|
//
|
||||||
|
// tests.add(new Object[]{nClipsOnTheLeft, goodBases, 0, read});
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true, dataProvider = "AdapterClippingTest")
|
||||||
|
// @Test(enabled = true, dataProvider = "LIBS_NotHoldingTooManyReads", timeOut = 100000)
|
||||||
|
public void testAdapterClipping(final int nClipsOnLeft, final int nReadContainingPileups, final int nClipsOnRight, final GATKSAMRecord read) {
|
||||||
|
|
||||||
|
li = new LocusIteratorByState(new FakeCloseableIterator<GATKSAMRecord>(Collections.singletonList(read).iterator()),
|
||||||
|
createTestReadProperties(DownsamplingMethod.NONE, false),
|
||||||
|
genomeLocParser,
|
||||||
|
LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||||
|
|
||||||
|
int expectedPos = read.getAlignmentStart() + nClipsOnLeft;
|
||||||
|
int nPileups = 0;
|
||||||
|
while ( li.hasNext() ) {
|
||||||
|
final AlignmentContext next = li.next();
|
||||||
|
Assert.assertEquals(next.getLocation().getStart(), expectedPos);
|
||||||
|
// if ( nPileups < nClipsOnLeft || nPileups > (nClipsOnLeft + nReadContainingPileups) )
|
||||||
|
// Assert.assertEquals(next.getBasePileup().getNumberOfElements(), 0, "Expected empty pileups when the read is in the adapter clipping zone at " + nPileups);
|
||||||
|
// else
|
||||||
|
// Assert.assertEquals(next.getBasePileup().getNumberOfElements(), 1, "Expected a pileups with 1 element when the read is within the good part of the read at " + nPileups);
|
||||||
|
nPileups++;
|
||||||
|
expectedPos++;
|
||||||
|
}
|
||||||
|
|
||||||
|
final int nExpectedPileups = nReadContainingPileups;
|
||||||
|
Assert.assertEquals(nPileups, nExpectedPileups, "Wrong number of pileups seen");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -27,66 +27,99 @@ package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
public class ReadUtilsUnitTest extends BaseTest {
|
public class ReadUtilsUnitTest extends BaseTest {
|
||||||
@Test
|
private interface GetAdaptorFunc {
|
||||||
public void testGetAdaptorBoundary() {
|
public int getAdaptor(final GATKSAMRecord record);
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "AdaptorGetter")
|
||||||
|
public Object[][] makeActiveRegionCutTests() {
|
||||||
|
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||||
|
|
||||||
|
tests.add( new Object[]{ new GetAdaptorFunc() {
|
||||||
|
@Override public int getAdaptor(final GATKSAMRecord record) { return ReadUtils.getAdaptorBoundary(record); }
|
||||||
|
}});
|
||||||
|
|
||||||
|
tests.add( new Object[]{ new GetAdaptorFunc() {
|
||||||
|
@Override public int getAdaptor(final GATKSAMRecord record) { return record.getAdaptorBoundary(); }
|
||||||
|
}});
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
private GATKSAMRecord makeRead(final int fragmentSize, final int mateStart) {
|
||||||
final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};
|
final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};
|
||||||
final byte[] quals = {30, 30, 30, 30, 30, 30, 30, 30};
|
final byte[] quals = {30, 30, 30, 30, 30, 30, 30, 30};
|
||||||
final String cigar = "8M";
|
final String cigar = "8M";
|
||||||
|
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, cigar);
|
||||||
|
read.setMateAlignmentStart(mateStart);
|
||||||
|
read.setInferredInsertSize(fragmentSize);
|
||||||
|
return read;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "AdaptorGetter")
|
||||||
|
public void testGetAdaptorBoundary(final GetAdaptorFunc get) {
|
||||||
final int fragmentSize = 10;
|
final int fragmentSize = 10;
|
||||||
final int mateStart = 1000;
|
final int mateStart = 1000;
|
||||||
final int BEFORE = mateStart - 2;
|
final int BEFORE = mateStart - 2;
|
||||||
final int AFTER = mateStart + 2;
|
final int AFTER = mateStart + 2;
|
||||||
int myStart, boundary;
|
int myStart, boundary;
|
||||||
|
GATKSAMRecord read;
|
||||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, cigar);
|
|
||||||
read.setMateAlignmentStart(mateStart);
|
|
||||||
read.setInferredInsertSize(fragmentSize);
|
|
||||||
|
|
||||||
// Test case 1: positive strand, first read
|
// Test case 1: positive strand, first read
|
||||||
|
read = makeRead(fragmentSize, mateStart);
|
||||||
myStart = BEFORE;
|
myStart = BEFORE;
|
||||||
read.setAlignmentStart(myStart);
|
read.setAlignmentStart(myStart);
|
||||||
read.setReadNegativeStrandFlag(false);
|
read.setReadNegativeStrandFlag(false);
|
||||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
boundary = get.getAdaptor(read);
|
||||||
Assert.assertEquals(boundary, myStart + fragmentSize + 1);
|
Assert.assertEquals(boundary, myStart + fragmentSize + 1);
|
||||||
|
|
||||||
// Test case 2: positive strand, second read
|
// Test case 2: positive strand, second read
|
||||||
|
read = makeRead(fragmentSize, mateStart);
|
||||||
myStart = AFTER;
|
myStart = AFTER;
|
||||||
read.setAlignmentStart(myStart);
|
read.setAlignmentStart(myStart);
|
||||||
read.setReadNegativeStrandFlag(false);
|
read.setReadNegativeStrandFlag(false);
|
||||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
boundary = get.getAdaptor(read);
|
||||||
Assert.assertEquals(boundary, myStart + fragmentSize + 1);
|
Assert.assertEquals(boundary, myStart + fragmentSize + 1);
|
||||||
|
|
||||||
// Test case 3: negative strand, second read
|
// Test case 3: negative strand, second read
|
||||||
|
read = makeRead(fragmentSize, mateStart);
|
||||||
myStart = AFTER;
|
myStart = AFTER;
|
||||||
read.setAlignmentStart(myStart);
|
read.setAlignmentStart(myStart);
|
||||||
read.setReadNegativeStrandFlag(true);
|
read.setReadNegativeStrandFlag(true);
|
||||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
boundary = get.getAdaptor(read);
|
||||||
Assert.assertEquals(boundary, mateStart - 1);
|
Assert.assertEquals(boundary, mateStart - 1);
|
||||||
|
|
||||||
// Test case 4: negative strand, first read
|
// Test case 4: negative strand, first read
|
||||||
|
read = makeRead(fragmentSize, mateStart);
|
||||||
myStart = BEFORE;
|
myStart = BEFORE;
|
||||||
read.setAlignmentStart(myStart);
|
read.setAlignmentStart(myStart);
|
||||||
read.setReadNegativeStrandFlag(true);
|
read.setReadNegativeStrandFlag(true);
|
||||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
boundary = get.getAdaptor(read);
|
||||||
Assert.assertEquals(boundary, mateStart - 1);
|
Assert.assertEquals(boundary, mateStart - 1);
|
||||||
|
|
||||||
// Test case 5: mate is mapped to another chromosome (test both strands)
|
// Test case 5: mate is mapped to another chromosome (test both strands)
|
||||||
|
read = makeRead(fragmentSize, mateStart);
|
||||||
read.setInferredInsertSize(0);
|
read.setInferredInsertSize(0);
|
||||||
read.setReadNegativeStrandFlag(true);
|
read.setReadNegativeStrandFlag(true);
|
||||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
boundary = get.getAdaptor(read);
|
||||||
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
||||||
read.setReadNegativeStrandFlag(false);
|
read.setReadNegativeStrandFlag(false);
|
||||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
boundary = get.getAdaptor(read);
|
||||||
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
||||||
read.setInferredInsertSize(10);
|
read.setInferredInsertSize(10);
|
||||||
|
|
||||||
// Test case 6: read is unmapped
|
// Test case 6: read is unmapped
|
||||||
|
read = makeRead(fragmentSize, mateStart);
|
||||||
read.setReadUnmappedFlag(true);
|
read.setReadUnmappedFlag(true);
|
||||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
boundary = get.getAdaptor(read);
|
||||||
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
||||||
read.setReadUnmappedFlag(false);
|
read.setReadUnmappedFlag(false);
|
||||||
|
|
||||||
|
|
@ -94,19 +127,22 @@ public class ReadUtilsUnitTest extends BaseTest {
|
||||||
// <--------|
|
// <--------|
|
||||||
// |------>
|
// |------>
|
||||||
// first read:
|
// first read:
|
||||||
|
read = makeRead(fragmentSize, mateStart);
|
||||||
myStart = 980;
|
myStart = 980;
|
||||||
read.setAlignmentStart(myStart);
|
read.setAlignmentStart(myStart);
|
||||||
read.setInferredInsertSize(20);
|
read.setInferredInsertSize(20);
|
||||||
read.setReadNegativeStrandFlag(true);
|
read.setReadNegativeStrandFlag(true);
|
||||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
boundary = get.getAdaptor(read);
|
||||||
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
||||||
|
|
||||||
// second read:
|
// second read:
|
||||||
|
read = makeRead(fragmentSize, mateStart);
|
||||||
myStart = 1000;
|
myStart = 1000;
|
||||||
read.setAlignmentStart(myStart);
|
read.setAlignmentStart(myStart);
|
||||||
|
read.setInferredInsertSize(20);
|
||||||
read.setMateAlignmentStart(980);
|
read.setMateAlignmentStart(980);
|
||||||
read.setReadNegativeStrandFlag(false);
|
read.setReadNegativeStrandFlag(false);
|
||||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
boundary = get.getAdaptor(read);
|
||||||
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue