Merge branch 'master' of github.com:broadinstitute/gsa-unstable

This commit is contained in:
Ami Levy-Moonshine 2013-01-25 16:07:38 -05:00
commit 99cb8d68e9
6 changed files with 159 additions and 31 deletions

View File

@ -328,7 +328,7 @@ public final class LocusIteratorByState extends LocusIterator {
* @return true if the read should be excluded from the pileup, false otherwise
*/
@Requires({"rec != null", "pos > 0"})
private boolean dontIncludeReadInPileup(GATKSAMRecord rec, long pos) {
private boolean dontIncludeReadInPileup(final GATKSAMRecord rec, final long pos) {
return ReadUtils.isBaseInsideAdaptor(rec, pos);
}

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.utils.sam;
import com.google.java.contract.Ensures;
import net.sf.samtools.*;
import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -46,6 +47,10 @@ import java.util.Map;
* if they are ever modified externally then one must also invoke the
* setReadGroup() method here to ensure that the cache is kept up-to-date.
*
* WARNING -- GATKSAMRecords cache several values (that are expensive to compute)
* that depending on the inferred insert size and alignment starts and stops of this read and its mate.
* Changing these values in any way will invalidate the cached value. However, we do not monitor those setter
* functions, so modifying a GATKSAMRecord in any way may result in stale cached values.
*/
public class GATKSAMRecord extends BAMRecord {
// ReduceReads specific attribute tags
@ -70,6 +75,7 @@ public class GATKSAMRecord extends BAMRecord {
private final static int UNINITIALIZED = -1;
private int softStart = UNINITIALIZED;
private int softEnd = UNINITIALIZED;
private Integer adapterBoundary = null;
// because some values can be null, we don't want to duplicate effort
private boolean retrievedReadGroup = false;
@ -561,4 +567,23 @@ public class GATKSAMRecord extends BAMRecord {
}
return clone;
}
/**
* A caching version of ReadUtils.getAdaptorBoundary()
*
* @see ReadUtils.getAdaptorBoundary(SAMRecord) for more information about the meaning of this function
*
* WARNING -- this function caches a value depending on the inferred insert size and alignment starts
* and stops of this read and its mate. Changing these values in any way will invalidate the cached value.
* However, we do not monitor those setter functions, so modifying a GATKSAMRecord in any way may
* result in stale cached values.
*
* @return the result of calling ReadUtils.getAdaptorBoundary on this read
*/
@Ensures("result == ReadUtils.getAdaptorBoundary(this)")
public int getAdaptorBoundary() {
if ( adapterBoundary == null )
adapterBoundary = ReadUtils.getAdaptorBoundary(this);
return adapterBoundary;
}
}

View File

@ -169,7 +169,7 @@ public class ReadUtils {
* @return whether or not the base is in the adaptor
*/
public static boolean isBaseInsideAdaptor(final GATKSAMRecord read, long basePos) {
final int adaptorBoundary = getAdaptorBoundary(read);
final int adaptorBoundary = read.getAdaptorBoundary();
if (adaptorBoundary == CANNOT_COMPUTE_ADAPTOR_BOUNDARY || read.getInferredInsertSize() > DEFAULT_ADAPTOR_SIZE)
return false;

View File

@ -467,19 +467,6 @@ public class ActivityProfileUnitTest extends BaseTest {
return -1;
}
// private int findCutSite(final List<Double> probs) {
// for ( int i = probs.size() - 2; i > 0; i-- ) {
// double prev = probs.get(i + 1);
// double next = probs.get(i-1);
// double cur = probs.get(i);
// if ( cur < next && cur < prev )
// return i + 1;
// }
//
// return -1;
// }
@Test(dataProvider = "ActiveRegionCutTests")
public void testActiveRegionCutTests(final int minRegionSize, final int maxRegionSize, final int expectedRegionSize, final List<Double> probs) {
final ActivityProfile profile = new ActivityProfile(genomeLocParser);

View File

@ -51,7 +51,7 @@ import java.util.*;
* testing of the new (non-legacy) version of LocusIteratorByState
*/
public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
private static final boolean DEBUG = false;
private static final boolean DEBUG = true;
protected LocusIteratorByState li;
@Test(enabled = true)
@ -686,4 +686,84 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
return read;
}
}
// ---------------------------------------------------------------------------
//
// make sure that adapter clipping is working properly in LIBS
//
// ---------------------------------------------------------------------------
@DataProvider(name = "AdapterClippingTest")
public Object[][] makeAdapterClippingTest() {
final List<Object[]> tests = new LinkedList<Object[]>();
final int start = 10;
// for ( final int goodBases : Arrays.asList(10) ) {
// for ( final int nClipsOnTheRight : Arrays.asList(0)) {
for ( final int goodBases : Arrays.asList(10, 20, 30) ) {
for ( final int nClips : Arrays.asList(0, 1, 2, 10)) {
for ( final boolean onLeft : Arrays.asList(true, false) ) {
final int readLength = nClips + goodBases;
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1" , 0, start, readLength);
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
read.setBaseQualities(Utils.dupBytes((byte) '@', readLength));
read.setCigarString(readLength + "M");
if ( onLeft ) {
read.setReadNegativeStrandFlag(true);
read.setMateAlignmentStart(start + nClips);
read.setInferredInsertSize(readLength);
tests.add(new Object[]{nClips, goodBases, 0, read});
} else {
read.setReadNegativeStrandFlag(false);
read.setMateAlignmentStart(start - 1);
read.setInferredInsertSize(goodBases - 1);
tests.add(new Object[]{0, goodBases, nClips, read});
}
}
}
}
// for ( final int nClipsOnTheLeft : Arrays.asList(0, 1, 2, 10)) {
// final int readLength = nClipsOnTheLeft + goodBases;
// GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1" , 0, start, readLength);
// read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
// read.setBaseQualities(Utils.dupBytes((byte) '@', readLength));
// read.setCigarString(readLength + "M");
// read.setReadNegativeStrandFlag(true);
//
// read.setMateAlignmentStart(start + nClipsOnTheLeft);
// read.setInferredInsertSize(readLength);
//
// tests.add(new Object[]{nClipsOnTheLeft, goodBases, 0, read});
// }
// }
return tests.toArray(new Object[][]{});
}
@Test(enabled = true, dataProvider = "AdapterClippingTest")
// @Test(enabled = true, dataProvider = "LIBS_NotHoldingTooManyReads", timeOut = 100000)
public void testAdapterClipping(final int nClipsOnLeft, final int nReadContainingPileups, final int nClipsOnRight, final GATKSAMRecord read) {
li = new LocusIteratorByState(new FakeCloseableIterator<GATKSAMRecord>(Collections.singletonList(read).iterator()),
createTestReadProperties(DownsamplingMethod.NONE, false),
genomeLocParser,
LocusIteratorByState.sampleListForSAMWithoutReadGroups());
int expectedPos = read.getAlignmentStart() + nClipsOnLeft;
int nPileups = 0;
while ( li.hasNext() ) {
final AlignmentContext next = li.next();
Assert.assertEquals(next.getLocation().getStart(), expectedPos);
// if ( nPileups < nClipsOnLeft || nPileups > (nClipsOnLeft + nReadContainingPileups) )
// Assert.assertEquals(next.getBasePileup().getNumberOfElements(), 0, "Expected empty pileups when the read is in the adapter clipping zone at " + nPileups);
// else
// Assert.assertEquals(next.getBasePileup().getNumberOfElements(), 1, "Expected a pileups with 1 element when the read is within the good part of the read at " + nPileups);
nPileups++;
expectedPos++;
}
final int nExpectedPileups = nReadContainingPileups;
Assert.assertEquals(nPileups, nExpectedPileups, "Wrong number of pileups seen");
}
}

View File

@ -27,66 +27,99 @@ package org.broadinstitute.sting.utils.sam;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.LinkedList;
import java.util.List;
public class ReadUtilsUnitTest extends BaseTest {
@Test
public void testGetAdaptorBoundary() {
private interface GetAdaptorFunc {
public int getAdaptor(final GATKSAMRecord record);
}
@DataProvider(name = "AdaptorGetter")
public Object[][] makeActiveRegionCutTests() {
final List<Object[]> tests = new LinkedList<Object[]>();
tests.add( new Object[]{ new GetAdaptorFunc() {
@Override public int getAdaptor(final GATKSAMRecord record) { return ReadUtils.getAdaptorBoundary(record); }
}});
tests.add( new Object[]{ new GetAdaptorFunc() {
@Override public int getAdaptor(final GATKSAMRecord record) { return record.getAdaptorBoundary(); }
}});
return tests.toArray(new Object[][]{});
}
private GATKSAMRecord makeRead(final int fragmentSize, final int mateStart) {
final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};
final byte[] quals = {30, 30, 30, 30, 30, 30, 30, 30};
final String cigar = "8M";
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, cigar);
read.setMateAlignmentStart(mateStart);
read.setInferredInsertSize(fragmentSize);
return read;
}
@Test(dataProvider = "AdaptorGetter")
public void testGetAdaptorBoundary(final GetAdaptorFunc get) {
final int fragmentSize = 10;
final int mateStart = 1000;
final int BEFORE = mateStart - 2;
final int AFTER = mateStart + 2;
int myStart, boundary;
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, cigar);
read.setMateAlignmentStart(mateStart);
read.setInferredInsertSize(fragmentSize);
GATKSAMRecord read;
// Test case 1: positive strand, first read
read = makeRead(fragmentSize, mateStart);
myStart = BEFORE;
read.setAlignmentStart(myStart);
read.setReadNegativeStrandFlag(false);
boundary = ReadUtils.getAdaptorBoundary(read);
boundary = get.getAdaptor(read);
Assert.assertEquals(boundary, myStart + fragmentSize + 1);
// Test case 2: positive strand, second read
read = makeRead(fragmentSize, mateStart);
myStart = AFTER;
read.setAlignmentStart(myStart);
read.setReadNegativeStrandFlag(false);
boundary = ReadUtils.getAdaptorBoundary(read);
boundary = get.getAdaptor(read);
Assert.assertEquals(boundary, myStart + fragmentSize + 1);
// Test case 3: negative strand, second read
read = makeRead(fragmentSize, mateStart);
myStart = AFTER;
read.setAlignmentStart(myStart);
read.setReadNegativeStrandFlag(true);
boundary = ReadUtils.getAdaptorBoundary(read);
boundary = get.getAdaptor(read);
Assert.assertEquals(boundary, mateStart - 1);
// Test case 4: negative strand, first read
read = makeRead(fragmentSize, mateStart);
myStart = BEFORE;
read.setAlignmentStart(myStart);
read.setReadNegativeStrandFlag(true);
boundary = ReadUtils.getAdaptorBoundary(read);
boundary = get.getAdaptor(read);
Assert.assertEquals(boundary, mateStart - 1);
// Test case 5: mate is mapped to another chromosome (test both strands)
read = makeRead(fragmentSize, mateStart);
read.setInferredInsertSize(0);
read.setReadNegativeStrandFlag(true);
boundary = ReadUtils.getAdaptorBoundary(read);
boundary = get.getAdaptor(read);
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
read.setReadNegativeStrandFlag(false);
boundary = ReadUtils.getAdaptorBoundary(read);
boundary = get.getAdaptor(read);
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
read.setInferredInsertSize(10);
// Test case 6: read is unmapped
read = makeRead(fragmentSize, mateStart);
read.setReadUnmappedFlag(true);
boundary = ReadUtils.getAdaptorBoundary(read);
boundary = get.getAdaptor(read);
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
read.setReadUnmappedFlag(false);
@ -94,19 +127,22 @@ public class ReadUtilsUnitTest extends BaseTest {
// <--------|
// |------>
// first read:
read = makeRead(fragmentSize, mateStart);
myStart = 980;
read.setAlignmentStart(myStart);
read.setInferredInsertSize(20);
read.setReadNegativeStrandFlag(true);
boundary = ReadUtils.getAdaptorBoundary(read);
boundary = get.getAdaptor(read);
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
// second read:
read = makeRead(fragmentSize, mateStart);
myStart = 1000;
read.setAlignmentStart(myStart);
read.setInferredInsertSize(20);
read.setMateAlignmentStart(980);
read.setReadNegativeStrandFlag(false);
boundary = ReadUtils.getAdaptorBoundary(read);
boundary = get.getAdaptor(read);
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
}
}