ReadBackedPileup cleanup
-- Only ReadBackedPileupImpl (concrete class) and ReadBackedPileup (interface) remain; moved all functionality of AbstractReadBackedPileup into the impl -- ReadBackedPileupImpl was literally a shell class after we removed extended events. A few bits of code cleanup and we reduced a bunch of class complexity in the gatk -- ReadBackedPileups no longer accept pre-cached values (size, nMapQ reads, etc) but now lazily load these values as needed -- Created optimized calculation routines to iterate over all of the reads in the pileup in whatever order is most efficient as well. -- New LIBS no longer calculates size, n mapq, and n deletion reads while making pileups. -- Added commons-collections for IteratorChain
This commit is contained in:
parent
e3e3ae29b2
commit
9e23c592e6
1
ivy.xml
1
ivy.xml
|
|
@ -61,6 +61,7 @@
|
||||||
<dependency org="commons-lang" name="commons-lang" rev="2.5"/>
|
<dependency org="commons-lang" name="commons-lang" rev="2.5"/>
|
||||||
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
|
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
|
||||||
<dependency org="commons-io" name="commons-io" rev="2.1"/>
|
<dependency org="commons-io" name="commons-io" rev="2.1"/>
|
||||||
|
<dependency org="commons-collections" name="commons-collections" rev="3.2.1"/>
|
||||||
<dependency org="org.apache.commons" name="commons-math" rev="2.2"/>
|
<dependency org="org.apache.commons" name="commons-math" rev="2.2"/>
|
||||||
|
|
||||||
<!-- Lucene core utilities -->
|
<!-- Lucene core utilities -->
|
||||||
|
|
|
||||||
|
|
@ -242,39 +242,30 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
final Iterator<AlignmentStateMachine> iterator = readStates.iterator(sample);
|
final Iterator<AlignmentStateMachine> iterator = readStates.iterator(sample);
|
||||||
final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
|
final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
|
||||||
|
|
||||||
int size = 0; // number of elements in this sample's pileup
|
|
||||||
int nDeletions = 0; // number of deletions in this sample's pileup
|
|
||||||
int nMQ0Reads = 0; // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0)
|
|
||||||
|
|
||||||
while (iterator.hasNext()) {
|
while (iterator.hasNext()) {
|
||||||
final AlignmentStateMachine state = iterator.next(); // state object with the read/offset information
|
// state object with the read/offset information
|
||||||
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
|
final AlignmentStateMachine state = iterator.next();
|
||||||
final CigarOperator op = state.getCigarOperator(); // current cigar operator
|
final GATKSAMRecord read = (GATKSAMRecord) state.getRead();
|
||||||
|
final CigarOperator op = state.getCigarOperator();
|
||||||
|
|
||||||
if (op == CigarOperator.N) // N's are never added to any pileup
|
if (op == CigarOperator.N) // N's are never added to any pileup
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!dontIncludeReadInPileup(read, location.getStart())) {
|
if (!dontIncludeReadInPileup(read, location.getStart())) {
|
||||||
if ( op == CigarOperator.D ) {
|
if ( ! includeReadsWithDeletionAtLoci && op == CigarOperator.D ) {
|
||||||
if ( ! includeReadsWithDeletionAtLoci )
|
continue;
|
||||||
continue;
|
|
||||||
nDeletions++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pile.add(state.makePileupElement());
|
pile.add(state.makePileupElement());
|
||||||
size++;
|
|
||||||
|
|
||||||
if ( read.getMappingQuality() == 0 )
|
|
||||||
nMQ0Reads++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pile.size() != 0) // if this pileup added at least one base, add it to the full pileup
|
if (! pile.isEmpty() ) // if this pileup added at least one base, add it to the full pileup
|
||||||
fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads));
|
fullPileup.put(sample, new ReadBackedPileupImpl(location, pile));
|
||||||
}
|
}
|
||||||
|
|
||||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||||
if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done
|
if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done
|
||||||
nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled);
|
nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -25,6 +25,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.pileup;
|
package org.broadinstitute.sting.utils.pileup;
|
||||||
|
|
||||||
|
import org.apache.commons.collections.iterators.IteratorChain;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -35,6 +37,20 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
abstract class PileupElementTracker<PE extends PileupElement> implements Iterable<PE> {
|
abstract class PileupElementTracker<PE extends PileupElement> implements Iterable<PE> {
|
||||||
public abstract int size();
|
public abstract int size();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Iterate through the PEs here, but in any order, which may improve performance
|
||||||
|
* if you don't care about the underlying order the reads are coming to you in.
|
||||||
|
* @return an iterable over all pileup elements in this tracker
|
||||||
|
*/
|
||||||
|
public abstract Iterable<PE> unorderedIterable();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Same as {@link #unorderedIterable()} but returns the actual iterator itself
|
||||||
|
* @return an iterator over all pileup elements in this tracker, in no guaranteed order
|
||||||
|
*/
|
||||||
|
public Iterator<PE> unorderedIterator() { return unorderedIterable().iterator(); }
|
||||||
|
|
||||||
public abstract PileupElementTracker<PE> copy();
|
public abstract PileupElementTracker<PE> copy();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -65,6 +81,7 @@ class UnifiedPileupElementTracker<PE extends PileupElement> extends PileupElemen
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterator<PE> iterator() { return pileup.iterator(); }
|
public Iterator<PE> iterator() { return pileup.iterator(); }
|
||||||
|
public Iterable<PE> unorderedIterable() { return this; }
|
||||||
}
|
}
|
||||||
|
|
||||||
class PerSamplePileupElementTracker<PE extends PileupElement> extends PileupElementTracker<PE> {
|
class PerSamplePileupElementTracker<PE extends PileupElement> extends PileupElementTracker<PE> {
|
||||||
|
|
@ -113,4 +130,25 @@ class PerSamplePileupElementTracker<PE extends PileupElement> extends PileupElem
|
||||||
public int size() {
|
public int size() {
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public Iterable<PE> unorderedIterable() {
|
||||||
|
return new Iterable<PE>() {
|
||||||
|
@Override
|
||||||
|
public Iterator<PE> iterator() {
|
||||||
|
return new Iterator<PE>() {
|
||||||
|
final private IteratorChain chain = new IteratorChain();
|
||||||
|
|
||||||
|
{ // initialize the chain with the unordered iterators of the per sample pileups
|
||||||
|
for ( PileupElementTracker<PE> pet : pileup.values() ) {
|
||||||
|
chain.addIterator(pet.unorderedIterator());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@Override public boolean hasNext() { return chain.hasNext(); }
|
||||||
|
@Override public PE next() { return (PE)chain.next(); }
|
||||||
|
@Override public void remove() { throw new UnsupportedOperationException("Cannot remove"); }
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -25,12 +25,18 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.pileup;
|
package org.broadinstitute.sting.utils.pileup;
|
||||||
|
|
||||||
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
|
|
||||||
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -39,6 +45,17 @@ import java.util.*;
|
||||||
* Test routines for read-backed pileup.
|
* Test routines for read-backed pileup.
|
||||||
*/
|
*/
|
||||||
public class ReadBackedPileupUnitTest {
|
public class ReadBackedPileupUnitTest {
|
||||||
|
protected static SAMFileHeader header;
|
||||||
|
protected GenomeLocParser genomeLocParser;
|
||||||
|
private GenomeLoc loc;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public void beforeClass() {
|
||||||
|
header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||||
|
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||||
|
loc = genomeLocParser.createGenomeLoc("chr1", 1);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Ensure that basic read group splitting works.
|
* Ensure that basic read group splitting works.
|
||||||
*/
|
*/
|
||||||
|
|
@ -195,4 +212,98 @@ public class ReadBackedPileupUnitTest {
|
||||||
missingSamplePileup = pileup.getPileupForSample("not here");
|
missingSamplePileup = pileup.getPileupForSample("not here");
|
||||||
Assert.assertNull(missingSamplePileup,"Pileup for sample 'not here' should be null but isn't");
|
Assert.assertNull(missingSamplePileup,"Pileup for sample 'not here' should be null but isn't");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static int sampleI = 0;
|
||||||
|
private class RBPCountTest {
|
||||||
|
final String sample;
|
||||||
|
final int nReads, nMapq0, nDeletions;
|
||||||
|
|
||||||
|
private RBPCountTest(int nReads, int nMapq0, int nDeletions) {
|
||||||
|
this.sample = "sample" + sampleI++;
|
||||||
|
this.nReads = nReads;
|
||||||
|
this.nMapq0 = nMapq0;
|
||||||
|
this.nDeletions = nDeletions;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<PileupElement> makeReads( final int n, final int mapq, final String op ) {
|
||||||
|
final int readLength = 3;
|
||||||
|
|
||||||
|
final List<PileupElement> elts = new LinkedList<PileupElement>();
|
||||||
|
for ( int i = 0; i < n; i++ ) {
|
||||||
|
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, readLength);
|
||||||
|
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||||
|
read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
|
||||||
|
read.setCigarString("1M1" + op + "1M");
|
||||||
|
read.setMappingQuality(mapq);
|
||||||
|
final int baseOffset = op.equals("M") ? 1 : 0;
|
||||||
|
final CigarElement cigarElement = read.getCigar().getCigarElement(1);
|
||||||
|
elts.add(new PileupElement(read, baseOffset, cigarElement, 1, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
return elts;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ReadBackedPileupImpl makePileup() {
|
||||||
|
final List<PileupElement> elts = new LinkedList<PileupElement>();
|
||||||
|
|
||||||
|
elts.addAll(makeReads(nMapq0, 0, "M"));
|
||||||
|
elts.addAll(makeReads(nDeletions, 30, "D"));
|
||||||
|
elts.addAll(makeReads(nReads - nMapq0 - nDeletions, 30, "M"));
|
||||||
|
|
||||||
|
return new ReadBackedPileupImpl(loc, elts);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "RBPCountTest{" +
|
||||||
|
"sample='" + sample + '\'' +
|
||||||
|
", nReads=" + nReads +
|
||||||
|
", nMapq0=" + nMapq0 +
|
||||||
|
", nDeletions=" + nDeletions +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "RBPCountingTest")
|
||||||
|
public Object[][] makeRBPCountingTest() {
|
||||||
|
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||||
|
|
||||||
|
for ( final int nMapq : Arrays.asList(0, 10, 20) ) {
|
||||||
|
for ( final int nDeletions : Arrays.asList(0, 10, 20) ) {
|
||||||
|
for ( final int nReg : Arrays.asList(0, 10, 20) ) {
|
||||||
|
final int total = nMapq + nDeletions + nReg;
|
||||||
|
if ( total > 0 )
|
||||||
|
tests.add(new Object[]{new RBPCountTest(total, nMapq, nDeletions)});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "RBPCountingTest")
|
||||||
|
public void testRBPCountingTestSinglePileup(RBPCountTest params) {
|
||||||
|
testRBPCounts(params.makePileup(), params);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "RBPCountingTest")
|
||||||
|
public void testRBPCountingTestMultiSample(RBPCountTest params) {
|
||||||
|
final RBPCountTest newSample = new RBPCountTest(2, 1, 1);
|
||||||
|
final Map<String, ReadBackedPileupImpl> pileupsBySample = new HashMap<String, ReadBackedPileupImpl>();
|
||||||
|
pileupsBySample.put(newSample.sample, newSample.makePileup());
|
||||||
|
pileupsBySample.put(params.sample, params.makePileup());
|
||||||
|
final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, pileupsBySample);
|
||||||
|
testRBPCounts(pileup, new RBPCountTest(params.nReads + 2, params.nMapq0 + 1, params.nDeletions + 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void testRBPCounts(final ReadBackedPileup rbp, RBPCountTest expected) {
|
||||||
|
for ( int cycles = 0; cycles < 3; cycles++ ) {
|
||||||
|
// multiple cycles to make sure caching is working
|
||||||
|
Assert.assertEquals(rbp.getNumberOfElements(), expected.nReads);
|
||||||
|
Assert.assertEquals(rbp.depthOfCoverage(), expected.nReads);
|
||||||
|
Assert.assertEquals(rbp.getNumberOfDeletions(), expected.nDeletions);
|
||||||
|
Assert.assertEquals(rbp.getNumberOfMappingQualityZeroReads(), expected.nMapq0);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue