ReadBackedPileup cleanup
-- Only ReadBackedPileupImpl (concrete class) and ReadBackedPileup (interface) live, moved all functionality of AbstractReadBackedPileup into the impl -- ReadBackedPileupImpl was literally a shell class after we removed extended events. A few bits of code cleanup and we reduced a bunch of class complexity in the gatk -- ReadBackedPileups no longer accept pre-cached values (size, nMapQ reads, etc) but now lazy load these values as needed -- Created optimized calculation routines to iterator over all of the reads in the pileup in whatever order is most efficient as well. -- New LIBS no longer calculates size, n mapq, and n deletion reads while making pileups. -- Added commons-collections for IteratorChain
This commit is contained in:
parent
e3e3ae29b2
commit
9e23c592e6
1
ivy.xml
1
ivy.xml
|
|
@ -61,6 +61,7 @@
|
|||
<dependency org="commons-lang" name="commons-lang" rev="2.5"/>
|
||||
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
|
||||
<dependency org="commons-io" name="commons-io" rev="2.1"/>
|
||||
<dependency org="commons-collections" name="commons-collections" rev="3.2.1"/>
|
||||
<dependency org="org.apache.commons" name="commons-math" rev="2.2"/>
|
||||
|
||||
<!-- Lucene core utilities -->
|
||||
|
|
|
|||
|
|
@ -242,39 +242,30 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
final Iterator<AlignmentStateMachine> iterator = readStates.iterator(sample);
|
||||
final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
|
||||
|
||||
int size = 0; // number of elements in this sample's pileup
|
||||
int nDeletions = 0; // number of deletions in this sample's pileup
|
||||
int nMQ0Reads = 0; // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0)
|
||||
|
||||
while (iterator.hasNext()) {
|
||||
final AlignmentStateMachine state = iterator.next(); // state object with the read/offset information
|
||||
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
|
||||
final CigarOperator op = state.getCigarOperator(); // current cigar operator
|
||||
// state object with the read/offset information
|
||||
final AlignmentStateMachine state = iterator.next();
|
||||
final GATKSAMRecord read = (GATKSAMRecord) state.getRead();
|
||||
final CigarOperator op = state.getCigarOperator();
|
||||
|
||||
if (op == CigarOperator.N) // N's are never added to any pileup
|
||||
if (op == CigarOperator.N) // N's are never added to any pileup
|
||||
continue;
|
||||
|
||||
if (!dontIncludeReadInPileup(read, location.getStart())) {
|
||||
if ( op == CigarOperator.D ) {
|
||||
if ( ! includeReadsWithDeletionAtLoci )
|
||||
continue;
|
||||
nDeletions++;
|
||||
if ( ! includeReadsWithDeletionAtLoci && op == CigarOperator.D ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
pile.add(state.makePileupElement());
|
||||
size++;
|
||||
|
||||
if ( read.getMappingQuality() == 0 )
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
|
||||
if (pile.size() != 0) // if this pileup added at least one base, add it to the full pileup
|
||||
fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads));
|
||||
if (! pile.isEmpty() ) // if this pileup added at least one base, add it to the full pileup
|
||||
fullPileup.put(sample, new ReadBackedPileupImpl(location, pile));
|
||||
}
|
||||
|
||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||
if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done
|
||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||
if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done
|
||||
nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -25,6 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import org.apache.commons.collections.iterators.IteratorChain;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -35,6 +37,20 @@ import java.util.*;
|
|||
*/
|
||||
abstract class PileupElementTracker<PE extends PileupElement> implements Iterable<PE> {
|
||||
public abstract int size();
|
||||
|
||||
/**
|
||||
* Iterate through the PEs here, but in any order, which may improve performance
|
||||
* if you don't care about the underlying order the reads are coming to you in.
|
||||
* @return an iteratable over all pileup elements in this tracker
|
||||
*/
|
||||
public abstract Iterable<PE> unorderedIterable();
|
||||
|
||||
/**
|
||||
* Same as @see #unorderedIterable but the actual iterator itself
|
||||
* @return
|
||||
*/
|
||||
public Iterator<PE> unorderedIterator() { return unorderedIterable().iterator(); }
|
||||
|
||||
public abstract PileupElementTracker<PE> copy();
|
||||
}
|
||||
|
||||
|
|
@ -65,6 +81,7 @@ class UnifiedPileupElementTracker<PE extends PileupElement> extends PileupElemen
|
|||
}
|
||||
|
||||
public Iterator<PE> iterator() { return pileup.iterator(); }
|
||||
public Iterable<PE> unorderedIterable() { return this; }
|
||||
}
|
||||
|
||||
class PerSamplePileupElementTracker<PE extends PileupElement> extends PileupElementTracker<PE> {
|
||||
|
|
@ -113,4 +130,25 @@ class PerSamplePileupElementTracker<PE extends PileupElement> extends PileupElem
|
|||
public int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
public Iterable<PE> unorderedIterable() {
|
||||
return new Iterable<PE>() {
|
||||
@Override
|
||||
public Iterator<PE> iterator() {
|
||||
return new Iterator<PE>() {
|
||||
final private IteratorChain chain = new IteratorChain();
|
||||
|
||||
{ // initialize the chain with the unordered iterators of the per sample pileups
|
||||
for ( PileupElementTracker<PE> pet : pileup.values() ) {
|
||||
chain.addIterator(pet.unorderedIterator());
|
||||
}
|
||||
}
|
||||
@Override public boolean hasNext() { return chain.hasNext(); }
|
||||
@Override public PE next() { return (PE)chain.next(); }
|
||||
@Override public void remove() { throw new UnsupportedOperationException("Cannot remove"); }
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -25,12 +25,18 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -39,6 +45,17 @@ import java.util.*;
|
|||
* Test routines for read-backed pileup.
|
||||
*/
|
||||
public class ReadBackedPileupUnitTest {
|
||||
protected static SAMFileHeader header;
|
||||
protected GenomeLocParser genomeLocParser;
|
||||
private GenomeLoc loc;
|
||||
|
||||
@BeforeClass
|
||||
public void beforeClass() {
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
loc = genomeLocParser.createGenomeLoc("chr1", 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure that basic read group splitting works.
|
||||
*/
|
||||
|
|
@ -195,4 +212,98 @@ public class ReadBackedPileupUnitTest {
|
|||
missingSamplePileup = pileup.getPileupForSample("not here");
|
||||
Assert.assertNull(missingSamplePileup,"Pileup for sample 'not here' should be null but isn't");
|
||||
}
|
||||
}
|
||||
|
||||
private static int sampleI = 0;
|
||||
private class RBPCountTest {
|
||||
final String sample;
|
||||
final int nReads, nMapq0, nDeletions;
|
||||
|
||||
private RBPCountTest(int nReads, int nMapq0, int nDeletions) {
|
||||
this.sample = "sample" + sampleI++;
|
||||
this.nReads = nReads;
|
||||
this.nMapq0 = nMapq0;
|
||||
this.nDeletions = nDeletions;
|
||||
}
|
||||
|
||||
private List<PileupElement> makeReads( final int n, final int mapq, final String op ) {
|
||||
final int readLength = 3;
|
||||
|
||||
final List<PileupElement> elts = new LinkedList<PileupElement>();
|
||||
for ( int i = 0; i < n; i++ ) {
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, readLength);
|
||||
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||
read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
|
||||
read.setCigarString("1M1" + op + "1M");
|
||||
read.setMappingQuality(mapq);
|
||||
final int baseOffset = op.equals("M") ? 1 : 0;
|
||||
final CigarElement cigarElement = read.getCigar().getCigarElement(1);
|
||||
elts.add(new PileupElement(read, baseOffset, cigarElement, 1, 0));
|
||||
}
|
||||
|
||||
return elts;
|
||||
}
|
||||
|
||||
private ReadBackedPileupImpl makePileup() {
|
||||
final List<PileupElement> elts = new LinkedList<PileupElement>();
|
||||
|
||||
elts.addAll(makeReads(nMapq0, 0, "M"));
|
||||
elts.addAll(makeReads(nDeletions, 30, "D"));
|
||||
elts.addAll(makeReads(nReads - nMapq0 - nDeletions, 30, "M"));
|
||||
|
||||
return new ReadBackedPileupImpl(loc, elts);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "RBPCountTest{" +
|
||||
"sample='" + sample + '\'' +
|
||||
", nReads=" + nReads +
|
||||
", nMapq0=" + nMapq0 +
|
||||
", nDeletions=" + nDeletions +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "RBPCountingTest")
|
||||
public Object[][] makeRBPCountingTest() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
for ( final int nMapq : Arrays.asList(0, 10, 20) ) {
|
||||
for ( final int nDeletions : Arrays.asList(0, 10, 20) ) {
|
||||
for ( final int nReg : Arrays.asList(0, 10, 20) ) {
|
||||
final int total = nMapq + nDeletions + nReg;
|
||||
if ( total > 0 )
|
||||
tests.add(new Object[]{new RBPCountTest(total, nMapq, nDeletions)});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "RBPCountingTest")
|
||||
public void testRBPCountingTestSinglePileup(RBPCountTest params) {
|
||||
testRBPCounts(params.makePileup(), params);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "RBPCountingTest")
|
||||
public void testRBPCountingTestMultiSample(RBPCountTest params) {
|
||||
final RBPCountTest newSample = new RBPCountTest(2, 1, 1);
|
||||
final Map<String, ReadBackedPileupImpl> pileupsBySample = new HashMap<String, ReadBackedPileupImpl>();
|
||||
pileupsBySample.put(newSample.sample, newSample.makePileup());
|
||||
pileupsBySample.put(params.sample, params.makePileup());
|
||||
final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, pileupsBySample);
|
||||
testRBPCounts(pileup, new RBPCountTest(params.nReads + 2, params.nMapq0 + 1, params.nDeletions + 1));
|
||||
}
|
||||
|
||||
|
||||
private void testRBPCounts(final ReadBackedPileup rbp, RBPCountTest expected) {
|
||||
for ( int cycles = 0; cycles < 3; cycles++ ) {
|
||||
// multiple cycles to make sure caching is working
|
||||
Assert.assertEquals(rbp.getNumberOfElements(), expected.nReads);
|
||||
Assert.assertEquals(rbp.depthOfCoverage(), expected.nReads);
|
||||
Assert.assertEquals(rbp.getNumberOfDeletions(), expected.nDeletions);
|
||||
Assert.assertEquals(rbp.getNumberOfMappingQualityZeroReads(), expected.nMapq0);
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue