Misc bug fixes: fix tracking of nInsertions with the sample-split pileup constructor. Fix a performance issue when building up pileups from pileups of individual sample data.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3598 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-06-20 20:32:27 +00:00
parent f18ac069e2
commit 1d50fc7087
5 changed files with 43 additions and 16 deletions

View File

@ -359,7 +359,8 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
// In this case, the subsequent call to next() will emit the normal pileup at the current base // In this case, the subsequent call to next() will emit the normal pileup at the current base
// and shift the position. // and shift the position.
if (readInfo.generateExtendedEvents() && hasExtendedEvents) { if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
Map<String,AbstractReadBackedPileup<?,ExtendedEventPileupElement>> fullExtendedEventPileup = new HashMap<String,AbstractReadBackedPileup<?,ExtendedEventPileupElement>>(); Map<String,AbstractReadBackedPileup<ReadBackedExtendedEventPileup,ExtendedEventPileupElement>> fullExtendedEventPileup =
new HashMap<String,AbstractReadBackedPileup<ReadBackedExtendedEventPileup,ExtendedEventPileupElement>>();
SAMRecordState our1stState = readStates.getFirst(); SAMRecordState our1stState = readStates.getFirst();
// get current location on the reference and decrement it by 1: the indels we just stepped over // get current location on the reference and decrement it by 1: the indels we just stepped over
@ -411,9 +412,8 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
if ( state.getRead().getMappingQuality() == 0 ) { if ( state.getRead().getMappingQuality() == 0 ) {
nMQ0Reads++; nMQ0Reads++;
} }
// TODO: sample split!
if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nDeletions,nInsertions,nMQ0Reads));
} }
if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nInsertions,nDeletions,nMQ0Reads));
} }
hasExtendedEvents = false; // we are done with extended events prior to current ref base hasExtendedEvents = false; // we are done with extended events prior to current ref base
// System.out.println("Indel(s) at "+loc); // System.out.println("Indel(s) at "+loc);
@ -421,7 +421,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup)); nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup));
} else { } else {
GenomeLoc location = getLocation(); GenomeLoc location = getLocation();
Map<String,AbstractReadBackedPileup<?,PileupElement>> fullPileup = new HashMap<String,AbstractReadBackedPileup<?,PileupElement>>(); Map<String,AbstractReadBackedPileup<ReadBackedPileup,PileupElement>> fullPileup = new HashMap<String,AbstractReadBackedPileup<ReadBackedPileup,PileupElement>>();
// todo -- performance problem -- should be lazy, really // todo -- performance problem -- should be lazy, really
for(String sampleName: sampleNames) { for(String sampleName: sampleNames) {

View File

@ -113,6 +113,16 @@ public abstract class AbstractReadBackedPileup<RBP extends ReadBackedPileup,PE e
calculateCachedData(); calculateCachedData();
} }
/**
 * Builds a pileup at the given locus by combining already-constructed per-sample pileups.
 * The elements of every sample are registered in a per-sample tracker, and the statistics
 * each sample pileup already holds are folded into this pileup through
 * {@link #addPileupToCumulativeStats}; calculateCachedData() is not invoked on this path.
 *
 * NOTE: addPileupToCumulativeStats is overridable and is invoked from this constructor, i.e.
 * before any subclass field initializers have run. Subclass fields updated by an override must
 * therefore not carry explicit initializers (e.g. "= 0"), or the accumulated values would be
 * reset once this constructor returns.
 *
 * @param loc             the locus this pileup covers.
 * @param pileupsBySample the per-sample pileups to combine, keyed by sample name.
 */
protected AbstractReadBackedPileup(GenomeLoc loc, Map<String,AbstractReadBackedPileup<RBP,PE>> pileupsBySample) {
    this.loc = loc;
    PerSamplePileupElementTracker<PE> perSampleTracker = new PerSamplePileupElementTracker<PE>();
    for(Map.Entry<String,AbstractReadBackedPileup<RBP,PE>> entry: pileupsBySample.entrySet()) {
        String sampleName = entry.getKey();
        AbstractReadBackedPileup<RBP,PE> samplePileup = entry.getValue();
        // Register this sample's elements, then add its statistics to the running totals.
        perSampleTracker.addElements(sampleName,samplePileup.pileupElementTracker);
        addPileupToCumulativeStats(samplePileup);
    }
    this.pileupElementTracker = perSampleTracker;
}
/** /**
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront, * Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling * so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
@ -134,6 +144,12 @@ public abstract class AbstractReadBackedPileup<RBP extends ReadBackedPileup,PE e
} }
} }
/**
 * Folds the statistics reported by another pileup into this pileup's running totals:
 * the element count, the number of deletions and the number of mapping-quality-zero reads.
 * Subclasses that track additional statistics can override this method to accumulate them too.
 *
 * @param pileup the pileup whose size, deletion count and MQ0 read count are added to this one.
 */
protected void addPileupToCumulativeStats(AbstractReadBackedPileup<RBP,PE> pileup) {
    this.size += pileup.size();
    this.nDeletions += pileup.getNumberOfDeletions();
    this.nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
}
/** /**
* Helper routine for converting reads and offset lists to a PileupElement list. * Helper routine for converting reads and offset lists to a PileupElement list.
* *

View File

@ -44,7 +44,7 @@ class MergingPileupElementIterator<PE extends PileupElement> implements Iterator
for(String sampleName: tracker.getSamples()) { for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> trackerPerSample = tracker.getElements(sampleName); PileupElementTracker<PE> trackerPerSample = tracker.getElements(sampleName);
if(trackerPerSample.size() != 0) if(trackerPerSample.size() != 0)
perSampleIterators.add(new PeekableIterator<PE>(tracker.iterator())); perSampleIterators.add(new PeekableIterator<PE>(trackerPerSample.iterator()));
} }
} }

View File

@ -32,8 +32,8 @@ import java.util.*;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<ReadBackedExtendedEventPileup, ExtendedEventPileupElement> implements ReadBackedExtendedEventPileup { public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<ReadBackedExtendedEventPileup, ExtendedEventPileupElement> implements ReadBackedExtendedEventPileup {
private int nInsertions = 0; private int nInsertions;
private int maxDeletionLength = 0; // cached value of the length of the longest deletion observed at the site private int maxDeletionLength; // cached value of the length of the longest deletion observed at the site
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List<ExtendedEventPileupElement> pileupElements) { public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List<ExtendedEventPileupElement> pileupElements) {
super(loc,pileupElements); super(loc,pileupElements);
@ -55,8 +55,8 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
this.nInsertions = nInsertions; this.nInsertions = nInsertions;
} }
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<String,AbstractReadBackedPileup<?,ExtendedEventPileupElement>> pileupElementsBySample) { public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<String,AbstractReadBackedPileup<ReadBackedExtendedEventPileup,ExtendedEventPileupElement>> pileupElementsBySample) {
super(loc,new PerSamplePileupElementTracker<ExtendedEventPileupElement>(pileupElementsBySample)); super(loc,pileupElementsBySample);
} }
/** /**
@ -68,15 +68,26 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
protected void calculateCachedData() { protected void calculateCachedData() {
super.calculateCachedData(); super.calculateCachedData();
nDeletions = 0; nInsertions = 0;
maxDeletionLength = 0; nMQ0Reads = 0;
for ( ExtendedEventPileupElement p : this.toExtendedIterable()) {
for ( ExtendedEventPileupElement p : this.toExtendedIterable() ) {
if ( p.isDeletion() ) { if ( p.isDeletion() ) {
nDeletions++;
maxDeletionLength = Math.max(maxDeletionLength, p.getEventLength()); maxDeletionLength = Math.max(maxDeletionLength, p.getEventLength());
} else {
if ( p.isInsertion() ) nInsertions++;
} }
} }
} }
/**
 * Folds the statistics of another extended-event pileup into this pileup's running totals.
 * In addition to the totals maintained by the superclass, this accumulates the number of
 * insertions and keeps track of the longest deletion seen in any of the merged pileups.
 *
 * @param pileup the pileup to merge in; it is cast to ReadBackedExtendedEventPileup, so it
 *               must implement that interface.
 */
@Override
protected void addPileupToCumulativeStats(AbstractReadBackedPileup<ReadBackedExtendedEventPileup,ExtendedEventPileupElement> pileup) {
    super.addPileupToCumulativeStats(pileup);
    ReadBackedExtendedEventPileup extendedEventPileup = ((ReadBackedExtendedEventPileup)pileup);
    this.nInsertions += extendedEventPileup.getNumberOfInsertions();
    // maxDeletionLength is the length of the longest deletion at the site, so merging pileups
    // must take the maximum over them; summing would overstate it whenever more than one
    // merged pileup contains a deletion.
    this.maxDeletionLength = Math.max(this.maxDeletionLength, extendedEventPileup.getMaxDeletionLength());
}
@Override @Override
protected ReadBackedExtendedEventPileup createNewPileup(GenomeLoc loc, PileupElementTracker<ExtendedEventPileupElement> tracker) { protected ReadBackedExtendedEventPileup createNewPileup(GenomeLoc loc, PileupElementTracker<ExtendedEventPileupElement> tracker) {

View File

@ -47,8 +47,8 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
super(loc,pileupElements); super(loc,pileupElements);
} }
public ReadBackedPileupImpl(GenomeLoc loc, Map<String,AbstractReadBackedPileup<?,PileupElement>> pileupElementsBySample) { public ReadBackedPileupImpl(GenomeLoc loc, Map<String,AbstractReadBackedPileup<ReadBackedPileup,PileupElement>> pileupElementsBySample) {
super(loc,new PerSamplePileupElementTracker<PileupElement>(pileupElementsBySample)); super(loc,pileupElementsBySample);
} }
/** /**