Misc bug fixes: fix tracking of nInsertions with sample-split pileup constructor. Fix performance
issue when building up pileups from pileups of individual sample data.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3598 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-06-20 20:32:27 +00:00
parent f18ac069e2
commit 1d50fc7087
5 changed files with 43 additions and 16 deletions

View File

@ -359,7 +359,8 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
// In this case, the subsequent call to next() will emit the normal pileup at the current base
// and shift the position.
if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
Map<String,AbstractReadBackedPileup<?,ExtendedEventPileupElement>> fullExtendedEventPileup = new HashMap<String,AbstractReadBackedPileup<?,ExtendedEventPileupElement>>();
Map<String,AbstractReadBackedPileup<ReadBackedExtendedEventPileup,ExtendedEventPileupElement>> fullExtendedEventPileup =
new HashMap<String,AbstractReadBackedPileup<ReadBackedExtendedEventPileup,ExtendedEventPileupElement>>();
SAMRecordState our1stState = readStates.getFirst();
// get current location on the reference and decrement it by 1: the indels we just stepped over
@ -411,9 +412,8 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
if ( state.getRead().getMappingQuality() == 0 ) {
nMQ0Reads++;
}
// TODO: sample split!
if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nDeletions,nInsertions,nMQ0Reads));
}
if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nInsertions,nDeletions,nMQ0Reads));
}
hasExtendedEvents = false; // we are done with extended events prior to current ref base
// System.out.println("Indel(s) at "+loc);
@ -421,7 +421,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup));
} else {
GenomeLoc location = getLocation();
Map<String,AbstractReadBackedPileup<?,PileupElement>> fullPileup = new HashMap<String,AbstractReadBackedPileup<?,PileupElement>>();
Map<String,AbstractReadBackedPileup<ReadBackedPileup,PileupElement>> fullPileup = new HashMap<String,AbstractReadBackedPileup<ReadBackedPileup,PileupElement>>();
// todo -- performance problem -- should be lazy, really
for(String sampleName: sampleNames) {

View File

@ -113,6 +113,16 @@ public abstract class AbstractReadBackedPileup<RBP extends ReadBackedPileup,PE e
calculateCachedData();
}
/**
 * Builds a pileup at the given locus by combining already-constructed per-sample pileups.
 * Each sample's element tracker is registered under its sample name in a fresh
 * {@code PerSamplePileupElementTracker}, and the sample's counts are folded into this
 * pileup through {@code addPileupToCumulativeStats}.
 *
 * @param loc locus stored on this pileup
 * @param pileupsBySample per-sample pileups, keyed by the name passed on to the tracker
 */
protected AbstractReadBackedPileup(GenomeLoc loc, Map<String,AbstractReadBackedPileup<RBP,PE>> pileupsBySample) {
    this.loc = loc;
    PerSamplePileupElementTracker<PE> mergedTracker = new PerSamplePileupElementTracker<PE>();
    for(Map.Entry<String,AbstractReadBackedPileup<RBP,PE>> entry: pileupsBySample.entrySet()) {
        String sampleName = entry.getKey();
        AbstractReadBackedPileup<RBP,PE> samplePileup = entry.getValue();
        mergedTracker.addElements(sampleName,samplePileup.pileupElementTracker);
        // Invoked while this object is still under construction: an override runs before
        // any field initializers declared in the subclass have executed.
        addPileupToCumulativeStats(samplePileup);
    }
    this.pileupElementTracker = mergedTracker;
}
/**
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
@ -134,6 +144,12 @@ public abstract class AbstractReadBackedPileup<RBP extends ReadBackedPileup,PE e
}
}
/**
 * Folds another pileup's counts into this pileup's running totals: the number of
 * elements, the number of deletions, and the number of mapping-quality-zero reads.
 *
 * @param pileup pileup whose counts are added to this one
 */
protected void addPileupToCumulativeStats(AbstractReadBackedPileup<RBP,PE> pileup) {
    this.size += pileup.size();
    this.nDeletions += pileup.getNumberOfDeletions();
    this.nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
}
/**
* Helper routine for converting reads and offset lists to a PileupElement list.
*

View File

@ -44,7 +44,7 @@ class MergingPileupElementIterator<PE extends PileupElement> implements Iterator
for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> trackerPerSample = tracker.getElements(sampleName);
if(trackerPerSample.size() != 0)
perSampleIterators.add(new PeekableIterator<PE>(tracker.iterator()));
perSampleIterators.add(new PeekableIterator<PE>(trackerPerSample.iterator()));
}
}

View File

@ -32,8 +32,8 @@ import java.util.*;
import net.sf.samtools.SAMRecord;
public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<ReadBackedExtendedEventPileup, ExtendedEventPileupElement> implements ReadBackedExtendedEventPileup {
private int nInsertions = 0;
private int maxDeletionLength = 0; // cached value of the length of the longest deletion observed at the site
private int nInsertions;
private int maxDeletionLength; // cached value of the length of the longest deletion observed at the site
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List<ExtendedEventPileupElement> pileupElements) {
super(loc,pileupElements);
@ -55,8 +55,8 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
this.nInsertions = nInsertions;
}
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<String,AbstractReadBackedPileup<?,ExtendedEventPileupElement>> pileupElementsBySample) {
super(loc,new PerSamplePileupElementTracker<ExtendedEventPileupElement>(pileupElementsBySample));
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<String,AbstractReadBackedPileup<ReadBackedExtendedEventPileup,ExtendedEventPileupElement>> pileupElementsBySample) {
super(loc,pileupElementsBySample);
}
/**
@ -68,15 +68,26 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
protected void calculateCachedData() {
super.calculateCachedData();
nDeletions = 0;
maxDeletionLength = 0;
for ( ExtendedEventPileupElement p : this.toExtendedIterable()) {
nInsertions = 0;
nMQ0Reads = 0;
for ( ExtendedEventPileupElement p : this.toExtendedIterable() ) {
if ( p.isDeletion() ) {
nDeletions++;
maxDeletionLength = Math.max(maxDeletionLength, p.getEventLength());
} else {
if ( p.isInsertion() ) nInsertions++;
}
}
}
}
/**
 * Folds another extended-event pileup's statistics into this pileup's totals.
 * In addition to the counts accumulated by the superclass, the number of insertions
 * is summed, and the longest deletion length is kept as the maximum over all merged
 * pileups (it is a maximum, not a count, so it must not be added up).
 *
 * @param pileup pileup whose statistics are merged into this one; it is cast to
 *               {@code ReadBackedExtendedEventPileup} to read the extended-event statistics
 */
@Override
protected void addPileupToCumulativeStats(AbstractReadBackedPileup<ReadBackedExtendedEventPileup,ExtendedEventPileupElement> pileup) {
    super.addPileupToCumulativeStats(pileup);
    ReadBackedExtendedEventPileup extendedEventPileup = ((ReadBackedExtendedEventPileup)pileup);
    this.nInsertions += extendedEventPileup.getNumberOfInsertions();
    // Longest deletion across samples is the max of the per-sample maxima, matching calculateCachedData().
    this.maxDeletionLength = Math.max(this.maxDeletionLength, extendedEventPileup.getMaxDeletionLength());
}
@Override
protected ReadBackedExtendedEventPileup createNewPileup(GenomeLoc loc, PileupElementTracker<ExtendedEventPileupElement> tracker) {

View File

@ -47,8 +47,8 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
super(loc,pileupElements);
}
public ReadBackedPileupImpl(GenomeLoc loc, Map<String,AbstractReadBackedPileup<?,PileupElement>> pileupElementsBySample) {
super(loc,new PerSamplePileupElementTracker<PileupElement>(pileupElementsBySample));
public ReadBackedPileupImpl(GenomeLoc loc, Map<String,AbstractReadBackedPileup<ReadBackedPileup,PileupElement>> pileupElementsBySample) {
super(loc,pileupElementsBySample);
}
/**