Added hasPileupBeenDownsampled() boolean to ReadBackedPileup, so that a pileup can report whether or not (but not how much) it's been downsampled.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3649 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-06-28 04:56:33 +00:00
parent 8e848ccd84
commit 3a9d426ca8
5 changed files with 93 additions and 14 deletions

View File

@ -370,6 +370,7 @@ public class LocusIteratorByState extends LocusIterator {
for(String sampleName: sampleNames) {
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sampleName);
ArrayList<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size());
boolean hasBeenSampled = loc.getStart() <= readStates.readStatesBySample.get(sampleName).getDownsamplingExtent();
size = 0;
nDeletions = 0;
@ -413,7 +414,7 @@ public class LocusIteratorByState extends LocusIterator {
nMQ0Reads++;
}
}
if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nInsertions,nDeletions,nMQ0Reads));
if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nInsertions,nDeletions,nMQ0Reads,hasBeenSampled));
}
hasExtendedEvents = false; // we are done with extended events prior to current ref base
// System.out.println("Indel(s) at "+loc);
@ -427,6 +428,7 @@ public class LocusIteratorByState extends LocusIterator {
for(String sampleName: sampleNames) {
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sampleName);
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
boolean hasBeenSampled = location.getStart() <= readStates.readStatesBySample.get(sampleName).getDownsamplingExtent();
size = 0;
nDeletions = 0;
@ -456,7 +458,7 @@ public class LocusIteratorByState extends LocusIterator {
nMQ0Reads++;
}
}
if( pile.size() != 0 ) fullPileup.put(sampleName,new ReadBackedPileupImpl(location,pile,size,nDeletions,nMQ0Reads));
if( pile.size() != 0 ) fullPileup.put(sampleName,new ReadBackedPileupImpl(location,pile,size,nDeletions,nMQ0Reads,hasBeenSampled));
}
updateReadStates(); // critical - must be called after we get the current state offsets and location
@ -570,7 +572,9 @@ public class LocusIteratorByState extends LocusIterator {
for(String sampleName: sampleNames)
readStatesBySample.put(sampleName,new PerSampleReadStateManager());
ReadSelector primaryReadSelector= samplePartitioner;
ReadSelector primaryReadSelector = samplePartitioner;
if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_BY_SAMPLE)
primaryReadSelector = new NRandomReadSelector(primaryReadSelector,targetCoverage);
chainedReadSelector = maxReadsAtLocus!=Integer.MAX_VALUE ? new FirstNReadSelector(primaryReadSelector,maxReadsAtLocus) : primaryReadSelector;
}
@ -644,6 +648,7 @@ public class LocusIteratorByState extends LocusIterator {
PerSampleReadStateManager statesBySample = readStatesBySample.get(sampleName);
int numReads = statesBySample.size();
int downsamplingExtent = aggregator.getDownsamplingExtent();
if(numReads+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE) {
long readLimit = aggregator.getNumReadsSeen();
@ -653,6 +658,7 @@ public class LocusIteratorByState extends LocusIterator {
mrlViolation = true;
}
addReadsToSample(statesBySample,newReads,readLimit,mrlViolation);
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
}
else {
int[] counts = statesBySample.getCountsPerAlignmentStart();
@ -688,8 +694,10 @@ public class LocusIteratorByState extends LocusIterator {
readOffset += counts[i];
}
statesBySample.purge(toPurge);
downsamplingExtent = Math.max(downsamplingExtent,statesBySample.purge(toPurge));
addReadsToSample(statesBySample,newReads,targetCoverage-numReads,false);
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
}
}
chainedReadSelector.reset();
@ -737,6 +745,7 @@ public class LocusIteratorByState extends LocusIterator {
private class PerSampleReadStateManager implements Iterable<SAMRecordState> {
private final Queue<SAMRecordState> readStates = new LinkedList<SAMRecordState>();
private final Deque<Counter> readStateCounter = new LinkedList<Counter>();
private int downsamplingExtent = 0;
public void addStatesAtNextAlignmentStart(Collection<SAMRecordState> states) {
readStates.addAll(states);
@ -756,6 +765,14 @@ public class LocusIteratorByState extends LocusIterator {
return readStates.size();
}
public void specifyNewDownsamplingExtent(int downsamplingExtent) {
this.downsamplingExtent = Math.max(this.downsamplingExtent,downsamplingExtent);
}
public int getDownsamplingExtent() {
return downsamplingExtent;
}
public int[] getCountsPerAlignmentStart() {
int[] counts = new int[readStateCounter.size()];
int index = 0;
@ -790,9 +807,12 @@ public class LocusIteratorByState extends LocusIterator {
* Purge the given elements from the bitset. If an element in the bitset is true, purge
* the corresponding read state.
* @param elements bits from the set to purge.
* @return the extent of the final downsampled read.
*/
public void purge(final BitSet elements) {
if(elements.isEmpty() || readStates.isEmpty()) return;
public int purge(final BitSet elements) {
int downsamplingExtent = 0;
if(elements.isEmpty() || readStates.isEmpty()) return downsamplingExtent;
Iterator<SAMRecordState> readStateIterator = readStates.iterator();
@ -806,7 +826,8 @@ public class LocusIteratorByState extends LocusIterator {
int removedCount = 0;
while(readStateIterator.hasNext() && toPurge >= 0) {
readStateIterator.next();
SAMRecordState state = readStateIterator.next();
downsamplingExtent = Math.max(downsamplingExtent,state.getRead().getAlignmentEnd());
if(readIndex == toPurge) {
readStateIterator.remove();
@ -826,6 +847,8 @@ public class LocusIteratorByState extends LocusIterator {
}
totalReadStates -= removedCount;
return downsamplingExtent;
}
}
}
@ -884,6 +907,14 @@ interface ReadSelector {
*/
public long getNumReadsSelected();
/**
* Gets the locus at which the last of the downsampled reads selected by this selector ends. The value returned will be the
* last aligned position from this selection to which a downsampled read aligns -- in other words, if a read is thrown out at
position 3 whose cigar string is 76M, the value returned will be 78 (the last aligned base of that read).
* @return If any read has been downsampled, this will return the last aligned base of the longest alignment. Else, 0.
*/
public int getDownsamplingExtent();
/**
* Get the reads selected by this selector.
* @return collection of reads selected by this selector.
@ -905,6 +936,7 @@ class FirstNReadSelector implements ReadSelector {
private final Collection<SAMRecord> selectedReads = new LinkedList<SAMRecord>();
private final long readLimit;
private long readsSeen = 0;
private int downsamplingExtent = 0;
public FirstNReadSelector(ReadSelector chainedSelector, long readLimit) {
this.chainedSelector = chainedSelector;
@ -918,8 +950,10 @@ class FirstNReadSelector implements ReadSelector {
chainedSelector.submitRead(read);
}
else
if(chainedSelector != null)
if(chainedSelector != null) {
chainedSelector.notifyReadRejected(read);
downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd());
}
readsSeen++;
}
@ -942,6 +976,10 @@ class FirstNReadSelector implements ReadSelector {
return selectedReads.size();
}
public int getDownsamplingExtent() {
return downsamplingExtent;
}
public Collection<SAMRecord> getSelectedReads() {
return selectedReads;
}
@ -949,6 +987,7 @@ class FirstNReadSelector implements ReadSelector {
public void reset() {
selectedReads.clear();
readsSeen = 0;
downsamplingExtent = 0;
if(chainedSelector != null)
chainedSelector.reset();
}
@ -961,6 +1000,7 @@ class NRandomReadSelector implements ReadSelector {
private final ReservoirDownsampler<SAMRecord> reservoir;
private final ReadSelector chainedSelector;
private long readsSeen = 0;
private int downsamplingExtent = 0;
public NRandomReadSelector(ReadSelector chainedSelector, long readLimit) {
this.reservoir = new ReservoirDownsampler<SAMRecord>((int)readLimit);
@ -969,8 +1009,10 @@ class NRandomReadSelector implements ReadSelector {
public void submitRead(SAMRecord read) {
SAMRecord displaced = reservoir.add(read);
if(displaced != null && chainedSelector != null)
if(displaced != null && chainedSelector != null) {
chainedSelector.notifyReadRejected(read);
downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd());
}
readsSeen++;
}
@ -994,12 +1036,17 @@ class NRandomReadSelector implements ReadSelector {
return reservoir.size();
}
public int getDownsamplingExtent() {
return downsamplingExtent;
}
public Collection<SAMRecord> getSelectedReads() {
return reservoir.getDownsampledContents();
}
public void reset() {
reservoir.clear();
downsamplingExtent = 0;
if(chainedSelector != null)
chainedSelector.reset();
}
@ -1041,6 +1088,13 @@ class SamplePartitioner implements ReadSelector {
return readsSeen;
}
public int getDownsamplingExtent() {
int downsamplingExtent = 0;
for(SampleStorage storage: readsBySample.values())
downsamplingExtent = Math.max(downsamplingExtent,storage.downsamplingExtent);
return downsamplingExtent;
}
public Collection<SAMRecord> getSelectedReads() {
throw new UnsupportedOperationException("Cannot directly get selected reads from a read partitioner.");
}
@ -1060,6 +1114,7 @@ class SamplePartitioner implements ReadSelector {
private class SampleStorage implements ReadSelector {
private Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
private long readsSeen = 0;
private int downsamplingExtent = 0;
public void submitRead(SAMRecord read) {
reads.add(read);
@ -1068,6 +1123,7 @@ class SamplePartitioner implements ReadSelector {
public void notifyReadRejected(SAMRecord read) {
readsSeen++;
downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd());
}
public void complete() {
@ -1082,6 +1138,10 @@ class SamplePartitioner implements ReadSelector {
return readsSeen;
}
public int getDownsamplingExtent() {
return downsamplingExtent;
}
public Collection<SAMRecord> getSelectedReads() {
return reads;
}
@ -1089,6 +1149,7 @@ class SamplePartitioner implements ReadSelector {
public void reset() {
reads.clear();
readsSeen = 0;
downsamplingExtent = 0;
}
}
}

View File

@ -46,6 +46,7 @@ public abstract class AbstractReadBackedPileup<RBP extends ReadBackedPileup,PE e
protected int size = 0; // cached value of the size of the pileup
protected int nDeletions = 0; // cached value of the number of deletions
protected int nMQ0Reads = 0; // cached value of the number of MQ0 reads
protected boolean hasPileupBeenDownsampled;
/**
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
@ -95,7 +96,7 @@ public abstract class AbstractReadBackedPileup<RBP extends ReadBackedPileup,PE e
* @param loc
* @param pileup
*/
public AbstractReadBackedPileup(GenomeLoc loc, List<PE> pileup, int size, int nDeletions, int nMQ0Reads ) {
public AbstractReadBackedPileup(GenomeLoc loc, List<PE> pileup, int size, int nDeletions, int nMQ0Reads, boolean hasPileupBeenDownsampled ) {
if ( loc == null ) throw new StingException("Illegal null genomeloc in UnifiedReadBackedPileup");
if ( pileup == null ) throw new StingException("Illegal null pileup in UnifiedReadBackedPileup");
@ -104,6 +105,7 @@ public abstract class AbstractReadBackedPileup<RBP extends ReadBackedPileup,PE e
this.size = size;
this.nDeletions = nDeletions;
this.nMQ0Reads = nMQ0Reads;
this.hasPileupBeenDownsampled = hasPileupBeenDownsampled;
}
@ -148,6 +150,7 @@ public abstract class AbstractReadBackedPileup<RBP extends ReadBackedPileup,PE e
size += pileup.size();
nDeletions += pileup.getNumberOfDeletions();
nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
hasPileupBeenDownsampled |= pileup.hasPileupBeenDownsampled();
}
/**
@ -560,6 +563,11 @@ public abstract class AbstractReadBackedPileup<RBP extends ReadBackedPileup,PE e
return loc;
}
@Override
public boolean hasPileupBeenDownsampled() {
return hasPileupBeenDownsampled;
}
/**
* Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated
* @return

View File

@ -34,6 +34,10 @@ import net.sf.samtools.SAMRecord;
public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<ReadBackedExtendedEventPileup, ExtendedEventPileupElement> implements ReadBackedExtendedEventPileup {
private int nInsertions;
private int maxDeletionLength; // cached value of the length of the longest deletion observed at the site
/**
* True if this pileup has been downsampled due to excessive coverage depth. False otherwise.
*/
private boolean hasBeenDownsampled;
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List<ExtendedEventPileupElement> pileupElements) {
super(loc,pileupElements);
@ -49,8 +53,8 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
* @param pileup
*/
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List<ExtendedEventPileupElement> pileup, int size,
int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads ) {
super(loc,pileup,size,nDeletions,nMQ0Reads);
int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads, boolean hasPileupBeenDownsampled ) {
super(loc,pileup,size,nDeletions,nMQ0Reads,hasPileupBeenDownsampled);
this.maxDeletionLength = maxDeletionLength;
this.nInsertions = nInsertions;
}

View File

@ -96,6 +96,12 @@ public interface ReadBackedPileup extends Iterable<PileupElement> {
*/
public ReadBackedPileup getDownsampledPileup(int desiredCoverage);
/**
* Returns true if any reads have been filtered out of the pileup due to excess depth of coverage (DoC).
* @return True if reads have been filtered out. False otherwise.
*/
public boolean hasPileupBeenDownsampled();
/**
* Gets a collection of all the samples stored in this pileup.
* @return Collection of samples in this pileup.

View File

@ -56,8 +56,8 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
* @param loc
* @param pileup
*/
public ReadBackedPileupImpl(GenomeLoc loc, List<PileupElement> pileup, int size, int nDeletions, int nMQ0Reads ) {
super(loc,pileup,size,nDeletions,nMQ0Reads);
public ReadBackedPileupImpl(GenomeLoc loc, List<PileupElement> pileup, int size, int nDeletions, int nMQ0Reads, boolean hasPileupBeenDownsampled ) {
super(loc,pileup,size,nDeletions,nMQ0Reads,hasPileupBeenDownsampled);
}
protected ReadBackedPileupImpl(GenomeLoc loc, PileupElementTracker<PileupElement> tracker) {