From 3a9d426ca8446545dd08ebdae2ebb9cd196ed20f Mon Sep 17 00:00:00 2001 From: hanna Date: Mon, 28 Jun 2010 04:56:33 +0000 Subject: [PATCH] Added hasPileupBeenDownsampled() boolean to ReadBackedPileup, so that a pileup can report whether or not (but not how much) it's been downsampled. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3649 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/iterators/LocusIteratorByState.java | 79 ++++++++++++++++--- .../pileup/AbstractReadBackedPileup.java | 10 ++- .../ReadBackedExtendedEventPileupImpl.java | 8 +- .../sting/utils/pileup/ReadBackedPileup.java | 6 ++ .../utils/pileup/ReadBackedPileupImpl.java | 4 +- 5 files changed, 93 insertions(+), 14 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index fdf24a127..67deb7b1e 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -370,6 +370,7 @@ public class LocusIteratorByState extends LocusIterator { for(String sampleName: sampleNames) { Iterator iterator = readStates.iteratorForSample(sampleName); ArrayList indelPile = new ArrayList(readStates.size()); + boolean hasBeenSampled = loc.getStart() <= readStates.readStatesBySample.get(sampleName).getDownsamplingExtent(); size = 0; nDeletions = 0; @@ -413,7 +414,7 @@ public class LocusIteratorByState extends LocusIterator { nMQ0Reads++; } } - if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nInsertions,nDeletions,nMQ0Reads)); + if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nInsertions,nDeletions,nMQ0Reads,hasBeenSampled)); } hasExtendedEvents = false; // we are done with extended events prior to current ref base // System.out.println("Indel(s) at "+loc); @@ -427,6 +428,7 @@ public class LocusIteratorByState extends LocusIterator { for(String sampleName: sampleNames) { Iterator iterator = readStates.iteratorForSample(sampleName); ArrayList pile = new ArrayList(readStates.size()); + boolean hasBeenSampled = location.getStart() <= readStates.readStatesBySample.get(sampleName).getDownsamplingExtent(); size = 0; nDeletions = 0; @@ -456,7 +458,7 @@ public class LocusIteratorByState extends LocusIterator { nMQ0Reads++; } } - if( pile.size() != 0 ) fullPileup.put(sampleName,new ReadBackedPileupImpl(location,pile,size,nDeletions,nMQ0Reads)); + if( pile.size() != 0 ) fullPileup.put(sampleName,new ReadBackedPileupImpl(location,pile,size,nDeletions,nMQ0Reads,hasBeenSampled)); } updateReadStates(); // critical - must be called after we get the current state offsets and location @@ -570,7 +572,9 @@ public class LocusIteratorByState extends LocusIterator { for(String sampleName: sampleNames) readStatesBySample.put(sampleName,new PerSampleReadStateManager()); - ReadSelector primaryReadSelector= samplePartitioner; + ReadSelector primaryReadSelector = samplePartitioner; + if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_BY_SAMPLE) + primaryReadSelector = new NRandomReadSelector(primaryReadSelector,targetCoverage); chainedReadSelector = maxReadsAtLocus!=Integer.MAX_VALUE ? new FirstNReadSelector(primaryReadSelector,maxReadsAtLocus) : primaryReadSelector; } @@ -644,6 +648,7 @@ public class LocusIteratorByState extends LocusIterator { PerSampleReadStateManager statesBySample = readStatesBySample.get(sampleName); int numReads = statesBySample.size(); + int downsamplingExtent = aggregator.getDownsamplingExtent(); if(numReads+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE) { long readLimit = aggregator.getNumReadsSeen(); @@ -653,6 +658,7 @@ public class LocusIteratorByState extends LocusIterator { mrlViolation = true; } addReadsToSample(statesBySample,newReads,readLimit,mrlViolation); + statesBySample.specifyNewDownsamplingExtent(downsamplingExtent); } else { int[] counts = statesBySample.getCountsPerAlignmentStart(); @@ -688,8 +694,10 @@ public class LocusIteratorByState extends LocusIterator { readOffset += counts[i]; } - statesBySample.purge(toPurge); + downsamplingExtent = Math.max(downsamplingExtent,statesBySample.purge(toPurge)); + addReadsToSample(statesBySample,newReads,targetCoverage-numReads,false); + statesBySample.specifyNewDownsamplingExtent(downsamplingExtent); } } chainedReadSelector.reset(); @@ -737,6 +745,7 @@ public class LocusIteratorByState extends LocusIterator { private class PerSampleReadStateManager implements Iterable { private final Queue readStates = new LinkedList(); private final Deque readStateCounter = new LinkedList(); + private int downsamplingExtent = 0; public void addStatesAtNextAlignmentStart(Collection states) { readStates.addAll(states); @@ -756,6 +765,14 @@ public class LocusIteratorByState extends LocusIterator { return readStates.size(); } + public void specifyNewDownsamplingExtent(int downsamplingExtent) { + this.downsamplingExtent = Math.max(this.downsamplingExtent,downsamplingExtent); + } + + public int getDownsamplingExtent() { + return downsamplingExtent; + } + public int[] getCountsPerAlignmentStart() { int[] counts = new int[readStateCounter.size()]; int index = 0; @@ -790,9 +807,12 @@ public class LocusIteratorByState extends LocusIterator { * Purge the given elements from the bitset. If an element in the bitset is true, purge * the corresponding read state. * @param elements bits from the set to purge. + * @return the extent of the final downsampled read. */ - public void purge(final BitSet elements) { - if(elements.isEmpty() || readStates.isEmpty()) return; + public int purge(final BitSet elements) { + int downsamplingExtent = 0; + + if(elements.isEmpty() || readStates.isEmpty()) return downsamplingExtent; Iterator readStateIterator = readStates.iterator(); @@ -806,7 +826,8 @@ public class LocusIteratorByState extends LocusIterator { int removedCount = 0; while(readStateIterator.hasNext() && toPurge >= 0) { - readStateIterator.next(); + SAMRecordState state = readStateIterator.next(); + downsamplingExtent = Math.max(downsamplingExtent,state.getRead().getAlignmentEnd()); if(readIndex == toPurge) { readStateIterator.remove(); @@ -826,6 +847,8 @@ public class LocusIteratorByState extends LocusIterator { } totalReadStates -= removedCount; + + return downsamplingExtent; } } } @@ -884,6 +907,14 @@ interface ReadSelector { */ public long getNumReadsSelected(); + /** + * Gets the locus at which the last of the downsampled reads selected by this selector ends. The value returned will be the + * last aligned position from this selection to which a downsampled read aligns -- in other words, if a read is thrown out at + * position 3 whose cigar string is 76M, the value of this parameter will be 78. + * @return If any read has been downsampled, this will return the last aligned base of the longest alignment. Else, 0. + */ + public int getDownsamplingExtent(); + /** * Get the reads selected by this selector. * @return collection of reads selected by this selector. @@ -905,6 +936,7 @@ class FirstNReadSelector implements ReadSelector { private final Collection selectedReads = new LinkedList(); private final long readLimit; private long readsSeen = 0; + private int downsamplingExtent = 0; public FirstNReadSelector(ReadSelector chainedSelector, long readLimit) { this.chainedSelector = chainedSelector; @@ -918,8 +950,10 @@ class FirstNReadSelector implements ReadSelector { chainedSelector.submitRead(read); } else - if(chainedSelector != null) + if(chainedSelector != null) { chainedSelector.notifyReadRejected(read); + downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd()); + } readsSeen++; } @@ -942,6 +976,10 @@ class FirstNReadSelector implements ReadSelector { return selectedReads.size(); } + public int getDownsamplingExtent() { + return downsamplingExtent; + } + public Collection getSelectedReads() { return selectedReads; } @@ -949,6 +987,7 @@ class FirstNReadSelector implements ReadSelector { public void reset() { selectedReads.clear(); readsSeen = 0; + downsamplingExtent = 0; if(chainedSelector != null) chainedSelector.reset(); } @@ -961,6 +1000,7 @@ class NRandomReadSelector implements ReadSelector { private final ReservoirDownsampler reservoir; private final ReadSelector chainedSelector; private long readsSeen = 0; + private int downsamplingExtent = 0; public NRandomReadSelector(ReadSelector chainedSelector, long readLimit) { this.reservoir = new ReservoirDownsampler((int)readLimit); @@ -969,8 +1009,10 @@ class NRandomReadSelector implements ReadSelector { public void submitRead(SAMRecord read) { SAMRecord displaced = reservoir.add(read); - if(displaced != null && chainedSelector != null) + if(displaced != null && chainedSelector != null) { chainedSelector.notifyReadRejected(read); + downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd()); + } readsSeen++; } @@ -994,12 +1036,17 @@ class NRandomReadSelector implements ReadSelector { return reservoir.size(); } + public int getDownsamplingExtent() { + return downsamplingExtent; + } + public Collection getSelectedReads() { return reservoir.getDownsampledContents(); } public void reset() { reservoir.clear(); + downsamplingExtent = 0; if(chainedSelector != null) chainedSelector.reset(); } @@ -1041,6 +1088,13 @@ class SamplePartitioner implements ReadSelector { return readsSeen; } + public int getDownsamplingExtent() { + int downsamplingExtent = 0; + for(SampleStorage storage: readsBySample.values()) + downsamplingExtent = Math.max(downsamplingExtent,storage.downsamplingExtent); + return downsamplingExtent; + } + public Collection getSelectedReads() { throw new UnsupportedOperationException("Cannot directly get selected reads from a read partitioner."); } @@ -1060,6 +1114,7 @@ class SamplePartitioner implements ReadSelector { private class SampleStorage implements ReadSelector { private Collection reads = new LinkedList(); private long readsSeen = 0; + private int downsamplingExtent = 0; public void submitRead(SAMRecord read) { reads.add(read); @@ -1068,6 +1123,7 @@ class SamplePartitioner implements ReadSelector { public void notifyReadRejected(SAMRecord read) { readsSeen++; + downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd()); } public void complete() { @@ -1082,6 +1138,10 @@ class SamplePartitioner implements ReadSelector { return readsSeen; } + public int getDownsamplingExtent() { + return downsamplingExtent; + } + public Collection getSelectedReads() { return reads; } @@ -1089,6 +1149,7 @@ class SamplePartitioner implements ReadSelector { public void reset() { reads.clear(); readsSeen = 0; + downsamplingExtent = 0; } } } diff --git a/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java index 6d980932d..37127b0c8 100644 --- a/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -46,6 +46,7 @@ public abstract class AbstractReadBackedPileup pileup, int size, int nDeletions, int nMQ0Reads ) { + public AbstractReadBackedPileup(GenomeLoc loc, List pileup, int size, int nDeletions, int nMQ0Reads, boolean hasPileupBeenDownsampled ) { if ( loc == null ) throw new StingException("Illegal null genomeloc in UnifiedReadBackedPileup"); if ( pileup == null ) throw new StingException("Illegal null pileup in UnifiedReadBackedPileup"); @@ -104,6 +105,7 @@ public abstract class AbstractReadBackedPileup implements ReadBackedExtendedEventPileup { private int nInsertions; private int maxDeletionLength; // cached value of the length of the longest deletion observed at the site + /** + * True if this pileup has been downsampled due to excessive coverage depth. False otherwise. + */ + private boolean hasBeenDownsampled; public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List pileupElements) { super(loc,pileupElements); @@ -49,8 +53,8 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup< * @param pileup */ public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List pileup, int size, - int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads ) { - super(loc,pileup,size,nDeletions,nMQ0Reads); + int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads, boolean hasPileupBeenDownsampled ) { + super(loc,pileup,size,nDeletions,nMQ0Reads,hasPileupBeenDownsampled); this.maxDeletionLength = maxDeletionLength; this.nInsertions = nInsertions; } diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java index 58f09039e..74fb7b6d9 100644 --- a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java @@ -96,6 +96,12 @@ public interface ReadBackedPileup extends Iterable { */ public ReadBackedPileup getDownsampledPileup(int desiredCoverage); + /** + * Returns true if any reads have been filtered out of the pileup due to excess DoC. + * @return True if reads have been filtered out. False otherwise. + */ + public boolean hasPileupBeenDownsampled(); + /** * Gets a collection of all the samples stored in this pileup. * @return Collection of samples in this pileup. diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java index 145e2f5e6..b1d871bee 100644 --- a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java @@ -56,8 +56,8 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup pileup, int size, int nDeletions, int nMQ0Reads ) { - super(loc,pileup,size,nDeletions,nMQ0Reads); + public ReadBackedPileupImpl(GenomeLoc loc, List pileup, int size, int nDeletions, int nMQ0Reads, boolean hasPileupBeenDownsampled ) { + super(loc,pileup,size,nDeletions,nMQ0Reads,hasPileupBeenDownsampled); } protected ReadBackedPileupImpl(GenomeLoc loc, PileupElementTracker tracker) {