Chris found a bug in the downsampler: if the number of reads entering
the pileup at the next alignment start is large, we don't add as many of those incoming reads as we should. No integration tests were affected. Thanks, Chris!

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4378 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0ec07ad99a
commit
eee134baf2
|
|
@ -528,7 +528,6 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
private final PeekableIterator<SAMRecord> iterator;
|
private final PeekableIterator<SAMRecord> iterator;
|
||||||
private final DownsamplingMethod downsamplingMethod;
|
private final DownsamplingMethod downsamplingMethod;
|
||||||
|
|
||||||
private final ReadSelector chainedReadSelector;
|
|
||||||
private final SamplePartitioner samplePartitioner;
|
private final SamplePartitioner samplePartitioner;
|
||||||
|
|
||||||
private final Map<String,PerSampleReadStateManager> readStatesBySample = new HashMap<String,PerSampleReadStateManager>();
|
private final Map<String,PerSampleReadStateManager> readStatesBySample = new HashMap<String,PerSampleReadStateManager>();
|
||||||
|
|
@ -550,15 +549,13 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
this.targetCoverage = Integer.MAX_VALUE;
|
this.targetCoverage = Integer.MAX_VALUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
samplePartitioner = new SamplePartitioner(sampleNames);
|
Map<String,ReadSelector> readSelectors = new HashMap<String,ReadSelector>();
|
||||||
for(String sampleName: sampleNames)
|
for(String sampleName: sampleNames) {
|
||||||
readStatesBySample.put(sampleName,new PerSampleReadStateManager());
|
readStatesBySample.put(sampleName,new PerSampleReadStateManager());
|
||||||
|
readSelectors.put(sampleName,downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
|
||||||
|
}
|
||||||
|
|
||||||
ReadSelector primaryReadSelector = samplePartitioner;
|
samplePartitioner = new SamplePartitioner(readSelectors);
|
||||||
if(downsamplingMethod.type == DownsampleType.BY_SAMPLE)
|
|
||||||
primaryReadSelector = new NRandomReadSelector(primaryReadSelector,targetCoverage);
|
|
||||||
|
|
||||||
chainedReadSelector = primaryReadSelector;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterator<SAMRecordState> iteratorForSample(final String sampleName) {
|
public Iterator<SAMRecordState> iteratorForSample(final String sampleName) {
|
||||||
|
|
@ -609,7 +606,7 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
int firstContigIndex = iterator.peek().getReferenceIndex();
|
int firstContigIndex = iterator.peek().getReferenceIndex();
|
||||||
int firstAlignmentStart = iterator.peek().getAlignmentStart();
|
int firstAlignmentStart = iterator.peek().getAlignmentStart();
|
||||||
while(iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
|
while(iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
|
||||||
chainedReadSelector.submitRead(iterator.next());
|
samplePartitioner.submitRead(iterator.next());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
@ -618,10 +615,10 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
return;
|
return;
|
||||||
|
|
||||||
while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
|
while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
|
||||||
chainedReadSelector.submitRead(iterator.next());
|
samplePartitioner.submitRead(iterator.next());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
chainedReadSelector.complete();
|
samplePartitioner.complete();
|
||||||
|
|
||||||
for(String sampleName: sampleNames) {
|
for(String sampleName: sampleNames) {
|
||||||
ReadSelector aggregator = samplePartitioner.getSelectedReads(sampleName);
|
ReadSelector aggregator = samplePartitioner.getSelectedReads(sampleName);
|
||||||
|
|
@ -677,7 +674,7 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
|
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
chainedReadSelector.reset();
|
samplePartitioner.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -890,6 +887,52 @@ interface ReadSelector {
|
||||||
public void reset();
|
public void reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Select every read passed in.
*
* Every read handed to submitRead() is appended to an internal list and
* counted. Reads reported through notifyReadRejected() are counted as seen
* (but not stored) and push the downsampling extent out to the largest
* alignment end among them. reset() returns the selector to its initial
* empty state.
|
||||||
|
*/
|
||||||
|
class AllReadsSelector implements ReadSelector {
|
||||||
|
// Reads accepted so far, kept in submission order.
private Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
|
||||||
|
// Count of reads passed to submitRead() plus reads passed to notifyReadRejected().
private long readsSeen = 0;
|
||||||
|
// Largest getAlignmentEnd() among reads reported as rejected; 0 if none.
private int downsamplingExtent = 0;
|
||||||
|
|
||||||
|
// Accepts the read unconditionally: stores it and counts it as seen.
public void submitRead(SAMRecord read) {
|
||||||
|
reads.add(read);
|
||||||
|
readsSeen++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Records a read that was rejected: it is counted as seen but not stored,
// and the downsampling extent grows to cover its alignment end.
public void notifyReadRejected(SAMRecord read) {
|
||||||
|
readsSeen++;
|
||||||
|
downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Nothing to finalize; reads are stored as soon as they are submitted.
public void complete() {
|
||||||
|
// NO-OP.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the number of reads submitted or reported as rejected since the last reset().
public long getNumReadsSeen() {
|
||||||
|
return readsSeen;
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE(review): this returns readsSeen, which notifyReadRejected() also
// increments, so the value can exceed the number of stored reads whenever
// rejections have been reported -- confirm this is intended.
public long getNumReadsSelected() {
|
||||||
|
return readsSeen;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the largest alignment end among rejected reads, or 0 if none were reported.
public int getDownsamplingExtent() {
|
||||||
|
return downsamplingExtent;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the internal collection itself, not a copy; reset() clears it.
public Collection<SAMRecord> getSelectedReads() {
|
||||||
|
return reads;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drops all stored reads and zeroes the counter and the downsampling extent.
public void reset() {
|
||||||
|
reads.clear();
|
||||||
|
readsSeen = 0;
|
||||||
|
downsamplingExtent = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Select N reads randomly from the input stream.
|
* Select N reads randomly from the input stream.
|
||||||
*/
|
*/
|
||||||
|
|
@ -950,13 +993,11 @@ class NRandomReadSelector implements ReadSelector {
|
||||||
}
|
}
|
||||||
|
|
||||||
class SamplePartitioner implements ReadSelector {
|
class SamplePartitioner implements ReadSelector {
|
||||||
private final Map<String,SampleStorage> readsBySample;
|
private final Map<String,ReadSelector> readsBySample;
|
||||||
private long readsSeen = 0;
|
private long readsSeen = 0;
|
||||||
|
|
||||||
public SamplePartitioner(Collection<String> sampleNames) {
|
public SamplePartitioner(Map<String,ReadSelector> readSelectors) {
|
||||||
readsBySample = new HashMap<String,SampleStorage>();
|
readsBySample = readSelectors;
|
||||||
for(String sampleName: sampleNames)
|
|
||||||
readsBySample.put(sampleName,new SampleStorage());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void submitRead(SAMRecord read) {
|
public void submitRead(SAMRecord read) {
|
||||||
|
|
@ -987,8 +1028,8 @@ class SamplePartitioner implements ReadSelector {
|
||||||
|
|
||||||
public int getDownsamplingExtent() {
|
public int getDownsamplingExtent() {
|
||||||
int downsamplingExtent = 0;
|
int downsamplingExtent = 0;
|
||||||
for(SampleStorage storage: readsBySample.values())
|
for(ReadSelector storage: readsBySample.values())
|
||||||
downsamplingExtent = Math.max(downsamplingExtent,storage.downsamplingExtent);
|
downsamplingExtent = Math.max(downsamplingExtent,storage.getDownsamplingExtent());
|
||||||
return downsamplingExtent;
|
return downsamplingExtent;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1003,52 +1044,11 @@ class SamplePartitioner implements ReadSelector {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset() {
|
public void reset() {
|
||||||
for(SampleStorage storage: readsBySample.values())
|
for(ReadSelector storage: readsBySample.values())
|
||||||
storage.reset();
|
storage.reset();
|
||||||
readsSeen = 0;
|
readsSeen = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private class SampleStorage implements ReadSelector {
|
|
||||||
private Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
|
|
||||||
private long readsSeen = 0;
|
|
||||||
private int downsamplingExtent = 0;
|
|
||||||
|
|
||||||
public void submitRead(SAMRecord read) {
|
|
||||||
reads.add(read);
|
|
||||||
readsSeen++;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void notifyReadRejected(SAMRecord read) {
|
|
||||||
readsSeen++;
|
|
||||||
downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd());
|
|
||||||
}
|
|
||||||
|
|
||||||
public void complete() {
|
|
||||||
// NO-OP.
|
|
||||||
}
|
|
||||||
|
|
||||||
public long getNumReadsSeen() {
|
|
||||||
return readsSeen;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long getNumReadsSelected() {
|
|
||||||
return readsSeen;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getDownsamplingExtent() {
|
|
||||||
return downsamplingExtent;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Collection<SAMRecord> getSelectedReads() {
|
|
||||||
return reads;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void reset() {
|
|
||||||
reads.clear();
|
|
||||||
readsSeen = 0;
|
|
||||||
downsamplingExtent = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue