From eee134baf2dab43368b0a078ca5a0b9838ba25e8 Mon Sep 17 00:00:00 2001 From: hanna Date: Wed, 29 Sep 2010 11:18:12 +0000 Subject: [PATCH] Chris found a bug in the downsampler where, if the number of reads entering the pileup at the next alignment start is large, we don't add as many of those incoming reads as we should. No integration tests were affected. Thanks, Chris! git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4378 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/iterators/LocusIteratorByState.java | 122 +++++++++--------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 104bbb583..f8e8cfe58 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -528,7 +528,6 @@ public class LocusIteratorByState extends LocusIterator { private final PeekableIterator iterator; private final DownsamplingMethod downsamplingMethod; - private final ReadSelector chainedReadSelector; private final SamplePartitioner samplePartitioner; private final Map readStatesBySample = new HashMap(); @@ -550,15 +549,13 @@ public class LocusIteratorByState extends LocusIterator { this.targetCoverage = Integer.MAX_VALUE; } - samplePartitioner = new SamplePartitioner(sampleNames); - for(String sampleName: sampleNames) + Map readSelectors = new HashMap(); + for(String sampleName: sampleNames) { readStatesBySample.put(sampleName,new PerSampleReadStateManager()); + readSelectors.put(sampleName,downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector()); + } - ReadSelector primaryReadSelector = samplePartitioner; - if(downsamplingMethod.type == DownsampleType.BY_SAMPLE) - primaryReadSelector = new NRandomReadSelector(primaryReadSelector,targetCoverage); - - chainedReadSelector = primaryReadSelector; + samplePartitioner = new SamplePartitioner(readSelectors); } public Iterator iteratorForSample(final String sampleName) { @@ -609,7 +606,7 @@ public class LocusIteratorByState extends LocusIterator { int firstContigIndex = iterator.peek().getReferenceIndex(); int firstAlignmentStart = iterator.peek().getAlignmentStart(); while(iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) { - chainedReadSelector.submitRead(iterator.next()); + samplePartitioner.submitRead(iterator.next()); } } else { @@ -618,10 +615,10 @@ public class LocusIteratorByState extends LocusIterator { return; while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) { - chainedReadSelector.submitRead(iterator.next()); + samplePartitioner.submitRead(iterator.next()); } } - chainedReadSelector.complete(); + samplePartitioner.complete(); for(String sampleName: sampleNames) { ReadSelector aggregator = samplePartitioner.getSelectedReads(sampleName); @@ -677,7 +674,7 @@ public class LocusIteratorByState extends LocusIterator { statesBySample.specifyNewDownsamplingExtent(downsamplingExtent); } } - chainedReadSelector.reset(); + samplePartitioner.reset(); } /** @@ -890,6 +887,52 @@ interface ReadSelector { public void reset(); } +/** + * Select every read passed in. + */ +class AllReadsSelector implements ReadSelector { + private Collection reads = new LinkedList(); + private long readsSeen = 0; + private int downsamplingExtent = 0; + + public void submitRead(SAMRecord read) { + reads.add(read); + readsSeen++; + } + + public void notifyReadRejected(SAMRecord read) { + readsSeen++; + downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd()); + } + + public void complete() { + // NO-OP. + } + + public long getNumReadsSeen() { + return readsSeen; + } + + public long getNumReadsSelected() { + return readsSeen; + } + + public int getDownsamplingExtent() { + return downsamplingExtent; + } + + public Collection getSelectedReads() { + return reads; + } + + public void reset() { + reads.clear(); + readsSeen = 0; + downsamplingExtent = 0; + } +} + + /** * Select N reads randomly from the input stream. */ @@ -950,13 +993,11 @@ class NRandomReadSelector implements ReadSelector { } class SamplePartitioner implements ReadSelector { - private final Map readsBySample; + private final Map readsBySample; private long readsSeen = 0; - public SamplePartitioner(Collection sampleNames) { - readsBySample = new HashMap(); - for(String sampleName: sampleNames) - readsBySample.put(sampleName,new SampleStorage()); + public SamplePartitioner(Map readSelectors) { + readsBySample = readSelectors; } public void submitRead(SAMRecord read) { @@ -987,8 +1028,8 @@ class SamplePartitioner implements ReadSelector { public int getDownsamplingExtent() { int downsamplingExtent = 0; - for(SampleStorage storage: readsBySample.values()) - downsamplingExtent = Math.max(downsamplingExtent,storage.downsamplingExtent); + for(ReadSelector storage: readsBySample.values()) + downsamplingExtent = Math.max(downsamplingExtent,storage.getDownsamplingExtent()); return downsamplingExtent; } @@ -1003,52 +1044,11 @@ class SamplePartitioner implements ReadSelector { } public void reset() { - for(SampleStorage storage: readsBySample.values()) + for(ReadSelector storage: readsBySample.values()) storage.reset(); readsSeen = 0; } - private class SampleStorage implements ReadSelector { - private Collection reads = new LinkedList(); - private long readsSeen = 0; - private int downsamplingExtent = 0; - - public void submitRead(SAMRecord read) { - reads.add(read); - readsSeen++; - } - - public void notifyReadRejected(SAMRecord read) { - readsSeen++; - downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd()); - } - - public void complete() { - // NO-OP. - } - - public long getNumReadsSeen() { - return readsSeen; - } - - public long getNumReadsSelected() { - return readsSeen; - } - - public int getDownsamplingExtent() { - return downsamplingExtent; - } - - public Collection getSelectedReads() { - return reads; - } - - public void reset() { - reads.clear(); - readsSeen = 0; - downsamplingExtent = 0; - } - } }