diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/DownsamplingLocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/DownsamplingLocusIteratorByState.java index ec32ef32b..949ef8cf0 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/DownsamplingLocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/DownsamplingLocusIteratorByState.java @@ -521,7 +521,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { private final int targetCoverage; private final int maxReadsAtLocus; - private final Deque>> readStatesByAlignmentStart; + private final Map>> readStatesBySample; private int totalReadStatesInHanger = 0; /** @@ -547,35 +547,40 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { this.targetCoverage = Integer.MAX_VALUE; } this.maxReadsAtLocus = maxReadsAtLocus; - if(downsamplingMethod.type == DownsampleType.NONE) + this.readStatesBySample = new HashMap>>(); + + if(downsamplingMethod.type == DownsampleType.NONE) { aggregatorsBySampleName.put(null,new ArrayList()); - else if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) - aggregatorsBySampleName.put(null,new ReservoirDownsampler(targetCoverage)); - else { - for(String sampleName: sampleNames) - aggregatorsBySampleName.put(sampleName,new ReservoirDownsampler(targetCoverage)); + readStatesBySample.put(null,new LinkedList>()); + } + else if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) { + aggregatorsBySampleName.put(null,new ReservoirDownsampler(targetCoverage)); + readStatesBySample.put(null,new LinkedList>()); + } + else { + for(String sampleName: sampleNames) { + aggregatorsBySampleName.put(sampleName,new ReservoirDownsampler(targetCoverage)); + readStatesBySample.put(sampleName,new LinkedList>()); + } } - this.readStatesByAlignmentStart = new LinkedList>>(); } public Iterator iterator() { return new Iterator() { - private Iterator>> alignmentStartIterator; - private Map> currentAlignmentStart; - + private final Iterator>> sampleIterators; private Iterator> sampleIterator; - private List currentSample; - - private Iterator readStateIterator; + private List currentAlignmentStart; + private Iterator alignmentStartIterator; private SAMRecordState nextReadState; + private int readsInHanger = totalReadStatesInHanger; { - alignmentStartIterator = readStatesByAlignmentStart.iterator(); - currentAlignmentStart = alignmentStartIterator.hasNext() ? alignmentStartIterator.next() : null; - sampleIterator = currentAlignmentStart!=null ? currentAlignmentStart.values().iterator() : null; - currentSample = sampleIterator!=null && sampleIterator.hasNext() ? sampleIterator.next() : null; - readStateIterator = currentSample!=null ? currentSample.iterator() : null; + List>> sampleIteratorList = new LinkedList>>(); + for(Deque> hanger: readStatesBySample.values()) + sampleIteratorList.add(hanger.iterator()); + sampleIterators = sampleIteratorList.iterator(); + advance(); } public boolean hasNext() { @@ -589,49 +594,50 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { return nextReadState; } finally { + readsInHanger--; nextReadState = null; } } public void remove() { - if(readStateIterator == null) - throw new StingException("Attempted to remove read, but no previous read was found."); - readStateIterator.remove(); - if(currentSample.isEmpty()) sampleIterator.remove(); - if(currentAlignmentStart.isEmpty()) alignmentStartIterator.remove(); + if(alignmentStartIterator == null) + throw new StingException("Cannot remove read -- iterator is in an invalid state."); + alignmentStartIterator.remove(); + if(currentAlignmentStart.isEmpty()) + sampleIterator.remove(); totalReadStatesInHanger--; } private void advance() { - nextReadState = null; - if(readStateIterator!=null && readStateIterator.hasNext()) - nextReadState = readStateIterator.next(); - else if(sampleIterator!=null && sampleIterator.hasNext()) { - currentSample = sampleIterator.next(); - readStateIterator = currentSample.iterator(); - nextReadState = readStateIterator.hasNext() ? readStateIterator.next() : null; + // nextReadState != null indicates that we haven't returned this value from the next() method yet. + if(nextReadState != null) + return; + if(alignmentStartIterator!=null&&alignmentStartIterator.hasNext()) { + nextReadState = alignmentStartIterator.next(); } - else if(alignmentStartIterator!=null && alignmentStartIterator.hasNext()) { - currentAlignmentStart = alignmentStartIterator.next(); - sampleIterator = currentAlignmentStart != null ? currentAlignmentStart.values().iterator() : null; - currentSample = (sampleIterator!=null && sampleIterator.hasNext()) ? sampleIterator.next() : null; - readStateIterator = currentSample!=null ? currentSample.iterator() : null; - nextReadState = (readStateIterator!=null && readStateIterator.hasNext()) ? readStateIterator.next() : null; + else if(sampleIterator!=null&&sampleIterator.hasNext()) { + currentAlignmentStart = sampleIterator.next(); + alignmentStartIterator = currentAlignmentStart!=null ? currentAlignmentStart.iterator() : null; + nextReadState = alignmentStartIterator!=null&&alignmentStartIterator.hasNext() ? alignmentStartIterator.next() : null; + } + else if(sampleIterators.hasNext()) { + sampleIterator = sampleIterators.next(); + currentAlignmentStart = sampleIterator!=null&&sampleIterator.hasNext() ? sampleIterator.next() : null; + alignmentStartIterator = currentAlignmentStart!=null ? currentAlignmentStart.iterator() : null; + nextReadState = alignmentStartIterator!=null&&alignmentStartIterator.hasNext() ? alignmentStartIterator.next() : null; } - - if(nextReadState != null) readsInHanger--; } }; } public boolean isEmpty() { - return readStatesByAlignmentStart.isEmpty(); + return readStatesBySample.isEmpty(); } public int size() { int size = 0; - for(Map> readStatesBySample: readStatesByAlignmentStart) { - for(Collection readStates: readStatesBySample.values()) + for(Deque> readStatesByAlignmentStart: readStatesBySample.values()) { + for(Collection readStates: readStatesByAlignmentStart) size += readStates.size(); } return size; @@ -642,7 +648,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { } public boolean hasNext() { - return !readStatesByAlignmentStart.isEmpty() || iterator.hasNext(); + return totalReadStatesInHanger > 0 || iterator.hasNext(); } public void collectPendingReads() { @@ -665,7 +671,6 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { } } - Map> culledReadStatesBySample = new HashMap>(); int readStatesInHangerEntry = 0; for(Map.Entry> entry: aggregatorsBySampleName.entrySet()) { @@ -674,6 +679,8 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { Collection newReads = new ArrayList(aggregator); aggregator.clear(); + + Deque> hanger = readStatesBySample.get(sampleName); int readsInHanger = countReadsInHanger(sampleName); if(readsInHanger+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE || downsamplingMethod.type==DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) { @@ -683,15 +690,15 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { readLimit = maxReadsAtLocus-totalReadStatesInHanger; mrlViolation = true; } - readStatesInHangerEntry += addReadsToHanger(culledReadStatesBySample,sampleName,newReads,readLimit,mrlViolation); + readStatesInHangerEntry += addReadsToHanger(hanger,newReads,readLimit,mrlViolation); } else { - Iterator>> backIterator = readStatesByAlignmentStart.descendingIterator(); + Iterator> backIterator = hanger.descendingIterator(); boolean readPruned = true; while(readsInHanger+newReads.size()>targetCoverage && readPruned) { readPruned = false; while(readsInHanger+newReads.size()>targetCoverage && backIterator.hasNext()) { - List readsAtLocus = backIterator.next().get(sampleName); + List readsAtLocus = backIterator.next(); if(readsAtLocus.size() > 1) { readsAtLocus.remove(downsampleRandomizer.nextInt(readsAtLocus.size())); readPruned = true; @@ -701,16 +708,13 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { } if(readsInHanger == targetCoverage) { - Collection firstHangerForSample = readStatesByAlignmentStart.getFirst().get(sampleName); - readsInHanger -= firstHangerForSample.size(); - firstHangerForSample.clear(); + List readsInFirstHanger = hanger.remove(); + readsInHanger -= readsInFirstHanger.size(); } - readStatesInHangerEntry += addReadsToHanger(culledReadStatesBySample,sampleName,newReads,targetCoverage-readsInHanger,false); + readStatesInHangerEntry += addReadsToHanger(hanger,newReads,targetCoverage-readsInHanger,false); } - } - if(!culledReadStatesBySample.isEmpty()) { - readStatesByAlignmentStart.add(culledReadStatesBySample); + totalReadStatesInHanger += readStatesInHangerEntry; } } @@ -724,9 +728,8 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { private int countReadsInHanger(final String sampleName) { int count = 0; - for(Map> hangerEntry: readStatesByAlignmentStart) { - if(sampleName == null && hangerEntry.containsKey(sampleName)) - count += hangerEntry.get(sampleName).size(); + for(List hangerEntry: readStatesBySample.get(sampleName)) { + count += hangerEntry.size(); } return count; } @@ -734,12 +737,11 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { /** * Add reads with the given sample name to the given hanger entry. * @param newHangerEntry The hanger entry to add. - * @param sampleName Sample name of the given reads. Should match the entry in each read's read group. * @param reads Reads to add. Selected reads will be pulled from this source. * @param maxReads Maximum number of reads to add. * @return Total number of reads added. */ - private int addReadsToHanger(final Map> newHangerEntry, final String sampleName, final Collection reads, final int maxReads, boolean atMaxReadsAtLocusLimit) { + private int addReadsToHanger(final Deque> newHangerEntry, final Collection reads, final int maxReads, boolean atMaxReadsAtLocusLimit) { if(reads.isEmpty()) return 0; @@ -748,13 +750,13 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { // the farthest right a read extends Integer rightMostEnd = -1; - List readStatesBySample = new LinkedList(); + List readStates = new LinkedList(); int readCount = 0; for(SAMRecord read: reads) { if(readCount <= maxReads) { SAMRecordState state = new SAMRecordState(read, readInfo.generateExtendedEvents()); state.stepForwardOnGenome(); - readStatesBySample.add(state); + readStates.add(state); // TODO: What if we downsample the extended events away? if (state.hadIndel()) hasExtendedEvents = true; readCount++; @@ -765,7 +767,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { rightMostEnd = (read.getAlignmentEnd() > rightMostEnd) ? read.getAlignmentEnd() : rightMostEnd; } } - newHangerEntry.put(sampleName,readStatesBySample); + newHangerEntry.add(readStates); if (location != null) overflowTracker.exceeded(GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart(),rightMostEnd),