From b10950c6919d6c516193873e0c0de14638cec87f Mon Sep 17 00:00:00 2001 From: hanna Date: Fri, 21 May 2010 19:26:16 +0000 Subject: [PATCH] Simple performance optimization -- cache the number of reads in the locus hanger. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3417 348d0f76-0448-11de-a6fe-93d51630548a --- .../DownsamplingLocusIteratorByState.java | 41 +++++++++++-------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/DownsamplingLocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/DownsamplingLocusIteratorByState.java index c9bd16a69..5c2a797f2 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/DownsamplingLocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/DownsamplingLocusIteratorByState.java @@ -515,6 +515,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { private final int targetCoverage; private final Deque>> readStatesByAlignmentStart; + private int totalReadStatesInHanger = 0; /** * Store a random number generator with a consistent seed for consistent downsampling from run to run. @@ -546,7 +547,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { private Iterator readStateIterator; private SAMRecordState nextReadState; - private int readsInHanger = countReadsInHanger(); + private int readsInHanger = totalReadStatesInHanger; { alignmentStartIterator = readStatesByAlignmentStart.iterator(); @@ -577,6 +578,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { readStateIterator.remove(); if(currentSample.isEmpty()) sampleIterator.remove(); if(currentAlignmentStart.isEmpty()) alignmentStartIterator.remove(); + totalReadStatesInHanger--; } private void advance() { @@ -643,6 +645,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { } Map> culledReadStatesBySample = new HashMap>(); + int readStatesInHangerEntry = 0; for(Map.Entry> entry: downsamplersBySampleName.entrySet()) { String sampleName = entry.getKey(); @@ -653,7 +656,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { int readsInHanger = countReadsInHanger(sampleName); if(readsInHanger+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) - addReadsToHanger(culledReadStatesBySample,sampleName,newReads,newReads.size()); + readStatesInHangerEntry += addReadsToHanger(culledReadStatesBySample,sampleName,newReads,newReads.size()); else { Iterator>> backIterator = readStatesByAlignmentStart.descendingIterator(); boolean readPruned = true; @@ -675,11 +678,13 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { firstHangerForSample.clear(); } - addReadsToHanger(culledReadStatesBySample,sampleName,newReads,targetCoverage-readsInHanger); + readStatesInHangerEntry += addReadsToHanger(culledReadStatesBySample,sampleName,newReads,targetCoverage-readsInHanger); } } - if(!culledReadStatesBySample.isEmpty()) + if(!culledReadStatesBySample.isEmpty()) { readStatesByAlignmentStart.add(culledReadStatesBySample); + totalReadStatesInHanger += readStatesInHangerEntry; + } } private ReservoirDownsampler getDownsampler(String sampleName) { @@ -689,15 +694,6 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { return downsamplersBySampleName.get(sampleName); } - private int countReadsInHanger() { - int count = 0; - for(Map> hangerEntry: readStatesByAlignmentStart) { - for(List reads: hangerEntry.values()) - count += reads.size(); - } - return count; - } - private int countReadsInHanger(final String sampleName) { int count = 0; for(Map> hangerEntry: readStatesByAlignmentStart) { @@ -707,22 +703,31 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { return count; } - private void addReadsToHanger(final Map> newHanger, final String sampleName, final Collection reads, final int maxReads) { + /** + * Add reads with the given sample name to the given hanger entry. + * @param newHangerEntry The hanger entry to add. + * @param sampleName Sample name of the given reads. Should match the entry in each read's read group. + * @param reads Reads to add. Selected reads will be pulled from this source. + * @param maxReads Maximum number of reads to add. + * @return Total number of reads added. + */ + private int addReadsToHanger(final Map> newHangerEntry, final String sampleName, final Collection reads, final int maxReads) { if(reads.isEmpty()) - return; - List hangerEntry = new LinkedList(); + return 0; + List readStatesBySample = new LinkedList(); int readCount = 0; for(SAMRecord read: reads) { if(readCount >= maxReads) break; SAMRecordState state = new SAMRecordState(read, readInfo.generateExtendedEvents()); state.stepForwardOnGenome(); - hangerEntry.add(state); + readStatesBySample.add(state); // TODO: What if we downsample the extended events away? if (state.hadIndel()) hasExtendedEvents = true; readCount++; } - newHanger.put(sampleName,hangerEntry); + newHangerEntry.put(sampleName,readStatesBySample); + return readCount; } } }