From d9cdc5d006e470fb5429c55efa1e74b0be68a4bc Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 21 May 2013 15:32:24 -0400 Subject: [PATCH] Optimization: track alleles in the PerReadAlleleLikelihoodMap with a HashSet -- The previous version of PerReadAlleleLikelihoodMap only stored the alleles in an ArrayList, and used ArrayList.contains() to determine if an allele was already present in the map. This is very slow with many alleles. Now keeps both the ArrayList (for get() performance) and a Set of alleles for contains(). --- .../genotyper/PerReadAlleleLikelihoodMap.java | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java index 150e24c51..c8bb7ff79 100644 --- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java @@ -42,13 +42,13 @@ import java.util.*; * For each read, this holds underlying alleles represented by an aligned read, and corresponding relative likelihood. */ public class PerReadAlleleLikelihoodMap { - protected final List alleles; - protected final Map> likelihoodReadMap; + /** A set of all of the allele, so we can efficiently determine if an allele is already present */ + private final Set allelesSet = new HashSet<>(); + /** A list of the unique allele, as an ArrayList so we can call get(i) efficiently */ + protected final List alleles = new ArrayList<>(); + protected final Map> likelihoodReadMap = new LinkedHashMap<>(); - public PerReadAlleleLikelihoodMap() { - likelihoodReadMap = new LinkedHashMap>(); - alleles = new ArrayList(); - } + public PerReadAlleleLikelihoodMap() { } /** * Add a new entry into the Read -> ( Allele -> Likelihood ) map of maps. @@ -61,18 +61,20 @@ public class PerReadAlleleLikelihoodMap { if ( a == null ) throw new IllegalArgumentException("Cannot add a null allele to the allele likelihood map"); if ( likelihood == null ) throw new IllegalArgumentException("Likelihood cannot be null"); if ( likelihood > 0.0 ) throw new IllegalArgumentException("Likelihood must be negative (L = log(p))"); + Map likelihoodMap = likelihoodReadMap.get(read); if (likelihoodMap == null){ // LinkedHashMap will ensure iterating through alleles will be in consistent order - likelihoodMap = new LinkedHashMap(); + likelihoodMap = new LinkedHashMap<>(); } likelihoodReadMap.put(read,likelihoodMap); likelihoodMap.put(a,likelihood); - if (!alleles.contains(a)) + if (!allelesSet.contains(a)) { + allelesSet.add(a); alleles.add(a); - + } } public ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction) { @@ -165,6 +167,7 @@ public class PerReadAlleleLikelihoodMap { } public void clear() { + allelesSet.clear(); alleles.clear(); likelihoodReadMap.clear(); }