From d3437e62da7cb302e85c36b296c3bffee9981a05 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 2 Aug 2011 21:59:06 -0400 Subject: [PATCH] Added a simple utility method Utils.optimumHashSize() to calculate the optimum initial size for a Java hash table (HashMap, HashSet, etc.) given an expected maximum number of elements. The optimum size is the smallest size that's guaranteed not to result in any rehash / table-resize operations. Example Usage: Map hash = new HashMap(Utils.optimumHashSize(expectedMaxElements)); I think we're paying way too heavy a price in unnecessary rehash operations across the GATK. If you don't specify an initial size, you get a table of size 16 that gets completely rehashed and doubles in size every time it becomes 75% full. This means you do at least twice as much work as you need to in order to populate your table: (n + n/2 + n/4 + ... + 16) ~= (1 + 1/2 + 1/4 + ...) * n ~= 2 * n --- .../src/org/broadinstitute/sting/utils/Utils.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 6a50badce..015e5d6f6 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -42,6 +42,21 @@ public class Utils { /** our log, which we want to capture anything from this class */ private static Logger logger = Logger.getLogger(Utils.class); + public static final float JAVA_DEFAULT_HASH_LOAD_FACTOR = 0.75f; + + /** + * Calculates the optimum initial size for a hash table given the maximum number + * of elements it will need to hold. The optimum size is the smallest size that + * is guaranteed not to result in any rehash/table-resize operations. 
+     *
+     * @param maxElements The maximum number of elements you expect the hash table will need to hold
+     * @return The optimum initial size for the table, given maxElements (never more than Integer.MAX_VALUE)
+     */
+    public static int optimumHashSize ( int maxElements ) {
+        // Pad in long arithmetic: the cast saturates at Integer.MAX_VALUE for huge inputs, and an int "+ 2" would then wrap negative.
+        return (int) Math.min((long) (maxElements / JAVA_DEFAULT_HASH_LOAD_FACTOR) + 2L, Integer.MAX_VALUE);
+    }
+
     public static String getClassName(Class c) {
         String FQClassName = c.getName();
         int firstChar;