NestedIntegerArray: Pre-allocate only the first two dimensions
It turns out that pre-allocating the entire tree was too expensive in terms of memory when using large values for the -mcs and -ics parameters. Pre-allocating only the first two dimensions still prevents us from ever locking the root node during a put(). Contention between threads over the lower levels of the tree should be minimal, given that puts are rare compared to gets. Also log the array dimensions and pre-allocation info at startup: if pre-allocation takes longer than usual, this gives the user a sense of what is causing the delay.
This commit is contained in:
parent
cc8c12b954
commit
884d031e72
|
|
@ -25,9 +25,11 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.collections;
|
package org.broadinstitute.sting.utils.collections;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -38,39 +40,50 @@ import java.util.List;
|
||||||
|
|
||||||
public class NestedIntegerArray<T> {
|
public class NestedIntegerArray<T> {
|
||||||
|
|
||||||
|
private static Logger logger = Logger.getLogger(NestedIntegerArray.class);
|
||||||
|
|
||||||
protected final Object[] data;
|
protected final Object[] data;
|
||||||
|
|
||||||
protected final int numDimensions;
|
protected final int numDimensions;
|
||||||
protected final int[] dimensions;
|
protected final int[] dimensions;
|
||||||
|
|
||||||
|
// Preallocate the first two dimensions to limit contention during tree traversals in put()
|
||||||
|
private static final int NUM_DIMENSIONS_TO_PREALLOCATE = 2;
|
||||||
|
|
||||||
public NestedIntegerArray(final int... dimensions) {
|
public NestedIntegerArray(final int... dimensions) {
|
||||||
numDimensions = dimensions.length;
|
numDimensions = dimensions.length;
|
||||||
if ( numDimensions == 0 )
|
if ( numDimensions == 0 )
|
||||||
throw new ReviewedStingException("There must be at least one dimension to an NestedIntegerArray");
|
throw new ReviewedStingException("There must be at least one dimension to an NestedIntegerArray");
|
||||||
this.dimensions = dimensions.clone();
|
this.dimensions = dimensions.clone();
|
||||||
|
|
||||||
|
int dimensionsToPreallocate = Math.min(dimensions.length, NUM_DIMENSIONS_TO_PREALLOCATE);
|
||||||
|
|
||||||
|
logger.info(String.format("Creating NestedIntegerArray with dimensions %s", Arrays.toString(dimensions)));
|
||||||
|
logger.info(String.format("Pre-allocating first %d dimensions", dimensionsToPreallocate));
|
||||||
|
|
||||||
data = new Object[dimensions[0]];
|
data = new Object[dimensions[0]];
|
||||||
prepopulateArray(data, 0);
|
preallocateArray(data, 0, dimensionsToPreallocate);
|
||||||
|
|
||||||
|
logger.info(String.format("Done pre-allocating first %d dimensions", dimensionsToPreallocate));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Recursively allocate the entire tree of arrays in all its dimensions.
|
* Recursively allocate the first dimensionsToPreallocate dimensions of the tree
|
||||||
*
|
*
|
||||||
* Doing this upfront uses more memory initially, but saves time over the course of the run
|
* Pre-allocating the first few dimensions helps limit contention during tree traversals in put()
|
||||||
* and (crucially) avoids having to make threads wait while traversing the tree to check
|
|
||||||
* whether branches exist or not.
|
|
||||||
*
|
*
|
||||||
* @param subarray current node in the tree
|
* @param subarray current node in the tree
|
||||||
* @param dimension current level in the tree
|
* @param dimension current level in the tree
|
||||||
|
* @param dimensionsToPreallocate preallocate only this many dimensions (starting from the first)
|
||||||
*/
|
*/
|
||||||
private void prepopulateArray( Object[] subarray, int dimension ) {
|
private void preallocateArray( Object[] subarray, int dimension, int dimensionsToPreallocate ) {
|
||||||
if ( dimension >= numDimensions - 1 ) {
|
if ( dimension >= dimensionsToPreallocate - 1 ) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( int i = 0; i < subarray.length; i++ ) {
|
for ( int i = 0; i < subarray.length; i++ ) {
|
||||||
subarray[i] = new Object[dimensions[dimension + 1]];
|
subarray[i] = new Object[dimensions[dimension + 1]];
|
||||||
prepopulateArray((Object[])subarray[i], dimension + 1);
|
preallocateArray((Object[])subarray[i], dimension + 1, dimensionsToPreallocate);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -82,8 +95,9 @@ public class NestedIntegerArray<T> {
|
||||||
if ( keys[i] >= dimensions[i] )
|
if ( keys[i] >= dimensions[i] )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
myData = (Object[])myData[keys[i]]; // interior nodes in the tree will never be null, so we can safely traverse
|
myData = (Object[])myData[keys[i]];
|
||||||
// down to the leaves
|
if ( myData == null )
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (T)myData[keys[numNestedDimensions]];
|
return (T)myData[keys[numNestedDimensions]];
|
||||||
|
|
@ -92,8 +106,8 @@ public class NestedIntegerArray<T> {
|
||||||
/**
|
/**
|
||||||
* Insert a value at the position specified by the given keys.
|
* Insert a value at the position specified by the given keys.
|
||||||
*
|
*
|
||||||
* This method is THREAD-SAFE despite not being synchronized, however the caller MUST
|
* This method is thread-safe, however the caller MUST check the
|
||||||
* check the return value to see if the put succeeded. This method RETURNS FALSE if
|
* return value to see if the put succeeded. This method RETURNS FALSE if
|
||||||
* the value could not be inserted because there already was a value present
|
* the value could not be inserted because there already was a value present
|
||||||
* at the specified location. In this case the caller should do a get() to get
|
* at the specified location. In this case the caller should do a get() to get
|
||||||
* the already-existing value and (potentially) update it.
|
* the already-existing value and (potentially) update it.
|
||||||
|
|
@ -113,8 +127,17 @@ public class NestedIntegerArray<T> {
|
||||||
if ( keys[i] >= dimensions[i] )
|
if ( keys[i] >= dimensions[i] )
|
||||||
throw new ReviewedStingException("Key " + keys[i] + " is too large for dimension " + i + " (max is " + (dimensions[i]-1) + ")");
|
throw new ReviewedStingException("Key " + keys[i] + " is too large for dimension " + i + " (max is " + (dimensions[i]-1) + ")");
|
||||||
|
|
||||||
myData = (Object[])myData[keys[i]]; // interior nodes in the tree will never be null, so we can safely traverse
|
// If we're at or beyond the last dimension that was pre-allocated, we need to do a synchronized
|
||||||
// down to the leaves
|
// check to see if the next branch exists, and if it doesn't, create it
|
||||||
|
if ( i >= NUM_DIMENSIONS_TO_PREALLOCATE - 1 ) {
|
||||||
|
synchronized ( myData ) {
|
||||||
|
if ( myData[keys[i]] == null ) {
|
||||||
|
myData[keys[i]] = new Object[dimensions[i + 1]];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
myData = (Object[])myData[keys[i]];
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized ( myData ) { // lock the bottom row while we examine and (potentially) update it
|
synchronized ( myData ) { // lock the bottom row while we examine and (potentially) update it
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue