From cd68cc239b9dd200fb9a741e55dd60221c9d5c4a Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Thu, 29 Dec 2011 00:41:59 -0500
Subject: [PATCH] Added knuth-shuffle (KS) and randomSubset using KS to
 MathUtils

         * Knuth-shuffle is a simple, yet effective array permutator (hope this is good english).
         * added a simple randomSubset that returns a random subset without repeats of any given array with the same probability for every permutation.
         * added unit tests to both functions
---
 .../broadinstitute/sting/utils/MathUtils.java | 818 ++++++++++--------
 .../sting/utils/MathUtilsUnitTest.java        |  92 +-
 2 files changed, 545 insertions(+), 365 deletions(-)
diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
index 759e1649d..4a3100a94 100644
--- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@@ -25,6 +25,7 @@
 
 package org.broadinstitute.sting.utils;
 
+import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@@ -49,8 +50,11 @@ public class MathUtils {
      */
 
 
-    /** Private constructor.  No instantiating this class! */
-    private MathUtils() {}
+    /**
+     * Private constructor.  No instantiating this class!
+     */
+    private MathUtils() {
+    }
 
     @Requires({"d > 0.0"})
     public static int fastPositiveRound(double d) {
@@ -58,21 +62,21 @@ public class MathUtils {
     }
 
     public static int fastRound(double d) {
-        if ( d > 0.0 ) {
+        if (d > 0.0) {
             return fastPositiveRound(d);
         } else {
-            return -1*fastPositiveRound(-1*d);
+            return -1 * fastPositiveRound(-1 * d);
         }
     }
 
     public static double sum(Collection<Number> numbers) {
-        return sum(numbers,false);
+        return sum(numbers, false);
     }
 
-    public static double sum( Collection<Number> numbers, boolean ignoreNan ) {
+    public static double sum(Collection<Number> numbers, boolean ignoreNan) {
         double sum = 0;
-        for ( Number n : numbers ) {
-            if ( ! ignoreNan || ! Double.isNaN(n.doubleValue())) {
+        for (Number n : numbers) {
+            if (!ignoreNan || !Double.isNaN(n.doubleValue())) {
                 sum += n.doubleValue();
             }
         }
@@ -82,66 +86,72 @@ public class MathUtils {
 
     public static int nonNanSize(Collection<Number> numbers) {
         int size = 0;
-        for ( Number n : numbers) {
+        for (Number n : numbers) {
             size += Double.isNaN(n.doubleValue()) ? 0 : 1;
         }
 
         return size;
     }
 
-    public static double average( Collection<Number> numbers, boolean ignoreNan) {
-        if ( ignoreNan ) {
-            return sum(numbers,true)/nonNanSize(numbers);
+    public static double average(Collection<Number> numbers, boolean ignoreNan) {
+        if (ignoreNan) {
+            return sum(numbers, true) / nonNanSize(numbers);
         } else {
-            return sum(numbers,false)/nonNanSize(numbers);
+            return sum(numbers, false) / nonNanSize(numbers);
         }
     }
 
-    public static double variance( Collection<Number> numbers, Number mean, boolean ignoreNan ) {
+    public static double variance(Collection<Number> numbers, Number mean, boolean ignoreNan) {
         double mn = mean.doubleValue();
         double var = 0;
-        for ( Number n : numbers ) { var += ( ! ignoreNan || ! Double.isNaN(n.doubleValue())) ? (n.doubleValue()-mn)*(n.doubleValue()-mn) : 0; }
-        if ( ignoreNan ) { return var/(nonNanSize(numbers)-1); }
-        return var/(numbers.size()-1);
+        for (Number n : numbers) {
+            var += (!ignoreNan || !Double.isNaN(n.doubleValue())) ? (n.doubleValue() - mn) * (n.doubleValue() - mn) : 0;
+        }
+        if (ignoreNan) {
+            return var / (nonNanSize(numbers) - 1);
+        }
+        return var / (numbers.size() - 1);
     }
 
     public static double variance(Collection<Number> numbers, Number mean) {
-        return variance(numbers,mean,false);
+        return variance(numbers, mean, false);
     }
 
     public static double variance(Collection<Number> numbers, boolean ignoreNan) {
-        return variance(numbers,average(numbers,ignoreNan),ignoreNan);
+        return variance(numbers, average(numbers, ignoreNan), ignoreNan);
     }
 
     public static double variance(Collection<Number> numbers) {
-        return variance(numbers,average(numbers,false),false);
+        return variance(numbers, average(numbers, false), false);
     }
 
     public static double sum(double[] values) {
         double s = 0.0;
-        for ( double v : values) s += v;
+        for (double v : values) s += v;
         return s;
     }
 
 
     /**
      * Calculates the log10 cumulative sum of an array with log10 probabilities
+     *
      * @param log10p the array with log10 probabilites
-     * @param upTo index in the array to calculate the cumsum up to
+     * @param upTo   index in the array to calculate the cumsum up to
      * @return the log10 of the cumulative sum
      */
-    public static double log10CumulativeSumLog10(double [] log10p, int upTo) {
+    public static double log10CumulativeSumLog10(double[] log10p, int upTo) {
         return log10sumLog10(log10p, 0, upTo);
     }
 
     /**
      * Converts a real space array of probabilities into a log10 array
+     *
      * @param prRealSpace
      * @return
      */
     public static double[] toLog10(double[] prRealSpace) {
         double[] log10s = new double[prRealSpace.length];
-        for ( int i = 0; i < prRealSpace.length; i++ )
+        for (int i = 0; i < prRealSpace.length; i++)
             log10s[i] = Math.log10(prRealSpace[i]);
         return log10s;
     }
@@ -154,7 +164,7 @@ public class MathUtils {
         double sum = 0.0;
 
         double maxValue = Utils.findMaxEntry(log10p);
-        for ( int i = start; i < finish; i++ ) {
+        for (int i = start; i < finish; i++) {
             sum += Math.pow(10.0, log10p[i] - maxValue);
         }
 
@@ -163,13 +173,13 @@ public class MathUtils {
 
     public static double sumDoubles(List<Double> values) {
         double s = 0.0;
-        for ( double v : values) s += v;
+        for (double v : values) s += v;
         return s;
     }
 
     public static int sumIntegers(List<Integer> values) {
         int s = 0;
-        for ( int v : values) s += v;
+        for (int v : values) s += v;
         return s;
     }
 
@@ -185,11 +195,11 @@ public class MathUtils {
     }
 
     public static boolean wellFormedDouble(double val) {
-        return ! Double.isInfinite(val) && ! Double.isNaN(val);
+        return !Double.isInfinite(val) && !Double.isNaN(val);
     }
 
     public static double bound(double value, double minBoundary, double maxBoundary) {
-        return Math.max(Math.min(value,  maxBoundary), minBoundary);
+        return Math.max(Math.min(value, maxBoundary), minBoundary);
     }
 
     public static boolean isBounded(double val, double lower, double upper) {
@@ -197,7 +207,7 @@ public class MathUtils {
     }
 
     public static boolean isPositive(double val) {
-        return ! isNegativeOrZero(val);
+        return !isNegativeOrZero(val);
     }
 
     public static boolean isPositiveOrZero(double val) {
@@ -209,17 +219,19 @@ public class MathUtils {
     }
 
     public static boolean isNegative(double val) {
-        return ! isPositiveOrZero(val);
+        return !isPositiveOrZero(val);
     }
 
     /**
      * Compares double values for equality (within 1e-6), or inequality.
      *
-     * @param a  the first double value
-     * @param b  the second double value
-     * @return   -1 if a is greater than b, 0 if a is equal to be within 1e-6, 1 if b is greater than a.
+     * @param a the first double value
+     * @param b the second double value
+     * @return -1 if a is greater than b, 0 if a is equal to be within 1e-6, 1 if b is greater than a.
      */
-    public static byte compareDoubles(double a, double b) { return compareDoubles(a, b, 1e-6); }
+    public static byte compareDoubles(double a, double b) {
+        return compareDoubles(a, b, 1e-6);
+    }
 
     /**
      * Compares double values for equality (within epsilon), or inequality.
@@ -227,23 +239,28 @@ public class MathUtils {
      * @param a       the first double value
      * @param b       the second double value
      * @param epsilon the precision within which two double values will be considered equal
-     * @return        -1 if a is greater than b, 0 if a is equal to be within epsilon, 1 if b is greater than a.
+     * @return -1 if a is greater than b, 0 if a is equal to be within epsilon, 1 if b is greater than a.
      */
-    public static byte compareDoubles(double a, double b, double epsilon)
-    {
-        if (Math.abs(a - b) < epsilon) { return 0; }
-        if (a > b) { return -1; }
+    public static byte compareDoubles(double a, double b, double epsilon) {
+        if (Math.abs(a - b) < epsilon) {
+            return 0;
+        }
+        if (a > b) {
+            return -1;
+        }
         return 1;
     }
 
     /**
      * Compares float values for equality (within 1e-6), or inequality.
      *
-     * @param a  the first float value
-     * @param b  the second float value
-     * @return   -1 if a is greater than b, 0 if a is equal to be within 1e-6, 1 if b is greater than a.
+     * @param a the first float value
+     * @param b the second float value
+     * @return -1 if a is greater than b, 0 if a is equal to be within 1e-6, 1 if b is greater than a.
      */
-    public static byte compareFloats(float a, float b) { return compareFloats(a, b, 1e-6f); }
+    public static byte compareFloats(float a, float b) {
+        return compareFloats(a, b, 1e-6f);
+    }
 
     /**
      * Compares float values for equality (within epsilon), or inequality.
@@ -251,47 +268,50 @@ public class MathUtils {
      * @param a       the first float value
      * @param b       the second float value
      * @param epsilon the precision within which two float values will be considered equal
-     * @return        -1 if a is greater than b, 0 if a is equal to be within epsilon, 1 if b is greater than a.
+     * @return -1 if a is greater than b, 0 if a is equal to be within epsilon, 1 if b is greater than a.
      */
-    public static byte compareFloats(float a, float b, float epsilon)
-    {
-        if (Math.abs(a - b) < epsilon) { return 0; }
-        if (a > b) { return -1; }
+    public static byte compareFloats(float a, float b, float epsilon) {
+        if (Math.abs(a - b) < epsilon) {
+            return 0;
+        }
+        if (a > b) {
+            return -1;
+        }
         return 1;
     }
 
-    public static double NormalDistribution(double mean, double sd, double x)
-    {
-        double a = 1.0 / (sd*Math.sqrt(2.0 * Math.PI));
-        double b = Math.exp(-1.0 * (Math.pow(x - mean,2.0)/(2.0 * sd * sd)));
+    public static double NormalDistribution(double mean, double sd, double x) {
+        double a = 1.0 / (sd * Math.sqrt(2.0 * Math.PI));
+        double b = Math.exp(-1.0 * (Math.pow(x - mean, 2.0) / (2.0 * sd * sd)));
         return a * b;
     }
 
-    public static double binomialCoefficient (int n, int k) {
+    public static double binomialCoefficient(int n, int k) {
         return Math.pow(10, log10BinomialCoefficient(n, k));
     }
+
     /**
      * Computes a binomial probability.  This is computed using the formula
-     *
-     *  B(k; n; p) = [ n! / ( k! (n - k)! ) ] (p^k)( (1-p)^k )
-     *
+     * <p/>
+     * B(k; n; p) = [ n! / ( k! (n - k)! ) ] (p^k)( (1-p)^k )
+     * <p/>
      * where n is the number of trials, k is the number of successes, and p is the probability of success
      *
-     * @param n  number of Bernoulli trials
-     * @param k  number of successes
-     * @param p  probability of success
-     *
-     * @return   the binomial probability of the specified configuration.  Computes values down to about 1e-237.
+     * @param n number of Bernoulli trials
+     * @param k number of successes
+     * @param p probability of success
+     * @return the binomial probability of the specified configuration.  Computes values down to about 1e-237.
      */
-    public static double binomialProbability (int n, int k, double p) {
+    public static double binomialProbability(int n, int k, double p) {
         return Math.pow(10, log10BinomialProbability(n, k, Math.log10(p)));
     }
 
     /**
      * Performs the cumulative sum of binomial probabilities, where the probability calculation is done in log space.
-     * @param start - start of the cumulant sum (over hits)
-     * @param end - end of the cumulant sum (over hits)
-     * @param total - number of attempts for the number of hits
+     *
+     * @param start   - start of the cumulant sum (over hits)
+     * @param end     - end of the cumulant sum (over hits)
+     * @param total   - number of attempts for the number of hits
      * @param probHit - probability of a successful hit
      * @return - returns the cumulative probability
      */
@@ -300,11 +320,11 @@ public class MathUtils {
         double prevProb;
         BigDecimal probCache = BigDecimal.ZERO;
 
-        for(int hits = start; hits < end; hits++) {
+        for (int hits = start; hits < end; hits++) {
             prevProb = cumProb;
             double probability = binomialProbability(total, hits, probHit);
             cumProb += probability;
-            if ( probability > 0 && cumProb - prevProb < probability/2 ) { // loss of precision
+            if (probability > 0 && cumProb - prevProb < probability / 2) { // loss of precision
                 probCache = probCache.add(new BigDecimal(prevProb));
                 cumProb = 0.0;
                 hits--; // repeat loop
@@ -314,20 +334,20 @@ public class MathUtils {
 
         return probCache.add(new BigDecimal(cumProb)).doubleValue();
     }
-  
+
     /**
      * Computes a multinomial coefficient efficiently avoiding overflow even for large numbers.
      * This is computed using the formula:
-     *
-     *   M(x1,x2,...,xk; n) = [ n! / (x1! x2! ... xk!) ]
-     *
+     * <p/>
+     * M(x1,x2,...,xk; n) = [ n! / (x1! x2! ... xk!) ]
+     * <p/>
      * where xi represents the number of times outcome i was observed, n is the number of total observations.
      * In this implementation, the value of n is inferred as the sum over i of xi.
      *
-     * @param k  an int[] of counts, where each element represents the number of times a certain outcome was observed
-     * @return   the multinomial of the specified configuration.
+     * @param k an int[] of counts, where each element represents the number of times a certain outcome was observed
+     * @return the multinomial of the specified configuration.
      */
-    public static double multinomialCoefficient (int [] k) {
+    public static double multinomialCoefficient(int[] k) {
         int n = 0;
         for (int xi : k) {
             n += xi;
@@ -339,37 +359,38 @@ public class MathUtils {
     /**
      * Computes a multinomial probability efficiently avoiding overflow even for large numbers.
      * This is computed using the formula:
-     *
-     *   M(x1,x2,...,xk; n; p1,p2,...,pk) = [ n! / (x1! x2! ... xk!) ] (p1^x1)(p2^x2)(...)(pk^xk)
-     *
+     * <p/>
+     * M(x1,x2,...,xk; n; p1,p2,...,pk) = [ n! / (x1! x2! ... xk!) ] (p1^x1)(p2^x2)(...)(pk^xk)
+     * <p/>
      * where xi represents the number of times outcome i was observed, n is the number of total observations, and
      * pi represents the probability of the i-th outcome to occur.  In this implementation, the value of n is
      * inferred as the sum over i of xi.
      *
-     * @param k  an int[] of counts, where each element represents the number of times a certain outcome was observed
-     * @param p  a double[] of probabilities, where each element represents the probability a given outcome can occur
-     * @return   the multinomial probability of the specified configuration.
+     * @param k an int[] of counts, where each element represents the number of times a certain outcome was observed
+     * @param p a double[] of probabilities, where each element represents the probability a given outcome can occur
+     * @return the multinomial probability of the specified configuration.
      */
-    public static double multinomialProbability (int[] k, double[] p) {
+    public static double multinomialProbability(int[] k, double[] p) {
         if (p.length != k.length)
             throw new UserException.BadArgumentValue("p and k", "Array of log10 probabilities must have the same size as the array of number of sucesses: " + p.length + ", " + k.length);
 
         int n = 0;
-        double [] log10P = new double[p.length];
-        for (int i=0; i<p.length; i++) {
+        double[] log10P = new double[p.length];
+        for (int i = 0; i < p.length; i++) {
             log10P[i] = Math.log10(p[i]);
             n += k[i];
         }
-        return Math.pow(10,log10MultinomialProbability(n, k, log10P));
+        return Math.pow(10, log10MultinomialProbability(n, k, log10P));
     }
 
     /**
      * calculate the Root Mean Square of an array of integers
-     * @param x  an byte[] of numbers
-     * @return   the RMS of the specified numbers.
-    */
+     *
+     * @param x an byte[] of numbers
+     * @return the RMS of the specified numbers.
+     */
     public static double rms(byte[] x) {
-        if ( x.length == 0 )
+        if (x.length == 0)
             return 0.0;
 
         double rms = 0.0;
@@ -382,11 +403,12 @@ public class MathUtils {
 
     /**
      * calculate the Root Mean Square of an array of integers
-     * @param x  an int[] of numbers
-     * @return   the RMS of the specified numbers.
-    */
+     *
+     * @param x an int[] of numbers
+     * @return the RMS of the specified numbers.
+     */
     public static double rms(int[] x) {
-        if ( x.length == 0 )
+        if (x.length == 0)
             return 0.0;
 
         double rms = 0.0;
@@ -398,11 +420,12 @@ public class MathUtils {
 
     /**
      * calculate the Root Mean Square of an array of doubles
-     * @param x  a double[] of numbers
-     * @return   the RMS of the specified numbers.
-    */
+     *
+     * @param x a double[] of numbers
+     * @return the RMS of the specified numbers.
+     */
     public static double rms(Double[] x) {
-        if ( x.length == 0 )
+        if (x.length == 0)
             return 0.0;
 
         double rms = 0.0;
@@ -418,23 +441,23 @@ public class MathUtils {
 
         double rms = 0.0;
         for (int i : l)
-            rms += i*i;
+            rms += i * i;
         rms /= l.size();
         return Math.sqrt(rms);
     }
 
-    public static double distanceSquared( final double[] x, final double[] y ) {
+    public static double distanceSquared(final double[] x, final double[] y) {
         double dist = 0.0;
-        for(int iii = 0; iii < x.length; iii++) {
+        for (int iii = 0; iii < x.length; iii++) {
             dist += (x[iii] - y[iii]) * (x[iii] - y[iii]);
         }
         return dist;
     }
 
     public static double round(double num, int digits) {
-        double result = num * Math.pow(10.0, (double)digits);
+        double result = num * Math.pow(10.0, (double) digits);
         result = Math.round(result);
-        result = result / Math.pow(10.0, (double)digits);
+        result = result / Math.pow(10.0, (double) digits);
         return result;
     }
 
@@ -442,11 +465,10 @@ public class MathUtils {
     /**
      * normalizes the log10-based array.  ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE).
      *
-     * @param array  the array to be normalized
+     * @param array             the array to be normalized
      * @param takeLog10OfOutput if true, the output will be transformed back into log10 units
-     *
      * @return a newly allocated array corresponding the normalized values in array, maybe log10 transformed
-    */
+     */
     public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOutput) {
         return normalizeFromLog10(array, takeLog10OfOutput, false);
     }
@@ -476,7 +498,7 @@ public class MathUtils {
             sum += normalized[i];
         for (int i = 0; i < array.length; i++) {
             double x = normalized[i] / sum;
-            if ( takeLog10OfOutput ) x = Math.log10(x);
+            if (takeLog10OfOutput) x = Math.log10(x);
             normalized[i] = x;
         }
 
@@ -488,7 +510,7 @@ public class MathUtils {
 
         // for precision purposes, we need to add (or really subtract, since they're
         // all negative) the largest value; also, we need to convert to normal-space.
-        double maxValue = MathUtils.arrayMaxDouble( array );
+        double maxValue = MathUtils.arrayMaxDouble(array);
         for (int i = 0; i < array.size(); i++)
             normalized[i] = Math.pow(10, array.get(i) - maxValue);
 
@@ -498,19 +520,19 @@ public class MathUtils {
             sum += normalized[i];
         for (int i = 0; i < array.size(); i++) {
             double x = normalized[i] / sum;
-            if ( takeLog10OfOutput ) x = Math.log10(x);
+            if (takeLog10OfOutput) x = Math.log10(x);
             normalized[i] = x;
         }
 
         return normalized;
     }
+
     /**
      * normalizes the log10-based array.  ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE).
      *
-     * @param array  the array to be normalized
-     *
+     * @param array the array to be normalized
      * @return a newly allocated array corresponding the normalized values in array
-    */
+     */
     public static double[] normalizeFromLog10(double[] array) {
         return normalizeFromLog10(array, false);
     }
@@ -520,11 +542,11 @@ public class MathUtils {
     }
 
     public static int maxElementIndex(double[] array) {
-        if ( array == null ) throw new IllegalArgumentException("Array cannot be null!");
+        if (array == null) throw new IllegalArgumentException("Array cannot be null!");
 
         int maxI = -1;
-        for ( int i = 0; i < array.length; i++ ) {
-            if ( maxI == -1 || array[i] > array[maxI] )
+        for (int i = 0; i < array.length; i++) {
+            if (maxI == -1 || array[i] > array[maxI])
                 maxI = i;
         }
 
@@ -532,11 +554,11 @@ public class MathUtils {
     }
 
     public static int maxElementIndex(int[] array) {
-        if ( array == null ) throw new IllegalArgumentException("Array cannot be null!");
+        if (array == null) throw new IllegalArgumentException("Array cannot be null!");
 
         int maxI = -1;
-        for ( int i = 0; i < array.length; i++ ) {
-            if ( maxI == -1 || array[i] > array[maxI] )
+        for (int i = 0; i < array.length; i++) {
+            if (maxI == -1 || array[i] > array[maxI])
                 maxI = i;
         }
 
@@ -556,11 +578,11 @@ public class MathUtils {
     }
 
     public static int minElementIndex(double[] array) {
-        if ( array == null ) throw new IllegalArgumentException("Array cannot be null!");
+        if (array == null) throw new IllegalArgumentException("Array cannot be null!");
 
         int minI = -1;
-        for ( int i = 0; i < array.length; i++ ) {
-            if ( minI == -1 || array[i] < array[minI] )
+        for (int i = 0; i < array.length; i++) {
+            if (minI == -1 || array[i] < array[minI])
                 minI = i;
         }
 
@@ -568,32 +590,32 @@ public class MathUtils {
     }
 
     public static int minElementIndex(byte[] array) {
-        if ( array == null ) throw new IllegalArgumentException("Array cannot be null!");
+        if (array == null) throw new IllegalArgumentException("Array cannot be null!");
 
         int minI = -1;
-        for ( int i = 0; i < array.length; i++ ) {
-            if ( minI == -1 || array[i] < array[minI] )
+        for (int i = 0; i < array.length; i++) {
+            if (minI == -1 || array[i] < array[minI])
                 minI = i;
         }
 
         return minI;
-    }    
+    }
 
     public static int arrayMaxInt(List<Integer> array) {
-        if ( array == null ) throw new IllegalArgumentException("Array cannot be null!");
-        if ( array.size() == 0 ) throw new IllegalArgumentException("Array size cannot be 0!");
+        if (array == null) throw new IllegalArgumentException("Array cannot be null!");
+        if (array.size() == 0) throw new IllegalArgumentException("Array size cannot be 0!");
 
         int m = array.get(0);
-        for ( int e : array ) m = Math.max(m, e);
+        for (int e : array) m = Math.max(m, e);
         return m;
     }
 
     public static double arrayMaxDouble(List<Double> array) {
-        if ( array == null ) throw new IllegalArgumentException("Array cannot be null!");
-        if ( array.size() == 0 ) throw new IllegalArgumentException("Array size cannot be 0!");
+        if (array == null) throw new IllegalArgumentException("Array cannot be null!");
+        if (array.size() == 0) throw new IllegalArgumentException("Array size cannot be 0!");
 
         double m = array.get(0);
-        for ( double e : array ) m = Math.max(m, e);
+        for (double e : array) m = Math.max(m, e);
         return m;
     }
 
@@ -636,7 +658,7 @@ public class MathUtils {
         for (byte v : vals) {
             sum += v;
         }
-        return (byte) Math.floor(sum/vals.length);
+        return (byte) Math.floor(sum / vals.length);
     }
 
     public static double averageDouble(List<Double> vals) {
@@ -749,7 +771,9 @@ public class MathUtils {
     }
 
 
-    /** Draw N random elements from list. */
+    /**
+     * Draw N random elements from list.
+     */
     public static <T> List<T> randomSubset(List<T> list, int N) {
         if (list.size() <= N) {
             return list;
@@ -770,6 +794,25 @@ public class MathUtils {
         return ans;
     }
 
+    /**
+     * Draw N random elements from an array.
+     *
+     * @param array your objects
+     * @param n     number of elements to select at random from the list
+     * @return a new list with the N randomly chosen elements from list
+     */
+    @Requires({"array != null", "n>=0"})
+    @Ensures({"result != null", "result.length == Math.min(n, array.length)"})
+    public static Object[] randomSubset(final Object[] array, final int n) {
+        if (array.length <= n)
+            return array.clone();
+
+        Object[] shuffledArray = arrayShuffle(array);
+        Object[] result = new Object[n];
+        System.arraycopy(shuffledArray, 0, result, 0, n);
+        return result;
+    }
+
     public static double percentage(double x, double base) {
         return (base > 0 ? (x / base) * 100.0 : 0);
     }
@@ -799,7 +842,7 @@ public class MathUtils {
         return count;
     }
 
-    public static int countOccurrences(byte element, byte [] array) {
+    public static int countOccurrences(byte element, byte[] array) {
         int count = 0;
         for (byte y : array) {
             if (element == y)
@@ -814,13 +857,13 @@ public class MathUtils {
      * Better than sorting if N (number of elements to return) is small
      *
      * @param array the array
-     * @param n number of top elements to return
+     * @param n     number of top elements to return
      * @return the n larger elements of the array
      */
-    public static Collection<Double> getNMaxElements(double [] array, int n) {
+    public static Collection<Double> getNMaxElements(double[] array, int n) {
         ArrayList<Double> maxN = new ArrayList<Double>(n);
         double lastMax = Double.MAX_VALUE;
-        for (int i=0; i<n; i++) {
+        for (int i = 0; i < n; i++) {
             double max = Double.MIN_VALUE;
             for (double x : array) {
                 max = Math.min(lastMax, Math.max(x, max));
@@ -840,8 +883,8 @@ public class MathUtils {
      */
     static public ArrayList<Integer> sampleIndicesWithReplacement(int n, int k) {
 
-        ArrayList<Integer> chosen_balls = new ArrayList <Integer>(k);
-        for (int i=0; i< k; i++) {
+        ArrayList<Integer> chosen_balls = new ArrayList<Integer>(k);
+        for (int i = 0; i < k; i++) {
             //Integer chosen_ball = balls[rand.nextInt(k)];
             chosen_balls.add(GenomeAnalysisEngine.getRandomGenerator().nextInt(n));
             //balls.remove(chosen_ball);
@@ -872,11 +915,11 @@ public class MathUtils {
 
     /**
      * Given a list of indices into a list, return those elements of the list with the possibility of drawing list elements multiple times
-
-     * @param indices  the list of indices for elements to extract
-     * @param list     the list from which the elements should be extracted
-     * @param <T>      the template type of the ArrayList
-     * @return         a new ArrayList consisting of the elements at the specified indices
+     *
+     * @param indices the list of indices for elements to extract
+     * @param list    the list from which the elements should be extracted
+     * @param <T>     the template type of the ArrayList
+     * @return a new ArrayList consisting of the elements at the specified indices
      */
     static public <T> ArrayList<T> sliceListByIndices(List<Integer> indices, List<T> list) {
         ArrayList<T> subset = new ArrayList<T>();
@@ -898,18 +941,18 @@ public class MathUtils {
         ArrayList equalToX = new ArrayList();
         ArrayList greaterThanX = new ArrayList();
 
-        for(Comparable y : list) {
-            if(x.compareTo(y) > 0) {
+        for (Comparable y : list) {
+            if (x.compareTo(y) > 0) {
                 lessThanX.add(y);
-            } else if(x.compareTo(y) < 0) {
+            } else if (x.compareTo(y) < 0) {
                 greaterThanX.add(y);
             } else
                 equalToX.add(y);
         }
 
-        if(lessThanX.size() > orderStat)
+        if (lessThanX.size() > orderStat)
             return orderStatisticSearch(orderStat, lessThanX);
-        else if(lessThanX.size() + equalToX.size() >= orderStat)
+        else if (lessThanX.size() + equalToX.size() >= orderStat)
             return orderStat;
         else
             return orderStatisticSearch(orderStat - lessThanX.size() - equalToX.size(), greaterThanX);
@@ -918,7 +961,7 @@ public class MathUtils {
 
 
     public static Object getMedian(List<Comparable> list) {
-        return orderStatisticSearch((int) Math.ceil(list.size()/2), list);
+        return orderStatisticSearch((int) Math.ceil(list.size() / 2), list);
     }
 
     public static byte getQScoreOrderStatistic(List<SAMRecord> reads, List<Integer> offsets, int k) {
@@ -926,7 +969,7 @@ public class MathUtils {
         // list index maps to a q-score only through the offset index
         // returns the kth-largest q-score.
 
-        if( reads.size() == 0) {
+        if (reads.size() == 0) {
             return 0;
         }
 
@@ -938,15 +981,15 @@ public class MathUtils {
 
         final byte qk = reads.get(k).getBaseQualities()[offsets.get(k)];
 
-        for(int iter = 0; iter < reads.size(); iter ++) {
+        for (int iter = 0; iter < reads.size(); iter++) {
             SAMRecord read = reads.get(iter);
             int offset = offsets.get(iter);
             byte quality = read.getBaseQualities()[offset];
 
-            if(quality < qk) {
+            if (quality < qk) {
                 lessThanQReads.add(read);
                 lessThanQOffsets.add(offset);
-            } else if(quality > qk) {
+            } else if (quality > qk) {
                 greaterThanQReads.add(read);
                 greaterThanQOffsets.add(offset);
             } else {
@@ -954,9 +997,9 @@ public class MathUtils {
             }
         }
 
-        if(lessThanQReads.size() > k)
+        if (lessThanQReads.size() > k)
             return getQScoreOrderStatistic(lessThanQReads, lessThanQOffsets, k);
-        else if(equalToQReads.size() + lessThanQReads.size() >= k)
+        else if (equalToQReads.size() + lessThanQReads.size() >= k)
             return qk;
         else
             return getQScoreOrderStatistic(greaterThanQReads, greaterThanQOffsets, k - lessThanQReads.size() - equalToQReads.size());
@@ -964,10 +1007,11 @@ public class MathUtils {
     }
 
     public static byte getQScoreMedian(List<SAMRecord> reads, List<Integer> offsets) {
-        return getQScoreOrderStatistic(reads, offsets, (int)Math.floor(reads.size()/2.));
+        return getQScoreOrderStatistic(reads, offsets, (int) Math.floor(reads.size() / 2.));
     }
 
-    /** A utility class that computes on the fly average and standard deviation for a stream of numbers.
+    /**
+     * A utility class that computes on the fly average and standard deviation for a stream of numbers.
      * The number of observations does not have to be known in advance, and can be also very big (so that
      * it could overflow any naive summation-based scheme or cause loss of precision).
      * Instead, adding a new number <code>observed</code>
@@ -983,20 +1027,31 @@ public class MathUtils {
         public void add(double obs) {
             obs_count++;
             double oldMean = mean;
-            mean += ( obs - mean ) / obs_count; // update mean
-            s += ( obs - oldMean ) * ( obs - mean );
+            mean += (obs - mean) / obs_count; // update mean
+            s += (obs - oldMean) * (obs - mean);
         }
 
         public void addAll(Collection<Number> col) {
-            for ( Number o : col ) {
+            for (Number o : col) {
                 add(o.doubleValue());
             }
         }
 
-        public double mean() { return mean; }
-        public double stddev() { return Math.sqrt(s/(obs_count - 1)); }
-        public double var() { return s/(obs_count - 1); }
-        public long observationCount() { return obs_count; }
+        public double mean() {
+            return mean;
+        }
+
+        public double stddev() {
+            return Math.sqrt(s / (obs_count - 1));
+        }
+
+        public double var() {
+            return s / (obs_count - 1);
+        }
+
+        public long observationCount() {
+            return obs_count;
+        }
 
         public RunningAverage clone() {
             RunningAverage ra = new RunningAverage();
@@ -1007,71 +1062,86 @@ public class MathUtils {
         }
 
         public void merge(RunningAverage other) {
-            if ( this.obs_count > 0 || other.obs_count > 0 ) { // if we have any observations at all
-                this.mean = ( this.mean * this.obs_count + other.mean * other.obs_count ) / ( this.obs_count + other.obs_count );
+            if (this.obs_count > 0 || other.obs_count > 0) { // if we have any observations at all
+                this.mean = (this.mean * this.obs_count + other.mean * other.obs_count) / (this.obs_count + other.obs_count);
                 this.s += other.s;
             }
             this.obs_count += other.obs_count;
         }
     }
-    
+
     //
     // useful common utility routines
     //
-    public static double rate(long n, long d)           { return n / (1.0 * Math.max(d, 1)); }
-    public static double rate(int n, int d)             { return n / (1.0 * Math.max(d, 1)); }
+    public static double rate(long n, long d) {
+        return n / (1.0 * Math.max(d, 1));
+    }
 
-    public static long inverseRate(long n, long d)      { return n == 0 ? 0 : d / Math.max(n, 1); }
-    public static long inverseRate(int n, int d)        { return n == 0 ? 0 : d / Math.max(n, 1); }
+    public static double rate(int n, int d) {
+        return n / (1.0 * Math.max(d, 1));
+    }
 
-    public static double ratio(int num, int denom)      { return ((double)num) / (Math.max(denom, 1)); }
-    public static double ratio(long num, long denom)    { return ((double)num) / (Math.max(denom, 1)); }
+    public static long inverseRate(long n, long d) {
+        return n == 0 ? 0 : d / Math.max(n, 1);
+    }
+
+    public static long inverseRate(int n, int d) {
+        return n == 0 ? 0 : d / Math.max(n, 1);
+    }
+
+    public static double ratio(int num, int denom) {
+        return ((double) num) / (Math.max(denom, 1));
+    }
+
+    public static double ratio(long num, long denom) {
+        return ((double) num) / (Math.max(denom, 1));
+    }
 
     public static final double[] log10Cache;
     public static final double[] jacobianLogTable;
     public static final int JACOBIAN_LOG_TABLE_SIZE = 101;
     public static final double JACOBIAN_LOG_TABLE_STEP = 0.1;
-    public static final double INV_JACOBIAN_LOG_TABLE_STEP = 1.0/JACOBIAN_LOG_TABLE_STEP;
+    public static final double INV_JACOBIAN_LOG_TABLE_STEP = 1.0 / JACOBIAN_LOG_TABLE_STEP;
     public static final double MAX_JACOBIAN_TOLERANCE = 10.0;
     private static final int MAXN = 11000;
     private static final int LOG10_CACHE_SIZE = 4 * MAXN;  // we need to be able to go up to 2*(2N) when calculating some of the coefficients
 
     static {
         log10Cache = new double[LOG10_CACHE_SIZE];
-	    jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
+        jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
 
         log10Cache[0] = Double.NEGATIVE_INFINITY;
-        for (int k=1; k < LOG10_CACHE_SIZE; k++)
+        for (int k = 1; k < LOG10_CACHE_SIZE; k++)
             log10Cache[k] = Math.log10(k);
 
-    	for (int k=0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
-	        jacobianLogTable[k] = Math.log10(1.0+Math.pow(10.0,-((double)k)
-						       * JACOBIAN_LOG_TABLE_STEP));
+        for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
+            jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k)
+                    * JACOBIAN_LOG_TABLE_STEP));
 
-	    }
+        }
     }
 
     static public double softMax(final double[] vec) {
         double acc = vec[0];
-        for (int k=1; k < vec.length; k++)
-            acc = softMax(acc,vec[k]);
+        for (int k = 1; k < vec.length; k++)
+            acc = softMax(acc, vec[k]);
 
         return acc;
 
     }
 
     static public double max(double x0, double x1, double x2) {
-        double a = Math.max(x0,x1);
-        return Math.max(a,x2);
+        double a = Math.max(x0, x1);
+        return Math.max(a, x2);
     }
-    
-    static public double softMax(final double x0, final double x1, final double x2) {
-         // compute naively log10(10^x[0] + 10^x[1]+...)
-         //        return Math.log10(MathUtils.sumLog10(vec));
 
-         // better approximation: do Jacobian logarithm function on data pairs
-         double a = softMax(x0,x1);
-         return softMax(a,x2);
+    static public double softMax(final double x0, final double x1, final double x2) {
+        // compute naively log10(10^x[0] + 10^x[1]+...)
+        //        return Math.log10(MathUtils.sumLog10(vec));
+
+        // better approximation: do Jacobian logarithm function on data pairs
+        double a = softMax(x0, x1);
+        return softMax(a, x2);
     }
 
     static public double softMax(final double x, final double y) {
@@ -1084,49 +1154,50 @@ public class MathUtils {
         // slow exact version:
         // return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y));
 
-        double diff = x-y;
+        double diff = x - y;
 
         if (diff > MAX_JACOBIAN_TOLERANCE)
             return x;
         else if (diff < -MAX_JACOBIAN_TOLERANCE)
             return y;
         else if (diff >= 0) {
-            int ind = (int)(diff*INV_JACOBIAN_LOG_TABLE_STEP+0.5);
+            int ind = (int) (diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5);
             return x + jacobianLogTable[ind];
-        }
-        else {
-            int ind = (int)(-diff*INV_JACOBIAN_LOG_TABLE_STEP+0.5);
+        } else {
+            int ind = (int) (-diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5);
             return y + jacobianLogTable[ind];
         }
     }
 
-    public static double phredScaleToProbability (byte q) {
-        return Math.pow(10,(-q)/10.0);
+    public static double phredScaleToProbability(byte q) {
+        return Math.pow(10, (-q) / 10.0);
     }
 
-    public static double phredScaleToLog10Probability (byte q) {
-        return ((-q)/10.0);
+    public static double phredScaleToLog10Probability(byte q) {
+        return ((-q) / 10.0);
     }
 
     /**
      * Returns the phred scaled value of probability p
+     *
      * @param p probability (between 0 and 1).
      * @return phred scaled probability of p
      */
-    public static byte probabilityToPhredScale (double p) {
+    public static byte probabilityToPhredScale(double p) {
         return (byte) ((-10) * Math.log10(p));
     }
 
-    public static double log10ProbabilityToPhredScale (double log10p) {
+    public static double log10ProbabilityToPhredScale(double log10p) {
         return (-10) * log10p;
     }
 
     /**
      * Converts LN to LOG10
+     *
      * @param ln log(x)
      * @return log10(x)
      */
-    public static double lnToLog10 (double ln) {
+    public static double lnToLog10(double ln) {
         return ln * Math.log10(Math.exp(1));
     }
 
@@ -1134,169 +1205,190 @@ public class MathUtils {
      * Constants to simplify the log gamma function calculation.
      */
     private static final double
-        zero = 0.0,
-        one  = 1.0,
-        half = .5,
-        a0  =  7.72156649015328655494e-02,
-        a1  =  3.22467033424113591611e-01,
-        a2  =  6.73523010531292681824e-02,
-        a3  =  2.05808084325167332806e-02,
-        a4  =  7.38555086081402883957e-03,
-        a5  =  2.89051383673415629091e-03,
-        a6  =  1.19270763183362067845e-03,
-        a7  =  5.10069792153511336608e-04,
-        a8  =  2.20862790713908385557e-04,
-        a9  =  1.08011567247583939954e-04,
-        a10 =  2.52144565451257326939e-05,
-        a11 =  4.48640949618915160150e-05,
-        tc  =  1.46163214496836224576e+00,
-        tf  = -1.21486290535849611461e-01,
-        tt  = -3.63867699703950536541e-18,
-        t0  =  4.83836122723810047042e-01,
-        t1  = -1.47587722994593911752e-01,
-        t2  =  6.46249402391333854778e-02,
-        t3  = -3.27885410759859649565e-02,
-        t4  =  1.79706750811820387126e-02,
-        t5  = -1.03142241298341437450e-02,
-        t6  =  6.10053870246291332635e-03,
-        t7  = -3.68452016781138256760e-03,
-        t8  =  2.25964780900612472250e-03,
-        t9  = -1.40346469989232843813e-03,
-        t10 =  8.81081882437654011382e-04,
-        t11 = -5.38595305356740546715e-04,
-        t12 =  3.15632070903625950361e-04,
-        t13 = -3.12754168375120860518e-04,
-        t14 =  3.35529192635519073543e-04,
-        u0  = -7.72156649015328655494e-02,
-        u1  =  6.32827064025093366517e-01,
-        u2  =  1.45492250137234768737e+00,
-        u3  =  9.77717527963372745603e-01,
-        u4  =  2.28963728064692451092e-01,
-        u5  =  1.33810918536787660377e-02,
-        v1  =  2.45597793713041134822e+00,
-        v2  =  2.12848976379893395361e+00,
-        v3  =  7.69285150456672783825e-01,
-        v4  =  1.04222645593369134254e-01,
-        v5  =  3.21709242282423911810e-03,
-        s0  = -7.72156649015328655494e-02,
-        s1  =  2.14982415960608852501e-01,
-        s2  =  3.25778796408930981787e-01,
-        s3  =  1.46350472652464452805e-01,
-        s4  =  2.66422703033638609560e-02,
-        s5  =  1.84028451407337715652e-03,
-        s6  =  3.19475326584100867617e-05,
-        r1  =  1.39200533467621045958e+00,
-        r2  =  7.21935547567138069525e-01,
-        r3  =  1.71933865632803078993e-01,
-        r4  =  1.86459191715652901344e-02,
-        r5  =  7.77942496381893596434e-04,
-        r6  =  7.32668430744625636189e-06,
-        w0  =  4.18938533204672725052e-01,
-        w1  =  8.33333333333329678849e-02,
-        w2  = -2.77777777728775536470e-03,
-        w3  =  7.93650558643019558500e-04,
-        w4  = -5.95187557450339963135e-04,
-        w5  =  8.36339918996282139126e-04,
-        w6  = -1.63092934096575273989e-03;
+            zero = 0.0,
+            one = 1.0,
+            half = .5,
+            a0 = 7.72156649015328655494e-02,
+            a1 = 3.22467033424113591611e-01,
+            a2 = 6.73523010531292681824e-02,
+            a3 = 2.05808084325167332806e-02,
+            a4 = 7.38555086081402883957e-03,
+            a5 = 2.89051383673415629091e-03,
+            a6 = 1.19270763183362067845e-03,
+            a7 = 5.10069792153511336608e-04,
+            a8 = 2.20862790713908385557e-04,
+            a9 = 1.08011567247583939954e-04,
+            a10 = 2.52144565451257326939e-05,
+            a11 = 4.48640949618915160150e-05,
+            tc = 1.46163214496836224576e+00,
+            tf = -1.21486290535849611461e-01,
+            tt = -3.63867699703950536541e-18,
+            t0 = 4.83836122723810047042e-01,
+            t1 = -1.47587722994593911752e-01,
+            t2 = 6.46249402391333854778e-02,
+            t3 = -3.27885410759859649565e-02,
+            t4 = 1.79706750811820387126e-02,
+            t5 = -1.03142241298341437450e-02,
+            t6 = 6.10053870246291332635e-03,
+            t7 = -3.68452016781138256760e-03,
+            t8 = 2.25964780900612472250e-03,
+            t9 = -1.40346469989232843813e-03,
+            t10 = 8.81081882437654011382e-04,
+            t11 = -5.38595305356740546715e-04,
+            t12 = 3.15632070903625950361e-04,
+            t13 = -3.12754168375120860518e-04,
+            t14 = 3.35529192635519073543e-04,
+            u0 = -7.72156649015328655494e-02,
+            u1 = 6.32827064025093366517e-01,
+            u2 = 1.45492250137234768737e+00,
+            u3 = 9.77717527963372745603e-01,
+            u4 = 2.28963728064692451092e-01,
+            u5 = 1.33810918536787660377e-02,
+            v1 = 2.45597793713041134822e+00,
+            v2 = 2.12848976379893395361e+00,
+            v3 = 7.69285150456672783825e-01,
+            v4 = 1.04222645593369134254e-01,
+            v5 = 3.21709242282423911810e-03,
+            s0 = -7.72156649015328655494e-02,
+            s1 = 2.14982415960608852501e-01,
+            s2 = 3.25778796408930981787e-01,
+            s3 = 1.46350472652464452805e-01,
+            s4 = 2.66422703033638609560e-02,
+            s5 = 1.84028451407337715652e-03,
+            s6 = 3.19475326584100867617e-05,
+            r1 = 1.39200533467621045958e+00,
+            r2 = 7.21935547567138069525e-01,
+            r3 = 1.71933865632803078993e-01,
+            r4 = 1.86459191715652901344e-02,
+            r5 = 7.77942496381893596434e-04,
+            r6 = 7.32668430744625636189e-06,
+            w0 = 4.18938533204672725052e-01,
+            w1 = 8.33333333333329678849e-02,
+            w2 = -2.77777777728775536470e-03,
+            w3 = 7.93650558643019558500e-04,
+            w4 = -5.95187557450339963135e-04,
+            w5 = 8.36339918996282139126e-04,
+            w6 = -1.63092934096575273989e-03;
 
     /**
      * Efficient rounding functions to simplify the log gamma function calculation
-     *   double to long with 32 bit shift
+     * double to long with 32 bit shift
      */
-    private static final int HI (double x) {
-        return (int)(Double.doubleToLongBits(x) >> 32);
+    private static final int HI(double x) {
+        return (int) (Double.doubleToLongBits(x) >> 32);
     }
 
     /**
      * Efficient rounding functions to simplify the log gamma function calculation
-     *   double to long without shift
+     * double to long without shift
      */
-    private static final int LO (double x) {
-        return (int)Double.doubleToLongBits(x);
+    private static final int LO(double x) {
+        return (int) Double.doubleToLongBits(x);
     }
 
     /**
      * Most efficent implementation of the lnGamma (FDLIBM)
      * Use via the log10Gamma wrapper method.
      */
-    private static double lnGamma (double x) {
-        double t,y,z,p,p1,p2,p3,q,r,w;
+    private static double lnGamma(double x) {
+        double t, y, z, p, p1, p2, p3, q, r, w;
         int i;
 
         int hx = HI(x);
         int lx = LO(x);
 
         /* purge off +-inf, NaN, +-0, and negative arguments */
-        int ix = hx&0x7fffffff;
+        int ix = hx & 0x7fffffff;
         if (ix >= 0x7ff00000) return Double.POSITIVE_INFINITY;
-        if ((ix|lx)==0 || hx < 0) return Double.NaN;
-        if (ix<0x3b900000) {	/* |x|<2**-70, return -log(|x|) */
+        if ((ix | lx) == 0 || hx < 0) return Double.NaN;
+        if (ix < 0x3b900000) {    /* |x|<2**-70, return -log(|x|) */
             return -Math.log(x);
         }
 
         /* purge off 1 and 2 */
-        if((((ix-0x3ff00000)|lx)==0)||(((ix-0x40000000)|lx)==0)) r = 0;
-        /* for x < 2.0 */
-        else if(ix<0x40000000) {
-            if(ix<=0x3feccccc) { 	/* lgamma(x) = lgamma(x+1)-log(x) */
+        if ((((ix - 0x3ff00000) | lx) == 0) || (((ix - 0x40000000) | lx) == 0)) r = 0;
+            /* for x < 2.0 */
+        else if (ix < 0x40000000) {
+            if (ix <= 0x3feccccc) {     /* lgamma(x) = lgamma(x+1)-log(x) */
                 r = -Math.log(x);
-                if(ix>=0x3FE76944) {y = one-x; i= 0;}
-                else if(ix>=0x3FCDA661) {y= x-(tc-one); i=1;}
-                else {y = x; i=2;}
+                if (ix >= 0x3FE76944) {
+                    y = one - x;
+                    i = 0;
+                } else if (ix >= 0x3FCDA661) {
+                    y = x - (tc - one);
+                    i = 1;
+                } else {
+                    y = x;
+                    i = 2;
+                }
             } else {
                 r = zero;
-                if(ix>=0x3FFBB4C3) {y=2.0-x;i=0;} /* [1.7316,2] */
-                else if(ix>=0x3FF3B4C4) {y=x-tc;i=1;} /* [1.23,1.73] */
-                else {y=x-one;i=2;}
+                if (ix >= 0x3FFBB4C3) {
+                    y = 2.0 - x;
+                    i = 0;
+                } /* [1.7316,2] */ else if (ix >= 0x3FF3B4C4) {
+                    y = x - tc;
+                    i = 1;
+                } /* [1.23,1.73] */ else {
+                    y = x - one;
+                    i = 2;
+                }
             }
 
-            switch(i) {
-            case 0:
-                z = y*y;
-                p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
-                p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
-                p  = y*p1+p2;
-                r  += (p-0.5*y); break;
-            case 1:
-                z = y*y;
-                w = z*y;
-                p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12)));	/* parallel comp */
-                p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
-                p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
-                p  = z*p1-(tt-w*(p2+y*p3));
-                r += (tf + p); break;
-            case 2:
-                p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
-                p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
-                r += (-0.5*y + p1/p2);
+            switch (i) {
+                case 0:
+                    z = y * y;
+                    p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
+                    p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
+                    p = y * p1 + p2;
+                    r += (p - 0.5 * y);
+                    break;
+                case 1:
+                    z = y * y;
+                    w = z * y;
+                    p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12)));    /* parallel comp */
+                    p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
+                    p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
+                    p = z * p1 - (tt - w * (p2 + y * p3));
+                    r += (tf + p);
+                    break;
+                case 2:
+                    p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
+                    p2 = one + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
+                    r += (-0.5 * y + p1 / p2);
             }
-        }
-        else if(ix<0x40200000) { 			/* x < 8.0 */
-            i = (int)x;
+        } else if (ix < 0x40200000) {             /* x < 8.0 */
+            i = (int) x;
             t = zero;
-            y = x-(double)i;
-            p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
-            q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
-            r = half*y+p/q;
-            z = one;	/* lgamma(1+s) = log(s) + lgamma(s) */
-            switch(i) {
-            case 7: z *= (y+6.0);	/* FALLTHRU */
-            case 6: z *= (y+5.0);	/* FALLTHRU */
-            case 5: z *= (y+4.0);	/* FALLTHRU */
-            case 4: z *= (y+3.0);	/* FALLTHRU */
-            case 3: z *= (y+2.0);	/* FALLTHRU */
-                r += Math.log(z); break;
+            y = x - (double) i;
+            p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
+            q = one + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
+            r = half * y + p / q;
+            z = one;    /* lgamma(1+s) = log(s) + lgamma(s) */
+            switch (i) {
+                case 7:
+                    z *= (y + 6.0);    /* FALLTHRU */
+                case 6:
+                    z *= (y + 5.0);    /* FALLTHRU */
+                case 5:
+                    z *= (y + 4.0);    /* FALLTHRU */
+                case 4:
+                    z *= (y + 3.0);    /* FALLTHRU */
+                case 3:
+                    z *= (y + 2.0);    /* FALLTHRU */
+                    r += Math.log(z);
+                    break;
             }
             /* 8.0 <= x < 2**58 */
         } else if (ix < 0x43900000) {
             t = Math.log(x);
-            z = one/x;
-            y = z*z;
-            w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
-            r = (x-half)*(t-one)+w;
+            z = one / x;
+            y = z * z;
+            w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
+            r = (x - half) * (t - one) + w;
         } else
             /* 2**58 <= x <= inf */
-            r =  x*(Math.log(x)-one);
+            r = x * (Math.log(x) - one);
         return r;
     }
 
@@ -1308,7 +1400,7 @@ public class MathUtils {
      * @param x the x parameter
      * @return the log10 of the gamma function at x.
      */
-    public static double log10Gamma (double x) {
+    public static double log10Gamma(double x) {
         return lnToLog10(lnGamma(x));
     }
 
@@ -1320,13 +1412,13 @@ public class MathUtils {
      * @param k number of successes
      * @return the log10 of the binomial coefficient
      */
-    public static double log10BinomialCoefficient (int n, int k) {
-        return log10Gamma(n+1) - log10Gamma(k+1) - log10Gamma(n-k+1);
+    public static double log10BinomialCoefficient(int n, int k) {
+        return log10Gamma(n + 1) - log10Gamma(k + 1) - log10Gamma(n - k + 1);
     }
 
-    public static double log10BinomialProbability (int n, int k, double log10p) {
-        double log10OneMinusP = Math.log10(1-Math.pow(10,log10p));
-        return log10BinomialCoefficient(n, k) + log10p*k + log10OneMinusP*(n-k);
+    public static double log10BinomialProbability(int n, int k, double log10p) {
+        double log10OneMinusP = Math.log10(1 - Math.pow(10, log10p));
+        return log10BinomialCoefficient(n, k) + log10p * k + log10OneMinusP * (n - k);
     }
 
 
@@ -1338,38 +1430,74 @@ public class MathUtils {
      * @param k array of any size with the number of successes for each grouping (k1, k2, k3, ..., km)
      * @return
      */
-    public static double log10MultinomialCoefficient (int n, int [] k) {
+    public static double log10MultinomialCoefficient(int n, int[] k) {
         double denominator = 0.0;
         for (int x : k) {
-            denominator += log10Gamma(x+1);
+            denominator += log10Gamma(x + 1);
         }
-        return log10Gamma(n+1) - denominator;
+        return log10Gamma(n + 1) - denominator;
     }
 
     /**
      * Computes the log10 of the multinomial distribution probability given a vector
      * of log10 probabilities. Designed to prevent overflows even with very large numbers.
      *
-     * @param n number of trials
-     * @param k array of number of successes for each possibility
+     * @param n      number of trials
+     * @param k      array of number of successes for each possibility
      * @param log10p array of log10 probabilities
      * @return
      */
-    public static double log10MultinomialProbability (int n, int [] k, double [] log10p) {
+    public static double log10MultinomialProbability(int n, int[] k, double[] log10p) {
         if (log10p.length != k.length)
             throw new UserException.BadArgumentValue("p and k", "Array of log10 probabilities must have the same size as the array of number of sucesses: " + log10p.length + ", " + k.length);
         double log10Prod = 0.0;
-        for (int i=0; i<log10p.length; i++) {
-            log10Prod += log10p[i]*k[i];
+        for (int i = 0; i < log10p.length; i++) {
+            log10Prod += log10p[i] * k[i];
         }
         return log10MultinomialCoefficient(n, k) + log10Prod;
     }
 
-    public static double factorial (int x) {
-        return Math.pow(10, log10Gamma(x+1));
+    public static double factorial(int x) {
+        return Math.pow(10, log10Gamma(x + 1));
     }
 
-    public static double log10Factorial (int x) {
-        return log10Gamma(x+1);
+    public static double log10Factorial(int x) {
+        return log10Gamma(x + 1);
+    }
+
+
+    /**
+     * Adds two arrays together and returns a new array with the sum.
+     *
+     * @param a one array
+     * @param b another array
+     * @return a new array with the sum of a and b
+     */
+    @Requires("a.length == b.length")
+    @Ensures("result.length == a.length")
+    public static int[] addArrays(int[] a, int[] b) {
+        int[] c = new int[a.length];
+        for (int i = 0; i < a.length; i++)
+            c[i] = a[i] + b[i];
+        return c;
+    }
+
+    /**
+     * Quick implementation of the Knuth-shuffle algorithm to generate a random
+     * permutation of the given array.
+     *
+     * @param array the original array
+     * @return a new array with the elements shuffled
+     */
+    public static Object[] arrayShuffle(Object[] array) {
+        int n = array.length;
+        Object[] shuffled = array.clone();
+        for (int i = 0; i < n; i++) {
+            int j = i + GenomeAnalysisEngine.getRandomGenerator().nextInt(n - i);
+            Object tmp = shuffled[i];
+            shuffled[i] = shuffled[j];
+            shuffled[j] = tmp;
+        }
+        return shuffled;
     }
 }
diff --git a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java
index 251c105c3..049bdce3e 100755
--- a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java
@@ -26,22 +26,20 @@
 package org.broadinstitute.sting.utils;
 
 
+import org.broadinstitute.sting.BaseTest;
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
-import org.broadinstitute.sting.BaseTest;
 
-
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Collections;
+import java.util.*;
 
 /**
  * Basic unit test for MathUtils
  */
 public class MathUtilsUnitTest extends BaseTest {
     @BeforeClass
-    public void init() { }
+    public void init() {
+    }
 
     /**
      * Tests that we get the right values from the binomial distribution
@@ -66,20 +64,20 @@ public class MathUtilsUnitTest extends BaseTest {
     public void testMultinomialProbability() {
         logger.warn("Executing testMultinomialProbability");
 
-        int[] counts0 = { 2, 0, 1 };
-        double[] probs0 = { 0.33, 0.33, 0.34 };
+        int[] counts0 = {2, 0, 1};
+        double[] probs0 = {0.33, 0.33, 0.34};
         Assert.assertEquals(MathUtils.multinomialProbability(counts0, probs0), 0.111078, 1e-6);
 
-        int[] counts1 = { 10, 20, 30 };
-        double[] probs1 = { 0.25, 0.25, 0.50 };
+        int[] counts1 = {10, 20, 30};
+        double[] probs1 = {0.25, 0.25, 0.50};
         Assert.assertEquals(MathUtils.multinomialProbability(counts1, probs1), 0.002870301, 1e-9);
 
-        int[] counts2 = { 38, 82, 50, 36 };
-        double[] probs2 = { 0.25, 0.25, 0.25, 0.25 };
+        int[] counts2 = {38, 82, 50, 36};
+        double[] probs2 = {0.25, 0.25, 0.25, 0.25};
         Assert.assertEquals(MathUtils.multinomialProbability(counts2, probs2), 1.88221e-09, 1e-10);
 
-        int[] counts3 = { 1, 600, 1 };
-        double[] probs3 = { 0.33, 0.33, 0.34 };
+        int[] counts3 = {1, 600, 1};
+        double[] probs3 = {0.33, 0.33, 0.34};
         Assert.assertEquals(MathUtils.multinomialProbability(counts3, probs3), 5.20988e-285, 1e-286);
     }
 
@@ -123,19 +121,21 @@ public class MathUtilsUnitTest extends BaseTest {
         Assert.assertTrue(FiveAlpha.containsAll(BigFiveAlpha));
     }
 
-    /** Tests that we correctly compute mean and standard deviation from a stream of numbers */
+    /**
+     * Tests that we correctly compute mean and standard deviation from a stream of numbers
+     */
     @Test
     public void testRunningAverage() {
         logger.warn("Executing testRunningAverage");
 
-        int [] numbers = {1,2,4,5,3,128,25678,-24};
+        int[] numbers = {1, 2, 4, 5, 3, 128, 25678, -24};
         MathUtils.RunningAverage r = new MathUtils.RunningAverage();
 
-        for ( int i = 0 ; i < numbers.length ; i++ ) r.add((double)numbers[i]);
+        for (int i = 0; i < numbers.length; i++) r.add((double) numbers[i]);
 
-        Assert.assertEquals((long)numbers.length, r.observationCount());
-        Assert.assertTrue(r.mean()- 3224.625 < 2e-10 );
-        Assert.assertTrue(r.stddev()-9072.6515881128 < 2e-10);
+        Assert.assertEquals((long) numbers.length, r.observationCount());
+        Assert.assertTrue(r.mean() - 3224.625 < 2e-10);
+        Assert.assertTrue(r.stddev() - 9072.6515881128 < 2e-10);
     }
 
     @Test
@@ -174,4 +174,56 @@ public class MathUtilsUnitTest extends BaseTest {
         Assert.assertEquals(MathUtils.log10Factorial(12342), 45138.26, 1e-1);
     }
 
+    @Test(enabled = true)
+    public void testRandomSubset() {
+        Integer[] x = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+        Assert.assertEquals(MathUtils.randomSubset(x, 0).length, 0);
+        Assert.assertEquals(MathUtils.randomSubset(x, 1).length, 1);
+        Assert.assertEquals(MathUtils.randomSubset(x, 2).length, 2);
+        Assert.assertEquals(MathUtils.randomSubset(x, 3).length, 3);
+        Assert.assertEquals(MathUtils.randomSubset(x, 4).length, 4);
+        Assert.assertEquals(MathUtils.randomSubset(x, 5).length, 5);
+        Assert.assertEquals(MathUtils.randomSubset(x, 6).length, 6);
+        Assert.assertEquals(MathUtils.randomSubset(x, 7).length, 7);
+        Assert.assertEquals(MathUtils.randomSubset(x, 8).length, 8);
+        Assert.assertEquals(MathUtils.randomSubset(x, 9).length, 9);
+        Assert.assertEquals(MathUtils.randomSubset(x, 10).length, 10);
+        Assert.assertEquals(MathUtils.randomSubset(x, 11).length, 10);
+
+        for (int i = 0; i < 25; i++)
+            Assert.assertTrue(hasUniqueElements(MathUtils.randomSubset(x, 5)));
+
+    }
+
+    @Test(enabled = true)
+    public void testArrayShuffle() {
+        Integer[] x = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+        for (int i = 0; i < 25; i++) {
+            Object[] t = MathUtils.arrayShuffle(x);
+            Assert.assertTrue(hasUniqueElements(t));
+            Assert.assertTrue(hasAllElements(x, t));
+        }
+    }
+
+    private boolean hasUniqueElements(Object[] x) {
+        for (int i = 0; i < x.length; i++)
+            for (int j = i + 1; j < x.length; j++)
+                if (x[i].equals(x[j]) || x[i] == x[j])
+                    return false;
+        return true;
+    }
+
+    private boolean hasAllElements(final Object[] expected, final Object[] actual) {
+        HashSet<Object> set = new HashSet<Object>();
+        set.addAll(Arrays.asList(expected));
+        set.removeAll(Arrays.asList(actual));
+        return set.isEmpty();
+    }
+
+
+    private void p (Object []x) {
+        for (Object v: x)
+            System.out.print((Integer) v + " ");
+        System.out.println();
+    }
 }