diff --git a/java/src/org/broadinstitute/sting/utils/MathUtils.java b/java/src/org/broadinstitute/sting/utils/MathUtils.java index 209588fff..395326d15 100755 --- a/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -714,7 +714,31 @@ public class MathUtils { return getQScoreOrderStatistic(reads, offsets, (int)Math.floor(reads.size()/2.)); } + /** A utility class that computes on the fly average and standard deviation for a stream of numbers. + * The number of observations does not have to be known in advance, and can be also very big (so that + * it could overflow any naive summation-based scheme or cause loss of precision). + * Instead, adding a new number observed + * to a sample with add(observed) immediately updates the instance of this object so that + * it contains correct mean and standard deviation for all the numbers seen so far. Source: Knuth, vol.2 + * (see also e.g. http://www.johndcook.com/standard_deviation.html for online reference). + */ + public static class RunningAverage { + private double mean = 0.0; + private double s = 0.0; + private long obs_count = 0; + public void add(double obs) { + obs_count++; + double oldMean = mean; + mean += ( obs - mean ) / obs_count; // update mean + s += ( obs - oldMean ) * ( obs - mean ); + } + + public double mean() { return mean; } + public double stddev() { return Math.sqrt(s/(obs_count - 1)); } + public long observationCount() { return obs_count; } + } + // // useful common utility routines // diff --git a/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java b/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java index b6c5d6720..d74e09128 100755 --- a/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java @@ -123,5 +123,20 @@ public class MathUtilsUnitTest extends BaseTest { Assert.assertTrue(BigFiveAlpha.containsAll(FiveAlpha)); Assert.assertTrue(FiveAlpha.containsAll(BigFiveAlpha)); } + + /** Tests that we correctly compute mean and standard deviation from a stream of numbers */ + @Test + public void testRunningAverage() { + logger.warn("Executing testRunningAverage"); + + int [] numbers = {1,2,4,5,3,128,25678,-24}; + MathUtils.RunningAverage r = new MathUtils.RunningAverage(); + + for ( int i = 0 ; i < numbers.length ; i++ ) r.add((double)numbers[i]); + + Assert.assertEquals(r.observationCount(),(long)numbers.length); + Assert.assertTrue(r.mean()- 3224.625 < 2e-10 ); + Assert.assertTrue(r.stddev()-9072.6515881128 < 2e-10); + } } \ No newline at end of file