A utility class that computes the running average and standard deviation of a stream of numbers fed to it. It updates the mean and standard deviation on the fly and does not cache the observations, so it needs only constant memory and should also be stable against overflow and loss of precision. A simple unit test is also provided (it does *not* stress-test the engine with millions of numbers, though).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3944 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2010-08-04 21:39:02 +00:00
parent 8d8acc9fae
commit d53d5ffbf6
2 changed files with 39 additions and 0 deletions

View File

@ -714,7 +714,31 @@ public class MathUtils {
return getQScoreOrderStatistic(reads, offsets, (int)Math.floor(reads.size()/2.));
}
/**
 * A utility class that computes, on the fly, the average and standard deviation of a stream of numbers.
 * The number of observations does not have to be known in advance, and can also be very big (so that
 * it could overflow any naive summation-based scheme or cause loss of precision).
 * Instead, adding a new number <code>observed</code> to a sample with <code>add(observed)</code>
 * immediately updates the instance so that it contains the mean and standard deviation of all the
 * numbers seen so far; the observations themselves are never stored. Source: Knuth, vol.2
 * (see also e.g. http://www.johndcook.com/standard_deviation.html for online reference).
 * <p>
 * This class performs no synchronization of its own.
 */
public static class RunningAverage {
    /** Mean of all observations added so far (0.0 before the first observation). */
    private double mean = 0.0;
    /** Running sum of squared deviations from the current mean. */
    private double s = 0.0;
    /** Number of observations added so far. */
    private long obs_count = 0;

    /**
     * Adds one observation to the sample and updates the running statistics.
     *
     * @param obs the newly observed value
     */
    public void add(double obs) {
        obs_count++;
        final double oldMean = mean;
        mean += (obs - mean) / obs_count; // update mean
        // Uses both the old and the updated mean, as prescribed by the recurrence.
        s += (obs - oldMean) * (obs - mean);
    }

    /**
     * @return the mean of all observations added so far, or 0.0 if none were added
     */
    public double mean() {
        return mean;
    }

    /**
     * Returns the sample standard deviation (normalized by <code>n - 1</code>) of the observations
     * added so far.
     *
     * @return the sample standard deviation, or 0.0 when fewer than two observations have been
     *         added (the unguarded formula would yield NaN for one observation and -0.0 for none)
     */
    public double stddev() {
        if (obs_count < 2) {
            return 0.0;
        }
        return Math.sqrt(s / (obs_count - 1));
    }

    /**
     * @return the number of observations added so far
     */
    public long observationCount() {
        return obs_count;
    }
}
//
// useful common utility routines
//

View File

@ -123,5 +123,20 @@ public class MathUtilsUnitTest extends BaseTest {
Assert.assertTrue(BigFiveAlpha.containsAll(FiveAlpha));
Assert.assertTrue(FiveAlpha.containsAll(BigFiveAlpha));
}
/**
 * Tests that we correctly compute mean and standard deviation from a stream of numbers.
 * The expected values are the mean and the sample (n - 1) standard deviation of the fixed
 * input array below.
 */
@Test
public void testRunningAverage() {
    logger.warn("Executing testRunningAverage");
    int [] numbers = {1,2,4,5,3,128,25678,-24};
    MathUtils.RunningAverage r = new MathUtils.RunningAverage();
    for ( int i = 0 ; i < numbers.length ; i++ ) r.add((double)numbers[i]);
    Assert.assertEquals(r.observationCount(),(long)numbers.length);
    // Compare absolute differences: a one-sided "value - expected < eps" check would also
    // pass for any result that is arbitrarily far BELOW the expected value.
    Assert.assertTrue(Math.abs(r.mean() - 3224.625) < 2e-10);
    Assert.assertTrue(Math.abs(r.stddev() - 9072.6515881128) < 2e-10);
}
}