A utility class that computes running average and standard deviation for a stream of numbers it is being fed with. Updates mean/stddev on the fly and does not cache the observations, so it uses no memory and also should be stable against overflow/loss of precision. Simple unit test is also provided (does *not* stress-test the engine with millions of numbers though).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3944 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8d8acc9fae
commit
d53d5ffbf6
|
|
@ -714,7 +714,31 @@ public class MathUtils {
|
|||
return getQScoreOrderStatistic(reads, offsets, (int)Math.floor(reads.size()/2.));
|
||||
}
|
||||
|
||||
/** A utility class that computes on the fly average and standard deviation for a stream of numbers.
|
||||
* The number of observations does not have to be known in advance, and can be also very big (so that
|
||||
* it could overflow any naive summation-based scheme or cause loss of precision).
|
||||
* Instead, adding a new number <code>observed</code>
|
||||
* to a sample with <code>add(observed)</code> immediately updates the instance of this object so that
|
||||
* it contains correct mean and standard deviation for all the numbers seen so far. Source: Knuth, vol.2
|
||||
* (see also e.g. http://www.johndcook.com/standard_deviation.html for online reference).
|
||||
*/
|
||||
public static class RunningAverage {
|
||||
private double mean = 0.0;
|
||||
private double s = 0.0;
|
||||
private long obs_count = 0;
|
||||
|
||||
public void add(double obs) {
|
||||
obs_count++;
|
||||
double oldMean = mean;
|
||||
mean += ( obs - mean ) / obs_count; // update mean
|
||||
s += ( obs - oldMean ) * ( obs - mean );
|
||||
}
|
||||
|
||||
public double mean() { return mean; }
|
||||
public double stddev() { return Math.sqrt(s/(obs_count - 1)); }
|
||||
public long observationCount() { return obs_count; }
|
||||
}
|
||||
|
||||
//
|
||||
// useful common utility routines
|
||||
//
|
||||
|
|
|
|||
|
|
@ -123,5 +123,20 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
Assert.assertTrue(BigFiveAlpha.containsAll(FiveAlpha));
|
||||
Assert.assertTrue(FiveAlpha.containsAll(BigFiveAlpha));
|
||||
}
|
||||
|
||||
/** Tests that we correctly compute mean and standard deviation from a stream of numbers */
|
||||
@Test
|
||||
public void testRunningAverage() {
|
||||
logger.warn("Executing testRunningAverage");
|
||||
|
||||
int [] numbers = {1,2,4,5,3,128,25678,-24};
|
||||
MathUtils.RunningAverage r = new MathUtils.RunningAverage();
|
||||
|
||||
for ( int i = 0 ; i < numbers.length ; i++ ) r.add((double)numbers[i]);
|
||||
|
||||
Assert.assertEquals(r.observationCount(),(long)numbers.length);
|
||||
Assert.assertTrue(r.mean()- 3224.625 < 2e-10 );
|
||||
Assert.assertTrue(r.stddev()-9072.6515881128 < 2e-10);
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue