Subsampling of points, for the case where we have enormous numbers of points

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4927 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-01-03 13:48:11 +00:00
parent 54adbd2581
commit dba30c4118
1 changed files with 14 additions and 2 deletions

View File

@ -6,6 +6,8 @@ DATA_FILE = args[1]
# Second element of args; used as the title (main=) of the report plots.
DESCRIPTION = args[2]
# Output name: the input path with ".pdf" appended (where it is opened is
# outside this view).
OUTPUT_PDF = paste(DATA_FILE, ".pdf", sep="")
# Cap on the number of points drawn per scatter plot; handed to everyNth()
# to thin the data before plotting.
MAX_POINTS = 1000
if ( onCMDLine ) {
print(paste("Reading data from", DATA_FILE))
d = read.table(DATA_FILE, header=T)
@ -19,6 +21,15 @@ vec.margin <- function(x) {
c(x[1], d[1:(l-1)])
}
# Thin a data frame (or matrix) down to at most n evenly spaced rows, so that
# plots of very large inputs stay readable and quick to draw.
#
# Args:
#   x: a data frame or matrix whose rows are the observations to subsample.
#   n: the maximum number of rows to keep (not the stride, despite the name).
#
# Returns:
#   x restricted to rows m, 2m, 3m, ... where m = ceiling(nrow(x) / n).
#   When x has no more than n rows, m is 1 and every row is kept. The result
#   keeps x's two-dimensional shape, even for single-column input.
everyNth <- function(x, n) {
  l <- dim(x)[1]
  # With zero rows the stride would be 0 and `%% 0` yields NaN, which would
  # index in spurious NA rows; hand back the (empty) input unchanged instead.
  if (l == 0) {
    return(x)
  }
  m <- ceiling(l / n)
  # seq_len() rather than 1:l so the index can never count backwards.
  keep <- seq_len(l) %% m == 0
  # drop = FALSE stops R collapsing a single-column result into a bare vector,
  # which would break callers that go on to use `$` on the result.
  x[keep, , drop = FALSE]
}
# Number of entries in the units.processed column.
l = length(d$units.processed)
# Derived column computed by vec.margin(); generateOneReport plots it under the
# axis label "Units processed in last timing interval".
d$units.processed.margin = vec.margin(d$units.processed)
#prev = 0
@ -32,14 +43,15 @@ generateOneReport <- function(d) {
qs = quantile(d$processing.speed, probs = c(0.01, 0.5, 0.99))
# unit processing time
plot(d$elapsed.time, d$processing.speed, main=DESCRIPTION, xlab="Elapsed time (sec)", ylab="Processing speed (seconds per 1M units)", ylim=c(qs[1], qs[3]), type="b", col="cornflowerblue", lwd=2)
dpoints = everyNth(d, MAX_POINTS)
plot(dpoints$elapsed.time, dpoints$processing.speed, main=DESCRIPTION, xlab="Elapsed time (sec)", ylab="Processing speed (seconds per 1M units)", ylim=c(qs[1], qs[3]), type="b", col="cornflowerblue", lwd=2)
abline(h=qs[2], lty=2)
# instantaneous processing speed
running_median_window = 101
rm = runmed(d$units.processed.margin, running_median_window)
POINT_COL = "#0000AA33"
plot(d$elapsed.time, d$units.processed.margin, main=DESCRIPTION, xlab="Elapsed time (sec)", ylab="Units processed in last timing interval", type="p", cex = 0.5, col=POINT_COL)
plot(dpoints$elapsed.time, dpoints$units.processed.margin, main=DESCRIPTION, xlab="Elapsed time (sec)", ylab="Units processed in last timing interval", type="p", cex = 0.5, col=POINT_COL)
lines(d$elapsed.time, rm, lwd=3, col="red")
legend("topleft", c("Observations", "101-elt running median"), fill=c(POINT_COL, "red"))
}