gatk-3.8/R/privateMutations.R

62 lines
1.7 KiB
R

MAX_AC = 10000
normHist <- function(d, m) {
x = hist(d$true.ac, breaks=1:20000, plot=F)$counts[1:MAX_AC]
x / sum(x)
}
f <- function(d, acs) {
cols = rainbow(length(acs), alpha=0.75)
y = normHist(subset(afs, small.ac == acs[1]))
x = 1:length(y) / max(d$true.an)
plot(x, y, type="l", col=cols[1], xlab="True MAF in full population", ylab="Frequency", lwd=3, log="x")
for (i in 2:length(acs)) {
points(x, normHist(subset(afs, small.ac == acs[i])), type="l", col=cols[i], lwd=3)
}
legend("topright", legend=lapply(acs, function(x) paste("AC =", x)), fill=cols, title="Sub-population")
}
expected <- function(maxAN, N, eps, ac1scale = F) {
scale = 10
f <- function(ps, N) {
co = 2 * N / ( 1 - eps )
co * ((1 - ps)/(1-eps))^(2 * N - 1)
}
# these are the points that we'll actually show, but we need to do the calculation
# special for the AC = 1 given the equation actually fits an infinite population
# not a discrete population with max chromosomes
ps = 1:maxAN / maxAN
v = f(ps, N)
v = v / sum(v)
if ( ac1scale ) {
subps = seq(1, maxAN*scale) / (maxAN * scale)
#print(subps)
subv = f(subps, N)
#print(subv)
#print(v[1:10])
pBelowAC1 = sum(subv[1:scale] / sum(subv))
#print(list(pBelowAC1=pBelowAC1, v1=v[1]))
v[1] = v[1] + pBelowAC1
}
list(ps = ps, pr = v)
}
f(afs, c(1,2,3,5,10,50))
if ( F ) {
scale = 100
ex1 = expected(200000, 1000, 1e-8)
ex2 = expected(200000*scale, 1000, 1e-8)
i = 1:(200000*scale) %% scale == 1
plot(ex2$ps[i], cumsum(ex1$pr), type="l",lty=3,lwd=3, log="x", col="red")
points(ex2$ps[i], cumsum(ex2$pr)[i], type="l",lty=3,lwd=3, log="x")
}
ex = expected(200000, 1000, 1e-8, T)
points(ex$ps, ex$pr, type="l",lty=3,lwd=3)