Simple R script to visualize genotype likelihood accuracy
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5805 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
fa75efb6ac
commit
5e9c0d00c6
|
|
@ -0,0 +1,52 @@
|
||||||
|
require("lattice")
|
||||||
|
require("ggplot2")
|
||||||
|
|
||||||
|
# Set READ_DATA to TRUE to (re)load the input table from disk. When it is
# FALSE the script relies on a data frame named `d` that is already present
# in the workspace (presumably left over from an earlier run -- the code
# below uses `d` unconditionally).
READ_DATA <- FALSE

if (READ_DATA) {
  d <- read.table(
    "~/Dropbox/Analysis/genotypeAccuracy/cgl.hiseq.table",
    header = TRUE
  )
  #d = read.table("~/Desktop/broadLocal/GATK/trunk/foo", header=T)
}

# Fail early with an actionable message rather than "object 'd' not found".
if (!exists("d")) {
  stop(
    "No input data: set READ_DATA to TRUE or define `d` before running.",
    call. = FALSE
  )
}

# The table is used as-is; the melt-based reshaping below is disabled.
moltenCD <- d
#moltenCD = melt(d, id.vars=c("comp", "rg"), measure.vars=c("QofAAGivenD", "QofABGivenD", "QofBBGivenD"))
#moltenCD$log10value = round(-10*log10(1-10^moltenCD$value))
|
||||||
|
|
||||||
|
# Tabulate the `comp` column of a group of rows.
#
# Args:
#   x: data frame with a `comp` column (called once per group by ddply
#      further down in this script).
#
# Returns:
#   A one-dimensional table with one count per distinct `comp` value plus a
#   trailing `Sum` entry (the total) appended by addmargins().
genotypeCounts <- function(x) {
  #print(table(x$comp))
  addmargins(table(x$comp))
}
|
||||||
|
|
||||||
|
# Add the empirical genotype fraction, and its -10*log10(1 - p) transform,
# to a table of per-group genotype counts.
#
# For every row the count column is chosen from the row's `pGGivenDType`:
#   "QofAAGivenD" -> HOM_REF, "QofABGivenD" -> HET, "QofBBGivenD" -> HOM_VAR
# and divided by the row's `Sum`.
#
# Args:
#   d: data frame with columns `pGGivenDType`, `HOM_REF`, `HET`, `HOM_VAR`
#      and `Sum`.
#
# Returns:
#   `d` with two extra columns:
#     EmpiricalPofG  - selected count / Sum
#     EmpiricalPofGQ - round(-10 * log10(1 - EmpiricalPofG)); Inf when the
#                      fraction is exactly 1.
#   Rows whose `pGGivenDType` is not one of the three known labels get NA in
#   both columns (the previous loop silently reused the preceding row's
#   count). A zero-row `d` is returned with empty new columns instead of
#   failing.
addEmpiricalPofG <- function(d) {
  # Which count column answers each likelihood type.
  countColumn <- c(
    QofAAGivenD = "HOM_REF",
    QofABGivenD = "HET",
    QofBBGivenD = "HOM_VAR"
  )

  # as.character() so factor and character columns compare the same way.
  types <- as.character(d$pGGivenDType)
  matched <- rep(NA_real_, nrow(d))

  for (label in names(countColumn)) {
    hits <- which(types == label)
    if (length(hits) > 0) {
      matched[hits] <- d[[countColumn[[label]]]][hits]
    }
  }

  r <- matched / d$Sum
  d$EmpiricalPofG <- r
  d$EmpiricalPofGQ <- round(-10 * log10(1 - r))
  d
}
|
||||||
|
|
||||||
|
# Count `comp` values within each (rg, pGGivenDType, pGGivenD) group, then
# attach the empirical fraction columns.
eByComp <- addEmpiricalPofG(
  ddply(moltenCD, .(rg, pGGivenDType, pGGivenD), genotypeCounts)
)

# Show every group whose empirical quality is finite.
print(subset(eByComp, EmpiricalPofGQ < Inf))

# Keep only groups with more than 10 observations and a finite quality.
goodEByComp <- subset(eByComp, Sum > 10 & EmpiricalPofGQ < Inf)

# Plot 1: reported vs. empirical quality, coloured and grouped by likelihood
# type, point size scaled by log10 of the group size; the "ALL" read group is
# excluded. The slope-1 line (linetype 2) is the reference diagonal.
print(
  qplot(
    pGGivenD, EmpiricalPofGQ,
    data = subset(goodEByComp, rg != "ALL"),
    size = log10(Sum),
    color = pGGivenDType,
    geom = c("point", "smooth"),
    group = pGGivenDType,
    xlim = c(0, 40),
    ylim = c(0, 40)
  ) +
    geom_abline(slope = 1, linetype = 2)
)

# Plot 2: the same axes, one facet per likelihood type and one line per read
# group.
print(
  qplot(
    pGGivenD, EmpiricalPofGQ,
    data = subset(goodEByComp, rg != "ALL"),
    facets = . ~ pGGivenDType,
    color = rg,
    geom = c("line"),
    group = rg,
    xlim = c(0, 40),
    ylim = c(0, 40)
  ) +
    geom_abline(slope = 1, linetype = 2)
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Loading…
Reference in New Issue