Simple R script to visualize geontype likelihood accuracy
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5805 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
fa75efb6ac
commit
5e9c0d00c6
|
|
@ -0,0 +1,52 @@
|
|||
require("lattice")
|
||||
require("ggplot2")
|
||||
|
||||
READ_DATA = F
|
||||
|
||||
if ( READ_DATA ) {
|
||||
d = read.table("~/Dropbox/Analysis/genotypeAccuracy/cgl.hiseq.table", header=T)
|
||||
#d = read.table("~/Desktop/broadLocal/GATK/trunk/foo", header=T)
|
||||
}
|
||||
|
||||
moltenCD = d
|
||||
#moltenCD = melt(d, id.vars=c("comp", "rg"), measure.vars=c("QofAAGivenD", "QofABGivenD", "QofBBGivenD"))
|
||||
#moltenCD$log10value = round(-10*log10(1-10^moltenCD$value))
|
||||
|
||||
genotypeCounts <- function(x) {
|
||||
#print(table(x$comp))
|
||||
type = unique(x$variable)[1]
|
||||
#print(type)
|
||||
t = addmargins(table(x$comp))
|
||||
return(t)
|
||||
}
|
||||
|
||||
addEmpiricalPofG <- function(d) {
|
||||
r = c()
|
||||
for ( i in 1:dim(d)[1] ) {
|
||||
row = d[i,]
|
||||
#print(row)
|
||||
if ( row$pGGivenDType == "QofAAGivenD" ) v = row$HOM_REF
|
||||
if ( row$pGGivenDType == "QofABGivenD" ) v = row$HET
|
||||
if ( row$pGGivenDType == "QofBBGivenD" ) v = row$HOM_VAR
|
||||
#print(v)
|
||||
#print(row$Sum)
|
||||
r = c(r, v / row$Sum)
|
||||
#print(r)
|
||||
}
|
||||
|
||||
#print(length(r))
|
||||
d$EmpiricalPofG = r
|
||||
d$EmpiricalPofGQ = round(-10*log10(1-r))
|
||||
return(d)
|
||||
}
|
||||
|
||||
eByComp <- addEmpiricalPofG(ddply(moltenCD, .(rg, pGGivenDType, pGGivenD), genotypeCounts))
|
||||
print(subset(eByComp, EmpiricalPofGQ < Inf))
|
||||
|
||||
goodEByComp = subset(eByComp, Sum > 10 & EmpiricalPofGQ < Inf)
|
||||
|
||||
print(qplot(pGGivenD, EmpiricalPofGQ, data=subset(goodEByComp, rg != "ALL"), size=log10(Sum), color=pGGivenDType, geom=c("point", "smooth"), group=pGGivenDType, xlim=c(0,40), ylim=c(0,40)) + geom_abline(slope=1, linetype=2))
|
||||
print(qplot(pGGivenD, EmpiricalPofGQ, data=subset(goodEByComp, rg != "ALL"), facets = . ~ pGGivenDType, color=rg, geom=c("line"), group=rg, xlim=c(0,40), ylim=c(0,40)) + geom_abline(slope=1, linetype=2))
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue