From 672bee295c4b34625f7179e0d51c865b81d5869b Mon Sep 17 00:00:00 2001 From: depristo Date: Tue, 10 Aug 2010 12:02:52 +0000 Subject: [PATCH] now plots tranches separately from optimizer git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4000 348d0f76-0448-11de-a6fe-93d51630548a --- R/plot_OptimizationCurve.R | 74 ---------------------------------- R/plot_Tranches.R | 81 ++++++++++++++++++++++++++++++++++++++ R/titvFPEst.R | 6 ++- 3 files changed, 86 insertions(+), 75 deletions(-) create mode 100755 R/plot_Tranches.R diff --git a/R/plot_OptimizationCurve.R b/R/plot_OptimizationCurve.R index 70f068277..5eff8a34c 100755 --- a/R/plot_OptimizationCurve.R +++ b/R/plot_OptimizationCurve.R @@ -6,33 +6,6 @@ verbose = TRUE input = args[1] targetTITV = as.numeric(args[2]) -# ----------------------------------------------------------------------------------------------- -# Useful general routines -# ----------------------------------------------------------------------------------------------- - -MIN_FP_RATE = 0.01 - -titvFPEst <- function(titvExpected, titvObserved) { - max(min(1 - (titvObserved - 0.5) / (titvExpected - 0.5), 1), MIN_FP_RATE) -} - -titvFPEstV <- function(titvExpected, titvs) { - sapply(titvs, function(x) titvFPEst(titvExpected, x)) -} - -nTPFP <- function(nVariants, FDR) { - return(list(TP = nVariants * (1 - FDR/100), FP = nVariants * (FDR / 100))) -} - -leftShift <- function(x, leftValue = 0) { - r = rep(leftValue, length(x)) - for ( i in 1:(length(x)-1) ) { - print(list(i=i)) - r[i] = x[i+1] - } - r -} - # ----------------------------------------------------------------------------------------------- # optimization curve # ----------------------------------------------------------------------------------------------- @@ -61,50 +34,3 @@ axis(side=4,col="DarkGreen") mtext("Number of Variants", side=4, line=2, col="DarkGreen",cex=1.4) legend("topright", c("Known","Novel"), col=c("Green","DarkGreen"), pch=c(20,20),cex=0.7) dev.off() - -# ----------------------------------------------------------------------------------------------- -# Tranches plot -# ----------------------------------------------------------------------------------------------- -data2 = read.table(paste(input,".tranches",sep=""),sep=",",head=T) -cols = c("cornflowerblue", "cornflowerblue", "darkorange", "darkorange") -density=c(20, -1, -1, 20) -outfile = paste(input, ".FDRtranches.pdf", sep="") -pdf(outfile, height=7, width=8) -alpha = 1 - titvFPEstV(targetTITV, data2$novelTITV) -print(alpha) - -numGood = round(alpha * data2$numNovel); - -#numGood = round(data2$numNovel * (1-data2$FDRtranche/100)) -numBad = data2$numNovel - numGood; - -numPrevGood = leftShift(numGood, 0) -numNewGood = numGood - numPrevGood -numPrevBad = leftShift(numBad, 0) -numNewBad = numBad - numPrevBad - -d=matrix(c(numPrevGood,numNewGood, numNewBad, numPrevBad),4,byrow=TRUE) -print(d) -barplot(d/1000,horiz=TRUE,col=cols,space=0.2,xlab="Number of Novel Variants (1000s)",ylab="Novel Ti/Tv --> FDR (%)", density=density) # , xlim=c(250000,350000)) -#abline(v= d[2,dim(d)[2]], lty=2) -#abline(v= d[1,3], lty=2) -legend(10000/1000, 2.25, c('Cumulative TPs','Tranch-specific TPs', 'Tranch-specific FPs', 'Cumulative FPs' ), fill=cols, density=density, bg='white', cex=1.25) -axis(2,line=-1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$FDRtranche) -axis(2,line=0.4,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$novelTITV) -dev.off() - - -# -#data2 = read.table(paste(input,".tranches",sep=""),sep=",",head=T) -#cols = c("steelblue","orange") -#outfile = paste(input, ".FDRtranches.pdf", sep="") -#pdf(outfile, height=7, width=8) -#alpha = (data2$novelTITV - 0.5) / (targetTITV - 0.5); -#numGood = round(alpha * data2$numNovel); -#numBad = data2$numNovel - numGood; -#d=matrix(c(numGood,numBad),2,byrow=TRUE) -#barplot(d,horiz=TRUE,col=cols,space=0.2,xlab="Number of Novel Variants",ylab="Novel Ti/Tv --> FDR (%)") -#legend('topright',c('implied TP','implied FP'),col=cols,lty=1,lwd=16) -#axis(2,line=-1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$FDRtranche) -#axis(2,line=0.4,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$novelTITV) -#dev.off() diff --git a/R/plot_Tranches.R b/R/plot_Tranches.R new file mode 100755 index 000000000..839c8973c --- /dev/null +++ b/R/plot_Tranches.R @@ -0,0 +1,81 @@ +#!/bin/env Rscript + +args <- commandArgs(TRUE) +verbose = TRUE + +input = args[1] +targetTITV = as.numeric(args[2]) + +# ----------------------------------------------------------------------------------------------- +# Useful general routines +# ----------------------------------------------------------------------------------------------- + +MIN_FP_RATE = 0.01 + +titvFPEst <- function(titvExpected, titvObserved) { + max(min(1 - (titvObserved - 0.5) / (titvExpected - 0.5), 1), MIN_FP_RATE) +} + +titvFPEstV <- function(titvExpected, titvs) { + sapply(titvs, function(x) titvFPEst(titvExpected, x)) +} + +nTPFP <- function(nVariants, FDR) { + return(list(TP = nVariants * (1 - FDR/100), FP = nVariants * (FDR / 100))) +} + +leftShift <- function(x, leftValue = 0) { + r = rep(leftValue, length(x)) + for ( i in 1:(length(x)-1) ) { + #print(list(i=i)) + r[i] = x[i+1] + } + r +} + +# ----------------------------------------------------------------------------------------------- +# Tranches plot +# ----------------------------------------------------------------------------------------------- +data2 = read.table(paste(input,".tranches",sep=""),sep=",",head=T) +cols = c("cornflowerblue", "cornflowerblue", "darkorange", "darkorange") +density=c(20, -1, -1, 20) +outfile = paste(input, ".FDRtranches.pdf", sep="") +pdf(outfile, height=7, width=8) +alpha = 1 - titvFPEstV(targetTITV, data2$novelTITV) +#print(alpha) + +numGood = round(alpha * data2$numNovel); + +#numGood = round(data2$numNovel * (1-data2$FDRtranche/100)) +numBad = data2$numNovel - numGood; + +numPrevGood = leftShift(numGood, 0) +numNewGood = numGood - numPrevGood +numPrevBad = leftShift(numBad, 0) +numNewBad = numBad - numPrevBad + +d=matrix(c(numPrevGood,numNewGood, numNewBad, numPrevBad),4,byrow=TRUE) +#print(d) +barplot(d/1000,horiz=TRUE,col=cols,space=0.2,xlab="Number of Novel Variants (1000s)",ylab="Novel Ti/Tv --> FDR (%)", density=density) # , xlim=c(250000,350000)) +#abline(v= d[2,dim(d)[2]], lty=2) +#abline(v= d[1,3], lty=2) +legend(10000/1000, 2.25, c('Cumulative TPs','Tranch-specific TPs', 'Tranch-specific FPs', 'Cumulative FPs' ), fill=cols, density=density, bg='white', cex=1.25) +axis(2,line=-1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$FDRtranche) +axis(2,line=0.4,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$novelTITV) +dev.off() + + +# +#data2 = read.table(paste(input,".tranches",sep=""),sep=",",head=T) +#cols = c("steelblue","orange") +#outfile = paste(input, ".FDRtranches.pdf", sep="") +#pdf(outfile, height=7, width=8) +#alpha = (data2$novelTITV - 0.5) / (targetTITV - 0.5); +#numGood = round(alpha * data2$numNovel); +#numBad = data2$numNovel - numGood; +#d=matrix(c(numGood,numBad),2,byrow=TRUE) +#barplot(d,horiz=TRUE,col=cols,space=0.2,xlab="Number of Novel Variants",ylab="Novel Ti/Tv --> FDR (%)") +#legend('topright',c('implied TP','implied FP'),col=cols,lty=1,lwd=16) +#axis(2,line=-1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$FDRtranche) +#axis(2,line=0.4,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$novelTITV) +#dev.off() diff --git a/R/titvFPEst.R b/R/titvFPEst.R index 09c0774e3..e6a0d4413 100755 --- a/R/titvFPEst.R +++ b/R/titvFPEst.R @@ -1,4 +1,8 @@ -titvFPEst <- function(titvExpected, titvObserved) { 1 - (titvObserved - 0.5) / (titvExpected - 0.5) } +titvFPEst <- function(titvExpected, titvObserved) { max(min(1 - (titvObserved - 0.5) / (titvExpected - 0.5), 1), 0.001) } + +titvFPEstV <- function(titvExpected, titvs) { + sapply(titvs, function(x) titvFPEst(titvExpected, x)) +} calcHet <- function(nknown, knownTiTv, nnovel, novelTiTv, callable) { TP <- nknown + (1-titvFPEst(knownTiTv, novelTiTv)) * nnovel