From 672bee295c4b34625f7179e0d51c865b81d5869b Mon Sep 17 00:00:00 2001
From: depristo <depristo@348d0f76-0448-11de-a6fe-93d51630548a>
Date: Tue, 10 Aug 2010 12:02:52 +0000
Subject: [PATCH] now plots tranches separately from optimizer

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4000 348d0f76-0448-11de-a6fe-93d51630548a
---
 R/plot_OptimizationCurve.R | 74 ----------------------------------
 R/plot_Tranches.R          | 81 ++++++++++++++++++++++++++++++++++++++
 R/titvFPEst.R              |  6 ++-
 3 files changed, 86 insertions(+), 75 deletions(-)
 create mode 100755 R/plot_Tranches.R

diff --git a/R/plot_OptimizationCurve.R b/R/plot_OptimizationCurve.R
index 70f068277..5eff8a34c 100755
--- a/R/plot_OptimizationCurve.R
+++ b/R/plot_OptimizationCurve.R
@@ -6,33 +6,6 @@ verbose = TRUE
 input = args[1]
 targetTITV = as.numeric(args[2])
 
-# -----------------------------------------------------------------------------------------------
-# Useful general routines
-# -----------------------------------------------------------------------------------------------
-
-MIN_FP_RATE = 0.01
-
-titvFPEst <- function(titvExpected, titvObserved) { 
-    max(min(1 - (titvObserved - 0.5) / (titvExpected - 0.5), 1), MIN_FP_RATE) 
-}
-
-titvFPEstV <- function(titvExpected, titvs) {
-    sapply(titvs, function(x) titvFPEst(titvExpected, x))
-}
-
-nTPFP <- function(nVariants, FDR) {
-    return(list(TP = nVariants * (1 - FDR/100), FP = nVariants * (FDR / 100)))
-}
-
-leftShift <- function(x, leftValue = 0) {
-    r = rep(leftValue, length(x))
-    for ( i in 1:(length(x)-1) ) {
-        print(list(i=i))
-        r[i] = x[i+1]
-    }
-    r
-}
-
 # -----------------------------------------------------------------------------------------------
 # optimization curve
 # -----------------------------------------------------------------------------------------------
@@ -61,50 +34,3 @@ axis(side=4,col="DarkGreen")
 mtext("Number of Variants", side=4, line=2, col="DarkGreen",cex=1.4)
 legend("topright", c("Known","Novel"), col=c("Green","DarkGreen"), pch=c(20,20),cex=0.7)
 dev.off()
-
-# -----------------------------------------------------------------------------------------------
-# Tranches plot
-# -----------------------------------------------------------------------------------------------
-data2 = read.table(paste(input,".tranches",sep=""),sep=",",head=T)
-cols = c("cornflowerblue", "cornflowerblue", "darkorange", "darkorange")
-density=c(20, -1, -1, 20)
-outfile = paste(input, ".FDRtranches.pdf", sep="")
-pdf(outfile, height=7, width=8)
-alpha = 1 - titvFPEstV(targetTITV, data2$novelTITV)
-print(alpha)
-
-numGood = round(alpha * data2$numNovel);
-
-#numGood = round(data2$numNovel * (1-data2$FDRtranche/100))
-numBad = data2$numNovel - numGood;
-
-numPrevGood = leftShift(numGood, 0)
-numNewGood = numGood - numPrevGood
-numPrevBad = leftShift(numBad, 0)
-numNewBad = numBad - numPrevBad
-
-d=matrix(c(numPrevGood,numNewGood, numNewBad, numPrevBad),4,byrow=TRUE)
-print(d)
-barplot(d/1000,horiz=TRUE,col=cols,space=0.2,xlab="Number of Novel Variants (1000s)",ylab="Novel Ti/Tv   -->   FDR (%)", density=density) # , xlim=c(250000,350000))
-#abline(v= d[2,dim(d)[2]], lty=2)
-#abline(v= d[1,3], lty=2)
-legend(10000/1000, 2.25, c('Cumulative TPs','Tranch-specific TPs', 'Tranch-specific FPs', 'Cumulative FPs' ), fill=cols, density=density, bg='white', cex=1.25)
-axis(2,line=-1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$FDRtranche)
-axis(2,line=0.4,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$novelTITV)
-dev.off()
-
-
-#
-#data2 = read.table(paste(input,".tranches",sep=""),sep=",",head=T)
-#cols = c("steelblue","orange")
-#outfile = paste(input, ".FDRtranches.pdf", sep="")
-#pdf(outfile, height=7, width=8)
-#alpha = (data2$novelTITV - 0.5) / (targetTITV - 0.5);
-#numGood = round(alpha * data2$numNovel);
-#numBad = data2$numNovel - numGood;
-#d=matrix(c(numGood,numBad),2,byrow=TRUE)
-#barplot(d,horiz=TRUE,col=cols,space=0.2,xlab="Number of Novel Variants",ylab="Novel Ti/Tv   -->   FDR (%)")
-#legend('topright',c('implied TP','implied FP'),col=cols,lty=1,lwd=16)
-#axis(2,line=-1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$FDRtranche)
-#axis(2,line=0.4,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$novelTITV)
-#dev.off()
diff --git a/R/plot_Tranches.R b/R/plot_Tranches.R
new file mode 100755
index 000000000..839c8973c
--- /dev/null
+++ b/R/plot_Tranches.R
@@ -0,0 +1,81 @@
+#!/bin/env Rscript
+
+args <- commandArgs(TRUE)
+verbose = TRUE
+
+input = args[1]
+targetTITV = as.numeric(args[2])
+
+# -----------------------------------------------------------------------------------------------
+# Useful general routines
+# -----------------------------------------------------------------------------------------------
+
+MIN_FP_RATE = 0.01
+
+titvFPEst <- function(titvExpected, titvObserved) { 
+    max(min(1 - (titvObserved - 0.5) / (titvExpected - 0.5), 1), MIN_FP_RATE) 
+}
+
+titvFPEstV <- function(titvExpected, titvs) {
+    sapply(titvs, function(x) titvFPEst(titvExpected, x))
+}
+
+nTPFP <- function(nVariants, FDR) {
+    return(list(TP = nVariants * (1 - FDR/100), FP = nVariants * (FDR / 100)))
+}
+
+leftShift <- function(x, leftValue = 0) {
+    r = rep(leftValue, length(x))
+    for ( i in 1:(length(x)-1) ) {
+        #print(list(i=i))
+        r[i] = x[i+1]
+    }
+    r
+}
+
+# -----------------------------------------------------------------------------------------------
+# Tranches plot
+# -----------------------------------------------------------------------------------------------
+data2 = read.table(paste(input,".tranches",sep=""),sep=",",head=T)
+cols = c("cornflowerblue", "cornflowerblue", "darkorange", "darkorange")
+density=c(20, -1, -1, 20)
+outfile = paste(input, ".FDRtranches.pdf", sep="")
+pdf(outfile, height=7, width=8)
+alpha = 1 - titvFPEstV(targetTITV, data2$novelTITV)
+#print(alpha)
+
+numGood = round(alpha * data2$numNovel);
+
+#numGood = round(data2$numNovel * (1-data2$FDRtranche/100))
+numBad = data2$numNovel - numGood;
+
+numPrevGood = leftShift(numGood, 0)
+numNewGood = numGood - numPrevGood
+numPrevBad = leftShift(numBad, 0)
+numNewBad = numBad - numPrevBad
+
+d=matrix(c(numPrevGood,numNewGood, numNewBad, numPrevBad),4,byrow=TRUE)
+#print(d)
+barplot(d/1000,horiz=TRUE,col=cols,space=0.2,xlab="Number of Novel Variants (1000s)",ylab="Novel Ti/Tv   -->   FDR (%)", density=density) # , xlim=c(250000,350000))
+#abline(v= d[2,dim(d)[2]], lty=2)
+#abline(v= d[1,3], lty=2)
+legend(10000/1000, 2.25, c('Cumulative TPs','Tranch-specific TPs', 'Tranch-specific FPs', 'Cumulative FPs' ), fill=cols, density=density, bg='white', cex=1.25)
+axis(2,line=-1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$FDRtranche)
+axis(2,line=0.4,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$novelTITV)
+dev.off()
+
+
+#
+#data2 = read.table(paste(input,".tranches",sep=""),sep=",",head=T)
+#cols = c("steelblue","orange")
+#outfile = paste(input, ".FDRtranches.pdf", sep="")
+#pdf(outfile, height=7, width=8)
+#alpha = (data2$novelTITV - 0.5) / (targetTITV - 0.5);
+#numGood = round(alpha * data2$numNovel);
+#numBad = data2$numNovel - numGood;
+#d=matrix(c(numGood,numBad),2,byrow=TRUE)
+#barplot(d,horiz=TRUE,col=cols,space=0.2,xlab="Number of Novel Variants",ylab="Novel Ti/Tv   -->   FDR (%)")
+#legend('topright',c('implied TP','implied FP'),col=cols,lty=1,lwd=16)
+#axis(2,line=-1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$FDRtranche)
+#axis(2,line=0.4,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$novelTITV)
+#dev.off()
diff --git a/R/titvFPEst.R b/R/titvFPEst.R
index 09c0774e3..e6a0d4413 100755
--- a/R/titvFPEst.R
+++ b/R/titvFPEst.R
@@ -1,4 +1,8 @@
-titvFPEst <- function(titvExpected, titvObserved) { 1 - (titvObserved - 0.5) / (titvExpected - 0.5) }
+titvFPEst <- function(titvExpected, titvObserved) { max(min(1 - (titvObserved - 0.5) / (titvExpected - 0.5), 1), 0.001) }
+
+titvFPEstV <- function(titvExpected, titvs) {
+    sapply(titvs, function(x) titvFPEst(titvExpected, x))
+}
 
 calcHet <- function(nknown, knownTiTv, nnovel, novelTiTv, callable) {
   TP <- nknown + (1-titvFPEst(knownTiTv, novelTiTv)) * nnovel