2010-07-09 08:13:18 +08:00
|
|
|
#!/bin/env Rscript
|
2010-03-17 03:45:02 +08:00
|
|
|
|
|
|
|
|
# Command-line interface: Rscript <script> <input> <targetTITV>
#   args[1]  path of the comma-separated optimization-curve table; the same
#            string is the prefix for the ".tranches" input and the PDF outputs
#   args[2]  target Ti/Tv ratio, parsed as a number
args <- commandArgs(TRUE)

# Fail early with a usage message instead of letting a missing argument
# surface later as an unrelated read.table()/plot() error.
if (length(args) < 2) {
  stop("Usage: Rscript <script> <input> <targetTITV>", call. = FALSE)
}

# NOTE(review): not read anywhere in the visible part of the script.
verbose = TRUE

input = args[1]

targetTITV = as.numeric(args[2])
if (is.na(targetTITV)) {
  stop("targetTITV must be numeric, got: ", args[2], call. = FALSE)
}
|
|
|
|
|
|
2010-08-07 21:58:59 +08:00
|
|
|
# -----------------------------------------------------------------------------------------------
|
|
|
|
|
# Useful general routines
|
|
|
|
|
# -----------------------------------------------------------------------------------------------
|
2010-03-17 03:45:02 +08:00
|
|
|
|
2010-08-07 21:58:59 +08:00
|
|
|
# Floor applied to every Ti/Tv-derived false-positive estimate.
MIN_FP_RATE <- 0.01

# Estimate a false-positive rate from an observed Ti/Tv ratio.
#
# Computes 1 - (titvObserved - 0.5) / (titvExpected - 0.5) and clamps the
# result to the interval [MIN_FP_RATE, 1].
# NOTE(review): 0.5 is presumably the Ti/Tv ratio expected of pure false
# positives -- confirm.
#
# titvExpected  Ti/Tv ratio expected of true variants (scalar).
# titvObserved  Ti/Tv ratio actually observed (scalar).
#
# min()/max() reduce their inputs to a single value, so this is a scalar
# routine; titvFPEstV applies it element-wise.
titvFPEst <- function(titvExpected, titvObserved) {
  observedExcess <- titvObserved - 0.5
  expectedExcess <- titvExpected - 0.5
  rawRate <- 1 - observedExcess / expectedExcess
  cappedRate <- min(rawRate, 1)
  max(cappedRate, MIN_FP_RATE)
}
|
|
|
|
|
|
|
|
|
|
# Element-wise version of titvFPEst().
#
# titvExpected  Ti/Tv ratio expected of true variants (scalar).
# titvs         vector of observed Ti/Tv ratios.
#
# Returns a numeric vector with one false-positive estimate per element of
# titvs. vapply() pins the result type, so an empty titvs yields numeric(0)
# rather than the empty list sapply() would return (which cannot be used in
# arithmetic such as 1 - titvFPEstV(...)).
titvFPEstV <- function(titvExpected, titvs) {
  vapply(titvs, function(x) titvFPEst(titvExpected, x), numeric(1))
}
|
|
|
|
|
|
|
|
|
|
# Split a variant count into true-positive and false-positive counts.
#
# nVariants  number of variants.
# FDR        false discovery rate as a percentage (it is divided by 100).
#
# Returns list(TP = ..., FP = ...); the counts are not rounded.
nTPFP <- function(nVariants, FDR) {
  fdrFraction <- FDR / 100
  list(
    TP = nVariants * (1 - fdrFraction),
    FP = nVariants * fdrFraction
  )
}
|
|
|
|
|
|
|
|
|
|
# Shift a vector one position to the left.
#
# x          vector to shift.
# leftValue  value placed in the vacated last position (default 0).
#
# Returns a vector of the same length as x in which element i holds x[i + 1]
# and the final element holds leftValue. A length-1 input returns leftValue
# and an empty input returns an empty vector; the former index loop
# 1:(length(x) - 1) counted backwards in those cases and produced NA.
# No diagnostic output is printed.
leftShift <- function(x, leftValue = 0) {
  n <- length(x)
  r <- rep(leftValue, n)
  if (n > 1) {
    # Copy x[2..n] into positions 1..(n - 1); position n keeps leftValue.
    r[seq_len(n - 1)] <- x[-1]
  }
  r
}
|
|
|
|
|
|
|
|
|
|
# -----------------------------------------------------------------------------------------------
|
|
|
|
|
# optimization curve
|
|
|
|
|
# -----------------------------------------------------------------------------------------------
|
2010-03-17 03:45:02 +08:00
|
|
|
# Load the optimization-curve table (comma-separated, with a header row).
# Columns read below: pCut, numKnown, numNovel, knownTITV, novelTITV.
data <- read.table(input, sep = ",", header = TRUE)

# Upper limit of the variant-count axis: largest count in either class.
maxVars <- max(data$numKnown, data$numNovel)

# Upper limit of the Ti/Tv axis: the largest finite Ti/Tv among rows backed by
# more than 2000 variants (or the target itself), capped at targetTITV + 1.
maxTITV <- min(
  max(
    data$knownTITV[is.finite(data$knownTITV) & data$numKnown > 2000],
    data$novelTITV[is.finite(data$novelTITV) & data$numNovel > 2000],
    targetTITV
  ),
  targetTITV + 1
)

# Lower limit of the Ti/Tv axis: taken from the last row of the table and the
# target value.
minTITV <- min(
  data$knownTITV[nrow(data)],
  data$novelTITV[nrow(data)],
  targetTITV
)

# Right edge of the x axis: the largest cutoff that still retains at least one
# known or novel variant.
maxPCut <- max(data$pCut[data$numKnown > 0 | data$numNovel > 0])
|
2010-03-17 03:45:02 +08:00
|
|
|
|
|
|
|
|
# Write <input>.optimizationCurve.pdf: Ti/Tv ratio (left axis) and variant
# counts (right axis), both plotted against the cutoff in data$pCut.
outfile <- paste0(input, ".optimizationCurve.pdf")
pdf(outfile, height = 7, width = 8)

par(mar = c(4, 4, 1, 4), cex = 1.3)

# Layer 1 -- Ti/Tv of known and novel variants, with the target as a dotted
# reference line.
plot(
  data$pCut, data$knownTITV,
  axes = FALSE,
  xlab = "Keep variants with QUAL >= X", ylab = "",
  ylim = c(minTITV, maxTITV), xlim = c(0, maxPCut),
  col = "Blue", pch = 20
)
points(data$pCut, data$novelTITV, col = "DarkBlue", pch = 20)
abline(h = targetTITV, lty = 3, col = "Blue")
axis(side = 2, col = "DarkBlue")
axis(side = 1)
mtext("Ti/Tv Ratio", side = 2, line = 2, col = "blue", cex = 1.4)
legend(
  "left",
  legend = c("Known Ti/Tv", "Novel Ti/Tv"),
  col = c("Blue", "DarkBlue"),
  pch = c(20, 20),
  cex = 0.7
)

# Layer 2 -- variant counts drawn over the same x range, with their own axis
# on the right-hand side.
par(new = TRUE)
plot(
  data$pCut, data$numKnown,
  axes = FALSE,
  xlab = "", ylab = "",
  ylim = c(0, maxVars), xlim = c(0, maxPCut),
  col = "Green", pch = 20
)
points(data$pCut, data$numNovel, col = "DarkGreen", pch = 20)
axis(side = 4, col = "DarkGreen")
mtext("Number of Variants", side = 4, line = 2, col = "DarkGreen", cex = 1.4)
legend(
  "topright",
  legend = c("Known", "Novel"),
  col = c("Green", "DarkGreen"),
  pch = c(20, 20),
  cex = 0.7
)

dev.off()
|
|
|
|
|
|
2010-08-07 21:58:59 +08:00
|
|
|
# -----------------------------------------------------------------------------------------------
|
|
|
|
|
# Tranches plot
|
|
|
|
|
# -----------------------------------------------------------------------------------------------
|
2010-06-02 23:03:00 +08:00
|
|
|
# Load the per-tranche table from <input>.tranches (comma-separated, header).
# Columns read in this section and by the plot: novelTITV, numNovel,
# FDRtranche.
data2 <- read.table(paste0(input, ".tranches"), sep = ",", header = TRUE)

# Fill colour and hatching for the four stacked segments, in matrix row order:
# cumulative TPs, tranche-specific TPs, tranche-specific FPs, cumulative FPs.
# A density of -1 gives a solid fill; 20 gives hatching.
cols <- c("cornflowerblue", "cornflowerblue", "darkorange", "darkorange")
density <- c(20, -1, -1, 20)

outfile <- paste0(input, ".FDRtranches.pdf")
pdf(outfile, height = 7, width = 8)

# Fraction of each tranche's novel calls estimated to be true positives,
# derived from how close the tranche's novel Ti/Tv is to the target.
alpha <- 1 - titvFPEstV(targetTITV, data2$novelTITV)
print(alpha)

# Estimated true-/false-positive counts per tranche.
# Alternative kept from the original, based on the nominal FDR instead:
#   numGood = round(data2$numNovel * (1 - data2$FDRtranche / 100))
numGood <- round(alpha * data2$numNovel)
numBad <- data2$numNovel - numGood

# leftShift() gives each row the count of the row that follows it (0 for the
# last row); the difference is what the row adds on top of that.
# NOTE(review): presumably rows are ordered so that the following row is the
# stricter tranche nested inside this one -- confirm against the file format.
numPrevGood <- leftShift(numGood, 0)
numNewGood <- numGood - numPrevGood
numPrevBad <- leftShift(numBad, 0)
numNewBad <- numBad - numPrevBad

# One column per tranche, one row per stacked bar segment.
d <- matrix(
  c(numPrevGood, numNewGood, numNewBad, numPrevBad),
  nrow = 4,
  byrow = TRUE
)
print(d)
|
|
|
|
|
# Horizontal stacked bars, one per tranche, with counts shown in thousands.
barplot(
  d / 1000,
  horiz = TRUE,
  col = cols,
  space = 0.2,
  xlab = "Number of Novel Variants (1000s)",
  ylab = "Novel Ti/Tv --> FDR (%)",
  density = density
)
# Disabled reference lines kept from the original:
#   abline(v = d[2, dim(d)[2]], lty = 2)
#   abline(v = d[1, 3], lty = 2)

# Legend anchored at fixed plot coordinates (x = 10 thousand variants, y = 2.25).
legend(
  10000 / 1000, 2.25,
  legend = c(
    "Cumulative TPs", "Tranch-specific TPs",
    "Tranch-specific FPs", "Cumulative FPs"
  ),
  fill = cols,
  density = density,
  bg = "white",
  cex = 1.25
)

# Two tick-less label columns on the left axis: the tranche FDR and the
# tranche's novel Ti/Tv. The positions 0.7, 1.9, 3.1, ... step by 1.2 per bar,
# which lines up with space = 0.2 assuming barplot's default bar width of 1.
axis(
  side = 2, line = -1,
  at = 0.7 + (0:(length(data2$FDRtranche) - 1)) * 1.2,
  tick = FALSE,
  labels = data2$FDRtranche
)
axis(
  side = 2, line = 0.4,
  at = 0.7 + (0:(length(data2$FDRtranche) - 1)) * 1.2,
  tick = FALSE,
  labels = data2$novelTITV
)

dev.off()
|
2010-08-07 21:58:59 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
#data2 = read.table(paste(input,".tranches",sep=""),sep=",",head=T)
|
|
|
|
|
#cols = c("steelblue","orange")
|
|
|
|
|
#outfile = paste(input, ".FDRtranches.pdf", sep="")
|
|
|
|
|
#pdf(outfile, height=7, width=8)
|
|
|
|
|
#alpha = (data2$novelTITV - 0.5) / (targetTITV - 0.5);
|
|
|
|
|
#numGood = round(alpha * data2$numNovel);
|
|
|
|
|
#numBad = data2$numNovel - numGood;
|
|
|
|
|
#d=matrix(c(numGood,numBad),2,byrow=TRUE)
|
|
|
|
|
#barplot(d,horiz=TRUE,col=cols,space=0.2,xlab="Number of Novel Variants",ylab="Novel Ti/Tv --> FDR (%)")
|
|
|
|
|
#legend('topright',c('implied TP','implied FP'),col=cols,lty=1,lwd=16)
|
|
|
|
|
#axis(2,line=-1,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$FDRtranche)
|
|
|
|
|
#axis(2,line=0.4,at=0.7+(0:(length(data2$FDRtranche)-1))*1.2,tick=FALSE,labels=data2$novelTITV)
|
|
|
|
|
#dev.off()
|