2010-08-10 20:02:52 +08:00
titvFPEst <- function ( titvExpected , titvObserved ) { max ( min ( 1 - ( titvObserved - 0.5 ) / ( titvExpected - 0.5 ) , 1 ) , 0.001 ) }
titvFPEstV <- function ( titvExpected , titvs ) {
sapply ( titvs , function ( x ) titvFPEst ( titvExpected , x ) )
}
2010-07-01 00:38:49 +08:00
calcHet <- function ( nknown , knownTiTv , nnovel , novelTiTv , callable ) {
TP <- nknown + ( 1 - titvFPEst ( knownTiTv , novelTiTv ) ) * nnovel
2 * TP / 3 / callable
}
marginalTiTv <- function ( nx , titvx , ny , titvy ) {
tvx = nx / ( titvx + 1 )
tix = nx - tvx
tvy = ny / ( titvy + 1 )
tiy = ny - tvy
tiz = tix - tiy
tvz = tvx - tvy
return ( tiz / tvz )
}
marginaldbSNPRate <- function ( nx , dbx , ny , dby ) {
knownx = nx * dbx / 100
novelx = nx - knownx
knowny = ny * dby / 100
novely = ny - knowny
knownz = knownx - knowny
novelz = novelx - novely
return ( knownz / ( knownz + novelz ) * 100 )
}
numExpectedCalls <- function ( L , theta , calledFractionOfRegion , nIndividuals , dbSNPRate ) {
nCalls <- L * theta * calledFractionOfRegion * sum ( 1 / seq ( 1 , 2 * nIndividuals ) )
return ( list ( nCalls = nCalls , nKnown = dbSNPRate * nCalls , nNovel = ( 1 - dbSNPRate ) * nCalls ) )
}
normalize <- function ( x ) {
x / sum ( x )
}
normcumsum <- function ( x ) {
cumsum ( normalize ( x ) )
}
2011-01-03 07:32:20 +08:00
cumhist <- function ( d , ... ) {
plot ( d [order ( d ) ] , type = " b" , col = " orange" , lwd = 2 , ... )
2010-07-01 00:38:49 +08:00
}
2010-07-09 08:13:18 +08:00
revcumsum <- function ( x ) {
return ( rev ( cumsum ( rev ( x ) ) ) )
}
2010-09-01 04:32:22 +08:00
phred <- function ( x ) {
log10 ( max ( x , 10 ^ ( -9.9 ) ) ) * -10
}
pOfB <- function ( b , B , Q ) {
#print(paste(b, B, Q))
p = 1 - 10 ^ ( - Q / 10 )
if ( b == B )
return ( p )
else
return ( 1 - p )
}
pOfG <- function ( bs , qs , G ) {
a1 = G [1 ]
a2 = G [2 ]
log10p = 0
for ( i in 1 : length ( bs ) ) {
b = bs [i ]
q = qs [i ]
p1 = pOfB ( b , a1 , q ) / 2 + pOfB ( b , a2 , q ) / 2
log10p = log10p + log10 ( p1 )
}
return ( log10p )
}
pOfGs <- function ( nAs , nBs , Q ) {
bs = c ( rep ( " a" , nAs ) , rep ( " t" , nBs ) )
qs = rep ( Q , nAs + nBs )
G1 = c ( " a" , " a" )
G2 = c ( " a" , " t" )
G3 = c ( " t" , " t" )
log10p1 = pOfG ( bs , qs , G1 )
log10p2 = pOfG ( bs , qs , G2 )
log10p3 = pOfG ( bs , qs , G3 )
Qsample = phred ( 1 - 10 ^log10p2 / sum ( 10 ^ ( c ( log10p1 , log10p2 , log10p3 ) ) ) )
return ( list ( p1 = log10p1 , p2 = log10p2 , p3 = log10p3 , Qsample = Qsample ) )
}
QsampleExpected <- function ( depth , Q ) {
weightedAvg = 0
for ( d in 1 : ( depth * 3 ) ) {
Qsample = 0
pOfD = dpois ( d , depth )
for ( nBs in 0 : d ) {
pOfnB = dbinom ( nBs , d , 0.5 )
nAs = d - nBs
Qsample = pOfGs ( nAs , nBs , Q ) $ Qsample
#Qsample = 1
weightedAvg = weightedAvg + Qsample * pOfD * pOfnB
print ( as.data.frame ( list ( d = d , nBs = nBs , pOfD = pOfD , pOfnB = pOfnB , Qsample = Qsample , weightedAvg = weightedAvg ) ) )
}
}
return ( weightedAvg )
}
plotQsamples <- function ( depths , Qs , Qmax ) {
cols = rainbow ( length ( Qs ) )
plot ( depths , rep ( Qmax , length ( depths ) ) , type = " n" , ylim = c ( 0 , Qmax ) , xlab = " Average sequencing coverage" , ylab = " Qsample" , main = " Expected Qsample values, including depth and allele sampling" )
for ( i in 1 : length ( Qs ) ) {
Q = Qs [i ]
y = as.numeric ( lapply ( depths , function ( x ) QsampleExpected ( x , Q ) ) )
points ( depths , y , col = cols [i ] , type = " b" )
}
legend ( " topleft" , paste ( " Q" , Qs ) , fill = cols )
2010-12-23 04:16:41 +08:00
}
pCallHetGivenDepth <- function ( depth , nallelesToCall ) {
depths = 0 : ( 2 * depth )
pNoAllelesToCall = apply ( as.matrix ( depths ) , 1 , function ( d ) sum ( dbinom ( 0 : nallelesToCall , d , 0.5 ) ) )
dpois ( depths , depth ) * ( 1 - pNoAllelesToCall )
}
pCallHets <- function ( depth , nallelesToCall ) {
sum ( pCallHetGivenDepth ( depth , nallelesToCall ) )
}
pCallHetMultiSample <- function ( depth , nallelesToCall , nsamples ) {
1 - ( 1 - pCallHets ( depth , nallelesToCall ) ) ^nsamples
}