Added probability bound on phasing paths, which slightly speeds up calculations. It seems that a real speed-up can only be achieved by considering fewer paths by doing some form of caching of sub-problems (e.g., dynamic programming or matrix multiplication, as Mark suggested)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4832 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
fromer 2010-12-13 23:53:56 +00:00
parent f36861eeee
commit 4403b9d276
1 changed files with 6 additions and 1 deletions

View File

@ -108,7 +108,7 @@ pDirectlyPhaseHetPair <- function(meanDepth, nReadsToPhase, L, theta, Im, Is) {
}
# Probability (over locations of sites on reads, insert sizes, and read depths) that paired-end reads can TRANSITIVELY phase phaseIndex relative to phaseIndex - 1, given a window of length(windowDistances)+1 het sites at distances given by windowDistances (where an edge in the transitive path requires at least nReadsToPhase reads):
pPhaseHetPairAtDistanceUsingDepthAndWindow <- function(windowDistances, phaseIndex, meanDepth, nReadsToPhase, L, Im, Is) {
pPhaseHetPairAtDistanceUsingDepthAndWindow <- function(windowDistances, phaseIndex, meanDepth, nReadsToPhase, L, Im, Is, MIN_PATH_PROB = 10^-6) {
n = length(windowDistances) + 1 # the window size
if (phaseIndex < 2 || phaseIndex > n) {
stop("phaseIndex < 2 || phaseIndex > n")
@ -164,6 +164,11 @@ pPhaseHetPairAtDistanceUsingDepthAndWindow <- function(windowDistances, phaseInd
pSpecificPathPhases = 1
for (j in seq(from=1, to=length(path)-1, by=1)) {
pSpecificPathPhases = pSpecificPathPhases * pPhasePair[path[j], path[j+1]]
if (pSpecificPathPhases < MIN_PATH_PROB) { # Do a "bounded" calculation [any path that is ALREADY of low probability can be discarded]:
#print(paste("pSpecificPathPhases= ", pSpecificPathPhases, sep=""))
pSpecificPathPhases = 0
break
}
}
pWindowNotPhasing = pWindowNotPhasing * (1 - pSpecificPathPhases)