From c167c6f9eba635ba171818c187d8ddc21c3d4295 Mon Sep 17 00:00:00 2001 From: fromer Date: Tue, 14 Dec 2010 18:44:59 +0000 Subject: [PATCH] Calculate the phasing probabilities for particular intra-het distances git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4838 348d0f76-0448-11de-a6fe-93d51630548a --- R/phasing/RBP_theoretical.R | 1 + .../calcPhasingProbsForWindowDistances.R | 47 +++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 R/phasing/calcPhasingProbsForWindowDistances.R diff --git a/R/phasing/RBP_theoretical.R b/R/phasing/RBP_theoretical.R index fcfd98fc8..e93550b33 100644 --- a/R/phasing/RBP_theoretical.R +++ b/R/phasing/RBP_theoretical.R @@ -122,6 +122,7 @@ pPhaseHetPairAtDistanceUsingDepthAndWindow <- function(windowDistances, phaseInd if (phaseIndex < 2 || phaseIndex > n) { stop("phaseIndex < 2 || phaseIndex > n") } + #print(paste("windowDistances= (", paste(windowDistances, collapse=", "), ")", sep="")) # A. Pre-compute the upper diagonal of square matrix of n CHOOSE 2 values of: # pDirectlyPhaseHetPairAtDistanceUsingDepth(meanDepth, nReadsToPhase, L, dist(i,j), Im, Is) diff --git a/R/phasing/calcPhasingProbsForWindowDistances.R b/R/phasing/calcPhasingProbsForWindowDistances.R new file mode 100644 index 000000000..2de4dbc30 --- /dev/null +++ b/R/phasing/calcPhasingProbsForWindowDistances.R @@ -0,0 +1,47 @@ +calcPhasingProbsForWindowDistances <- function(distances, MAX_WINDOW_SIZE, FILE_NAME = NULL) { + WINDOW_SIZES = 2:MAX_WINDOW_SIZE + + phaseProbsPositionWindow = matrix(data = NA, nrow=length(distances), ncol=length(WINDOW_SIZES)) + + for (i in 1:length(distances)) { + # Try to phase (i+1)-st position [relative to i] using varying window sizes: + for (j in 1:length(WINDOW_SIZES)) { + windowSize = WINDOW_SIZES[j] + remainingSize = windowSize - 2 # exlcude i, i+1 + + numOnLeft = i - 1 + numOnRight = (length(distances) + 1) - (i + 2) + 1 + + if (numOnLeft <= numOnRight) { + halfToUse = floor(remainingSize / 2) # skimp on the left [floor], and be generous with the right side + useOnLeft = min(halfToUse, numOnLeft) + useOnRight = min(remainingSize - useOnLeft, numOnRight) + } + else { + halfToUse = ceiling(remainingSize / 2) # be generous with the right side [ceiling] + useOnRight = min(halfToUse, numOnRight) + useOnLeft = min(remainingSize - useOnRight, numOnLeft) + } + startInd = i - useOnLeft # go left from position i + stopInd = i + 1 + useOnRight # go right from position i + 1 + + usePositionRange = seq(from=startInd, to=stopInd, by=1) + useDistancesRange = seq(from=startInd, to=stopInd-1, by=1) # since there are N-1 distances between N consecutive positions + + phaseIndex = which(usePositionRange == i+1) + if (length(phaseIndex) != 1) stop("NO phaseIndex!") + windowDistances = distances[useDistancesRange] + + print(paste("Try to phase position ", i+1, " [relative to ", i, "] using positions: (", paste(usePositionRange, collapse=", "), "), windowDistances= (", paste(windowDistances, collapse=", "), "), [phaseIndex= ", phaseIndex, ", i=", i, "]", sep="")) + p = pPhaseHetPairAtDistanceUsingDepthAndWindow(windowDistances, phaseIndex, meanDepth, nReadsToPhase, L, Im, Is) + print(paste("phase prob: ", p, sep="")) + phaseProbsPositionWindow[i, j] = p + } + + if (!is.null(FILE_NAME)) { + save(list = ls(all=TRUE), file = paste(FILE_NAME, ".RData", sep="")) + } + } + + phaseProbsPositionWindow +}