Checking in Michael's DoC parameterization script;

this functionality will eventually be moved into VariantFiltration


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1515 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-09-03 15:07:49 +00:00
parent 515fc7c476
commit 3ac5ac066f
1 changed files with 47 additions and 0 deletions

View File

@ -0,0 +1,47 @@
#!/util/bin/python
import math, string, sys
for input_file in sys.argv[1:]:
FILENAME = input_file
# Read the depth of coverage distribution from file
depth = []; count = []
for line in open(FILENAME):
try:
int(line[0])
split = string.split(line,' ')
if int(split[1]) != 0:
depth.append(int(split[0]))
count.append(int(split[1]))
except ValueError: pass
# Calculate the mode
mode = depth[count.index(max(count))]
# Find the index of maximum extent of 'good' data
idx = depth.index(mode); dist = 10**9
while abs(count[idx] - count[0]) < dist:
dist = abs(count[idx] - count[0])
idx += 1
model_count = count[:idx + 1]
# Calculate mean & variance of 'good' data
model_tot = sum(model_count); adder = 0
for i in range(len(model_count)): adder += depth[i]*model_count[i]
mean = adder/float(model_tot)
adder = 0
for i in range(len(model_count)): adder += model_count[i]*(mean - depth[i])**2
var = adder/float(model_tot)
stdev = var**0.5
# Output Thresholds
print FILENAME
print 'Mean Depth of Coverage: ' + str(mean)
print 'Plus One Std Dev: ' + str(mean + stdev)
print 'Plus Two Std Dev: ' + str(mean + 2*stdev)
print 'Plus Three Std Dev: ' + str(mean + 3*stdev)
print 'Plus Four Std Dev: ' + str(mean + 4*stdev)
print 'Plus Five Std Dev: ' + str(mean + 5*stdev)