48 lines
1.3 KiB
Python
48 lines
1.3 KiB
Python
print("opening...")
|
|
ea_vcf = open("/humgen/gsa-hpprojects/analysis/privateMutations/eomi+autism/resources/callsets/eomi+autism/eomi+autism_batch.merged.vcf")
|
|
print("reading past header...")
|
|
ln = ea_vcf.readline()
|
|
import random
|
|
while (ln.startswith("#")):
|
|
ln = ea_vcf.readline()
|
|
|
|
numcalc = 0
|
|
nread = -1
|
|
|
|
def getAC(e):
|
|
if ( e.startswith("0/0") ):
|
|
return 0
|
|
elif ( e.startswith("0/1") or e.startswith("1/0") ):
|
|
return 1
|
|
elif ( e.startswith("1/1") ):
|
|
return 2
|
|
else:
|
|
print("Warning: "+e)
|
|
return 0
|
|
|
|
def calcTrans(line):
|
|
spline = line.strip().split("\t")
|
|
gtypes = filter(lambda y: y.find("./.") == -1, spline[9:len(spline)])
|
|
if ( len(gtypes) < 1800 ):
|
|
return (-1,-1)
|
|
random.shuffle(gtypes)
|
|
firstAC = reduce(lambda x,y: x + y , map(lambda u: getAC(u),gtypes[0:900]))
|
|
if ( firstAC > 5 ):
|
|
return (-1,-1)
|
|
secondAC = reduce(lambda x,y: x + y, map(lambda u: getAC(u),gtypes[900:1800]))
|
|
return (firstAC,secondAC)
|
|
|
|
print("Calculating...")
|
|
counts = filter(lambda u: u[0] > -1, map(lambda z: calcTrans(z) ,ea_vcf.readlines()))
|
|
print("Lines actually processed: %d" % len(counts))
|
|
|
|
cdict = dict()
|
|
for c in counts:
|
|
if ( not c in cdict ):
|
|
cdict[c] = 0
|
|
cdict[c] += 1
|
|
|
|
out = open("posterior_counts.txt",'w')
|
|
for c in cdict:
|
|
out.write("%d\t%d\t%d\n" % (c[0],c[1],cdict[c]))
|