# gatk-3.8/python/validatePosterior.py

import random

# Announce progress, then open the merged call-set file for reading.
print("opening...")
ea_vcf = open("/humgen/gsa-hpprojects/analysis/privateMutations/eomi+autism/resources/callsets/eomi+autism/eomi+autism_batch.merged.vcf")

# Consume lines until the first one that does not start with '#'.
# When the loop exits, `ln` holds that first non-header line (or "" if the
# file ended), and the file handle is positioned just after it.
print("reading past header...")
while True:
    ln = ea_vcf.readline()
    if not ln.startswith("#"):
        break

# Module-level counters; nothing in the visible script reads them afterwards.
numcalc, nread = 0, -1

def getAC(e):
    """Translate the leading genotype call of a sample field into a count.

    Only the first three characters of ``e`` are inspected:

    * ``0/0``          -> 0
    * ``0/1`` or ``1/0`` -> 1
    * ``1/1``          -> 2

    i.e. the number of ``1`` alleles in the call. Any other prefix prints a
    warning containing the whole field and is counted as 0.
    """
    knownCalls = {"0/0": 0, "0/1": 1, "1/0": 1, "1/1": 2}
    call = e[:3]
    if call in knownCalls:
        return knownCalls[call]
    # Unrecognised call: report it but still contribute nothing to the total.
    print("Warning: "+e)
    return 0

def calcTrans(line, groupSize=900, maxFirstAC=5):
    """Split one record's called samples into two random groups and count each.

    The line is split on tabs and the columns from index 9 onward are taken
    as per-sample fields (presumably VCF genotype columns -- confirm against
    the input file). Fields containing ``./.`` are discarded. The remaining
    fields are shuffled in place with ``random.shuffle`` and ``getAC`` is
    summed over the first ``groupSize`` fields and over the next
    ``groupSize`` fields.

    Parameters:
        line: one tab-delimited record (a trailing newline is stripped).
        groupSize: number of samples in each of the two groups; the record
            needs at least ``2 * groupSize`` usable fields. Defaults to 900.
        maxFirstAC: largest first-group count that is still accepted.
            Defaults to 5.

    Returns:
        ``(firstAC, secondAC)`` for an accepted record, or the sentinel
        ``(-1, -1)`` when there are too few usable fields or the first-group
        count exceeds ``maxFirstAC``.
    """
    fields = line.strip().split("\t")
    # Build a real list: len() and random.shuffle() both need a sequence, and
    # filter() only returns one on Python 2.
    gtypes = [g for g in fields[9:] if "./." not in g]
    needed = 2 * groupSize
    if len(gtypes) < needed:
        return (-1, -1)
    random.shuffle(gtypes)
    # sum() replaces reduce(), which is not a builtin on Python 3.
    firstAC = sum(getAC(g) for g in gtypes[:groupSize])
    if firstAC > maxFirstAC:
        # Skip the second count entirely; callers drop the sentinel.
        return (-1, -1)
    secondAC = sum(getAC(g) for g in gtypes[groupSize:needed])
    return (firstAC, secondAC)

print("Calculating...")
# The header-skipping loop stops only after it has already read the first
# non-header line into `ln`; include it here so that record is not lost.
# `ln` is "" when the file contained nothing but header lines.
records = ([ln] if ln else []) + ea_vcf.readlines()
ea_vcf.close()

# calcTrans returns (-1, -1) for rejected records; keep only accepted ones.
# A list comprehension (rather than filter/map) guarantees a real list, so
# len() below works on both Python 2 and Python 3.
counts = [pair for pair in (calcTrans(rec) for rec in records) if pair[0] > -1]
print("Lines actually processed: %d" % len(counts))

# Tally how many records produced each (firstAC, secondAC) pair.
cdict = dict()
for c in counts:
    cdict[c] = cdict.get(c, 0) + 1

# One output row per distinct pair: firstAC, secondAC, number of records.
# The with-block makes sure the file is flushed and closed.
with open("posterior_counts.txt", 'w') as out:
    for c in cdict:
        out.write("%d\t%d\t%d\n" % (c[0], c[1], cdict[c]))
Committing two pieces of code for exome analysis, in case they need be returned to git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5135 348d0f76-0448-11de-a6fe-93d51630548a 2011-01-31 22:13:09 +08:00			`print("opening...")`
			`ea_vcf = open("/humgen/gsa-hpprojects/analysis/privateMutations/eomi+autism/resources/callsets/eomi+autism/eomi+autism_batch.merged.vcf")`
			`print("reading past header...")`
			`ln = ea_vcf.readline()`
			`import random`
			`while (ln.startswith("#")):`
			`ln = ea_vcf.readline()`

			`numcalc = 0`
			`nread = -1`

			`def getAC(e):`
			`if ( e.startswith("0/0") ):`
			`return 0`
			`elif ( e.startswith("0/1") or e.startswith("1/0") ):`
			`return 1`
			`elif ( e.startswith("1/1") ):`
			`return 2`
			`else:`
			`print("Warning: "+e)`
			`return 0`

			`def calcTrans(line):`
			`spline = line.strip().split("\t")`
			`gtypes = filter(lambda y: y.find("./.") == -1, spline[9:len(spline)])`
			`if ( len(gtypes) < 1800 ):`
			`return (-1,-1)`
			`random.shuffle(gtypes)`
			`firstAC = reduce(lambda x,y: x + y , map(lambda u: getAC(u),gtypes[0:900]))`
			`if ( firstAC > 5 ):`
			`return (-1,-1)`
			`secondAC = reduce(lambda x,y: x + y, map(lambda u: getAC(u),gtypes[900:1800]))`
			`return (firstAC,secondAC)`

			`print("Calculating...")`
			`counts = filter(lambda u: u[0] > -1, map(lambda z: calcTrans(z) ,ea_vcf.readlines()))`
			`print("Lines actually processed: %d" % len(counts))`

			`cdict = dict()`
			`for c in counts:`
			`if ( not c in cdict ):`
			`cdict[c] = 0`
			`cdict[c] += 1`

			`out = open("posterior_counts.txt",'w')`
			`for c in cdict:`
			`out.write("%d\t%d\t%d\n" % (c[0],c[1],cdict[c]))`