A few more additions; almost done...

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1541 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-09-07 01:50:22 +00:00
parent 5dbba6711c
commit e716f9337d
1 changed file with 15 additions and 15 deletions

View File

@ -51,7 +51,7 @@ if __name__ == "__main__":
samples = ["NA12878","NA12891","NA12892","NA19238","NA19239","NA19240"] samples = ["NA12878","NA12891","NA12892","NA19238","NA19239","NA19240"]
techs = ["SLX"] techs = ["SLX"]
chrs = range(1, 22) + ["X"] chrs = range(1, 23) + ["X"]
DoCs = [82,91,70,56,68,86] DoCs = [82,91,70,56,68,86]
# Official genome-wide Depth of Coverage tables for pilot 2, freeze 5: # Official genome-wide Depth of Coverage tables for pilot 2, freeze 5:
@ -60,7 +60,7 @@ if __name__ == "__main__":
# SLX: 82 91 70 56 68 86 # SLX: 82 91 70 56 68 86
# SOLID: 37 64 # SOLID: 37 64
# 454+SLD: 64 77 # 454+SLD: 64 77
# ALL: xx xx # ALL: 138 150
for sample, DoC in zip(samples, DoCs): for sample, DoC in zip(samples, DoCs):
# #
@ -69,12 +69,12 @@ if __name__ == "__main__":
MQs = [100,5,5] MQs = [100,5,5]
def finalBam(tech): def finalBam(tech):
return os.path.join(final_bam_dir, "%s.%s.bam" % ( sample, tech )) return os.path.join(final_bam_dir, "%s.%s.bam" % ( sample, tech ))
def outputFile(root, tech, name): def outputFileTech(root, tech, name):
return os.path.join(root, "%s.%s.%s" % ( sample, tech, name )) return os.path.join(root, "%s.%s.%s" % ( sample, tech, name ))
def badSnps( tech ): def badSnps( tech ):
return outputFile(cleaner_output, tech, "realigner.badsnps") return os.path.join(cleaner_output, "%s.%s.realigner.badsnps" % ( sample, tech ))
def indelsForFiltering( tech ): def indelsForFiltering( tech ):
return outputFile(indel_output, tech, "low.calls") return outputFileTech(indel_output, tech, "low.calls")
myTechs = techs myTechs = techs
if sample in ["NA12878", "NA19240"]: if sample in ["NA12878", "NA19240"]:
@ -85,7 +85,7 @@ if __name__ == "__main__":
myChrs = chrs myChrs = chrs
if sample in ["NA12891", "NA19239"]: if sample in ["NA12891", "NA19239"]:
myChrs = chrs + ["Y"] myChrs = chrs + ["Y"]
def badSnps( tech, chr ): def badSnpsChr( tech, chr ):
return os.path.join(cleaner_output, "%s.chr%s.%s.realigner.badsnps" % ( sample, chr, tech )) return os.path.join(cleaner_output, "%s.chr%s.%s.realigner.badsnps" % ( sample, chr, tech ))
for chr in myChrs: for chr in myChrs:
@ -123,7 +123,7 @@ if __name__ == "__main__":
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, mergedIntervalsFile, just_print_commands = OPTIONS.dry, waitID = jobid) jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, mergedIntervalsFile, just_print_commands = OPTIONS.dry, waitID = jobid)
cleanedFile = outputFile(cleaner_output, "bam") cleanedFile = outputFile(cleaner_output, "bam")
badsnpsFile = badSnps(tech) badsnpsFile = badSnpsChr(tech, str(chr))
cmd = CleanIntervals(bam, cleanedFile, mergedIntervalsFile, badsnpsFile) cmd = CleanIntervals(bam, cleanedFile, mergedIntervalsFile, badsnpsFile)
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, cleanedFile, just_print_commands = OPTIONS.dry, waitID = jobid) jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, cleanedFile, just_print_commands = OPTIONS.dry, waitID = jobid)
@ -140,7 +140,7 @@ if __name__ == "__main__":
cmd = "cat " cmd = "cat "
for chr in myChrs: for chr in myChrs:
cmd = cmd + " " + badSnps(tech, chr) cmd = cmd + " " + badSnpsChr(tech, chr)
cmd = cmd + " > " + badSnps(tech) cmd = cmd + " > " + badSnps(tech)
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, badSnps(tech), just_print_commands = OPTIONS.dry, waitID = mergeid) jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, badSnps(tech), just_print_commands = OPTIONS.dry, waitID = mergeid)
@ -149,12 +149,12 @@ if __name__ == "__main__":
def SnpCaller(bam, outputFile): def SnpCaller(bam, outputFile):
return config.gatkCmd('SingleSampleGenotyper') + " -o " + outputFile + " -I " + bam return config.gatkCmd('SingleSampleGenotyper') + " -o " + outputFile + " -I " + bam
def VarFiltration(bam, outputHead, snpcalls, badsnps, indelcalls, depth, mq): def VarFiltration(bam, outputHead, snpcalls, badsnps, indelcalls, depth, mq):
return config.gatkCmd('VariantFiltration') + " -VOH " + outputHead + " -I " + bam + " -B variant,Variants," + snpcalls + ",cleaned,CleanedOutSnp," + badsnps + ",indels,SimpleIndel," + indelcalls + " -X DepthOfCoverage:" + depth + " -X MappingQualityZero:" + mq return config.gatkCmd('VariantFiltration') + " -VOH " + outputHead + " -I " + bam + " -B variant,Variants," + snpcalls + ",cleaned,CleanedOutSnp," + badsnps + ",indels,SimpleIndel," + indelcalls + " -X DepthOfCoverage:max=" + depth + " -X MappingQualityZero:max=" + mq
def VarFiltration454(bam, outputHead, snpcalls, depth, mq): def VarFiltration454(bam, outputHead, snpcalls, depth, mq):
return config.gatkCmd('VariantFiltration') + " -VOH " + outputHead + " -I " + bam + " -B variant,Variants," + snpcalls + " -X DepthOfCoverage:" + depth + " -X MappingQualityZero:" + mq return config.gatkCmd('VariantFiltration') + " -VOH " + outputHead + " -I " + bam + " -B variant,Variants," + snpcalls + " -X DepthOfCoverage:max=" + depth + " -X MappingQualityZero:max=" + mq
indelsFileHigh = outputFile(indel_output, tech, "high.calls") indelsFileHigh = outputFileTech(indel_output, tech, "high.calls")
cmd = IndelCaller(bamToCallFrom, indelsFileHigh, "0.3") cmd = IndelCaller(bamToCallFrom, indelsFileHigh, "0.3")
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, indelsFileHigh, just_print_commands = OPTIONS.dry, waitID = mergeid) jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, indelsFileHigh, just_print_commands = OPTIONS.dry, waitID = mergeid)
@ -162,7 +162,7 @@ if __name__ == "__main__":
cmd = IndelCaller(bamToCallFrom, indelsFileLow, "0.1") cmd = IndelCaller(bamToCallFrom, indelsFileLow, "0.1")
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, indelsFileLow, just_print_commands = OPTIONS.dry, waitID = mergeid) jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, indelsFileLow, just_print_commands = OPTIONS.dry, waitID = mergeid)
snpsFile = outputFile(snp_output, tech, "calls") snpsFile = outputFileTech(snp_output, tech, "calls")
cmd = SnpCaller(bamToCallFrom, snpsFile) cmd = SnpCaller(bamToCallFrom, snpsFile)
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, snpsFile, just_print_commands = OPTIONS.dry, waitID = jobid) # wait on the low indel calls jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, snpsFile, just_print_commands = OPTIONS.dry, waitID = jobid) # wait on the low indel calls
@ -176,13 +176,13 @@ if __name__ == "__main__":
def SnpCaller(bams, outputFile): def SnpCaller(bams, outputFile):
return config.gatkCmd('SingleSampleGenotyper') + " -o " + outputFile + " ".join(map( lambda x: " -I " + x, bams )) return config.gatkCmd('SingleSampleGenotyper') + " -o " + outputFile + " ".join(map( lambda x: " -I " + x, bams ))
def VarFiltration(bams, outputHead, snpcalls, badsnps, indelcalls, depth, mq): def VarFiltration(bams, outputHead, snpcalls, badsnps, indelcalls, depth, mq):
return config.gatkCmd('VariantFiltration') + " -VOH " + outputHead + " -B variant,Variants," + snpcalls + ",cleaned,CleanedOutSnp," + badsnps + ",indels,SimpleIndel," + indelcalls + " -X DepthOfCoverage:" + depth + " -X MappingQualityZero:" + mq + " ".join(map( lambda x: " -I " + x, bams )) return config.gatkCmd('VariantFiltration') + " -VOH " + outputHead + " -B variant,Variants," + snpcalls + ",cleaned,CleanedOutSnp," + badsnps + ",indels,SimpleIndel," + indelcalls + " -X DepthOfCoverage:max=" + depth + " -X MappingQualityZero:max=" + mq + " ".join(map( lambda x: " -I " + x, bams ))
# #
# HOW DO I MAKE THESE JOBS DEPEND ON THE MERGE IDS OF THE INDIVIDUAL SAMPLES??? # HOW DO I MAKE THESE JOBS DEPEND ON THE MERGE IDS OF THE INDIVIDUAL SAMPLES???
# (Or until everything else is done?) # (Or until everything else is done?)
# #
solid454SnpsFile = outputFile(snp_output, "454-SOLID", "calls") solid454SnpsFile = outputFileTech(snp_output, "454-SOLID", "calls")
cmd = SnpCaller([finalBam("SOLID"),finalBam("454")], solid454SnpsFile) cmd = SnpCaller([finalBam("SOLID"),finalBam("454")], solid454SnpsFile)
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, solid454SnpsFile, just_print_commands = OPTIONS.dry, waitID = allMergeIds) jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, solid454SnpsFile, just_print_commands = OPTIONS.dry, waitID = allMergeIds)
@ -190,7 +190,7 @@ if __name__ == "__main__":
cmd = VarFiltration([finalBam("SOLID"),finalBam("454")], solid454VarFiltFile, solid454SnpsFile, badSnps("SOLID"), indelsForFiltering("SOLID"), str(DoC), str(mappingQuality)) cmd = VarFiltration([finalBam("SOLID"),finalBam("454")], solid454VarFiltFile, solid454SnpsFile, badSnps("SOLID"), indelsForFiltering("SOLID"), str(DoC), str(mappingQuality))
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, allVarFiltFile, just_print_commands = OPTIONS.dry, waitID = jobid) jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, allVarFiltFile, just_print_commands = OPTIONS.dry, waitID = jobid)
allSnpsFile = outputFile(snp_output, "allTechs", "calls") allSnpsFile = outputFileTech(snp_output, "allTechs", "calls")
cmd = SnpCaller([finalBam("SLX"),finalBam("SOLID"),finalBam("454")], solid454SnpsFile) cmd = SnpCaller([finalBam("SLX"),finalBam("SOLID"),finalBam("454")], solid454SnpsFile)
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, allSnpsFile, just_print_commands = OPTIONS.dry, waitID = allMergeIds) jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, allSnpsFile, just_print_commands = OPTIONS.dry, waitID = allMergeIds)
allVarFiltFile = os.path.join(filter_output, "%s.allTechs" % ( sample )) allVarFiltFile = os.path.join(filter_output, "%s.allTechs" % ( sample ))