From d9b12b468f7455b34e9d458f618211c643b4bd07 Mon Sep 17 00:00:00 2001 From: chartl Date: Mon, 15 Mar 2010 10:05:46 +0000 Subject: [PATCH] Adding default filter info git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3000 348d0f76-0448-11de-a6fe-93d51630548a --- python/getBamFilesFromSpreadsheet.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/python/getBamFilesFromSpreadsheet.py b/python/getBamFilesFromSpreadsheet.py index 17ade7b7d..0627a6356 100755 --- a/python/getBamFilesFromSpreadsheet.py +++ b/python/getBamFilesFromSpreadsheet.py @@ -10,18 +10,19 @@ hg18_dbsnp = "/humgen/gsa-hpprojects/GATK/data/dbsnp_130_hg18.rod" b36_dbsnp = "/humgen/gsa-hpprojects/GATK/data/dbsnp_130_b36.rod" b36_reference = "/broad/1KG/reference/human_b36_both.fasta" hg18_intervals = "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list" +#hg18_intervals = "/humgen/gsa-hpprojects/FHS/indexed/interval_lists/fhs_jhs_pilot.targets.interval_list" b36_intervals = "" min_base_q = "10" min_map_q = "10" max_reads = "1000000" -min_conf = "0" - +min_conf = "50" +variant_expression = "QUAL <= 50.0 || AB > 0.75 || QD < 5.0 || HRun > 3" spreadsheetPath = sys.argv[3] projectName = sys.argv[2] groupName = sys.argv[1] reference = sys.argv[4] - +filter_name = projectName+"_Initial_Filter" if ( reference != "hg18" and reference != "b36" ): raise ValueError("Illegal reference type") elif ( reference == "hg18" ): @@ -36,7 +37,7 @@ else: fpref = "human_b36" outputFile = projectName+"_bam_files.txt" -OUTPUT_HEADER = ["sample_id","recalibrated_bam_file","individual_id","fingerprint_file","reference_file","dbsnp_file","interval_list","max_reads_at_locus","min_confidence","min_mapping_quality","min_base_quality"] +OUTPUT_HEADER = ["sample_id","recalibrated_bam_file","individual_id","fingerprint_file","reference_file","dbsnp_file","interval_list","max_reads_at_locus","min_confidence","min_mapping_quality","min_base_quality","variant_filter_expression","variant_filter_name"] if ( spreadsheetPath.find("/") > -1 ): newSpreadsheet = spreadsheetPath.rsplit("/",1)[1].rsplit(".",1)[0]+"_proper_format.tsv" @@ -74,9 +75,9 @@ for line in project_info.readlines(): version = getNewestVersion(versioningDirectory) bamfile = versioningDirectory+version+"/"+spline[sample_index]+".bam" fingerprint_path = fingerprint_base+spline[project_index]+"/"+fpref+"/" - if ( spline[sample_index]+".fingerprint.geli" in os.listdir(fingerprint_path) ): + if ( os.path.isdir(fingerprint_path) and spline[sample_index]+".fingerprint.geli" in os.listdir(fingerprint_path) ): fingerprint_file = fingerprint_path+spline[sample_index]+".fingerprint.geli" else: fingerprint_file = "" if ( spline[status_index] == "Complete" ): - outputFile.write(projectName+"_"+spline[sample_index]+"\t"+bamfile+"\t"+groupName+"\t"+fingerprint_file+"\t"+reference+"\t"+dbsnp+"\t"+intervals+"\t"+max_reads+"\t"+min_conf+"\t"+min_map_q+"\t"+min_base_q+"\n") + outputFile.write(projectName+"_"+spline[sample_index]+"\t"+bamfile+"\t"+groupName+"\t"+fingerprint_file+"\t"+reference+"\t"+dbsnp+"\t"+intervals+"\t"+max_reads+"\t"+min_conf+"\t"+min_map_q+"\t"+min_base_q+"\t"+variant_expression+"\t"+filter_name+"\n")