Updated SNP calling power from coverage tools to work with new UnifiedGenotyper and DepthOfCoverage tools.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2378 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f5e547ed6e
commit
4e7e0432a2
|
|
@ -221,7 +221,7 @@ def stats_from_hist(options, depth_hist_filename, stats_filename, variant_eval_d
|
||||||
|
|
||||||
|
|
||||||
hist = []
|
hist = []
|
||||||
hist_gen = FlatFileTable.record_generator(depth_hist_filename, sep=" ", skip_n_lines=9)
|
hist_gen = FlatFileTable.record_generator(depth_hist_filename, sep=" ", skip_until_regex_line = "^depth count freq")
|
||||||
for index, record in enumerate(hist_gen):
|
for index, record in enumerate(hist_gen):
|
||||||
assert int(record["depth"]) == index
|
assert int(record["depth"]) == index
|
||||||
hist.append(int(record["count"]))
|
hist.append(int(record["count"]))
|
||||||
|
|
|
||||||
|
|
@ -44,9 +44,13 @@ def process_bam(bam_file):
|
||||||
sys.exit("Expected to find "+hapmapchip_file+" but it's not around")
|
sys.exit("Expected to find "+hapmapchip_file+" but it's not around")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Config for pilot 3
|
# Config for pilot 3 Broad data
|
||||||
ref = " -R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"
|
#ref = " -R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"
|
||||||
intervals_file = "/seq/references/HybSelOligos/thousand_genomes_alpha_redesign/thousand_genomes_alpha_redesign.targets.interval_list"
|
#intervals_file = "/seq/references/HybSelOligos/thousand_genomes_alpha_redesign/thousand_genomes_alpha_redesign.targets.interval_list"
|
||||||
|
|
||||||
|
# Config for pilot 3 DCC data (12/16/09 r. 2375)
|
||||||
|
ref = " -R /broad/1KG/reference/human_b36_both.fasta"
|
||||||
|
intervals_file = "/humgen/gsa-scr1/GATK_Data/thousand_genomes_alpha_redesign.targets.b36.interval_list"
|
||||||
|
|
||||||
# Assure that BAM index exists
|
# Assure that BAM index exists
|
||||||
bam_index_file = output_basepath+".bam.bai"
|
bam_index_file = output_basepath+".bam.bai"
|
||||||
|
|
@ -54,13 +58,13 @@ def process_bam(bam_file):
|
||||||
Cmd = "samtools index "+bam_file
|
Cmd = "samtools index "+bam_file
|
||||||
cmd(Cmd, die_on_fail = True)
|
cmd(Cmd, die_on_fail = True)
|
||||||
|
|
||||||
# Run SSG calls
|
# Run UnifiedGenotyper calls
|
||||||
ssg_outfile = output_basepath+".SSG.calls"
|
vcf_outfile = output_basepath+".calls.vcf"
|
||||||
if not exists_and_non_zero(ssg_outfile):
|
if not exists_and_non_zero(vcf_outfile):
|
||||||
print "Running SingleSampleGenotyper"
|
print "Running UnifiedGenotyper"
|
||||||
Cmd = gatk_exe + ref + " -T SingleSampleGenotyper"+\
|
Cmd = gatk_exe + ref + " -T UnifiedGenotyper"+\
|
||||||
" -I "+bam_file+\
|
" -I "+bam_file+\
|
||||||
" -varout "+ssg_outfile+\
|
" -varout "+vcf_outfile+\
|
||||||
" -L "+intervals_file+\
|
" -L "+intervals_file+\
|
||||||
" -fmq0 -l INFO "
|
" -fmq0 -l INFO "
|
||||||
#" --genotype -lod 5"
|
#" --genotype -lod 5"
|
||||||
|
|
@ -68,22 +72,18 @@ def process_bam(bam_file):
|
||||||
#+interval_string+\
|
#+interval_string+\
|
||||||
cmd(Cmd, die_on_fail=True)
|
cmd(Cmd, die_on_fail=True)
|
||||||
|
|
||||||
# Run VariantEval on SSG calls
|
# Run VariantEval on UnifiedGenotyper calls
|
||||||
varianteval_outdir = output_basepath+".VariantEval"
|
varianteval_outfile = output_basepath+".varianteval"
|
||||||
varianteval_outbase = output_basepath+".VariantEval/"+output_basepath
|
|
||||||
varianteval_outfile = output_basepath+".VariantEval/"+output_basepath #+".all.genotype_concordance"
|
|
||||||
|
|
||||||
if not exists_and_non_zero(varianteval_outfile):
|
if not exists_and_non_zero(varianteval_outfile):
|
||||||
print "Running VariantEval with output to "+varianteval_outbase
|
print "Running VariantEval with output to "+varianteval_outfile
|
||||||
if not os.path.exists(varianteval_outdir):
|
|
||||||
os.mkdir(varianteval_outdir)
|
|
||||||
Cmd = gatk_exe + ref + " -T VariantEval"+\
|
Cmd = gatk_exe + ref + " -T VariantEval"+\
|
||||||
" -B dbsnp,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod"+\
|
" -B dbsnp,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod"+\
|
||||||
" -B eval,Variants,"+ssg_outfile+\
|
" -B eval,VCF,"+ssg_outfile+\
|
||||||
" -o "+varianteval_outbase+\
|
" -o "+varianteval_outfile+\
|
||||||
" -L "+intervals_file+\
|
" -L "+intervals_file+\
|
||||||
|
" -G -V"+\
|
||||||
" --evalContainsGenotypes"+\
|
" --evalContainsGenotypes"+\
|
||||||
" -minConfidenceScore 5"+\
|
" -minPhredConfidenceScore 150"+\
|
||||||
" -fmq0 -l INFO"
|
" -fmq0 -l INFO"
|
||||||
if using_hapmapchip:
|
if using_hapmapchip:
|
||||||
Cmd = Cmd + " -hc "+hapmapchip_file
|
Cmd = Cmd + " -hc "+hapmapchip_file
|
||||||
|
|
@ -94,10 +94,11 @@ def process_bam(bam_file):
|
||||||
hist_outfile = output_basepath+".hapmap.coverage_hist"
|
hist_outfile = output_basepath+".hapmap.coverage_hist"
|
||||||
if not exists_and_non_zero(hist_outfile):
|
if not exists_and_non_zero(hist_outfile):
|
||||||
print "Running CoverageHist on "+bam_file
|
print "Running CoverageHist on "+bam_file
|
||||||
Cmd = gatk_exe + ref + " -T CoverageHistogram"+\
|
Cmd = gatk_exe + ref + " -T DepthOfCoverage"+\
|
||||||
" -I "+bam_file+\
|
" -I "+bam_file+\
|
||||||
" -o "+hist_outfile+\
|
" -o "+hist_outfile+\
|
||||||
" -L "+intervals_file+\
|
" -L "+intervals_file+\
|
||||||
|
" -histogram --suppressLocusPrinting"+\
|
||||||
" -fmq0 -l INFO"
|
" -fmq0 -l INFO"
|
||||||
|
|
||||||
cmd(Cmd, die_on_fail=True)#,"gsa", output_basepath)
|
cmd(Cmd, die_on_fail=True)#,"gsa", output_basepath)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue