From afccbc44ece4927ebf5fb615a74aece82d637701 Mon Sep 17 00:00:00 2001 From: andrewk Date: Fri, 31 Jul 2009 00:22:53 +0000 Subject: [PATCH] Script that performs all the processing steps from raw Illumina reads through to analysis of barcoding and hybrid selection efficience as documented in the GATK tutorial; can automatically run all steps in series on the farm. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1354 348d0f76-0448-11de-a6fe-93d51630548a --- python/BarcodeAnalysis.py | 41 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100755 python/BarcodeAnalysis.py diff --git a/python/BarcodeAnalysis.py b/python/BarcodeAnalysis.py new file mode 100755 index 000000000..52cc634b9 --- /dev/null +++ b/python/BarcodeAnalysis.py @@ -0,0 +1,41 @@ +#!/usr/bin/python + +import os,sys +from farm_commands import cmd + +def get_line_number_from_file(filename, line_num): + for index, line in enumerate(open(filename)): + if index == line_num: + return line + +solexa_dir = "." + +barcodes = [] +barcode_file = open("../barcode_sequences.txt") +barcode_file.readline() +for line in barcode_file: + fields = line.split() + barcodes.append(fields[1]) + +farm = "long" +lane = 6 +alias = "lane"+str(lane) +tmp_dir = "/humgen/gsa-scr1/andrewk/tmp/" + +cmd("/seq/software/picard/current/bin/ExtractIlluminaBarcodes.jar B="+solexa_dir+" L="+str(lane)+" BARCODE_POSITION=1 METRICS_FILE=metrics.out BARCODE=" + " BARCODE=".join(barcodes)) + +cmd( "/seq/software/picard/current/bin/BustardToSam.jar B="+solexa_dir+" L="+str(lane)+" O="+alias+".bam SAMPLE_ALIAS="+alias+" RUN_BARCODE="+alias+" TMP_DIR="+tmp_dir)#, farm_queue=farm) #PAIRED_RUN=True + +for barcode in barcodes: + id1 = cmd( "/seq/software/picard/current/bin/BustardToSam.jar B="+solexa_dir+" L="+str(lane)+" BARCODE_POSITION=1 BARCODE="+barcode+" O="+barcode+".bam RUN_BARCODE="+barcode+" SAMPLE_ALIAS=lane5 TMP_DIR="+tmp_dir, farm_queue=farm) #PAIRED_RUN=True + + id2 = cmd( "/home/radon01/andrewk/dev/Sting/trunk/python/AlignBam.py "+barcode+".bam", farm_queue=farm, waitID = id1) + id3 = cmd("/seq/software/picard/1.21/bin/MergeSamFiles.jar I="+barcode+".aligned.bam O="+barcode+".aligned.merged.bam VALIDATION_STRINGENCY=SILENT", farm_queue=farm, waitID = id2) + id4 = cmd("java -Xmx4096m -jar /seq/software/picard/1.21/bin/MarkDuplicates.jar VALIDATION_STRINGENCY=SILENT I= "+barcode+".aligned.merged.bam O="+barcode+".aligned.duplicates_marked.bam M="+barcode+".aligned.bam.duplicates_marked.bam.duplicate_metrics TMP_DIR="+tmp_dir, farm_queue=farm), waitID=id3) + cmd("java -jar /seq/software/picard/current/bin/CalculateHsMetrics.jar BI=/seq/references/HybSelOligos/thousand_genomes_alpha_redesign/thousand_genomes_alpha_redesign.baits.interval_list TI=/seq/references/HybSelOligos/thousand_genomes_alpha_redesign/thousand_genomes_alpha_redesign.targets.interval_list I="+barcode+".aligned.duplicates_marked.bam M="+barcode+".hs_metrics VALIDATION_STRINGENCY=SILENT", farm_queue=farm, waitID = id4) + + +fout = open("hybrid_selection.metrics", "w") +print >> fout, "BARCODE\t"+get_line_number_from_file(barcodes[0]+".hs_metrics", 6).rstrip() +for barcode in barcodes: + print >> fout, barcode+"\t"+get_line_number_from_file(barcode+".hs_metrics", 7).rstrip()