Fixed potential bug with next() operation returning empty contexts when a read contains a large deletion. We can now use the look ahead safely...
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@439 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7261787b71
commit
2eabcfedb7
|
|
@ -0,0 +1,103 @@
|
|||
import farm_commands
|
||||
import os.path
|
||||
import sys
|
||||
from optparse import OptionParser
|
||||
|
||||
defaultCommands = ['CountReads', 'Pileup']
|
||||
|
||||
def indexBAM(reads):
|
||||
cmd = "samtools index " + reads
|
||||
farm_commands.cmd(cmd, None, None)
|
||||
|
||||
def main():
|
||||
global OPTIONS, ROOT
|
||||
|
||||
usage = "usage: %prog [options]"
|
||||
parser = OptionParser(usage=usage)
|
||||
parser.add_option("-f", "--file", dest="validateInfoFile",
|
||||
type="string", default=None,
|
||||
help="File name of the validation set")
|
||||
parser.add_option("-d", "--dataDir", dest="dataDir",
|
||||
type="string", default=None,
|
||||
help="Directory to the data files")
|
||||
parser.add_option("-o", "--outputDir", dest="outputDir",
|
||||
type="string", default=None,
|
||||
help="Directory to put the output")
|
||||
parser.add_option("-q", "--farmQueue", dest="farmQueue",
|
||||
type="string", default=None,
|
||||
help="Farm queue to submit jobs to. Leave blank for local processing")
|
||||
parser.add_option("-e", "--ignoreExistingFiles", dest="ignoreExistingFiles",
|
||||
action='store_true', default=True,
|
||||
help="Ignores the existing files")
|
||||
parser.add_option("-a", "--rebuildAllFiles", dest="rebuildAllFiles",
|
||||
action='store_true', default=False,
|
||||
help="If provided, all intermediate files (BAM and pileups) will be regenerated")
|
||||
|
||||
(OPTIONS, args) = parser.parse_args()
|
||||
if len(args) != 0:
|
||||
parser.error("incorrect number of arguments")
|
||||
|
||||
if OPTIONS.validateInfoFile == None:
|
||||
parser.error("f option is required")
|
||||
if OPTIONS.dataDir == None:
|
||||
parser.error("d option is required")
|
||||
if OPTIONS.outputDir == None:
|
||||
parser.error("o option is required")
|
||||
|
||||
if not os.path.exists(OPTIONS.outputDir):
|
||||
os.mkdir(OPTIONS.outputDir)
|
||||
if not os.path.exists(OPTIONS.dataDir):
|
||||
os.mkdir(OPTIONS.dataDir)
|
||||
|
||||
for line in open(OPTIONS.validateInfoFile):
|
||||
if line.strip() == "" or line[0] == '#':
|
||||
# comment line
|
||||
continue
|
||||
|
||||
# line is of the format:
|
||||
# <reads.bam> <ref.fasta> <processingRegion>
|
||||
originalReads, ref, region = line.split()
|
||||
|
||||
originalReads = os.path.expanduser(originalReads)
|
||||
ref = os.path.expanduser(ref)
|
||||
|
||||
if not os.path.exists(originalReads) or not os.path.exists(ref):
|
||||
print 'Input files do not exist!', originalReads, ref
|
||||
sys.exit(1)
|
||||
|
||||
head, read_filename = os.path.split(originalReads)
|
||||
filebase = os.path.splitext(read_filename)[0]
|
||||
|
||||
reads = os.path.join(OPTIONS.dataDir, read_filename)
|
||||
readsIndex = os.path.join(OPTIONS.dataDir, filebase + '.selected.bam.bai')
|
||||
subBAM = os.path.join(OPTIONS.dataDir, filebase + '.selected.bam')
|
||||
pileup = os.path.join(OPTIONS.dataDir, filebase + '.selected.pileup')
|
||||
validationOutput = os.path.join(OPTIONS.outputDir, filebase + '.validate.output')
|
||||
|
||||
#print 'reads', reads
|
||||
if not os.path.exists(reads) or OPTIONS.rebuildAllFiles:
|
||||
farm_commands.cmd("ln -s " + os.path.abspath(originalReads) + " " + reads)
|
||||
|
||||
if not os.path.exists(readsIndex) or OPTIONS.rebuildAllFiles:
|
||||
indexBAM(reads)
|
||||
|
||||
if not os.path.exists(subBAM) or OPTIONS.rebuildAllFiles:
|
||||
if region == '*':
|
||||
farm_commands.cmd("ln -s " + os.path.abspath(reads) + " " + subBAM)
|
||||
farm_commands.cmd("ln -s " + os.path.abspath(readsIndex) + " " + subBAM+'.bai')
|
||||
else:
|
||||
cmd = "samtools view -b " + reads + " " + region + " > " + subBAM
|
||||
farm_commands.cmd(cmd, None, None)
|
||||
|
||||
if not os.path.exists(pileup) or OPTIONS.rebuildAllFiles:
|
||||
cmd = "samtools pileup -cf " + ref + " " + subBAM + " > " + pileup
|
||||
farm_commands.cmd(cmd, None, None)
|
||||
|
||||
if not os.path.exists(validationOutput) or OPTIONS.ignoreExistingFiles:
|
||||
analysis = "ValidatingPileup"
|
||||
cmd = "java -jar ~/dev/GenomeAnalysisTK/trunk/dist/GenomeAnalysisTK.jar -T " + analysis + " -I " + subBAM + " -R " + ref + " -l INFO -S SILENT -U -B pileup SAMPileup " + pileup
|
||||
print cmd
|
||||
farm_commands.cmd(cmd, OPTIONS.farmQueue, outputFile=validationOutput)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -3,11 +3,14 @@
|
|||
import os
|
||||
#justPrintCommands = False
|
||||
|
||||
def cmd(cmd_str, farm_queue=False, output_head="", just_print_commands=False):
|
||||
def cmd(cmd_str, farm_queue=False, output_head="", just_print_commands=False, outputFile = None):
|
||||
# if farm_queue is non-False, submits to queue, other
|
||||
|
||||
if farm_queue:
|
||||
farm_stdout = output_head+".stdout"
|
||||
if outputFile <> None:
|
||||
farm_stdout = outputFile
|
||||
else:
|
||||
farm_stdout = output_head+".stdout"
|
||||
cmd_str = "bsub -q "+farm_queue+" -o "+farm_stdout+" "+cmd_str #+" TMP_DIR=/wga/scr1/andrewk/tmp"
|
||||
print ">>> Farming via "+cmd_str
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,24 @@
|
|||
# e.coli
|
||||
/humgen/gsa-scr1/GATK_Data/Validation_Data/MV1994.bam /humgen/gsa-scr1/GATK_Data/Validation_Data/Escherichia_coli_K12_MG1655.fasta *
|
||||
|
||||
# daughter chrom 6 deep coverage
|
||||
/broad/1KG/DCC/data/NA12878/alignment/NA12878.chrom6.SRP000032.2009_02.bam /broad/1KG/reference/human_b36_both.fasta 6:1-10,000,00
|
||||
|
||||
# daughter chrom 1 deep coverage
|
||||
/broad/1KG/DCC/data/NA12878/alignment/NA12878.chrom1.SRP000032.2009_02.bam /broad/1KG/reference/human_b36_female.fasta *
|
||||
|
||||
# mother of trio individual low coverage
|
||||
/broad/1KG/DCC/data/NA12892/alignment/NA12892.SRP000031.2009_02.bam /broad/1KG/reference/human_b36_female.fasta *
|
||||
|
||||
# Pilot 3
|
||||
/broad/1KG/pilot3/sams/NA12892.bam /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta *
|
||||
|
||||
# anthony PCR lane
|
||||
/seq/dirseq/aphilipp/combo/sequences/pcr/samfiles/10035.5.clean.sam /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta *
|
||||
|
||||
# anthony PCR lane
|
||||
/seq/dirseq/aphilipp/combo/sequences/hs/samfiles/30CLA.5.clean.sam /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta *
|
||||
|
||||
# WGS tumor
|
||||
#/broad/1KG/pilot3/sams/NA12892.bam /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta *
|
||||
|
||||
Loading…
Reference in New Issue