Better validation scripts
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@458 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
01be8f09e3
commit
e842b543c9
|
|
@ -32,6 +32,11 @@ def main():
|
||||||
parser.add_option("-a", "--rebuildAllFiles", dest="rebuildAllFiles",
|
parser.add_option("-a", "--rebuildAllFiles", dest="rebuildAllFiles",
|
||||||
action='store_true', default=False,
|
action='store_true', default=False,
|
||||||
help="If provided, all intermediate files (BAM and pileups) will be regenerated")
|
help="If provided, all intermediate files (BAM and pileups) will be regenerated")
|
||||||
|
parser.add_option("-p", "--justPrint", dest="justPrint",
|
||||||
|
action='store_true', default=False,
|
||||||
|
help="Don't actually run GATK, just setup data files")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(OPTIONS, args) = parser.parse_args()
|
(OPTIONS, args) = parser.parse_args()
|
||||||
if len(args) != 0:
|
if len(args) != 0:
|
||||||
|
|
@ -69,7 +74,7 @@ def main():
|
||||||
filebase = os.path.splitext(read_filename)[0]
|
filebase = os.path.splitext(read_filename)[0]
|
||||||
|
|
||||||
reads = os.path.join(OPTIONS.dataDir, read_filename)
|
reads = os.path.join(OPTIONS.dataDir, read_filename)
|
||||||
readsIndex = os.path.join(OPTIONS.dataDir, filebase + '.selected.bam.bai')
|
readsIndex = reads + '.bai'
|
||||||
subBAM = os.path.join(OPTIONS.dataDir, filebase + '.selected.bam')
|
subBAM = os.path.join(OPTIONS.dataDir, filebase + '.selected.bam')
|
||||||
pileup = os.path.join(OPTIONS.dataDir, filebase + '.selected.pileup')
|
pileup = os.path.join(OPTIONS.dataDir, filebase + '.selected.pileup')
|
||||||
validationOutput = os.path.join(OPTIONS.outputDir, filebase + '.validate.output')
|
validationOutput = os.path.join(OPTIONS.outputDir, filebase + '.validate.output')
|
||||||
|
|
@ -88,6 +93,7 @@ def main():
|
||||||
else:
|
else:
|
||||||
cmd = "samtools view -b " + reads + " " + region + " > " + subBAM
|
cmd = "samtools view -b " + reads + " " + region + " > " + subBAM
|
||||||
farm_commands.cmd(cmd, None, None)
|
farm_commands.cmd(cmd, None, None)
|
||||||
|
indexBAM(subBAM)
|
||||||
|
|
||||||
if not os.path.exists(pileup) or OPTIONS.rebuildAllFiles:
|
if not os.path.exists(pileup) or OPTIONS.rebuildAllFiles:
|
||||||
cmd = "samtools pileup -cf " + ref + " " + subBAM + " > " + pileup
|
cmd = "samtools pileup -cf " + ref + " " + subBAM + " > " + pileup
|
||||||
|
|
@ -97,7 +103,7 @@ def main():
|
||||||
analysis = "ValidatingPileup"
|
analysis = "ValidatingPileup"
|
||||||
cmd = "java -ea -Xmx1024m -jar ~/dev/GenomeAnalysisTK/trunk/dist/GenomeAnalysisTK.jar -T " + analysis + " -I " + subBAM + " -R " + ref + " -l INFO -S SILENT -U -B pileup SAMPileup " + pileup
|
cmd = "java -ea -Xmx1024m -jar ~/dev/GenomeAnalysisTK/trunk/dist/GenomeAnalysisTK.jar -T " + analysis + " -I " + subBAM + " -R " + ref + " -l INFO -S SILENT -U -B pileup SAMPileup " + pileup
|
||||||
print cmd
|
print cmd
|
||||||
farm_commands.cmd(cmd, OPTIONS.farmQueue, outputFile=validationOutput)
|
farm_commands.cmd(cmd, OPTIONS.farmQueue, outputFile=validationOutput, just_print_commands=OPTIONS.justPrint)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
||||||
|
|
@ -14,10 +14,12 @@
|
||||||
/broad/1KG/pilot3/sams/NA12892.bam /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta *
|
/broad/1KG/pilot3/sams/NA12892.bam /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta *
|
||||||
|
|
||||||
# anthony PCR lane
|
# anthony PCR lane
|
||||||
/seq/dirseq/aphilipp/combo/sequences/pcr/samfiles/10035.5.clean.sam /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta *
|
# samtools import /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.bam.ref_list /seq/dirseq/aphilipp/combo/sequences/pcr/samfiles/10035.5.clean.sam 10035.5.clean.bam
|
||||||
|
/humgen/gsa-scr1/GATK_Data/Validation_Data/10035.5.clean.bam /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta *
|
||||||
|
|
||||||
# anthony HS lane
|
# anthony HS lane
|
||||||
/seq/dirseq/aphilipp/combo/sequences/hs/samfiles/30CLA.5.clean.sam /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta *
|
# samtools import /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.bam.ref_list /seq/dirseq/aphilipp/combo/sequences/hs/samfiles/30CLA.5.clean.sam 30CLA.5.clean.bam
|
||||||
|
/humgen/gsa-scr1/GATK_Data/Validation_Data/30CLA.5.clean.bam /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta *
|
||||||
|
|
||||||
# WGS tumor -- figure it out
|
# WGS tumor -- figure it out
|
||||||
#/broad/1KG/pilot3/sams/NA12892.bam /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta *
|
#/broad/1KG/pilot3/sams/NA12892.bam /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta *
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue