misc. useful updates to python library
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3683 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
47c4a70ac1
commit
cf910d9cc2
|
|
@ -0,0 +1,254 @@
|
||||||
|
# import farm_commands2
|
||||||
|
import os.path
|
||||||
|
import sys
|
||||||
|
from optparse import OptionParser
|
||||||
|
import glob
|
||||||
|
import operator
|
||||||
|
import itertools
|
||||||
|
import re
|
||||||
|
import vcfReader
|
||||||
|
import string
|
||||||
|
|
||||||
|
def average(l):
    """Return the arithmetic mean of the values in l.

    Raises ZeroDivisionError for an empty sequence, matching the
    original behavior.
    """
    # sum()/len() replaces the old reduce(operator.add, l, 0), which also
    # shadowed the built-in name 'sum' with a local variable.
    return sum(l) / len(l)
|
||||||
|
|
||||||
|
def printHeaderSep():
    """Print a blank line followed by an 80-character dashed separator."""
    print('')
    # '-' * 80 is the idiomatic repeated-character string; the original
    # joined a list of 80 one-character strings.
    print('-' * 80)
|
||||||
|
|
||||||
|
class Sample:
    """Per-sample accumulator for base counts and variant-site tallies."""

    def __init__(self, name):
        # Sample identifier taken from a VCF column header.
        self.name = name
        # Counters, incremented by the various count* routines in this file.
        self.rawBases = 0
        self.mappedBases = 0
        self.nSNPs = 0
        self.nIndels = 0

    def getName(self):
        return self.name

    def getNSNPs(self):
        return self.nSNPs

    def getNIndels(self):
        return self.nIndels

    def __str__(self):
        # max(rawBases, 1) guards the percentage against division by zero
        # when no raw bases have been recorded yet.
        pctMapped = (self.mappedBases * 100.0) / max(self.rawBases, 1)
        return '[%s rawBases=%d mappedBases=%d percentMapped=%.2f nSNPs=%d nIndels=%d]' % (
            self.name, self.rawBases, self.mappedBases, pctMapped,
            self.nSNPs, self.nIndels)

    __repr__ = __str__
|
||||||
|
|
||||||
|
def flatFileIterator(file, fields = None, skip = 0):
    """Yield rows from a tab-delimited flat file.

    Each line is split on tabs and each field stripped of surrounding
    whitespace.  If fields is given it is a list of column indices to
    project out, in order.  The first skip lines are ignored.  A row that
    reduces to a single field is yielded as a bare string rather than a
    one-element list (matching the original behavior).

    Fixes over the original: the file handle is closed deterministically
    via 'with', and the py2-only string.strip / map+lambda calls are
    replaced with comprehensions that work under both py2 and py3.
    """
    with open(file) as fh:
        for count, line in enumerate(fh, start=1):
            if count <= skip:
                continue
            row = [part.strip() for part in line.split('\t')]
            if fields is not None:
                row = [row[i] for i in fields]
            if len(row) == 1:
                row = row[0]
            yield row
|
||||||
|
|
||||||
|
# 1. FASTQ_FILE, path to fastq file on ftp site
|
||||||
|
# 2. MD5, md5sum of file
|
||||||
|
# 3. RUN_ID, SRA/ERA run accession
|
||||||
|
# 4. STUDY_ID, SRA/ERA study accession
|
||||||
|
# 5. STUDY_NAME, Name of study
|
||||||
|
# 6. CENTER_NAME, Submission centre name
|
||||||
|
# 7. SUBMISSION_ID, SRA/ERA submission accession
|
||||||
|
# 8. SUBMISSION_DATE, Date sequence submitted, YYYY-MM-DAY
|
||||||
|
# 9. SAMPLE_ID, SRA/ERA sample accession
|
||||||
|
# 10. SAMPLE_NAME, Sample name
|
||||||
|
# 11. POPULATION, Sample population
|
||||||
|
# 12. EXPERIMENT_ID, Experiment accession
|
||||||
|
# 13. INSTRUMENT_PLATFORM, Type of sequencing machine
|
||||||
|
# 14. INSTRUMENT_MODEL, Model of sequencing machine
|
||||||
|
# 15. LIBRARY_NAME, Library name
|
||||||
|
# 16. RUN_NAME, Name of machine run
|
||||||
|
# 17. RUN_BLOCK_NAME, Name of machine run sector
|
||||||
|
# 18. INSERT_SIZE, Submitter specified insert size
|
||||||
|
# 19. LIBRARY_LAYOUT, Library layout, this can be either PAIRED or SINGLE
|
||||||
|
# 20. PAIRED_FASTQ, Name of mate pair file if exists (Runs with failed mates will have
|
||||||
|
# a library layout of PAIRED but no paired fastq file)
|
||||||
|
# 21. WITHDRAWN, 0/1 to indicate if the file has been withdrawn, only present if a file has been withdrawn
|
||||||
|
# 22. WITHDRAWN_DATE, date of withdrawal, this should only be defined if a file is
|
||||||
|
# withdrawn
|
||||||
|
# 23. COMMENT, comment about reason for withdrawal
|
||||||
|
# 24. READ_COUNT, read count for the file
|
||||||
|
# 25. BASE_COUNT, basepair count for the file
|
||||||
|
def countBases(samples, seqIndex):
|
||||||
|
total = 0
|
||||||
|
|
||||||
|
for project, sampleID, withdrawnP, bases in flatFileIterator(seqIndex, [3,9,20,24]):
|
||||||
|
if ( withdrawnP == "0" and useProject(project) and sampleID in samples ):
|
||||||
|
if OPTIONS.verbose: print project, sampleID, withdrawnP, bases
|
||||||
|
sample = samples[sampleID]
|
||||||
|
sample.rawBases += int(bases)
|
||||||
|
total += int(bases)
|
||||||
|
|
||||||
|
printStatus(samples)
|
||||||
|
print 'Total raw bases', total
|
||||||
|
return total
|
||||||
|
|
||||||
|
def printStatus(samples):
    """Print the current state of every Sample in the samples dict.

    Replaces the py2-only dict.itervalues() and bare print statement with
    equivalents (values() / single-argument print(...)) that behave
    identically under py2 and also run under py3.
    """
    for sample in samples.values():
        print(sample)
|
||||||
|
|
||||||
|
def findVariantEvalResults(key, file, type=str):
    """Return the last whitespace-separated token of every line in file
    containing key, converted with the given type callable.

    Parameter names ('file', 'type') shadow builtins but are kept for
    backward compatibility with keyword callers.  Fix over the original:
    the file handle is closed deterministically via 'with'.
    """
    results = []
    with open(file) as fh:
        for line in fh:
            if key in line:
                # Last column of the matching line holds the value.
                results.append(type(line.split()[-1]))
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def getDBSNPRate(file):
    """Return the '% evals at comp' dbSNP overlap figure from a GATK
    VariantEval report file, or -1 when no report was supplied."""
    if file is None:
        return -1
    # Comp Overlap data point carrying the dbSNP concordance percentage.
    key = "[evaluation_name=eval].[comparison_name=dbsnp].[jexl_expression=none].[filter_name=called].[novelty_name=all].[analysis=Comp Overlap].[data_point=% evals at comp]"
    return findVariantEvalResults(key, file, float)[0]
|
||||||
|
|
||||||
|
def useProject(project):
    """True when no --project filter is set, or when project matches it."""
    if OPTIONS.project is None:
        return True
    return project == OPTIONS.project
|
||||||
|
|
||||||
|
def countMappedBases(samples, alignmentIndex):
    """Return (totalRawBases, totalMappedBases) for the tracked samples.

    When OPTIONS.coverageFile is set, the mapped total is read from the
    'Total' line of a GATK DepthOfCoverage .sample_summary file and the
    raw total is reported as -1 (unknown in that mode).  Otherwise both
    totals are accumulated from the .bas files in the alignment index.

    Fixes over the original: the coverage file is closed via 'with'; the
    py2-only map(string.split, ...) is replaced by str.split; and a
    missing 'Total' line now raises instead of silently returning None
    (the caller unpacks the result, so None crashed anyway, just less
    clearly).
    """
    if OPTIONS.coverageFile is None:
        return readMappedBasesFromBAS(samples, alignmentIndex)

    # Scan the summary for a line of the form:
    #   Total  340710  1187.14  N/A  N/A  N/A
    with open(OPTIONS.coverageFile) as summary:
        for line in summary:
            parts = line.split()
            if parts and parts[0] == "Total":
                # Raw base count is not available from the coverage file.
                return -1, int(parts[1])
    raise ValueError("no 'Total' line found in %s" % OPTIONS.coverageFile)
|
||||||
|
|
||||||
|
def readMappedBasesFromBAS(samples, alignmentIndex):
    """Accumulate raw and mapped base counts from per-file .bas statistics.

    Walks the 1KG alignment index and, for every tracked sample in the
    selected project, sums columns 7 and 8 of its .bas file (raw and
    mapped base counts, per the variable names -- confirm against the
    .bas format).  Raw bases are only accumulated when --rawBasesFromBas
    was given; otherwise they are expected to come from the sequence
    index via countBases.  Returns (totalBases, totalMapped).
    """
    totalBases = 0
    totalMapped = 0

    # Columns 2, 3, 6 of the alignment index: project/study, sample ID,
    # and the relative path of the per-file .bas statistics file.
    for project, sampleID, basFile in flatFileIterator(alignmentIndex, [2,3,6]):
        if ( useProject(project) and sampleID in samples ):
            if OPTIONS.verbose: print project, sampleID, basFile
            sample = samples[sampleID]

            # .bas paths are relative to the 1KG data root; skip=1 drops
            # the header row of the .bas file.
            for rawBases, mappedBases in flatFileIterator(os.path.join(OPTIONS.root, basFile), [7, 8], skip=1):
                if OPTIONS.rawBasesFromBas:
                    sample.rawBases += int(rawBases)
                    totalBases += int(rawBases)
                sample.mappedBases += int(mappedBases)
                totalMapped += int(mappedBases)

    printStatus(samples)
    print 'Total raw bases', totalBases
    print 'Total mapped bases', totalMapped

    return totalBases, totalMapped
|
||||||
|
|
||||||
|
def countSNPs(samples, snpsVCF, useIndels = False):
    """Count passing variant sites in snpsVCF and tally per-sample genotypes.

    For each record that passes filters: increments the site total when the
    record is variant, and for every tracked sample whose genotype call is
    non-reference and non-missing, increments that Sample's nIndels (when
    useIndels is True) or nSNPs.  Returns the total variant-site count.
    """
    total = 0

    # VCF sample columns start at index 9 of the header line.
    header, columnNames, remainingLines = vcfReader.readVCFHeader(open(snpsVCF))
    sampleIDs = columnNames[9:]

    for header, vcf, counter in vcfReader.lines2VCF(remainingLines, extendedOutput = True, decodeAll = False):
        # Optional cap on records processed; -1 (the default) means no cap.
        if ( counter > OPTIONS.maxRecords and OPTIONS.maxRecords != -1 ):
            break
        if vcf.passesFilters():
            if ( vcf.isVariant() ):
                total += 1

            if ( OPTIONS.verbose and total % 10000 == 0 ):
                print ' progress', vcf.getChrom(), vcf.getPos()

            # Per-sample genotype fields follow the FORMAT column.
            genotypes = vcf.rest[1:]
            for sampleID, genotypeField in itertools.izip(sampleIDs, genotypes):
                if sampleID in samples:
                    # The genotype call is the first ':'-separated subfield
                    # of the sample column (e.g. "0/1" from "0/1:12:...").
                    genotype = genotypeField.split(':')[0]
                    # Non-reference and non-missing counts as variant.
                    # NOTE(review): "0\0" embeds a NUL escape; "0\\0" (a
                    # backslash-separated genotype) was likely intended --
                    # confirm against the VCF producer.
                    variant = genotype != "0/0" and genotype != "0|0" and genotype != "0\0" and genotype != "./."
                    if variant:
                        if ( useIndels ):
                            samples[sampleID].nIndels += 1
                        else:
                            samples[sampleID].nSNPs += 1

    printStatus(samples)
    return total
|
||||||
|
|
||||||
|
def countIndels(samples, indelsVCF):
    """Count indel sites via countSNPs when an indel VCF was supplied.

    Returns 0 when no indel callset is available.
    """
    if indelsVCF is None:
        # No indel callset supplied: nothing to count.
        return 0
    # Delegate to the shared per-site counter in indel mode.
    return countSNPs(samples, indelsVCF, True)
|
||||||
|
|
||||||
|
def readSamples(vcf):
|
||||||
|
print 'Reading samples for', OPTIONS.population
|
||||||
|
header, columnNames, remainingLines = vcfReader.readVCFHeader(open(vcf))
|
||||||
|
samples = map(Sample, columnNames[9:])
|
||||||
|
if ( OPTIONS.onlySample != None ):
|
||||||
|
samples = filter( lambda x: x.getName() == OPTIONS.onlySample, samples )
|
||||||
|
|
||||||
|
print 'No. samples: ', len(samples)
|
||||||
|
print 'Samples: ', map(Sample.getName, samples)
|
||||||
|
|
||||||
|
return dict(map( lambda x: (x.getName(), x), samples))
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line driver: computes callset summary statistics (raw and
    # mapped bases, SNP/indel site counts, dbSNP novelty rates) for a set
    # of 1000 Genomes samples and writes a plain-text report.
    usage = "usage: %prog"
    parser = OptionParser(usage=usage)
    # Input indices and callsets.
    parser.add_option("-a", "--alignmentIndex", dest="alignmentIndex",type='string', default=None, help="1KG formated alignment index file")
    parser.add_option("-s", "--sequenceIndex", dest="sequenceIndex", type='string', default=None, help="1KG formated sequence index file")
    parser.add_option("", "--onlySample", dest="onlySample", type='string', default=None, help="If provide, only this sample will be processed")
    parser.add_option("", "--snps", dest="snps", type='string', default=None, help="SNPs VCF")
    parser.add_option("", "--snpsEval", dest="snpsVE", type='string', default=None, help="SNPs VCF VariantEval")
    parser.add_option("", "--indels", dest="indels", type='string', default=None, help="Indels VCF")
    parser.add_option("", "--indelsEval", dest="indelsVE", type='string', default=None, help="Indels VCF VariantEval")
    # Genome-size constants used for depth and coverage fractions.
    # NOTE(review): --calledGenome's help text duplicates --totalGenome's;
    # presumably it is the *called* portion of the genome -- confirm.
    parser.add_option("", "--totalGenome", dest="totalGenome", type='float', default=2.96e9, help="Size, in bp, of the callable genome")
    parser.add_option("", "--calledGenome", dest="calledGenome", type='float', default=None, help="Size, in bp, of the callable genome")
    parser.add_option("-p", "--pop", dest="population", type='string', default="Anonymous", help="Population")
    parser.add_option("", "--project", dest="project", type='string', default=None, help="If provided, will only include fastq/BAM files that match this project in the stats calculations")
    parser.add_option("-r", "--root", dest="root",type='string', default=".", help="Path to the 1KG data")
    # NOTE(review): -M's help text mentions a regex, but the value is used
    # as a record-count cap in countSNPs -- looks like a copy/paste slip.
    parser.add_option("-M", "--maxRecords", dest="maxRecords", type='int', default=-1, help="If provided, will only include fastq/BAM files that match this regex in the stats calculations")
    parser.add_option("-v", "--verbose", dest="verbose", action='store_true', default=False, help="If provided, will be verbose during output")
    parser.add_option("", "--rawBasesFromBas", dest="rawBasesFromBas", action='store_true', default=False, help="If provided, we'll take our raw base counts from the BAS file")
    # NOTE(review): -o's help text duplicates -r's; it is the report output path.
    parser.add_option("-o", "--output", dest="output",type='string', default=None, help="Path to the 1KG data")
    parser.add_option("-c", "--coverageFile", dest="coverageFile",type='string', default=None, help="Path to GATK DoC .sample_summary file")

    # OPTIONS is read as a module-level global by the helper functions above.
    (OPTIONS, args) = parser.parse_args()
    if len(args) != 0:
        parser.error("incorrect number of arguments")

    # The sample set comes from the SNP VCF's column header.
    samples = readSamples(OPTIONS.snps)
    nSamples = len(samples)

    # Mapped bases come from .bas files (or the DoC summary); raw bases
    # from the sequence index (unless --rawBasesFromBas was given).
    ignore, totalMappedBases = countMappedBases(samples, OPTIONS.alignmentIndex)
    totalBases = countBases(samples, OPTIONS.sequenceIndex)
    meanMappedDepth = totalMappedBases / OPTIONS.totalGenome / nSamples
    totalSNPs = countSNPs(samples, OPTIONS.snps)
    totalIndels = countIndels(samples, OPTIONS.indels)

    # Novelty = 100% minus the dbSNP overlap rate reported by VariantEval
    # (getDBSNPRate returns -1 when no report was given, yielding 101%).
    snpNoveltyRate = 100 - getDBSNPRate(OPTIONS.snpsVE)
    indelNoveltyRate = 100 - getDBSNPRate(OPTIONS.indelsVE)

    # Report goes to stdout unless -o was given.
    out = sys.stdout
    if ( OPTIONS.output != None ): out = open(OPTIONS.output, 'w')
    print >> out, 'number of samples', nSamples
    print >> out, 'total raw bases', totalBases
    print >> out, 'total mapped bases', totalMappedBases

    # mean mapped depth is total bases mapped divided by acgt reference base count divided by number of individuals, after rmdup: for exons this is calculated on the target region only
    print >> out, 'mean mapped depth', meanMappedDepth

    # NOTE(review): the next line crashes with a TypeError when
    # --calledGenome was not supplied (default None used in %f) -- confirm
    # the option is effectively required.
    print >> out, 'bases called (fraction ref genome) %f (%.2f%%)' % (OPTIONS.calledGenome, 100.0 * OPTIONS.calledGenome / OPTIONS.totalGenome)
    print >> out, 'number of SNP sites (%% novel) %d (%.2f%%)' % (totalSNPs, snpNoveltyRate)
    print >> out, 'average # SNP sites per individual', average(map(Sample.getNSNPs, samples.itervalues()))
    print >> out, 'number of indel sites (%% novel) %d (%.2f%%)' % (totalIndels, indelNoveltyRate)
    print >> out, 'average # indel sites per individual', average(map(Sample.getNIndels, samples.itervalues()))
    # NOTE(review): when no -o is given this closes sys.stdout -- confirm intended.
    out.close()
|
||||||
|
|
||||||
|
|
@ -0,0 +1,24 @@
|
||||||
|
import os.path
|
||||||
|
from optparse import OptionParser
|
||||||
|
|
||||||
|
def main():
    """Emit an IGV registry <Category> XML block listing each file argument
    as a <Resource> with its absolute path.

    Fixes over the original: the user-facing help string's "catagory" typo
    is corrected, and prints use the single-argument parenthesized form,
    which behaves identically under py2 and also runs under py3.
    """
    global OPTIONS
    usage = "usage: %prog [options] files"
    parser = OptionParser(usage=usage)
    parser.add_option("-c", "--category", dest="category",
                      type='string', default="Anonymous",
                      help="If provided, category name")

    (OPTIONS, args) = parser.parse_args()
    if len(args) == 0:
        parser.error("incorrect number of arguments")

    print('<Category name="%s">' % OPTIONS.category)
    for arg in args:
        # Resource name is the bare filename; path is its absolute location.
        path = os.path.abspath(arg)
        name = os.path.basename(arg)
        print(' <Resource name="%s" path="%s" serverURL="http://www.broad.mit.edu/webservices/igv"/>' % (name, path))
    print('</Category>')


if __name__ == "__main__":
    main()
|
||||||
|
|
@ -19,6 +19,7 @@ ref = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"
|
||||||
analysis = "CombineDuplicates"
|
analysis = "CombineDuplicates"
|
||||||
|
|
||||||
MERGE_BIN = '/seq/software/picard/current/bin/MergeSamFiles.jar'
|
MERGE_BIN = '/seq/software/picard/current/bin/MergeSamFiles.jar'
|
||||||
|
FIXMATES_BIN = '/seq/software/picard/current/bin/FixMateInformation.jar'
|
||||||
SAMTOOLS_MERGE_BIN = '/seq/dirseq/samtools/current/samtools merge'
|
SAMTOOLS_MERGE_BIN = '/seq/dirseq/samtools/current/samtools merge'
|
||||||
CALL_GENOTYPES_BIN = '/seq/software/picard/current/bin/CallGenotypes.jar'
|
CALL_GENOTYPES_BIN = '/seq/software/picard/current/bin/CallGenotypes.jar'
|
||||||
|
|
||||||
|
|
@ -167,6 +168,12 @@ def mergeBAMCmd( output_filename, inputFiles, mergeBin = MERGE_BIN, MSD = True,
|
||||||
return 'java ' + memLimit + ' -jar ' + mergeBin + ' ' + MSDStr + ' AS=true COMPRESSION_LEVEL=' + str(compression_level) + ' SO=coordinate O=' + output_filename + ' VALIDATION_STRINGENCY=SILENT ' + ' I=' + (' I='.join(inputFiles))
|
return 'java ' + memLimit + ' -jar ' + mergeBin + ' ' + MSDStr + ' AS=true COMPRESSION_LEVEL=' + str(compression_level) + ' SO=coordinate O=' + output_filename + ' VALIDATION_STRINGENCY=SILENT ' + ' I=' + (' I='.join(inputFiles))
|
||||||
#return 'java -Xmx4096m -jar ' + mergeBin + ' AS=true SO=coordinate O=' + output_filename + ' VALIDATION_STRINGENCY=SILENT ' + ' I=' + (' I='.join(inputFiles))
|
#return 'java -Xmx4096m -jar ' + mergeBin + ' AS=true SO=coordinate O=' + output_filename + ' VALIDATION_STRINGENCY=SILENT ' + ' I=' + (' I='.join(inputFiles))
|
||||||
|
|
||||||
|
def mergeFixingMatesBAMCmd( output_filename, inputFiles, memLimit = '-Xmx4096m', compression_level = 1, tmpdir = '/broad/shptmp' ):
|
||||||
|
if type(inputFiles) <> list:
|
||||||
|
inputFiles = list(inputFiles)
|
||||||
|
|
||||||
|
return 'java %s -Djava.io.tmpdir=%s -jar %s COMPRESSION_LEVEL=%d SO=coordinate O=%s VALIDATION_STRINGENCY=SILENT I=%s' % ( memLimit, tmpdir, FIXMATES_BIN, compression_level, output_filename, 'I='.join(inputFiles) )
|
||||||
|
|
||||||
def getPicardPath(lane, picardRoot = '/seq/picard/'):
|
def getPicardPath(lane, picardRoot = '/seq/picard/'):
|
||||||
flowcell, laneNo = lane.split('.')
|
flowcell, laneNo = lane.split('.')
|
||||||
filePat = os.path.join(picardRoot, flowcell, '*', laneNo, '*')
|
filePat = os.path.join(picardRoot, flowcell, '*', laneNo, '*')
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,12 @@ import string
|
||||||
import picard_utils
|
import picard_utils
|
||||||
from madPipelineUtils import *
|
from madPipelineUtils import *
|
||||||
|
|
||||||
|
def getContigs(optionContigs, hg18):
|
||||||
|
if optionContigs != None:
|
||||||
|
return optionContigs.split(",")
|
||||||
|
else:
|
||||||
|
return hg18
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
global OPTIONS
|
global OPTIONS
|
||||||
usage = "usage: %prog [options] stages input.bam outputRoot"
|
usage = "usage: %prog [options] stages input.bam outputRoot"
|
||||||
|
|
@ -28,6 +34,9 @@ def main():
|
||||||
parser.add_option("-n", "--name", dest="name",
|
parser.add_option("-n", "--name", dest="name",
|
||||||
type="string", default="realignBamByChr",
|
type="string", default="realignBamByChr",
|
||||||
help="Farm queue to send processing jobs to")
|
help="Farm queue to send processing jobs to")
|
||||||
|
parser.add_option("-c", "--contigs", dest="contigs",
|
||||||
|
type="string", default=None,
|
||||||
|
help="Comma-separated list of contig:start-stop values to pass to the cleaner. Overrides whole-genome if provided")
|
||||||
parser.add_option("-q", "--farm", dest="farmQueue",
|
parser.add_option("-q", "--farm", dest="farmQueue",
|
||||||
type="string", default=None,
|
type="string", default=None,
|
||||||
help="Farm queue to send processing jobs to")
|
help="Farm queue to send processing jobs to")
|
||||||
|
|
@ -61,7 +70,7 @@ def main():
|
||||||
|
|
||||||
out = open(outputBamList, 'w')
|
out = open(outputBamList, 'w')
|
||||||
realignInfo = []
|
realignInfo = []
|
||||||
for chr in hg18:
|
for chr in getContigs(OPTIONS.contigs, hg18):
|
||||||
lastJobs = None
|
lastJobs = None
|
||||||
|
|
||||||
def updateNewJobs(newjobs, lastJobs):
|
def updateNewJobs(newjobs, lastJobs):
|
||||||
|
|
@ -104,7 +113,7 @@ def createTargets( myPipelineArgs, chr, inputBam, outputRoot, args, lastJobs ):
|
||||||
|
|
||||||
def realign( myPipelineArgs, chr, inputBam, outputRoot, intervals, lastJobs ):
|
def realign( myPipelineArgs, chr, inputBam, outputRoot, intervals, lastJobs ):
|
||||||
outputBAM = outputRoot + ".bam"
|
outputBAM = outputRoot + ".bam"
|
||||||
GATKArgs = '-T IndelRealigner -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_hg18.rod -I %s -targetIntervals %s --output %s -sort ON_DISK -mrl 100000 -L %s' % (inputBam, intervals, outputBAM, chr)
|
GATKArgs = '-T IndelRealigner -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_hg18.rod -I %s -targetIntervals %s --output %s -L %s' % (inputBam, intervals, outputBAM, chr)
|
||||||
return simpleGATKCommand( myPipelineArgs, 'Realign' + chr, GATKArgs, lastJobs ), outputBAM
|
return simpleGATKCommand( myPipelineArgs, 'Realign' + chr, GATKArgs, lastJobs ), outputBAM
|
||||||
|
|
||||||
def index( myPipelineArgs, chr, inputBam, outputRoot, realignedBam, lastJobs ):
|
def index( myPipelineArgs, chr, inputBam, outputRoot, realignedBam, lastJobs ):
|
||||||
|
|
@ -112,7 +121,8 @@ def index( myPipelineArgs, chr, inputBam, outputRoot, realignedBam, lastJobs ):
|
||||||
|
|
||||||
def mergeBams( myPipelineArgs, outputFilename, bamsToMerge, lastJobs ):
|
def mergeBams( myPipelineArgs, outputFilename, bamsToMerge, lastJobs ):
|
||||||
print lastJobs
|
print lastJobs
|
||||||
cmd = picard_utils.mergeBAMCmd( outputFilename, bamsToMerge, compression_level = 5 )
|
#cmd = picard_utils.mergeBAMCmd( outputFilename, bamsToMerge, compression_level = 5 )
|
||||||
|
cmd = picard_utils.mergeFixingMatesBAMCmd(outputFilename, bamsToMerge, compression_level = 5)
|
||||||
return FarmJob(cmd, jobName = 'merge.' + myPipelineArgs.name, dependencies = lastJobs)
|
return FarmJob(cmd, jobName = 'merge.' + myPipelineArgs.name, dependencies = lastJobs)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,7 @@ class VCFRecord:
|
||||||
def getFilter(self): return self.get("FILTER")
|
def getFilter(self): return self.get("FILTER")
|
||||||
def failsFilters(self): return not self.passesFilters()
|
def failsFilters(self): return not self.passesFilters()
|
||||||
def passesFilters(self):
|
def passesFilters(self):
|
||||||
v = self.getFilter() == "." or self.getFilter() == "0"
|
v = self.getFilter() == "." or self.getFilter() == "0" or self.getFilter() == 'PASSES'
|
||||||
#print self.getFilter(), ">>>", v, self
|
#print self.getFilter(), ">>>", v, self
|
||||||
return v
|
return v
|
||||||
|
|
||||||
|
|
@ -110,6 +110,8 @@ class VCFRecord:
|
||||||
else:
|
else:
|
||||||
return str(x) + '=' + str(y)
|
return str(x) + '=' + str(y)
|
||||||
v = ';'.join(map(lambda x: info2str(*x), sorted(self.info.iteritems(), key=lambda x: x[0])))
|
v = ';'.join(map(lambda x: info2str(*x), sorted(self.info.iteritems(), key=lambda x: x[0])))
|
||||||
|
if v == "":
|
||||||
|
v = "."
|
||||||
#print 'V = ', v
|
#print 'V = ', v
|
||||||
return v
|
return v
|
||||||
|
|
||||||
|
|
@ -134,12 +136,13 @@ class VCFRecord:
|
||||||
|
|
||||||
def parseInfo(s):
|
def parseInfo(s):
|
||||||
d = dict()
|
d = dict()
|
||||||
for elt in s.split(";"):
|
if s != "":
|
||||||
if '=' in elt:
|
for elt in s.split(";"):
|
||||||
key, val = elt.split('=')
|
if '=' in elt:
|
||||||
else:
|
key, val = elt.split('=')
|
||||||
key, val = elt, 1
|
else:
|
||||||
d[key] = val
|
key, val = elt, 1
|
||||||
|
d[key] = val
|
||||||
return d
|
return d
|
||||||
|
|
||||||
# def parseInfo(s):
|
# def parseInfo(s):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue