diff --git a/python/MergeBAMBatch.py b/python/MergeBAMBatch.py index 20bcb3a8d..1facedfb4 100755 --- a/python/MergeBAMBatch.py +++ b/python/MergeBAMBatch.py @@ -13,7 +13,7 @@ bam_ext = '.bam' if __name__ == "__main__": usage = "usage: %prog [options]" parser = OptionParser(usage=usage) - parser.add_option("-f", "--farm", dest="farm_sub", + parser.add_option("-q", "--farm", dest="farm_sub", type="string", default=None, help="Farm queue to send processing jobs to") parser.add_option("-d", "--dir", dest="output_dir", @@ -28,6 +28,10 @@ if __name__ == "__main__": parser.error("incorrect number of arguments") directory = OPTIONS.output_dir + + if not os.path.exists(directory): + os.mkdir(directory) + today = date.today() time_stamp = today.isoformat() @@ -41,7 +45,7 @@ if __name__ == "__main__": sources = reduce( operator.__add__, map( glob.glob, s[1:] ), [] ) if OPTIONS.ignoreExistingFiles or not os.path.exists(output_filename): - cmd = 'java -Xmx4096m -jar ' + MERGE_BIN + ' AS=true O=' + output_filename + ' VALIDATION_STRINGENCY=SILENT ' + ' I=' + (' I='.join(sources)) + cmd = 'java -Xmx4096m -jar ' + MERGE_BIN + ' AS=true SO=coordinate O=' + output_filename + ' VALIDATION_STRINGENCY=SILENT ' + ' I=' + (' I='.join(sources)) print cmd farm_commands.cmd(cmd, OPTIONS.farm_sub, output) diff --git a/python/SAM.py b/python/SAM.py index 4a2e03a3c..cfeb3c008 100644 --- a/python/SAM.py +++ b/python/SAM.py @@ -92,6 +92,18 @@ SAM_ISFIRSTREAD = 0x0040 # the read is the first read in a pair 1,2 SAM_ISSECONDREAD = 0x0080 # the read is the second read in a pair 1,2 SAM_NOTPRIMARY = 0x0100 # the alignment is not primary (a read having split hits may have multiple primary alignment records) +SAM_FLAGS = { + SAM_SEQPAIRED : 'the read is paired in sequencing, no matter whether it is mapped in a pair', + SAM_MAPPAIRED : 'the read is mapped in a proper pair (depends on the protocol, normally inferred during alignment) 1', + SAM_UNMAPPED : 'the query sequence itself is unmapped', + SAM_MATEUNMAPPED : 'the mate is unmapped 1', + SAM_QUERYSTRAND : 'strand of the query (0 for forward; 1 for reverse strand)' , + SAM_MATESTRAND : 'strand of the mate 1' , + SAM_ISFIRSTREAD : 'the read is the first read in a pair 1,2' , + SAM_ISSECONDREAD : 'the read is the second read in a pair 1,2', + SAM_NOTPRIMARY : 'the alignment is not primary (a read having split hits may have multiple primary alignment records)' + } + def SAMRecordFromArgs( qname, flags, rname, pos, mapq, cigar, seq, quals, pairContig = '*', pairPos = 0, insertSize = 0 ): r = SAMRecord() r.setValuesFromArgs( qname, flags, rname, pos, mapq, cigar, seq, quals, pairContig, pairPos, insertSize ) @@ -104,9 +116,16 @@ def SAMRecordFromString( str ): def SAMFlagValue( flags, testFlag ): return testFlag & flags + def SAMFlagIsSet( flags, testFlag ): return SAMFlagValue(flags, testFlag) <> 0 +def SAMFlagsDescs( flags ): + def keepMe(p): + flagKey, flagDesc = p + return [flagKey, SAMFlagIsSet(flags, flagKey), flagDesc] + return sorted(map( keepMe, SAM_FLAGS.iteritems() )) + # ----------------------------------------------------------------------------------------------- # # This is really the meat of the SAM I/O system.