Better merge support

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@748 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-05-18 21:18:51 +00:00
parent 3441795d9c
commit e9f85ef920
2 changed files with 25 additions and 2 deletions

View File

@ -13,7 +13,7 @@ bam_ext = '.bam'
if __name__ == "__main__":
usage = "usage: %prog [options]"
parser = OptionParser(usage=usage)
parser.add_option("-f", "--farm", dest="farm_sub",
parser.add_option("-q", "--farm", dest="farm_sub",
type="string", default=None,
help="Farm queue to send processing jobs to")
parser.add_option("-d", "--dir", dest="output_dir",
@ -28,6 +28,10 @@ if __name__ == "__main__":
parser.error("incorrect number of arguments")
directory = OPTIONS.output_dir
if not os.path.exists(directory):
os.mkdir(directory)
today = date.today()
time_stamp = today.isoformat()
@ -41,7 +45,7 @@ if __name__ == "__main__":
sources = reduce( operator.__add__, map( glob.glob, s[1:] ), [] )
if OPTIONS.ignoreExistingFiles or not os.path.exists(output_filename):
cmd = 'java -Xmx4096m -jar ' + MERGE_BIN + ' AS=true O=' + output_filename + ' VALIDATION_STRINGENCY=SILENT ' + ' I=' + (' I='.join(sources))
cmd = 'java -Xmx4096m -jar ' + MERGE_BIN + ' AS=true SO=coordinate O=' + output_filename + ' VALIDATION_STRINGENCY=SILENT ' + ' I=' + (' I='.join(sources))
print cmd
farm_commands.cmd(cmd, OPTIONS.farm_sub, output)

View File

@ -92,6 +92,18 @@ SAM_ISFIRSTREAD = 0x0040 # the read is the first read in a pair 1,2
SAM_ISSECONDREAD = 0x0080 # the read is the second read in a pair 1,2
SAM_NOTPRIMARY = 0x0100 # the alignment is not primary (a read having split hits may have multiple primary alignment records)
# Maps each SAM flag bit-mask constant to its human-readable description.
# NOTE(review): the trailing "1" / "1,2" on several descriptions look like
# footnote markers carried over from the SAM flag table -- confirm before
# relying on (or trimming) the exact text.
SAM_FLAGS = {
    SAM_SEQPAIRED: 'the read is paired in sequencing, no matter whether it is mapped in a pair',
    SAM_MAPPAIRED: 'the read is mapped in a proper pair (depends on the protocol, normally inferred during alignment) 1',
    SAM_UNMAPPED: 'the query sequence itself is unmapped',
    SAM_MATEUNMAPPED: 'the mate is unmapped 1',
    SAM_QUERYSTRAND: 'strand of the query (0 for forward; 1 for reverse strand)',
    SAM_MATESTRAND: 'strand of the mate 1',
    SAM_ISFIRSTREAD: 'the read is the first read in a pair 1,2',
    SAM_ISSECONDREAD: 'the read is the second read in a pair 1,2',
    SAM_NOTPRIMARY: 'the alignment is not primary (a read having split hits may have multiple primary alignment records)',
}
def SAMRecordFromArgs( qname, flags, rname, pos, mapq, cigar, seq, quals, pairContig = '*', pairPos = 0, insertSize = 0 ):
r = SAMRecord()
r.setValuesFromArgs( qname, flags, rname, pos, mapq, cigar, seq, quals, pairContig, pairPos, insertSize )
@ -104,9 +116,16 @@ def SAMRecordFromString( str ):
def SAMFlagValue( flags, testFlag ):
    """Mask flags with testFlag.

    Returns the bitwise AND of the two values: zero when none of the bits in
    testFlag are present in flags, otherwise the bits they have in common.
    """
    masked = flags & testFlag
    return masked
def SAMFlagIsSet( flags, testFlag ):
    """Report whether testFlag is present in flags.

    Returns True when SAMFlagValue(flags, testFlag) is non-zero, i.e. when at
    least one bit of testFlag is set in flags; False otherwise.
    """
    # `!=` instead of the obsolete `<>` spelling: same result on Python 2,
    # and still valid syntax on Python 3 where `<>` was removed.
    return SAMFlagValue(flags, testFlag) != 0
def SAMFlagsDescs( flags ):
    """Describe every known SAM flag with respect to a flags value.

    Walks all entries of SAM_FLAGS and returns a sorted list with one
    [flagKey, isSet, flagDesc] list per entry, where isSet is the result of
    SAMFlagIsSet(flags, flagKey).  Entries compare element by element, so the
    result is ordered by flagKey (dict keys are unique).
    """
    # .items() rather than .iteritems(): works on both Python 2 and Python 3.
    return sorted(
        [flagKey, SAMFlagIsSet(flags, flagKey), flagDesc]
        for flagKey, flagDesc in SAM_FLAGS.items()
    )
# -----------------------------------------------------------------------------------------------
#
# This is really the meat of the SAM I/O system.