88 lines
2.6 KiB
Python
88 lines
2.6 KiB
Python
|
|
#!/usr/bin/env python
|
||
|
|
|
||
|
|
import string, sys
|
||
|
|
|
||
|
|
class aln_record:
    """Stores one record of data from a MAQ .aln.txt file.

    A record is built from one whitespace-separated text line.  The column
    values are kept, in file order, in ``self.fields``; ``field_names``
    labels the columns in that same order and ``field_formats`` gives the
    converter used for each column when parsing is requested.
    """

    field_names = (
        "read name",
        "chromosome",
        "position",
        "strand",
        "insert size from the outer coorniates of a pair",
        "paired flag",
        "mapping quality",
        "single-end mapping quality",
        "alternative mapping quality",
        "number of mismatches of the best hit",
        "sum of qualities of mismatched bases of the best hit",
        "number of 0-mismatch hits of the first 24bp",
        "number of 1-mismatch hits of the first 24bp on the reference",
        "length of the read",
        "read sequence",
        "sequence quality"
    )
    # Width of the longest label; __str__ right-aligns every label to it.
    max_field_name_len = max(map(len, field_names))

    # One converter per column, in the same order as field_names.
    field_formats = (str, str, int, str) + (int,) * 10 + (str, str)

    def __init__(self, obj, parse=True):
        """Build a record from one line of text.

        obj   -- the text line (must be a str).
        parse -- when true, convert each column with ``field_formats``;
                 when false, keep every column as the raw string.

        Raises TypeError if obj is not a str, and whatever
        setValuesFromString raises for a malformed line.
        """
        self.fields = []
        if not isinstance(obj, str):
            raise TypeError("aln_record did not recognize type: " + str(type(obj)))
        self.setValuesFromString(obj, parse)

    def setValuesFromString(self, line, parse=True):
        """Replace ``self.fields`` with the whitespace-separated columns of line.

        With parse true the line must contain exactly one column per entry of
        ``field_formats``; each column is converted with its converter.
        Raises ValueError if the column count is wrong or a numeric column
        does not hold an integer.  With parse false the raw strings are
        stored unchecked.
        """
        values = line.split()
        if not parse:
            self.fields = values
            return

        expected = len(self.field_formats)
        if len(values) != expected:
            # Fail loudly here instead of letting a short or long line turn
            # into a confusing error (or bogus values) further downstream.
            raise ValueError(
                "expected %d fields in alignment record, found %d"
                % (expected, len(values)))

        # A real list (not a lazy map object) so the accessors can index it
        # on both Python 2 and Python 3.
        self.fields = [convert(value)
                       for convert, value in zip(self.field_formats, values)]

    def __str__(self):
        """Return one "label : value" line per stored field, labels right-aligned."""
        template = "%" + str(aln_record.max_field_name_len) + "s : %s\n"
        return "".join(template % (name, str(value))
                       for name, value in zip(aln_record.field_names, self.fields))

    def id(self):
        """Return the read name (column 0)."""
        return self.fields[0]

    def contig(self):
        """Return the chromosome / contig name (column 1)."""
        return self.fields[1]

    def offset(self):
        """Return the position column minus one.

        Needs a parsed record: with parse=False the column is still a string.
        NOTE(review): presumably the 0-based form of a 1-based position -- confirm.
        """
        return self.fields[2] - 1

    def pos(self):
        """Return the position column (column 2) as stored."""
        return self.fields[2]

    # Quality of read mapping (only maq gives this field)
    def map_qual(self):
        """Return the mapping quality (column 6)."""
        return self.fields[6]
|
||
|
|
|
||
|
|
|
||
|
|
class aln_file:
    """Iterable reader that yields one aln_record per line of a MAQ .aln.txt file.

    The file is opened when the object is created and stays open until
    close() is called (or the object is used in a ``with`` statement).
    Every iterator obtained from the same object reads from the same file
    handle, so a second iteration continues where the previous one stopped.
    """

    def __init__(self, filename, parse=True):
        """Open filename for reading.

        filename -- path of the alignment file.
        parse    -- passed through to aln_record for every line.
        """
        self.filename = filename
        self.parse = parse
        self.faln = open(self.filename)

    def close(self):
        """Close the underlying file handle."""
        self.faln.close()

    def __enter__(self):
        """Support ``with aln_file(path) as f:``; returns this reader."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file on leaving the ``with`` block; never suppresses errors."""
        self.close()
        return False

    def RecordGenerator(self):
        """Generate an aln_record for each remaining line of the file."""
        for line in self.faln:
            yield aln_record(line, self.parse)
        # Falling off the end finishes the generator.  An explicit
        # ``raise StopIteration`` here becomes a RuntimeError under PEP 479.

    def __iter__(self):
        """Return a generator over the records in the file."""
        return self.RecordGenerator()
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Manual smoke test: print every record of the alignment file named on
    # the command line, or a usage hint when the argument count is wrong.
    # print is called with a single parenthesised argument so the statement
    # behaves the same on Python 2 and Python 3.
    if len(sys.argv) != 2:
        print("To test aln_file class:\naln_file.py ALN_FILE")
    else:
        for aln in aln_file(sys.argv[1]):
            print(aln)
|