gatk-3.8/python/aln_file.nocvs.py

88 lines
2.6 KiB
Python
Executable File

#!/usr/bin/env python
import string, sys
class aln_record:
    """Stores one record of data from a MAQ .aln.txt file.

    A record is built from one whitespace-separated text line.  The parsed
    (or raw) column values are kept, in file order, in ``self.fields``;
    ``field_names`` gives the human-readable label of each column.
    """

    # Human-readable label of each column, in file order.  These strings are
    # printed by __str__ and their lengths determine the column width.
    field_names = (
        "read name",
        "chromosome",
        "position",
        "strand",
        "insert size from the outer coorniates of a pair",
        "paired flag",
        "mapping quality",
        "single-end mapping quality",
        "alternative mapping quality",
        "number of mismatches of the best hit",
        "sum of qualities of mismatched bases of the best hit",
        "number of 0-mismatch hits of the first 24bp",
        "number of 1-mismatch hits of the first 24bp on the reference",
        "length of the read",
        "read sequence",
        "sequence quality"
    )

    # Converter applied to each column (same order as field_names) when a
    # line is parsed.
    field_formats = (
        str, str, int, str, int, int, int, int,
        int, int, int, int, int, int, str, str,
    )

    # Width used by __str__ to right-align the labels.
    max_field_name_len = max(map(len, field_names))

    def __init__(self, obj, parse=True):
        """Build a record from one text line.

        obj   -- the line to parse; must be a str.
        parse -- if true, convert columns with ``field_formats``; otherwise
                 keep every column as the string found in the line.

        Raises TypeError if ``obj`` is not a str.
        """
        self.fields = []
        if isinstance(obj, str):
            self.setValuesFromString(obj, parse)
        else:
            raise TypeError("aln_record did not recognize type: "+str(type(obj)))

    def setValuesFromString(self, line, parse=True):
        """Split ``line`` on whitespace and store the columns in ``self.fields``.

        With ``parse`` true each token is passed through the converter at the
        same index of ``field_formats`` (a non-numeric token in an int column
        raises ValueError).  Tokens and converters are paired with zip, so a
        line with a different number of columns is cut to the shorter of the
        two.  With ``parse`` false the tokens are stored unchanged.
        """
        tokens = line.split()
        if parse:
            # Build a real list: the accessors below index into self.fields,
            # which a lazy map object (Python 3) does not support.
            self.fields = [convert(token)
                           for convert, token in zip(aln_record.field_formats, tokens)]
        else:
            self.fields = tokens

    def __str__(self):
        """Return one "<right-aligned label> : <value>" line per stored field."""
        row_format = "%" + str(aln_record.max_field_name_len) + "s : %s\n"
        return "".join(row_format % (label, str(value))
                       for label, value in zip(aln_record.field_names, self.fields))

    def id(self):
        """Return the read name (column 0)."""
        return self.fields[0]

    def contig(self):
        """Return the chromosome (column 1)."""
        return self.fields[1]

    def offset(self):
        """Return the position (column 2) minus one.

        Presumably a 0-based offset, assuming the file's position is 1-based.
        Needs a record built with parse=True: on raw string fields the
        subtraction raises TypeError.
        """
        return self.fields[2] - 1

    def pos(self):
        """Return the position (column 2) as stored."""
        return self.fields[2]

    # Quality of read mapping (only maq gives this field)
    def map_qual(self):
        """Return the mapping quality (column 6)."""
        return self.fields[6]
class aln_file:
    """Iterable over the records of a MAQ .aln.txt file.

    The file is opened when the object is created; iterating yields one
    aln_record per line.  All iterators share the single underlying file
    handle, so the records can be read through only once.
    """

    def __init__(self, filename, parse=True):
        """Open ``filename`` for reading.

        filename -- path of the file to read.
        parse    -- forwarded to aln_record for every line (see that class).
        """
        self.filename = filename
        self.parse = parse
        self.faln = open(self.filename)

    def close(self):
        """Close the underlying file handle."""
        self.faln.close()

    def __enter__(self):
        """Allow ``with aln_file(path) as records:``; returns self."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file when the ``with`` block ends; exceptions propagate."""
        self.close()
        return False

    def RecordGenerator(self):
        """Yield an aln_record for each remaining line of the file."""
        for line in self.faln:
            yield aln_record(line, self.parse)
        # Falling off the end finishes the generator.  An explicit
        # "raise StopIteration" here is turned into RuntimeError on
        # Python 3.7+ (PEP 479).

    def __iter__(self):
        """Return a generator over the remaining records."""
        return self.RecordGenerator()
if __name__ == "__main__":
    # Smoke test: pretty-print every record of the file named on the
    # command line.  print is called with a single parenthesised argument
    # so the statement behaves the same on Python 2 and Python 3.
    if len(sys.argv) != 2:
        print("To test aln_file class:\naln_file.py ALN_FILE")
    else:
        for aln in aln_file(sys.argv[1]):
            print(aln)