#!/util/bin/python from optparse import OptionParser import os import sys #import set import random import string from SAM import * def all(iterable): for element in iterable: if not element: return False return True def any(iterable): for element in iterable: if element: return True return False OPTIONS = None bases = set(['a', 't', 'g', 'c']) class Mutation: """Representation of a change from one sequence to another""" SNP = "SNP" INSERTION = "Insertion" DELETION = "Deletion" TYPES = set([SNP, INSERTION, DELETION]) CIGARChars = { SNP : 'M', INSERTION : 'I', DELETION : 'D' } def __init__( self, contig, pos, type, original, mutated ): # example: chr20 123 SNP "a" "t" <- a to t at base 123 on chr20 # example: chr20 123 INSERTION "" "atg" <- insertion of 3 bases starting at pos 123 # example: chr20 123 DELETION "atg" "" <- deletion of 3 bases starting at pos 123 self.contig = contig # contig of the mutation self._pos = pos # position of the change in contig coordinates self._type = type # one of the TYPES assert(self._type in Mutation.TYPES) self.original = original # String of the original genotype self.mutated = mutated # String of the mutated genotype def pos(self, offset=0): return self._pos - offset def type(self): return self._type def __len__(self): return max( len(self.mutated), len(self.original) ) def mutString( self ): if self._type == Mutation.SNP: return 'P:%s->%s' % (self.original, self.mutated) elif self._type == Mutation.INSERTION: return 'I:%s' % (self.mutated) elif self._type == Mutation.DELETION: return 'D:%s' % (self.original) else: raise Exception('Unexpected mutation type ' + self._type) def CIGARChar( self ): return Mutation.CIGARChars[self.type()] def __str__( self ): return '%s:%d %s' % (self.contig, self._pos, self.mutString()) def __repr__( self ): return 'Mutation(%s, %d, %s)' % (self.contig, self._pos, self.mutString()) def execute( self, offset, refseq, mutseq ): p = self.pos(offset) if self.type() == Mutation.SNP: mutseq[p] = self.mutated elif self.type() == Mutation.INSERTION: refseq[p:p] = string.join(['-'] * len(self),'') mutseq[p:p] = self.mutated elif self.type() == Mutation.DELETION: mutseq[p:p+len(self)] = string.join(['-'] * len(self), '') refseq.extend(self.mutated) mutseq.extend(self.mutated) #print refseq, mutseq return refseq, mutseq # --------------------------------------------------------------------------- # # Code for dealing with CIGAR strings # # --------------------------------------------------------------------------- def flatten(l): out = [] for item in l: if isinstance(item, (list, tuple)): out.extend(flatten(item)) else: out.append(item) return out def mutationsCIGAR( readStart, alignStart, readLen, mutations ): if len(mutations) == 1 and mutations[0].type() <> Mutation.SNP: mut = mutations[0] internalPos = mut.pos() - alignStart leftLen = max(internalPos - readStart,0) mutLen = min(internalPos + len(mut) - readStart, readLen - leftLen, len(mut)) rightLen = readLen - leftLen - mutLen #print mut, readStart, alignStart, internalPos, leftLen, mutLen, rightLen #print l = [ (leftLen, 'M'), (mutLen, mut.CIGARChar()), (rightLen, 'M') ] l = filter( lambda x: x[0] > 0, l ) return string.join(map(str, flatten(l)), '') if not all( map( lambda mut: mut.type() == Mutation.SNP, mutations ) ): # bail raise Exception('Currently only supports multiple SNPs CIGARS') return str(readLen) + 'M' # def mutationsCIGARBAD( refStart, readStart, readStop, mutations ): # sortedMutations = sorted(mutations, key=Mutation.pos) # CIGAR = [] # pos = readStart # # for mutation in sortedMutations: # internalMutationStart = mutation.pos() - refStart # if internalMutationStart >= readStart and internalMutationStart < readStop # delta = mutation.pos() - pos # if not OPTIONS.quiet: # print 'mutationsCIGAR', pos, CIGAR, delta, mutation # if mutation.type() <> Mutation.SNP and delta > 0: # CIGAR.append([delta, 'M']) # pos = mutation.pos() # # if mutation.type == Mutation.INSERTION: # CIGAR.append(['I', len(mutation)]) # if mutation.type == Mutation.DELETION: # CIGAR.append(['D', len(mutation)]) # # delta = refSeqPos + refLen - pos # if not OPTIONS.quiet: # print 'mutationsCIGAR', pos, CIGAR, delta, mutation # if delta > 0: # CIGAR.append([delta, 'M']) # # s = string.join(map( str, flatten(CIGAR) ), '') # print 'CIGAR:', CIGAR, s # return s # --------------------------------------------------------------------------- # # mutating the reference # # --------------------------------------------------------------------------- def executeMutations( mutations, seqIndex, refseq, mutseq ): for mutation in mutations: internalSite = mutation.pos() - seqIndex refseq, mutseq = mutation.execute( seqIndex, refseq, mutseq ) return refseq, mutseq def isBadSequenceForSim(seq): if any( map( lambda b: b not in bases, seq.tostring().lower() ) ): return True else: return False def mutateReference(ref, contig, mutSite, mutType, mutParams, nBasesToPad): #print 'mutateReference' # start and stop refStartIndex = mutSite - nBasesToPad + 1 internalStartIndex = nBasesToPad - 1 if OPTIONS.mutationType == 'SNP' or OPTIONS.mutationType == 'NONE': refStopIndex = mutSite + nBasesToPad else: refStopIndex = mutSite + nBasesToPad - 1 refsub = ref[refStartIndex : refStopIndex] mutseq = refsub.tomutable() refsub = refsub.tomutable() mutations = [] def success(): return True, mutations, refsub, mutseq def fail(): return False, [], None, None if refStartIndex < 0: return fail() if isBadSequenceForSim(refsub): #print 'rejecting seq', refsub.tostring() #print map( lambda b: b not in bases, refsub.tostring().lower() ) return fail() if not OPTIONS.quiet: print ref print refsub otherSites = set(range(refStartIndex, refStopIndex)) - set([mutSite]) # print 'otherSites', otherSites for site in [mutSite] + random.sample(otherSites, max(OPTIONS.mutationDensity-1,0)): internalSite = site - refStartIndex if mutType == 'NONE' or OPTIONS.mutationDensity < 1: pass elif mutType == 'SNP': mutBase = ref[site].lower() otherBases = bases - set(mutBase) otherBase = random.choice(list(otherBases)) assert mutBase <> otherBase if not OPTIONS.quiet: print mutBase, ' => ', otherBase mutations.append( Mutation( contig, site, Mutation.SNP, mutBase, otherBase ) ) elif mutType == 'INSERTION': inSeq = string.join([ random.choice(list(bases)) for i in range( OPTIONS.indelSize ) ], '') mutation = Mutation( contig, site, Mutation.INSERTION, '', inSeq ) mutations.append( mutation ) elif mutType == 'DELETION': inSeq = string.join([ random.choice(list(bases)) for i in range( OPTIONS.indelSize ) ], '') mutation = Mutation( contig, site, Mutation.DELETION, '', ref[refStopIndex:refStopIndex+OPTIONS.indelSize]) mutations.append( mutation ) else: raise Exception('Unexpected mutation type', mutType) # process the mutations refsub, mutseq = executeMutations( mutations, refStartIndex, refsub, mutseq ) return success() # version 1.0 -- prototype # def mutateReference(ref, mutSite, mutType, mutParams, nBasesToPad): # #print 'mutateReference' # refStartIndex = mutSite - nBasesToPad + 1 # refStopIndex = mutSite + nBasesToPad # internalStartIndex = nBasesToPad - 1 # # if refStartIndex < 0: # return False, None, None # # refsub = ref[refStartIndex : refStopIndex] # print ref # print refsub # # if mutType == 'SNP' or mutType == 'None': # #print ' In IF' # mutBase = ref[mutSite] # bases = set(['A', 'T', 'G', 'C']) # if mutBase in bases: # otherBases = bases - set(mutBase) # otherBase = random.choice(list(otherBases)) # assert mutBase <> otherBase # mutseq = refsub.tomutable() # # if mutType == 'SNP': # print mutBase, ' => ', otherBase # mutseq[internalStartIndex] = otherBase # # print refsub # print mutseq # align = Bio.Align.Generic.Alignment(Gapped(IUPAC.unambiguous_dna, '-')) # align.add_sequence("ref", refsub.tostring()) # align.add_sequence("mut", mutseq.tostring()) # print str(align) # # return True, refsub, mutseq # # return False, None, None # --------------------------------------------------------------------------- # # sample reads from alignments # # --------------------------------------------------------------------------- def accumulateBases( mutSeq, start, readLen ): count = 0 for stop in range(start, len(mutSeq)): if notDash(mutSeq[stop]): count += 1 #print stop, count if count == readLen: break stop += 1 read = string.join(filter( notDash, mutSeq[start:stop] ), '') return read, stop # doesn't support paired reads # # def sampleReadsFromAlignment(refSeq, mutSeq, alignStart, readLen, nReads, mutations): # #print 'REF:', refSeq.tostring() # #print 'MUT:', mutSeq.tostring() # #print '' # # # we are assuming that mutSeq is exactly 1 + 2 * readLen in size, which the mutation # # right in the middle # lastGoodStartSite = readLen - 1 # if not OPTIONS.quiet: # print refSeq.tostring(), 'ref' # print mutSeq.tostring(), 'mut' # # # we can potentially start at any site in mutSeq # nPotentialStarts = len(mutSeq) - readLen + 1 # potentialStarts = range(nPotentialStarts) # # if nReads.lower() == 'tile' or int(nReads) >= nPotentialStarts: # if OPTIONS.uniqueReadsOnly: # starts = potentialStarts # else: # starts = map( lambda x: random.choice(potentialStarts), range(nPotentialStarts)) # else: # starts = random.sample(potentialStarts, nPotentialStarts) # # #print 'potentialStarts', potentialStarts # #print 'Starts', starts # # def sampleRead( start ): # if mutSeq[start] <> '-': # read, stop = accumulateBases( mutSeq, start, readLen ) # remaining = len(mutSeq) - stop # refForRead = refSeq[start:stop] # #print 'XXXX', start, stop, refForRead, read # cigar = mutationsCIGAR( alignStart, readLen, mutations ) # if not OPTIONS.quiet: # leftPads = string.join(start * ['-'], '') # rightPads = string.join(remaining * ['-'], '') # print leftPads + mutSeq.tostring()[start:stop] + rightPads, start, stop, cigar # return filter(notDash, read), alignStart + start, cigar # else: # return False, False, False # # reads = map( sampleRead, starts ) # return [read for read in reads if read[0] <> False] class refSite: def __init__( self, refSeq, mutSeq, alignStart, mutations ): self.refSeq = refSeq self.mutSeq = mutSeq self.alignStart = alignStart self.mutations = mutations def sample1Read( refSite, start, readLen, reverseComplementP = False ): if refSite.mutSeq[start] <> '-': read, stop = accumulateBases( refSite.mutSeq, start, readLen ) mutSubSeq = refSite.mutSeq[start:stop] if reverseComplementP: s = Seq( read, IUPAC.unambiguous_dna ) #print 'reverseComplementP', read, s.reverse_complement().tostring(), mutSubSeq read = s.reverse_complement().tostring() mutSubSeq.complement() remaining = len(refSite.mutSeq) - stop refForRead = refSite.refSeq[start:stop] #print 'XXXX', start, stop, refForRead, read cigar = mutationsCIGAR( start, refSite.alignStart, readLen, refSite.mutations ) leftPads = string.join(start * ['-'], '') rightPads = string.join(remaining * ['-'], '') ppReadStr = leftPads + mutSubSeq.tostring() + rightPads # + '(' + read + ')' return True, filter(notDash, read), refSite.alignStart + start, cigar, ppReadStr else: return False, None, None, None, None def sampleReadsFromAlignment(wholeRef, refSeq, mutSeq, alignStart, readLen, nReads, mutations, pairedOffset, mutations2, refSeq2, mutSeq2): #print 'REF:', refSeq.tostring() #print 'MUT:', mutSeq.tostring() #print '' # pairedSite, mutations2, refSeq2, mutSeq2 can all be None refSite1 = refSite( refSeq, mutSeq, alignStart, mutations ) refSite2 = refSite( refSeq2, mutSeq2, alignStart + pairedOffset, mutations2 ) #print refSite1.alignStart, refSite2.alignStart, pairedOffset # we are assuming that mutSeq is exactly 1 + 2 * readLen in size, which the mutation # right in the middle lastGoodStartSite = readLen - 1 if not OPTIONS.quiet: if OPTIONS.generatePairedEnds: r = wholeRef.seq[refSite1.alignStart:refSite2.alignStart + 2*readLen - 1] print r.tostring(), 'ref' print r.complement().tostring(), 'ref, complement' else: print refSeq.tostring(), 'ref' print mutSeq.tostring(), 'mut' # we can potentially start at any site in mutSeq nPotentialStarts = len(mutSeq) - readLen + 1 potentialStarts = range(nPotentialStarts) #print 'potentialStarts', potentialStarts if nReads.lower() == 'tile' or int(nReads) >= nPotentialStarts: if OPTIONS.uniqueReadsOnly: starts = potentialStarts else: starts = map( lambda x: random.choice(potentialStarts), range(nPotentialStarts)) else: starts = random.sample(potentialStarts, int(nReads)) #print 'Starts', starts def sampleRead( start ): good1, read1, pos1, cigar1, ppstr1 = sample1Read( refSite1, start, readLen ) if OPTIONS.generatePairedEnds: good2, read2, pos2, cigar2, ppstr2 = sample1Read( refSite2, start, readLen, True ) if not OPTIONS.quiet: offsetDashes = string.join(['-'] * (pairedOffset), '') print print ppstr1 + offsetDashes, pos1, cigar1 print offsetDashes + ppstr2, pos2, cigar2 return [(read1, pos1, cigar1), (read2, pos2, cigar2)] else: if not OPTIONS.quiet: print ppstr1, pos1, cigar1 return [(read1, pos1, cigar1), (None, 0, None)] reads = map( sampleRead, starts ) return [read for read in reads if read[0][0] <> None] def notDash( c ): return c <> '-' def fakeQuals( seq ): return len(seq) * [30] #return range(len(seq)) def alignedRead2SAM( siteID, readID, fastaID, read, pos, cigar, read2, pos2, cigar2 ): rname = fastaID mapq = 40 quals = fakeQuals( read ) qname = '%s:%d.read.%d' % (fastaID, siteID, readID) if not OPTIONS.generatePairedEnds: # not in paired mode flags = [] record1 = SAMRecordFromArgs( qname, flags, rname, pos, mapq, cigar, read, quals ) record2 = None else: flags = [SAM_SEQPAIRED, SAM_MAPPAIRED] insertSize = pos2 - pos - len(read) record1 = SAMRecordFromArgs( qname, flags, rname, pos, mapq, cigar, read, quals, pairContig = rname, pairPos = pos2, insertSize = insertSize ) record2 = SAMRecordFromArgs( qname + 'p', flags, rname, pos2, mapq, cigar2, read2, quals, pairContig = rname, pairPos = pos, insertSize = insertSize ) #print 'read1, read2', record1.getSeq(), record2.getSeq() return [record1, record2] # --------------------------------------------------------------------------- # # paired end reads # # --------------------------------------------------------------------------- def pairedReadSite( ref, leftMutSite, readLen ): pairedOffset = int(round(random.normalvariate(OPTIONS.insertSize, OPTIONS.insertDev))) def fail(): return False, None if pairedOffset < 0: return fail() pairedSite = pairedOffset + leftMutSite + readLen #print 'pairedSite', pairedOffset, leftMutSite, pairedSite if pairedSite + readLen >= len(ref): return fail() refsub = ref[pairedSite - readLen:pairedSite + readLen + 1] if isBadSequenceForSim( refsub ): return fail() return True, pairedSite # --------------------------------------------------------------------------- # # build allowed regions and sampling starts from region # # --------------------------------------------------------------------------- def parseSamplingRange( arg, ref, readLen ): # returns a list of the allowed regions to sample in the reference def one(x): elts = x.split() #print 'elts', elts if len(elts) > 0: elts1 = elts[0].split(':') #print 'elts1', elts1 return map( int, elts1 ) else: return False if arg <> None: try: return [ one(arg) ] except: # try to read it as a file return filter( None, map( one, open(arg).readlines() ) ) else: return [[0, len(ref.seq) - readLen]] def sampleStartSites( sampleRanges, nsites ): print 'sampleStartSites', sampleRanges, nsites # build a set of potential sites if len(sampleRanges) > 1: potentialSites = set() for start, end in sampleRanges: #print 'potentialSites', start, end, potentialSites potentialSites = potentialSites.union(set(xrange(start, end))) else: start, end = sampleRanges[0] potentialSites = xrange(start, end) #print 'potentialSites', potentialSites # choose sites from potentials if len(potentialSites) <= nsites: # tile every site sites = potentialSites else: # we need to sample from the potential sites sites = random.sample( potentialSites, nsites ) # print 'Sites', sites print 'Number of potential start sites:', len(potentialSites) print 'Number of start sites that will be generated:', len(sites) return sites def readRef(referenceFasta): handle = open(referenceFasta) for seq_record in SeqIO.parse(handle, "fasta"): print seq_record.id print seq_record.name #print repr(seq_record.seq) print len(seq_record.seq) yield seq_record handle.close() import os.path def outputFilename(): root = os.path.splitext(os.path.split(OPTIONS.reference)[1])[0] if OPTIONS.generatePairedEnds: pairedP = 'Yes' else: pairedP = 'No' params = [['mut',OPTIONS.mutationType], ['den', max(OPTIONS.mutationDensity, OPTIONS.indelSize)], [OPTIONS.readLen,'bp'], \ ['nsites', OPTIONS.nSites], ['cov', OPTIONS.coverage], ['range', OPTIONS.range], ['paired', pairedP]] filename = root + '__' + string.join(map( lambda p: string.join(map(str, p),'.'), params), '_') root = os.path.join(OPTIONS.outputPrefix, filename) return root, root + '.sam' def main(): global OPTIONS, ROOT usage = "usage: %prog [options]" parser = OptionParser(usage=usage) parser.add_option("-r", "--ref", dest="reference", type="string", default=None, help="Reference DNA seqeunce in FASTA format") parser.add_option("-o", "--outputPrefix", dest="outputPrefix", type="string", default='', help="Output Prefix SAM file") parser.add_option("-c", "--coverage", dest="coverage", type="string", default='1', help="Number of reads to produce that cover each mutated region for the reference, or tile for all possible reads.") parser.add_option("-n", "--nsites", dest="nSites", type=int, default=1, help="Number of sites to generate mutations in the reference") parser.add_option("-l", "--readlen", dest="readLen", type=int, default=36, help="Length of reads to generate") parser.add_option("-s", "--seed", dest="seed", type=int, default=123456789, help="Random number seed. If 0, then system clock is used") parser.add_option("-t", "--muttype", dest="mutationType", type="string", default='None', help="Type of mutation to introduce from reference. Can be None, SNP, insertion, or deletion") parser.add_option("-e", "--mutdensity", dest="mutationDensity", type=int, default=1, help="Number of mutations to introduce from the reference") parser.add_option("-x", "--range", dest="range", type="string", default=None, help="Sampling range restriction, in the form of x:y without a space") parser.add_option("-u", "--unique", dest="uniqueReadsOnly", action='store_true', default=True, help="Assumes that the user wants unique reads generated. If the coverage is greater than the number of unique reads at the site, the region is just tiled") parser.add_option("-i", "--indelsize", dest="indelSize", type=int, default=0, help="Size in bp of the insertion or deletion to introduce") # Paired end options parser.add_option("", "--paired", dest="generatePairedEnds", action='store_true', default=False, help="Should we generate paired end reads?") parser.add_option("", "--insertSize", dest="insertSize", type=int, default=280, help="Size in bp of the paired library insertion") parser.add_option("", "--insertDev", dest="insertDev", type=int, default=5, help="Standard deviation in the size in bp of the paired library insertion") parser.add_option("-q", "--quiet", dest="quiet", action='store_true', default=False, help="Be quiet when generating output") parser.add_option("-d", "--debug", dest="debug", action='store_true', default=False, help="Verbose debugging output?") (OPTIONS, args) = parser.parse_args() if len(args) != 0: parser.error("incorrect number of arguments") root, outputSAM = outputFilename() if OPTIONS.seed == 0: random.seed(None) else: random.seed(OPTIONS.seed) OPTIONS.mutationType = OPTIONS.mutationType.upper() print '------------------------------------------------------------' print 'program:', sys.argv[0] print '' print ' ref: ', OPTIONS.reference print ' output: ', outputSAM print '' print ' mutation type: ', OPTIONS.mutationType print ' mutation density: ', OPTIONS.mutationDensity print ' indel size: ', OPTIONS.indelSize print ' readLen: ', OPTIONS.readLen print ' nSites: ', OPTIONS.nSites print ' coverage: ', OPTIONS.coverage print ' range restriction: ', OPTIONS.range print '' print ' paired ends?: ', OPTIONS.generatePairedEnds print ' insert size: ', OPTIONS.insertSize print ' insert stddev: ', OPTIONS.insertDev print '' print ' Debugging?: ', OPTIONS.debug print '------------------------------------------------------------' if OPTIONS.mutationType <> 'SNP' and OPTIONS.mutationDensity > 1: raise Exception('Does not support mutation density > 1 for mutations of class', OPTIONS.mutationType) readLen = OPTIONS.readLen header = SAMHeader( "MarkD", readLen ) SAMout = SAMIO( outputSAM, header, debugging=OPTIONS.debug ) mutationsout = open(root + '.mutations.txt', 'w') qualsout = open(root + '.quals.txt', 'w') refout = open(root + '.ref', 'w') fastaout = open(root + '.fasta', 'w') if OPTIONS.generatePairedEnds: fastaout2 = open(root + '.pairs.fasta', 'w') qualsout2 = open(root + '.pairs.quals.txt', 'w') counter = 0 refLen = 0 for ref in readRef(OPTIONS.reference): refLen = len(ref.seq) # write the crazy ref file info needed by samtools print >> refout, ref.id + '\t' + str(refLen) sampleRanges = parseSamplingRange( OPTIONS.range, ref, readLen ) sites = sampleStartSites( sampleRanges, OPTIONS.nSites ) for mutSite, siteCounter in zip( sites, range(1, len(sites)+1) ): #print 'Sampling site:', mutSite, refLen #mutSite = readLen-1 nSitesSelected = max(int(round(len(sites)/100.0)), 1) #print siteCounter, len(sites), nSitesSelected) if siteCounter % nSitesSelected == 0: print 'Sampled site %d of %d (%.2f%%)' % ( siteCounter, len(sites), (100.0*siteCounter) / len(sites)) good, mutations, refSeq, mutSeq = mutateReference(ref.seq, ref.id, mutSite, OPTIONS.mutationType, [], readLen) pairedSite, mutations2, refSeq2, mutSeq2 = 0, None, None, None if good and OPTIONS.generatePairedEnds: if OPTIONS.mutationType == 'SNP': pairedMutType = 'SNP' else: pairedMutType = 'NONE' good, pairedSite = pairedReadSite( ref.seq, mutSite, readLen ) print 'pairedReadSite', good, pairedSite, good if good: good, mutations2, refSeq2, mutSeq2 = mutateReference(ref.seq, ref.id, pairedSite, pairedMutType, [], readLen) #print 'Good', good, mutations, refSeq, mutSeq if good: print >> mutationsout, ref.id + ':' + str(mutSite) + '\t' + string.join(map(str, mutations), '\t') #print 'Mutations', mutations refSeqPos = mutSite - readLen + 1 readPairs = sampleReadsFromAlignment(ref, refSeq, mutSeq, refSeqPos, readLen, OPTIONS.coverage, mutations, pairedSite - mutSite, mutations2, refSeq2, mutSeq2) for i in range(len(readPairs)): counter += 1 read1, read2 = readPairs[i] seq, pos, cigar = read1 seq2, pos2, cigar2 = read2 # write out the sam and fasta files def write1( record, recordi ): if record <> None: SAMout.writeRecord( record ) sequences = [SeqRecord(Seq(record.getSeq()), id = record.getQName(), description='')] #print sequences if recordi % 2 == 0: localFastaOut, localQualsOut = fastaout, qualsout else: localFastaOut, localQualsOut = fastaout2, qualsout2 SeqIO.write(sequences, localFastaOut, "fasta") print >> localQualsOut, record.getQName(), string.join(map( str, record.getQuals() ), ' ') #print i, read1, read2 records = alignedRead2SAM( mutSite, i+1, ref.id, seq, pos + 1, cigar, seq2, pos2 + 1, cigar2 ) map( write1, records, range( len(records) ) ) SAMout.close() qualsout.close() fastaout.close() refout.close() mutationsout.close() if OPTIONS.generatePairedEnds: qualsout2.close() fastaout2.close() # for record in SAMIO( outputSAM ): # print record if __name__ == "__main__": import Bio from Bio import SeqIO from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord from Bio.Alphabet import IUPAC, Gapped main()