2017-09-17 05:51:13 +08:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
|
|
|
|
import sys, getopt
|
2017-09-17 12:05:30 +08:00
|
|
|
import mappy as mp
|
2017-09-17 05:51:13 +08:00
|
|
|
|
|
|
|
|
def readfq(fp): # multi-line fasta/fastq parser
|
|
|
|
|
last = None
|
|
|
|
|
while True:
|
|
|
|
|
if not last:
|
|
|
|
|
for l in fp:
|
|
|
|
|
if l[0] in '>@':
|
|
|
|
|
last = l[:-1]
|
|
|
|
|
break
|
|
|
|
|
if not last: break
|
|
|
|
|
name, seqs, last = last[1:].split()[0], [], None
|
|
|
|
|
for l in fp:
|
|
|
|
|
if l[0] in '@+>':
|
|
|
|
|
last = l[:-1]
|
|
|
|
|
break
|
|
|
|
|
seqs.append(l[:-1])
|
|
|
|
|
if not last or last[0] != '+':
|
|
|
|
|
yield name, ''.join(seqs), None
|
|
|
|
|
if not last: break
|
|
|
|
|
else:
|
|
|
|
|
seq, leng, seqs = ''.join(seqs), 0, []
|
|
|
|
|
for l in fp:
|
|
|
|
|
seqs.append(l[:-1])
|
|
|
|
|
leng += len(l) - 1
|
|
|
|
|
if leng >= len(seq):
|
|
|
|
|
last = None
|
|
|
|
|
yield name, seq, ''.join(seqs);
|
|
|
|
|
break
|
|
|
|
|
if last:
|
|
|
|
|
yield name, seq, None
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
def main(argv):
|
|
|
|
|
opts, args = getopt.getopt(argv[1:], "")
|
|
|
|
|
if len(args) < 2:
|
|
|
|
|
print("Usage: mm2-lite.py <ref.fa>|<ref.mmi> <query.fq>")
|
|
|
|
|
sys.exit(1)
|
2017-09-17 12:05:30 +08:00
|
|
|
a = mp.Aligner(args[0]) # load/build index
|
2017-09-17 08:09:17 +08:00
|
|
|
if not a:
|
|
|
|
|
print("ERROR: failed to load/build index")
|
|
|
|
|
return
|
2017-09-17 05:51:13 +08:00
|
|
|
for name, seq, qual in readfq(open(args[1])): # read one sequence
|
|
|
|
|
for h in a.map(seq): # traverse hits
|
|
|
|
|
print('{}\t{}\t{}'.format(name, len(seq), h))
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
main(sys.argv)
|