more functional minimap2.py; added categories

This commit is contained in:
Heng Li 2017-09-17 17:06:39 -04:00
parent 0b660c70e2
commit cf93e5c0a1
5 changed files with 43 additions and 7 deletions

View File

@ -10,6 +10,10 @@ matrix:
python: "2.7"
before_install: pip install cython
script: python setup.py build_ext
- language: python
python: "3.0"
before_install: pip install cython
script: python setup.py build_ext
- language: python
python: "3.5"
before_install: pip install cython

View File

@ -1,4 +1,4 @@
.TH minimap2 1 "14 September 2017" "minimap2-2.1.1-r364-dirty" "Bioinformatics tools"
.TH minimap2 1 "17 September 2017" "minimap2-2.1.1-r402-dirty" "Bioinformatics tools"
.SH NAME
.PP
minimap2 - mapping and alignment between collections of DNA sequences
@ -398,6 +398,7 @@ NM i Total number of mismatches and gaps in the alignment
AS i DP alignment score
ms i DP score of the max scoring segment in the alignment
nn i Number of ambiguous bases in the alignment
ts A Transcript strand (splice mode only)
cg Z CIGAR string (only in PAF)
.TE

View File

@ -67,8 +67,11 @@ cdef class Alignment:
else: strand = '?'
if self._is_primary != 0: tp = 'tp:A:P'
else: tp = 'tp:A:S'
if self._trans_strand > 0: ts = 'ts:A:+'
elif self._trans_strand < 0: ts = 'ts:A:-'
else: ts = 'ts:A:.'
return "\t".join([str(self._q_st), str(self._q_en), strand, self._ctg, str(self._ctg_len), str(self._r_st), str(self._r_en),
str(self._blen - self._NM), str(self._blen), str(self._mapq), "NM:i:" + str(self._NM), tp, "cg:Z:" + self.cigar_str])
str(self._blen - self._NM), str(self._blen), str(self._mapq), tp, ts, "cg:Z:" + self.cigar_str])
cdef class ThreadBuffer:
cdef cmappy.mm_tbuf_t *_b

View File

@ -4,11 +4,28 @@ import sys, getopt
import mappy as mp
def main(argv):
opts, args = getopt.getopt(argv[1:], "")
opts, args = getopt.getopt(argv[1:], "x:n:m:k:w:r:")
if len(args) < 2:
print("Usage: minimap2.py <ref.fa>|<ref.mmi> <query.fq>")
print("Usage: minimap2.py [options] <ref.fa>|<ref.mmi> <query.fq>")
# Option summary shown with the usage line when too few positional
# arguments are supplied. Fixes the "mininum" -> "minimum" misspelling.
print("Options:")
print(" -x STR preset: sr, map-pb, map-ont, asm5, asm10 or splice")
print(" -n INT minimum number of minimizers")
print(" -m INT minimum chaining score")
print(" -k INT k-mer length")
print(" -w INT minimizer window length")
print(" -r INT band width")
sys.exit(1)
a = mp.Aligner(args[0]) # load/build index
# Every tunable stays None unless its option appears on the command line;
# all of them are forwarded to mp.Aligner as keyword arguments below.
preset, min_cnt, min_sc, k, w, bw = None, None, None, None, None, None
for opt, arg in opts:
    if opt == '-x': preset = arg
    elif opt == '-n': min_cnt = int(arg)
    # -m must be stored in min_sc, the name passed as min_chain_score below;
    # it was previously assigned to an unused variable, so -m had no effect.
    elif opt == '-m': min_sc = int(arg)
    elif opt == '-r': bw = int(arg)
    elif opt == '-k': k = int(arg)
    elif opt == '-w': w = int(arg)
# Load or build the index from the first positional argument.
a = mp.Aligner(args[0], preset=preset, min_cnt=min_cnt, min_chain_score=min_sc, k=k, w=w, bw=bw)
if not a: raise Exception("ERROR: failed to load/build index file '{}'".format(args[0]))
for name, seq, qual in mp.fastx_read(args[1]): # read one sequence
for h in a.map(seq): # traverse hits

View File

@ -23,14 +23,15 @@ def readme():
setup(
name = 'mappy',
version = '2.2rc0',
version = '2.2rc1',
url = 'https://github.com/lh3/minimap2',
description = 'Minimap2 python binding',
long_description = readme(),
author = 'Heng Li',
author_email = 'lh3@me.com',
license = 'MIT',
keywords = ['bioinformatics', 'sequence-alignment'],
keywords = 'sequence-alignment',
scripts = ['python/minimap2.py'],
ext_modules = [Extension('mappy',
sources = [module_src, 'align.c', 'bseq.c', 'chain.c', 'format.c', 'hit.c', 'index.c',
'ksw2_extd2_sse.c', 'ksw2_exts2_sse.c', 'ksw2_extz2_sse.c', 'ksw2_ll_sse.c',
@ -41,4 +42,14 @@ setup(
extra_compile_args = ['-msse4'], # WARNING: ancient x86_64 CPUs don't have SSE4
include_dirs = ['.'],
libraries = ['z', 'm', 'pthread'])],
classifiers = [
'Development Status :: 4 - Beta',
'License :: OSI Approved :: MIT License',
'Operating System :: POSIX',
'Programming Language :: C',
'Programming Language :: Cython',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Intended Audience :: Science/Research',
'Topic :: Scientific/Engineering :: Bio-Informatics'],
cmdclass = cmdclass)