diff --git a/MANIFEST.in b/MANIFEST.in index c1e8b53..2625063 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,8 +4,8 @@ include ksw2_dispatch.c include getopt.c include main.c include README.md -include python/mmappy.c -include python/cmmappy.h -include python/cmmappy.pxd -include python/mmappy.pyx +include python/mappy.c +include python/cmappy.h +include python/cmappy.pxd +include python/mappy.pyx include python/README.rst diff --git a/Makefile b/Makefile index 2549f08..0567269 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ ksw2_dispatch.o:ksw2_dispatch.c ksw2.h $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ clean: - rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM build dist mmappy.so mmappy.c python/mmappy.c mmappy.egg* + rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM build dist mappy.so mappy.c python/mappy.c mappy.egg* depend: (LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(CPPFLAGS) -- *.c) diff --git a/python/README.rst b/python/README.rst index 1702a6c..1ccd4eb 100644 --- a/python/README.rst +++ b/python/README.rst @@ -1,15 +1,15 @@ -=============================== -Mmappy: Minimap2 Python Binding -=============================== +============================== +Mappy: Minimap2 Python Binding +============================== `Minimap2 `_ is a fast and accurate pairwise -aligner for genomic and transcribed nucleotide sequences. This module wraps -minimap2 and provides a convenient interface to calling minimap2 in Python. +aligner for genomic and transcribed nucleotide sequences. This Python extension +provides a convenient interface to calling minimap2 in Python. Installation ------------ -The mmappy module can be installed directly with: +The mappy module can be installed directly with: .. code:: shell @@ -21,7 +21,7 @@ or with `pip `_: .. 
code:: shell - pip install --user mmappy + pip install --user mappy Usage ----- @@ -30,8 +30,8 @@ The following Python program shows the key functionality of this module: .. code:: python - import mmappy as mm - a = mm.Aligner("test/MT-human.fa") # load or build index + import mappy as mp + a = mp.Aligner("test/MT-human.fa") # load or build index if not a: raise Exception("ERROR: failed to load/build index") for hit in a.map("GGTTAAATACAGACCAAGAGCCTTCAAAGCCCTCAGTAAGTTGCAATACTTAATTTCTGT"): print("{}\t{}\t{}\t{}".format(hit.ctg, hit.r_st, hit.r_en, hit.cigar_str)) @@ -43,7 +43,7 @@ and prints them out. APIs ---- -Class mmappy.Aligner +Class mappy.Aligner ~~~~~~~~~~~~~~~~~~~~~~ .. code:: python @@ -86,7 +86,7 @@ Arguments: This method maps :code:`seq` against the index. It *yields* a generator, generating a series of :code:`Alignment` objects. -Class mmappy.Alignment +Class mappy.Alignment ~~~~~~~~~~~~~~~~~~~~~~~~ This class has the following properties: diff --git a/python/cmmappy.h b/python/cmappy.h similarity index 95% rename from python/cmmappy.h rename to python/cmappy.h index 0bedf74..386b7fc 100644 --- a/python/cmmappy.h +++ b/python/cmappy.h @@ -1,5 +1,5 @@ -#ifndef CMMAPPY_H -#define CMMAPPY_H +#ifndef CMAPPY_H +#define CMAPPY_H #include #include "minimap.h" diff --git a/python/cmmappy.pxd b/python/cmappy.pxd similarity index 96% rename from python/cmmappy.pxd rename to python/cmappy.pxd index f1b7820..c78777a 100644 --- a/python/cmmappy.pxd +++ b/python/cmappy.pxd @@ -62,7 +62,7 @@ cdef extern from "minimap.h": void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi) # - # Mapping (key struct defined in cmmappy.h below) + # Mapping (key struct defined in cmappy.h below) # ctypedef struct mm_reg1_t: pass @@ -77,7 +77,7 @@ cdef extern from "minimap.h": # # Helper header (because it is hard to expose mm_reg1_t with Cython # -cdef extern from "cmmappy.h": +cdef extern from "cmappy.h": ctypedef struct mm_hitpy_t: const char *ctg int32_t ctg_start, ctg_end diff 
--git a/python/mmappy.pyx b/python/mappy.pyx similarity index 75% rename from python/mmappy.pyx rename to python/mappy.pyx index f677be6..dec76ff 100644 --- a/python/mmappy.pyx +++ b/python/mappy.pyx @@ -1,6 +1,6 @@ from libc.stdint cimport uint8_t, int8_t from libc.stdlib cimport free -cimport cmmappy +cimport cmappy cdef class Alignment: cdef int _ctg_len, _r_st, _r_en @@ -69,23 +69,23 @@ cdef class Alignment: str(self._blen - self._NM), str(self._blen), str(self._mapq), "NM:i:" + str(self._NM), tp, "cg:Z:" + self.cigar_str]) cdef class ThreadBuffer: - cdef cmmappy.mm_tbuf_t *_b + cdef cmappy.mm_tbuf_t *_b def __cinit__(self): - self._b = cmmappy.mm_tbuf_init() + self._b = cmappy.mm_tbuf_init() def __dealloc__(self): - cmmappy.mm_tbuf_destroy(self._b) + cmappy.mm_tbuf_destroy(self._b) cdef class Aligner: - cdef cmmappy.mm_idx_t *_idx - cdef cmmappy.mm_idxopt_t idx_opt - cdef cmmappy.mm_mapopt_t map_opt + cdef cmappy.mm_idx_t *_idx + cdef cmappy.mm_idxopt_t idx_opt + cdef cmappy.mm_mapopt_t map_opt def __cinit__(self, fn_idx_in, preset=None, k=None, w=None, min_cnt=None, min_chain_score=None, min_dp_score=None, bw=None, best_n=None, n_threads=3, fn_idx_out=None): - cmmappy.mm_set_opt(NULL, &self.idx_opt, &self.map_opt) # set the default options + cmappy.mm_set_opt(NULL, &self.idx_opt, &self.map_opt) # set the default options if preset is not None: - cmmappy.mm_set_opt(str.encode(preset), &self.idx_opt, &self.map_opt) # apply preset + cmappy.mm_set_opt(str.encode(preset), &self.idx_opt, &self.map_opt) # apply preset self.map_opt.flag |= 4 # always perform alignment self.idx_opt.batch_size = 0x7fffffffffffffffL # always build a uni-part index if k is not None: self.idx_opt.k = k @@ -96,40 +96,40 @@ cdef class Aligner: if bw is not None: self.map_opt.bw = bw if best_n is not None: self.best_n = best_n - cdef cmmappy.mm_idx_reader_t *r; + cdef cmappy.mm_idx_reader_t *r; if fn_idx_out is None: - r = cmmappy.mm_idx_reader_open(str.encode(fn_idx_in), &self.idx_opt, NULL) 
+ r = cmappy.mm_idx_reader_open(str.encode(fn_idx_in), &self.idx_opt, NULL) else: - r = cmmappy.mm_idx_reader_open(str.encode(fn_idx_in), &self.idx_opt, fn_idx_out) + r = cmappy.mm_idx_reader_open(str.encode(fn_idx_in), &self.idx_opt, fn_idx_out) if r is not NULL: - self._idx = cmmappy.mm_idx_reader_read(r, n_threads) # NB: ONLY read the first part - cmmappy.mm_idx_reader_close(r) - cmmappy.mm_mapopt_update(&self.map_opt, self._idx) + self._idx = cmappy.mm_idx_reader_read(r, n_threads) # NB: ONLY read the first part + cmappy.mm_idx_reader_close(r) + cmappy.mm_mapopt_update(&self.map_opt, self._idx) def __dealloc__(self): if self._idx is not NULL: - cmmappy.mm_idx_destroy(self._idx) + cmappy.mm_idx_destroy(self._idx) def __bool__(self): return (self._idx != NULL) def map(self, seq, buf=None): - cdef cmmappy.mm_reg1_t *regs - cdef cmmappy.mm_hitpy_t h + cdef cmappy.mm_reg1_t *regs + cdef cmappy.mm_hitpy_t h cdef ThreadBuffer b cdef int n_regs if self._idx is NULL: return None if buf is None: b = ThreadBuffer() else: b = buf - regs = cmmappy.mm_map(self._idx, len(seq), str.encode(seq), &n_regs, b._b, &self.map_opt, NULL) + regs = cmappy.mm_map(self._idx, len(seq), str.encode(seq), &n_regs, b._b, &self.map_opt, NULL) for i in range(n_regs): - cmmappy.mm_reg2hitpy(self._idx, &regs[i], &h) + cmappy.mm_reg2hitpy(self._idx, &regs[i], &h) cigar = [] for k in range(h.n_cigar32): c = h.cigar32[k] cigar.append([c>>4, c&0xf]) yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.blen, h.NM, h.trans_strand) - cmmappy.mm_free_reg1(&regs[i]) + cmappy.mm_free_reg1(&regs[i]) free(regs) diff --git a/python/mm2-lite.py b/python/mm2-lite.py index b294e32..85935a4 100755 --- a/python/mm2-lite.py +++ b/python/mm2-lite.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import sys, getopt -import mmappy as mm +import mappy as mp def readfq(fp): # multi-line fasta/fastq parser last = None @@ -39,7 +39,7 @@ def main(argv): if len(args) < 2: 
print("Usage: mm2-lite.py | ") sys.exit(1) - a = mm.Aligner(args[0]) # load/build index + a = mp.Aligner(args[0]) # load/build index if not a: print("ERROR: failed to load/build index") return diff --git a/setup.py b/setup.py index 5b39a0e..4078613 100644 --- a/setup.py +++ b/setup.py @@ -9,9 +9,9 @@ cmdclass = {} try: from Cython.Build import build_ext except ImportError: # without Cython - module_src = 'python/mmappy.c' + module_src = 'python/mappy.c' else: # with Cython - module_src = 'python/mmappy.pyx' + module_src = 'python/mappy.pyx' cmdclass['build_ext'] = build_ext import sys @@ -22,7 +22,7 @@ def readme(): return f.read() setup( - name = 'mmappy', + name = 'mappy', version = '2.2rc2', url = 'https://github.com/lh3/minimap2', description = 'Minimap2 python binding', @@ -31,13 +31,13 @@ setup( author_email = 'lh3@me.com', license = 'MIT', keywords = ['bioinformatics', 'sequence-alignment'], - ext_modules = [Extension('mmappy', + ext_modules = [Extension('mappy', sources = [module_src, 'align.c', 'bseq.c', 'chain.c', 'format.c', 'hit.c', 'index.c', 'ksw2_extd2_sse.c', 'ksw2_exts2_sse.c', 'ksw2_extz2_sse.c', 'ksw2_ll_sse.c', 'kalloc.c', 'kthread.c', 'map.c', 'misc.c', 'sdust.c', 'sketch.c'], depends = ['minimap.h', 'bseq.h', 'kalloc.h', 'kdq.h', 'khash.h', 'kseq.h', 'ksort.h', 'ksw2.h', 'kthread.h', 'kvec.h', 'mmpriv.h', 'sdust.h', - 'python/cmmappy.h', 'python/cmmappy.pxd'], + 'python/cmappy.h', 'python/cmappy.pxd'], extra_compile_args = ['-msse4'], # WARNING: ancient x86_64 CPUs don't have SSE4 include_dirs = ['.'], libraries = ['z', 'm', 'pthread'])],