first python version

This commit is contained in:
Heng Li 2017-09-16 08:44:47 -04:00
parent 5966e5d6e4
commit 446bde214d
4 changed files with 258 additions and 0 deletions

40
python/cminimap2.h 100644
View File

@ -0,0 +1,40 @@
#ifndef CMINIMAP2_H
#define CMINIMAP2_H
#include <stdlib.h>
#include "minimap.h"
typedef struct {
const char *ctg;
int32_t ctg_start, ctg_end;
int32_t qry_start, qry_end;
int32_t n_cigar32;
uint8_t strand, mapq, is_primary;
uint32_t *cigar32;
} mm_hitpy_t;
static inline mm_hitpy_t *mm_reg2hitpy(const mm_idx_t *mi, int n_regs, mm_reg1_t *regs)
{
mm_hitpy_t *h;
int i;
h = (mm_hitpy_t*)calloc(n_regs, sizeof(mm_hitpy_t));
for (i = 0; i < n_regs; ++i) {
mm_reg1_t *r = &regs[i];
h[i].ctg = mi->seq[r->rid].name;
h[i].ctg_start = r->rs, h[i].ctg_end = r->re;
h[i].qry_start = r->qs, h[i].qry_end = r->qe;
h[i].strand = r->rev;
h[i].mapq = r->mapq;
h[i].is_primary = (r->id == r->parent);
h[i].n_cigar32 = r->p->n_cigar;
h[i].cigar32 = r->p->cigar;
}
return h;
}
static inline void mm_free_reg1(mm_reg1_t *r)
{
free(r->p);
}
#endif

View File

@ -0,0 +1,87 @@
from libc.stdint cimport uint8_t, int32_t, int64_t, uint32_t, uint64_t
cdef extern from "minimap.h":
#
# Options
#
ctypedef struct mm_idxopt_t:
short k, w, is_hpc, bucket_bits
int mini_batch_size
uint64_t batch_size
ctypedef struct mm_mapopt_t:
int sdust_thres
int flag
int bw
int max_gap, max_gap_ref
int max_chain_skip
int min_cnt
int min_chain_score
float mask_level
float pri_ratio
int best_n
int max_join_long, max_join_short
int min_join_flank_sc
int a, b, q, e, q2, e2
int noncan
int zdrop
int min_dp_max
int min_ksw_len
float mid_occ_frac
int32_t mid_occ
int mini_batch_size
int mm_set_opt(char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
#
# Indexing
#
ctypedef struct mm_idx_seq_t:
char *name
uint64_t offset
uint32_t len
ctypedef struct mm_idx_bucket_t:
pass
ctypedef struct mm_idx_t:
int32_t b, w, k, is_hpc
uint32_t n_seq
mm_idx_seq_t *seq
uint32_t *S
mm_idx_bucket_t *B
void *km
ctypedef struct mm_idx_reader_t:
pass
mm_idx_reader_t *mm_idx_reader_open(const char *fn, const mm_idxopt_t *opt, const char *fn_out)
mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads)
void mm_idx_reader_close(mm_idx_reader_t *r)
void mm_idx_destroy(mm_idx_t *mi)
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
#
# Mapping (key struct defined in cminimap2.h below)
#
ctypedef struct mm_reg1_t:
pass
ctypedef struct mm_tbuf_t:
pass
mm_tbuf_t *mm_tbuf_init()
void mm_tbuf_destroy(mm_tbuf_t *b)
mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name)
cdef extern from "cminimap2.h":
ctypedef struct mm_hitpy_t:
const char *ctg
int32_t ctg_start, ctg_end
int32_t qry_start, qry_end
int32_t n_cigar32
uint8_t strand, mapq, is_primary
uint32_t *cigar32
mm_hitpy_t *mm_reg2hitpy(const mm_idx_t *mi, int n_regs, mm_reg1_t *regs)
void mm_free_reg1(mm_reg1_t *r)

110
python/minimap2.pyx 100644
View File

@ -0,0 +1,110 @@
from libc.stdint cimport uint8_t, int8_t
from libc.stdlib cimport free
cimport cminimap2
cdef class Alignment:
cdef const char *_ctg
cdef int _r_st, _r_en
cdef int _q_st, _q_en
cdef int8_t _is_rev, _is_primary
cdef uint8_t _mapq
cdef _cigar
def __cinit__(self, ctg, cs, ce, strand, qs, qe, mapq, cigar, is_primary):
self._ctg, self._r_st, self._r_en = ctg, cs, ce
self._is_rev, self._q_st, self._q_en = strand, qs, qe
self._mapq = mapq
self._cigar = cigar
self._is_primary = is_primary
@property
def ctg(self):
return self._ctg
@property
def r_st(self):
return self._r_st
@property
def r_en(self):
return self._r_en
@property
def is_rev(self):
return self._is_rev
@property
def is_primary(self):
return self._is_primary
@property
def q_st(self):
return self._q_st
@property
def q_en(self):
return self._q_en
@property
def mapq(self):
return self._mapq
@property
def cigar(self):
return self._cigar
cdef class ThreadBuffer:
cdef cminimap2.mm_tbuf_t *_b
def __cinit__(self):
self._b = cminimap2.mm_tbuf_init()
def __dealloc__(self):
cminimap2.mm_tbuf_destroy(self._b)
cdef class Aligner:
cdef cminimap2.mm_idx_t *_idx
cdef public cminimap2.mm_idxopt_t idx_opt
cdef public cminimap2.mm_mapopt_t map_opt
def __cinit__(self, fn, preset=None):
self.config(preset)
cdef cminimap2.mm_idx_reader_t *r;
r = cminimap2.mm_idx_reader_open(fn, &self.idx_opt, NULL)
self._idx = cminimap2.mm_idx_reader_read(r, 3) # NB: ONLY read the first part
cminimap2.mm_idx_reader_close(r)
cminimap2.mm_mapopt_update(&self.map_opt, self._idx)
def __dealloc__(self):
if self._idx is not NULL:
cminimap2.mm_idx_destroy(self._idx)
def config(self, preset=None):
cminimap2.mm_set_opt(NULL, &self.idx_opt, &self.map_opt)
if preset is not None:
cminimap2.mm_set_opt(preset, &self.idx_opt, &self.map_opt)
self.map_opt.flag |= 4 # always perform alignment
def map(self, seq, buf=None):
cdef cminimap2.mm_reg1_t *regs
cdef ThreadBuffer b
cdef int n_regs
if buf is None: b = ThreadBuffer()
else: b = buf
regs = cminimap2.mm_map(self._idx, len(seq), seq, &n_regs, b._b, &self.map_opt, NULL)
hits = cminimap2.mm_reg2hitpy(self._idx, n_regs, regs)
arr = []
for i in range(n_regs):
h = hits[i]
cigar = []
for k in range(h.n_cigar32):
c = h.cigar32[k]
cigar.append([c>>4, c&0xf])
arr.append(Alignment(h.ctg, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary))
cminimap2.mm_free_reg1(&regs[i])
free(regs)
free(hits)
return arr

21
setup.py 100644
View File

@ -0,0 +1,21 @@
from setuptools import setup, Extension
from Cython.Build import cythonize
import sys
sys.path.append('python')
setup(
name = 'minimap2',
version = '2.1.1',
url = 'https://github.com/lh3/minimap2',
description = 'Minimap2 python binding',
author = 'Heng Li',
author_email = 'lh3@me.com',
keywords = ['bioinformatics', 'sequence-alignment'],
ext_modules = cythonize([Extension('minimap2',
['python/minimap2.pyx', 'align.c', 'bseq.c', 'chain.c', 'format.c', 'hit.c', 'index.c', 'kalloc.c',
'ksw2_extd2_sse.c', 'ksw2_exts2_sse.c', 'ksw2_extz2_sse.c', 'ksw2_ll_sse.c', 'kthread.c', 'map.c',
'misc.c', 'sdust.c', 'sketch.c'],
extra_compile_args = ['-msse4'],
include_dirs = ['.'])])
)