first python version
This commit is contained in:
parent
5966e5d6e4
commit
446bde214d
|
|
@ -0,0 +1,40 @@
|
|||
#ifndef CMINIMAP2_H
|
||||
#define CMINIMAP2_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "minimap.h"
|
||||
|
||||
typedef struct {
|
||||
const char *ctg;
|
||||
int32_t ctg_start, ctg_end;
|
||||
int32_t qry_start, qry_end;
|
||||
int32_t n_cigar32;
|
||||
uint8_t strand, mapq, is_primary;
|
||||
uint32_t *cigar32;
|
||||
} mm_hitpy_t;
|
||||
|
||||
static inline mm_hitpy_t *mm_reg2hitpy(const mm_idx_t *mi, int n_regs, mm_reg1_t *regs)
|
||||
{
|
||||
mm_hitpy_t *h;
|
||||
int i;
|
||||
h = (mm_hitpy_t*)calloc(n_regs, sizeof(mm_hitpy_t));
|
||||
for (i = 0; i < n_regs; ++i) {
|
||||
mm_reg1_t *r = ®s[i];
|
||||
h[i].ctg = mi->seq[r->rid].name;
|
||||
h[i].ctg_start = r->rs, h[i].ctg_end = r->re;
|
||||
h[i].qry_start = r->qs, h[i].qry_end = r->qe;
|
||||
h[i].strand = r->rev;
|
||||
h[i].mapq = r->mapq;
|
||||
h[i].is_primary = (r->id == r->parent);
|
||||
h[i].n_cigar32 = r->p->n_cigar;
|
||||
h[i].cigar32 = r->p->cigar;
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
static inline void mm_free_reg1(mm_reg1_t *r)
|
||||
{
|
||||
free(r->p);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
from libc.stdint cimport uint8_t, int32_t, int64_t, uint32_t, uint64_t
|
||||
|
||||
cdef extern from "minimap.h":
|
||||
#
|
||||
# Options
|
||||
#
|
||||
ctypedef struct mm_idxopt_t:
|
||||
short k, w, is_hpc, bucket_bits
|
||||
int mini_batch_size
|
||||
uint64_t batch_size
|
||||
|
||||
ctypedef struct mm_mapopt_t:
|
||||
int sdust_thres
|
||||
int flag
|
||||
int bw
|
||||
int max_gap, max_gap_ref
|
||||
int max_chain_skip
|
||||
int min_cnt
|
||||
int min_chain_score
|
||||
float mask_level
|
||||
float pri_ratio
|
||||
int best_n
|
||||
int max_join_long, max_join_short
|
||||
int min_join_flank_sc
|
||||
int a, b, q, e, q2, e2
|
||||
int noncan
|
||||
int zdrop
|
||||
int min_dp_max
|
||||
int min_ksw_len
|
||||
float mid_occ_frac
|
||||
int32_t mid_occ
|
||||
int mini_batch_size
|
||||
|
||||
int mm_set_opt(char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
|
||||
|
||||
#
|
||||
# Indexing
|
||||
#
|
||||
ctypedef struct mm_idx_seq_t:
|
||||
char *name
|
||||
uint64_t offset
|
||||
uint32_t len
|
||||
|
||||
ctypedef struct mm_idx_bucket_t:
|
||||
pass
|
||||
|
||||
ctypedef struct mm_idx_t:
|
||||
int32_t b, w, k, is_hpc
|
||||
uint32_t n_seq
|
||||
mm_idx_seq_t *seq
|
||||
uint32_t *S
|
||||
mm_idx_bucket_t *B
|
||||
void *km
|
||||
|
||||
ctypedef struct mm_idx_reader_t:
|
||||
pass
|
||||
|
||||
mm_idx_reader_t *mm_idx_reader_open(const char *fn, const mm_idxopt_t *opt, const char *fn_out)
|
||||
mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads)
|
||||
void mm_idx_reader_close(mm_idx_reader_t *r)
|
||||
void mm_idx_destroy(mm_idx_t *mi)
|
||||
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
|
||||
|
||||
#
|
||||
# Mapping (key struct defined in cminimap2.h below)
|
||||
#
|
||||
ctypedef struct mm_reg1_t:
|
||||
pass
|
||||
|
||||
ctypedef struct mm_tbuf_t:
|
||||
pass
|
||||
|
||||
mm_tbuf_t *mm_tbuf_init()
|
||||
void mm_tbuf_destroy(mm_tbuf_t *b)
|
||||
mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name)
|
||||
|
||||
cdef extern from "cminimap2.h":
|
||||
ctypedef struct mm_hitpy_t:
|
||||
const char *ctg
|
||||
int32_t ctg_start, ctg_end
|
||||
int32_t qry_start, qry_end
|
||||
int32_t n_cigar32
|
||||
uint8_t strand, mapq, is_primary
|
||||
uint32_t *cigar32
|
||||
|
||||
mm_hitpy_t *mm_reg2hitpy(const mm_idx_t *mi, int n_regs, mm_reg1_t *regs)
|
||||
void mm_free_reg1(mm_reg1_t *r)
|
||||
|
|
@ -0,0 +1,110 @@
|
|||
from libc.stdint cimport uint8_t, int8_t
|
||||
from libc.stdlib cimport free
|
||||
cimport cminimap2
|
||||
|
||||
cdef class Alignment:
|
||||
cdef const char *_ctg
|
||||
cdef int _r_st, _r_en
|
||||
cdef int _q_st, _q_en
|
||||
cdef int8_t _is_rev, _is_primary
|
||||
cdef uint8_t _mapq
|
||||
cdef _cigar
|
||||
|
||||
def __cinit__(self, ctg, cs, ce, strand, qs, qe, mapq, cigar, is_primary):
|
||||
self._ctg, self._r_st, self._r_en = ctg, cs, ce
|
||||
self._is_rev, self._q_st, self._q_en = strand, qs, qe
|
||||
self._mapq = mapq
|
||||
self._cigar = cigar
|
||||
self._is_primary = is_primary
|
||||
|
||||
@property
|
||||
def ctg(self):
|
||||
return self._ctg
|
||||
|
||||
@property
|
||||
def r_st(self):
|
||||
return self._r_st
|
||||
|
||||
@property
|
||||
def r_en(self):
|
||||
return self._r_en
|
||||
|
||||
@property
|
||||
def is_rev(self):
|
||||
return self._is_rev
|
||||
|
||||
@property
|
||||
def is_primary(self):
|
||||
return self._is_primary
|
||||
|
||||
@property
|
||||
def q_st(self):
|
||||
return self._q_st
|
||||
|
||||
@property
|
||||
def q_en(self):
|
||||
return self._q_en
|
||||
|
||||
@property
|
||||
def mapq(self):
|
||||
return self._mapq
|
||||
|
||||
@property
|
||||
def cigar(self):
|
||||
return self._cigar
|
||||
|
||||
cdef class ThreadBuffer:
|
||||
cdef cminimap2.mm_tbuf_t *_b
|
||||
|
||||
def __cinit__(self):
|
||||
self._b = cminimap2.mm_tbuf_init()
|
||||
|
||||
def __dealloc__(self):
|
||||
cminimap2.mm_tbuf_destroy(self._b)
|
||||
|
||||
cdef class Aligner:
|
||||
cdef cminimap2.mm_idx_t *_idx
|
||||
cdef public cminimap2.mm_idxopt_t idx_opt
|
||||
cdef public cminimap2.mm_mapopt_t map_opt
|
||||
|
||||
def __cinit__(self, fn, preset=None):
|
||||
self.config(preset)
|
||||
cdef cminimap2.mm_idx_reader_t *r;
|
||||
r = cminimap2.mm_idx_reader_open(fn, &self.idx_opt, NULL)
|
||||
self._idx = cminimap2.mm_idx_reader_read(r, 3) # NB: ONLY read the first part
|
||||
cminimap2.mm_idx_reader_close(r)
|
||||
cminimap2.mm_mapopt_update(&self.map_opt, self._idx)
|
||||
|
||||
def __dealloc__(self):
|
||||
if self._idx is not NULL:
|
||||
cminimap2.mm_idx_destroy(self._idx)
|
||||
|
||||
def config(self, preset=None):
|
||||
cminimap2.mm_set_opt(NULL, &self.idx_opt, &self.map_opt)
|
||||
if preset is not None:
|
||||
cminimap2.mm_set_opt(preset, &self.idx_opt, &self.map_opt)
|
||||
self.map_opt.flag |= 4 # always perform alignment
|
||||
|
||||
def map(self, seq, buf=None):
|
||||
cdef cminimap2.mm_reg1_t *regs
|
||||
cdef ThreadBuffer b
|
||||
cdef int n_regs
|
||||
|
||||
if buf is None: b = ThreadBuffer()
|
||||
else: b = buf
|
||||
regs = cminimap2.mm_map(self._idx, len(seq), seq, &n_regs, b._b, &self.map_opt, NULL)
|
||||
hits = cminimap2.mm_reg2hitpy(self._idx, n_regs, regs)
|
||||
|
||||
arr = []
|
||||
for i in range(n_regs):
|
||||
h = hits[i]
|
||||
cigar = []
|
||||
for k in range(h.n_cigar32):
|
||||
c = h.cigar32[k]
|
||||
cigar.append([c>>4, c&0xf])
|
||||
arr.append(Alignment(h.ctg, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary))
|
||||
cminimap2.mm_free_reg1(®s[i])
|
||||
free(regs)
|
||||
free(hits)
|
||||
|
||||
return arr
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
from setuptools import setup, Extension
|
||||
from Cython.Build import cythonize
|
||||
import sys
|
||||
|
||||
sys.path.append('python')
|
||||
|
||||
setup(
|
||||
name = 'minimap2',
|
||||
version = '2.1.1',
|
||||
url = 'https://github.com/lh3/minimap2',
|
||||
description = 'Minimap2 python binding',
|
||||
author = 'Heng Li',
|
||||
author_email = 'lh3@me.com',
|
||||
keywords = ['bioinformatics', 'sequence-alignment'],
|
||||
ext_modules = cythonize([Extension('minimap2',
|
||||
['python/minimap2.pyx', 'align.c', 'bseq.c', 'chain.c', 'format.c', 'hit.c', 'index.c', 'kalloc.c',
|
||||
'ksw2_extd2_sse.c', 'ksw2_exts2_sse.c', 'ksw2_extz2_sse.c', 'ksw2_ll_sse.c', 'kthread.c', 'map.c',
|
||||
'misc.c', 'sdust.c', 'sketch.c'],
|
||||
extra_compile_args = ['-msse4'],
|
||||
include_dirs = ['.'])])
|
||||
)
|
||||
Loading…
Reference in New Issue