improvement to the python binding

This commit is contained in:
Heng Li 2017-09-16 11:14:01 -04:00
parent 7e34bea7ab
commit b22703a354
3 changed files with 92 additions and 68 deletions

View File

@ -8,28 +8,27 @@ typedef struct {
const char *ctg; const char *ctg;
int32_t ctg_start, ctg_end; int32_t ctg_start, ctg_end;
int32_t qry_start, qry_end; int32_t qry_start, qry_end;
int32_t blen, NM, ctg_len;
uint8_t mapq, is_primary;
int8_t strand, trans_strand;
int32_t n_cigar32; int32_t n_cigar32;
uint8_t strand, mapq, is_primary;
uint32_t *cigar32; uint32_t *cigar32;
} mm_hitpy_t; } mm_hitpy_t;
static inline mm_hitpy_t *mm_reg2hitpy(const mm_idx_t *mi, int n_regs, mm_reg1_t *regs) static inline void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
{ {
mm_hitpy_t *h; h->ctg = mi->seq[r->rid].name;
int i; h->ctg_len = mi->seq[r->rid].len;
h = (mm_hitpy_t*)calloc(n_regs, sizeof(mm_hitpy_t)); h->ctg_start = r->rs, h->ctg_end = r->re;
for (i = 0; i < n_regs; ++i) { h->qry_start = r->qs, h->qry_end = r->qe;
mm_reg1_t *r = &regs[i]; h->strand = r->rev? -1 : 1;
h[i].ctg = mi->seq[r->rid].name; h->mapq = r->mapq;
h[i].ctg_start = r->rs, h[i].ctg_end = r->re; h->blen = r->p->blen;
h[i].qry_start = r->qs, h[i].qry_end = r->qe; h->NM = r->p->n_diff;
h[i].strand = r->rev; h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0;
h[i].mapq = r->mapq; h->is_primary = (r->id == r->parent);
h[i].is_primary = (r->id == r->parent); h->n_cigar32 = r->p->n_cigar;
h[i].n_cigar32 = r->p->n_cigar; h->cigar32 = r->p->cigar;
h[i].cigar32 = r->p->cigar;
}
return h;
} }
static inline void mm_free_reg1(mm_reg1_t *r) static inline void mm_free_reg1(mm_reg1_t *r)

View File

@ -79,12 +79,13 @@ cdef extern from "minimap.h":
# #
cdef extern from "cminimap2.h": cdef extern from "cminimap2.h":
ctypedef struct mm_hitpy_t: ctypedef struct mm_hitpy_t:
const char *ctg const char *ctg;
int32_t ctg_start, ctg_end int32_t ctg_start, ctg_end;
int32_t qry_start, qry_end int32_t qry_start, qry_end;
int32_t n_cigar32 int32_t blen, NM, ctg_len;
uint8_t strand, mapq, is_primary uint8_t strand, mapq, is_primary, trans_strand;
uint32_t *cigar32 int32_t n_cigar32;
uint32_t *cigar32;
mm_hitpy_t *mm_reg2hitpy(const mm_idx_t *mi, int n_regs, mm_reg1_t *regs) void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
void mm_free_reg1(mm_reg1_t *r) void mm_free_reg1(mm_reg1_t *r)

View File

@ -3,55 +3,70 @@ from libc.stdlib cimport free
cimport cminimap2 cimport cminimap2
cdef class Alignment: cdef class Alignment:
cdef const char *_ctg cdef int _ctg_len, _r_st, _r_en
cdef int _r_st, _r_en
cdef int _q_st, _q_en cdef int _q_st, _q_en
cdef int8_t _is_rev, _is_primary cdef int _NM, _blen
cdef uint8_t _mapq cdef int8_t _strand, _trans_strand
cdef _cigar cdef uint8_t _mapq, _is_primary
cdef _ctg, _cigar # these are python objects
def __cinit__(self, ctg, cs, ce, strand, qs, qe, mapq, cigar, is_primary): def __cinit__(self, ctg, cl, cs, ce, strand, qs, qe, mapq, cigar, is_primary, blen, NM, trans_strand):
self._ctg, self._r_st, self._r_en = ctg, cs, ce self._ctg, self._ctg_len, self._r_st, self._r_en = ctg, cl, cs, ce
self._is_rev, self._q_st, self._q_en = strand, qs, qe self._strand, self._q_st, self._q_en = strand, qs, qe
self._NM, self._blen = NM, blen
self._mapq = mapq self._mapq = mapq
self._cigar = cigar self._cigar = cigar
self._is_primary = is_primary self._is_primary = is_primary
self._trans_strand = trans_strand
@property @property
def ctg(self): def ctg(self): return self._ctg
return self._ctg
@property @property
def r_st(self): def ctg_len(self): return self._ctg_len
return self._r_st
@property @property
def r_en(self): def r_st(self): return self._r_st
return self._r_en
@property @property
def is_rev(self): def r_en(self): return self._r_en
return (self._is_rev != 0)
@property @property
def is_primary(self): def strand(self): return self.strand
return (self._is_primary != 0)
@property @property
def q_st(self): def trans_strand(self): return self.trans_strand
return self._q_st
@property @property
def q_en(self): def NM(self): return self._NM
return self._q_en
@property @property
def mapq(self): def is_primary(self): return (self._is_primary != 0)
return self._mapq
@property @property
def cigar(self): def q_st(self): return self._q_st
return self._cigar
@property
def q_en(self): return self._q_en
@property
def mapq(self): return self._mapq
@property
def cigar(self): return self._cigar
@property
def cigar_str(self):
return "".join(map(lambda x: str(x[0]) + 'MIDNSH'[x[1]], self._cigar))
def __str__(self):
if self._strand > 0: strand = '+'
elif self._strand < 0: strand = '-'
else: strand = '?'
if self._is_primary != 0: tp = 'tp:A:P'
else: tp = 'tp:A:S'
return "\t".join([str(self._q_st), str(self._q_en), strand, self._ctg, str(self._ctg_len), str(self._r_st), str(self._r_en),
str(self._blen - self._NM), str(self._blen), str(self._mapq), "NM:i:" + str(self._NM), tp, "cg:Z:" + self.cigar_str])
cdef class ThreadBuffer: cdef class ThreadBuffer:
cdef cminimap2.mm_tbuf_t *_b cdef cminimap2.mm_tbuf_t *_b
@ -67,46 +82,55 @@ cdef class Aligner:
cdef public cminimap2.mm_idxopt_t idx_opt cdef public cminimap2.mm_idxopt_t idx_opt
cdef public cminimap2.mm_mapopt_t map_opt cdef public cminimap2.mm_mapopt_t map_opt
def __cinit__(self, fn, preset=None): def __cinit__(self, fn_idx_in, preset=None, k=None, w=None, min_cnt=None, min_chain_score=None, min_dp_score=None, bw=None, best_n=None, n_threads=3, fn_idx_out=None):
self._config(preset)
cdef cminimap2.mm_idx_reader_t *r; cdef cminimap2.mm_idx_reader_t *r;
r = cminimap2.mm_idx_reader_open(fn, &self.idx_opt, NULL)
self._idx = cminimap2.mm_idx_reader_read(r, 3) # NB: ONLY read the first part self._config(preset)
cminimap2.mm_idx_reader_close(r) self.idx_opt.batch_size = 0x7fffffffffffffffL # always build a uni-part index
cminimap2.mm_mapopt_update(&self.map_opt, self._idx) if k is not None: self.idx_opt.k = k
if w is not None: self.idx_opt.w = w
if min_cnt is not None: self.map_opt.min_cnt = min_cnt
if min_chain_score is not None: self.map_opt.min_chain_score = min_chain_score
if min_dp_score is not None: self.map_opt.min_dp_max = min_dp_score
if bw is not None: self.map_opt.bw = bw
if best_n is not None: self.best_n = best_n
if fn_idx_out is None:
r = cminimap2.mm_idx_reader_open(fn_idx_in, &self.idx_opt, NULL)
else:
r = cminimap2.mm_idx_reader_open(fn_idx_in, &self.idx_opt, fn_idx_out)
if r is not NULL:
self._idx = cminimap2.mm_idx_reader_read(r, n_threads) # NB: ONLY read the first part
cminimap2.mm_idx_reader_close(r)
cminimap2.mm_mapopt_update(&self.map_opt, self._idx)
def __dealloc__(self): def __dealloc__(self):
if self._idx is not NULL: if self._idx is not NULL:
cminimap2.mm_idx_destroy(self._idx) cminimap2.mm_idx_destroy(self._idx)
def _config(self, preset=None): cdef _config(self, preset=None):
cminimap2.mm_set_opt(NULL, &self.idx_opt, &self.map_opt) cminimap2.mm_set_opt(NULL, &self.idx_opt, &self.map_opt) # set the default options
if preset is not None: if preset is not None:
cminimap2.mm_set_opt(preset, &self.idx_opt, &self.map_opt) cminimap2.mm_set_opt(preset, &self.idx_opt, &self.map_opt) # preset
self.map_opt.flag |= 4 # always perform alignment self.map_opt.flag |= 4 # always perform alignment
def map(self, seq, buf=None): def map(self, seq, buf=None):
cdef cminimap2.mm_reg1_t *regs cdef cminimap2.mm_reg1_t *regs
cdef cminimap2.mm_hitpy_t h
cdef ThreadBuffer b cdef ThreadBuffer b
cdef int n_regs cdef int n_regs
if self._idx is NULL: if self._idx is NULL: return None
return None
if buf is None: b = ThreadBuffer() if buf is None: b = ThreadBuffer()
else: b = buf else: b = buf
regs = cminimap2.mm_map(self._idx, len(seq), seq, &n_regs, b._b, &self.map_opt, NULL) regs = cminimap2.mm_map(self._idx, len(seq), seq, &n_regs, b._b, &self.map_opt, NULL)
hits = cminimap2.mm_reg2hitpy(self._idx, n_regs, regs)
arr = []
for i in range(n_regs): for i in range(n_regs):
h = hits[i] cminimap2.mm_reg2hitpy(self._idx, &regs[i], &h)
cigar = [] cigar = []
for k in range(h.n_cigar32): for k in range(h.n_cigar32):
c = h.cigar32[k] c = h.cigar32[k]
cigar.append([c>>4, c&0xf]) cigar.append([c>>4, c&0xf])
arr.append(Alignment(h.ctg, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary)) yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.blen, h.NM, h.trans_strand)
cminimap2.mm_free_reg1(&regs[i]) cminimap2.mm_free_reg1(&regs[i])
free(regs) free(regs)
free(hits)
return arr