diff --git a/python/cminimap2.h b/python/cminimap2.h index b7e90b5..2eb9faa 100644 --- a/python/cminimap2.h +++ b/python/cminimap2.h @@ -8,28 +8,27 @@ typedef struct { const char *ctg; int32_t ctg_start, ctg_end; int32_t qry_start, qry_end; + int32_t blen, NM, ctg_len; + uint8_t mapq, is_primary; + int8_t strand, trans_strand; int32_t n_cigar32; - uint8_t strand, mapq, is_primary; uint32_t *cigar32; } mm_hitpy_t; -static inline mm_hitpy_t *mm_reg2hitpy(const mm_idx_t *mi, int n_regs, mm_reg1_t *regs) +static inline void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h) { - mm_hitpy_t *h; - int i; - h = (mm_hitpy_t*)calloc(n_regs, sizeof(mm_hitpy_t)); - for (i = 0; i < n_regs; ++i) { - mm_reg1_t *r = ®s[i]; - h[i].ctg = mi->seq[r->rid].name; - h[i].ctg_start = r->rs, h[i].ctg_end = r->re; - h[i].qry_start = r->qs, h[i].qry_end = r->qe; - h[i].strand = r->rev; - h[i].mapq = r->mapq; - h[i].is_primary = (r->id == r->parent); - h[i].n_cigar32 = r->p->n_cigar; - h[i].cigar32 = r->p->cigar; - } - return h; + h->ctg = mi->seq[r->rid].name; + h->ctg_len = mi->seq[r->rid].len; + h->ctg_start = r->rs, h->ctg_end = r->re; + h->qry_start = r->qs, h->qry_end = r->qe; + h->strand = r->rev? -1 : 1; + h->mapq = r->mapq; + h->blen = r->p->blen; + h->NM = r->p->n_diff; + h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0; + h->is_primary = (r->id == r->parent); + h->n_cigar32 = r->p->n_cigar; + h->cigar32 = r->p->cigar; } static inline void mm_free_reg1(mm_reg1_t *r) diff --git a/python/cminimap2.pxd b/python/cminimap2.pxd index b4511fe..b19c347 100644 --- a/python/cminimap2.pxd +++ b/python/cminimap2.pxd @@ -79,12 +79,13 @@ cdef extern from "minimap.h": # cdef extern from "cminimap2.h": ctypedef struct mm_hitpy_t: - const char *ctg - int32_t ctg_start, ctg_end - int32_t qry_start, qry_end - int32_t n_cigar32 - uint8_t strand, mapq, is_primary - uint32_t *cigar32 + const char *ctg; + int32_t ctg_start, ctg_end; + int32_t qry_start, qry_end; + int32_t blen, NM, ctg_len; + uint8_t strand, mapq, is_primary, trans_strand; + int32_t n_cigar32; + uint32_t *cigar32; - mm_hitpy_t *mm_reg2hitpy(const mm_idx_t *mi, int n_regs, mm_reg1_t *regs) + void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h) void mm_free_reg1(mm_reg1_t *r) diff --git a/python/minimap2.pyx b/python/minimap2.pyx index ca347be..6aa1a81 100644 --- a/python/minimap2.pyx +++ b/python/minimap2.pyx @@ -3,55 +3,70 @@ from libc.stdlib cimport free cimport cminimap2 cdef class Alignment: - cdef const char *_ctg - cdef int _r_st, _r_en + cdef int _ctg_len, _r_st, _r_en cdef int _q_st, _q_en - cdef int8_t _is_rev, _is_primary - cdef uint8_t _mapq - cdef _cigar + cdef int _NM, _blen + cdef int8_t _strand, _trans_strand + cdef uint8_t _mapq, _is_primary + cdef _ctg, _cigar # these are python objects - def __cinit__(self, ctg, cs, ce, strand, qs, qe, mapq, cigar, is_primary): - self._ctg, self._r_st, self._r_en = ctg, cs, ce - self._is_rev, self._q_st, self._q_en = strand, qs, qe + def __cinit__(self, ctg, cl, cs, ce, strand, qs, qe, mapq, cigar, is_primary, blen, NM, trans_strand): + self._ctg, self._ctg_len, self._r_st, self._r_en = ctg, cl, cs, ce + self._strand, self._q_st, self._q_en = strand, qs, qe + self._NM, self._blen = NM, blen self._mapq = mapq self._cigar = cigar self._is_primary = is_primary + self._trans_strand = trans_strand @property - def ctg(self): - return self._ctg + def ctg(self): return self._ctg @property - def r_st(self): - return self._r_st + def ctg_len(self): return self._ctg_len @property - def r_en(self): - return self._r_en + def r_st(self): return self._r_st @property - def is_rev(self): - return (self._is_rev != 0) + def r_en(self): return self._r_en @property - def is_primary(self): - return (self._is_primary != 0) + def strand(self): return self.strand @property - def q_st(self): - return self._q_st + def trans_strand(self): return self.trans_strand @property - def q_en(self): - return self._q_en + def NM(self): return self._NM @property - def mapq(self): - return self._mapq + def is_primary(self): return (self._is_primary != 0) @property - def cigar(self): - return self._cigar + def q_st(self): return self._q_st + + @property + def q_en(self): return self._q_en + + @property + def mapq(self): return self._mapq + + @property + def cigar(self): return self._cigar + + @property + def cigar_str(self): + return "".join(map(lambda x: str(x[0]) + 'MIDNSH'[x[1]], self._cigar)) + + def __str__(self): + if self._strand > 0: strand = '+' + elif self._strand < 0: strand = '-' + else: strand = '?' + if self._is_primary != 0: tp = 'tp:A:P' + else: tp = 'tp:A:S' + return "\t".join([str(self._q_st), str(self._q_en), strand, self._ctg, str(self._ctg_len), str(self._r_st), str(self._r_en), + str(self._blen - self._NM), str(self._blen), str(self._mapq), "NM:i:" + str(self._NM), tp, "cg:Z:" + self.cigar_str]) cdef class ThreadBuffer: cdef cminimap2.mm_tbuf_t *_b @@ -67,46 +82,55 @@ cdef class Aligner: cdef public cminimap2.mm_idxopt_t idx_opt cdef public cminimap2.mm_mapopt_t map_opt - def __cinit__(self, fn, preset=None): - self._config(preset) + def __cinit__(self, fn_idx_in, preset=None, k=None, w=None, min_cnt=None, min_chain_score=None, min_dp_score=None, bw=None, best_n=None, n_threads=3, fn_idx_out=None): cdef cminimap2.mm_idx_reader_t *r; - r = cminimap2.mm_idx_reader_open(fn, &self.idx_opt, NULL) - self._idx = cminimap2.mm_idx_reader_read(r, 3) # NB: ONLY read the first part - cminimap2.mm_idx_reader_close(r) - cminimap2.mm_mapopt_update(&self.map_opt, self._idx) + + self._config(preset) + self.idx_opt.batch_size = 0x7fffffffffffffffL # always build a uni-part index + if k is not None: self.idx_opt.k = k + if w is not None: self.idx_opt.w = w + if min_cnt is not None: self.map_opt.min_cnt = min_cnt + if min_chain_score is not None: self.map_opt.min_chain_score = min_chain_score + if min_dp_score is not None: self.map_opt.min_dp_max = min_dp_score + if bw is not None: self.map_opt.bw = bw + if best_n is not None: self.best_n = best_n + + if fn_idx_out is None: + r = cminimap2.mm_idx_reader_open(fn_idx_in, &self.idx_opt, NULL) + else: + r = cminimap2.mm_idx_reader_open(fn_idx_in, &self.idx_opt, fn_idx_out) + if r is not NULL: + self._idx = cminimap2.mm_idx_reader_read(r, n_threads) # NB: ONLY read the first part + cminimap2.mm_idx_reader_close(r) + cminimap2.mm_mapopt_update(&self.map_opt, self._idx) def __dealloc__(self): if self._idx is not NULL: cminimap2.mm_idx_destroy(self._idx) - def _config(self, preset=None): - cminimap2.mm_set_opt(NULL, &self.idx_opt, &self.map_opt) + cdef _config(self, preset=None): + cminimap2.mm_set_opt(NULL, &self.idx_opt, &self.map_opt) # set the default options if preset is not None: - cminimap2.mm_set_opt(preset, &self.idx_opt, &self.map_opt) + cminimap2.mm_set_opt(preset, &self.idx_opt, &self.map_opt) # preset self.map_opt.flag |= 4 # always perform alignment def map(self, seq, buf=None): cdef cminimap2.mm_reg1_t *regs + cdef cminimap2.mm_hitpy_t h cdef ThreadBuffer b cdef int n_regs - if self._idx is NULL: - return None + if self._idx is NULL: return None if buf is None: b = ThreadBuffer() else: b = buf regs = cminimap2.mm_map(self._idx, len(seq), seq, &n_regs, b._b, &self.map_opt, NULL) - hits = cminimap2.mm_reg2hitpy(self._idx, n_regs, regs) - arr = [] for i in range(n_regs): - h = hits[i] + cminimap2.mm_reg2hitpy(self._idx, ®s[i], &h) cigar = [] for k in range(h.n_cigar32): c = h.cigar32[k] cigar.append([c>>4, c&0xf]) - arr.append(Alignment(h.ctg, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary)) + yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.blen, h.NM, h.trans_strand) cminimap2.mm_free_reg1(®s[i]) free(regs) - free(hits) - - return arr