parent
12a5a5fa3c
commit
46d6349af4
4
hit.c
4
hit.c
|
|
@ -390,8 +390,10 @@ mm_seg_t *mm_seg_gen(void *km, uint32_t hash, int n_segs, const int *qlens, int
|
|||
for (s = 0; s < n_segs; ++s) {
|
||||
regs[s] = mm_gen_regs(km, hash, qlens[s], seg[s].n_u, seg[s].u, seg[s].a);
|
||||
n_regs[s] = seg[s].n_u;
|
||||
for (i = 0; i < n_regs[s]; ++i)
|
||||
for (i = 0; i < n_regs[s]; ++i) {
|
||||
regs[s][i].seg_split = 1;
|
||||
regs[s][i].seg_id = s;
|
||||
}
|
||||
}
|
||||
return seg;
|
||||
}
|
||||
|
|
|
|||
2
main.c
2
main.c
|
|
@ -6,7 +6,7 @@
|
|||
#include "mmpriv.h"
|
||||
#include "getopt.h"
|
||||
|
||||
#define MM_VERSION "2.7-r669-dirty"
|
||||
#define MM_VERSION "2.7-r670-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
|
|||
20
map.c
20
map.c
|
|
@ -246,7 +246,7 @@ static void chain_post(const mm_mapopt_t *opt, int max_chain_gap_ref, const mm_i
|
|||
}
|
||||
}
|
||||
|
||||
static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *km, int qlen, const char *seq, const char *qual, int *n_regs, mm_reg1_t *regs, mm128_t *a)
|
||||
static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *km, int qlen, const char *seq, int *n_regs, mm_reg1_t *regs, mm128_t *a)
|
||||
{
|
||||
if (!(opt->flag & MM_F_CIGAR)) return regs;
|
||||
regs = mm_align_skeleton(km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs()
|
||||
|
|
@ -258,7 +258,7 @@ static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *k
|
|||
return regs;
|
||||
}
|
||||
|
||||
void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, const char **quals, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
||||
void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
||||
{
|
||||
int i, j, rep_len, qlen_sum, n_regs0, n_mini_pos;
|
||||
int max_chain_gap_qry, max_chain_gap_ref, is_splice = !!(opt->flag & MM_F_SPLICE), is_sr = !!(opt->flag & MM_F_SR);
|
||||
|
|
@ -339,7 +339,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|||
if (!is_sr) mm_est_err(mi, qlen_sum, n_regs0, regs0, a, n_mini_pos, mini_pos);
|
||||
|
||||
if (n_segs == 1) { // uni-segment
|
||||
regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], quals? quals[0] : 0, &n_regs0, regs0, a);
|
||||
regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], &n_regs0, regs0, a);
|
||||
mm_set_mapq(n_regs0, regs0, opt->min_chain_score, opt->a, rep_len, is_sr);
|
||||
n_regs[0] = n_regs0, regs[0] = regs0;
|
||||
} else { // multi-segment
|
||||
|
|
@ -348,7 +348,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|||
free(regs0);
|
||||
for (i = 0; i < n_segs; ++i) {
|
||||
mm_set_parent(b->km, opt->mask_level, n_regs[i], regs[i], opt->a * 2 + opt->b); // update mm_reg1_t::parent
|
||||
regs[i] = align_regs(opt, mi, b->km, qlens[i], seqs[i], quals? quals[i] : 0, &n_regs[i], regs[i], seg[i].a);
|
||||
regs[i] = align_regs(opt, mi, b->km, qlens[i], seqs[i], &n_regs[i], regs[i], seg[i].a);
|
||||
mm_set_mapq(n_regs[i], regs[i], opt->min_chain_score, opt->a, rep_len, is_sr);
|
||||
}
|
||||
mm_seg_free(b->km, n_segs, seg);
|
||||
|
|
@ -376,7 +376,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|||
mm_reg1_t *mm_map(const mm_idx_t *mi, int qlen, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
||||
{
|
||||
mm_reg1_t *regs;
|
||||
mm_map_frag(mi, 1, &qlen, &seq, 0, n_regs, ®s, b, opt, qname);
|
||||
mm_map_frag(mi, 1, &qlen, &seq, n_regs, ®s, b, opt, qname);
|
||||
return regs;
|
||||
}
|
||||
|
||||
|
|
@ -404,11 +404,10 @@ typedef struct {
|
|||
static void worker_for(void *_data, long i, int tid) // kt_for() callback
|
||||
{
|
||||
step_t *s = (step_t*)_data;
|
||||
int qlens[MM_MAX_SEG], j, off = s->seg_off[i], pe_ori = s->p->opt->pe_ori, is_sr = !!(s->p->opt->flag & MM_F_SR);
|
||||
const char *qseqs[MM_MAX_SEG], *quals[MM_MAX_SEG];
|
||||
int qlens[MM_MAX_SEG], j, off = s->seg_off[i], pe_ori = s->p->opt->pe_ori;
|
||||
const char *qseqs[MM_MAX_SEG];
|
||||
mm_tbuf_t *b = s->buf[tid];
|
||||
assert(s->n_seg[i] <= MM_MAX_SEG);
|
||||
memset(quals, 0, sizeof(char*) * MM_MAX_SEG);
|
||||
if (mm_dbg_flag & MM_DBG_PRINT_QNAME)
|
||||
fprintf(stderr, "QR\t%s\t%d\t%d\n", s->seq[off].name, tid, s->seq[off].l_seq);
|
||||
for (j = 0; j < s->n_seg[i]; ++j) {
|
||||
|
|
@ -416,13 +415,12 @@ static void worker_for(void *_data, long i, int tid) // kt_for() callback
|
|||
mm_revcomp_bseq(&s->seq[off + j]);
|
||||
qlens[j] = s->seq[off + j].l_seq;
|
||||
qseqs[j] = s->seq[off + j].seq;
|
||||
quals[j] = is_sr? s->seq[off + j].qual : 0;
|
||||
}
|
||||
if (s->p->opt->flag & MM_F_INDEPEND_SEG) {
|
||||
for (j = 0; j < s->n_seg[i]; ++j)
|
||||
mm_map_frag(s->p->mi, 1, &qlens[j], &qseqs[j], &quals[j], &s->n_reg[off+j], &s->reg[off+j], b, s->p->opt, s->seq[off+j].name);
|
||||
mm_map_frag(s->p->mi, 1, &qlens[j], &qseqs[j], &s->n_reg[off+j], &s->reg[off+j], b, s->p->opt, s->seq[off+j].name);
|
||||
} else {
|
||||
mm_map_frag(s->p->mi, s->n_seg[i], qlens, qseqs, quals, &s->n_reg[off], &s->reg[off], b, s->p->opt, s->seq[off].name);
|
||||
mm_map_frag(s->p->mi, s->n_seg[i], qlens, qseqs, &s->n_reg[off], &s->reg[off], b, s->p->opt, s->seq[off].name);
|
||||
}
|
||||
for (j = 0; j < s->n_seg[i]; ++j) // flip the query strand and coordinate to the original read strand
|
||||
if (s->n_seg[i] == 2 && ((j == 0 && (pe_ori>>1&1)) || (j == 1 && (pe_ori&1)))) {
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ typedef struct {
|
|||
int32_t mlen, blen; // seeded exact match length; seeded alignment block length
|
||||
int32_t n_sub; // number of suboptimal mappings
|
||||
int32_t score0; // initial chaining score (before chain merging/spliting)
|
||||
uint32_t mapq:8, split:2, rev:1, inv:1, sam_pri:1, proper_frag:1, pe_thru:1, seg_split:1, dummy:16;
|
||||
uint32_t mapq:8, split:2, rev:1, inv:1, sam_pri:1, proper_frag:1, pe_thru:1, seg_split:1, seg_id:8, dummy:8;
|
||||
uint32_t hash;
|
||||
float div;
|
||||
mm_extra_t *p;
|
||||
|
|
@ -279,6 +279,8 @@ void mm_tbuf_destroy(mm_tbuf_t *b);
|
|||
*/
|
||||
mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name);
|
||||
|
||||
void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname);
|
||||
|
||||
/**
|
||||
* Align a fasta/fastq file and print alignments to stdout
|
||||
*
|
||||
|
|
|
|||
|
|
@ -81,10 +81,13 @@ This constructor accepts the following arguments:
|
|||
|
||||
.. code:: python
|
||||
|
||||
mappy.Aligner.map(seq)
|
||||
mappy.Aligner.map(seq, seq2=None)
|
||||
|
||||
This method aligns :code:`seq` against the index. It is a generator, *yielding*
|
||||
a series of :code:`mappy.Alignment` objects.
|
||||
a series of :code:`mappy.Alignment` objects. If :code:`seq2` is present, mappy
|
||||
performs paired-end alignment, assuming the two ends are in the FR orientation.
|
||||
Alignments of the two ends can be distinguished by the :code:`read_num` field
|
||||
(see below).
|
||||
|
||||
Class mappy.Alignment
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
|
@ -118,6 +121,9 @@ properties:
|
|||
* **is_primary**: if the alignment is primary (typically the best and the first
|
||||
to generate)
|
||||
|
||||
* **read_num**: read number that the alignment corresponds to; 1 for the first
|
||||
read and 2 for the second read
|
||||
|
||||
* **cigar_str**: CIGAR string
|
||||
|
||||
* **cigar**: CIGAR returned as an array of shape :code:`(n_cigar,2)`. The two
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ typedef struct {
|
|||
int32_t blen, mlen, NM, ctg_len;
|
||||
uint8_t mapq, is_primary;
|
||||
int8_t strand, trans_strand;
|
||||
int32_t seg_id;
|
||||
int32_t n_cigar32;
|
||||
uint32_t *cigar32;
|
||||
} mm_hitpy_t;
|
||||
|
|
@ -32,6 +33,7 @@ static inline void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
|
|||
h->NM = r->blen - r->mlen + r->p->n_ambi;
|
||||
h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0;
|
||||
h->is_primary = (r->id == r->parent);
|
||||
h->seg_id = r->seg_id;
|
||||
h->n_cigar32 = r->p->n_cigar;
|
||||
h->cigar32 = r->p->cigar;
|
||||
}
|
||||
|
|
@ -68,4 +70,35 @@ static inline void mm_reset_timer(void)
|
|||
mm_realtime0 = realtime();
|
||||
}
|
||||
|
||||
extern unsigned char seq_comp_table[256];
|
||||
static inline mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
|
||||
{
|
||||
if (seq2 == 0) {
|
||||
return mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, NULL);
|
||||
} else {
|
||||
int _n_regs[2];
|
||||
mm_reg1_t *regs[2];
|
||||
char *seq[2];
|
||||
int i, len[2];
|
||||
len[0] = strlen(seq1);
|
||||
len[1] = strlen(seq2);
|
||||
seq[0] = (char*)seq1;
|
||||
seq[1] = strdup(seq2);
|
||||
for (i = 0; i < len[1]>>1; ++i) {
|
||||
int t = seq[1][len[1] - i - 1];
|
||||
seq[1][len[1] - i - 1] = seq_comp_table[(uint8_t)seq[1][i]];
|
||||
seq[1][i] = seq_comp_table[t];
|
||||
}
|
||||
if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]];
|
||||
mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, NULL);
|
||||
for (i = 0; i < _n_regs[1]; ++i)
|
||||
regs[1][i].rev = !regs[1][i].rev;
|
||||
*n_regs = _n_regs[0] + _n_regs[1];
|
||||
regs[0] = (mm_reg1_t*)realloc(regs[0], sizeof(mm_reg1_t) * (*n_regs));
|
||||
memcpy(®s[0][_n_regs[0]], regs[1], _n_regs[1] * sizeof(mm_reg1_t));
|
||||
free(regs[1]);
|
||||
return regs[0];
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -79,7 +79,6 @@ cdef extern from "minimap.h":
|
|||
|
||||
mm_tbuf_t *mm_tbuf_init()
|
||||
void mm_tbuf_destroy(mm_tbuf_t *b)
|
||||
mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name)
|
||||
|
||||
#
|
||||
# Helper header (because it is hard to expose mm_reg1_t with Cython)
|
||||
|
|
@ -92,11 +91,13 @@ cdef extern from "cmappy.h":
|
|||
int32_t blen, mlen, NM, ctg_len
|
||||
uint8_t mapq, is_primary
|
||||
int8_t strand, trans_strand
|
||||
int32_t seg_id
|
||||
int32_t n_cigar32
|
||||
uint32_t *cigar32
|
||||
|
||||
void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
|
||||
void mm_free_reg1(mm_reg1_t *r)
|
||||
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
|
||||
|
||||
ctypedef struct kstring_t:
|
||||
unsigned l, m
|
||||
|
|
|
|||
|
|
@ -10,9 +10,10 @@ cdef class Alignment:
|
|||
cdef int _NM, _mlen, _blen
|
||||
cdef int8_t _strand, _trans_strand
|
||||
cdef uint8_t _mapq, _is_primary
|
||||
cdef int _seg_id
|
||||
cdef _ctg, _cigar # these are python objects
|
||||
|
||||
def __cinit__(self, ctg, cl, cs, ce, strand, qs, qe, mapq, cigar, is_primary, mlen, blen, NM, trans_strand):
|
||||
def __cinit__(self, ctg, cl, cs, ce, strand, qs, qe, mapq, cigar, is_primary, mlen, blen, NM, trans_strand, seg_id):
|
||||
self._ctg = ctg if isinstance(ctg, str) else ctg.decode()
|
||||
self._ctg_len, self._r_st, self._r_en = cl, cs, ce
|
||||
self._strand, self._q_st, self._q_en = strand, qs, qe
|
||||
|
|
@ -21,6 +22,7 @@ cdef class Alignment:
|
|||
self._cigar = cigar
|
||||
self._is_primary = is_primary
|
||||
self._trans_strand = trans_strand
|
||||
self._seg_id = seg_id
|
||||
|
||||
@property
|
||||
def ctg(self): return self._ctg
|
||||
|
|
@ -64,6 +66,9 @@ cdef class Alignment:
|
|||
@property
|
||||
def cigar(self): return self._cigar
|
||||
|
||||
@property
|
||||
def read_num(self): return self._seg_id + 1
|
||||
|
||||
@property
|
||||
def cigar_str(self):
|
||||
return "".join(map(lambda x: str(x[0]) + 'MIDNSH'[x[1]], self._cigar))
|
||||
|
|
@ -125,7 +130,7 @@ cdef class Aligner:
|
|||
def __bool__(self):
|
||||
return (self._idx != NULL)
|
||||
|
||||
def map(self, seq, buf=None):
|
||||
def map(self, seq, seq2=None, buf=None):
|
||||
cdef cmappy.mm_reg1_t *regs
|
||||
cdef cmappy.mm_hitpy_t h
|
||||
cdef ThreadBuffer b
|
||||
|
|
@ -134,7 +139,8 @@ cdef class Aligner:
|
|||
if self._idx is NULL: return None
|
||||
if buf is None: b = ThreadBuffer()
|
||||
else: b = buf
|
||||
regs = cmappy.mm_map(self._idx, len(seq), str.encode(seq), &n_regs, b._b, &self.map_opt, NULL)
|
||||
if seq2 is None: regs = cmappy.mm_map_aux(self._idx, str.encode(seq), NULL, &n_regs, b._b, &self.map_opt)
|
||||
else: regs = cmappy.mm_map_aux(self._idx, str.encode(seq), str.encode(seq2), &n_regs, b._b, &self.map_opt)
|
||||
|
||||
for i in range(n_regs):
|
||||
cmappy.mm_reg2hitpy(self._idx, ®s[i], &h)
|
||||
|
|
@ -142,7 +148,7 @@ cdef class Aligner:
|
|||
for k in range(h.n_cigar32):
|
||||
c = h.cigar32[k]
|
||||
cigar.append([c>>4, c&0xf])
|
||||
yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.mlen, h.blen, h.NM, h.trans_strand)
|
||||
yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.mlen, h.blen, h.NM, h.trans_strand, h.seg_id)
|
||||
cmappy.mm_free_reg1(®s[i])
|
||||
free(regs)
|
||||
|
||||
|
|
|
|||
4
setup.py
4
setup.py
|
|
@ -23,7 +23,7 @@ def readme():
|
|||
|
||||
setup(
|
||||
name = 'mappy',
|
||||
version = '2.7',
|
||||
version = '2.8',
|
||||
url = 'https://github.com/lh3/minimap2',
|
||||
description = 'Minimap2 python binding',
|
||||
long_description = readme(),
|
||||
|
|
@ -39,7 +39,7 @@ setup(
|
|||
depends = ['minimap.h', 'bseq.h', 'kalloc.h', 'kdq.h', 'khash.h', 'kseq.h', 'ksort.h',
|
||||
'ksw2.h', 'kthread.h', 'kvec.h', 'mmpriv.h', 'sdust.h',
|
||||
'python/cmappy.h', 'python/cmappy.pxd'],
|
||||
extra_compile_args = ['-msse4'], # WARNING: ancient x86_64 CPUs don't have SSE4
|
||||
extra_compile_args = ['-DHAVE_KALLOC', '-msse4'], # WARNING: ancient x86_64 CPUs don't have SSE4
|
||||
include_dirs = ['.'],
|
||||
libraries = ['z', 'm', 'pthread'])],
|
||||
classifiers = [
|
||||
|
|
|
|||
Loading…
Reference in New Issue