r819: mappy to support cs/MD

This commit is contained in:
Heng Li 2018-07-24 23:29:55 -04:00
parent 8c064a5f29
commit ff9917a1c4
11 changed files with 115 additions and 28 deletions

View File

@ -133,10 +133,10 @@ void mm_write_sam_hdr(const mm_idx_t *idx, const char *rg, const char *ver, int
free(str.s); free(str.s);
} }
static void write_cs_core(kstring_t *s, const uint8_t *tseq, const uint8_t *qseq, const mm_reg1_t *r, char *tmp, int no_iden) static void write_cs_core(kstring_t *s, const uint8_t *tseq, const uint8_t *qseq, const mm_reg1_t *r, char *tmp, int no_iden, int write_tag)
{ {
int i, q_off, t_off; int i, q_off, t_off;
mm_sprintf_lite(s, "\tcs:Z:"); if (write_tag) mm_sprintf_lite(s, "\tcs:Z:");
for (i = q_off = t_off = 0; i < (int)r->p->n_cigar; ++i) { for (i = q_off = t_off = 0; i < (int)r->p->n_cigar; ++i) {
int j, op = r->p->cigar[i]&0xf, len = r->p->cigar[i]>>4; int j, op = r->p->cigar[i]&0xf, len = r->p->cigar[i]>>4;
assert(op >= 0 && op <= 3); assert(op >= 0 && op <= 3);
@ -181,10 +181,10 @@ static void write_cs_core(kstring_t *s, const uint8_t *tseq, const uint8_t *qseq
assert(t_off == r->re - r->rs && q_off == r->qe - r->qs); assert(t_off == r->re - r->rs && q_off == r->qe - r->qs);
} }
static void write_MD_core(kstring_t *s, const uint8_t *tseq, const uint8_t *qseq, const mm_reg1_t *r, char *tmp) static void write_MD_core(kstring_t *s, const uint8_t *tseq, const uint8_t *qseq, const mm_reg1_t *r, char *tmp, int write_tag)
{ {
int i, q_off, t_off, l_MD = 0; int i, q_off, t_off, l_MD = 0;
mm_sprintf_lite(s, "\tMD:Z:"); if (write_tag) mm_sprintf_lite(s, "\tMD:Z:");
for (i = q_off = t_off = 0; i < (int)r->p->n_cigar; ++i) { for (i = q_off = t_off = 0; i < (int)r->p->n_cigar; ++i) {
int j, op = r->p->cigar[i]&0xf, len = r->p->cigar[i]>>4; int j, op = r->p->cigar[i]&0xf, len = r->p->cigar[i]>>4;
assert(op >= 0 && op <= 2); // introns (aka reference skips) are not supported assert(op >= 0 && op <= 2); // introns (aka reference skips) are not supported
@ -210,7 +210,7 @@ static void write_MD_core(kstring_t *s, const uint8_t *tseq, const uint8_t *qseq
assert(t_off == r->re - r->rs && q_off == r->qe - r->qs); assert(t_off == r->re - r->rs && q_off == r->qe - r->qs);
} }
static void write_cs_or_MD(void *km, kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int no_iden, int is_MD) static void write_cs_or_MD(void *km, kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int no_iden, int is_MD, int write_tag)
{ {
extern unsigned char seq_nt4_table[256]; extern unsigned char seq_nt4_table[256];
int i; int i;
@ -230,11 +230,34 @@ static void write_cs_or_MD(void *km, kstring_t *s, const mm_idx_t *mi, const mm_
qseq[r->qe - i - 1] = c >= 4? 4 : 3 - c; qseq[r->qe - i - 1] = c >= 4? 4 : 3 - c;
} }
} }
if (is_MD) write_MD_core(s, tseq, qseq, r, tmp); if (is_MD) write_MD_core(s, tseq, qseq, r, tmp, write_tag);
else write_cs_core(s, tseq, qseq, r, tmp, no_iden); else write_cs_core(s, tseq, qseq, r, tmp, no_iden, write_tag);
kfree(km, qseq); kfree(km, tseq); kfree(km, tmp); kfree(km, qseq); kfree(km, tseq); kfree(km, tmp);
} }
int mm_gen_cs_or_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int is_MD, int no_iden)
{
mm_bseq1_t t;
kstring_t str;
str.s = *buf, str.l = 0, str.m = *max_len;
t.l_seq = strlen(seq);
t.seq = (char*)seq;
write_cs_or_MD(km, &str, mi, &t, r, no_iden, is_MD, 0);
*max_len = str.m;
*buf = str.s;
return str.l;
}
/* Generate the cs string: delegates to mm_gen_cs_or_MD() with MD generation switched off. */
int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden)
{
	const int want_MD = 0; // select the cs writer
	int cs_len;
	cs_len = mm_gen_cs_or_MD(km, buf, max_len, mi, r, seq, want_MD, no_iden);
	return cs_len;
}
/* Generate the MD string: delegates to mm_gen_cs_or_MD() with MD generation switched on. */
int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq)
{
	const int want_MD = 1; // select the MD writer
	const int no_iden = 0; // passed as 0, exactly as before
	int md_len;
	md_len = mm_gen_cs_or_MD(km, buf, max_len, mi, r, seq, want_MD, no_iden);
	return md_len;
}
static inline void write_tags(kstring_t *s, const mm_reg1_t *r) static inline void write_tags(kstring_t *s, const mm_reg1_t *r)
{ {
int type; int type;
@ -277,7 +300,7 @@ void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const m
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDNSHP=XB"[r->p->cigar[k]&0xf]); mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MIDNSHP=XB"[r->p->cigar[k]&0xf]);
} }
if (r->p && (opt_flag & (MM_F_OUT_CS|MM_F_OUT_MD))) if (r->p && (opt_flag & (MM_F_OUT_CS|MM_F_OUT_MD)))
write_cs_or_MD(km, s, mi, t, r, !(opt_flag&MM_F_OUT_CS_LONG), opt_flag&MM_F_OUT_MD); write_cs_or_MD(km, s, mi, t, r, !(opt_flag&MM_F_OUT_CS_LONG), opt_flag&MM_F_OUT_MD, 1);
if ((opt_flag & MM_F_COPY_COMMENT) && t->comment) if ((opt_flag & MM_F_COPY_COMMENT) && t->comment)
mm_sprintf_lite(s, "\t%s", t->comment); mm_sprintf_lite(s, "\t%s", t->comment);
} }
@ -476,7 +499,7 @@ void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
} }
} }
if (r->p && (opt_flag & (MM_F_OUT_CS|MM_F_OUT_MD))) if (r->p && (opt_flag & (MM_F_OUT_CS|MM_F_OUT_MD)))
write_cs_or_MD(km, s, mi, t, r, !(opt_flag&MM_F_OUT_CS_LONG), opt_flag&MM_F_OUT_MD); write_cs_or_MD(km, s, mi, t, r, !(opt_flag&MM_F_OUT_CS_LONG), opt_flag&MM_F_OUT_MD, 1);
if (cigar_in_tag) if (cigar_in_tag)
write_sam_cigar(s, flag, 1, t->l_seq, r, opt_flag); write_sam_cigar(s, flag, 1, t->l_seq, r, opt_flag);
} }

2
main.c
View File

@ -10,7 +10,7 @@
#include "getopt.h" #include "getopt.h"
#endif #endif
#define MM_VERSION "2.11-r815-dirty" #define MM_VERSION "2.11-r819-dirty"
#ifdef __linux__ #ifdef __linux__
#include <sys/resource.h> #include <sys/resource.h>

7
map.c
View File

@ -29,6 +29,11 @@ void mm_tbuf_destroy(mm_tbuf_t *b)
free(b); free(b);
} }
/* Return the km member stored in the thread buffer b. */
void *mm_tbuf_get_km(mm_tbuf_t *b)
{
	void *km = b->km;
	return km;
}
static int mm_dust_minier(void *km, int n, mm128_t *a, int l_seq, const char *seq, int sdust_thres) static int mm_dust_minier(void *km, int n, mm128_t *a, int l_seq, const char *seq, int sdust_thres)
{ {
int n_dreg, j, k, u = 0; int n_dreg, j, k, u = 0;
@ -682,7 +687,7 @@ int mm_split_merge(int n_segs, const char **fn, const mm_mapopt_t *opt, int n_sp
for (pl.rid_shift[0] = 0, i = 1; i < n_split_idx; ++i) for (pl.rid_shift[0] = 0, i = 1; i < n_split_idx; ++i)
pl.rid_shift[i] += pl.rid_shift[i - 1]; pl.rid_shift[i] += pl.rid_shift[i - 1];
if (opt->flag & MM_F_OUT_SAM) if (opt->flag & MM_F_OUT_SAM)
for (i = 0; i < pl.mi->n_seq; ++i) for (i = 0; i < (int32_t)pl.mi->n_seq; ++i)
printf("@SQ\tSN:%s\tLN:%d\n", pl.mi->seq[i].name, pl.mi->seq[i].len); printf("@SQ\tSN:%s\tLN:%d\n", pl.mi->seq[i].name, pl.mi->seq[i].len);
kt_pipeline(2, worker_pipeline, &pl, 3); kt_pipeline(2, worker_pipeline, &pl, 3);

View File

@ -301,6 +301,8 @@ mm_tbuf_t *mm_tbuf_init(void);
*/ */
void mm_tbuf_destroy(mm_tbuf_t *b); void mm_tbuf_destroy(mm_tbuf_t *b);
void *mm_tbuf_get_km(mm_tbuf_t *b);
/** /**
* Align a query sequence against an index * Align a query sequence against an index
* *
@ -337,6 +339,22 @@ int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int
int mm_map_file_frag(const mm_idx_t *idx, int n_segs, const char **fn, const mm_mapopt_t *opt, int n_threads); int mm_map_file_frag(const mm_idx_t *idx, int n_segs, const char **fn, const mm_mapopt_t *opt, int n_threads);
/**
* Generate the cs tag (new in 2.12)
*
* @param km memory blocks; set to NULL if unsure
* @param buf buffer to write the cs/MD tag; typically NULL on the first call
* @param max_len max length of the buffer; typically set to 0 on the first call
* @param mi index
* @param r alignment
* @param seq query sequence
* @param no_iden true to use : instead of =
*
* @return the length of cs
*/
int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden);
int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq);
// query sequence name and sequence in the minimap2 index // query sequence name and sequence in the minimap2 index
int mm_idx_index_name(mm_idx_t *mi); int mm_idx_index_name(mm_idx_t *mi);
int mm_idx_name2id(const mm_idx_t *mi, const char *name); int mm_idx_name2id(const mm_idx_t *mi, const char *name);

View File

@ -132,6 +132,11 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo) int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo)
{ {
if (mo->split_prefix && (mo->flag & (MM_F_OUT_CS|MM_F_OUT_MD))) {
if (mm_verbose >= 1)
fprintf(stderr, "[ERROR]\033[1;31m --cs or --MD doesn't work with --split-prefix\033[0m\n");
return -6;
}
if (io->k <= 0 || io->w <= 0) { if (io->k <= 0 || io->w <= 0) {
if (mm_verbose >= 1) if (mm_verbose >= 1)
fprintf(stderr, "[ERROR]\033[1;31m -k and -w must be positive\033[0m\n"); fprintf(stderr, "[ERROR]\033[1;31m -k and -w must be positive\033[0m\n");

View File

@ -43,7 +43,7 @@ The following Python script demonstrates the key functionality of mappy:
APIs APIs
---- ----
Mappy implements two classes and one global function. Mappy implements two classes and two global functions.
Class mappy.Aligner Class mappy.Aligner
~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~
@ -83,13 +83,15 @@ This constructor accepts the following arguments:
.. code:: python .. code:: python
mappy.Aligner.map(seq, seq2=None) mappy.Aligner.map(seq, seq2=None, cs=False, MD=False)
This method aligns :code:`seq` against the index. It is a generator, *yielding* This method aligns :code:`seq` against the index. It is a generator, *yielding*
a series of :code:`mappy.Alignment` objects. If :code:`seq2` is present, mappy a series of :code:`mappy.Alignment` objects. If :code:`seq2` is present, mappy
performs paired-end alignment, assuming the two ends are in the FR orientation. performs paired-end alignment, assuming the two ends are in the FR orientation.
Alignments of the two ends can be distinguished by the :code:`read_num` field Alignments of the two ends can be distinguished by the :code:`read_num` field
(see Class mappy.Alignment below). (see Class mappy.Alignment below). Argument :code:`cs` asks mappy to generate
the :code:`cs` tag; :code:`MD` is similar. These two arguments might slightly
degrade performance and are not enabled by default.
.. code:: python .. code:: python
@ -139,6 +141,11 @@ properties:
* **cigar**: CIGAR returned as an array of shape :code:`(n_cigar,2)`. The two * **cigar**: CIGAR returned as an array of shape :code:`(n_cigar,2)`. The two
numbers give the length and the operator of each CIGAR operation. numbers give the length and the operator of each CIGAR operation.
* **MD**: the :code:`MD` tag as in the SAM format. It is an empty string unless
the :code:`MD` argument is applied when calling :code:`mappy.Aligner.map()`.
* **cs**: the :code:`cs` tag.
An :code:`Alignment` object can be converted to a string with :code:`str()` in An :code:`Alignment` object can be converted to a string with :code:`str()` in
the following format: the following format:

View File

@ -126,8 +126,8 @@ static char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int e
*len = 0; *len = 0;
rid = mm_idx_name2id(mi, name); rid = mm_idx_name2id(mi, name);
if (rid < 0) return 0; if (rid < 0) return 0;
if (st >= mi->seq[rid].len || st >= en) return 0; if ((uint32_t)st >= mi->seq[rid].len || st >= en) return 0;
if (en < 0 || en > mi->seq[rid].len) if (en < 0 || (uint32_t)en > mi->seq[rid].len)
en = mi->seq[rid].len; en = mi->seq[rid].len;
s = (char*)malloc(en - st + 1); s = (char*)malloc(en - st + 1);
*len = mm_idx_getseq(mi, rid, st, en, (uint8_t*)s); *len = mm_idx_getseq(mi, rid, st, en, (uint8_t*)s);

View File

@ -40,6 +40,7 @@ cdef extern from "minimap.h":
int32_t mid_occ int32_t mid_occ
int32_t max_occ int32_t max_occ
int mini_batch_size int mini_batch_size
const char *split_prefix
int mm_set_opt(char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) int mm_set_opt(char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
int mm_verbose int mm_verbose
@ -86,6 +87,9 @@ cdef extern from "minimap.h":
mm_tbuf_t *mm_tbuf_init() mm_tbuf_t *mm_tbuf_init()
void mm_tbuf_destroy(mm_tbuf_t *b) void mm_tbuf_destroy(mm_tbuf_t *b)
void *mm_tbuf_get_km(mm_tbuf_t *b)
int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden)
int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq)
# #
# Helper header (because it is hard to expose mm_reg1_t with Cython) # Helper header (because it is hard to expose mm_reg1_t with Cython)

View File

@ -14,9 +14,9 @@ cdef class Alignment:
cdef int8_t _strand, _trans_strand cdef int8_t _strand, _trans_strand
cdef uint8_t _mapq, _is_primary cdef uint8_t _mapq, _is_primary
cdef int _seg_id cdef int _seg_id
cdef _ctg, _cigar # these are python objects cdef _ctg, _cigar, _cs, _MD # these are python objects
def __cinit__(self, ctg, cl, cs, ce, strand, qs, qe, mapq, cigar, is_primary, mlen, blen, NM, trans_strand, seg_id): def __cinit__(self, ctg, cl, cs, ce, strand, qs, qe, mapq, cigar, is_primary, mlen, blen, NM, trans_strand, seg_id, cs_str, MD_str):
self._ctg = ctg if isinstance(ctg, str) else ctg.decode() self._ctg = ctg if isinstance(ctg, str) else ctg.decode()
self._ctg_len, self._r_st, self._r_en = cl, cs, ce self._ctg_len, self._r_st, self._r_en = cl, cs, ce
self._strand, self._q_st, self._q_en = strand, qs, qe self._strand, self._q_st, self._q_en = strand, qs, qe
@ -26,6 +26,8 @@ cdef class Alignment:
self._is_primary = is_primary self._is_primary = is_primary
self._trans_strand = trans_strand self._trans_strand = trans_strand
self._seg_id = seg_id self._seg_id = seg_id
self._cs = cs_str
self._MD = MD_str
@property @property
def ctg(self): return self._ctg def ctg(self): return self._ctg
@ -72,6 +74,12 @@ cdef class Alignment:
@property @property
def read_num(self): return self._seg_id + 1 def read_num(self): return self._seg_id + 1
@property
def cs(self): return self._cs
@property
def MD(self): return self._MD
@property @property
def cigar_str(self): def cigar_str(self):
return "".join(map(lambda x: str(x[0]) + 'MIDNSH'[x[1]], self._cigar)) return "".join(map(lambda x: str(x[0]) + 'MIDNSH'[x[1]], self._cigar))
@ -85,8 +93,10 @@ cdef class Alignment:
if self._trans_strand > 0: ts = 'ts:A:+' if self._trans_strand > 0: ts = 'ts:A:+'
elif self._trans_strand < 0: ts = 'ts:A:-' elif self._trans_strand < 0: ts = 'ts:A:-'
else: ts = 'ts:A:.' else: ts = 'ts:A:.'
return "\t".join([str(self._q_st), str(self._q_en), strand, self._ctg, str(self._ctg_len), str(self._r_st), str(self._r_en), a = [str(self._q_st), str(self._q_en), strand, self._ctg, str(self._ctg_len), str(self._r_st), str(self._r_en),
str(self._mlen), str(self._blen), str(self._mapq), tp, ts, "cg:Z:" + self.cigar_str]) str(self._mlen), str(self._blen), str(self._mapq), tp, ts, "cg:Z:" + self.cigar_str]
if self._cs != "": a.append("cs:Z:" + self._cs)
return "\t".join(a)
cdef class ThreadBuffer: cdef class ThreadBuffer:
cdef cmappy.mm_tbuf_t *_b cdef cmappy.mm_tbuf_t *_b
@ -135,18 +145,23 @@ cdef class Aligner:
def __bool__(self): def __bool__(self):
return (self._idx != NULL) return (self._idx != NULL)
def map(self, seq, seq2=None, buf=None, max_frag_len=None): def map(self, seq, seq2=None, buf=None, cs=False, MD=False, max_frag_len=None):
cdef cmappy.mm_reg1_t *regs cdef cmappy.mm_reg1_t *regs
cdef cmappy.mm_hitpy_t h cdef cmappy.mm_hitpy_t h
cdef ThreadBuffer b cdef ThreadBuffer b
cdef int n_regs cdef int n_regs
cdef char *cs_str = NULL
cdef int l_cs_str, m_cs_str = 0
cdef void *km
cdef cmappy.mm_mapopt_t map_opt cdef cmappy.mm_mapopt_t map_opt
map_opt = self.map_opt map_opt = self.map_opt
if max_frag_len is not None: map_opt.max_frag_len = max_frag_len if max_frag_len is not None: map_opt.max_frag_len = max_frag_len
if self._idx is NULL: return None if self._idx is NULL: return None
if buf is None: b = ThreadBuffer() if buf is None: b = ThreadBuffer()
else: b = buf else: b = buf
km = cmappy.mm_tbuf_get_km(b._b)
_seq = seq if isinstance(seq, bytes) else seq.encode() _seq = seq if isinstance(seq, bytes) else seq.encode()
if seq2 is None: if seq2 is None:
@ -157,13 +172,21 @@ cdef class Aligner:
for i in range(n_regs): for i in range(n_regs):
cmappy.mm_reg2hitpy(self._idx, &regs[i], &h) cmappy.mm_reg2hitpy(self._idx, &regs[i], &h)
cigar = [] cigar, _cs, _MD = [], '', ''
for k in range(h.n_cigar32): for k in range(h.n_cigar32): # convert the 32-bit CIGAR encoding to Python array
c = h.cigar32[k] c = h.cigar32[k]
cigar.append([c>>4, c&0xf]) cigar.append([c>>4, c&0xf])
yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.mlen, h.blen, h.NM, h.trans_strand, h.seg_id) if cs or MD: # generate the cs and/or the MD tag, if requested
if cs:
l_cs_str = cmappy.mm_gen_cs(km, &cs_str, &m_cs_str, self._idx, &regs[i], _seq, 1)
_cs = cs_str[:l_cs_str] if isinstance(cs_str, str) else cs_str[:l_cs_str].decode()
if MD:
l_cs_str = cmappy.mm_gen_MD(km, &cs_str, &m_cs_str, self._idx, &regs[i], _seq)
_MD = cs_str[:l_cs_str] if isinstance(cs_str, str) else cs_str[:l_cs_str].decode()
yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.mlen, h.blen, h.NM, h.trans_strand, h.seg_id, _cs, _MD)
cmappy.mm_free_reg1(&regs[i]) cmappy.mm_free_reg1(&regs[i])
free(regs) free(regs)
free(cs_str)
def seq(self, str name, int start=0, int end=0x7fffffff): def seq(self, str name, int start=0, int end=0x7fffffff):
cdef int l cdef int l

View File

@ -4,7 +4,7 @@ import sys, getopt
import mappy as mp import mappy as mp
def main(argv): def main(argv):
opts, args = getopt.getopt(argv[1:], "x:n:m:k:w:r:") opts, args = getopt.getopt(argv[1:], "x:n:m:k:w:r:c")
if len(args) < 2: if len(args) < 2:
print("Usage: minimap2.py [options] <ref.fa>|<ref.mmi> <query.fq>") print("Usage: minimap2.py [options] <ref.fa>|<ref.mmi> <query.fq>")
print("Options:") print("Options:")
@ -14,9 +14,10 @@ def main(argv):
print(" -k INT k-mer length") print(" -k INT k-mer length")
print(" -w INT minimizer window length") print(" -w INT minimizer window length")
print(" -r INT band width") print(" -r INT band width")
print(" -c output the cs tag")
sys.exit(1) sys.exit(1)
preset, min_cnt, min_sc, k, w, bw = None, None, None, None, None, None preset, min_cnt, min_sc, k, w, bw, out_cs = None, None, None, None, None, None, False
for opt, arg in opts: for opt, arg in opts:
if opt == '-x': preset = arg if opt == '-x': preset = arg
elif opt == '-n': min_cnt = int(arg) elif opt == '-n': min_cnt = int(arg)
@ -24,11 +25,12 @@ def main(argv):
elif opt == '-r': bw = int(arg) elif opt == '-r': bw = int(arg)
elif opt == '-k': k = int(arg) elif opt == '-k': k = int(arg)
elif opt == '-w': w = int(arg) elif opt == '-w': w = int(arg)
elif opt == '-c': out_cs = True
a = mp.Aligner(args[0], preset=preset, min_cnt=min_cnt, min_chain_score=min_sc, k=k, w=w, bw=bw) a = mp.Aligner(args[0], preset=preset, min_cnt=min_cnt, min_chain_score=min_sc, k=k, w=w, bw=bw)
if not a: raise Exception("ERROR: failed to load/build index file '{}'".format(args[0])) if not a: raise Exception("ERROR: failed to load/build index file '{}'".format(args[0]))
for name, seq, qual in mp.fastx_read(args[1]): # read one sequence for name, seq, qual in mp.fastx_read(args[1]): # read one sequence
for h in a.map(seq): # traverse hits for h in a.map(seq, cs=out_cs): # traverse hits
print('{}\t{}\t{}'.format(name, len(seq), h)) print('{}\t{}\t{}'.format(name, len(seq), h))
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -35,7 +35,7 @@ setup(
ext_modules = [Extension('mappy', ext_modules = [Extension('mappy',
sources = [module_src, 'align.c', 'bseq.c', 'chain.c', 'format.c', 'hit.c', 'index.c', 'pe.c', 'options.c', sources = [module_src, 'align.c', 'bseq.c', 'chain.c', 'format.c', 'hit.c', 'index.c', 'pe.c', 'options.c',
'ksw2_extd2_sse.c', 'ksw2_exts2_sse.c', 'ksw2_extz2_sse.c', 'ksw2_ll_sse.c', 'ksw2_extd2_sse.c', 'ksw2_exts2_sse.c', 'ksw2_extz2_sse.c', 'ksw2_ll_sse.c',
'kalloc.c', 'kthread.c', 'map.c', 'misc.c', 'sdust.c', 'sketch.c', 'esterr.c'], 'kalloc.c', 'kthread.c', 'map.c', 'misc.c', 'sdust.c', 'sketch.c', 'esterr.c', 'splitidx.c'],
depends = ['minimap.h', 'bseq.h', 'kalloc.h', 'kdq.h', 'khash.h', 'kseq.h', 'ksort.h', depends = ['minimap.h', 'bseq.h', 'kalloc.h', 'kdq.h', 'khash.h', 'kseq.h', 'ksort.h',
'ksw2.h', 'kthread.h', 'kvec.h', 'mmpriv.h', 'sdust.h', 'ksw2.h', 'kthread.h', 'kvec.h', 'mmpriv.h', 'sdust.h',
'python/cmappy.h', 'python/cmappy.pxd'], 'python/cmappy.h', 'python/cmappy.pxd'],