From 7dc709720861a73bb282636e551a75e0da9eb307 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 20 Feb 2018 09:41:25 -0500 Subject: [PATCH] added reverse complement --- python/README.rst | 12 ++++++++++-- python/cmappy.h | 12 ++++++++++++ python/cmappy.pxd | 1 + python/mappy.pyx | 5 +++++ 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/python/README.rst b/python/README.rst index 379eef5..ae53b02 100644 --- a/python/README.rst +++ b/python/README.rst @@ -139,8 +139,8 @@ the following format: It is effectively the PAF format without the QueryName and QueryLength columns (the first two columns in PAF). -Function mappy.fastx_read -~~~~~~~~~~~~~~~~~~~~~~~~~ +Miscellaneous Functions +~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python @@ -149,3 +149,11 @@ Function mappy.fastx_read This generator function opens a FASTA/FASTQ file and *yields* a :code:`(name,seq,qual)` tuple for each sequence entry. The input file may be optionally gzip'd. + +.. code:: python + + mappy.revcomp(seq) + +Return the reverse complement of DNA string :code:`seq`. This function +recognizes IUB code and preserves the letter cases. Uracil :code:`U` is +complemented to :code:`A`. diff --git a/python/cmappy.h b/python/cmappy.h index dc9d509..4044aeb 100644 --- a/python/cmappy.h +++ b/python/cmappy.h @@ -101,4 +101,16 @@ static inline mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const } } +static inline uint8_t *mappy_revcomp(int len, uint8_t *seq) +{ + int i; + for (i = 0; i < len>>1; ++i) { + uint8_t t = seq_comp_table[seq[i]]; + seq[i] = seq_comp_table[seq[len - 1 - i]]; + seq[len - 1 - i] = t; + } + if (len&1) seq[len>>1] = seq_comp_table[seq[len>>1]]; + return seq; +} + #endif diff --git a/python/cmappy.pxd b/python/cmappy.pxd index 4891f98..7570103 100644 --- a/python/cmappy.pxd +++ b/python/cmappy.pxd @@ -116,5 +116,6 @@ cdef extern from "cmappy.h": void mm_fastx_close(kseq_t *ks) int kseq_read(kseq_t *seq) + uint8_t *mappy_revcomp(int l, uint8_t *seq) int mm_verbose_level(int v) void mm_reset_timer() diff --git a/python/mappy.pyx b/python/mappy.pyx index 711e49f..1318e16 100644 --- a/python/mappy.pyx +++ b/python/mappy.pyx @@ -164,6 +164,11 @@ def fastx_read(fn): yield name, seq, qual cmappy.mm_fastx_close(ks) +def revcomp(seq): + cdef uint8_t *s + s = cmappy.mappy_revcomp(len(seq), str.encode(seq)) + return s if isinstance(s, str) else s.decode() + def verbose(v=None): if v is None: v = -1 return cmappy.mm_verbose_level(v)