added reverse complement

This commit is contained in:
Heng Li 2018-02-20 09:41:25 -05:00
parent f434653432
commit 7dc7097208
4 changed files with 28 additions and 2 deletions

View File

@ -139,8 +139,8 @@ the following format:
It is effectively the PAF format without the QueryName and QueryLength columns It is effectively the PAF format without the QueryName and QueryLength columns
(the first two columns in PAF). (the first two columns in PAF).
Function mappy.fastx_read Miscellaneous Functions
~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~
.. code:: python .. code:: python
@ -149,3 +149,11 @@ Function mappy.fastx_read
This generator function opens a FASTA/FASTQ file and *yields* a This generator function opens a FASTA/FASTQ file and *yields* a
:code:`(name,seq,qual)` tuple for each sequence entry. The input file may be :code:`(name,seq,qual)` tuple for each sequence entry. The input file may be
optionally gzip'd. optionally gzip'd.
.. code:: python
mappy.revcomp(seq)
Return the reverse complement of the DNA string :code:`seq`. This function
recognizes IUB ambiguity codes and preserves letter case. Uracil :code:`U` is
complemented to :code:`A`.

View File

@ -101,4 +101,16 @@ static inline mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const
} }
} }
/**
 * Reverse a sequence in place while translating every byte through
 * seq_comp_table (presumably the base-complement lookup table; it is
 * defined outside this block).
 *
 * The caller's buffer is overwritten; nothing is allocated.
 *
 * @param len  number of bytes in seq; the buffer is left untouched when
 *             len <= 0
 * @param seq  buffer to rewrite; a NULL pointer is returned unchanged
 * @return     seq itself (the same pointer that was passed in)
 */
static inline uint8_t *mappy_revcomp(int len, uint8_t *seq)
{
	int i, j;
	// Reject non-positive lengths (e.g. a length that did not fit in an int)
	// so that no index can ever land in front of the buffer.
	if (!seq || len <= 0) return seq;
	// Walk inwards from both ends, swapping the translated bytes.
	for (i = 0, j = len - 1; i < j; ++i, --j) {
		uint8_t t = seq_comp_table[seq[i]];
		seq[i] = seq_comp_table[seq[j]];
		seq[j] = t;
	}
	// Odd length: the middle byte has no partner and is translated in place.
	if (i == j) seq[i] = seq_comp_table[seq[i]];
	return seq;
}
#endif #endif

View File

@ -116,5 +116,6 @@ cdef extern from "cmappy.h":
void mm_fastx_close(kseq_t *ks) void mm_fastx_close(kseq_t *ks)
int kseq_read(kseq_t *seq) int kseq_read(kseq_t *seq)
uint8_t *mappy_revcomp(int l, uint8_t *seq)
int mm_verbose_level(int v) int mm_verbose_level(int v)
void mm_reset_timer() void mm_reset_timer()

View File

@ -164,6 +164,11 @@ def fastx_read(fn):
yield name, seq, qual yield name, seq, qual
cmappy.mm_fastx_close(ks) cmappy.mm_fastx_close(ks)
def revcomp(seq):
	"""Return the reverse complement of sequence `seq` as a native string.

	`seq` may be a text string or a bytes object. The input object is never
	modified.
	"""
	# cmappy.mappy_revcomp() rewrites the buffer it is given in place, so it
	# must not be pointed at the storage of an immutable bytes object (which
	# the interpreter may share between unrelated values). Work on a private,
	# mutable copy that stays alive for the whole call instead.
	cdef bytearray buf = bytearray(seq if isinstance(seq, bytes) else seq.encode())
	cdef uint8_t *p = buf
	# Use the encoded byte count, not len(seq): the two differ for non-ASCII text.
	cmappy.mappy_revcomp(len(buf), p)
	out = bytes(buf)
	# bytes is str on Python 2; on Python 3 decode to get a text string back.
	return out if isinstance(out, str) else out.decode()
def verbose(v=None): def verbose(v=None):
if v is None: v = -1 if v is None: v = -1
return cmappy.mm_verbose_level(v) return cmappy.mm_verbose_level(v)