minimap2/python/cmappy.h

153 lines
3.6 KiB
C
Raw Permalink Normal View History

#ifndef CMAPPY_H
#define CMAPPY_H
2017-09-16 20:44:47 +08:00
#include <stdlib.h>
2017-09-18 02:41:59 +08:00
#include <string.h>
#include <zlib.h>
2017-09-16 20:44:47 +08:00
#include "minimap.h"
2017-09-18 02:41:59 +08:00
#include "kseq.h"
KSEQ_DECLARE(gzFile)
2017-09-16 20:44:47 +08:00
typedef struct {
const char *ctg;
int32_t ctg_start, ctg_end;
int32_t qry_start, qry_end;
2017-10-16 23:15:07 +08:00
int32_t blen, mlen, NM, ctg_len;
2017-09-16 23:14:01 +08:00
uint8_t mapq, is_primary;
int8_t strand, trans_strand;
int32_t seg_id;
2017-09-16 20:44:47 +08:00
int32_t n_cigar32;
uint32_t *cigar32;
} mm_hitpy_t;
2017-09-16 23:14:01 +08:00
static inline void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
2017-09-16 20:44:47 +08:00
{
2017-09-16 23:14:01 +08:00
h->ctg = mi->seq[r->rid].name;
h->ctg_len = mi->seq[r->rid].len;
h->ctg_start = r->rs, h->ctg_end = r->re;
h->qry_start = r->qs, h->qry_end = r->qe;
h->strand = r->rev? -1 : 1;
h->mapq = r->mapq;
2017-10-16 23:15:07 +08:00
h->mlen = r->mlen;
h->blen = r->blen;
h->NM = r->blen - r->mlen + r->p->n_ambi;
2017-09-16 23:14:01 +08:00
h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0;
h->is_primary = (r->id == r->parent);
h->seg_id = r->seg_id;
2017-09-16 23:14:01 +08:00
h->n_cigar32 = r->p->n_cigar;
h->cigar32 = r->p->cigar;
2017-09-16 20:44:47 +08:00
}
static inline void mm_free_reg1(mm_reg1_t *r)
{
free(r->p);
}
2017-09-18 02:41:59 +08:00
static inline kseq_t *mm_fastx_open(const char *fn)
{
gzFile fp;
fp = fn && strcmp(fn, "-") != 0? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
return kseq_init(fp);
}
static inline void mm_fastx_close(kseq_t *ks)
{
gzFile fp;
fp = ks->f->f;
kseq_destroy(ks);
gzclose(fp);
}
static inline int mm_verbose_level(int v)
{
if (v >= 0) mm_verbose = v;
return mm_verbose;
}
static inline void mm_reset_timer(void)
{
extern double realtime(void);
mm_realtime0 = realtime();
}
extern unsigned char seq_comp_table[256];
static inline mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
{
mm_reg1_t *r;
Py_BEGIN_ALLOW_THREADS
if (seq2 == 0) {
r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, NULL);
} else {
int _n_regs[2];
mm_reg1_t *regs[2];
char *seq[2];
int i, len[2];
len[0] = strlen(seq1);
len[1] = strlen(seq2);
seq[0] = (char*)seq1;
seq[1] = strdup(seq2);
for (i = 0; i < len[1]>>1; ++i) {
int t = seq[1][len[1] - i - 1];
seq[1][len[1] - i - 1] = seq_comp_table[(uint8_t)seq[1][i]];
seq[1][i] = seq_comp_table[t];
}
if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]];
mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, NULL);
for (i = 0; i < _n_regs[1]; ++i)
regs[1][i].rev = !regs[1][i].rev;
*n_regs = _n_regs[0] + _n_regs[1];
regs[0] = (mm_reg1_t*)realloc(regs[0], sizeof(mm_reg1_t) * (*n_regs));
memcpy(&regs[0][_n_regs[0]], regs[1], _n_regs[1] * sizeof(mm_reg1_t));
free(regs[1]);
r = regs[0];
}
Py_END_ALLOW_THREADS
return r;
}
2018-02-21 00:05:54 +08:00
static inline char *mappy_revcomp(int len, const uint8_t *seq)
2018-02-20 22:41:25 +08:00
{
int i;
2018-02-21 00:05:54 +08:00
char *rev;
rev = (char*)malloc(len + 1);
for (i = 0; i < len; ++i)
rev[len - i - 1] = seq_comp_table[seq[i]];
rev[len] = 0;
return rev;
2018-02-20 22:41:25 +08:00
}
2018-02-23 23:18:26 +08:00
static char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len)
{
int i, rid;
char *s;
*len = 0;
rid = mm_idx_name2id(mi, name);
if (rid < 0) return 0;
2018-07-25 11:29:55 +08:00
if ((uint32_t)st >= mi->seq[rid].len || st >= en) return 0;
if (en < 0 || (uint32_t)en > mi->seq[rid].len)
en = mi->seq[rid].len;
2018-02-23 23:18:26 +08:00
s = (char*)malloc(en - st + 1);
2018-03-27 03:12:54 +08:00
*len = mm_idx_getseq(mi, rid, st, en, (uint8_t*)s);
2018-02-23 23:18:26 +08:00
for (i = 0; i < *len; ++i)
s[i] = "ACGTN"[(uint8_t)s[i]];
s[*len] = 0;
return s;
}
2018-08-06 08:57:05 +08:00
static mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len)
{
const char *fake_name = "N/A";
char *s;
mm_idx_t *mi;
s = (char*)calloc(len + 1, 1);
memcpy(s, seq, len);
mi = mm_idx_str(w, k, is_hpc, bucket_bits, 1, (const char**)&s, (const char**)&fake_name);
free(s);
return mi;
}
2017-09-16 20:44:47 +08:00
#endif