r823: mappy to index a single sequence
This commit is contained in:
parent
5ab6538757
commit
b0f39a1a61
6
index.c
6
index.c
|
|
@ -372,7 +372,9 @@ mm_idx_t *mm_idx_str(int w, int k, int is_hpc, int bucket_bits, int n, const cha
|
|||
uint64_t sum_len = 0;
|
||||
mm128_v a = {0,0,0};
|
||||
mm_idx_t *mi;
|
||||
khash_t(str) *h;
|
||||
int i, flag = 0;
|
||||
|
||||
if (n <= 0) return 0;
|
||||
for (i = 0; i < n; ++i) // get the total length
|
||||
sum_len += strlen(seq[i]);
|
||||
|
|
@ -383,13 +385,17 @@ mm_idx_t *mm_idx_str(int w, int k, int is_hpc, int bucket_bits, int n, const cha
|
|||
mi->n_seq = n;
|
||||
mi->seq = (mm_idx_seq_t*)kcalloc(mi->km, n, sizeof(mm_idx_seq_t)); // ->seq is allocated from km
|
||||
mi->S = (uint32_t*)calloc((sum_len + 7) / 8, 4);
|
||||
mi->h = h = kh_init(str);
|
||||
for (i = 0, sum_len = 0; i < n; ++i) {
|
||||
const char *s = seq[i];
|
||||
mm_idx_seq_t *p = &mi->seq[i];
|
||||
uint32_t j;
|
||||
if (name && name[i]) {
|
||||
int absent;
|
||||
p->name = (char*)kmalloc(mi->km, strlen(name[i]) + 1);
|
||||
strcpy(p->name, name[i]);
|
||||
kh_put(str, h, p->name, &absent);
|
||||
assert(absent);
|
||||
}
|
||||
p->offset = sum_len;
|
||||
p->len = strlen(s);
|
||||
|
|
|
|||
2
main.c
2
main.c
|
|
@ -10,7 +10,7 @@
|
|||
#include "getopt.h"
|
||||
#endif
|
||||
|
||||
#define MM_VERSION "2.11-r822-dirty"
|
||||
#define MM_VERSION "2.11-r823-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
|
|||
|
|
@ -137,4 +137,16 @@ static char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int e
|
|||
return s;
|
||||
}
|
||||
|
||||
static mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len)
|
||||
{
|
||||
const char *fake_name = "N/A";
|
||||
char *s;
|
||||
mm_idx_t *mi;
|
||||
s = (char*)calloc(len + 1, 1);
|
||||
memcpy(s, seq, len);
|
||||
mi = mm_idx_str(w, k, is_hpc, bucket_bits, 1, (const char**)&s, (const char**)&fake_name);
|
||||
free(s);
|
||||
return mi;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -110,6 +110,7 @@ cdef extern from "cmappy.h":
|
|||
void mm_free_reg1(mm_reg1_t *r)
|
||||
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
|
||||
char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *l)
|
||||
mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int l)
|
||||
|
||||
ctypedef struct kstring_t:
|
||||
unsigned l, m
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ cdef class Aligner:
|
|||
cdef cmappy.mm_idxopt_t idx_opt
|
||||
cdef cmappy.mm_mapopt_t map_opt
|
||||
|
||||
def __cinit__(self, fn_idx_in, preset=None, k=None, w=None, min_cnt=None, min_chain_score=None, min_dp_score=None, bw=None, best_n=None, n_threads=3, fn_idx_out=None, max_frag_len=None):
|
||||
def __cinit__(self, fn_idx_in=None, preset=None, k=None, w=None, min_cnt=None, min_chain_score=None, min_dp_score=None, bw=None, best_n=None, n_threads=3, fn_idx_out=None, max_frag_len=None, extra_flags=None, seq=None):
|
||||
cmappy.mm_set_opt(NULL, &self.idx_opt, &self.map_opt) # set the default options
|
||||
if preset is not None:
|
||||
cmappy.mm_set_opt(str.encode(preset), &self.idx_opt, &self.map_opt) # apply preset
|
||||
|
|
@ -126,17 +126,24 @@ cdef class Aligner:
|
|||
if bw is not None: self.map_opt.bw = bw
|
||||
if best_n is not None: self.map_opt.best_n = best_n
|
||||
if max_frag_len is not None: self.map_opt.max_frag_len = max_frag_len
|
||||
if extra_flags is not None: self.map_opt.flag |= extra_flags
|
||||
|
||||
cdef cmappy.mm_idx_reader_t *r;
|
||||
if fn_idx_out is None:
|
||||
r = cmappy.mm_idx_reader_open(str.encode(fn_idx_in), &self.idx_opt, NULL)
|
||||
|
||||
if seq is None:
|
||||
if fn_idx_out is None:
|
||||
r = cmappy.mm_idx_reader_open(str.encode(fn_idx_in), &self.idx_opt, NULL)
|
||||
else:
|
||||
r = cmappy.mm_idx_reader_open(str.encode(fn_idx_in), &self.idx_opt, fn_idx_out)
|
||||
if r is not NULL:
|
||||
self._idx = cmappy.mm_idx_reader_read(r, n_threads) # NB: ONLY read the first part
|
||||
cmappy.mm_idx_reader_close(r)
|
||||
cmappy.mm_mapopt_update(&self.map_opt, self._idx)
|
||||
cmappy.mm_idx_index_name(self._idx)
|
||||
else:
|
||||
r = cmappy.mm_idx_reader_open(str.encode(fn_idx_in), &self.idx_opt, fn_idx_out)
|
||||
if r is not NULL:
|
||||
self._idx = cmappy.mm_idx_reader_read(r, n_threads) # NB: ONLY read the first part
|
||||
cmappy.mm_idx_reader_close(r)
|
||||
self._idx = cmappy.mappy_idx_seq(self.idx_opt.w, self.idx_opt.k, self.idx_opt.flag&1, self.idx_opt.bucket_bits, str.encode(seq), len(seq))
|
||||
cmappy.mm_mapopt_update(&self.map_opt, self._idx)
|
||||
cmappy.mm_idx_index_name(self._idx)
|
||||
self.map_opt.mid_occ = 1000 # don't filter high-occ seeds
|
||||
|
||||
def __dealloc__(self):
|
||||
if self._idx is not NULL:
|
||||
|
|
@ -145,7 +152,7 @@ cdef class Aligner:
|
|||
def __bool__(self):
|
||||
return (self._idx != NULL)
|
||||
|
||||
def map(self, seq, seq2=None, buf=None, cs=False, MD=False, max_frag_len=None):
|
||||
def map(self, seq, seq2=None, buf=None, cs=False, MD=False, max_frag_len=None, extra_flags=None):
|
||||
cdef cmappy.mm_reg1_t *regs
|
||||
cdef cmappy.mm_hitpy_t h
|
||||
cdef ThreadBuffer b
|
||||
|
|
@ -157,6 +164,7 @@ cdef class Aligner:
|
|||
|
||||
map_opt = self.map_opt
|
||||
if max_frag_len is not None: map_opt.max_frag_len = max_frag_len
|
||||
if extra_flags is not None: map_opt.flag |= extra_flags
|
||||
|
||||
if self._idx is NULL: return None
|
||||
if buf is None: b = ThreadBuffer()
|
||||
|
|
|
|||
Loading…
Reference in New Issue