r823: mappy to index a single sequence
This commit is contained in:
parent
5ab6538757
commit
b0f39a1a61
6
index.c
6
index.c
|
|
@ -372,7 +372,9 @@ mm_idx_t *mm_idx_str(int w, int k, int is_hpc, int bucket_bits, int n, const cha
|
||||||
uint64_t sum_len = 0;
|
uint64_t sum_len = 0;
|
||||||
mm128_v a = {0,0,0};
|
mm128_v a = {0,0,0};
|
||||||
mm_idx_t *mi;
|
mm_idx_t *mi;
|
||||||
|
khash_t(str) *h;
|
||||||
int i, flag = 0;
|
int i, flag = 0;
|
||||||
|
|
||||||
if (n <= 0) return 0;
|
if (n <= 0) return 0;
|
||||||
for (i = 0; i < n; ++i) // get the total length
|
for (i = 0; i < n; ++i) // get the total length
|
||||||
sum_len += strlen(seq[i]);
|
sum_len += strlen(seq[i]);
|
||||||
|
|
@ -383,13 +385,17 @@ mm_idx_t *mm_idx_str(int w, int k, int is_hpc, int bucket_bits, int n, const cha
|
||||||
mi->n_seq = n;
|
mi->n_seq = n;
|
||||||
mi->seq = (mm_idx_seq_t*)kcalloc(mi->km, n, sizeof(mm_idx_seq_t)); // ->seq is allocated from km
|
mi->seq = (mm_idx_seq_t*)kcalloc(mi->km, n, sizeof(mm_idx_seq_t)); // ->seq is allocated from km
|
||||||
mi->S = (uint32_t*)calloc((sum_len + 7) / 8, 4);
|
mi->S = (uint32_t*)calloc((sum_len + 7) / 8, 4);
|
||||||
|
mi->h = h = kh_init(str);
|
||||||
for (i = 0, sum_len = 0; i < n; ++i) {
|
for (i = 0, sum_len = 0; i < n; ++i) {
|
||||||
const char *s = seq[i];
|
const char *s = seq[i];
|
||||||
mm_idx_seq_t *p = &mi->seq[i];
|
mm_idx_seq_t *p = &mi->seq[i];
|
||||||
uint32_t j;
|
uint32_t j;
|
||||||
if (name && name[i]) {
|
if (name && name[i]) {
|
||||||
|
int absent;
|
||||||
p->name = (char*)kmalloc(mi->km, strlen(name[i]) + 1);
|
p->name = (char*)kmalloc(mi->km, strlen(name[i]) + 1);
|
||||||
strcpy(p->name, name[i]);
|
strcpy(p->name, name[i]);
|
||||||
|
kh_put(str, h, p->name, &absent);
|
||||||
|
assert(absent);
|
||||||
}
|
}
|
||||||
p->offset = sum_len;
|
p->offset = sum_len;
|
||||||
p->len = strlen(s);
|
p->len = strlen(s);
|
||||||
|
|
|
||||||
2
main.c
2
main.c
|
|
@ -10,7 +10,7 @@
|
||||||
#include "getopt.h"
|
#include "getopt.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define MM_VERSION "2.11-r822-dirty"
|
#define MM_VERSION "2.11-r823-dirty"
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
|
|
|
||||||
|
|
@ -137,4 +137,16 @@ static char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int e
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Build an index over a single in-memory sequence.
 *
 * w, k, is_hpc and bucket_bits are forwarded unchanged to mm_idx_str().
 * seq points to the sequence bytes; only the first len bytes are read, so
 * seq does not need to be NUL-terminated.
 *
 * Returns the value of mm_idx_str(), or NULL when seq is NULL, len is
 * negative, or the temporary buffer cannot be allocated.
 */
static mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len)
{
	const char *fake_name = "N/A"; /* placeholder name for the only sequence */
	char *s;
	mm_idx_t *mi;
	/* Reject inputs that would make memcpy() read through NULL or copy a
	 * negative length converted to a huge size_t. */
	if (seq == NULL || len < 0) return NULL;
	/* mm_idx_str() measures each sequence with strlen(), so hand it a
	 * zero-filled copy that is guaranteed to end in NUL. The size is
	 * computed in size_t so len + 1 cannot overflow int. */
	s = (char*)calloc((size_t)len + 1, 1);
	if (s == NULL) return NULL;
	memcpy(s, seq, (size_t)len);
	mi = mm_idx_str(w, k, is_hpc, bucket_bits, 1, (const char**)&s, (const char**)&fake_name);
	free(s); /* the temporary copy is released as soon as mm_idx_str() returns */
	return mi;
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -110,6 +110,7 @@ cdef extern from "cmappy.h":
|
||||||
void mm_free_reg1(mm_reg1_t *r)
|
void mm_free_reg1(mm_reg1_t *r)
|
||||||
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
|
mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
|
||||||
char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *l)
|
char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *l)
|
||||||
|
mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int l)
|
||||||
|
|
||||||
ctypedef struct kstring_t:
|
ctypedef struct kstring_t:
|
||||||
unsigned l, m
|
unsigned l, m
|
||||||
|
|
|
||||||
|
|
@ -112,7 +112,7 @@ cdef class Aligner:
|
||||||
cdef cmappy.mm_idxopt_t idx_opt
|
cdef cmappy.mm_idxopt_t idx_opt
|
||||||
cdef cmappy.mm_mapopt_t map_opt
|
cdef cmappy.mm_mapopt_t map_opt
|
||||||
|
|
||||||
def __cinit__(self, fn_idx_in, preset=None, k=None, w=None, min_cnt=None, min_chain_score=None, min_dp_score=None, bw=None, best_n=None, n_threads=3, fn_idx_out=None, max_frag_len=None):
|
def __cinit__(self, fn_idx_in=None, preset=None, k=None, w=None, min_cnt=None, min_chain_score=None, min_dp_score=None, bw=None, best_n=None, n_threads=3, fn_idx_out=None, max_frag_len=None, extra_flags=None, seq=None):
|
||||||
cmappy.mm_set_opt(NULL, &self.idx_opt, &self.map_opt) # set the default options
|
cmappy.mm_set_opt(NULL, &self.idx_opt, &self.map_opt) # set the default options
|
||||||
if preset is not None:
|
if preset is not None:
|
||||||
cmappy.mm_set_opt(str.encode(preset), &self.idx_opt, &self.map_opt) # apply preset
|
cmappy.mm_set_opt(str.encode(preset), &self.idx_opt, &self.map_opt) # apply preset
|
||||||
|
|
@ -126,17 +126,24 @@ cdef class Aligner:
|
||||||
if bw is not None: self.map_opt.bw = bw
|
if bw is not None: self.map_opt.bw = bw
|
||||||
if best_n is not None: self.map_opt.best_n = best_n
|
if best_n is not None: self.map_opt.best_n = best_n
|
||||||
if max_frag_len is not None: self.map_opt.max_frag_len = max_frag_len
|
if max_frag_len is not None: self.map_opt.max_frag_len = max_frag_len
|
||||||
|
if extra_flags is not None: self.map_opt.flag |= extra_flags
|
||||||
|
|
||||||
cdef cmappy.mm_idx_reader_t *r;
|
cdef cmappy.mm_idx_reader_t *r;
|
||||||
if fn_idx_out is None:
|
|
||||||
r = cmappy.mm_idx_reader_open(str.encode(fn_idx_in), &self.idx_opt, NULL)
|
if seq is None:
|
||||||
|
if fn_idx_out is None:
|
||||||
|
r = cmappy.mm_idx_reader_open(str.encode(fn_idx_in), &self.idx_opt, NULL)
|
||||||
|
else:
|
||||||
|
r = cmappy.mm_idx_reader_open(str.encode(fn_idx_in), &self.idx_opt, fn_idx_out)
|
||||||
|
if r is not NULL:
|
||||||
|
self._idx = cmappy.mm_idx_reader_read(r, n_threads) # NB: ONLY read the first part
|
||||||
|
cmappy.mm_idx_reader_close(r)
|
||||||
|
cmappy.mm_mapopt_update(&self.map_opt, self._idx)
|
||||||
|
cmappy.mm_idx_index_name(self._idx)
|
||||||
else:
|
else:
|
||||||
r = cmappy.mm_idx_reader_open(str.encode(fn_idx_in), &self.idx_opt, fn_idx_out)
|
self._idx = cmappy.mappy_idx_seq(self.idx_opt.w, self.idx_opt.k, self.idx_opt.flag&1, self.idx_opt.bucket_bits, str.encode(seq), len(seq))
|
||||||
if r is not NULL:
|
|
||||||
self._idx = cmappy.mm_idx_reader_read(r, n_threads) # NB: ONLY read the first part
|
|
||||||
cmappy.mm_idx_reader_close(r)
|
|
||||||
cmappy.mm_mapopt_update(&self.map_opt, self._idx)
|
cmappy.mm_mapopt_update(&self.map_opt, self._idx)
|
||||||
cmappy.mm_idx_index_name(self._idx)
|
self.map_opt.mid_occ = 1000 # don't filter high-occ seeds
|
||||||
|
|
||||||
def __dealloc__(self):
|
def __dealloc__(self):
|
||||||
if self._idx is not NULL:
|
if self._idx is not NULL:
|
||||||
|
|
@ -145,7 +152,7 @@ cdef class Aligner:
|
||||||
def __bool__(self):
|
def __bool__(self):
|
||||||
return (self._idx != NULL)
|
return (self._idx != NULL)
|
||||||
|
|
||||||
def map(self, seq, seq2=None, buf=None, cs=False, MD=False, max_frag_len=None):
|
def map(self, seq, seq2=None, buf=None, cs=False, MD=False, max_frag_len=None, extra_flags=None):
|
||||||
cdef cmappy.mm_reg1_t *regs
|
cdef cmappy.mm_reg1_t *regs
|
||||||
cdef cmappy.mm_hitpy_t h
|
cdef cmappy.mm_hitpy_t h
|
||||||
cdef ThreadBuffer b
|
cdef ThreadBuffer b
|
||||||
|
|
@ -157,6 +164,7 @@ cdef class Aligner:
|
||||||
|
|
||||||
map_opt = self.map_opt
|
map_opt = self.map_opt
|
||||||
if max_frag_len is not None: map_opt.max_frag_len = max_frag_len
|
if max_frag_len is not None: map_opt.max_frag_len = max_frag_len
|
||||||
|
if extra_flags is not None: map_opt.flag |= extra_flags
|
||||||
|
|
||||||
if self._idx is NULL: return None
|
if self._idx is NULL: return None
|
||||||
if buf is None: b = ThreadBuffer()
|
if buf is None: b = ThreadBuffer()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue