redesigned indexing and option APIs
This commit is contained in:
parent
0f7455cefa
commit
eb00521d9b
44
index.c
44
index.c
|
|
@ -21,6 +21,15 @@ typedef khash_t(idx) idxhash_t;
|
|||
|
||||
#define kroundup64(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, (x)|=(x)>>32, ++(x))
|
||||
|
||||
void mm_idxopt_init(mm_idxopt_t *opt)
|
||||
{
|
||||
memset(opt, 0, sizeof(mm_idxopt_t));
|
||||
opt->k = 15, opt->w = 10, opt->is_hpc = 0;
|
||||
opt->bucket_bits = 14;
|
||||
opt->mini_batch_size = 50000000;
|
||||
opt->batch_size = 4000000000ULL;
|
||||
}
|
||||
|
||||
mm_idx_t *mm_idx_init(int w, int k, int b, int is_hpc)
|
||||
{
|
||||
mm_idx_t *mi;
|
||||
|
|
@ -314,7 +323,7 @@ mm_idx_t *mm_idx_build(const char *fn, int w, int k, int is_hpc, int n_threads)
|
|||
mm_idx_t *mi;
|
||||
fp = mm_bseq_open(fn);
|
||||
if (fp == 0) return 0;
|
||||
mi = mm_idx_gen(fp, w, k, MM_IDX_DEF_B, is_hpc, 1<<18, n_threads, UINT64_MAX, 1);
|
||||
mi = mm_idx_gen(fp, w, k, 14, is_hpc, 1<<18, n_threads, UINT64_MAX, 1);
|
||||
mm_bseq_close(fp);
|
||||
return mi;
|
||||
}
|
||||
|
|
@ -429,3 +438,36 @@ int mm_idx_is_idx(const char *fn)
|
|||
close(fd);
|
||||
return is_idx;
|
||||
}
|
||||
|
||||
mm_idx_reader_t *mm_idx_reader_open(const char *fn, const mm_idxopt_t *opt)
|
||||
{
|
||||
int is_idx;
|
||||
mm_idx_reader_t *r;
|
||||
is_idx = mm_idx_is_idx(fn);
|
||||
if (is_idx < 0) return 0; // failed to open the index
|
||||
r = (mm_idx_reader_t*)calloc(1, sizeof(mm_idx_reader_t));
|
||||
r->is_idx = is_idx;
|
||||
r->opt = *opt;
|
||||
if (r->is_idx) r->fp.idx = fopen(fn, "rb");
|
||||
else r->fp.seq = mm_bseq_open(fn);
|
||||
return r;
|
||||
}
|
||||
|
||||
void mm_idx_reader_close(mm_idx_reader_t *r)
|
||||
{
|
||||
if (r->is_idx) fclose(r->fp.idx);
|
||||
else mm_bseq_close(r->fp.seq);
|
||||
}
|
||||
|
||||
mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads)
|
||||
{
|
||||
mm_idx_t *mi;
|
||||
if (r->is_idx) {
|
||||
mi = mm_idx_load(r->fp.idx);
|
||||
if (mi && mm_verbose >= 2 && (mi->k != r->opt.k || mi->w != r->opt.w || mi->is_hpc != r->opt.is_hpc))
|
||||
fprintf(stderr, "[WARNING] Indexing parameters (-k, -w or -H) overridden by parameters used in the prebuilt index.\n");
|
||||
} else
|
||||
mi = mm_idx_gen(r->fp.seq, r->opt.w, r->opt.k, r->opt.bucket_bits, r->opt.is_hpc, r->opt.mini_batch_size, n_threads, r->opt.batch_size, 1);
|
||||
if (mi) ++r->n_parts;
|
||||
return mi;
|
||||
}
|
||||
|
|
|
|||
103
main.c
103
main.c
|
|
@ -25,7 +25,7 @@ void liftrlimit() {}
|
|||
static struct option long_options[] = {
|
||||
{ "bucket-bits", required_argument, 0, 0 },
|
||||
{ "mb-size", required_argument, 0, 'K' },
|
||||
{ "int-rname", no_argument, 0, 0 },
|
||||
{ "int-rname", no_argument, 0, 0 }, // obsolete; kept as a placeholder
|
||||
{ "no-kalloc", no_argument, 0, 0 },
|
||||
{ "print-qname", no_argument, 0, 0 },
|
||||
{ "no-self", no_argument, 0, 0 },
|
||||
|
|
@ -62,21 +62,21 @@ static inline int64_t mm_parse_num(const char *str)
|
|||
int main(int argc, char *argv[])
|
||||
{
|
||||
mm_mapopt_t opt;
|
||||
int i, c, k = 15, w = -1, bucket_bits = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx, is_hpc = 0, long_idx, idx_par_set = 0, max_intron_len = 0, n_idx_part = 0;
|
||||
int minibatch_size = 200000000;
|
||||
uint64_t batch_size = 4000000000ULL;
|
||||
mm_bseq_file_t *fp = 0;
|
||||
mm_idxopt_t ipt;
|
||||
int i, c, n_threads = 3, long_idx, max_intron_len = 0;
|
||||
char *fnw = 0, *rg = 0, *s;
|
||||
FILE *fpr = 0, *fpw = 0, *fp_help = stderr;
|
||||
FILE *fpw = 0, *fp_help = stderr;
|
||||
mm_idx_reader_t *idx_rdr;
|
||||
|
||||
liftrlimit();
|
||||
mm_realtime0 = realtime();
|
||||
mm_mapopt_init(&opt);
|
||||
mm_idxopt_init(&ipt);
|
||||
|
||||
while ((c = getopt_long(argc, argv, "aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:h", long_options, &long_idx)) >= 0) {
|
||||
if (c == 'w') w = atoi(optarg), idx_par_set = 1;
|
||||
else if (c == 'k') k = atoi(optarg), idx_par_set = 1;
|
||||
else if (c == 'H') is_hpc = 1, idx_par_set = 1;
|
||||
if (c == 'w') ipt.w = atoi(optarg);
|
||||
else if (c == 'k') ipt.k = atoi(optarg);
|
||||
else if (c == 'H') ipt.is_hpc = 1;
|
||||
else if (c == 'd') fnw = optarg; // the above are indexing related options, except -I
|
||||
else if (c == 'r') opt.bw = (int)mm_parse_num(optarg);
|
||||
else if (c == 't') n_threads = atoi(optarg);
|
||||
|
|
@ -98,12 +98,11 @@ int main(int argc, char *argv[])
|
|||
else if (c == 'B') opt.b = atoi(optarg);
|
||||
else if (c == 'z') opt.zdrop = atoi(optarg);
|
||||
else if (c == 's') opt.min_dp_max = atoi(optarg);
|
||||
else if (c == 'I') batch_size = mm_parse_num(optarg);
|
||||
else if (c == 'K') minibatch_size = (int)mm_parse_num(optarg);
|
||||
else if (c == 'I') ipt.batch_size = mm_parse_num(optarg);
|
||||
else if (c == 'K') ipt.mini_batch_size = (int)mm_parse_num(optarg);
|
||||
else if (c == 'R') rg = optarg;
|
||||
else if (c == 'h') fp_help = stdout;
|
||||
else if (c == 0 && long_idx == 0) bucket_bits = atoi(optarg); // --bucket-bits
|
||||
else if (c == 0 && long_idx == 2) keep_name = 0; // --int-rname
|
||||
else if (c == 0 && long_idx == 0) ipt.bucket_bits = atoi(optarg); // --bucket-bits
|
||||
else if (c == 0 && long_idx == 3) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc
|
||||
else if (c == 0 && long_idx == 4) mm_dbg_flag |= MM_DBG_PRINT_QNAME; // --print-qname
|
||||
else if (c == 0 && long_idx == 5) opt.flag |= MM_F_NO_SELF; // --no-self
|
||||
|
|
@ -140,55 +139,12 @@ int main(int argc, char *argv[])
|
|||
opt.e = opt.e2 = strtol(optarg, &s, 10);
|
||||
if (*s == ',') opt.e2 = strtol(s + 1, &s, 10);
|
||||
} else if (c == 'x') {
|
||||
if (strcmp(optarg, "ava-ont") == 0) {
|
||||
opt.flag |= MM_F_AVA | MM_F_NO_SELF;
|
||||
opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.max_gap = 10000, opt.max_chain_skip = 25;
|
||||
minibatch_size = 500000000;
|
||||
k = 15, w = 5;
|
||||
} else if (strcmp(optarg, "ava-pb") == 0) {
|
||||
opt.flag |= MM_F_AVA | MM_F_NO_SELF;
|
||||
opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.max_gap = 10000, opt.max_chain_skip = 25;
|
||||
minibatch_size = 500000000;
|
||||
is_hpc = 1, k = 19, w = 5;
|
||||
} else if (strcmp(optarg, "map10k") == 0 || strcmp(optarg, "map-pb") == 0) {
|
||||
is_hpc = 1, k = 19;
|
||||
} else if (strcmp(optarg, "map-ont") == 0) {
|
||||
is_hpc = 0, k = 15;
|
||||
} else if (strcmp(optarg, "asm5") == 0) {
|
||||
k = 19, w = 19;
|
||||
opt.a = 1, opt.b = 19, opt.q = 39, opt.q2 = 81, opt.e = 3, opt.e2 = 1, opt.zdrop = 200;
|
||||
opt.min_dp_max = 200;
|
||||
} else if (strcmp(optarg, "asm10") == 0) {
|
||||
k = 19, w = 19;
|
||||
opt.a = 1, opt.b = 9, opt.q = 16, opt.q2 = 41, opt.e = 2, opt.e2 = 1, opt.zdrop = 200;
|
||||
opt.min_dp_max = 200;
|
||||
} else if (strcmp(optarg, "short") == 0 || strcmp(optarg, "sr") == 0) {
|
||||
k = 21, w = 11, is_hpc = 0;
|
||||
minibatch_size = 50000000;
|
||||
opt.flag |= MM_F_APPROX_EXT;
|
||||
opt.a = 2, opt.b = 8, opt.q = 12, opt.e = 2, opt.q2 = 32, opt.e2 = 1;
|
||||
opt.max_gap = 100;
|
||||
opt.pri_ratio = 0.5f;
|
||||
opt.min_cnt = 2;
|
||||
opt.min_chain_score = 20;
|
||||
opt.min_dp_max = 40;
|
||||
opt.best_n = 20;
|
||||
opt.bw = 50;
|
||||
opt.mid_occ = 1000;
|
||||
} else if (strcmp(optarg, "splice") == 0 || strcmp(optarg, "cdna") == 0) {
|
||||
k = 15, w = 5;
|
||||
opt.flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV;
|
||||
opt.max_gap = 2000, opt.max_gap_ref = opt.bw = 200000;
|
||||
opt.a = 1, opt.b = 2, opt.q = 2, opt.e = 1, opt.q2 = 32, opt.e2 = 0;
|
||||
opt.noncan = 5;
|
||||
opt.zdrop = 200;
|
||||
} else {
|
||||
if (mm_preset(optarg, &ipt, &opt) < 0) {
|
||||
fprintf(stderr, "[E::%s] unknown preset '%s'\n", __func__, optarg);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (w < 0) w = (int)(.6666667 * k + .499);
|
||||
if ((opt.flag & MM_F_SPLICE) && max_intron_len > 0)
|
||||
opt.max_gap_ref = opt.bw = max_intron_len;
|
||||
|
||||
|
|
@ -197,8 +153,8 @@ int main(int argc, char *argv[])
|
|||
fprintf(fp_help, "Options:\n");
|
||||
fprintf(fp_help, " Indexing:\n");
|
||||
fprintf(fp_help, " -H use homopolymer-compressed k-mer\n");
|
||||
fprintf(fp_help, " -k INT k-mer size (no larger than 28) [%d]\n", k);
|
||||
fprintf(fp_help, " -w INT minizer window size [{-k}*2/3]\n");
|
||||
fprintf(fp_help, " -k INT k-mer size (no larger than 28) [%d]\n", ipt.k);
|
||||
fprintf(fp_help, " -w INT minizer window size [%d]\n", ipt.w);
|
||||
fprintf(fp_help, " -I NUM split index for every ~NUM input bases [4G]\n");
|
||||
fprintf(fp_help, " -d FILE dump index to FILE []\n");
|
||||
fprintf(fp_help, " Mapping:\n");
|
||||
|
|
@ -227,7 +183,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(fp_help, " -c output CIGAR in PAF\n");
|
||||
fprintf(fp_help, " -S output the cs tag in PAF (cs encodes both query and ref sequences)\n");
|
||||
fprintf(fp_help, " -t INT number of threads [%d]\n", n_threads);
|
||||
fprintf(fp_help, " -K NUM minibatch size [200M]\n");
|
||||
fprintf(fp_help, " -K NUM minibatch size for mapping [200M]\n");
|
||||
// fprintf(fp_help, " -v INT verbose level [%d]\n", mm_verbose);
|
||||
fprintf(fp_help, " --version show version number\n");
|
||||
fprintf(fp_help, " Preset:\n");
|
||||
|
|
@ -244,33 +200,22 @@ int main(int argc, char *argv[])
|
|||
return fp_help == stdout? 0 : 1;
|
||||
}
|
||||
|
||||
is_idx = mm_idx_is_idx(argv[optind]);
|
||||
if (is_idx < 0) {
|
||||
idx_rdr = mm_idx_reader_open(argv[optind], &ipt);
|
||||
if (idx_rdr == 0) {
|
||||
fprintf(stderr, "[ERROR] failed to open file '%s'\n", argv[optind]);
|
||||
return 1;
|
||||
}
|
||||
if (!is_idx && fnw == 0 && argc - optind < 2) {
|
||||
if (!idx_rdr->is_idx && fnw == 0 && argc - optind < 2) {
|
||||
fprintf(stderr, "[ERROR] missing input: please specify a query file to map or option -d to keep the index\n");
|
||||
return 1;
|
||||
}
|
||||
if (is_idx) fpr = fopen(argv[optind], "rb");
|
||||
else fp = mm_bseq_open(argv[optind]);
|
||||
if (fnw) fpw = fopen(fnw, "wb");
|
||||
if (opt.flag & MM_F_OUT_SAM)
|
||||
mm_write_sam_hdr_no_SQ(rg, MM_VERSION, argc, argv);
|
||||
for (;;) {
|
||||
mm_idx_t *mi;
|
||||
if (fpr) {
|
||||
mi = mm_idx_load(fpr);
|
||||
if (mi == 0) break;
|
||||
if (idx_par_set && mm_verbose >= 2 && (mi->k != k || mi->w != w || mi->is_hpc != is_hpc))
|
||||
fprintf(stderr, "[WARNING] \033[1;31mIndexing parameters on the command line (-k/-w/-H) overridden by parameters in the prebuilt index.\033[0m\n");
|
||||
} else {
|
||||
mi = mm_idx_gen(fp, w, k, bucket_bits, is_hpc, minibatch_size, n_threads, batch_size, keep_name);
|
||||
}
|
||||
if (mi == 0) break;
|
||||
++n_idx_part;
|
||||
if (mm_verbose >= 2 && n_idx_part > 1 && (opt.flag&MM_F_OUT_SAM) && !(opt.flag&MM_F_NO_SAM_SQ))
|
||||
if ((mi = mm_idx_reader_read(idx_rdr, n_threads)) == 0) break;
|
||||
if (mm_verbose >= 2 && idx_rdr->n_parts > 1 && (opt.flag&MM_F_OUT_SAM) && !(opt.flag&MM_F_NO_SAM_SQ))
|
||||
fprintf(stderr, "[WARNING] \033[1;31mSAM output is malformated due to internal @SQ lines. Please add option --no-sam-sq or filter afterwards.\033[0m\n");
|
||||
if (mm_verbose >= 3)
|
||||
fprintf(stderr, "[M::%s::%.3f*%.2f] loaded/built the index for %d target sequence(s)\n",
|
||||
|
|
@ -278,17 +223,15 @@ int main(int argc, char *argv[])
|
|||
if (fpw) {
|
||||
mm_idx_dump(fpw, mi);
|
||||
if (mm_verbose >= 3)
|
||||
fprintf(stderr, "[M::%s::%.3f*%.2f] dumpped the (partial) index to disk\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0));
|
||||
fprintf(stderr, "[M::%s::%.3f*%.2f] dumpped the current part of the index to disk\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0));
|
||||
}
|
||||
if (argc != optind + 1) mm_mapopt_update(&opt, mi);
|
||||
if (mm_verbose >= 3) mm_idx_stat(mi);
|
||||
for (i = optind + 1; i < argc; ++i)
|
||||
mm_map_file(mi, argv[i], &opt, n_threads, minibatch_size);
|
||||
mm_map_file(mi, argv[i], &opt, n_threads);
|
||||
mm_idx_destroy(mi);
|
||||
}
|
||||
if (fpw) fclose(fpw);
|
||||
if (fpr) fclose(fpr);
|
||||
if (fp) mm_bseq_close(fp);
|
||||
|
||||
fprintf(stderr, "[M::%s] Version: %s\n", __func__, MM_VERSION);
|
||||
fprintf(stderr, "[M::%s] CMD:", __func__);
|
||||
|
|
|
|||
53
map.c
53
map.c
|
|
@ -34,6 +34,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|||
opt->zdrop = 400;
|
||||
opt->min_dp_max = opt->min_chain_score * opt->a;
|
||||
opt->min_ksw_len = 200;
|
||||
opt->mini_batch_size = 200000000;
|
||||
}
|
||||
|
||||
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
|
||||
|
|
@ -49,6 +50,54 @@ void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
|
|||
opt->mid_occ, opt->max_occ);
|
||||
}
|
||||
|
||||
int mm_preset(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
|
||||
{
|
||||
if (strcmp(preset, "ava-ont") == 0) {
|
||||
io->is_hpc = 0, io->k = 15, io->w = 5;
|
||||
mo->flag |= MM_F_AVA | MM_F_NO_SELF;
|
||||
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
|
||||
mo->mini_batch_size = 500000000;
|
||||
} else if (strcmp(preset, "ava-pb") == 0) {
|
||||
io->is_hpc = 1, io->k = 19, io->w = 5;
|
||||
mo->flag |= MM_F_AVA | MM_F_NO_SELF;
|
||||
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
|
||||
mo->mini_batch_size = 500000000;
|
||||
} else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) {
|
||||
io->is_hpc = 1, io->k = 19;
|
||||
} else if (strcmp(preset, "map-ont") == 0) {
|
||||
io->is_hpc = 0, io->k = 15;
|
||||
} else if (strcmp(preset, "asm5") == 0) {
|
||||
io->is_hpc = 0, io->k = 19, io->w = 19;
|
||||
mo->a = 1, mo->b = 19, mo->q = 39, mo->q2 = 81, mo->e = 3, mo->e2 = 1, mo->zdrop = 200;
|
||||
mo->min_dp_max = 200;
|
||||
} else if (strcmp(preset, "asm10") == 0) {
|
||||
io->is_hpc = 0, io->k = 19, io->w = 19;
|
||||
mo->a = 1, mo->b = 9, mo->q = 16, mo->q2 = 41, mo->e = 2, mo->e2 = 1, mo->zdrop = 200;
|
||||
mo->min_dp_max = 200;
|
||||
} else if (strcmp(preset, "short") == 0 || strcmp(preset, "sr") == 0) {
|
||||
io->is_hpc = 0, io->k = 21, io->w = 11;
|
||||
mo->flag |= MM_F_APPROX_EXT;
|
||||
mo->a = 2, mo->b = 8, mo->q = 12, mo->e = 2, mo->q2 = 32, mo->e2 = 1;
|
||||
mo->max_gap = 100;
|
||||
mo->pri_ratio = 0.5f;
|
||||
mo->min_cnt = 2;
|
||||
mo->min_chain_score = 20;
|
||||
mo->min_dp_max = 40;
|
||||
mo->best_n = 20;
|
||||
mo->bw = 50;
|
||||
mo->mid_occ = 1000;
|
||||
mo->mini_batch_size = 50000000;
|
||||
} else if (strcmp(preset, "splice") == 0 || strcmp(preset, "cdna") == 0) {
|
||||
io->is_hpc = 0, io->k = 15, io->w = 5;
|
||||
mo->flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV;
|
||||
mo->max_gap = 2000, mo->max_gap_ref = mo->bw = 200000;
|
||||
mo->a = 1, mo->b = 2, mo->q = 2, mo->e = 1, mo->q2 = 32, mo->e2 = 0;
|
||||
mo->noncan = 5;
|
||||
mo->zdrop = 200;
|
||||
} else return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
uint32_t n;
|
||||
uint32_t qpos;
|
||||
|
|
@ -304,7 +353,7 @@ static void *worker_pipeline(void *shared, int step, void *in)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int n_threads, int mini_batch_size)
|
||||
int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int n_threads)
|
||||
{
|
||||
pipeline_t pl;
|
||||
memset(&pl, 0, sizeof(pipeline_t));
|
||||
|
|
@ -315,7 +364,7 @@ int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int
|
|||
return -1;
|
||||
}
|
||||
pl.opt = opt, pl.mi = idx;
|
||||
pl.n_threads = n_threads, pl.mini_batch_size = mini_batch_size;
|
||||
pl.n_threads = n_threads, pl.mini_batch_size = opt->mini_batch_size;
|
||||
if ((opt->flag & MM_F_OUT_SAM) && !(opt->flag & MM_F_NO_SAM_SQ))
|
||||
mm_write_sam_SQ(idx);
|
||||
kt_pipeline(n_threads == 1? 1 : 2, worker_pipeline, &pl, 3);
|
||||
|
|
|
|||
47
minimap.h
47
minimap.h
|
|
@ -5,8 +5,6 @@
|
|||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#define MM_IDX_DEF_B 14
|
||||
|
||||
#define MM_F_NO_SELF 0x001
|
||||
#define MM_F_AVA 0x002
|
||||
#define MM_F_CIGAR 0x004
|
||||
|
|
@ -105,46 +103,65 @@ typedef struct {
|
|||
int min_dp_max;
|
||||
int min_ksw_len;
|
||||
|
||||
int mini_batch_size;
|
||||
int32_t max_occ, mid_occ;
|
||||
} mm_mapopt_t;
|
||||
|
||||
typedef struct {
|
||||
short k, w, is_hpc, bucket_bits;
|
||||
int mini_batch_size;
|
||||
uint64_t batch_size;
|
||||
} mm_idxopt_t;
|
||||
|
||||
struct mm_bseq_file_s;
|
||||
|
||||
typedef struct {
|
||||
int is_idx, n_parts;
|
||||
mm_idxopt_t opt;
|
||||
union {
|
||||
struct mm_bseq_file_s *seq;
|
||||
FILE *idx;
|
||||
} fp;
|
||||
} mm_idx_reader_t;
|
||||
|
||||
extern int mm_verbose, mm_dbg_flag;
|
||||
extern double mm_realtime0;
|
||||
|
||||
struct mm_tbuf_s;
|
||||
typedef struct mm_tbuf_s mm_tbuf_t;
|
||||
|
||||
struct mm_bseq_file_s;
|
||||
|
||||
#define mm_seq4_set(s, i, c) ((s)[(i)>>3] |= (uint32_t)(c) << (((i)&7)<<2))
|
||||
#define mm_seq4_get(s, i) ((s)[(i)>>3] >> (((i)&7)<<2) & 0xf)
|
||||
|
||||
// compute minimizers
|
||||
void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, int is_hpc, mm128_v *p);
|
||||
void mm_idxopt_init(mm_idxopt_t *opt);
|
||||
void mm_mapopt_init(mm_mapopt_t *opt);
|
||||
int mm_preset(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo);
|
||||
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi);
|
||||
|
||||
mm_idx_reader_t *mm_idx_reader_open(const char *fn, const mm_idxopt_t *opt);
|
||||
int mm_idx_reader_is_idx(const mm_idx_reader_t *r);
|
||||
mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads);
|
||||
void mm_idx_reader_close(mm_idx_reader_t *r);
|
||||
void mm_idx_destroy(mm_idx_t *mi);
|
||||
|
||||
// minimizer indexing
|
||||
mm_idx_t *mm_idx_init(int w, int k, int b, int is_hpc);
|
||||
void mm_idx_destroy(mm_idx_t *mi);
|
||||
mm_idx_t *mm_idx_gen(struct mm_bseq_file_s *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name);
|
||||
void mm_idx_stat(const mm_idx_t *idx);
|
||||
const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n);
|
||||
int mm_idx_getseq(const mm_idx_t *mi, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq);
|
||||
|
||||
mm_idx_t *mm_idx_build(const char *fn, int w, int k, int is_hpc, int n_threads);
|
||||
int mm_idx_is_idx(const char *fn);
|
||||
|
||||
// minimizer index I/O
|
||||
void mm_idx_dump(FILE *fp, const mm_idx_t *mi);
|
||||
mm_idx_t *mm_idx_load(FILE *fp);
|
||||
|
||||
// mapping
|
||||
void mm_mapopt_init(mm_mapopt_t *opt);
|
||||
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi);
|
||||
mm_tbuf_t *mm_tbuf_init(void);
|
||||
void mm_tbuf_destroy(mm_tbuf_t *b);
|
||||
mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name);
|
||||
|
||||
int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int n_threads, int tbatch_size);
|
||||
int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int n_threads);
|
||||
|
||||
// obsolete APIs (for backward compatibility)
|
||||
mm_idx_t *mm_idx_build(const char *fn, int w, int k, int is_hpc, int n_threads);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
2
mmpriv.h
2
mmpriv.h
|
|
@ -40,6 +40,8 @@ void radix_sort_128x(mm128_t *beg, mm128_t *end);
|
|||
void radix_sort_64(uint64_t *beg, uint64_t *end);
|
||||
uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk);
|
||||
|
||||
void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, int is_hpc, mm128_v *p);
|
||||
|
||||
void mm_write_sam_SQ(const mm_idx_t *idx);
|
||||
void mm_write_sam_hdr_no_SQ(const char *rg, const char *ver, int argc, char *argv[]);
|
||||
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag);
|
||||
|
|
|
|||
Loading…
Reference in New Issue