diff --git a/align.c b/align.c index 808804b..4bcc439 100644 --- a/align.c +++ b/align.c @@ -222,7 +222,7 @@ static inline int mm_get_hplen_back(const mm_idx_t *mi, uint32_t rid, uint32_t x static inline void mm_adjust_minier(const mm_idx_t *mi, uint8_t *const qseq0[2], mm128_t *a, int32_t *r, int32_t *q) { - if (mi->is_hpc) { + if (mi->flag & MM_I_HPC) { const uint8_t *qseq = qseq0[a->x>>63]; int i, c; *q = (int32_t)a->y; @@ -351,14 +351,14 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int int32_t rs1, qs1, re1, qe1; int8_t mat[25]; - if (is_sr) assert(!mi->is_hpc); // HPC won't work with SR because with HPC we can't easily tell if there is a gap + if (is_sr) assert(!(mi->flag & MM_I_HPC)); // HPC won't work with SR because with HPC we can't easily tell if there is a gap r2->cnt = 0; if (r->cnt == 0) return; ksw_gen_simple_mat(5, mat, opt->a, opt->b); bw = (int)(opt->bw * 1.5 + 1.); - if (is_sr && !mi->is_hpc) { + if (is_sr && !(mi->flag & MM_I_HPC)) { mm_max_stretch(opt, r, a, &as1, &cnt1); rs = (int32_t)a[as1].x + 1 - (int32_t)(a[as1].y>>32&0xff); qs = (int32_t)a[as1].y + 1 - (int32_t)(a[as1].y>>32&0xff); @@ -470,7 +470,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int for (i = is_sr? 
cnt1 - 1 : 1; i < cnt1; ++i) { // gap filling if ((a[as1+i].y & (MM_SEED_IGNORE|MM_SEED_TANDEM)) && i != cnt1 - 1) continue; - if (is_sr && !mi->is_hpc) { + if (is_sr && !(mi->flag & MM_I_HPC)) { re = (int32_t)a[as1 + i].x + 1; qe = (int32_t)a[as1 + i].y + 1; } else mm_adjust_minier(mi, qseq0, &a[as1 + i], &re, &qe); diff --git a/index.c b/index.c index 93c5e21..3766fb7 100644 --- a/index.c +++ b/index.c @@ -31,19 +31,19 @@ typedef struct mm_idx_bucket_s { void mm_idxopt_init(mm_idxopt_t *opt) { memset(opt, 0, sizeof(mm_idxopt_t)); - opt->k = 15, opt->w = 10, opt->is_hpc = 0; + opt->k = 15, opt->w = 10, opt->flag = 0; opt->bucket_bits = 14; opt->mini_batch_size = 50000000; opt->batch_size = 4000000000ULL; } -mm_idx_t *mm_idx_init(int w, int k, int b, int is_hpc) +mm_idx_t *mm_idx_init(int w, int k, int b, int flag) { mm_idx_t *mi; if (k*2 < b) b = k * 2; if (w < 1) w = 1; mi = (mm_idx_t*)calloc(1, sizeof(mm_idx_t)); - mi->w = w, mi->k = k, mi->b = b, mi->is_hpc = is_hpc; + mi->w = w, mi->k = k, mi->b = b, mi->flag = flag; mi->B = (mm_idx_bucket_t*)calloc(1<<b, sizeof(mm_idx_bucket_t)); mi->km = km_init(); return mi; @@ -89,7 +89,7 @@ void mm_idx_stat(const mm_idx_t *mi) { int i, n = 0, n1 = 0; uint64_t sum = 0, len = 0; - fprintf(stderr, "[M::%s] kmer size: %d; skip: %d; is_HPC: %d; #seq: %d\n", __func__, mi->k, mi->w, mi->is_hpc, mi->n_seq); + fprintf(stderr, "[M::%s] kmer size: %d; skip: %d; is_hpc: %d; #seq: %d\n", __func__, mi->k, mi->w, mi->flag&MM_I_HPC, mi->n_seq); for (i = 0; i < mi->n_seq; ++i) len += mi->seq[i].len; for (i = 0; i < 1<<mi->b; ++i) @@ -214,7 +214,7 @@ static void mm_idx_post(mm_idx_t *mi, int n_threads) #include "bseq.h" typedef struct { - int mini_batch_size, keep_name; + int mini_batch_size; uint64_t batch_size, sum_len; mm_bseq_file_t *fp; mm_idx_t *mi; @@ -266,7 +266,7 @@ static void *worker_pipeline(void *shared, int step, void *in) for (i = 0; i < s->n_seq; ++i) { mm_idx_seq_t *seq = &p->mi->seq[p->mi->n_seq]; uint32_t j; - if (p->keep_name) { + if (!(p->mi->flag & 
MM_I_NO_NAME)) { seq->name = (char*)kmalloc(p->mi->km, strlen(s->seq[i].name) + 1); strcpy(seq->name, s->seq[i].name); } else seq->name = 0; @@ -288,7 +288,7 @@ static void *worker_pipeline(void *shared, int step, void *in) step_t *s = (step_t*)in; for (i = 0; i < s->n_seq; ++i) { mm_bseq1_t *t = &s->seq[i]; - mm_sketch(0, t->seq, t->l_seq, p->mi->w, p->mi->k, t->rid, p->mi->is_hpc, &s->a); + mm_sketch(0, t->seq, t->l_seq, p->mi->w, p->mi->k, t->rid, p->mi->flag&MM_I_HPC, &s->a); free(t->seq); free(t->name); } free(s->seq); s->seq = 0; @@ -301,16 +301,15 @@ static void *worker_pipeline(void *shared, int step, void *in) return 0; } -mm_idx_t *mm_idx_gen(mm_bseq_file_t *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name) +mm_idx_t *mm_idx_gen(mm_bseq_file_t *fp, int w, int k, int b, int flag, int mini_batch_size, int n_threads, uint64_t batch_size) { pipeline_t pl; if (fp == 0 || mm_bseq_eof(fp)) return 0; memset(&pl, 0, sizeof(pipeline_t)); pl.mini_batch_size = mini_batch_size < batch_size? mini_batch_size : batch_size; - pl.keep_name = keep_name; pl.batch_size = batch_size; pl.fp = fp; - pl.mi = mm_idx_init(w, k, b, is_hpc); + pl.mi = mm_idx_init(w, k, b, flag); kt_pipeline(n_threads < 3? 
n_threads : 3, worker_pipeline, &pl, 3); if (mm_verbose >= 3) @@ -323,13 +322,13 @@ mm_idx_t *mm_idx_gen(mm_bseq_file_t *fp, int w, int k, int b, int is_hpc, int mi return pl.mi; } -mm_idx_t *mm_idx_build(const char *fn, int w, int k, int is_hpc, int n_threads) // a simpler interface +mm_idx_t *mm_idx_build(const char *fn, int w, int k, int flag, int n_threads) // a simpler interface { mm_bseq_file_t *fp; mm_idx_t *mi; fp = mm_bseq_open(fn); if (fp == 0) return 0; - mi = mm_idx_gen(fp, w, k, 14, is_hpc, 1<<18, n_threads, UINT64_MAX, 1); + mi = mm_idx_gen(fp, w, k, 14, flag, 1<<18, n_threads, UINT64_MAX); mm_bseq_close(fp); return mi; } @@ -344,7 +343,7 @@ void mm_idx_dump(FILE *fp, const mm_idx_t *mi) uint32_t x[5]; int i; - x[0] = mi->w, x[1] = mi->k, x[2] = mi->b, x[3] = mi->n_seq, x[4] = mi->is_hpc; + x[0] = mi->w, x[1] = mi->k, x[2] = mi->b, x[3] = mi->n_seq, x[4] = mi->flag; fwrite(MM_IDX_MAGIC, 1, 4, fp); fwrite(x, 4, 5, fp); for (i = 0; i < mi->n_seq; ++i) { @@ -476,10 +475,10 @@ mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads) mm_idx_t *mi; if (r->is_idx) { mi = mm_idx_load(r->fp.idx); - if (mi && mm_verbose >= 2 && (mi->k != r->opt.k || mi->w != r->opt.w || mi->is_hpc != r->opt.is_hpc)) + if (mi && mm_verbose >= 2 && (mi->k != r->opt.k || mi->w != r->opt.w || (mi->flag&MM_I_HPC) != (r->opt.flag&MM_I_HPC))) fprintf(stderr, "[WARNING]\033[1;31m Indexing parameters (-k, -w or -H) overridden by parameters used in the prebuilt index.\033[0m\n"); } else - mi = mm_idx_gen(r->fp.seq, r->opt.w, r->opt.k, r->opt.bucket_bits, r->opt.is_hpc, r->opt.mini_batch_size, n_threads, r->opt.batch_size, 1); + mi = mm_idx_gen(r->fp.seq, r->opt.w, r->opt.k, r->opt.bucket_bits, r->opt.flag, r->opt.mini_batch_size, n_threads, r->opt.batch_size); if (mi) { if (r->fp_out) mm_idx_dump(r->fp_out, mi); ++r->n_parts; diff --git a/main.c b/main.c index c233b55..648b0df 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define 
MM_VERSION "2.5-r572" +#define MM_VERSION "2.5-r573-dirty" #ifdef __linux__ #include <sys/resource.h> @@ -94,7 +94,7 @@ int main(int argc, char *argv[]) while ((c = getopt_long(argc, argv, opt_str, long_options, &long_idx)) >= 0) { if (c == 'w') ipt.w = atoi(optarg); else if (c == 'k') ipt.k = atoi(optarg); - else if (c == 'H') ipt.is_hpc = 1; + else if (c == 'H') ipt.flag |= MM_I_HPC; else if (c == 'd') fnw = optarg; // the above are indexing related options, except -I else if (c == 'r') opt.bw = (int)mm_parse_num(optarg); else if (c == 't') n_threads = atoi(optarg); diff --git a/map.c b/map.c index 086f357..6330f88 100644 --- a/map.c +++ b/map.c @@ -64,29 +64,29 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) mm_idxopt_init(io); mm_mapopt_init(mo); } else if (strcmp(preset, "ava-ont") == 0) { - io->is_hpc = 0, io->k = 15, io->w = 5; + io->flag = 0, io->k = 15, io->w = 5; mo->flag |= MM_F_AVA | MM_F_NO_SELF; mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25; } else if (strcmp(preset, "ava-pb") == 0) { - io->is_hpc = 1, io->k = 19, io->w = 5; + io->flag |= MM_I_HPC, io->k = 19, io->w = 5; mo->flag |= MM_F_AVA | MM_F_NO_SELF; mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25; } else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) { - io->is_hpc = 1, io->k = 19; + io->flag |= MM_I_HPC, io->k = 19; } else if (strcmp(preset, "map-ont") == 0) { - io->is_hpc = 0, io->k = 15; + io->flag = 0, io->k = 15; } else if (strcmp(preset, "asm5") == 0) { - io->is_hpc = 0, io->k = 19, io->w = 19; + io->flag = 0, io->k = 19, io->w = 19; mo->a = 1, mo->b = 19, mo->q = 39, mo->q2 = 81, mo->e = 3, mo->e2 = 1, mo->zdrop = 200; mo->min_dp_max = 200; mo->best_n = 50; } else if (strcmp(preset, "asm10") == 0) { - io->is_hpc = 0, io->k = 19, io->w = 19; + io->flag = 0, io->k = 19, io->w = 19; mo->a = 1, mo->b = 9, mo->q = 16, mo->q2 = 41, mo->e = 2, mo->e2 = 1, mo->zdrop = 200; 
mo->min_dp_max = 200; mo->best_n = 50; } else if (strcmp(preset, "short") == 0 || strcmp(preset, "sr") == 0) { - io->is_hpc = 0, io->k = 21, io->w = 11; + io->flag = 0, io->k = 21, io->w = 11; mo->flag |= MM_F_SR | MM_F_FRAG_MODE | MM_F_NO_PRINT_2ND | MM_F_2_IO_THREADS; mo->pe_ori = 0<<1|1; // FR mo->a = 2, mo->b = 8, mo->q = 12, mo->e = 2, mo->q2 = 24, mo->e2 = 1; @@ -104,7 +104,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) mo->max_occ = 5000; mo->mini_batch_size = 50000000; } else if (strcmp(preset, "splice") == 0 || strcmp(preset, "cdna") == 0) { - io->is_hpc = 0, io->k = 15, io->w = 5; + io->flag = 0, io->k = 15, io->w = 5; mo->flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV | MM_F_SPLICE_FLANK; mo->max_gap = 2000, mo->max_gap_ref = mo->bw = 200000; mo->a = 1, mo->b = 2, mo->q = 2, mo->e = 1, mo->q2 = 32, mo->e2 = 0; @@ -173,7 +173,7 @@ static void collect_minimizers(const mm_mapopt_t *opt, const mm_idx_t *mi, int n int i, j, n, sum = 0; b->mini.n = 0; for (i = n = 0; i < n_segs; ++i) { - mm_sketch(b->km, seqs[i], qlens[i], mi->w, mi->k, i, mi->is_hpc, &b->mini); + mm_sketch(b->km, seqs[i], qlens[i], mi->w, mi->k, i, mi->flag&MM_I_HPC, &b->mini); for (j = n; j < b->mini.n; ++j) b->mini.a[j].y += sum << 1; if (opt->sdust_thres > 0) // mask low-complexity minimizers diff --git a/minimap.h b/minimap.h index 0eb279f..45a8f30 100644 --- a/minimap.h +++ b/minimap.h @@ -26,6 +26,10 @@ #define MM_F_SPLICE_FLANK 0x40000 #define MM_F_SOFTCLIP 0x80000 +#define MM_I_HPC 0x1 +#define MM_I_NO_SEQ 0x2 +#define MM_I_NO_NAME 0x4 + #define MM_IDX_MAGIC "MMI\2" #define MM_MAX_SEG 255 @@ -46,7 +50,7 @@ typedef struct { } mm_idx_seq_t; typedef struct { - int32_t b, w, k, is_hpc; + int32_t b, w, k, flag; uint32_t n_seq; // number of reference sequences mm_idx_seq_t *seq; // sequence name, length and offset uint32_t *S; // 4-bit packed sequence @@ -80,7 +84,7 @@ typedef struct { // indexing and mapping options typedef struct { - short k, w, is_hpc, 
bucket_bits; + short k, w, flag, bucket_bits; int mini_batch_size; uint64_t batch_size; } mm_idxopt_t; @@ -269,7 +273,7 @@ int mm_map_file_frag(const mm_idx_t *idx, int n_segs, const char **fn, const mm_ // deprecated APIs for backward compatibility void mm_mapopt_init(mm_mapopt_t *opt); -mm_idx_t *mm_idx_build(const char *fn, int w, int k, int is_hpc, int n_threads); +mm_idx_t *mm_idx_build(const char *fn, int w, int k, int flag, int n_threads); #ifdef __cplusplus } diff --git a/python/cmappy.pxd b/python/cmappy.pxd index e3444a9..de969a4 100644 --- a/python/cmappy.pxd +++ b/python/cmappy.pxd @@ -5,7 +5,7 @@ cdef extern from "minimap.h": # Options # ctypedef struct mm_idxopt_t: - short k, w, is_hpc, bucket_bits + short k, w, flag, bucket_bits int mini_batch_size uint64_t batch_size @@ -51,7 +51,7 @@ cdef extern from "minimap.h": pass ctypedef struct mm_idx_t: - int32_t b, w, k, is_hpc + int32_t b, w, k, flag uint32_t n_seq mm_idx_seq_t *seq uint32_t *S