From d2e14705e7742329fca0191162ccccfe6906299d Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sat, 18 Jan 2020 12:24:44 -0500 Subject: [PATCH] r968: allow large mini_batch; resolves #491 --- bseq.c | 10 +++++----- bseq.h | 10 +++++----- main.c | 5 +++-- map.c | 3 ++- minimap.h | 4 ++-- python/cmappy.pxd | 4 ++-- 6 files changed, 19 insertions(+), 17 deletions(-) diff --git a/bseq.c b/bseq.c index 2b890cd..815fca1 100644 --- a/bseq.c +++ b/bseq.c @@ -77,7 +77,7 @@ static inline void kseq2bseq(kseq_t *ks, mm_bseq1_t *s, int with_qual, int with_ s->l_seq = ks->seq.l; } -mm_bseq1_t *mm_bseq_read3(mm_bseq_file_t *fp, int chunk_size, int with_qual, int with_comment, int frag_mode, int *n_) +mm_bseq1_t *mm_bseq_read3(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int with_comment, int frag_mode, int *n_) { int64_t size = 0; int ret; @@ -116,17 +116,17 @@ mm_bseq1_t *mm_bseq_read3(mm_bseq_file_t *fp, int chunk_size, int with_qual, int return a.a; } -mm_bseq1_t *mm_bseq_read2(mm_bseq_file_t *fp, int chunk_size, int with_qual, int frag_mode, int *n_) +mm_bseq1_t *mm_bseq_read2(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int frag_mode, int *n_) { return mm_bseq_read3(fp, chunk_size, with_qual, 0, frag_mode, n_); } -mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int chunk_size, int with_qual, int *n_) +mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int *n_) { return mm_bseq_read2(fp, chunk_size, with_qual, 0, n_); } -mm_bseq1_t *mm_bseq_read_frag2(int n_fp, mm_bseq_file_t **fp, int chunk_size, int with_qual, int with_comment, int *n_) +mm_bseq1_t *mm_bseq_read_frag2(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int with_comment, int *n_) { int i; int64_t size = 0; @@ -156,7 +156,7 @@ mm_bseq1_t *mm_bseq_read_frag2(int n_fp, mm_bseq_file_t **fp, int chunk_size, in return a.a; } -mm_bseq1_t *mm_bseq_read_frag(int n_fp, mm_bseq_file_t **fp, int chunk_size, int with_qual, int *n_) +mm_bseq1_t *mm_bseq_read_frag(int 
n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int *n_) { return mm_bseq_read_frag2(n_fp, fp, chunk_size, with_qual, 0, n_); } diff --git a/bseq.h b/bseq.h index 2f25d3a..c0bdc63 100644 --- a/bseq.h +++ b/bseq.h @@ -18,11 +18,11 @@ typedef struct { mm_bseq_file_t *mm_bseq_open(const char *fn); void mm_bseq_close(mm_bseq_file_t *fp); -mm_bseq1_t *mm_bseq_read3(mm_bseq_file_t *fp, int chunk_size, int with_qual, int with_comment, int frag_mode, int *n_); -mm_bseq1_t *mm_bseq_read2(mm_bseq_file_t *fp, int chunk_size, int with_qual, int frag_mode, int *n_); -mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int chunk_size, int with_qual, int *n_); -mm_bseq1_t *mm_bseq_read_frag2(int n_fp, mm_bseq_file_t **fp, int chunk_size, int with_qual, int with_comment, int *n_); -mm_bseq1_t *mm_bseq_read_frag(int n_fp, mm_bseq_file_t **fp, int chunk_size, int with_qual, int *n_); +mm_bseq1_t *mm_bseq_read3(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int with_comment, int frag_mode, int *n_); +mm_bseq1_t *mm_bseq_read2(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int frag_mode, int *n_); +mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int *n_); +mm_bseq1_t *mm_bseq_read_frag2(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int with_comment, int *n_); +mm_bseq1_t *mm_bseq_read_frag(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int *n_); int mm_bseq_eof(mm_bseq_file_t *fp); extern unsigned char seq_nt4_table[256]; diff --git a/main.c b/main.c index e3b8387..1eb677d 100644 --- a/main.c +++ b/main.c @@ -7,7 +7,7 @@ #include "mmpriv.h" #include "ketopt.h" -#define MM_VERSION "2.17-r967-dirty" +#define MM_VERSION "2.17-r968-dirty" #ifdef __linux__ #include <sys/resource.h> @@ -167,7 +167,7 @@ int main(int argc, char *argv[]) else if (c == 's') opt.min_dp_max = atoi(o.arg); else if (c == 'C') opt.noncan = atoi(o.arg); else if (c == 'I') ipt.batch_size = mm_parse_num(o.arg); - else if (c == 'K')
opt.mini_batch_size = (int)mm_parse_num(o.arg); + else if (c == 'K') opt.mini_batch_size = mm_parse_num(o.arg); else if (c == 'R') rg = o.arg; else if (c == 'h') fp_help = stdout; else if (c == '2') opt.flag |= MM_F_2_IO_THREADS; @@ -382,6 +382,7 @@ int main(int argc, char *argv[]) if (argc != o.ind + 1) mm_mapopt_update(&opt, mi); if (mm_verbose >= 3) mm_idx_stat(mi); if (junc_bed) mm_idx_bed_read(mi, junc_bed, 1); + ret = 0; if (!(opt.flag & MM_F_FRAG_MODE)) { for (i = o.ind + 1; i < argc; ++i) { ret = mm_map_file(mi, argv[i], &opt, n_threads); diff --git a/map.c b/map.c index 8cfd8f4..d924b7d 100644 --- a/map.c +++ b/map.c @@ -399,7 +399,8 @@ mm_reg1_t *mm_map(const mm_idx_t *mi, int qlen, const char *seq, int *n_regs, mm **************************/ typedef struct { - int mini_batch_size, n_processed, n_threads, n_fp; + int n_processed, n_threads, n_fp; + int64_t mini_batch_size; const mm_mapopt_t *opt; mm_bseq_file_t **fp; const mm_idx_t *mi; diff --git a/minimap.h b/minimap.h index 8837fdb..5da9c8b 100644 --- a/minimap.h +++ b/minimap.h @@ -100,7 +100,7 @@ typedef struct { // indexing and mapping options typedef struct { short k, w, flag, bucket_bits; - int mini_batch_size; + int64_t mini_batch_size; uint64_t batch_size; } mm_idxopt_t; @@ -144,7 +144,7 @@ typedef struct { int32_t min_mid_occ; int32_t mid_occ; // ignore seeds with occurrences above this threshold int32_t max_occ; - int mini_batch_size; // size of a batch of query bases to process in parallel + int64_t mini_batch_size; // size of a batch of query bases to process in parallel int64_t max_sw_mat; const char *split_prefix; diff --git a/python/cmappy.pxd b/python/cmappy.pxd index 8564ac0..a9953bd 100644 --- a/python/cmappy.pxd +++ b/python/cmappy.pxd @@ -6,7 +6,7 @@ cdef extern from "minimap.h": # ctypedef struct mm_idxopt_t: short k, w, flag, bucket_bits - int mini_batch_size + int64_t mini_batch_size uint64_t batch_size ctypedef struct mm_mapopt_t: @@ -42,7 +42,7 @@ cdef extern from "minimap.h": 
int32_t min_mid_occ int32_t mid_occ int32_t max_occ - int mini_batch_size + int64_t mini_batch_size int64_t max_sw_mat const char *split_prefix