r968: allow large mini_batch; resolves #491

This commit is contained in:
Heng Li 2020-01-18 12:24:44 -05:00
parent 24f50f38e8
commit d2e14705e7
6 changed files with 19 additions and 17 deletions

10
bseq.c
View File

@ -77,7 +77,7 @@ static inline void kseq2bseq(kseq_t *ks, mm_bseq1_t *s, int with_qual, int with_
s->l_seq = ks->seq.l;
}
mm_bseq1_t *mm_bseq_read3(mm_bseq_file_t *fp, int chunk_size, int with_qual, int with_comment, int frag_mode, int *n_)
mm_bseq1_t *mm_bseq_read3(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int with_comment, int frag_mode, int *n_)
{
int64_t size = 0;
int ret;
@ -116,17 +116,17 @@ mm_bseq1_t *mm_bseq_read3(mm_bseq_file_t *fp, int chunk_size, int with_qual, int
return a.a;
}
mm_bseq1_t *mm_bseq_read2(mm_bseq_file_t *fp, int chunk_size, int with_qual, int frag_mode, int *n_)
mm_bseq1_t *mm_bseq_read2(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int frag_mode, int *n_)
{
return mm_bseq_read3(fp, chunk_size, with_qual, 0, frag_mode, n_);
}
mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int chunk_size, int with_qual, int *n_)
mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int *n_)
{
return mm_bseq_read2(fp, chunk_size, with_qual, 0, n_);
}
mm_bseq1_t *mm_bseq_read_frag2(int n_fp, mm_bseq_file_t **fp, int chunk_size, int with_qual, int with_comment, int *n_)
mm_bseq1_t *mm_bseq_read_frag2(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int with_comment, int *n_)
{
int i;
int64_t size = 0;
@ -156,7 +156,7 @@ mm_bseq1_t *mm_bseq_read_frag2(int n_fp, mm_bseq_file_t **fp, int chunk_size, in
return a.a;
}
mm_bseq1_t *mm_bseq_read_frag(int n_fp, mm_bseq_file_t **fp, int chunk_size, int with_qual, int *n_)
mm_bseq1_t *mm_bseq_read_frag(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int *n_)
{
return mm_bseq_read_frag2(n_fp, fp, chunk_size, with_qual, 0, n_);
}

10
bseq.h
View File

@ -18,11 +18,11 @@ typedef struct {
mm_bseq_file_t *mm_bseq_open(const char *fn);
void mm_bseq_close(mm_bseq_file_t *fp);
mm_bseq1_t *mm_bseq_read3(mm_bseq_file_t *fp, int chunk_size, int with_qual, int with_comment, int frag_mode, int *n_);
mm_bseq1_t *mm_bseq_read2(mm_bseq_file_t *fp, int chunk_size, int with_qual, int frag_mode, int *n_);
mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int chunk_size, int with_qual, int *n_);
mm_bseq1_t *mm_bseq_read_frag2(int n_fp, mm_bseq_file_t **fp, int chunk_size, int with_qual, int with_comment, int *n_);
mm_bseq1_t *mm_bseq_read_frag(int n_fp, mm_bseq_file_t **fp, int chunk_size, int with_qual, int *n_);
mm_bseq1_t *mm_bseq_read3(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int with_comment, int frag_mode, int *n_);
mm_bseq1_t *mm_bseq_read2(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int frag_mode, int *n_);
mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int *n_);
mm_bseq1_t *mm_bseq_read_frag2(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int with_comment, int *n_);
mm_bseq1_t *mm_bseq_read_frag(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int *n_);
int mm_bseq_eof(mm_bseq_file_t *fp);
extern unsigned char seq_nt4_table[256];

5
main.c
View File

@ -7,7 +7,7 @@
#include "mmpriv.h"
#include "ketopt.h"
#define MM_VERSION "2.17-r967-dirty"
#define MM_VERSION "2.17-r968-dirty"
#ifdef __linux__
#include <sys/resource.h>
@ -167,7 +167,7 @@ int main(int argc, char *argv[])
else if (c == 's') opt.min_dp_max = atoi(o.arg);
else if (c == 'C') opt.noncan = atoi(o.arg);
else if (c == 'I') ipt.batch_size = mm_parse_num(o.arg);
else if (c == 'K') opt.mini_batch_size = (int)mm_parse_num(o.arg);
else if (c == 'K') opt.mini_batch_size = mm_parse_num(o.arg);
else if (c == 'R') rg = o.arg;
else if (c == 'h') fp_help = stdout;
else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
@ -382,6 +382,7 @@ int main(int argc, char *argv[])
if (argc != o.ind + 1) mm_mapopt_update(&opt, mi);
if (mm_verbose >= 3) mm_idx_stat(mi);
if (junc_bed) mm_idx_bed_read(mi, junc_bed, 1);
ret = 0;
if (!(opt.flag & MM_F_FRAG_MODE)) {
for (i = o.ind + 1; i < argc; ++i) {
ret = mm_map_file(mi, argv[i], &opt, n_threads);

3
map.c
View File

@ -399,7 +399,8 @@ mm_reg1_t *mm_map(const mm_idx_t *mi, int qlen, const char *seq, int *n_regs, mm
**************************/
typedef struct {
int mini_batch_size, n_processed, n_threads, n_fp;
int n_processed, n_threads, n_fp;
int64_t mini_batch_size;
const mm_mapopt_t *opt;
mm_bseq_file_t **fp;
const mm_idx_t *mi;

4
minimap.h
View File

@ -100,7 +100,7 @@ typedef struct {
// indexing and mapping options
typedef struct {
short k, w, flag, bucket_bits;
int mini_batch_size;
int64_t mini_batch_size;
uint64_t batch_size;
} mm_idxopt_t;
@ -144,7 +144,7 @@ typedef struct {
int32_t min_mid_occ;
int32_t mid_occ; // ignore seeds with occurrences above this threshold
int32_t max_occ;
int mini_batch_size; // size of a batch of query bases to process in parallel
int64_t mini_batch_size; // size of a batch of query bases to process in parallel
int64_t max_sw_mat;
const char *split_prefix;

View File

@ -6,7 +6,7 @@ cdef extern from "minimap.h":
#
ctypedef struct mm_idxopt_t:
short k, w, flag, bucket_bits
int mini_batch_size
int64_t mini_batch_size
uint64_t batch_size
ctypedef struct mm_mapopt_t:
@ -42,7 +42,7 @@ cdef extern from "minimap.h":
int32_t min_mid_occ
int32_t mid_occ
int32_t max_occ
int mini_batch_size
int64_t mini_batch_size
int64_t max_sw_mat
const char *split_prefix