diff --git a/index.c b/index.c index 3766fb7..8495ebd 100644 --- a/index.c +++ b/index.c @@ -246,7 +246,6 @@ static void *worker_pipeline(void *shared, int step, void *in) s->seq = mm_bseq_read(p->fp, p->mini_batch_size, 0, &s->n_seq); // read a mini-batch if (s->seq) { uint32_t old_m, m; - uint64_t sum_len, old_max_len, max_len; assert((uint64_t)p->mi->n_seq + s->n_seq <= UINT32_MAX); // to prevent integer overflow // make room for p->mi->seq old_m = p->mi->n_seq, m = p->mi->n_seq + s->n_seq; @@ -254,13 +253,16 @@ static void *worker_pipeline(void *shared, int step, void *in) if (old_m != m) p->mi->seq = (mm_idx_seq_t*)krealloc(p->mi->km, p->mi->seq, m * sizeof(mm_idx_seq_t)); // make room for p->mi->S - for (i = 0, sum_len = 0; i < s->n_seq; ++i) sum_len += s->seq[i].l_seq; - old_max_len = (p->sum_len + 7) / 8; - max_len = (p->sum_len + sum_len + 7) / 8; - kroundup64(old_max_len); kroundup64(max_len); - if (old_max_len != max_len) { - p->mi->S = (uint32_t*)realloc(p->mi->S, max_len * 4); - memset(&p->mi->S[old_max_len], 0, 4 * (max_len - old_max_len)); + if (!(p->mi->flag & MM_I_NO_SEQ)) { + uint64_t sum_len, old_max_len, max_len; + for (i = 0, sum_len = 0; i < s->n_seq; ++i) sum_len += s->seq[i].l_seq; + old_max_len = (p->sum_len + 7) / 8; + max_len = (p->sum_len + sum_len + 7) / 8; + kroundup64(old_max_len); kroundup64(max_len); + if (old_max_len != max_len) { + p->mi->S = (uint32_t*)realloc(p->mi->S, max_len * 4); + memset(&p->mi->S[old_max_len], 0, 4 * (max_len - old_max_len)); + } } // populate p->mi->seq for (i = 0; i < s->n_seq; ++i) { @@ -273,10 +275,12 @@ static void *worker_pipeline(void *shared, int step, void *in) seq->len = s->seq[i].l_seq; seq->offset = p->sum_len; // copy the sequence - for (j = 0; j < seq->len; ++j) { // TODO: this is not the fastest way, but let's first see if speed matters here - uint64_t o = p->sum_len + j; - int c = seq_nt4_table[(uint8_t)s->seq[i].seq[j]]; - mm_seq4_set(p->mi->S, o, c); + if (!(p->mi->flag & MM_I_NO_SEQ)) { + for (j = 0; j < seq->len; ++j) { // TODO: this is not the fastest way, but let's first see if speed matters here + uint64_t o = p->sum_len + j; + int c = seq_nt4_table[(uint8_t)s->seq[i].seq[j]]; + mm_seq4_set(p->mi->S, o, c); + } } // update p->sum_len and p->mi->n_seq p->sum_len += seq->len; @@ -370,7 +374,8 @@ void mm_idx_dump(FILE *fp, const mm_idx_t *mi) fwrite(x, 8, 2, fp); } } - fwrite(mi->S, 4, (sum_len + 7) / 8, fp); + if (!(mi->flag & MM_I_NO_SEQ)) + fwrite(mi->S, 4, (sum_len + 7) / 8, fp); fflush(fp); } @@ -420,8 +425,10 @@ mm_idx_t *mm_idx_load(FILE *fp) kh_val(h, k) = x[1]; } } - mi->S = (uint32_t*)malloc((sum_len + 7) / 8 * 4); - fread(mi->S, 4, (sum_len + 7) / 8, fp); + if (!(mi->flag & MM_I_NO_SEQ)) { + mi->S = (uint32_t*)malloc((sum_len + 7) / 8 * 4); + fread(mi->S, 4, (sum_len + 7) / 8, fp); + } return mi; } diff --git a/main.c b/main.c index 648b0df..74b0ef9 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.5-r573-dirty" +#define MM_VERSION "2.5-r574-dirty" #ifdef __linux__ #include @@ -43,6 +43,7 @@ static struct option long_options[] = { { "end-bonus", required_argument, 0, 0 }, { "no-pairing", no_argument, 0, 0 }, { "splice-flank", optional_argument, 0, 0 }, + { "idx-no-seq", no_argument, 0, 0 }, { "help", no_argument, 0, 'h' }, { "max-intron-len", required_argument, 0, 'G' }, { "version", no_argument, 0, 'V' }, @@ -138,6 +139,7 @@ int main(int argc, char *argv[]) else if (c == 0 && long_idx ==13) opt.flag |= MM_F_SR; // --sr else if (c == 0 && long_idx ==17) opt.end_bonus = atoi(optarg); // --end-bonus else if (c == 0 && long_idx ==18) opt.flag |= MM_F_INDEPEND_SEG; // --no-pairing + else if (c == 0 && long_idx ==20) ipt.flag |= MM_I_NO_SEQ; // --idx-no-seq else if (c == 0 && long_idx == 14) { // --frag if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0) opt.flag |= MM_F_FRAG_MODE; @@ -196,6 +198,8 @@ int main(int argc, char *argv[]) fprintf(stderr, "[ERROR]\033[1;31m --splice and --frag should not be specified at the same time.\033[0m\n"); return 1; } + if (!fnw && !(opt.flag&MM_F_CIGAR)) + ipt.flag |= MM_I_NO_SEQ; if (argc == optind || fp_help == stdout) { fprintf(fp_help, "Usage: minimap2 [options] | [query.fa] [...]\n"); @@ -259,11 +263,18 @@ int main(int argc, char *argv[]) } if (!idx_rdr->is_idx && fnw == 0 && argc - optind < 2) { fprintf(stderr, "[ERROR] missing input: please specify a query file to map or option -d to keep the index\n"); + mm_idx_reader_close(idx_rdr); return 1; } if (opt.best_n == 0 && (opt.flag&MM_F_CIGAR) && mm_verbose >= 2) fprintf(stderr, "[WARNING]\033[1;31m `-N 0' reduces alignment accuracy. Please use --secondary=no to suppress secondary alignments.\033[0m\n"); while ((mi = mm_idx_reader_read(idx_rdr, n_threads)) != 0) { + if ((opt.flag & MM_F_CIGAR) && (mi->flag & MM_I_NO_SEQ)) { + fprintf(stderr, "[ERROR] the prebuilt index doesn't contain sequences.\n"); + mm_idx_destroy(mi); + mm_idx_reader_close(idx_rdr); + return 1; + } if ((opt.flag & MM_F_OUT_SAM) && idx_rdr->n_parts == 1) { if (mm_idx_reader_eof(idx_rdr)) { mm_write_sam_hdr(mi, rg, MM_VERSION, argc, argv);