r574: build index without sequences
This commit is contained in:
parent
2f463b1db0
commit
131cfc6938
37
index.c
37
index.c
|
|
@ -246,7 +246,6 @@ static void *worker_pipeline(void *shared, int step, void *in)
|
|||
s->seq = mm_bseq_read(p->fp, p->mini_batch_size, 0, &s->n_seq); // read a mini-batch
|
||||
if (s->seq) {
|
||||
uint32_t old_m, m;
|
||||
uint64_t sum_len, old_max_len, max_len;
|
||||
assert((uint64_t)p->mi->n_seq + s->n_seq <= UINT32_MAX); // to prevent integer overflow
|
||||
// make room for p->mi->seq
|
||||
old_m = p->mi->n_seq, m = p->mi->n_seq + s->n_seq;
|
||||
|
|
@ -254,13 +253,16 @@ static void *worker_pipeline(void *shared, int step, void *in)
|
|||
if (old_m != m)
|
||||
p->mi->seq = (mm_idx_seq_t*)krealloc(p->mi->km, p->mi->seq, m * sizeof(mm_idx_seq_t));
|
||||
// make room for p->mi->S
|
||||
for (i = 0, sum_len = 0; i < s->n_seq; ++i) sum_len += s->seq[i].l_seq;
|
||||
old_max_len = (p->sum_len + 7) / 8;
|
||||
max_len = (p->sum_len + sum_len + 7) / 8;
|
||||
kroundup64(old_max_len); kroundup64(max_len);
|
||||
if (old_max_len != max_len) {
|
||||
p->mi->S = (uint32_t*)realloc(p->mi->S, max_len * 4);
|
||||
memset(&p->mi->S[old_max_len], 0, 4 * (max_len - old_max_len));
|
||||
if (!(p->mi->flag & MM_I_NO_SEQ)) {
|
||||
uint64_t sum_len, old_max_len, max_len;
|
||||
for (i = 0, sum_len = 0; i < s->n_seq; ++i) sum_len += s->seq[i].l_seq;
|
||||
old_max_len = (p->sum_len + 7) / 8;
|
||||
max_len = (p->sum_len + sum_len + 7) / 8;
|
||||
kroundup64(old_max_len); kroundup64(max_len);
|
||||
if (old_max_len != max_len) {
|
||||
p->mi->S = (uint32_t*)realloc(p->mi->S, max_len * 4);
|
||||
memset(&p->mi->S[old_max_len], 0, 4 * (max_len - old_max_len));
|
||||
}
|
||||
}
|
||||
// populate p->mi->seq
|
||||
for (i = 0; i < s->n_seq; ++i) {
|
||||
|
|
@ -273,10 +275,12 @@ static void *worker_pipeline(void *shared, int step, void *in)
|
|||
seq->len = s->seq[i].l_seq;
|
||||
seq->offset = p->sum_len;
|
||||
// copy the sequence
|
||||
for (j = 0; j < seq->len; ++j) { // TODO: this is not the fastest way, but let's first see if speed matters here
|
||||
uint64_t o = p->sum_len + j;
|
||||
int c = seq_nt4_table[(uint8_t)s->seq[i].seq[j]];
|
||||
mm_seq4_set(p->mi->S, o, c);
|
||||
if (!(p->mi->flag & MM_I_NO_SEQ)) {
|
||||
for (j = 0; j < seq->len; ++j) { // TODO: this is not the fastest way, but let's first see if speed matters here
|
||||
uint64_t o = p->sum_len + j;
|
||||
int c = seq_nt4_table[(uint8_t)s->seq[i].seq[j]];
|
||||
mm_seq4_set(p->mi->S, o, c);
|
||||
}
|
||||
}
|
||||
// update p->sum_len and p->mi->n_seq
|
||||
p->sum_len += seq->len;
|
||||
|
|
@ -370,7 +374,8 @@ void mm_idx_dump(FILE *fp, const mm_idx_t *mi)
|
|||
fwrite(x, 8, 2, fp);
|
||||
}
|
||||
}
|
||||
fwrite(mi->S, 4, (sum_len + 7) / 8, fp);
|
||||
if (!(mi->flag & MM_I_NO_SEQ))
|
||||
fwrite(mi->S, 4, (sum_len + 7) / 8, fp);
|
||||
fflush(fp);
|
||||
}
|
||||
|
||||
|
|
@ -420,8 +425,10 @@ mm_idx_t *mm_idx_load(FILE *fp)
|
|||
kh_val(h, k) = x[1];
|
||||
}
|
||||
}
|
||||
mi->S = (uint32_t*)malloc((sum_len + 7) / 8 * 4);
|
||||
fread(mi->S, 4, (sum_len + 7) / 8, fp);
|
||||
if (!(mi->flag & MM_I_NO_SEQ)) {
|
||||
mi->S = (uint32_t*)malloc((sum_len + 7) / 8 * 4);
|
||||
fread(mi->S, 4, (sum_len + 7) / 8, fp);
|
||||
}
|
||||
return mi;
|
||||
}
|
||||
|
||||
|
|
|
|||
13
main.c
13
main.c
|
|
@ -6,7 +6,7 @@
|
|||
#include "mmpriv.h"
|
||||
#include "getopt.h"
|
||||
|
||||
#define MM_VERSION "2.5-r573-dirty"
|
||||
#define MM_VERSION "2.5-r574-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
@ -43,6 +43,7 @@ static struct option long_options[] = {
|
|||
{ "end-bonus", required_argument, 0, 0 },
|
||||
{ "no-pairing", no_argument, 0, 0 },
|
||||
{ "splice-flank", optional_argument, 0, 0 },
|
||||
{ "idx-no-seq", no_argument, 0, 0 },
|
||||
{ "help", no_argument, 0, 'h' },
|
||||
{ "max-intron-len", required_argument, 0, 'G' },
|
||||
{ "version", no_argument, 0, 'V' },
|
||||
|
|
@ -138,6 +139,7 @@ int main(int argc, char *argv[])
|
|||
else if (c == 0 && long_idx ==13) opt.flag |= MM_F_SR; // --sr
|
||||
else if (c == 0 && long_idx ==17) opt.end_bonus = atoi(optarg); // --end-bonus
|
||||
else if (c == 0 && long_idx ==18) opt.flag |= MM_F_INDEPEND_SEG; // --no-pairing
|
||||
else if (c == 0 && long_idx ==20) ipt.flag |= MM_I_NO_SEQ; // --idx-no-seq
|
||||
else if (c == 0 && long_idx == 14) { // --frag
|
||||
if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0)
|
||||
opt.flag |= MM_F_FRAG_MODE;
|
||||
|
|
@ -196,6 +198,8 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr, "[ERROR]\033[1;31m --splice and --frag should not be specified at the same time.\033[0m\n");
|
||||
return 1;
|
||||
}
|
||||
if (!fnw && !(opt.flag&MM_F_CIGAR))
|
||||
ipt.flag |= MM_I_NO_SEQ;
|
||||
|
||||
if (argc == optind || fp_help == stdout) {
|
||||
fprintf(fp_help, "Usage: minimap2 [options] <target.fa>|<target.idx> [query.fa] [...]\n");
|
||||
|
|
@ -259,11 +263,18 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
if (!idx_rdr->is_idx && fnw == 0 && argc - optind < 2) {
|
||||
fprintf(stderr, "[ERROR] missing input: please specify a query file to map or option -d to keep the index\n");
|
||||
mm_idx_reader_close(idx_rdr);
|
||||
return 1;
|
||||
}
|
||||
if (opt.best_n == 0 && (opt.flag&MM_F_CIGAR) && mm_verbose >= 2)
|
||||
fprintf(stderr, "[WARNING]\033[1;31m `-N 0' reduces alignment accuracy. Please use --secondary=no to suppress secondary alignments.\033[0m\n");
|
||||
while ((mi = mm_idx_reader_read(idx_rdr, n_threads)) != 0) {
|
||||
if ((opt.flag & MM_F_CIGAR) && (mi->flag & MM_I_NO_SEQ)) {
|
||||
fprintf(stderr, "[ERROR] the prebuilt index doesn't contain sequences.\n");
|
||||
mm_idx_destroy(mi);
|
||||
mm_idx_reader_close(idx_rdr);
|
||||
return 1;
|
||||
}
|
||||
if ((opt.flag & MM_F_OUT_SAM) && idx_rdr->n_parts == 1) {
|
||||
if (mm_idx_reader_eof(idx_rdr)) {
|
||||
mm_write_sam_hdr(mi, rg, MM_VERSION, argc, argv);
|
||||
|
|
|
|||
Loading…
Reference in New Issue