diff --git a/.gitignore b/.gitignore index 2a8ecdc..af1bb41 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +.cproject +.project .*.swp *.a *.o diff --git a/Makefile b/Makefile index f7892e8..6ed7bae 100644 --- a/Makefile +++ b/Makefile @@ -21,8 +21,8 @@ all:$(PROG) extra:all $(PROG_EXTRA) -minimap2:main.o libminimap2.a - $(CC) $(CFLAGS) $< -o $@ -L. -lminimap2 $(LIBS) +minimap2:main.o getopt.o libminimap2.a + $(CC) $(CFLAGS) main.o getopt.o -o $@ -L. -lminimap2 $(LIBS) minimap2-lite:example.o libminimap2.a $(CC) $(CFLAGS) $< -o $@ -L. -lminimap2 $(LIBS) @@ -30,8 +30,8 @@ minimap2-lite:example.o libminimap2.a libminimap2.a:$(OBJS) $(AR) -csru $@ $(OBJS) -sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h sdust.h - $(CC) -D_SDUST_MAIN $(CFLAGS) $< kalloc.o -o $@ -lz +sdust:sdust.c getopt.o kalloc.o kalloc.h kdq.h kvec.h kseq.h sdust.h + $(CC) -D_SDUST_MAIN $(CFLAGS) $< getopt.o kalloc.o -o $@ -lz clean: rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM session* @@ -46,6 +46,7 @@ bseq.o: bseq.h kseq.h chain.o: minimap.h mmpriv.h bseq.h kalloc.h example.o: minimap.h kseq.h format.o: kalloc.h mmpriv.h minimap.h bseq.h +getopt.o: getopt.h hit.o: mmpriv.h minimap.h bseq.h kalloc.h index.o: kthread.h bseq.h minimap.h mmpriv.h kvec.h kalloc.h khash.h kalloc.o: kalloc.h @@ -53,7 +54,7 @@ ksw2_extd2_sse.o: ksw2.h kalloc.h ksw2_exts2_sse.o: ksw2.h kalloc.h ksw2_extz2_sse.o: ksw2.h kalloc.h ksw2_ll_sse.o: ksw2.h kalloc.h -main.o: bseq.h minimap.h mmpriv.h +main.o: bseq.h minimap.h mmpriv.h getopt.h map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h misc.o: minimap.h ksort.h sdust.o: kalloc.h kdq.h kvec.h sdust.h diff --git a/align.c b/align.c index 84a7336..163a299 100644 --- a/align.c +++ b/align.c @@ -138,8 +138,10 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) { int i; fprintf(stderr, "===> q=(%d,%d), e=(%d,%d), bw=%d, flag=%d, zdrop=%d <===\n", opt->q, opt->q2, opt->e, opt->e2, w, flag, opt->zdrop); - for (i = 0; i < tlen; ++i) fputc("ACGTN"[tseq[i]], stderr); fputc('\n', stderr); - for (i = 0; i < qlen; ++i) fputc("ACGTN"[qseq[i]], stderr); fputc('\n', stderr); + for (i = 0; i < tlen; ++i) fputc("ACGTN"[tseq[i]], stderr); + fputc('\n', stderr); + for (i = 0; i < qlen; ++i) fputc("ACGTN"[qseq[i]], stderr); + fputc('\n', stderr); } if (opt->flag & MM_F_SPLICE) ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, opt->zdrop, flag, ez); diff --git a/example.c b/example.c index ca9da09..ccb1e68 100644 --- a/example.c +++ b/example.c @@ -35,13 +35,13 @@ int main(int argc, char *argv[]) opt.flag |= MM_F_CIGAR; // perform alignment mm_tbuf_t *tbuf = mm_tbuf_init(); // thread buffer; for multi-threading, allocate one tbuf for each thread while (kseq_read(ks) >= 0) { // each kseq_read() call reads one query sequence - const mm_reg1_t *reg; + mm_reg1_t *reg; int j, i, n_reg; // get all hits for the query reg = mm_map(mi, ks->seq.l, ks->seq.s, &n_reg, tbuf, &opt, 0); // traverse hits and print them out for (j = 0; j < n_reg; ++j) { - const mm_reg1_t *r = ®[j]; + mm_reg1_t *r = ®[j]; assert(r->p); // with MM_F_CIGAR, this should not be NULL printf("%s\t%d\t%d\t%d\t%c\t", ks->name.s, ks->seq.l, r->qs, r->qe, "+-"[r->rev]); printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\tcg:Z:", mi->seq[r->rid].name, mi->seq[r->rid].len, r->rs, r->re, @@ -49,7 +49,9 @@ int main(int argc, char *argv[]) for (i = 0; i < r->p->n_cigar; ++i) // IMPORTANT: this gives the CIGAR in the aligned regions. NO soft/hard clippings! printf("%d%c", r->p->cigar[i]>>4, "MIDSHN"[r->p->cigar[i]&0xf]); putchar('\n'); + free(r->p); } + free(reg); } mm_tbuf_destroy(tbuf); diff --git a/getopt.c b/getopt.c new file mode 100644 index 0000000..7139bfd --- /dev/null +++ b/getopt.c @@ -0,0 +1,216 @@ +#include +#include +#include +#include "getopt.h" + +char *optarg; +int optind=1, opterr=1, optopt, __optpos, optreset=0; + +#define optpos __optpos + +static void __getopt_msg(const char *a, const char *b, const char *c, size_t l) +{ + FILE *f = stderr; +#if !defined(WIN32) && !defined(_WIN32) + flockfile(f); +#endif + fputs(a, f); + fwrite(b, strlen(b), 1, f); + fwrite(c, 1, l, f); + fputc('\n', f); +#if !defined(WIN32) && !defined(_WIN32) + funlockfile(f); +#endif +} + +int getopt(int argc, char * const argv[], const char *optstring) +{ + int i, c, d; + int k, l; + char *optchar; + + if (!optind || optreset) { + optreset = 0; + __optpos = 0; + optind = 1; + } + + if (optind >= argc || !argv[optind]) + return -1; + + if (argv[optind][0] != '-') { + if (optstring[0] == '-') { + optarg = argv[optind++]; + return 1; + } + return -1; + } + + if (!argv[optind][1]) + return -1; + + if (argv[optind][1] == '-' && !argv[optind][2]) + return optind++, -1; + + if (!optpos) optpos++; + c = argv[optind][optpos], k = 1; + optchar = argv[optind]+optpos; + optopt = c; + optpos += k; + + if (!argv[optind][optpos]) { + optind++; + optpos = 0; + } + + if (optstring[0] == '-' || optstring[0] == '+') + optstring++; + + i = 0; + d = 0; + do { + d = optstring[i], l = 1; + if (l>0) i+=l; else i++; + } while (l && d != c); + + if (d != c) { + if (optstring[0] != ':' && opterr) + __getopt_msg(argv[0], ": unrecognized option: ", optchar, k); + return '?'; + } + if (optstring[i] == ':') { + if (optstring[i+1] == ':') optarg = 0; + else if (optind >= argc) { + if (optstring[0] == ':') return ':'; + if (opterr) __getopt_msg(argv[0], + ": option requires an argument: ", + optchar, k); + return '?'; + } + if (optstring[i+1] != ':' || optpos) { + optarg = argv[optind++] + optpos; + optpos = 0; + } + } + return c; +} + +static void permute(char *const *argv, int dest, int src) +{ + char **av = (char **)argv; + char *tmp = av[src]; + int i; + for (i=src; i>dest; i--) + av[i] = av[i-1]; + av[dest] = tmp; +} + +static int __getopt_long_core(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly) +{ + optarg = 0; + if (longopts && argv[optind][0] == '-' && + ((longonly && argv[optind][1] && argv[optind][1] != '-') || + (argv[optind][1] == '-' && argv[optind][2]))) + { + int colon = optstring[optstring[0]=='+'||optstring[0]=='-']==':'; + int i, cnt, match; + char *opt; + for (cnt=i=0; longopts[i].name; i++) { + const char *name = longopts[i].name; + opt = argv[optind]+1; + if (*opt == '-') opt++; + for (; *name && *name == *opt; name++, opt++); + if (*opt && *opt != '=') continue; + match = i; + if (!*name) { + cnt = 1; + break; + } + cnt++; + } + if (cnt==1) { + i = match; + optind++; + optopt = longopts[i].val; + if (*opt == '=') { + if (!longopts[i].has_arg) { + if (colon || !opterr) + return '?'; + __getopt_msg(argv[0], + ": option does not take an argument: ", + longopts[i].name, + strlen(longopts[i].name)); + return '?'; + } + optarg = opt+1; + } else if (longopts[i].has_arg == required_argument) { + if (!(optarg = argv[optind])) { + if (colon) return ':'; + if (!opterr) return '?'; + __getopt_msg(argv[0], + ": option requires an argument: ", + longopts[i].name, + strlen(longopts[i].name)); + return '?'; + } + optind++; + } + if (idx) *idx = i; + if (longopts[i].flag) { + *longopts[i].flag = longopts[i].val; + return 0; + } + return longopts[i].val; + } + if (argv[optind][1] == '-') { + if (!colon && opterr) + __getopt_msg(argv[0], cnt ? + ": option is ambiguous: " : + ": unrecognized option: ", + argv[optind]+2, + strlen(argv[optind]+2)); + optind++; + return '?'; + } + } + return getopt(argc, argv, optstring); +} + +static int __getopt_long(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly) +{ + int ret, skipped, resumed; + if (!optind || optreset) { + optreset = 0; + __optpos = 0; + optind = 1; + } + if (optind >= argc || !argv[optind]) return -1; + skipped = optind; + if (optstring[0] != '+' && optstring[0] != '-') { + int i; + for (i=optind; ; i++) { + if (i >= argc || !argv[i]) return -1; + if (argv[i][0] == '-' && argv[i][1]) break; + } + optind = i; + } + resumed = optind; + ret = __getopt_long_core(argc, argv, optstring, longopts, idx, longonly); + if (resumed > skipped) { + int i, cnt = optind-resumed; + for (i=0; i #include +#if defined(WIN32) || defined(_WIN32) +#include // for open(2) +#else #include +#endif #include #include #include "kthread.h" diff --git a/kalloc.c b/kalloc.c index 9356d40..fa9bc88 100644 --- a/kalloc.c +++ b/kalloc.c @@ -46,7 +46,7 @@ static size_t *morecore(kmem_t *km, size_t nu) up = (size_t*)malloc(rnu * sizeof(size_t)); if (!up) { /* fail to allocate memory */ km_stat(km); - fprintf(stderr, "[morecore] %lu bytes requested but not available.\n", rnu * sizeof(size_t)); + fprintf(stderr, "[morecore] %lu bytes requested but not available.\n", (unsigned long)rnu * sizeof(size_t)); exit(1); } /* put the pointer in km->list_head */ @@ -210,5 +210,5 @@ void km_stat(const void *_km) --n_blocks; frag = 1.0/1024.0 * n_units * sizeof(size_t) / n_blocks; fprintf(stderr, "[kr_stat] tot=%lu, free=%lu, n_block=%u, max_block=%lu, frag_len=%.3fK\n", - km->total_allocated, n_units * sizeof(size_t), n_blocks, max_block * sizeof(size_t), frag); + (unsigned long)km->total_allocated, (unsigned long)n_units * sizeof(size_t), n_blocks, (unsigned long)max_block * sizeof(size_t), frag); } diff --git a/kdq.h b/kdq.h index 4a67c0b..8ae5c97 100644 --- a/kdq.h +++ b/kdq.h @@ -3,11 +3,12 @@ #include #include +#include #include "kalloc.h" #define __KDQ_TYPE(type) \ typedef struct { \ - size_t front:58, bits:6, count, mask; \ + uint64_t front:58, bits:6, count, mask; \ type *a; \ void *km; \ } kdq_##type##_t; diff --git a/ksort.h b/ksort.h index 1f2a3dd..7cd65a5 100644 --- a/ksort.h +++ b/ksort.h @@ -30,6 +30,7 @@ #include #include +#include typedef struct { void *left, *right; @@ -78,6 +79,7 @@ typedef const char *ksstr_t; #define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str) #define RS_MIN_SIZE 64 +#define RS_MAX_BITS 8 #define KRADIX_SORT_INIT(name, rstype_t, rskey, sizeof_key) \ typedef struct { \ @@ -98,7 +100,8 @@ typedef const char *ksstr_t; { \ rstype_t *i; \ int size = 1<b = k->e = beg; \ for (i = beg; i != end; ++i) ++b[rskey(*i)>>s&m].e; \ for (k = b + 1; k != be; ++k) \ @@ -127,7 +130,7 @@ typedef const char *ksstr_t; void radix_sort_##name(rstype_t *beg, rstype_t *end) \ { \ if (end - beg <= RS_MIN_SIZE) rs_insertsort_##name(beg, end); \ - else rs_sort_##name(beg, end, 8, sizeof_key * 8 - 8); \ + else rs_sort_##name(beg, end, RS_MAX_BITS, (sizeof_key - 1) * RS_MAX_BITS); \ } #endif diff --git a/kthread.c b/kthread.c index f991714..249ceea 100644 --- a/kthread.c +++ b/kthread.c @@ -1,6 +1,11 @@ #include #include #include +#include + +#if (defined(WIN32) || defined(_WIN32)) && defined(_MSC_VER) +#define __sync_fetch_and_add(ptr, addend) _InterlockedExchangeAdd((void*)ptr, addend) +#endif /************ * kt_for() * @@ -52,12 +57,13 @@ void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n) kt_for_t t; pthread_t *tid; t.func = func, t.data = data, t.n_threads = n_threads, t.n = n; - t.w = (ktf_worker_t*)alloca(n_threads * sizeof(ktf_worker_t)); - tid = (pthread_t*)alloca(n_threads * sizeof(pthread_t)); + t.w = (ktf_worker_t*)calloc(n_threads, sizeof(ktf_worker_t)); + tid = (pthread_t*)calloc(n_threads, sizeof(pthread_t)); for (i = 0; i < n_threads; ++i) t.w[i].t = &t, t.w[i].i = i; for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktf_worker, &t.w[i]); for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0); + free(tid); free(t.w); } else { long j; for (j = 0; j < n; ++j) func(data, j, 0); @@ -135,16 +141,17 @@ void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_d pthread_mutex_init(&aux.mutex, 0); pthread_cond_init(&aux.cv, 0); - aux.workers = (ktp_worker_t*)alloca(n_threads * sizeof(ktp_worker_t)); + aux.workers = (ktp_worker_t*)calloc(n_threads, sizeof(ktp_worker_t)); for (i = 0; i < n_threads; ++i) { ktp_worker_t *w = &aux.workers[i]; w->step = 0; w->pl = &aux; w->data = 0; w->index = aux.index++; } - tid = (pthread_t*)alloca(n_threads * sizeof(pthread_t)); + tid = (pthread_t*)calloc(n_threads, sizeof(pthread_t)); for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktp_worker, &aux.workers[i]); for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0); + free(tid); free(aux.workers); pthread_mutex_destroy(&aux.mutex); pthread_cond_destroy(&aux.cv); diff --git a/main.c b/main.c index d67b61c..b99fa5d 100644 --- a/main.c +++ b/main.c @@ -1,24 +1,26 @@ -#include #include #include #include -#include -#include #include "bseq.h" #include "minimap.h" #include "mmpriv.h" +#include "getopt.h" -#define MM_VERSION "2.1-r311" +#define MM_VERSION "2.1-r335-dirty" +#ifdef __linux__ +#include +#include void liftrlimit() { -#ifdef __linux__ struct rlimit r; getrlimit(RLIMIT_AS, &r); r.rlim_cur = r.rlim_max; setrlimit(RLIMIT_AS, &r); -#endif } +#else +void liftrlimit() {} +#endif static struct option long_options[] = { { "bucket-bits", required_argument, 0, 0 }, @@ -250,6 +252,7 @@ int main(int argc, char *argv[]) mm_idx_t *mi; if (fpr) { mi = mm_idx_load(fpr); + if (mi == 0) break; if (idx_par_set && mm_verbose >= 2 && (mi->k != k || mi->w != w || mi->is_hpc != is_hpc)) fprintf(stderr, "[WARNING] \033[1;31mIndexing parameters on the command line (-k/-w/-H) overridden by parameters in the prebuilt index.\033[0m\n"); } else { diff --git a/map.c b/map.c index f531efa..14366ad 100644 --- a/map.c +++ b/map.c @@ -382,7 +382,11 @@ int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int pipeline_t pl; memset(&pl, 0, sizeof(pipeline_t)); pl.fp = mm_bseq_open(fn); - if (pl.fp == 0) return -1; + if (pl.fp == 0) { + if (mm_verbose >= 1) + fprintf(stderr, "ERROR: failed to open file '%s'\n", fn); + return -1; + } pl.opt = opt, pl.mi = idx; pl.n_threads = n_threads, pl.mini_batch_size = mini_batch_size; if ((opt->flag & MM_F_OUT_SAM) && !(opt->flag & MM_F_NO_SAM_SQ)) diff --git a/misc.c b/misc.c index f6699ed..d7dd1e4 100644 --- a/misc.c +++ b/misc.c @@ -1,17 +1,102 @@ -#include -#include #include "minimap.h" int mm_verbose = 3; int mm_dbg_flag = 0; double mm_realtime0; +#if defined(WIN32) || defined(_WIN32) +#include + +struct timezone +{ + __int32 tz_minuteswest; /* minutes W of Greenwich */ + int tz_dsttime; /* type of dst correction */ +}; + +/* + * gettimeofday.c + * Win32 gettimeofday() replacement + * taken from PostgreSQL, according to + * https://stackoverflow.com/questions/1676036/what-should-i-use-to-replace-gettimeofday-on-windows + * + * src/port/gettimeofday.c + * + * Copyright (c) 2003 SRA, Inc. + * Copyright (c) 2003 SKC, Inc. + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose, without fee, and without a + * written agreement is hereby granted, provided that the above + * copyright notice and this paragraph and the following two + * paragraphs appear in all copies. + * + * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, + * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING + * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS + * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS + * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +/* FILETIME of Jan 1 1970 00:00:00. */ +static const unsigned __int64 epoch = ((unsigned __int64) 116444736000000000ULL); + +/* + * timezone information is stored outside the kernel so tzp isn't used anymore. + * + * Note: this function is not for Win32 high precision timing purpose. See + * elapsed_time(). + */ +int gettimeofday(struct timeval * tp, struct timezone *tzp) +{ + FILETIME file_time; + SYSTEMTIME system_time; + ULARGE_INTEGER ularge; + + GetSystemTime(&system_time); + SystemTimeToFileTime(&system_time, &file_time); + ularge.LowPart = file_time.dwLowDateTime; + ularge.HighPart = file_time.dwHighDateTime; + + tp->tv_sec = (long) ((ularge.QuadPart - epoch) / 10000000L); + tp->tv_usec = (long) (system_time.wMilliseconds * 1000); + + return 0; +} + +// taken from https://stackoverflow.com/questions/5272470/c-get-cpu-usage-on-linux-and-windows +double cputime() +{ + HANDLE hProcess = GetCurrentProcess(); + FILETIME ftCreation, ftExit, ftKernel, ftUser; + SYSTEMTIME stKernel; + SYSTEMTIME stUser; + + GetProcessTimes(hProcess, &ftCreation, &ftExit, &ftKernel, &ftUser); + FileTimeToSystemTime(&ftKernel, &stKernel); + FileTimeToSystemTime(&ftUser, &stUser); + + double kernelModeTime = ((stKernel.wHour * 60.) + stKernel.wMinute * 60.) + stKernel.wSecond * 1. + stKernel.wMilliseconds / 1000.; + double userModeTime = ((stUser.wHour * 60.) + stUser.wMinute * 60.) + stUser.wSecond * 1. + stUser.wMilliseconds / 1000.; + + return kernelModeTime + userModeTime; +} +#else +#include +#include + double cputime() { struct rusage r; getrusage(RUSAGE_SELF, &r); return r.ru_utime.tv_sec + r.ru_stime.tv_sec + 1e-6 * (r.ru_utime.tv_usec + r.ru_stime.tv_usec); } +#endif /* WIN32 || _WIN32 */ double realtime() { diff --git a/sdust.c b/sdust.c index 07339dd..f1e47e8 100644 --- a/sdust.c +++ b/sdust.c @@ -176,7 +176,7 @@ uint64_t *sdust(void *km, const uint8_t *seq, int l_seq, int T, int W, int *n) #ifdef _SDUST_MAIN #include #include -#include +#include "getopt.h" #include "kseq.h" KSEQ_INIT(gzFile, gzread) diff --git a/sketch.c b/sketch.c index b141cba..db359eb 100644 --- a/sketch.c +++ b/sketch.c @@ -77,11 +77,10 @@ void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, i { uint64_t shift1 = 2 * (k - 1), mask = (1ULL<<2*k) - 1, kmer[2] = {0,0}; int i, j, l, buf_pos, min_pos, kmer_span = 0; - mm128_t *buf, min = { UINT64_MAX, UINT64_MAX }; + mm128_t buf[256], min = { UINT64_MAX, UINT64_MAX }; tiny_queue_t tq; - assert(len > 0 && w > 0 && k > 0 && k <= 28); // 56 bits for k-mer; could use long k-mers, but 28 enough in practice - buf = (mm128_t*)alloca(w * 16); + assert(len > 0 && (w > 0 && w < 256) && (k > 0 && k <= 28)); // 56 bits for k-mer; could use long k-mers, but 28 enough in practice memset(buf, 0xff, w * 16); memset(&tq, 0, sizeof(tiny_queue_t)); kv_resize(mm128_t, km, *p, p->n + len/w);