diff --git a/Makefile b/Makefile index 1d6ae44..ed85639 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ AR= ar DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o AOBJS= QSufSort.o bwt_gen.o bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \ - is.o bwtindex.o bwape.o kopen.o pemerge.o \ + is.o bwtindex.o bwape.o kopen.o pemerge.o maxk.o \ bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \ bwtsw2_chain.o fastmap.o bwtsw2_pair.o PROG= bwa @@ -45,7 +45,8 @@ depend: QSufSort.o: QSufSort.h bamlite.o: bamlite.h malloc_wrap.h bntseq.o: bntseq.h utils.h kseq.h malloc_wrap.h khash.h -bwa.o: bntseq.h bwa.h bwt.h ksw.h utils.h kstring.h malloc_wrap.h kseq.h +bwa.o: bntseq.h bwa.h bwt.h ksw.h utils.h kstring.h malloc_wrap.h kvec.h +bwa.o: kseq.h bwamem.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h ksw.h kvec.h bwamem.o: ksort.h utils.h kbtree.h bwamem_extra.o: bwa.h bntseq.h bwt.h bwamem.h kstring.h malloc_wrap.h @@ -79,5 +80,6 @@ kstring.o: kstring.h malloc_wrap.h ksw.o: ksw.h malloc_wrap.h main.o: kstring.h malloc_wrap.h utils.h malloc_wrap.o: malloc_wrap.h +maxk.o: bwa.h bntseq.h bwt.h bwamem.h kseq.h malloc_wrap.h pemerge.o: ksw.h kseq.h malloc_wrap.h kstring.h bwa.h bntseq.h bwt.h utils.h utils.o: utils.h ksort.h malloc_wrap.h kseq.h diff --git a/kthread.c b/kthread.c index 80f84cb..97359c0 100644 --- a/kthread.c +++ b/kthread.c @@ -67,13 +67,15 @@ struct ktp_t; typedef struct { struct ktp_t *pl; - int step, running; + int64_t index; + int step; void *data; } ktp_worker_t; typedef struct ktp_t { void *shared; void *(*func)(void*, int, void*); + int64_t index; int n_workers, n_steps; ktp_worker_t *workers; pthread_mutex_t mutex; @@ -92,13 +94,12 @@ static void *ktp_worker(void *data) // test whether another worker is doing the same step for (i = 0; i < p->n_workers; ++i) { if (w == &p->workers[i]) continue; // ignore itself - if (p->workers[i].running && p->workers[i].step == w->step) + if (p->workers[i].step <= w->step && p->workers[i].index < w->index) break; } - if (i == p->n_workers) break; // no other workers doing w->step; then this worker will + if (i == p->n_workers) break; // no workers with smaller indices are doing w->step or the previous steps pthread_cond_wait(&p->cv, &p->mutex); } - w->running = 1; pthread_mutex_unlock(&p->mutex); // working on w->step @@ -107,7 +108,7 @@ static void *ktp_worker(void *data) // update step and let other workers know pthread_mutex_lock(&p->mutex); w->step = w->step == p->n_steps - 1 || w->data? (w->step + 1) % p->n_steps : p->n_steps; - w->running = 0; + if (w->step == 0) w->index = p->index++; pthread_cond_broadcast(&p->cv); pthread_mutex_unlock(&p->mutex); } @@ -125,13 +126,15 @@ void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_d aux.n_steps = n_steps; aux.func = func; aux.shared = shared_data; + aux.index = 0; pthread_mutex_init(&aux.mutex, 0); pthread_cond_init(&aux.cv, 0); aux.workers = alloca(n_threads * sizeof(ktp_worker_t)); for (i = 0; i < n_threads; ++i) { ktp_worker_t *w = &aux.workers[i]; - w->step = w->running = 0; w->pl = &aux; w->data = 0; + w->step = 0; w->pl = &aux; w->data = 0; + w->index = aux.index++; } tid = alloca(n_threads * sizeof(pthread_t)); diff --git a/main.c b/main.c index 9f776d4..cdac445 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.12-r1039" +#define PACKAGE_VERSION "0.7.12-r1044" #endif int bwa_fa2pac(int argc, char *argv[]); @@ -25,6 +25,7 @@ int main_mem(int argc, char *argv[]); int main_shm(int argc, char *argv[]); int main_pemerge(int argc, char *argv[]); +int main_maxk(int argc, char *argv[]); static int usage() { @@ -84,6 +85,7 @@ int main(int argc, char *argv[]) else if (strcmp(argv[1], "mem") == 0) ret = main_mem(argc-1, argv+1); else if (strcmp(argv[1], "shm") == 0) ret = main_shm(argc-1, argv+1); else if (strcmp(argv[1], "pemerge") == 0) ret = main_pemerge(argc-1, argv+1); + else if (strcmp(argv[1], "maxk") == 0) ret = main_maxk(argc-1, argv+1); else { fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]); return 1; diff --git a/maxk.c b/maxk.c new file mode 100644 index 0000000..24b9d54 --- /dev/null +++ b/maxk.c @@ -0,0 +1,66 @@ +#include +#include +#include +#include +#include +#include "bwa.h" +#include "bwamem.h" +#include "kseq.h" +KSEQ_DECLARE(gzFile) + +int main_maxk(int argc, char *argv[]) +{ + int i, c, self = 0, max_len = 0; + uint8_t *cnt = 0; + uint64_t hist[256]; + bwt_t *bwt; + kseq_t *ks; + smem_i *itr; + gzFile fp; + + while ((c = getopt(argc, argv, "s")) >= 0) { + if (c == 's') self = 1; + } + if (optind + 2 > argc) { + fprintf(stderr, "Usage: bwa maxk [-s] \n"); + return 1; + } + fp = strcmp(argv[optind+1], "-")? gzopen(argv[optind+1], "rb") : gzdopen(fileno(stdin), "rb"); + ks = kseq_init(fp); + bwt = bwt_restore_bwt(argv[optind]); + itr = smem_itr_init(bwt); + if (self) smem_config(itr, 2, INT_MAX, 0); + memset(hist, 0, 8 * 256); + + while (kseq_read(ks) >= 0) { + const bwtintv_v *a; + if (ks->seq.l > max_len) { + max_len = ks->seq.l; + kroundup32(max_len); + cnt = realloc(cnt, max_len); + } + memset(cnt, 0, ks->seq.l); + for (i = 0; i < ks->seq.l; ++i) + ks->seq.s[i] = nst_nt4_table[(int)ks->seq.s[i]]; + smem_set_query(itr, ks->seq.l, (uint8_t*)ks->seq.s); + while ((a = smem_next(itr)) != 0) { + for (i = 0; i < a->n; ++i) { + bwtintv_t *p = &a->a[i]; + int j, l, start = p->info>>32, end = (uint32_t)p->info; + l = end - start < 255? end - start : 255; + for (j = start; j < end; ++j) + cnt[j] = cnt[j] > l? cnt[j] : l; + } + } + for (i = 0; i < ks->seq.l; ++i) ++hist[cnt[i]]; + } + for (i = 0; i < 256; ++i) + printf("%d\t%lld\n", i, (long long)hist[i]); + free(cnt); + + smem_itr_destroy(itr); + bwt_destroy(bwt); + kseq_destroy(ks); + gzclose(fp); + return 0; +}