/* 54 lines, 1.3 KiB, C */
#include <pthread.h>
|
|
#include <stdlib.h>
|
|
|
|
struct kt_for_t;
|
|
|
|
/* Per-worker state; kt_for() allocates one of these for each worker thread. */
typedef struct {
	struct kt_for_t *t; /* shared descriptor of the kt_for() call this worker serves */
	int i;              /* next index this worker will claim; advanced with an atomic fetch-and-add */
} ktf_worker_t;
|
|
|
|
typedef struct kt_for_t {
|
|
int n_threads, n;
|
|
ktf_worker_t *w;
|
|
void (*func)(void*,int,int);
|
|
void *data;
|
|
} kt_for_t;
|
|
|
|
/*
 * Claim one index from the worker whose counter is currently the lowest.
 *
 * The scan over the counters uses plain reads, so it only serves to pick a
 * victim; the claim itself is made with an atomic fetch-and-add on that
 * worker's counter, stepping it by t->n_threads.
 *
 * Returns the claimed index, or -1 when the claimed value is already >= t->n
 * or when no worker could be selected at all.
 */
static inline int steal_work(kt_for_t *t)
{
	int i, k, min = 0x7fffffff, min_i = -1;

	for (i = 0; i < t->n_threads; ++i) {
		if (min > t->w[i].i) {
			min = t->w[i].i;
			min_i = i;
		}
	}
	/* No candidate (no workers, or every counter already at 0x7fffffff):
	 * indexing t->w[-1] below would be an out-of-bounds access. */
	if (min_i < 0)
		return -1;
	k = __sync_fetch_and_add(&t->w[min_i].i, t->n_threads);
	return k >= t->n ? -1 : k;
}
|
|
|
|
static void *ktf_worker(void *data)
|
|
{
|
|
ktf_worker_t *w = (ktf_worker_t*)data;
|
|
int i;
|
|
for (;;) {
|
|
i = __sync_fetch_and_add(&w->i, w->t->n_threads);
|
|
if (i >= w->t->n) break;
|
|
w->t->func(w->t->data, i, w - w->t->w);
|
|
}
|
|
while ((i = steal_work(w->t)) >= 0)
|
|
w->t->func(w->t->data, i, w - w->t->w);
|
|
pthread_exit(0);
|
|
}
|
|
|
|
void kt_for(int n_threads, void (*func)(void*,int,int), void *data, int n)
|
|
{
|
|
int i;
|
|
kt_for_t t;
|
|
pthread_t *tid;
|
|
t.func = func, t.data = data, t.n_threads = n_threads, t.n = n;
|
|
t.w = (ktf_worker_t*)alloca(n_threads * sizeof(ktf_worker_t));
|
|
tid = (pthread_t*)alloca(n_threads * sizeof(pthread_t));
|
|
for (i = 0; i < n_threads; ++i)
|
|
t.w[i].t = &t, t.w[i].i = i;
|
|
for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktf_worker, &t.w[i]);
|
|
for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0);
|
|
}
|