基本想法是这样的:
1、预创建的线程通过mutex休眠在线程池中。这样,通过unlock该mutex就可以唤醒该线程了;
2、出于简单性的考虑,一个线程池内的所有线程的属性都是相同的。这个属性可以在创建线程池时指定;
3、一般来讲,线程池内的线程不能被取消、或者调用pthread_exit()退出。这些管理性工作是由线程池本身完成的。即,在使用线程池借出的线程时,函数返回应该只用return。
4、从线程池“借出”的线程,可以归还给线程池。实际上也必须归还给线程池,这样线程池可以完成最后的清理工作。
5、如果实在需要取消一个线程,那么好吧,只是别忘了告诉线程池你取消了它的手下。
#include "threadpool.h"/* #include了所有必要的系统头文件 */
/* Bit flags kept in the `flags` field of struct thread_worker. */
#define THWK_F_CLEAN 1 /* The pool is being torn down; a worker that sees this bit exits. */
#define THWK_F_RUNNING 2 /* The worker is busy; exists to close a race between the worker's unlock/re-lock and an activation. */
/* Job description for a pooled thread: the callback to run and its argument. */
struct thread_worker_arg {
void (*action)(void*);/* function supplied by the pool's user; this is the real work */
void *what;/* argument passed to action */
};
/*
 * Bookkeeping for one pooled thread.
 *
 * NOTE(review): `flags` is written by the worker thread and polled by other
 * threads without any synchronization or atomic type - confirm this is
 * acceptable on the target compiler/platform.
 */
struct thread_worker {
pthread_t id;/* thread handle filled in by pthread_create() */
struct thread_worker_arg arg;/* job handed to sleepy_wrapper() */
pthread_mutex_t lock;/* only used to park the idle thread; it does not protect a critical section */
struct thread_worker *next;/* next worker in the pool's singly linked list */
unsigned long long delay;/* not used yet; intended for measuring scheduling latency */
unsigned long flags;/* THWK_F_* bits */
};
/* The pool itself: a locked, singly linked list of idle workers plus counters. */
struct thread_pool {
pthread_mutex_t lock;/* serializes access to the pool's own fields */
struct thread_worker *first;/* head of the list of idle workers */
int total;/* number of threads owned by the pool */
int current_nr;/* number of idle threads currently in the pool */
};
/*
 * Placeholder tick source, reserved for measuring scheduling latency.
 * Not implemented yet: always returns 0.
 *
 * Deliberately not declared `inline`: in C99 and later, a plain `inline`
 * definition does not provide an external definition, so any call the
 * compiler chooses not to inline fails at link time with an undefined
 * reference to get_ticks.
 */
unsigned long long get_ticks(void)
{
    /* A real implementation could read the CPU timestamp counter (rdtsc). */
    return 0ULL;
}
/*
 * Cancellation cleanup handler for a pooled thread.
 *
 * voidp is the thread's struct thread_worker. The handler releases the
 * worker's parking mutex, destroys it and frees the worker, so a cancelled
 * thread does not leak its bookkeeping. After this runs the worker pointer
 * is dangling and must not be used again by anyone.
 */
static void sleepy_wrapper_cleanup(void *voidp)
{
    struct thread_worker *worker = voidp;

    pthread_mutex_unlock(&worker->lock);
    /* Release the mutex's resources before its storage goes away. */
    pthread_mutex_destroy(&worker->lock);
    free(worker);
}
/*
 * Thread body executed by every pooled thread.
 *
 * voidp is the thread's own struct thread_worker. Each loop iteration blocks
 * on worker->lock until another thread unlocks it, runs worker->arg.action
 * (if one is set) and then re-locks worker->lock so that the next iteration
 * blocks again. The thread leaves the loop and exits once THWK_F_CLEAN is
 * set in worker->flags.
 *
 * NOTE(review): this scheme has one thread unlock a mutex that a different
 * thread locked, and has a thread lock a mutex it already holds. POSIX does
 * not appear to define either for default mutexes - confirm against the
 * target platform.
 */
static void* sleepy_wrapper(void *voidp)
{
struct thread_worker *worker = voidp;
while (1) {
/* Install the cleanup handler first so a cancelled thread does not leak its worker. */
pthread_cleanup_push(sleepy_wrapper_cleanup, worker);
/* Idle threads sleep here: the mutex is already locked, either by thread_pool_create() or by the re-lock at the bottom of this loop. */
pthread_mutex_lock(&worker->lock);
/* Not used for anything yet. */
worker->delay = get_ticks() - worker->delay;
/* The pool is tearing itself down, so skip the job and head for the exit. */
if (THWK_F_CLEAN & worker->flags)
/* The goto may look redundant, but per the original author anything else fails to compile because pthread_cleanup_push/pop are implemented as macros. */
goto done;
/* Mark the worker busy; the bit is cleared after the re-lock below. */
worker->flags |= THWK_F_RUNNING;
/* Do the actual work. */
if (worker->arg.action)
worker->arg.action(worker->arg.what);
done:
/* Release the mutex so this thread can be used again. */
pthread_mutex_unlock(&worker->lock);
pthread_cleanup_pop(0);
/* Pool teardown: leave the loop and let the thread exit. */
if (THWK_F_CLEAN & worker->flags)
break;
/* Take the lock again so that the pthread_mutex_lock() at the top of the loop puts the thread to sleep. This call is expected to succeed; otherwise the scheme deadlocks. */
pthread_mutex_lock(&worker->lock);
/* THWK_F_RUNNING stays set across the unlock/lock pair above so that an activation cannot slip in between them, which would deadlock; thread_pool_activate() spins until the bit is cleared. */
worker->flags &= ~THWK_F_RUNNING;
}
pthread_exit(0);
}
/* Return the underlying pthread_t handle of a pooled worker. */
pthread_t thread_pool_rawid(struct thread_worker *worker)
{
    pthread_t raw_id = worker->id;

    return raw_id;
}
/*
 * Tell the pool that one of its threads has been cancelled and must no
 * longer be counted. The current implementation only decrements the total;
 * `worker` is accepted for interface completeness and is not dereferenced.
 * A NULL pool is ignored.
 */
void thread_pool_forget(struct thread_pool *pool, struct thread_worker *worker)
{
    (void)worker;

    if (!pool)
        return;

    /* pool->total is compared under pool->lock elsewhere, so update it under the same lock. */
    pthread_mutex_lock(&pool->lock);
    pool->total--;
    pthread_mutex_unlock(&pool->lock);
}
/*
 * Wake a lent worker so that it runs its configured action.
 * Spins until the worker is no longer flagged as running, then unlocks the
 * worker's parking mutex, which lets the blocked pthread_mutex_lock() at the
 * top of sleepy_wrapper()'s loop return.
 */
void thread_pool_activate(struct thread_worker *worker)
{
    unsigned long long stamp = get_ticks();

    worker->delay = stamp;

    /* Busy-wait: unlocking while the worker is still running would deadlock. */
    for (;;) {
        if (!thread_pool_is_running(worker))
            break;
    }

    pthread_mutex_unlock(&worker->lock);
}
/* Return non-zero while the worker has THWK_F_RUNNING set, 0 otherwise. */
int thread_pool_is_running(struct thread_worker *worker)
{
    unsigned long running_bit = worker->flags & THWK_F_RUNNING;

    return running_bit;
}
/*
 * Borrow an idle thread from the pool.
 *
 * Detaches the first worker from the pool's idle list, records the job
 * (action, what) in it and stores it in *worker. The thread does not start
 * running the job until it is activated.
 *
 * Returns 0 on success, -EINVAL if action, pool or worker is NULL, and
 * -EBUSY if the pool has no idle thread (in which case *worker is set to
 * NULL).
 */
int thread_pool_lend(struct thread_pool *pool, void (*action)(void*), void* what, struct thread_worker **worker)
{
    struct thread_worker *idle;
    int rc = 0;

    if (!action || !pool || !worker)
        return -EINVAL;

    pthread_mutex_lock(&pool->lock);
    idle = pool->first;
    if (idle) {
        idle->arg.action = action;
        idle->arg.what = what;
        pool->first = idle->next;
        idle->next = NULL;
        pool->current_nr--;
    } else {
        /* Empty list: report it instead of dereferencing a NULL worker. */
        rc = -EBUSY;
    }
    *worker = idle;
    pthread_mutex_unlock(&pool->lock);

    return rc;
}
/*
 * Return a borrowed thread to the pool.
 *
 * Waits (spinning) until the worker is no longer flagged as running, clears
 * its job and pushes it onto the head of the pool's idle list.
 * Returns 0 on success or -EINVAL if pool or worker is NULL.
 */
int thread_pool_giveback(struct thread_pool *pool, struct thread_worker *worker)
{
    if (pool == NULL || worker == NULL)
        return -EINVAL;

    /* Busy-wait for the worker to finish its current job. */
    while (thread_pool_is_running(worker) != 0) {
    }

    pthread_mutex_lock(&pool->lock);
    worker->next = pool->first;
    pool->first = worker;
    worker->arg.action = NULL;
    worker->arg.what = NULL;
    pool->current_nr += 1;
    pthread_mutex_unlock(&pool->lock);

    return 0;
}
/*
 * Create a thread pool.
 *
 * nr_to_create: number of threads wanted; must be positive.
 * attr:         attributes passed to pthread_create() for every thread
 *               (may be NULL for the defaults).
 *
 * Each worker's mutex is locked before its thread is started, so the new
 * thread parks itself on that mutex right away. If creation fails part-way,
 * the pool is returned with the threads created so far; check the pool's
 * `total` for the actual count.
 *
 * Returns the new pool, or NULL if nr_to_create is not positive, memory or
 * mutex initialization fails for the pool, or no thread could be created.
 */
struct thread_pool* thread_pool_create(int nr_to_create, pthread_attr_t *attr)
{
    struct thread_pool *pool;
    int created;

    if (nr_to_create <= 0)
        return NULL;

    pool = malloc(sizeof *pool);
    if (!pool)
        return NULL;
    pool->first = NULL;
    pool->total = 0;
    pool->current_nr = 0;
    if (pthread_mutex_init(&pool->lock, NULL)) {
        free(pool);
        return NULL;
    }

    for (created = 0; created < nr_to_create; ++created) {
        struct thread_worker *worker = calloc(1, sizeof *worker);

        if (!worker)
            break;
        if (pthread_mutex_init(&worker->lock, NULL)) {
            free(worker);
            break;
        }
        /* Lock before starting the thread so that it goes to sleep immediately. */
        pthread_mutex_lock(&worker->lock);
        if (pthread_create(&worker->id, attr, sleepy_wrapper, worker)) {
            pthread_mutex_unlock(&worker->lock);
            pthread_mutex_destroy(&worker->lock);
            free(worker);
            break;
        }
        worker->next = pool->first;
        pool->first = worker;
    }

    pool->total = created;
    pool->current_nr = created;
    if (created == 0) {
        pthread_mutex_destroy(&pool->lock);
        free(pool);
        return NULL;
    }
    return pool;
}
/*
 * Tear down a thread pool.
 *
 * Only allowed when every thread has been given back (total == current_nr).
 * Each idle worker is flagged with THWK_F_CLEAN, woken by unlocking its
 * mutex, joined, and then freed; finally the pool itself is freed.
 *
 * Returns 0 on success, -EINVAL if pool is NULL, or -EBUSY if some threads
 * are still lent out (the pool is left untouched in that case).
 */
int thread_pool_clean(struct thread_pool *pool)
{
    struct thread_worker *worker;

    if (!pool)
        return -EINVAL;

    pthread_mutex_lock(&pool->lock);
    if (pool->total != pool->current_nr) {
        pthread_mutex_unlock(&pool->lock);
        return -EBUSY;
    }

    while ((worker = pool->first) != NULL) {
        /* Plain assignment on purpose: this also clears THWK_F_RUNNING. */
        worker->flags = THWK_F_CLEAN;
        /* Wake the parked thread; it sees the flag and exits. */
        pthread_mutex_unlock(&worker->lock);
        pthread_join(worker->id, NULL);
        pool->first = worker->next;
        pthread_mutex_destroy(&worker->lock);
        free(worker);
    }

    pthread_mutex_unlock(&pool->lock);
    pthread_mutex_destroy(&pool->lock);
    free(pool);
    return 0;
}
/* 这是一个使用例子。 */
/* 在我的P4双核机器上,可以比单线程版本快20%,但复杂性远高于20%! :( */
#include "threadpool.h"
/*
 * Add up every integer in the inclusive range [start, end].
 * Returns 0 when start > end. Arithmetic is unsigned and wraps on overflow.
 */
unsigned long long sum(unsigned long long start, unsigned long long end)
{
    unsigned long long total = 0;
    unsigned long long cur = start;

    while (cur <= end) {
        total += cur;
        ++cur;
    }
    return total;
}
/* Work item and result slot for one threaded_sum() job. */
struct per_sum {
/* sum: result written by the worker; start/end: inclusive range to add up. The worker resets start and end to 0 when done, and main() waits for end == 0. */
unsigned long long sum, start, end;
/* lock/cond: used by the worker to signal completion to the waiting thread */
pthread_mutex_t lock;
pthread_cond_t cond;
};
/*
 * Pool job: add up the range described by a struct per_sum.
 *
 * voidp points to a struct per_sum whose start/end give the inclusive range.
 * On completion the result is stored in ->sum, ->start and ->end are reset
 * to 0 (end == 0 is the completion predicate the waiter checks) and ->cond
 * is signalled. Does nothing beyond the start message if voidp is NULL.
 */
void threaded_sum(void *voidp)
{
    struct per_sum *per_sum = voidp;
    unsigned long long total;

    printf("thread %p start\n", voidp);
    if (!per_sum)
        return;

    total = sum(per_sum->start, per_sum->end);

    /*
     * Publish the result and flip the predicate while holding the lock, so
     * the waiter never observes them half-updated.
     */
    pthread_mutex_lock(&per_sum->lock);
    per_sum->sum = total;
    per_sum->start = per_sum->end = 0;
    printf("thread %p exit, end=%llu\n", voidp, per_sum->end);
    pthread_cond_signal(&per_sum->cond);
    pthread_mutex_unlock(&per_sum->lock);
}
int main(void)
{
#define NR_THREADS2
struct thread_worker* workers[NR_THREADS];
struct per_sum per_sums[NR_THREADS];
struct thread_pool *pool;
int i;
unsigned long long start, end;
unsigned long long result = 0;
unsigned long long delta = 0x10ffffff;
//printf("mutli threading ... ");
pool = thread_pool_create(NR_THREADS, NULL);
if (!pool)
exit(-1);
for (i=0; i<NR_THREADS; ++i) {
if (pthread_mutex_init(&per_sums[i].lock, NULL)) {
printf("failed init mutex/n");
exit(3);
}
if (pthread_cond_init(&per_sums[i].cond, NULL)) {
printf("failed init cond/n");
exit(4);
}
if (thread_pool_lend(pool, threaded_sum, (void*)&per_sums[i], &workers[i])) {
printf("failed to lend thread %d/n", i);
exit(5);
}
}
start = 0;
/* activate threads */
for (i=0; i<NR_THREADS; i++) {
per_sums[i].start = start;
per_sums[i].end = per_sums[i].start + delta;
start = per_sums[i].end + 1;
thread_pool_activate(workers[i]);
}
for (i=0; i<NR_THREADS; i++) {
pthread_mutex_lock(&per_sums[i].lock);
while (per_sums[i].end != 0)
pthread_cond_wait(&per_sums[i].cond, &per_sums[i].lock);
result += per_sums[i].sum;
pthread_mutex_unlock(&per_sums[i].lock);
}
/* activate threads again */
for (i=0; i<NR_THREADS; i++) {
per_sums[i].start = start;
per_sums[i].end = per_sums[i].start + delta;
start = per_sums[i].end + 1;
thread_pool_activate(workers[i]);
}
end = per_sums[NR_THREADS-1].end;
for (i=0; i<NR_THREADS; i++) {
pthread_mutex_lock(&per_sums[i].lock);
while (per_sums[i].end != 0)
pthread_cond_wait(&per_sums[i].cond, &per_sums[i].lock);
result += per_sums[i].sum;
pthread_mutex_unlock(&per_sums[i].lock);
}
for (i=0; i<NR_THREADS; ++i) {
if (thread_pool_giveback(pool, workers[i])) {
printf("failed to giveback thread %d/n", i);
exit(6);
}
pthread_mutex_destroy(&per_sums[i].lock);
pthread_cond_destroy(&per_sums[i].cond);
}
thread_pool_clean(pool);
printf("sum = %lld/n/n", result);
return 0;
}
PS: 这个程序是在Linux上写的。要进一步完善的话,比如可以根据系统负载调整线程池中线程的数量;增加更完整的性能测量功能、调试功能;提供更方便的线程属性设置接口;在Linux平台上,还可以使用clone()提供更为灵活的资源策略,等等。