相信很多人跟我一样,一开始看PHP源码的时候看到一堆的TSRM_CC、TSRM_DC特别蛋疼,大多数函数的声明都会在参数末尾加个TSRM_DC,着实让像我这样之前没搞过多线程编程的很不理解。网上找了找,分析这方面的资料非常少,只找到@张洋的一篇文章《PHP及Zend Engine的线程安全模型》。不过看完他的文章,还是有很多疑问,他的文章也没仔细说清楚,那就只能自己看源码啦!先是去看了《多线程程序设计》这本书,然后再扫一扫PHP线程安全相关的源码,算是有点眉目,对@张洋的文章里头提的一些问题也有了自己的一些看法。以下是个人的一些拙见,若有不妥的地方,欢迎交流探讨,感谢~
假如现在有三个线程 thread1、thread2、thread3,
我们声明一个简单的全局数组变量,不妨叫 global_arr,
那么各个线程每次读写自己的数据的时候,提供一个自己的线程id,到global_arr里头取不就得了:global_arr[0]、global_arr[1] ……,非常简单吧。
/* POSIX thread-specific data API: each thread can bind its own value to a
 * shared key and read it back later without any locking. */
pthread_key_t key;	/* a thread-specific data key */

/* Creates (initializes) a key. The key type is pthread_key_t; the optional
 * destructor is the cleanup callback registered for the key. */
int pthread_key_create(pthread_key_t *key, void (*destructor)(void *));

/* Deletes a key. */
int pthread_key_delete(pthread_key_t key);

/* Binds value to key for the calling thread. */
int pthread_setspecific(pthread_key_t key, const void *value);

/* Returns the value the calling thread has bound to key. */
void *pthread_getspecific(pthread_key_t key);
tsrm_tls_table(意思应该是 tsrm thread local storage table)是个哈希表,我们姑且称之为“全局线程私有数据表”,存放的是每一个线程私有数据的指针。
/* PHP_5_4/TSRM/TSRM.h
 *
 * Maps a thread id onto a bucket index of the global thread-local-storage
 * table.
 *   thr - the thread id
 *   ts  - tsrm_tls_table_size, i.e. the number of buckets in the table
 *
 * Both arguments and the whole expansion are parenthesized so the macro
 * stays correct when it is given compound expressions or is used inside a
 * larger expression. */
#define THREAD_HASH_OF(thr, ts) ((unsigned long)(thr) % (unsigned long)(ts))
tsrm_tls_entry就是每一个线程私有数据的入口啦,从名字就能看出来:tsrm thread local storage entry。
/* PHP_5_4/TSRM/TSRM.c */
typedef struct _tsrm_tls_entry tsrm_tls_entry;

/* Entry point to one thread's private data ("tsrm thread local storage
 * entry"). Entries are chained through `next`. */
struct _tsrm_tls_entry {
	void **storage;			/* array of pointers, one per resource */
	int count;				/* number of resources held in storage */
	THREAD_T thread_id;		/* id of the thread */
	tsrm_tls_entry *next;	/* pointer to the next entry */
};
/* Describes one resource type: how large its memory block is and how the
 * block is constructed and destroyed. */
typedef struct {
	size_t size;			/* size of the memory block, in bytes */
	ts_allocate_ctor ctor;	/* constructor */
	ts_allocate_dtor dtor;	/* destructor */
	int done;				/* non-zero once the resource is no longer used and has been released */
} tsrm_resource_type;
/* The global table of resource type descriptors and the number of slots
 * currently allocated for it. */
static tsrm_resource_type *resource_types_table=NULL;
static int resource_types_table_size;
按照上图那样,tsrm_tls_entry的storage存放的是一些资源的指针,resource_pointer指向的实际内存块是根据resource_types_table里一个个资源描述结构构造出来的。这样看,resource_types_table是不是像一个模具,然后线程就根据这个模具造出一个个私有数据出来呢?举个形象的例子,这个resource_types_table就像一个存放房屋工程设计图的本本,里头存放一张张工程设计的图纸,图纸内容分为四类:1.要建造的屋子的大小;2.屋子怎么建造;3.屋子旧了怎么拆掉;4.这张图纸是否过时了。而每一个线程像一个个房屋建造的团队,它们在全国各地根据一张张的图纸,把一个个屋子给建起来。以后就把屋子卖给不同的人群做不同的事(租地下室给码农啦,投资增值啦...尼玛不说了)。
/* Startup TSRM (call once for the entire process).
 * Runs once for the whole process lifetime. The first two parameters are:
 * 1. the number of threads expected to be started;
 * 2. the number of resources expected to be allocated. */
TSRM_API int tsrm_startup(int expected_threads, int expected_resources, int debug_level, char *debug_filename)

/* Shutdown TSRM (call once for the entire process).
 * Runs once for the whole process lifetime. */
TSRM_API void tsrm_shutdown(void)

/* Allocates a new thread-safe-resource id: adds a resource type to
 * resource_types_table, has every thread allocate a memory block for that
 * type, and returns the id of the resource type. */
TSRM_API ts_rsrc_id ts_allocate_id(ts_rsrc_id *rsrc_id, size_t size, ts_allocate_ctor ctor, ts_allocate_dtor dtor)

/* Creates a new per-thread private data structure and allocates its
 * resources according to resource_types_table. */
static void allocate_new_resource(tsrm_tls_entry **thread_resources_ptr, THREAD_T thread_id)

/* Fetches the requested resource for the current thread: returns the
 * resource with the given id that belongs to the specified thread. */
TSRM_API void *ts_resource_ex(ts_rsrc_id id, THREAD_T *th_id)

/* The following context-related APIs are meant for developers who understand
 * the whole thread-safety model thoroughly.
 *
 * Frees an interpreter context. You are responsible for making sure that
 * it is not linked into the TSRM hash (the global tsrm_tls_table), and not
 * marked as the current interpreter (the current thread's context). */
void tsrm_free_interpreter_context(void *context)

/* Sets the context of the current thread and returns the old context. */
void *tsrm_set_interpreter_context(void *new_ctx)

/* Creates a new context; uses tsrm_set_interpreter_context. */
void *tsrm_new_interpreter_context(void)

/* Frees all resources held by the current thread. */
void ts_free_thread(void)

/* Frees the resources of threads other than the current one.
 * NOTE(review): the implementation shown in this file only walks the hash
 * bucket of the current thread. */
void ts_free_worker_threads(void)

/* Frees the resource with the given id (in every thread that holds it) and
 * marks it as done in resource_types_table. */
void ts_free_id(ts_rsrc_id id)

/* Simple utilities follow. */

/* Returns the id of the current thread. */
TSRM_API THREAD_T tsrm_thread_id(void)

/* Allocates a mutex. */
TSRM_API MUTEX_T tsrm_mutex_alloc(void)

/* Frees a mutex. */
TSRM_API void tsrm_mutex_free(MUTEX_T mutexp)

/* Locks a mutex. */
TSRM_API int tsrm_mutex_lock(MUTEX_T mutexp)

/* Unlocks a mutex. */
TSRM_API int tsrm_mutex_unlock(MUTEX_T mutexp)

/* Signal-mask related. */
TSRM_API int tsrm_sigmask(int how, const sigset_t *set, sigset_t *oldset)

/* Sets the handler invoked at the beginning of new-thread setup. */
TSRM_API void *tsrm_set_new_thread_begin_handler(tsrm_thread_begin_func_t new_thread_begin_handler)

/* Sets the handler invoked at the end of new-thread setup. */
TSRM_API void *tsrm_set_new_thread_end_handler(tsrm_thread_end_func_t new_thread_end_handler)

/* Debug support. */
int tsrm_error(int level, const char *format, ...)
void tsrm_error_set(int level, char *debug_filename)
/* Startup TSRM (call once for the entire process) */ /* tsrm动启函数 */ TSRM_API int tsrm_startup(int expected_threads, int expected_resources, int debug_level, char *debug_filename) { #if defined(GNUPTH) pth_init(); #elif defined(PTHREADS) pthread_key_create( &tls_key, 0 ); //初始化线程的tls_key #elif defined(TSRM_ST) st_init(); st_key_create(&tls_key, 0); #elif defined(TSRM_WIN32) tls_key = TlsAlloc(); #elif defined(BETHREADS) tls_key = tls_allocate(); #endif tsrm_error_file = stderr; tsrm_error_set(debug_level, debug_filename); //初始化全局线程私有据数表 tsrm_tls_table_size = expected_threads; tsrm_tls_table = (tsrm_tls_entry **) calloc(tsrm_tls_table_size, sizeof(tsrm_tls_entry *)); if (!tsrm_tls_table) { TSRM_ERROR((TSRM_ERROR_LEVEL_ERROR, "Unable to allocate TLS table")); return 0; } //初始化全局资源表,id_count是一个计数器的全局量变 id_count=0; resource_types_table_size = expected_resources; resource_types_table = (tsrm_resource_type *) calloc(resource_types_table_size, sizeof(tsrm_resource_type)); if (!resource_types_table) { TSRM_ERROR((TSRM_ERROR_LEVEL_ERROR, "Unable to allocate resource types table")); free(tsrm_tls_table); tsrm_tls_table = NULL; return 0; } //初始化锁 tsmm_mutex = tsrm_mutex_alloc(); //初始化线程的开始和结束柄句 tsrm_new_thread_begin_handler = tsrm_new_thread_end_handler = NULL; TSRM_ERROR((TSRM_ERROR_LEVEL_CORE, "Started up TSRM, %d expected \ threads, %d expected resources", expected_threads, expected_resources)); return 1; } /* Shutdown TSRM (call once for the entire process) TSRM结束函数,做一些内存理清作工*/ TSRM_API void tsrm_shutdown(void) { int i; //把tsrm_tls_table有所线程的私有据数全体干掉 if (tsrm_tls_table) { for (i=0; i<tsrm_tls_table_size; i++) { tsrm_tls_entry *p = tsrm_tls_table[i], *next_p; while (p) { int j; next_p = p->next; for (j=0; j<p->count; j++) { if (p->storage[j]) { if (resource_types_table && !resource_types_table[j].done && resource_types_table[j].dtor) { resource_types_table[j].dtor(p->storage[j], &p->storage); } free(p->storage[j]); } } free(p->storage); free(p); p = next_p; } } 
free(tsrm_tls_table); tsrm_tls_table = NULL; } //释放资源表 if (resource_types_table) { free(resource_types_table); resource_types_table=NULL; } //释放锁 tsrm_mutex_free(tsmm_mutex); tsmm_mutex = NULL; TSRM_ERROR((TSRM_ERROR_LEVEL_CORE, "Shutdown TSRM")); if (tsrm_error_file!=stderr) { fclose(tsrm_error_file); } //删除线程的key #if defined(GNUPTH) pth_kill(); #elif defined(PTHREADS) pthread_setspecific(tls_key, 0); pthread_key_delete(tls_key); #elif defined(TSRM_WIN32) TlsFree(tls_key); #endif } /* 往resource_type_table里添加一种资源类型,各个线程按这个类型配分一块内存块,回返这个资源类型的id*/ TSRM_API ts_rsrc_id ts_allocate_id(ts_rsrc_id *rsrc_id, size_t size, ts_allocate_ctor ctor, ts_allocate_dtor dtor) { int i; TSRM_ERROR((TSRM_ERROR_LEVEL_CORE, "Obtaining a new resource id, %d bytes", size)); //锁加 tsrm_mutex_lock(tsmm_mutex); /* 取得一个资源的id ,这里为啥要用到TSRM_SHUFFLE_RSRC_ID呢,目标是为了与resource_types_table_size对应, * resource_types_table[0] 放存第一个资源,rsrc_id = id_count = 1 ,resource_types_table_size = 1; * resource_types_table[1] 放存第二个资源,rsrc_id = id_count = 2 ,resource_types_table_size = 2; * resource_types_table[2] 放存第三个资源,rsrc_id = id_count = 3 ,resource_types_table_size = 3; * 现实上,根据id_count的名称,作者的本意应该是用这个量变来计数有多少个资源的id的,而个一每线程的tsrm_tls_entry也有个count成员,与这个是对应的。 * 这样 id_count 与 resource_types_table_size能就停止直接比拟,而且保障回返的rsrc_id大于0, * 只不过在现实存保到resource_types_table里的时候,要把id unshuffle一下,也就是减一作操,因为索引是从0开始的。 */ *rsrc_id = TSRM_SHUFFLE_RSRC_ID(id_count++); TSRM_ERROR((TSRM_ERROR_LEVEL_CORE, "Obtained resource id %d", *rsrc_id)); /* 全局资源表扩容,添加一个新的资源类型*/ /* store the new resource type in the resource sizes table */ if (resource_types_table_size < id_count) { resource_types_table = (tsrm_resource_type *) realloc(resource_types_table, sizeof(tsrm_resource_type)*id_count); if (!resource_types_table) { tsrm_mutex_unlock(tsmm_mutex); TSRM_ERROR((TSRM_ERROR_LEVEL_ERROR, "Unable to allocate storage for resource")); *rsrc_id = 0; return 0; } resource_types_table_size = id_count; } 
resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].size = size; resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].ctor = ctor; resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].dtor = dtor; resource_types_table[TSRM_UNSHUFFLE_RSRC_ID(*rsrc_id)].done = 0; /* 把有所经已active的线程的资源根据id_count扩容, * 也就是说,假如入来原线程只存保了1~3三个资源,当初id_count如果经过一些作操,增加到5个了,那么线程必须把4和5两个资源给补齐。*/ /* enlarge the arrays for the already active threads */ for (i=0; i<tsrm_tls_table_size; i++) { tsrm_tls_entry *p = tsrm_tls_table[i]; while (p) { if (p->count < id_count) { int j; p->storage = (void *) realloc(p->storage, sizeof(void *)*id_count); for (j=p->count; j<id_count; j++) { p->storage[j] = (void *) malloc(resource_types_table[j].size); if (resource_types_table[j].ctor) { resource_types_table[j].ctor(p->storage[j], &p->storage); } } p->count = id_count; } p = p->next; } } tsrm_mutex_unlock(tsmm_mutex); TSRM_ERROR((TSRM_ERROR_LEVEL_CORE, "Successfully allocated new resource id %d", *rsrc_id)); return *rsrc_id; } /* 生成一个新的线程私有据数结构,并按照resource_type_table把资源配分给它 * 意注,首先扫下这个函数,并没有锁加作操,只有锁解作操,所以,用使这个函数前应首先锁加 tsmm_mutex * 前面的ts_resource_ex确切就是这么干的。 */ static void allocate_new_resource(tsrm_tls_entry **thread_resources_ptr, THREAD_T thread_id) { int i; TSRM_ERROR((TSRM_ERROR_LEVEL_CORE, "Creating data structures for thread %x", thread_id)); /*生成一个线程的私有据数结构,叫资源进口吧*/ (*thread_resources_ptr) = (tsrm_tls_entry *) malloc(sizeof(tsrm_tls_entry)); (*thread_resources_ptr)->storage = (void **) malloc(sizeof(void *)*id_count); (*thread_resources_ptr)->count = id_count; (*thread_resources_ptr)->thread_id = thread_id; (*thread_resources_ptr)->next = NULL; /* Set thread local storage to this new thread resources structure */ /把后以线程的key指向这个资源进口/ tsrm_tls_set(*thread_resources_ptr); if (tsrm_new_thread_begin_handler) { tsrm_new_thread_begin_handler(thread_id, &((*thread_resources_ptr)->storage)); } /* 按照resrouce_types_table 把有所资源给拷贝一份,那些done的不拷。*/ for (i=0; i<id_count; i++) { if (resource_types_table[i].done) 
{ (*thread_resources_ptr)->storage[i] = NULL; } else { (*thread_resources_ptr)->storage[i] = (void *) malloc(resource_types_table[i].size); if (resource_types_table[i].ctor) { resource_types_table[i].ctor((*thread_resources_ptr)->storage[i], &(*thread_resources_ptr)->storage); } } } if (tsrm_new_thread_end_handler) { tsrm_new_thread_end_handler(thread_id, &((*thread_resources_ptr)->storage)); } tsrm_mutex_unlock(tsmm_mutex); } /* 根据资源的id回返指定线程有拥的的资源 */ TSRM_API void *ts_resource_ex(ts_rsrc_id id, THREAD_T *th_id) { THREAD_T thread_id; int hash_value; tsrm_tls_entry *thread_resources; /* 这个 NETWART 表现还没搞清楚什么情况下回触发,待究研,mark一下========================================*/ #ifdef NETWARE /* The below if loop is added for NetWare to fix an abend while unloading PHP * when an Apache unload command is issued on the system console. * While exiting from PHP, at the end for some reason, this function is called * with tsrm_tls_table = NULL. When this happened, the server abends when * tsrm_tls_table is accessed since it is NULL. */ if(tsrm_tls_table) { #endif /* 如果没传线程的id进来,就认默是问访后以线程 */ if (!th_id) { /* Fast path for looking up the resources for the current * thread. Its used by just about every call to * ts_resource_ex(). This avoids the need for a mutex lock * and our hashtable lookup. */ /* 速快获得线程的私有据数的指针,放在key里头 ,避免每次都去tsrm_tls_table里头找一遍,因为这个table是个哈希表,找起来还是蛮消费资源的*/ thread_resources = tsrm_tls_get(); if (thread_resources) { TSRM_ERROR((TSRM_ERROR_LEVEL_INFO, "Fetching resource id %d \ for current thread %d", id, (long) thread_resources->thread_id)); /* Read a specific resource from the thread's resources. * This is called outside of a mutex, so have to be aware about external * changes to the structure as we read it. 
*/ TSRM_SAFE_RETURN_RSRC(thread_resources->storage, id, thread_resources->count); } thread_id = tsrm_thread_id(); } else { thread_id = *th_id; } /* 在线程的key里头没找着,因原是能可还没为这个线程配分资源呢!*/ TSRM_ERROR((TSRM_ERROR_LEVEL_INFO, "Fetching resource id %d for thread %ld", id, (long) thread_id)); /* 锁加 */ tsrm_mutex_lock(tsmm_mutex); hash_value = THREAD_HASH_OF(thread_id, tsrm_tls_table_size); thread_resources = tsrm_tls_table[hash_value]; if (!thread_resources) {/* 还没为这个线程配分资源,配分之~*/ allocate_new_resource(&tsrm_tls_table[hash_value], thread_id); return ts_resource_ex(id, &thread_id); } else {/* 在同一个hash值的单链里表,找之~ 如果这个链里表也没有,配分之~*/ do { if (thread_resources->thread_id == thread_id) { break; } if (thread_resources->next) { thread_resources = thread_resources->next; } else { allocate_new_resource(&thread_resources->next, thread_id); return ts_resource_ex(id, &thread_id); /* * thread_resources = thread_resources->next; * break; */ } } while (thread_resources); } tsrm_mutex_unlock(tsmm_mutex); /* Read a specific resource from the thread's resources. * This is called outside of a mutex, so have to be aware about external * changes to the structure as we read it. */ TSRM_SAFE_RETURN_RSRC(thread_resources->storage, id, thread_resources->count); #ifdef NETWARE } /* if(tsrm_tls_table) */ #endif } /* frees an interpreter context. 
You are responsible for making sure that * it is not linked into the TSRM hash, and not marked as the current interpreter */ /* 把一个线程的资源上下文释放掉,当然,必须保障它没用了,也就是即不是后以线程的上下文,也不是别的线程的上下文。 */ void tsrm_free_interpreter_context(void *context) { tsrm_tls_entry *next, *thread_resources = (tsrm_tls_entry*)context; int i; while (thread_resources) { next = thread_resources->next; for (i=0; i<thread_resources->count; i++) { if (resource_types_table[i].dtor) { resource_types_table[i].dtor(thread_resources->storage[i], &thread_resources->storage); } } for (i=0; i<thread_resources->count; i++) { free(thread_resources->storage[i]); } free(thread_resources->storage); free(thread_resources); thread_resources = next; } } /* 为后以线程设置一个新的上下文,并把老的上下文的指针回返 */ void *tsrm_set_interpreter_context(void *new_ctx) { tsrm_tls_entry *current; current = tsrm_tls_get(); /* TODO: unlink current from the global linked list, and replace it * it with the new context, protected by mutex where/if appropriate */ /* Set thread local storage to this new thread resources structure */ tsrm_tls_set(new_ctx); /* return old context, so caller can restore it when they're done */ return current; } /* 这个函数相当于用下面的函数生成一个新的上下文, * 然后后以线程切换回来原的上下文,回返这个新的上下文的指针,相当于拷贝了一份自己的孪生兄弟出来。 */ void *tsrm_new_interpreter_context(void) { tsrm_tls_entry *new_ctx, *current; THREAD_T thread_id; thread_id = tsrm_thread_id(); tsrm_mutex_lock(tsmm_mutex); current = tsrm_tls_get(); allocate_new_resource(&new_ctx, thread_id); /* switch back to the context that was in use prior to our creation * of the new one */ return tsrm_set_interpreter_context(current); } /* 把后以线程的资源都释放掉 */ void ts_free_thread(void) { tsrm_tls_entry *thread_resources; int i; THREAD_T thread_id = tsrm_thread_id(); int hash_value; tsrm_tls_entry *last=NULL; tsrm_mutex_lock(tsmm_mutex); hash_value = THREAD_HASH_OF(thread_id, tsrm_tls_table_size); thread_resources = tsrm_tls_table[hash_value]; while (thread_resources) { if (thread_resources->thread_id == thread_id) { for 
(i=0; i<thread_resources->count; i++) { if (resource_types_table[i].dtor) { resource_types_table[i].dtor(thread_resources->storage[i], &thread_resources->storage); } } for (i=0; i<thread_resources->count; i++) { free(thread_resources->storage[i]); } free(thread_resources->storage); if (last) { last->next = thread_resources->next; } else { tsrm_tls_table[hash_value] = thread_resources->next; } tsrm_tls_set(0); free(thread_resources); break; } if (thread_resources->next) { last = thread_resources; } thread_resources = thread_resources->next; } tsrm_mutex_unlock(tsmm_mutex); } /* 把除后以线程外的其它线程的资源都释放掉 */ void ts_free_worker_threads(void) { tsrm_tls_entry *thread_resources; int i; THREAD_T thread_id = tsrm_thread_id(); int hash_value; tsrm_tls_entry *last=NULL; tsrm_mutex_lock(tsmm_mutex); hash_value = THREAD_HASH_OF(thread_id, tsrm_tls_table_size); thread_resources = tsrm_tls_table[hash_value]; while (thread_resources) { if (thread_resources->thread_id != thread_id) { for (i=0; i<thread_resources->count; i++) { if (resource_types_table[i].dtor) { resource_types_table[i].dtor(thread_resources->storage[i], &thread_resources->storage); } } for (i=0; i<thread_resources->count; i++) { free(thread_resources->storage[i]); } free(thread_resources->storage); if (last) { last->next = thread_resources->next; } else { tsrm_tls_table[hash_value] = thread_resources->next; } free(thread_resources); if (last) { thread_resources = last->next; } else { thread_resources = tsrm_tls_table[hash_value]; } } else { if (thread_resources->next) { last = thread_resources; } thread_resources = thread_resources->next; } } tsrm_mutex_unlock(tsmm_mutex); } /* 干掉一个资源,并在全局资源表中标记为done */ void ts_free_id(ts_rsrc_id id) { int i; int j = TSRM_UNSHUFFLE_RSRC_ID(id); tsrm_mutex_lock(tsmm_mutex); TSRM_ERROR((TSRM_ERROR_LEVEL_CORE, "Freeing resource id %d", id)); if (tsrm_tls_table) { for (i=0; i<tsrm_tls_table_size; i++) { tsrm_tls_entry *p = tsrm_tls_table[i]; while (p) { if (p->count > j && p->storage[j]) { 
if (resource_types_table && resource_types_table[j].dtor) { resource_types_table[j].dtor(p->storage[j], &p->storage); } free(p->storage[j]); p->storage[j] = NULL; } p = p->next; } } } resource_types_table[j].done = 1; tsrm_mutex_unlock(tsmm_mutex); TSRM_ERROR((TSRM_ERROR_LEVEL_CORE, "Successfully freed resource id %d", id)); }
