线程局部变量与 __thread
–五竹,2012/9/03
现在使用多线程开发越来越普遍, 为了提高性能,线程局部变量的使用也非常普遍.如线程私有的成员变量,buffer等.
本文首先介绍线程局部变量的2 种使用方法:
1). 常规的使用方式: 相对复杂
2). __thread 修饰符: 使用简单,但容易不正确使用
最后介绍封装的线程局部变量操作接口,来解决上述两种使用方法的不足.该方法主要参考了 ACL 库关于这块的实现.
1 常规的使用方式
1
2
3
4
5
6
7
8
9
|
#include <pthread.h>
/* Ensures init_routine is invoked only once for the whole process, however
 * many threads call pthread_once() with the same once_control. */
int
pthread_once(pthread_once_t *once_control,
void
(*init_routine)(
void
));
/* A once-control object must start out as PTHREAD_ONCE_INIT. */
pthread_once_t once_control = PTHREAD_ONCE_INIT;
/* Creates a key; destructor is the function pointer invoked when a thread
 * terminates. */
int
pthread_key_create(pthread_key_t *key,
void
(*destructor)(
void
*));
/* Fetches the value associated with key. */
void
*pthread_getspecific(pthread_key_t key);
/* Stores value as the value associated with key. */
int
pthread_setspecific(pthread_key_t key,
const
void
*value);
|
1). pthread_once 可以保证在整个进程空间init_routine函数仅被调用一次(它解决了多线程环境中使得互斥量和初始化代码都仅被初始化一次的问题)
2). pthread_key_create 的参数之一是一个析构函数指针,当某个线程终止时该析构函数将被调用;并且对于一个进程内的给定键,pthread_key_create 只能被调用一次(因此通常配合 pthread_once 使用)
3). pthread_setspecific 和 pthread_getspecific 用来存放和获取与一个键关联的值。
1.1 示例1:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
|
/* Key under which each thread stores its own buffer; created by init_once(). */
pthread_key_t key;
/* Once-control passed to pthread_once() in get_buf() so init_once() runs a single time. */
pthread_once_t once = PTHREAD_ONCE_INIT;
/* Cleanup hook registered with the TLS key: frees the buffer it is handed. */
static void destructor(void *ptr)
{
    free(ptr);
}
/* One-time initializer handed to pthread_once(): creates the TLS key and
 * registers destructor() as the cleanup function for its values. */
void init_once(void)
{
    (void) pthread_key_create(&key, destructor);
}
/*
 * Returns the calling thread's private 1024-byte buffer, allocating it on
 * first use and attaching it to the TLS key.
 *
 * Returns NULL when the buffer cannot be allocated or cannot be attached to
 * the key; callers must check the result before writing through it.
 */
static void *get_buf(void)
{
    void *ptr;  /* was used without a declaration in the original snippet */

    /* Make sure the key exists before it is used. */
    pthread_once(&once, init_once);

    ptr = pthread_getspecific(key);
    if (ptr == NULL)
    {
        ptr = malloc(1024);
        /* If the buffer cannot be registered with the key it would never be
         * released by destructor(), so give it back right away. */
        if (ptr != NULL && pthread_setspecific(key, ptr) != 0)
        {
            free(ptr);
            ptr = NULL;
        }
    }
    return ptr;
}
/*
 * Thread entry point: writes a greeting into the calling thread's private
 * buffer and prints it.
 *
 * arg is unused. Always returns NULL.
 */
static void *thread_fn(void *arg)
{
    char *buf = (char *) get_buf();

    (void) arg;

    /* get_buf() hands back NULL when malloc() fails; do not write through it. */
    if (buf == NULL)
    {
        return (NULL);
    }

    sprintf(buf, "hello world");
    printf(">>%s\n", buf);
    return (NULL);
}
/*
 * Starts ten threads running thread_fn() and waits for all of them.
 *
 * Only threads that were actually created are joined, and pthread_join()
 * receives the thread id itself (the original passed its address).
 */
void test(void)
{
    enum { THREAD_COUNT = 10 };
    pthread_t tids[THREAD_COUNT];
    int created = 0;
    int i;

    for (i = 0; i < THREAD_COUNT; i++)
    {
        /* Keep successful ids packed at the front of the array. */
        if (pthread_create(&tids[created], NULL, thread_fn, NULL) == 0)
        {
            created++;
        }
    }

    for (i = 0; i < created; i++)
    {
        pthread_join(tids[i], NULL);
    }
}
|
2 __thread 修饰符
The __thread specifier may be applied to any global, file-scoped static, function-scoped static, or static data member of a class. It may not be applied to block-scoped automatic or non-static data member. [参考文档3]
基本类型(如 (unsigned) int, long, char, 指针, C 类型的结构体等)可以采用 __thread 修饰符来定义线程局部变量.
示例如下:
1
2
3
|
// Thread-local global integer.
__thread
int
i;
// Declaration of a thread-local struct object that is defined elsewhere.
extern
__thread
struct
state s;
// File-scoped static thread-local pointer.
static
__thread
char
*p;
|
像 string 等类是不能直接用 __thread 修饰符的,只能用其指针类型.如下面的示例是错误的.
1
|
// Wrong: a class type such as std::string cannot be declared with __thread directly.
__thread std::string thread_name;
|
下面是正确的:
1
|
// Correct: a pointer to the class type may be thread-local.
__thread std::string * p_thread_name;
|
使用 __thread修饰符来定义一些类的线程局部变量,往往容易造成内存泄漏.
2.1 示例2:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <stdint.h>
#include <pthread.h>
#include <sys/syscall.h>
#include <vector>
#include <string>
#ifdef USE_TLS
#include "Tls.h"
#endif
#define gettid() syscall(__NR_gettid)
using
namespace
std;
// Demo class: starts several threads, each of which lazily allocates a
// per-thread array of strings through a __thread pointer.
class TlsLeakTest
{
public:
    TlsLeakTest();
    // Virtual because the class has a virtual method (start()); deleting a
    // derived object through a TlsLeakTest* would otherwise be undefined.
    virtual ~TlsLeakTest();

    // Creates the worker threads and waits for them to finish.
    virtual void start();
    // Body executed by every worker thread.
    bool threadFun();

public:
    // pthread entry point; arg is the TlsLeakTest instance.
    static void *threadEntry(void *arg);
#ifdef USE_TLS
    // Release callback registered through Tls::pthread_atexit_add().
    static void releaseTls(void *ptr);
#endif

protected:
    // Per-thread pointer to the string array allocated in threadFun().
    static __thread std::string *_vecArray;
    // Number of worker threads to start.
    uint32_t _threadNum;
    // Ids of the threads created by start().
    std::vector<pthread_t> _vecThreadIds;
};
// Definition of the per-thread array pointer; it starts out NULL and is filled in lazily by threadFun().
__thread string * TlsLeakTest::_vecArray=NULL;
// Configures the test to run ten worker threads.
TlsLeakTest::TlsLeakTest()
    : _threadNum(10)
{
}
// Destructor with an empty body: it releases nothing itself.
TlsLeakTest::~TlsLeakTest()
{
}
// Starts _threadNum worker threads running threadEntry(this) and blocks until
// every thread that was successfully created has finished.
//
// Only ids returned by a successful pthread_create() are recorded, and the id
// list is emptied after joining, so start() can be called again safely.
void TlsLeakTest::start()
{
    for (uint32_t i = 0; i < _threadNum; i++)
    {
        pthread_t tid;
        const int rc = pthread_create(&tid, NULL, TlsLeakTest::threadEntry, this);
        if (rc != 0)
        {
            // tid is not valid here; recording it would join a stale id.
            printf("pthread_create failed: error=%d\n", rc);
            continue;
        }
        _vecThreadIds.push_back(tid);
    }

    //waiting for threads to finish
    for (std::vector<pthread_t>::size_type i = 0; i < _vecThreadIds.size(); i++)
    {
        pthread_join(_vecThreadIds[i], NULL);
    }
    // Joined ids must not be joined again; pthread_t has no portable "null"
    // value, so drop them instead of overwriting them with 0.
    _vecThreadIds.clear();
}
// Worker body: lazily allocates this thread's array of 100 strings, fills
// every slot with "<tid>:<index>" (sleeping 100 ms per slot), then prints the
// array address and the last element. Always returns true.
//
// The array is deliberately not freed here. Without USE_TLS this is the leak
// the example demonstrates; with USE_TLS the array is handed to
// Tls::pthread_atexit_add() together with releaseTls().
bool TlsLeakTest::threadFun()
{
    const int32_t kCount = 100;
    char buff[128];
    // syscall() returns long; keeping that type makes the %ld conversions
    // below correct on every ABI (int64_t is not long on 32-bit targets).
    const long tid = gettid();

    // _vecArray only changes here, so the check does not need to be repeated
    // on every loop iteration.
    if (NULL == _vecArray)
    {
        _vecArray = new std::string[kCount];
#ifdef USE_TLS
        Tls::pthread_atexit_add((void *) _vecArray, &TlsLeakTest::releaseTls);
        printf("register TlsLeakTest::releaseTls(): tid=%ld\n", gettid());
#endif
    }

    for (int32_t i = 0; i < kCount; i++)
    {
        snprintf(buff, sizeof(buff), "%ld:%d", tid, static_cast<int>(i));
        _vecArray[i] = buff;
        usleep(100000);
    }

    // %p requires a void* argument.
    printf("tid=%ld _vecArray's addr=%p _vecArray[99]=%s\n",
           tid, static_cast<void *>(_vecArray), _vecArray[kCount - 1].c_str());
    return true;
}
void
* TlsLeakTest::threadEntry(
void
* arg)
{
TlsLeakTest *test=(TlsLeakTest*)arg;
test->threadFun();
return
NULL;
}
#ifdef USE_TLS
// Release callback registered in threadFun(): ptr is the string array
// allocated there. Deletes it and logs the calling thread id; a NULL ptr is
// ignored silently.
void TlsLeakTest::releaseTls(void *ptr)
{
    std::string *strings = static_cast<std::string *>(ptr);
    if (strings == NULL)
    {
        return;
    }
    delete[] strings;
    printf("TlsLeakTest::releaseTls(): tid=%ld\n", gettid());
}
#endif
int
main(
int
argc,
char
*argv[])
{
TlsLeakTest test;
test.start();
return
0;
}
|
1) 编译:
g++ -g -o t_tls_mem_leak t_tls_mem_leak.cpp -lpthread
2) 用 valgrind 检查内存泄漏
valgrind -v --leak-check=full --tool=memcheck ./t_tls_mem_leak
==27391==
==27391== HEAP SUMMARY:
==27391== in use at exit: 40,980 bytes in 1,010 blocks
==27391== total heap usage: 1,025 allocs, 15 frees, 44,268 bytes allocated
==27391==
==27391== Searching for pointers to 1,010 not-freed blocks
==27391== Checked 183,352 bytes
==27391==
==27391== 40,980 (8,080 direct, 32,900 indirect) bytes in 10 blocks are definitely lost in loss record 2 of 2
==27391== at 0x4A067A3: operator new[](unsigned long) (vg_replace_malloc.c:305)
==27391== by 0x400C75: TlsLeakTest::threadFun() (t_tls_mem_leak.cpp:72)
==27391== by 0x400E5E: TlsLeakTest::threadEntry(void*) (t_tls_mem_leak.cpp:96)
==27391== by 0x38632064A6: start_thread (pthread_create.c:297)
==27391== by 0x38626D3C2C: clone (in /lib64/libc-2.5.so)
==27391==
==27391== LEAK SUMMARY:
==27391== definitely lost: 8,080 bytes in 10 blocks
==27391== indirectly lost: 32,900 bytes in 1,000 blocks
==27391== possibly lost: 0 bytes in 0 blocks
==27391== still reachable: 0 bytes in 0 blocks
==27391== suppressed: 0 bytes in 0 blocks
==27391==
==27391== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 4 from 4)
–27391–
–27391– used_suppression: 4 dl-hack3
==27391==
==27391== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 4 from 4)
3 封装的线程局部变量操作接口
对于常规的使用方式,每个线程局部变量的创建与使用,都需要通过 pthread_once, pthread_key_create, pthread_setspecific 和 pthread_getspecific 来操作,比较复杂,但不会有内存泄漏问题.
__thread 修饰符,使用简单,但往往容易不正确使用造成内存泄漏.
为解决上述两种方法的不足,ACL 库提供了一种线程局部变量操作接口.本文中提供的 Tls 类,是参考了 ACL 库的实现.
主要实现方法如下:
1) 定义一个通用的内存释放结构,成员包括用户自定义的释放函数和参数
1
2
3
4
5
|
// Generic cleanup record: a user-supplied release function plus the argument
// it is to be called with.
typedef struct pthread_atexit
{
    void (*free_fn)(void *);  // user-defined release function
    void *arg;                // argument passed to free_fn
} pthread_atexit_t;
|
2) 通过常规方法,声明一个 std::list 类型的线程局部变量,并注册一个退出时的内存释放函数;该释放函数在线程退出时,首先遍历该 list,调用 list 成员对象的 free_fn 函数来释放其他的线程局部变量的内存,然后释放自身的内存.
3) 提供一个 static int pthread_atexit_add(void *arg, void (*free_fn)(void *) ) 接口.当用户使用 __thread 修饰符声明线程局部变量指针时,定义一个相应的释放函数,然后通过上述接口加到线程局部变量内存释放函数 list.这样,在线程退出时,会调用该内存释放函数来释放内存.
3.1 Tls 类:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
//Tls.h
#ifndef __SAP_UTIL_TLS_H_
#define __SAP_UTIL_TLS_H_
#include <pthread.h>
// Static-only helper for registering per-thread cleanup callbacks.
class Tls
{
public:
    // Registers free_fn to be invoked with arg on behalf of the calling thread.
    static int pthread_atexit_add(void *arg, void (*free_fn)(void *));
    // Withdraws a registration previously made with the same (arg, free_fn) pair.
    static int pthread_atexit_remove(void *arg, void (*free_fn)(void *));

protected:
    // Destructor function attached to the pthread key.
    static void pthread_atexit_done(void *arg);
    // One-time initializer that creates the pthread key.
    static void pthread_atexit_init(void);

protected:
    static pthread_key_t _pthread_atexit_key;
    static pthread_once_t _pthread_atexit_control_once;
};
#endif
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
// Tls.cpp
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <list>
#include <new>
#include "Tls.h"
using
namespace
std;
#define gettid() syscall(__NR_gettid)
#define TLS_OUT_OF_INDEXES 0xffffffff
// One registered cleanup action: the release function and the argument it is
// called with.
typedef struct pthread_atexit
{
    void (*free_fn)(void *);  // release function supplied by the caller
    void *arg;                // argument handed to free_fn
} pthread_atexit_t;
// List type stored per thread under _pthread_atexit_key; holds the registered cleanup entries.
typedef
std::list<pthread_atexit_t *> TlsList;
// Starts at the TLS_OUT_OF_INDEXES sentinel, which add/remove treat as "key not available".
pthread_key_t Tls::_pthread_atexit_key = TLS_OUT_OF_INDEXES;
// Once-control used by pthread_atexit_add() to run pthread_atexit_init().
pthread_once_t Tls::_pthread_atexit_control_once = PTHREAD_ONCE_INIT;
void
Tls::pthread_atexit_done(
void
*arg)
{
TlsList *id_list = (TlsList*) arg;
pthread_atexit_t *id_ptr=NULL;
printf
(
"invoke Tls::pthread_atexit_done(): tid=%ld\n"
,gettid());
for
(TlsList::iterator iter=id_list->begin(); iter !=id_list->end(); ++iter)
{
id_ptr = *iter;
if
(id_ptr == NULL)
continue
;
if
(id_ptr->free_fn)
id_ptr->free_fn(id_ptr->arg);
delete
id_ptr;
}
delete
id_list;
}
void
Tls::pthread_atexit_init(
void
)
{
pthread_key_create(&_pthread_atexit_key, pthread_atexit_done);
}
int
Tls::pthread_atexit_add(
void
*arg,
void
(*free_fn)(
void
*))
{
const
char
*myname =
"pthread_atexit_add"
;
pthread_atexit_t *id;
TlsList *id_list;
if
(arg == NULL)
{
return
0;
}
pthread_once(&_pthread_atexit_control_once, pthread_atexit_init);
if
(_pthread_atexit_key == (pthread_key_t) TLS_OUT_OF_INDEXES)
{
printf
(
"%s(%d): _pthread_atexit_key(%d) invalid\n"
,
myname, __LINE__, _pthread_atexit_key);
return
(-1);
}
id =
new
pthread_atexit_t;
if
(id == NULL)
{
printf
(
"%s(%d): new pthread_atexit_t error\n"
, myname, __LINE__);
return
-1;
}
id->free_fn = free_fn;
id->arg = arg;
id_list = (TlsList*) pthread_getspecific(_pthread_atexit_key);
if
(id_list == NULL)
{
id_list =
new
TlsList();
if
(pthread_setspecific(_pthread_atexit_key, id_list) != 0)
{
printf
(
"%s(%d): pthread_setspecific error, key(%d)\n"
,
myname, __LINE__, _pthread_atexit_key);
return
-1;
}
}
id_list->push_back(id);
return
0;
}
// Removes the first entry registered by the calling thread whose
// (arg, free_fn) pair matches, without invoking free_fn.
//
// Returns -1 when arg is NULL, the key is unavailable, or the calling thread
// has no list; returns 0 otherwise, including when no entry matches.
//
// The original erased and deleted after the search loop, so a failed search
// called erase(end()) and deleted the last entry that was still in the list.
// Erasing inside the loop on an actual match avoids both problems.
int Tls::pthread_atexit_remove(void *arg, void (*free_fn)(void *))
{
    const char *myname = "pthread_atexit_remove";

    if (arg == NULL)
    {
        return (-1);
    }
    if (_pthread_atexit_key == (pthread_key_t) TLS_OUT_OF_INDEXES)
    {
        printf("%s(%d): _pthread_atexit_key(%d) invalid\n",
               myname, __LINE__, _pthread_atexit_key);
        return (-1);
    }

    TlsList *id_list = (TlsList *) pthread_getspecific(_pthread_atexit_key);
    if (id_list == NULL)
    {
        printf("%s(%d): _pthread_atexit_key(%d) no exist in tid(%lu)\n",
               myname, __LINE__, _pthread_atexit_key,
               (unsigned long) pthread_self());
        return (-1);
    }

    for (TlsList::iterator iter = id_list->begin(); iter != id_list->end(); ++iter)
    {
        pthread_atexit_t *id_ptr = *iter;
        if (id_ptr == NULL)
        {
            continue;
        }
        if (id_ptr->free_fn == free_fn && id_ptr->arg == arg)
        {
            // iter is invalidated by erase(); leave the loop immediately.
            id_list->erase(iter);
            delete id_ptr;
            break;
        }
    }
    return (0);
}
|
使用方法:
1) #include "Tls.h"
2) 用 __thread 声明某类型的线程局部变量指针
3) 定义该线程局部变量的内存释放函数
4) 第一次使用该线程局部变量时,分配内存并调用pthread_atexit_add注册内存释放函数
3.2 示例3
示例3 的代码同示例2,主要增加了 USE_TLS 宏定义部分代码。
1) 编译
g++ -g -o t_tls_mem_leak t_tls_mem_leak.cpp Tls.cpp -lpthread -DUSE_TLS
2) 用 valgrind 检查内存泄漏
–21825– REDIR: 0x38626726f0 (free) redirected to 0x4a05d7d (free)
==21825==
==21825== HEAP SUMMARY:
==21825== in use at exit: 0 bytes in 0 blocks
==21825== total heap usage: 1,055 allocs, 1,055 frees, 44,828 bytes allocated
==21825==
==21825== All heap blocks were freed — no leaks are possible
==21825==
==21825== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 4 from 4)
–21825–
–21825– used_suppression: 4 dl-hack3
==21825==
==21825== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 4 from 4)
4 参考文档:
1) 线程局部变量的使用与多线程开发: http://developer.51cto.com/art/200909/153297.htm
2) 再谈线程局部变量 : http://zsxxsz.iteye.com/blog/548903
3) http://gcc.gnu.org/onlinedocs/gcc-4.3.2/gcc/Thread_002dLocal.html