[zz]线程局部变量与 __thread

转载自 http://www.searchtb.com/2012/09/tls.html

 

现在使用多线程开发越来越普遍,为了提高性能,线程局部变量的使用也非常普遍,如线程私有的成员变量、buffer 等.

本文首先介绍线程局部变量的2 种使用方法:
1). 常规的使用方式: 相对复杂
2). __thread 修饰符: 使用简单,但容易不正确使用

最后介绍封装的线程局部变量操作接口,来解决上述两种使用方法的不足.该方法主要参考了 ACL 库关于这块的实现.

1 常规的使用方式

1
2
3
4
5
6
7
8
9
#include <pthread.h>
 
/* Runs init_routine once for a given once_control, however many threads call pthread_once(). */
int pthread_once(pthread_once_t *once_control, void (*init_routine)( void ));
/* A once-control object, statically initialized with PTHREAD_ONCE_INIT. */
pthread_once_t once_control = PTHREAD_ONCE_INIT;
 
/* Creates a thread-specific data key; destructor is the cleanup hook invoked when a thread terminates. */
int pthread_key_create(pthread_key_t *key, void (*destructor)( void *));
 
/* Fetches the calling thread's value associated with key. */
void *pthread_getspecific(pthread_key_t key);
/* Associates value with key for the calling thread. */
int pthread_setspecific(pthread_key_t key, const void *value);

1). pthread_once 可以保证在整个进程空间init_routine函数仅被调用一次(它解决了多线程环境中使得互斥量和初始化代码都仅被初始化一次的问题)
2). pthread_key_create 的参数之一是一个析构函数指针,当某个线程终止且该键在该线程中关联的值非 NULL 时,该析构函数将被调用,并以该值作为参数;对于一个进程内的给定键,pthread_key_create 只能被调用一次(因此通常配合 pthread_once 使用)
3). pthread_setspecific 和 pthread_getspecific 用来存放和获取与一个键关联的值。

1.1 示例1:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Size in bytes of the buffer handed to each thread by get_buf(). */
#define BUF_SIZE 1024

/* Key under which every thread stores its private buffer; created once by init_once(). */
pthread_key_t key;
/* Guards the one-time creation of key. */
pthread_once_t once = PTHREAD_ONCE_INIT;

/* Key destructor: releases the buffer that get_buf() allocated for the exiting thread. */
static void destructor( void *ptr)
{
    free (ptr);
}

/* One-time initializer run through pthread_once(): creates the key and binds the destructor. */
void init_once( void )
{
    pthread_key_create(&key, destructor);
}

/*
 * Returns the calling thread's private buffer of BUF_SIZE bytes, allocating
 * it on first use. Returns NULL if the buffer cannot be allocated or cannot
 * be bound to the key.
 */
static void *get_buf( void )
{
    void *ptr;

    pthread_once(&once, init_once);

    ptr = pthread_getspecific(key);
    if (ptr == NULL)
    {
        ptr = malloc (BUF_SIZE);
        if (ptr == NULL)
        {
            return (NULL);
        }
        if (pthread_setspecific(key, ptr) != 0)
        {
            /* Not bound to the key, so the destructor would never free it. */
            free (ptr);
            return (NULL);
        }
    }
    return (ptr);
}

/* Thread body: writes a greeting into the thread's private buffer and prints it. */
static void *thread_fn( void *arg)
{
    char *ptr = ( char *) get_buf();

    (void) arg;
    if (ptr == NULL)
    {
        return (NULL);
    }
    snprintf (ptr, BUF_SIZE, "hello world" );
    printf ( ">>%s\n" , ptr);
    return (NULL);
}

/* Starts 10 threads running thread_fn() and waits for all that were started. */
void test( void )
{
    int   i, n = 10, started = 0;
    pthread_t tids[10];

    for (i = 0; i < n; i++)
    {
        /* Store ids compactly so that only successfully created threads are joined. */
        if (pthread_create(&tids[started], NULL, thread_fn, NULL) == 0)
        {
            started++;
        }
    }

    for (i = 0; i < started; i++)
    {
        /* pthread_join() takes the pthread_t by value, not its address. */
        pthread_join(tids[i], NULL);
    }
}

2 __thread 修饰符
The __thread specifier may be applied to any global, file-scoped static, function-scoped static, or static data member of a class. It may not be applied to block-scoped automatic or non-static data member. [参考文档3]

基本类型(如 (unsigned) int, long, char, 指针, C 类型的结构体等)可以采用 __thread 修饰符来定义线程局部变量.
示例如下:

1
2
3
// Global thread-local variable.
__thread int i;
// Declaration of a thread-local struct that is defined elsewhere.
extern __thread struct state s;
// File-scoped static thread-local pointer.
static __thread char *p;

像 string 等类是不能直接用 __thread 修饰符的,只能用其指针类型.如下面的示例是错误的.

1
// Wrong: a class type such as std::string cannot be qualified with __thread directly.
__thread std::string    thread_name;

下面是正确的:

1
// Correct: a pointer to the class type may be qualified with __thread.
__thread std::string *  p_thread_name;

使用 __thread修饰符来定义一些类的线程局部变量,往往容易造成内存泄漏.
2.1 示例2:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <stdint.h>
#include <pthread.h>
#include <sys/syscall.h>
#include <vector>
#include <string>
 
#ifdef USE_TLS
#include "Tls.h"
#endif
 
#define gettid() syscall(__NR_gettid)
using namespace std;
 
/**
 * Demo class: spawns worker threads that each lazily allocate an array of
 * std::string through a __thread pointer.
 *
 * When USE_TLS is defined, the array is registered with
 * Tls::pthread_atexit_add() and released by releaseTls().
 */
class TlsLeakTest
{
public :
     TlsLeakTest();
     // Virtual because the class has a virtual method (start) and may be
     // deleted through a base-class pointer.
     virtual ~TlsLeakTest();
     // Creates the worker threads and waits for them to finish.
     virtual void start();
     // Work performed by each worker thread.
     bool threadFun();
public :
     // pthread entry point; arg is the TlsLeakTest instance.
     static void * threadEntry( void * arg);
#ifdef USE_TLS
     // Release callback for the per-thread string array; ptr is that array.
     static void releaseTls( void * ptr);
#endif
protected :
     // Per-thread pointer to the string array (an array despite the "vec" name).
     static __thread std::string *   _vecArray;
     // Number of worker threads start() creates.
     uint32_t                    _threadNum;
     // Ids of the threads created by start().
     std::vector<pthread_t>        _vecThreadIds;
 
};
 
// Out-of-class definition of the thread-local static member, initialized to NULL.
__thread string *   TlsLeakTest::_vecArray=NULL;
 
// Configures the test to run with 10 worker threads.
TlsLeakTest::TlsLeakTest()
     : _threadNum(10)
{
}
// Intentionally empty: the destructor performs no explicit cleanup.
TlsLeakTest::~TlsLeakTest() {}
void TlsLeakTest::start()
{
     pthread_t tid=0;
     for (uint32_t i=0; i < _threadNum; i++)
     {
          pthread_create(&tid, NULL, TlsLeakTest::threadEntry, this );
         _vecThreadIds.push_back(tid);
     }
     //waiting for threads to finish
     for (uint32_t i=0; i<_threadNum; i++)
     {
         if (_vecThreadIds[i] != 0)
         {
             pthread_join(_vecThreadIds[i], NULL);
             _vecThreadIds[i]=0;
         }
     }
}
bool TlsLeakTest::threadFun()
{
     char buff[128];
     int64_t tid=gettid();
     for (int32_t i=0; i < 100 ; i++)
     {
         if (NULL == _vecArray )
         {
             _vecArray= new string[100];
#ifdef USE_TLS
             Tls::pthread_atexit_add(( void *)_vecArray,&TlsLeakTest::releaseTls);
             printf ( "register TlsLeakTest::releaseTls(): tid=%ld\n" ,gettid());
#endif
         }
         sprintf (buff, "%ld:%d" ,tid,i);
         _vecArray[i]=buff;
         usleep(100000);
         if (99 == i)
         {
             printf ( "tid=%ld _vecArray's addr=%p _vecArray[99]=%s\n"
                                 ,tid,_vecArray,_vecArray[99].c_str());
         }
     }
 
     return true ;
 
}
 
void * TlsLeakTest::threadEntry( void * arg)
{
     TlsLeakTest *test=(TlsLeakTest*)arg;
     test->threadFun();
     return NULL;
}
 
#ifdef USE_TLS
// Release callback registered with Tls::pthread_atexit_add(): ptr is the
// string array created by threadFun(). A NULL ptr is ignored silently.
void  TlsLeakTest::releaseTls( void * ptr)
{
     if (ptr == NULL)
     {
         return ;
     }
     delete [] static_cast<std::string *>(ptr);
     printf ( "TlsLeakTest::releaseTls(): tid=%ld\n" ,gettid());
}
#endif
 
int main( int argc, char *argv[])
{
     TlsLeakTest test;
     test.start();
     return 0;
}

1) 编译:
g++ -g -o t_tls_mem_leak t_tls_mem_leak.cpp -lpthread

2) 用 valgrind 检查内存泄漏

valgrind -v --leak-check=full --tool=memcheck ./t_tls_mem_leak

==27391==
==27391== HEAP SUMMARY:
==27391== in use at exit: 40,980 bytes in 1,010 blocks
==27391== total heap usage: 1,025 allocs, 15 frees, 44,268 bytes allocated
==27391==
==27391== Searching for pointers to 1,010 not-freed blocks
==27391== Checked 183,352 bytes
==27391==
==27391== 40,980 (8,080 direct, 32,900 indirect) bytes in 10 blocks are definitely lost in loss record 2 of 2
==27391== at 0x4A067A3: operator new[](unsigned long) (vg_replace_malloc.c:305)
==27391== by 0x400C75: TlsLeakTest::threadFun() (t_tls_mem_leak.cpp:72)
==27391== by 0x400E5E: TlsLeakTest::threadEntry(void*) (t_tls_mem_leak.cpp:96)
==27391== by 0x38632064A6: start_thread (pthread_create.c:297)
==27391== by 0x38626D3C2C: clone (in /lib64/libc-2.5.so)
==27391==
==27391== LEAK SUMMARY:
==27391== definitely lost: 8,080 bytes in 10 blocks
==27391== indirectly lost: 32,900 bytes in 1,000 blocks
==27391== possibly lost: 0 bytes in 0 blocks
==27391== still reachable: 0 bytes in 0 blocks
==27391== suppressed: 0 bytes in 0 blocks
==27391==
==27391== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 4 from 4)
--27391--
--27391-- used_suppression: 4 dl-hack3
==27391==
==27391== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 4 from 4)

3 封装的线程局部变量操作接口

对于常规的使用方式,每个线程局部变量的创建与使用,都需要通过 pthread_once, pthread_key_create, pthread_setspecific 和 pthread_getspecific 来操作,比较复杂,但不会有内存泄漏问题.
__thread 修饰符,使用简单,但往往容易不正确使用造成内存泄漏.
为解决上述两种方法的不足,ACL 库提供了一种线程局部变量操作接口.本文中提供的 Tls 类,是参考了 ACL 库的实现.
主要实现方法如下:
1) 定义一个通用的内存释放结构,成员包括用户自定义的释放函数和参数

1
2
3
4
5
// One cleanup record: free_fn is the user-supplied release function and arg
// is the pointer handed to it.
struct pthread_atexit
{
     void   (*free_fn)( void *);
     void   *arg;
};
typedef struct pthread_atexit pthread_atexit_t;

2) 通过常规方法,声明一个 std::list<pthread_atexit_t *> 类型的线程局部变量,并注册一个退出时的内存释放函数;该释放函数在线程退出时,首先遍历该 list,调用 list 成员对象的 free_fn 函数来释放其他线程局部变量的内存,然后释放自身的内存.

3) 提供一个 static int pthread_atexit_add(void *arg, void (*free_fn)(void *) ) 接口.当用户使用 __thread 修饰符声明线程局部变量指针时,定义一个相应的释放函数,然后通过上述接口加到线程局部变量内存释放函数 list.这样,在线程退出时,会调用该内存释放函数来释放内存.

3.1 Tls 类:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
//Tls.h
#ifndef __SAP_UTIL_TLS_H_
#define __SAP_UTIL_TLS_H_
#include <pthread.h>
 
/**
 * Static-only helper for registering per-thread cleanup callbacks.
 */
class Tls
{
public :
     // Registers free_fn to be called with arg for the calling thread.
     static int pthread_atexit_add( void *arg, void (*free_fn)( void *) );
     // Removes a registration previously made with the same arg and free_fn.
     static int pthread_atexit_remove( void *arg, void (*free_fn)( void *) );

protected :
     // One-time initializer for the key.
     static void pthread_atexit_init( void );
     // Cleanup routine taking the per-thread registration data as arg.
     static void pthread_atexit_done( void *arg);

     // Once-control guarding pthread_atexit_init().
     static pthread_once_t   _pthread_atexit_control_once;
     // Thread-specific data key for the per-thread registrations.
     static pthread_key_t    _pthread_atexit_key;
};
 
#endif
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
// Tls.cpp
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <list>
#include "Tls.h"
 
using namespace std;
 
#define gettid() syscall(__NR_gettid)
#define TLS_OUT_OF_INDEXES          0xffffffff
 
// One cleanup record: free_fn is the user-supplied release function and arg
// is the pointer handed to it.
struct pthread_atexit
{
     void   (*free_fn)( void *);
     void   *arg;
};
typedef struct pthread_atexit pthread_atexit_t;
 
// List of cleanup records; each registering thread stores its own list under the key.
typedef std::list<pthread_atexit_t *> TlsList;
 
// Holds the TLS_OUT_OF_INDEXES sentinel until pthread_atexit_init() creates the key.
pthread_key_t   Tls::_pthread_atexit_key = TLS_OUT_OF_INDEXES;
// Once-control passed to pthread_once() so that pthread_atexit_init() runs a single time.
pthread_once_t  Tls::_pthread_atexit_control_once = PTHREAD_ONCE_INIT;
 
void Tls::pthread_atexit_done( void *arg)
{
     TlsList *id_list = (TlsList*) arg;
     pthread_atexit_t *id_ptr=NULL;
     printf ( "invoke Tls::pthread_atexit_done(): tid=%ld\n" ,gettid());
     for (TlsList::iterator iter=id_list->begin(); iter !=id_list->end(); ++iter)
     {
         id_ptr = *iter;
         if (id_ptr == NULL)
             continue ;
         if (id_ptr->free_fn)
             id_ptr->free_fn(id_ptr->arg);
         delete id_ptr;
     }
     delete id_list;
}
 
// One-time initializer invoked through pthread_once(): creates the key and
// installs pthread_atexit_done() as its destructor.
void Tls::pthread_atexit_init( void )
{
     pthread_key_create(&Tls::_pthread_atexit_key, &Tls::pthread_atexit_done);
}
 
int Tls::pthread_atexit_add( void *arg, void (*free_fn)( void *))
{
     const char *myname = "pthread_atexit_add" ;
     pthread_atexit_t *id;
     TlsList *id_list;
 
     if (arg == NULL)
     {
         return 0;
     }
     pthread_once(&_pthread_atexit_control_once, pthread_atexit_init);
     if (_pthread_atexit_key == (pthread_key_t) TLS_OUT_OF_INDEXES)
     {
         printf ( "%s(%d): _pthread_atexit_key(%d) invalid\n" ,
                 myname, __LINE__, _pthread_atexit_key);
         return (-1);
     }
 
     id = new pthread_atexit_t;
     if (id == NULL)
     {
         printf ( "%s(%d): new pthread_atexit_t error\n" , myname, __LINE__);
         return -1;
     }
     id->free_fn = free_fn;
     id->arg = arg;
 
     id_list = (TlsList*) pthread_getspecific(_pthread_atexit_key);
     if (id_list == NULL)
     {
         id_list = new TlsList();
         if (pthread_setspecific(_pthread_atexit_key, id_list) != 0)
         {
             printf ( "%s(%d): pthread_setspecific error, key(%d)\n" ,
                     myname, __LINE__, _pthread_atexit_key);
             return -1;
         }
     }
     id_list->push_back(id);
     return 0;
}
 
int Tls::pthread_atexit_remove( void *arg, void (*free_fn)( void *))
{
     const char *myname = "pthread_atexit_remove" ;
     TlsList *id_list;
 
     if (arg == NULL)
     {
         return (-1);
     }
     if (_pthread_atexit_key == (pthread_key_t) TLS_OUT_OF_INDEXES)
     {
         printf ( "%s(%d): _pthread_atexit_key(%d)  invalid\n" ,myname, __LINE__, _pthread_atexit_key);
         return (-1);
     }
     id_list = (TlsList*) pthread_getspecific(_pthread_atexit_key);
     if (id_list == NULL)
     {
         printf ( "%s(%d): _pthread_atexit_key(%d) no exist in tid(%lu)\n" ,
             myname, __LINE__, _pthread_atexit_key,(unsigned long ) pthread_self());
         return (-1);
     }
     pthread_atexit_t *id_ptr =NULL;
     TlsList::iterator iter=id_list->begin();
     for (; iter !=id_list->end(); ++iter)
     {
         id_ptr = *iter;
         if (id_ptr == NULL)
             continue ;
         if (id_ptr->free_fn == free_fn && id_ptr->arg == arg)
         {
             break ;
         }
     }
     if (id_ptr != NULL)
     {
         id_list->erase(iter);
         delete id_ptr;
     }
     return (0);
}

使用方法:
1) #include "Tls.h"
2) 用 __thread 声明某类型的线程局部变量指针
3) 定义该线程局部变量的内存释放函数
4) 第一次使用该线程局部变量时,分配内存并调用pthread_atexit_add注册内存释放函数

3.2 示例3 
示例3 的代码同示例2,主要增加了 USE_TLS 宏定义部分代码。

1) 编译
g++ -g -o t_tls_mem_leak t_tls_mem_leak.cpp Tls.cpp -lpthread -DUSE_TLS

2) 用 valgrind 检查内存泄漏
--21825-- REDIR: 0x38626726f0 (free) redirected to 0x4a05d7d (free)
==21825==
==21825== HEAP SUMMARY:
==21825== in use at exit: 0 bytes in 0 blocks
==21825== total heap usage: 1,055 allocs, 1,055 frees, 44,828 bytes allocated
==21825==
==21825== All heap blocks were freed — no leaks are possible
==21825==
==21825== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 4 from 4)
--21825--
--21825-- used_suppression: 4 dl-hack3
==21825==
==21825== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 4 from 4)

4 参考文档:
1) 线程局部变量的使用与多线程开发: http://developer.51cto.com/art/200909/153297.htm

2) 再谈线程局部变量 : http://zsxxsz.iteye.com/blog/548903

3) http://gcc.gnu.org/onlinedocs/gcc-4.3.2/gcc/Thread_002dLocal.html

你可能感兴趣的:(thread)