This article analyzes pink, the high-performance network framework open-sourced by Qihoo 360, and walks through its main implementation points. pink uses some of the basic utility code from slash, so I suggest cloning both repositories and reading them together. The real motivation for reading pink is to prepare for analyzing floyd, an engineering implementation of Raft (floyd also pulls in rocksdb, so along the way I will look at the overall structure and implementation principles of leveldb as well).
A few days ago I came across the Pebble distributed framework. Its introduction looked interesting, so I studied its thread pool implementation, which I will also cover here. I have written before about some pitfalls and caveats of multithreaded development; that write-up is probably incomplete and I will keep adding to it.
Let's now look at the thread-related code in pink and then, together with the network module, at how the whole framework is put together. Parts of the design are similar to what the muduo book describes, e.g. the trade-offs between multithreading and multiprocessing, and whether to use a main thread plus multiple worker threads or one event loop per thread; I won't repeat those discussions here. For C++11 (and newer) language features, please consult other references.
In general, multithreaded development revolves around a few indispensable building blocks: mutexes, condition variables, queues (blocking or not), tasks (wrapped in a struct or in a function object via bind/function), and inter-thread communication. Let's start with the threading framework.
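As a quick illustration of the "task" building block (my own sketch, not pink code): a task is usually nothing more than a callable queued for some worker to execute later.

#include <functional>
#include <queue>

// A task is just a callable; lambdas or std::bind adapt any function to it.
using Task = std::function<void()>;

std::queue<Task> task_queue;   // in real code this queue is protected by a mutex

void EnqueueExamples() {
  task_queue.push([] { /* do some work */ });
  task_queue.push(std::bind([](int n) { /* use n */ }, 42));
}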
Here is part of the relevant code in pink:
class Thread {
 public:
  Thread();
  virtual ~Thread();
  ...
 private:
  static void* RunThread(void* arg);
  virtual void *ThreadMain() = 0;

  slash::Mutex running_mu_;
  bool running_;
  pthread_t thread_id_;
  std::string thread_name_;

  /*
   * No allowed copy and copy assign
   */
  Thread(const Thread&);
  void operator=(const Thread&);
};
void* Thread::RunThread(void *arg) {
  Thread* thread = reinterpret_cast<Thread*>(arg);
  if (!(thread->thread_name().empty())) {
    SetThreadName(pthread_self(), thread->thread_name());
  }
  thread->ThreadMain();
  return nullptr;
}

int Thread::StartThread() {
  slash::MutexLock l(&running_mu_);
  should_stop_ = false;
  if (!running_) {
    running_ = true;
    return pthread_create(&thread_id_, nullptr, RunThread, (void *)this);
  }
  return 0;
}
RunThread is the thread entry function; ThreadMain does the actual work and is implemented by derived classes.
The running_mu_ member looks a bit unusual; it is the first time I have seen this design. I suspect it is there to serialize StartThread against StopThread (or the reverse order), because, as many articles point out, mixing multithreading with C++ object lifetimes can create situations that are hard to handle, e.g. how do you guarantee that while one thread is working on an object, the object is not already half-way through destruction? With disciplined coding conventions this lock could probably be omitted. The other member variables are not particularly important.
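For illustration, here is how a derived class typically plugs its logic into this base class (a minimal sketch of my own; MyThread is a made-up name, and I assume the class lives in namespace pink):

// Sketch only: the derived class supplies the thread body by overriding ThreadMain().
class MyThread : public pink::Thread {
 private:
  virtual void *ThreadMain() override {
    while (!should_stop()) {      // should_stop()/StopThread() come from the base class
      // one iteration of work
    }
    return nullptr;
  }
};
// Usage: MyThread t; t.StartThread(); ... t.StopThread();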
Below is part of the ServerThread implementation:
class ServerThread : public Thread {
 public:
  ServerThread(int port, int cron_interval, const ServerHandle *handle);
  ...
 protected:
  /*
   * The Epoll event handler
   */
  PinkEpoll *pink_epoll_;
  ...
 private:
  ...
  /*
   * The tcp server port and address
   */
  int port_;
  std::set<std::string> ips_;
  std::vector<ServerSocket*> server_sockets_;
  std::set<int32_t> server_fds_;

  virtual int InitHandle();
  virtual void *ThreadMain() override;
  /*
   * The server event handle
   */
  virtual void HandleConnEvent(PinkFiredEvent *pfe) = 0;
};
ServerThread is a concrete Thread: it is the server-side listening/accepting thread. Its void *ServerThread::ThreadMain() is fairly simple: it listens, accepts new connections, and hands each one off with HandleNewConn(connfd, ip_port), which is implemented by a derived class.
As for how the pink threading framework is combined into a high-performance network framework: I will not go over the TCP/IP socket APIs such as socket/bind/listen/accept, nor the principles and trade-offs of select/epoll/pipe; all of that is covered in APUE and other books or online material. That said, understanding what listen/accept/epoll actually do under the hood is a worthwhile exercise in itself.
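For reference, the bare listen/accept/epoll skeleton that pink builds on looks roughly like this (a self-contained sketch of my own, not pink code; error handling omitted and the port number is arbitrary):

// Minimal epoll-based accept loop, for illustration only.
#include <arpa/inet.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <unistd.h>

int main() {
  int listen_fd = socket(AF_INET, SOCK_STREAM, 0);
  struct sockaddr_in addr = {};
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = htonl(INADDR_ANY);
  addr.sin_port = htons(9221);
  bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr));
  listen(listen_fd, 128);

  int epfd = epoll_create(1024);
  struct epoll_event ev = {};
  ev.events = EPOLLIN;
  ev.data.fd = listen_fd;
  epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev);

  struct epoll_event events[64];
  for (;;) {
    int n = epoll_wait(epfd, events, 64, 1000);   // 1s timeout, like a cron interval
    for (int i = 0; i < n; i++) {
      if (events[i].data.fd == listen_fd) {
        int connfd = accept(listen_fd, NULL, NULL);
        // this is the point where pink's ServerThread calls HandleNewConn()
        close(connfd);                            // the sketch just closes it
      }
    }
  }
}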
Some basic types first:
class PinkItem {
  ...
 private:
  int fd_;
  std::string ip_port_;
};
PinkItem describes a pending connection (its fd and peer address). class PinkEpoll wraps the epoll instance, and fired events are reported as struct PinkFiredEvent. class PinkConn represents an established connection, including its state and the worker thread it belongs to. class ConnFactory is the factory used to create a PinkConn for each new connection.
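The factory is the main user-facing extension point: you derive from ConnFactory and return your own PinkConn subclass. Roughly like the sketch below (the parameter list is inferred from the NewPinkConn() call in WorkerThread::ThreadMain further down, not copied from the header, so the exact qualifiers may differ; MyConnFactory/MyConn are made-up names):

// Sketch of the ConnFactory extension point.
class MyConnFactory : public ConnFactory {
 public:
  virtual PinkConn *NewPinkConn(int connfd, const std::string &ip_port,
                                ServerThread *server_thread,
                                void *worker_private_data) const override {
    return new MyConn(connfd, ip_port, server_thread);   // MyConn: your PinkConn subclass
  }
};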
Below is part of the worker thread implementation:
class WorkerThread : public Thread {
 public:
  ...
  std::queue<PinkItem> conn_queue_;
  ...
  std::map<int, PinkConn*> conns_;
  ...
 private:
  ServerThread* server_thread_;
  ConnFactory *conn_factory_;
  ...
  int notify_receive_fd_;
  int notify_send_fd_;
  ...
  PinkEpoll *pink_epoll_;
  ...
};
WorkerThread::WorkerThread(ConnFactory *conn_factory,
                           ServerThread* server_thread,
                           int cron_interval)
    : private_data_(nullptr),
      server_thread_(server_thread),
      conn_factory_(conn_factory),
      cron_interval_(cron_interval),
      keepalive_timeout_(kDefaultKeepAliveTime) {
  /*
   * install the protobuf handler here
   */
  pink_epoll_ = new PinkEpoll();
  int fds[2];
  if (pipe(fds)) {
    exit(-1);
  }
  notify_receive_fd_ = fds[0];
  notify_send_fd_ = fds[1];
  pink_epoll_->PinkAddEvent(notify_receive_fd_, EPOLLIN | EPOLLERR | EPOLLHUP);
}
Each worker thread creates its own PinkEpoll, plus a pipe used for communication between the dispatch thread and the worker (the multithreaded framework in dpdk does the same thing). conn_queue_ is where the dispatch thread, after accepting a new connection, pushes the socket fd and peer address; once the worker's epoll_wait reports the pipe's read end as readable, the worker pops the item and creates a PinkConn. Note that both sides take the worker's mutex_: the dispatch thread locks it when pushing (see HandleNewConn below), and the worker locks it when popping after being woken by the readable event, so access to the queue is safe.
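The wake-up trick is easy to demo in isolation (my own stand-alone snippet, unrelated to pink): writing a single byte to the pipe makes its read end readable, so the worker's epoll_wait returns.

// Stand-alone demo of the pipe-based wake-up used between the dispatcher and workers.
#include <sys/epoll.h>
#include <unistd.h>
#include <cstdio>

int main() {
  int fds[2];
  pipe(fds);                                  // fds[0] = read end, fds[1] = write end

  int epfd = epoll_create(16);
  struct epoll_event ev = {};
  ev.events = EPOLLIN;
  ev.data.fd = fds[0];
  epoll_ctl(epfd, EPOLL_CTL_ADD, fds[0], &ev);

  write(fds[1], "", 1);                       // what DispatchThread does per new connection

  struct epoll_event out;
  int n = epoll_wait(epfd, &out, 1, 1000);    // returns 1: the read end is now readable
  char c;
  read(fds[0], &c, 1);                        // what WorkerThread does before popping the queue
  printf("woken up, nfds = %d\n", n);
  return 0;
}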
The important part of the main loop is as follows:
void *WorkerThread::ThreadMain() {
  ...
  int timeout = cron_interval_;
  if (timeout <= 0) {
    timeout = PINK_CRON_INTERVAL;
  }

  while (!should_stop()) {
    if (cron_interval_ > 0) {
      gettimeofday(&now, NULL);
      if (when.tv_sec > now.tv_sec ||
          (when.tv_sec == now.tv_sec && when.tv_usec > now.tv_usec)) {
        timeout = (when.tv_sec - now.tv_sec) * 1000 +
                  (when.tv_usec - now.tv_usec) / 1000;
      } else {
        DoCronTask();
        when.tv_sec = now.tv_sec + (cron_interval_ / 1000);
        when.tv_usec = now.tv_usec + ((cron_interval_ % 1000) * 1000);
        timeout = cron_interval_;
      }
    }

    nfds = pink_epoll_->PinkPoll(timeout);
The code above computes the timeout for the poll; when the deadline has passed, it runs DoCronTask() (currently quite simple: it mainly closes timed-out connections), resets the deadline, and then calls PinkPoll(), whose implementation is just a wrapper around epoll_wait.
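As an aside, the timeval arithmetic above can be restated in isolation (my own equivalent helper, not pink code):

// Remaining timeout in milliseconds until `when`; 0 means the cron task is already due.
#include <sys/time.h>

static int RemainingMs(const struct timeval &now, const struct timeval &when) {
  if (when.tv_sec > now.tv_sec ||
      (when.tv_sec == now.tv_sec && when.tv_usec > now.tv_usec)) {
    return (when.tv_sec - now.tv_sec) * 1000 +
           (when.tv_usec - now.tv_usec) / 1000;
  }
  return 0;  // deadline passed: run DoCronTask() and push `when` forward by cron_interval_
}

For example, with cron_interval_ = 1000 ms, if epoll_wait wakes up 400 ms before the deadline, the next poll uses a timeout of roughly 400 ms, so DoCronTask() still fires about once per second. PinkPoll() itself looks like this: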
int PinkEpoll::PinkPoll(const int timeout) {
  int retval, numevents = 0;
  retval = epoll_wait(epfd_, events_, PINK_MAX_CLIENTS, timeout);
  if (retval > 0) {
    numevents = retval;
    for (int i = 0; i < numevents; i++) {
      int mask = 0;
      firedevent_[i].fd = (events_ + i)->data.fd;

      if ((events_ + i)->events & EPOLLIN) {
        mask |= EPOLLIN;
      }
      if ((events_ + i)->events & EPOLLOUT) {
        mask |= EPOLLOUT;
      }
      if ((events_ + i)->events & EPOLLERR) {
        mask |= EPOLLERR;
      }
      if ((events_ + i)->events & EPOLLHUP) {
        mask |= EPOLLHUP;
      }
      firedevent_[i].mask = mask;
    }
  }
  return numevents;
}
    for (int i = 0; i < nfds; i++) {
      pfe = (pink_epoll_->firedevent()) + i;
      if (pfe->fd == notify_receive_fd_) {
        if (pfe->mask & EPOLLIN) {
          read(notify_receive_fd_, bb, 1);
          {
            slash::MutexLock l(&mutex_);
            ti = conn_queue_.front();
            conn_queue_.pop();
          }
          PinkConn *tc = conn_factory_->NewPinkConn(
              ti.fd(), ti.ip_port(),
              server_thread_, private_data_);
          if (!tc || !tc->SetNonblock()) {
            delete tc;
            continue;
          }
After epoll_wait returns, the fired events are processed one by one. If the fd is notify_receive_fd_ (the dispatch thread telling the worker that a new connection has arrived) and the event is EPOLLIN, the worker pops a PinkItem from conn_queue_, creates a PinkConn via the factory, sets it non-blocking, and then registers the new fd for EPOLLIN (that last part is elided from the snippet above).
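The elided step is roughly the following (a sketch reconstructed from the description above, not verbatim pink code; the lock that guards conns_ is omitted here):

// Remember the new connection and start watching its fd for readable events.
conns_[ti.fd()] = tc;                           // conns_ maps fd -> PinkConn*
pink_epoll_->PinkAddEvent(ti.fd(), EPOLLIN);    // same PinkAddEvent used in the constructor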
The remaining code handles read/write events on the connections; part of it is shown below:
      if (pfe->mask & EPOLLOUT && in_conn->is_reply()) {
        WriteStatus write_status = in_conn->SendReply();
        in_conn->set_last_interaction(now);
        if (write_status == kWriteAll) {
          pink_epoll_->PinkModEvent(pfe->fd, 0, EPOLLIN);  // Remove EPOLLOUT
          in_conn->set_is_reply(false);
        } else if (write_status == kWriteHalf) {
          pink_epoll_->PinkModEvent(pfe->fd, EPOLLIN, EPOLLOUT);
          continue;  // send all write buffer,
                     // in case of next GetRequest()
                     // pollute the write buffer
        } else if (write_status == kWriteError) {
          should_close = 1;
        }
      }
For the writable case, write_status indicates whether the whole reply was flushed, and the epoll event mask is adjusted accordingly; I won't walk through the readable-event handling. A connection is closed either when an error occurs or when it times out:
      if (keepalive_timeout_ > 0 &&
          (now.tv_sec - conn->last_interaction().tv_sec > keepalive_timeout_))
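DoCronTask() is essentially a sweep over conns_ applying that check; roughly like this sketch (not the verbatim function; locking is omitted, and PinkDelEvent is my assumed name for the epoll-removal helper):

// Sketch of the idle-connection sweep.
struct timeval now;
gettimeofday(&now, nullptr);
for (auto it = conns_.begin(); it != conns_.end(); ) {
  if (keepalive_timeout_ > 0 &&
      now.tv_sec - it->second->last_interaction().tv_sec > keepalive_timeout_) {
    pink_epoll_->PinkDelEvent(it->first);   // PinkDelEvent: assumed epoll-removal helper
    delete it->second;
    it = conns_.erase(it);
  } else {
    ++it;
  }
}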
Below is part of the dispatch (main) thread implementation:
class DispatchThread : public ServerThread {
 public:
  ...
  void HandleNewConn(const int connfd, const std::string& ip_port) override;
 private:
  /*
   * Here we used auto poll to find the next work thread,
   * last_thread_ is the last work thread
   */
  int last_thread_;
  int work_num_;
  /*
   * This is the work threads
   */
  WorkerThread** worker_thread_;
  int queue_limit_;
  std::map localdata_;
  ...
};
The DispatchThread constructor just initializes a few parameters and creates and initializes the configured number of worker threads. The interesting part is how the dispatch thread talks to the workers, i.e. HandleNewConn:
void DispatchThread::HandleNewConn(
    const int connfd, const std::string& ip_port) {
  // Slow workers may consume many fds.
  // We simply loop to find next legal worker.
  PinkItem ti(connfd, ip_port);
  int next_thread = last_thread_;
  bool find = false;
  for (int cnt = 0; cnt < work_num_; cnt++) {
    {
      slash::MutexLock l(&worker_thread_[next_thread]->mutex_);
      std::queue<PinkItem> *q = &(worker_thread_[next_thread]->conn_queue_);
      if (q->size() < static_cast<size_t>(queue_limit_)) {
        q->push(ti);
        find = true;
        break;
      }
    }
    next_thread = (next_thread + 1) % work_num_;
  }

  if (find) {
    write(worker_thread_[next_thread]->notify_send_fd(), "", 1);
    last_thread_ = (next_thread + 1) % work_num_;
    log_info("find worker(%d), refresh the last_thread_ to %d",
             next_thread, last_thread_);
  } else {
    log_info("all workers are full, queue limit is %d", queue_limit_);
    // every worker is full
    // TODO(anan) maybe add log
    close(connfd);
  }
}
It polls the worker threads round-robin starting from last_thread_; the first worker whose pending-connection queue is still below queue_limit_ gets the new connection. Once a worker is chosen, a single blank byte is written to its pipe; the worker is woken by the readable event, pops the connection info, and creates the connection object. last_thread_ is then advanced so the next connection goes to the next worker. If every worker's queue is full, the socket is simply closed. I'll skip the remaining helper code.
The overall workflow of the framework is as follows:
Several worker threads are created, but they do not listen themselves; the main thread does listen/accept and hands new connections over to the workers, which then handle the read/write events as well as timeouts. The framework is fairly simple overall. Here are the benchmark numbers from the GitHub page:
Machine configuration:
CPU: Intel(R) Xeon(R) CPU E5-2630 0 @ 2.30GHz, 24 cores
Memory: 142 GB
Results:
client count = 100, qps ≈ 476,000
client count = 200, qps ≈ 692,000
client count = 400, qps ≈ 917,000
client count = 800, qps ≈ 931,000
As a side note, here is part of the multithreading framework in Pebble; it is quite similar.
class Thread
{
public:
    Thread();
    virtual ~Thread();

    virtual void Run() = 0;

    bool Start();

    bool Join();

    void Exit();
private:
    ::pthread_t m_thread_id;
};
static void* ThreadEntry(void* arg) {
    Thread* thread = reinterpret_cast<Thread*>(arg);

    thread->Run();

    return NULL;
}

bool Thread::Start() {
    if (::pthread_create(&m_thread_id, NULL, ThreadEntry, this) != 0) {
        return false;
    }

    return true;
}
class ThreadPool {
public:
    ...
private:
    struct Task {
    public:
        cxx::function<void()> fun;
        int64_t task_id;
    };

    class InnerThread : public Thread {
    public:
        InnerThread(BlockingQueue<Task>* pending_queue,
            BlockingQueue<int64_t>* finished_queue, BlockingQueue<int64_t>* working_queue);

        virtual void Run();
        void Terminate(bool waiting = true);
    private:
        BlockingQueue<Task>* m_pending_queue;
        BlockingQueue<int64_t>* m_finished_queue;
        BlockingQueue<int64_t>* m_working_queue;
        bool m_exit;
        bool m_waiting;
    };

    std::vector<InnerThread*> m_threads;
    BlockingQueue<Task> m_pending_queue;
    BlockingQueue<int64_t> m_finished_queue;
    BlockingQueue<int64_t> m_working_queue;
    bool m_exit;
    bool m_initialized;
    uint32_t m_thread_num;
    int32_t m_mode;
};
int ThreadPool::AddTask(cxx::function<void()>& fun, int64_t task_id) {
    if (!m_initialized) {
        return -1;
    }

    if (m_mode == NO_PENDING) {
        Stats stat;
        GetStatus(&stat);
        if (stat.working_thread_num + stat.pending_task_num >= m_thread_num) {
            return -2;
        }
    }

    Task t;
    t.fun = fun;
    t.task_id = task_id;

    m_pending_queue.PushBack(t);

    return 0;
}
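Typical usage has the familiar shape (my own sketch: Init()'s exact signature is not shown above, so that call is a placeholder, and I assume cxx:: is Pebble's alias for the std/tr1 function and bind utilities):

// Hypothetical usage of the Pebble thread pool.
void Work(int n) { /* ... */ }

ThreadPool pool;
pool.Init(4 /* thread_num; placeholder signature */);
cxx::function<void()> task = cxx::bind(Work, 42);
pool.AddTask(task, /* task_id = */ 1);   // a task_id >= 0 is reported via the finished queue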
ThreadPool::InnerThread::InnerThread(BlockingQueue<Task>* pending_queue,
    BlockingQueue<int64_t>* finished_queue, BlockingQueue<int64_t>* working_queue) :
    m_pending_queue(pending_queue),
    m_finished_queue(finished_queue),
    m_working_queue(working_queue),
    m_exit(false),
    m_waiting(true) {
}

void ThreadPool::InnerThread::Run() {
    while (1) {

        if (m_exit && ((!m_waiting) || (m_waiting && m_pending_queue->IsEmpty()))) {
            break;
        }

        Task t;
        bool ret = m_pending_queue->TimedPopFront(&t, 1000);
        if (ret) {
            m_working_queue->PushBack(0); // the element value doesn't matter; the queue
                                          // size just records how many threads are busy
            t.fun();
            if (t.task_id >= 0) {
                m_finished_queue->PushBack(t.task_id);
            }
            int64_t tmp;
            m_working_queue->TryPopBack(&tmp);
        }
    }
}
All worker threads share a single blocking task queue (contrast this with pink, where each worker has its own queue plus a wake-up pipe). The BlockingQueue is implemented as follows:
template <typename T>
class BlockingQueue
{
public:
    typedef T ValueType;
    typedef std::deque<T> UnderlyContainerType;
    ...
    void PushFront(const T& value)
    {
        {
            AutoLocker locker(m_mutex);
            while (UnlockedIsFull())
            {
                m_cond_not_full.Wait(&m_mutex);
            }
            m_queue.push_front(value);
        }
        m_cond_not_empty.Signal();
    }

    /// @brief try push element in to front of queue
    /// @param value to be pushed
    /// @note if queue is full, return false
    bool TryPushFront(const T& value)
    {
        {
            AutoLocker locker(&m_mutex);
            if (UnlockedIsFull())
                return false;
            m_queue.push_front(value);
        }
        m_cond_not_empty.Signal();
        return true;
    }
    ...
    bool TimedPushFront(const T& value, int timeout_in_ms)
    {
        bool success = false;
        {
            AutoLocker locker(&m_mutex);

            if (UnlockedIsFull())
                m_cond_not_full.TimedWait(&m_mutex, timeout_in_ms);

            if (!UnlockedIsFull())
            {
                m_queue.push_front(value);
                success = true;
            }
        }

        if (success)
            m_cond_not_empty.Signal();

        return success;
    }
    ...
private:
    size_t m_max_elements;
    UnderlyContainerType m_queue;

    Mutex m_mutex;
    ConditionVariable m_cond_not_empty;
    ConditionVariable m_cond_not_full;
};
Two condition variables are used, one for "not full" and one for "not empty", so producers and consumers block on different conditions. Note that the wait is wrapped in a while loop rather than a plain if (or is re-checked after a timed wait), to guard against spurious wakeups.
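The same two-condition-variable pattern in modern C++, just to make the "while, not if" point concrete (my own sketch, independent of Pebble):

// Minimal bounded blocking queue: two condition variables, and while-style waits
// (std::condition_variable::wait with a predicate) to guard against spurious wakeups.
#include <condition_variable>
#include <deque>
#include <mutex>

template <typename T>
class BoundedQueue {
 public:
  explicit BoundedQueue(size_t max) : max_(max) {}

  void Push(const T& v) {
    std::unique_lock<std::mutex> lk(mu_);
    not_full_.wait(lk, [&] { return q_.size() < max_; });  // loops internally until true
    q_.push_back(v);
    not_empty_.notify_one();
  }

  T Pop() {
    std::unique_lock<std::mutex> lk(mu_);
    not_empty_.wait(lk, [&] { return !q_.empty(); });
    T v = q_.front();
    q_.pop_front();
    not_full_.notify_one();
    return v;
  }

 private:
  size_t max_;
  std::deque<T> q_;
  std::mutex mu_;
  std::condition_variable not_empty_;
  std::condition_variable not_full_;
};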