A disclaimer up front: if any part of this analysis turns out to be wrong, I apologize in advance.
Let's analyze how nginx talks to memcached from the strace perspective.
With nginx in single-process mode:
[root@localhost sbin]# strace -p 13303
Process 13303 attached - interrupt to quit
gettimeofday({1281607164, 794683}, NULL) = 0
epoll_wait(9, {{EPOLLIN, {u32=158186800, u64=13834971005043195184}}}, 512, -1) = 1
gettimeofday({1281607175, 954973}, NULL) = 0
accept(6, {sa_family=AF_INET, sin_port=htons(4847), sin_addr=inet_addr("61.135.255.88")}, [16]) = 7
ioctl(7, FIONBIO, [1]) = 0
epoll_ctl(9, EPOLL_CTL_ADD, 7, {EPOLLIN|EPOLLET, {u32=158186984, u64=3458764513978727912}}) = 0
epoll_wait(9, {{EPOLLIN, {u32=158186984, u64=3458764513978727912}}}, 512, 60000) = 1
gettimeofday({1281607175, 956459}, NULL) = 0
recv(7, "GET /bar?cmd=set&key=dog&val=ani"..., 1024, 0) = 1024
recv(7, "Invalid%20license%20key%20%20Abo"..., 3515, 0) = 288
epoll_ctl(9, EPOLL_CTL_MOD, 7, {EPOLLIN|EPOLLOUT|EPOLLET, {u32=158186984, u64=52946776435310056}}) = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 10
ioctl(10, FIONBIO, [1]) = 0
epoll_ctl(9, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT|EPOLLET, {u32=158187076, u64=13834962827425463876}}) = 0
connect(10, {sa_family=AF_INET, sin_port=htons(22422), sin_addr=inet_addr("61.135.250.208")}, 16) = -1 EINPROGRESS (Operation now in progress)
epoll_wait(9, {{EPOLLOUT, {u32=158186984, u64=52946776435310056}}}, 512, 60000) = 1
gettimeofday({1281607175, 957018}, NULL) = 0
recv(7, 0xbfffacd3, 1, MSG_PEEK) = -1 EAGAIN (Resource temporarily unavailable)
epoll_wait(9, {{EPOLLOUT, {u32=158187076, u64=13834962827425463876}}}, 512, 59999) = 1
gettimeofday({1281607175, 957136}, NULL) = 0
getsockopt(10, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
writev(10, [{"set dog 1234 2000 6/r/n", 21}, {"animal", 6}, {"/r/n", 2}], 3) = 29
epoll_wait(9, {{EPOLLIN|EPOLLOUT, {u32=158187076, u64=13834962827425463876}}}, 512, 60000) = 1
gettimeofday({1281607175, 957479}, NULL) = 0
recv(10, "STORED/r/n", 4096, 0) = 8
setsockopt(7, SOL_TCP, TCP_NODELAY, [1], 4) = 0
writev(7, [{"HTTP/1.1 201 Created/r/nServer: ng"..., 151}, {"STORED/r/n", 8}], 2) = 159
write(4, "61.135.255.88 - - [12/Aug/2010:1"..., 242) = 242
recv(7, 0x96d04e0, 1024, 0) = -1 EAGAIN (Resource temporarily unavailable)
epoll_wait(9, {{EPOLLIN|EPOLLOUT, {u32=158186984, u64=52946776435310056}}}, 512, 65000) = 1
gettimeofday({1281607176, 820436}, NULL) = 0
recv(7, "GET /bar?cmd=set&key=dog&val=ani"..., 1024, 0) = 1024
recv(7, "Invalid%20license%20key%20%20Abo"..., 3515, 0) = 288
getsockopt(10, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
writev(10, [{"set dog 1234 2000 6/r/n", 21}, {"animal", 6}, {"/r/n", 2}], 3) = 29
recv(10, 0x96d2a00, 4096, 0) = -1 EAGAIN (Resource temporarily unavailable)
recv(7, 0xbfffacd3, 1, MSG_PEEK) = -1 EAGAIN (Resource temporarily unavailable)
epoll_wait(9, {{EPOLLIN|EPOLLOUT, {u32=158187076, u64=13834962827425463876}}}, 512, 60000) = 1
gettimeofday({1281607176, 820964}, NULL) = 0
recv(10, "STORED/r/n", 4096, 0) = 8
writev(7, [{"HTTP/1.1 201 Created/r/nServer: ng"..., 151}, {"STORED/r/n", 8}], 2) = 159
write(4, "61.135.255.88 - - [12/Aug/2010:1"..., 242) = 242
recv(7, 0x96d04e0, 1024, 0) = -1 EAGAIN (Resource temporarily unavailable)
epoll_wait(9,
Everything looks normal here: the backend connection is never closed.
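As an aside, the writev on fd 10 above is simply the memcached ASCII "set" command; the three iovecs are the request line, the value, and the trailing CRLF (field order per the memcached text protocol: set <key> <flags> <exptime> <bytes>). The exchange boils down to:

set dog 1234 2000 6\r\n     request line: key "dog", flags 1234, expire 2000 s, value is 6 bytes
animal\r\n                  the 6-byte value
STORED\r\n                  memcached's reply, which nginx relays back in the body of the HTTP/1.1 201 response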
Now in multi-process mode:
[root@localhost sbin]# strace -p 12216
Process 12216 attached - interrupt to quit
gettimeofday({1281606539, 572443}, NULL) = 0
epoll_wait(11, {{EPOLLIN|EPOLLOUT, {u32=141532648, u64=13826798420996627944}}}, 512, 59245) = 1
gettimeofday({1281606555, 539391}, NULL) = 0
gettid() = 12216
write(5, "2010/08/12 17:49:15 [emerg] 1221"..., 89) = 89
recv(8, "GET /bar?cmd=set&key=dog&val=ani"..., 1024, 0) = 1024
recv(8, "Invalid%20license%20key%20%20Abo"..., 3515, 0) = 262
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 9
ioctl(9, FIONBIO, [1]) = 0
epoll_ctl(11, EPOLL_CTL_ADD, 9, {EPOLLIN|EPOLLOUT|EPOLLET, {u32=141532740, u64=13826797235585654340}}) = 0
connect(9, {sa_family=AF_INET, sin_port=htons(22422), sin_addr=inet_addr("61.135.250.208")}, 16) = -1 EINPROGRESS (Operation now in progress)
epoll_wait(11, {{EPOLLOUT, {u32=141532740, u64=13826797235585654340}}}, 512, 60000) = 1
gettimeofday({1281606555, 540126}, NULL) = 0
gettid() = 12216
write(5, "2010/08/12 17:49:15 [emerg] 1221"..., 93) = 93
getsockopt(9, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
writev(9, [{"set dog 1234 2000 6/r/n", 21}, {"animal", 6}, {"/r/n", 2}], 3) = 29
epoll_wait(11, {{EPOLLIN|EPOLLOUT, {u32=141532740, u64=13826797235585654340}}}, 512, 60000) = 1
gettimeofday({1281606555, 540696}, NULL) = 0
gettid() = 12216
write(5, "2010/08/12 17:49:15 [emerg] 1221"..., 93) = 93
recv(9, "STORED/r/n", 4096, 0) = 8
writev(8, [{"HTTP/1.1 201 Created/r/nServer: ng"..., 151}, {"STORED/r/n", 8}], 2) = 159
write(4, "61.135.255.88 - - [12/Aug/2010:1"..., 242) = 242
recv(8, 0x86ee4e0, 1024, 0) = -1 EAGAIN (Resource temporarily unavailable)
epoll_wait(11, {{EPOLLIN|EPOLLOUT, {u32=141532648, u64=13826798420996627944}}}, 512, 65000) = 1
gettimeofday({1281606569, 521328}, NULL) = 0
gettid() = 12216
write(5, "2010/08/12 17:49:29 [emerg] 1221"..., 99) = 99
recv(8, "GET /bar?cmd=set&key=dog&val=ani"..., 1024, 0) = 1024
recv(8, "Invalid%20license%20key%20%20Abo"..., 3515, 0) = 288
getsockopt(9, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
writev(9, [{"set dog 1234 2000 6/r/n", 21}, {"animal", 6}, {"/r/n", 2}], 3) = 29
recv(9, 0x86f0a00, 4096, 0) = -1 EAGAIN (Resource temporarily unavailable)
epoll_wait(11, {{EPOLLIN|EPOLLOUT, {u32=141532740, u64=13826797235585654340}}}, 512, 60000) = 1
gettimeofday({1281606569, 522019}, NULL) = 0
gettid() = 12216
write(5, "2010/08/12 17:49:29 [emerg] 1221"..., 99) = 99
recv(9, "STORED/r/n", 4096, 0) = 8
writev(8, [{"HTTP/1.1 201 Created/r/nServer: ng"..., 151}, {"STORED/r/n", 8}], 2) = 159
write(4, "61.135.255.88 - - [12/Aug/2010:1"..., 242) = 242
recv(8, 0x86ee4e0, 1024, 0) = -1 EAGAIN (Resource temporarily unavailable)
close(9) = 0
epoll_wait(11,
We can see a close() on fd 9 at the end; fd 9 is nginx's connection to the backend. Why does this extra close appear?
Let's keep digging.
Comparing the single-process and multi-process runs, the multi-process case additionally goes through the ev->handler(ev) call inside ngx_event_process_posted. Here is the multi-process run under gdb:
Breakpoint 2, ngx_event_process_posted (cycle=0x838bcf8, posted=0x80d09ec) at src/event/ngx_event_posted.c:28
28 ev = (ngx_event_t *) *posted;
(gdb) n
33 if (ev == NULL) {
(gdb) p ev
$1 = (ngx_event_t *) 0x83b3da0
(gdb) n
37 ngx_delete_posted_event(ev);
(gdb) n
39 ev->handler(ev);
(gdb) s
ngx_http_keepalive_handler (rev=0x83b3da0) at src/http/ngx_http_request.c:2489
2489 c = rev->data;
(gdb) n
Breakpoint 2, ngx_event_process_posted (cycle=0x838bcf8, posted=0x80d09ec) at src/event/ngx_event_posted.c:28
28 ev = (ngx_event_t *) *posted;
(gdb)
33 if (ev == NULL) {
(gdb) p ev
$2 = (ngx_event_t *) 0x83b3dd4
(gdb)
37 ngx_delete_posted_event(ev);
(gdb) n
39 ev->handler(ev);
(gdb) s
ngx_http_upstream_keepalive_close_handler (ev=0x83b3dd4)
at /home/wangbin/work/memcached/keepalive/ngx_http_upstream_keepalive_module.c:394
394 c = ev->data;
So in multi-process mode the posted event ends up in ngx_http_upstream_keepalive_close_handler; I won't walk through the rest of the close path here.
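For context, ngx_event_process_posted is just a loop that drains the posted-event queue and invokes each handler. A sketch that lines up with the gdb line numbers above (written from memory of the nginx source of that era; debug logging and locking details omitted):

void
ngx_event_process_posted(ngx_cycle_t *cycle,
    ngx_thread_volatile ngx_event_t **posted)
{
    ngx_event_t  *ev;

    for ( ;; ) {

        ev = (ngx_event_t *) *posted;   /* line 28 in the gdb session */

        if (ev == NULL) {               /* queue drained */
            return;
        }

        ngx_delete_posted_event(ev);    /* unlink from the posted queue */

        ev->handler(ev);                /* here the keepalive close handler gets invoked */
    }
}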
Why does this extra close happen? It seems related to the posted-events mechanism, so let's list the candidate places where events get posted:
1 src/core/ngx_connection.c|740| <<<unknown>>> ngx_mutex_lock(ngx_posted_events_mutex);
2 src/core/ngx_connection.c|759| <<<unknown>>> ngx_mutex_unlock(ngx_posted_events_mutex);
7 src/event/modules/ngx_epoll_module.c|442| <<<unknown>>> ngx_mutex_lock(ngx_posted_events_mutex);
8 src/event/modules/ngx_epoll_module.c|512| <<<unknown>>> &ngx_posted_accept_events :
&ngx_posted_events);
9 src/event/modules/ngx_epoll_module.c|533| <<<unknown>>> ngx_locked_post_event(wev, &ngx_posted_events);
37 src/event/ngx_event.c|272| <<<unknown>>> ngx_event_process_posted(cycle, &ngx_posted_events);
59 src/event/ngx_event_timer.c|139| <<<unknown>>> ngx_post_event(ev, &ngx_posted_events);
60 src/http/ngx_http_request.c|208| <<<unknown>>> ngx_post_event(rev, &ngx_posted_events);
61 src/http/ngx_http_request.c|2349| <<<unknown>>> ngx_post_event(rev, &ngx_posted_events);
62 src/http/ngx_http_request.c|2472| <<<unknown>>> ngx_post_event(rev, &ngx_posted_events);
We ruled out the candidates one by one with a lot of experiments (apologies, I'm not very familiar with nginx internals, so brute force was the only method available).
Eventually a breakthrough surfaced:
531 // why is ngx_locked_post_event being called here without producing any log output?
532 if (flags & NGX_POST_EVENTS) {
533     ngx_locked_post_event(wev, &ngx_posted_events);
534     ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
535         " 535 process id:%l:", (long int) syscall(224)); // added log (syscall 224 = gettid on i386) to compare multi-process vs single-process
536
where the flags are defined as:
492 #define NGX_UPDATE_TIME 1
493 #define NGX_POST_EVENTS 2
494 #define NGX_POST_THREAD_EVENTS 4
In single-process mode, requests served over the reused connection never produce the log line above, while in multi-process mode they do. This suggests that flags is what changes the outcome, so we follow that lead:
in single-process mode flags=1 (NGX_UPDATE_TIME only), while in multi-process mode flags=3 (NGX_UPDATE_TIME | NGX_POST_EVENTS). That is the difference.
So where does this flag get set? Tracing back to ngx_process_events_and_timers, we find that the ngx_accept_mutex_held variable decides whether flags is 1 or 3. The relevant part of ngx_process_events_and_timers looks like this:
221 if (ngx_use_accept_mutex) {
222 if (ngx_accept_disabled > 0) {
223 ngx_accept_disabled--;
224
225 } else {
226 if (ngx_trylock_accept_mutex(cycle) == NGX_ERROR) {
227 return;
228 }
229
230 if (ngx_accept_mutex_held) {
231 flags |= NGX_POST_EVENTS;
232
233 } else {
234 if (timer == NGX_TIMER_INFINITE
235 || timer > ngx_accept_mutex_delay)
236 {
237 timer = ngx_accept_mutex_delay;
238 }
239 }
240 }
241 }
242
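Putting the pieces together: when the worker holds the accept mutex, NGX_POST_EVENTS is set, and ngx_epoll_process_events does not invoke the ready event's handler inline; it queues the event with ngx_locked_post_event (the call at line 533 we instrumented), and ngx_event_process_posted runs it later. Roughly, that branch looks like this (a simplified sketch; the real code also routes accept events to ngx_posted_accept_events and holds ngx_posted_events_mutex):

if (flags & NGX_POST_EVENTS) {
    /* defer: queue the event, to be run later by
     * ngx_event_process_posted(cycle, &ngx_posted_events) */
    ngx_locked_post_event(wev, &ngx_posted_events);

} else {
    /* no accept mutex involved: handle the event immediately */
    wev->handler(wev);
}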
And what controls ngx_accept_mutex_held? Tracing further into ngx_trylock_accept_mutex, the relevant code is:
261 if (ngx_shmtx_trylock(&ngx_accept_mutex)) {
262
263 ngx_log_debug0(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
264 "accept mutex locked");
265
266 if (ngx_accept_mutex_held
267 && ngx_accept_events == 0
268 && !(ngx_event_flags & NGX_USE_RTSIG_EVENT))
269 {
270 return NGX_OK;
271 }
272
273 if (ngx_enable_accept_events(cycle) == NGX_ERROR) {
274 ngx_shmtx_unlock(&ngx_accept_mutex);
275 return NGX_ERROR;
276 }
277
278 ngx_accept_events = 0;
279 ngx_accept_mutex_held = 1;
280
281 return NGX_OK;
282 }
So ngx_accept_mutex controls ngx_accept_mutex_held, ngx_accept_mutex_held controls flags, and flags decides whether ngx_locked_post_event runs (and whether our log line is printed). That makes accept_mutex the most likely root cause of the backend connections being closed.
Under this hypothesis we set accept_mutex off and ran a large number of tests: the connections were no longer closed. Further reading of the code confirmed that accept_mutex is indeed what triggers the closing of the backend connections.
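For reference, disabling it is a one-line change in the events block (accept_mutex is a standard nginx events directive; the worker_connections value below is just illustrative):

events {
    worker_connections  10240;   # illustrative value
    accept_mutex        off;     # let the kernel hand out new connections across workers
}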
Does setting accept_mutex off hurt nginx's behavior in any way? Quoting the nginx author:
OS may wake all processes waiting on accept() and select(), this is called http://en.wikipedia.org/wiki/Thundering_herd_problem
This is a problem if you have a lot of workers as in Apache (hundreds and more), but this insensible if you have just several workers as nginx usually has. Therefore turning accept_mutex off is as scheduling incoming connection by OS via select/kqueue/epoll/etc (but not accept()).
Since setting this parameter in our ad-serving deployment, it has been running for over eight months with no functional issues and a noticeable performance improvement; it also incidentally solved several other problems (nginx reporting failures to establish connections, trouble caused by too many TIME_WAIT sockets).
In general, keeping a bit over 100 backend connections is enough to handle 1500~2000 reqs/s. Without keepalive, at 2000 reqs/s you would accumulate roughly 2000 × 60 = 120,000 sockets in TIME_WAIT (TIME_WAIT lasts about 60 seconds), and around 200,000 at peak traffic; without tuning some kernel parameters, that is enough to cause problems.
Here is a concrete example from production.
The number of connections kept open to one particular backend server:
[wangbin@bgp198-134 ~]$ netstat -n |grep '11311'|grep '10.120.12.' |wc -l
79
And here is the breakdown of all TCP connection states on that machine:
[wangbin@bgp198-134 ~]$ netstat -n | awk '/^tcp/ {++state[$NF]} END {for(key in state) print key,"\t",state[key]}'
TIME_WAIT 2393
CLOSE_WAIT 3
FIN_WAIT1 1738
FIN_WAIT2 460
ESTABLISHED 10586
SYN_RECV 288
CLOSING 10
LAST_ACK 104
As you can see, using keepalive brings plenty of benefits.